diff options
86 files changed, 1908 insertions, 689 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl index acfe9df83139..b07e86d4597f 100644 --- a/Documentation/ABI/testing/sysfs-class-cxl +++ b/Documentation/ABI/testing/sysfs-class-cxl @@ -223,3 +223,13 @@ Description: write only Writing 1 will issue a PERST to card which may cause the card to reload the FPGA depending on load_image_on_perst. Users: https://github.com/ibm-capi/libcxl + +What: /sys/class/cxl/<card>/perst_reloads_same_image +Date: July 2015 +Contact: [email protected] +Description: read/write + Trust that when an image is reloaded via PERST, it will not + have changed. + 0 = don't trust, the image may be different (default) + 1 = trust that the image will not change. +Users: https://github.com/ibm-capi/libcxl diff --git a/Documentation/devicetree/bindings/leds/leds-powernv.txt b/Documentation/devicetree/bindings/leds/leds-powernv.txt new file mode 100644 index 000000000000..66655690f749 --- /dev/null +++ b/Documentation/devicetree/bindings/leds/leds-powernv.txt @@ -0,0 +1,26 @@ +Device Tree binding for LEDs on IBM Power Systems +------------------------------------------------- + +Required properties: +- compatible : Should be "ibm,opal-v3-led". +- led-mode : Should be "lightpath" or "guidinglight". + +Each location code of FRU/Enclosure must be expressed in the +form of a sub-node. + +Required properties for the sub nodes: +- led-types : Supported LED types (attention/identify/fault) provided + in the form of string array. + +Example: + +leds { + compatible = "ibm,opal-v3-led"; + led-mode = "lightpath"; + + U78C9.001.RST0027-P1-C1 { + led-types = "identify", "fault"; + }; + ... + ... +}; diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 7ddb1e319f84..87bb4aa6a6b9 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -346,6 +346,11 @@ of ftrace. Here is a list of some of the key files: x86-tsc: Architectures may define their own clocks. For example, x86 uses its own TSC cycle clock here. + ppc-tb: This uses the powerpc timebase register value. + This is in sync across CPUs and can also be used + to correlate events across hypervisor/guest if + tb_offset is known. + To set a clock, simply echo the clock name into this file. echo global > trace_clock diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index fe2f2c595fd9..b447918b9e2c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -159,6 +159,7 @@ config PPC select EDAC_SUPPORT select EDAC_ATOMIC_SCRUB select ARCH_HAS_DMA_SET_COHERENT_MASK + select HAVE_ARCH_SECCOMP_FILTER config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN @@ -518,11 +519,6 @@ config NODES_SPAN_OTHER_NODES def_bool y depends on NEED_MULTIPLE_NODES -config PPC_HAS_HASH_64K - bool - depends on PPC64 - default n - config STDBINUTILS bool "Using standard binutils settings" depends on 44x @@ -564,16 +560,16 @@ config PPC_4K_PAGES bool "4k page size" config PPC_16K_PAGES - bool "16k page size" if 44x || PPC_8xx + bool "16k page size" + depends on 44x || PPC_8xx config PPC_64K_PAGES - bool "64k page size" if 44x || PPC_STD_MMU_64 || PPC_BOOK3E_64 - depends on !PPC_FSL_BOOK3E - select PPC_HAS_HASH_64K if PPC_STD_MMU_64 + bool "64k page size" + depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64) config PPC_256K_PAGES - bool "256k page size" if 44x - depends on !STDBINUTILS + bool "256k page size" + depends on 44x && !STDBINUTILS help Make the page size 256k. diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index a97efc2146fd..6bc0ee4b1070 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -355,3 +355,6 @@ CONFIG_CRYPTO_DEV_NX_ENCRYPT=m CONFIG_VIRTUALIZATION=y CONFIG_KVM_BOOK3S_64=m CONFIG_KVM_BOOK3S_64_HV=m +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=m +CONFIG_LEDS_POWERNV=m diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 0d9efcedaf34..7991f37e5fe2 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -190,7 +190,8 @@ CONFIG_HVC_RTAS=y CONFIG_HVCS=m CONFIG_VIRTIO_CONSOLE=m CONFIG_IBM_BSR=m -CONFIG_GEN_RTC=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_GENERIC=y CONFIG_RAW_DRIVER=y CONFIG_MAX_RAW_DEVS=1024 CONFIG_FB=y @@ -319,3 +320,6 @@ CONFIG_CRYPTO_DEV_NX_ENCRYPT=m CONFIG_VIRTUALIZATION=y CONFIG_KVM_BOOK3S_64=m CONFIG_KVM_BOOK3S_64_HV=m +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=m +CONFIG_LEDS_POWERNV=m diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 050712e1ce41..ab9f4e0ed4cf 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -6,5 +6,4 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += rwsem.h -generic-y += trace_clock.h generic-y += vtime.h diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h index 0cc6eedc4780..85e88f7a59c0 100644 --- a/arch/powerpc/include/asm/archrandom.h +++ b/arch/powerpc/include/asm/archrandom.h @@ -7,14 +7,23 @@ static inline int arch_get_random_long(unsigned long *v) { - if (ppc_md.get_random_long) - return ppc_md.get_random_long(v); - return 0; } static inline int arch_get_random_int(unsigned int *v) { + return 0; +} + +static inline int arch_get_random_seed_long(unsigned long *v) +{ + if (ppc_md.get_random_seed) + return ppc_md.get_random_seed(v); + + return 0; +} +static inline int arch_get_random_seed_int(unsigned int *v) +{ unsigned long val; int rc; @@ -27,22 +36,13 @@ static inline int arch_get_random_int(unsigned int *v) static inline int arch_has_random(void) { - return !!ppc_md.get_random_long; -} - -static inline int arch_get_random_seed_long(unsigned long *v) -{ - return 0; -} -static inline int arch_get_random_seed_int(unsigned int *v) -{ return 0; } + static inline int arch_has_random_seed(void) { - return 0; + return !!ppc_md.get_random_seed; } - #endif /* CONFIG_ARCH_RANDOM */ #ifdef CONFIG_PPC_POWERNV diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index b142b8e0ed9e..4f2df589ec1d 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -174,6 +174,13 @@ typedef struct compat_siginfo { int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ int _fd; } _sigpoll; + + /* SIGSYS */ + struct { + unsigned int _call_addr; /* calling insn */ + int _syscall; /* triggering system call number */ + unsigned int _arch; /* AUDIT_ARCH_* of syscall */ + } _sigsys; } _sifields; } compat_siginfo_t; diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 952579f5e79a..cab6753f1be5 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -249,7 +249,7 @@ struct machdep_calls { #endif #ifdef CONFIG_ARCH_RANDOM - int (*get_random_long)(unsigned long *v); + int (*get_random_seed)(unsigned long *v); #endif }; diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 442995bacb33..9784c9241c70 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -154,7 +154,10 @@ #define OPAL_FLASH_WRITE 111 #define OPAL_FLASH_ERASE 112 #define OPAL_PRD_MSG 113 -#define OPAL_LAST 113 +#define OPAL_LEDS_GET_INDICATOR 114 +#define OPAL_LEDS_SET_INDICATOR 115 +#define OPAL_CEC_REBOOT2 116 +#define OPAL_LAST 116 /* Device tree flags */ @@ -340,6 +343,18 @@ enum OpalPciResetState { OPAL_ASSERT_RESET = 1 }; +enum OpalSlotLedType { + OPAL_SLOT_LED_TYPE_ID = 0, /* IDENTIFY LED */ + OPAL_SLOT_LED_TYPE_FAULT = 1, /* FAULT LED */ + OPAL_SLOT_LED_TYPE_ATTN = 2, /* System Attention LED */ + OPAL_SLOT_LED_TYPE_MAX = 3 +}; + +enum OpalSlotLedState { + OPAL_SLOT_LED_STATE_OFF = 0, /* LED is OFF */ + OPAL_SLOT_LED_STATE_ON = 1 /* LED is ON */ +}; + /* * Address cycle types for LPC accesses. These also correspond * to the content of the first cell of the "reg" property for @@ -437,6 +452,7 @@ struct OpalMemoryErrorData { /* HMI interrupt event */ enum OpalHMI_Version { OpalHMIEvt_V1 = 1, + OpalHMIEvt_V2 = 2, }; enum OpalHMI_Severity { @@ -467,6 +483,49 @@ enum OpalHMI_ErrType { OpalHMI_ERROR_CAPP_RECOVERY, }; +enum OpalHMI_XstopType { + CHECKSTOP_TYPE_UNKNOWN = 0, + CHECKSTOP_TYPE_CORE = 1, + CHECKSTOP_TYPE_NX = 2, +}; + +enum OpalHMI_CoreXstopReason { + CORE_CHECKSTOP_IFU_REGFILE = 0x00000001, + CORE_CHECKSTOP_IFU_LOGIC = 0x00000002, + CORE_CHECKSTOP_PC_DURING_RECOV = 0x00000004, + CORE_CHECKSTOP_ISU_REGFILE = 0x00000008, + CORE_CHECKSTOP_ISU_LOGIC = 0x00000010, + CORE_CHECKSTOP_FXU_LOGIC = 0x00000020, + CORE_CHECKSTOP_VSU_LOGIC = 0x00000040, + CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE = 0x00000080, + CORE_CHECKSTOP_LSU_REGFILE = 0x00000100, + CORE_CHECKSTOP_PC_FWD_PROGRESS = 0x00000200, + CORE_CHECKSTOP_LSU_LOGIC = 0x00000400, + CORE_CHECKSTOP_PC_LOGIC = 0x00000800, + CORE_CHECKSTOP_PC_HYP_RESOURCE = 0x00001000, + CORE_CHECKSTOP_PC_HANG_RECOV_FAILED = 0x00002000, + CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED = 0x00004000, + CORE_CHECKSTOP_PC_DEBUG_TRIG_ERR_INJ = 0x00008000, + CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ = 0x00010000, +}; + +enum OpalHMI_NestAccelXstopReason { + NX_CHECKSTOP_SHM_INVAL_STATE_ERR = 0x00000001, + NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1 = 0x00000002, + NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2 = 0x00000004, + NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR = 0x00000008, + NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR = 0x00000010, + NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR = 0x00000020, + NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR = 0x00000040, + NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR = 0x00000080, + NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR = 0x00000100, + NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR = 0x00000200, + NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR = 0x00000400, + NX_CHECKSTOP_DMA_CRB_UE = 0x00000800, + NX_CHECKSTOP_DMA_CRB_SUE = 0x00001000, + NX_CHECKSTOP_PBI_ISN_UE = 0x00002000, +}; + struct OpalHMIEvent { uint8_t version; /* 0x00 */ uint8_t severity; /* 0x01 */ @@ -477,6 +536,23 @@ struct OpalHMIEvent { __be64 hmer; /* TFMR register. Valid only for TFAC and TFMR_PARITY error type. */ __be64 tfmr; + + /* version 2 and later */ + union { + /* + * checkstop info (Core/NX). + * Valid for OpalHMI_ERROR_MALFUNC_ALERT. + */ + struct { + uint8_t xstop_type; /* enum OpalHMI_XstopType */ + uint8_t reserved_1[3]; + __be32 xstop_reason; + union { + __be32 pir; /* for CHECKSTOP_TYPE_CORE */ + __be32 chip_id; /* for CHECKSTOP_TYPE_NX */ + } u; + } xstop_error; + } u; }; enum { @@ -796,6 +872,12 @@ enum OpalSysCooling { OPAL_SYSCOOL_INSF = 0x0001, /* System insufficient cooling */ }; +/* Argument to OPAL_CEC_REBOOT2() */ +enum { + OPAL_REBOOT_NORMAL = 0, + OPAL_REBOOT_PLATFORM_ERROR = 1, +}; + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_API_H */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index a091c2701d94..800115910e43 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -44,6 +44,7 @@ int64_t opal_tpo_write(uint64_t token, uint32_t year_mon_day, uint32_t hour_min); int64_t opal_cec_power_down(uint64_t request); int64_t opal_cec_reboot(void); +int64_t opal_cec_reboot2(uint32_t reboot_type, char *diag); int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset); int64_t opal_write_nvram(uint64_t buffer, uint64_t size, uint64_t offset); int64_t opal_handle_interrupt(uint64_t isn, __be64 *outstanding_event_mask); @@ -196,6 +197,10 @@ int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg, int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id, struct opal_i2c_request *oreq); int64_t opal_prd_msg(struct opal_prd_msg *msg); +int64_t opal_leds_get_ind(char *loc_code, __be64 *led_mask, + __be64 *led_value, __be64 *max_led_type); +int64_t opal_leds_set_ind(uint64_t token, char *loc_code, const u64 led_mask, + const u64 led_value, __be64 *max_led_type); int64_t opal_flash_read(uint64_t id, uint64_t offset, uint64_t buf, uint64_t size, uint64_t token); diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 712add590445..37fc53587bb4 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -42,6 +42,7 @@ struct pci_controller_ops { #endif int (*dma_set_mask)(struct pci_dev *dev, u64 dma_mask); + u64 (*dma_get_required_mask)(struct pci_dev *dev); void (*shutdown)(struct pci_controller *); }; diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 3bb7488bd24b..fa1dfb7f7b48 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -134,11 +134,11 @@ #define pte_iterate_hashed_end() } while(0) -#ifdef CONFIG_PPC_HAS_HASH_64K -#define pte_pagesize_index(mm, addr, pte) get_slice_psize(mm, addr) -#else +/* + * We expect this to be called only for user addresses or kernel virtual + * addresses other than the linear mapping. + */ #define pte_pagesize_index(mm, addr, pte) MMU_PAGE_4K -#endif #endif /* __real_pte */ diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 4122a86d6858..ca0c5bff7849 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -61,6 +61,7 @@ int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); void eeh_pe_state_mark(struct eeh_pe *pe, int state); void eeh_pe_state_clear(struct eeh_pe *pe, int state); +void eeh_pe_state_mark_with_cfg(struct eeh_pe *pe, int state); void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode); void eeh_sysfs_add_device(struct pci_dev *pdev); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index bb2758819bc9..aa1cc5f015ee 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1280,6 +1280,15 @@ struct pt_regs; extern void ppc_save_regs(struct pt_regs *regs); +static inline void update_power8_hid0(unsigned long hid0) +{ + /* + * The HID0 update on Power8 should at the very least be + * preceded by a a SYNC instruction followed by an ISYNC + * instruction + */ + asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0)); +} #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_REG_H */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 7a4ede16b283..b77ef369c0f0 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -343,6 +343,7 @@ extern void rtas_power_off(void); extern void rtas_halt(void); extern void rtas_os_term(char *str); extern int rtas_get_sensor(int sensor, int index, int *state); +extern int rtas_get_sensor_fast(int sensor, int index, int *state); extern int rtas_get_power_level(int powerdomain, int *level); extern int rtas_set_power_level(int powerdomain, int level, int *setlevel); extern bool rtas_indicator_present(int token, int *maxindex); diff --git a/arch/powerpc/include/asm/spu_csa.h b/arch/powerpc/include/asm/spu_csa.h index a40fd491250c..51f80b41cda3 100644 --- a/arch/powerpc/include/asm/spu_csa.h +++ b/arch/powerpc/include/asm/spu_csa.h @@ -241,12 +241,6 @@ struct spu_priv2_collapsed { */ struct spu_state { struct spu_lscsa *lscsa; -#ifdef CONFIG_SPU_FS_64K_LS - int use_big_pages; - /* One struct page per 64k page */ -#define SPU_LSCSA_NUM_BIG_PAGES (sizeof(struct spu_lscsa) / 0x10000) - struct page *lscsa_pages[SPU_LSCSA_NUM_BIG_PAGES]; -#endif struct spu_problem_collapsed prob; struct spu_priv1_collapsed priv1; struct spu_priv2_collapsed priv2; diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index ff21b7a2f0cc..ab9f3f0a8637 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -22,10 +22,15 @@ extern const unsigned long sys_call_table[]; #endif /* CONFIG_FTRACE_SYSCALLS */ -static inline long syscall_get_nr(struct task_struct *task, - struct pt_regs *regs) +static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return TRAP(regs) == 0xc00 ? regs->gpr[0] : -1L; + /* + * Note that we are returning an int here. That means 0xffffffff, ie. + * 32-bit negative 1, will be interpreted as -1 on a 64-bit kernel. + * This is important for seccomp so that compat tasks can set r0 = -1 + * to reject the syscall. + */ + return TRAP(regs) == 0xc00 ? regs->gpr[0] : -1; } static inline void syscall_rollback(struct task_struct *task, @@ -34,12 +39,6 @@ static inline void syscall_rollback(struct task_struct *task, regs->gpr[3] = regs->orig_gpr3; } -static inline long syscall_get_error(struct task_struct *task, - struct pt_regs *regs) -{ - return (regs->ccr & 0x10000000) ? -regs->gpr[3] : 0; -} - static inline long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) { @@ -50,9 +49,15 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { + /* + * In the general case it's not obvious that we must deal with CCR + * here, as the syscall exit path will also do that for us. However + * there are some places, eg. the signal code, which check ccr to + * decide if the value in r3 is actually an error. + */ if (error) { regs->ccr |= 0x10000000L; - regs->gpr[3] = -error; + regs->gpr[3] = error; } else { regs->ccr &= ~0x10000000L; regs->gpr[3] = val; @@ -64,19 +69,22 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned int i, unsigned int n, unsigned long *args) { + unsigned long val, mask = -1UL; + BUG_ON(i + n > 6); -#ifdef CONFIG_PPC64 - if (test_tsk_thread_flag(task, TIF_32BIT)) { - /* - * Zero-extend 32-bit argument values. The high bits are - * garbage ignored by the actual syscall dispatch. - */ - while (n-- > 0) - args[n] = (u32) regs->gpr[3 + i + n]; - return; - } + +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_32BIT)) + mask = 0xffffffff; #endif - memcpy(args, ®s->gpr[3 + i], n * sizeof(args[0])); + while (n--) { + if (n == 0 && i == 0) + val = regs->orig_gpr3; + else + val = regs->gpr[3 + i + n]; + + args[n] = val & mask; + } } static inline void syscall_set_arguments(struct task_struct *task, @@ -86,6 +94,10 @@ static inline void syscall_set_arguments(struct task_struct *task, { BUG_ON(i + n > 6); memcpy(®s->gpr[3 + i], args, n * sizeof(args[0])); + + /* Also copy the first argument into orig_gpr3 */ + if (i == 0 && n > 0) + regs->orig_gpr3 = args[0]; } static inline int syscall_get_arch(void) diff --git a/arch/powerpc/include/asm/trace_clock.h b/arch/powerpc/include/asm/trace_clock.h new file mode 100644 index 000000000000..cf1ee75ca069 --- /dev/null +++ b/arch/powerpc/include/asm/trace_clock.h @@ -0,0 +1,19 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * Copyright (C) 2015 Naveen N. Rao, IBM Corporation + */ + +#ifndef _ASM_PPC_TRACE_CLOCK_H +#define _ASM_PPC_TRACE_CLOCK_H + +#include <linux/compiler.h> +#include <linux/types.h> + +extern u64 notrace trace_clock_ppc_tb(void); + +#define ARCH_TRACE_CLOCKS { trace_clock_ppc_tb, "ppc-tb", 0 }, + +#endif /* _ASM_PPC_TRACE_CLOCK_H */ diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild index f44a027818af..dab3717e3ea0 100644 --- a/arch/powerpc/include/uapi/asm/Kbuild +++ b/arch/powerpc/include/uapi/asm/Kbuild @@ -6,6 +6,7 @@ header-y += bitsperlong.h header-y += bootx.h header-y += byteorder.h header-y += cputable.h +header-y += eeh.h header-y += elf.h header-y += epapr_hcalls.h header-y += errno.h diff --git a/arch/powerpc/include/uapi/asm/errno.h b/arch/powerpc/include/uapi/asm/errno.h index 8c145fd17d86..e8b6b5f7de7c 100644 --- a/arch/powerpc/include/uapi/asm/errno.h +++ b/arch/powerpc/include/uapi/asm/errno.h @@ -6,6 +6,4 @@ #undef EDEADLOCK #define EDEADLOCK 58 /* File locking deadlock error */ -#define _LAST_ERRNO 516 - #endif /* _ASM_POWERPC_ERRNO_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 12868b1c4e05..ba336930d448 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -118,6 +118,7 @@ obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_TRACING) += trace_clock.o ifneq ($(CONFIG_PPC_INDIRECT_PIO),y) obj-y += iomap.o diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 1558f81ac1ff..59503ed98e5f 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -353,6 +353,13 @@ u64 dma_get_required_mask(struct device *dev) if (ppc_md.dma_get_required_mask) return ppc_md.dma_get_required_mask(dev); + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_controller *phb = pci_bus_to_host(pdev->bus); + if (phb->controller_ops.dma_get_required_mask) + return phb->controller_ops.dma_get_required_mask(pdev); + } + return __dma_get_required_mask(dev); } EXPORT_SYMBOL_GPL(dma_get_required_mask); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index af9b597b10af..58c598400028 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -750,14 +750,14 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat eeh_pe_state_clear(pe, EEH_PE_ISOLATED); break; case pcie_hot_reset: - eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); eeh_ops->reset(pe, EEH_RESET_HOT); break; case pcie_warm_reset: - eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); @@ -1116,9 +1116,6 @@ void eeh_add_device_late(struct pci_dev *dev) return; } - if (eeh_has_flag(EEH_PROBE_MODE_DEV)) - eeh_ops->probe(pdn, NULL); - /* * The EEH cache might not be removed correctly because of * unbalanced kref to the device during unplug time, which @@ -1142,6 +1139,9 @@ void eeh_add_device_late(struct pci_dev *dev) dev->dev.archdata.edev = NULL; } + if (eeh_has_flag(EEH_PROBE_MODE_DEV)) + eeh_ops->probe(pdn, NULL); + edev->pdev = dev; dev->dev.archdata.edev = edev; diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 35f0b62259bb..8654cb166c19 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -657,6 +657,28 @@ void eeh_pe_state_clear(struct eeh_pe *pe, int state) eeh_pe_traverse(pe, __eeh_pe_state_clear, &state); } +/** + * eeh_pe_state_mark_with_cfg - Mark PE state with unblocked config space + * @pe: PE + * @state: PE state to be set + * + * Set specified flag to PE and its child PEs. The PCI config space + * of some PEs is blocked automatically when EEH_PE_ISOLATED is set, + * which isn't needed in some situations. The function allows to set + * the specified flag to indicated PEs without blocking their PCI + * config space. + */ +void eeh_pe_state_mark_with_cfg(struct eeh_pe *pe, int state) +{ + eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); + if (!(state & EEH_PE_ISOLATED)) + return; + + /* Clear EEH_PE_CFG_BLOCKED, which might be set just now */ + state = EEH_PE_CFG_BLOCKED; + eeh_pe_traverse(pe, __eeh_pe_state_clear, &state); +} + /* * Some PCI bridges (e.g. PLX bridges) have primary/secondary * buses assigned explicitly by firmware, and we probably have diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 46fc0f4d8982..2405631e91a2 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -20,6 +20,7 @@ */ #include <linux/errno.h> +#include <linux/err.h> #include <linux/sys.h> #include <linux/threads.h> #include <asm/reg.h> @@ -354,7 +355,7 @@ ret_from_syscall: SYNC MTMSRD(r10) lwz r9,TI_FLAGS(r12) - li r8,-_LAST_ERRNO + li r8,-MAX_ERRNO andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) bne- syscall_exit_work cmplw 0,r3,r8 @@ -457,6 +458,10 @@ syscall_dotrace: lwz r7,GPR7(r1) lwz r8,GPR8(r1) REST_NVGPRS(r1) + + cmplwi r0,NR_syscalls + /* Return code is already in r3 thanks to do_syscall_trace_enter() */ + bge- ret_from_syscall b syscall_dotrace_cont syscall_exit_work: diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 579e0f9a2d57..a94f155db78e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -19,6 +19,7 @@ */ #include <linux/errno.h> +#include <linux/err.h> #include <asm/unistd.h> #include <asm/processor.h> #include <asm/page.h> @@ -150,8 +151,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) CURRENT_THREAD_INFO(r11, r1) ld r10,TI_FLAGS(r11) andi. r11,r10,_TIF_SYSCALL_DOTRACE - bne syscall_dotrace -.Lsyscall_dotrace_cont: + bne syscall_dotrace /* does not return */ cmpldi 0,r0,NR_syscalls bge- syscall_enosys @@ -207,7 +207,7 @@ system_call: /* label this so stack traces look sane */ #endif /* CONFIG_PPC_BOOK3E */ ld r9,TI_FLAGS(r12) - li r11,-_LAST_ERRNO + li r11,-MAX_ERRNO andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) bne- syscall_exit_work cmpld r3,r11 @@ -245,22 +245,34 @@ syscall_dotrace: bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl do_syscall_trace_enter + /* - * Restore argument registers possibly just changed. - * We use the return value of do_syscall_trace_enter - * for the call number to look up in the table (r0). + * We use the return value of do_syscall_trace_enter() as the syscall + * number. If the syscall was rejected for any reason do_syscall_trace_enter() + * returns an invalid syscall number and the test below against + * NR_syscalls will fail. */ mr r0,r3 + + /* Restore argument registers just clobbered and/or possibly changed. */ ld r3,GPR3(r1) ld r4,GPR4(r1) ld r5,GPR5(r1) ld r6,GPR6(r1) ld r7,GPR7(r1) ld r8,GPR8(r1) + + /* Repopulate r9 and r10 for the system_call path */ addi r9,r1,STACK_FRAME_OVERHEAD CURRENT_THREAD_INFO(r10, r1) ld r10,TI_FLAGS(r10) - b .Lsyscall_dotrace_cont + + cmpldi r0,NR_syscalls + blt+ system_call + + /* Return code is already in r3 thanks to do_syscall_trace_enter() */ + b .Lsyscall_exit + syscall_enosys: li r3,-ENOSYS @@ -277,7 +289,7 @@ syscall_exit_work: beq+ 0f REST_NVGPRS(r1) b 2f -0: cmpld r3,r11 /* r10 is -LAST_ERRNO */ +0: cmpld r3,r11 /* r11 is -MAX_ERRNO */ blt+ 1f andi. r0,r9,_TIF_NOERROR bne- 1f diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 33aa4ddf597d..9ad37f827a97 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -649,7 +649,6 @@ static void kvm_check_ins(u32 *inst, u32 features) kvm_patch_ins_mtsrin(inst, inst_rt, inst_rb); } break; - break; #endif } diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 4e314b90c75d..6e4168cf4698 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -475,9 +475,18 @@ _GLOBAL(kexec_wait) #ifdef CONFIG_KEXEC /* use no memory without kexec */ lwz r4,0(r5) cmpwi 0,r4,0 - bnea 0x60 + beq 99b +#ifdef CONFIG_PPC_BOOK3S_64 + li r10,0x60 + mfmsr r11 + clrrdi r11,r11,1 /* Clear MSR_LE */ + mtsrr0 r10 + mtsrr1 r11 + rfid +#else + ba 0x60 +#endif #endif - b 99b /* this can be in text because we won't change it until we are * running in real anyways diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 1e703f8ebad4..98ba106a59ef 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -541,10 +541,9 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, time->tv_sec = be64_to_cpu(oops_hdr->timestamp); time->tv_nsec = 0; } - *buf = kmalloc(length, GFP_KERNEL); + *buf = kmemdup(buff + hdr_size, length, GFP_KERNEL); if (*buf == NULL) return -ENOMEM; - memcpy(*buf, buff + hdr_size, length); kfree(buff); if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) @@ -582,9 +581,10 @@ static int nvram_pstore_init(void) spin_lock_init(&nvram_pstore_info.buf_lock); rc = pstore_register(&nvram_pstore_info); - if (rc != 0) - pr_err("nvram: pstore_register() failed, defaults to " - "kmsg_dump; returned %d\n", rc); + if (rc && (rc != -EPERM)) + /* Print error only when pstore.backend == nvram */ + pr_err("nvram: pstore_register() failed, returned %d. " + "Defaults to kmsg_dump\n", rc); return rc; } diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index b9de34d44fcb..7587b2ae5f77 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -823,23 +823,15 @@ static void pcibios_fixup_resources(struct pci_dev *dev) (reg.start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) { /* Only print message if not re-assigning */ if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) - pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] " - "is unassigned\n", - pci_name(dev), i, - (unsigned long long)res->start, - (unsigned long long)res->end, - (unsigned int)res->flags); + pr_debug("PCI:%s Resource %d %pR is unassigned\n", + pci_name(dev), i, res); res->end -= res->start; res->start = 0; res->flags |= IORESOURCE_UNSET; continue; } - pr_debug("PCI:%s Resource %d %016llx-%016llx [%x]\n", - pci_name(dev), i, - (unsigned long long)res->start,\ - (unsigned long long)res->end, - (unsigned int)res->flags); + pr_debug("PCI:%s Resource %d %pR\n", pci_name(dev), i, res); } /* Call machine specific resource fixup */ @@ -943,11 +935,7 @@ static void pcibios_fixup_bridge(struct pci_bus *bus) continue; } - pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x]\n", - pci_name(dev), i, - (unsigned long long)res->start,\ - (unsigned long long)res->end, - (unsigned int)res->flags); + pr_debug("PCI:%s Bus rsrc %d %pR\n", pci_name(dev), i, res); /* Try to detect uninitialized P2P bridge resources, * and clear them out so they get re-assigned later @@ -1132,10 +1120,8 @@ static int reparent_resources(struct resource *parent, *pp = NULL; for (p = res->child; p != NULL; p = p->sibling) { p->parent = res; - pr_debug("PCI: Reparented %s [%llx..%llx] under %s\n", - p->name, - (unsigned long long)p->start, - (unsigned long long)p->end, res->name); + pr_debug("PCI: Reparented %s %pR under %s\n", + p->name, p, res->name); } return 0; } @@ -1204,14 +1190,9 @@ static void pcibios_allocate_bus_resources(struct pci_bus *bus) } } - pr_debug("PCI: %s (bus %d) bridge rsrc %d: %016llx-%016llx " - "[0x%x], parent %p (%s)\n", - bus->self ? pci_name(bus->self) : "PHB", - bus->number, i, - (unsigned long long)res->start, - (unsigned long long)res->end, - (unsigned int)res->flags, - pr, (pr && pr->name) ? pr->name : "nil"); + pr_debug("PCI: %s (bus %d) bridge rsrc %d: %pR, parent %p (%s)\n", + bus->self ? pci_name(bus->self) : "PHB", bus->number, + i, res, pr, (pr && pr->name) ? pr->name : "nil"); if (pr && !(pr->flags & IORESOURCE_UNSET)) { struct pci_dev *dev = bus->self; @@ -1253,11 +1234,8 @@ static inline void alloc_resource(struct pci_dev *dev, int idx) { struct resource *pr, *r = &dev->resource[idx]; - pr_debug("PCI: Allocating %s: Resource %d: %016llx..%016llx [%x]\n", - pci_name(dev), idx, - (unsigned long long)r->start, - (unsigned long long)r->end, - (unsigned int)r->flags); + pr_debug("PCI: Allocating %s: Resource %d: %pR\n", + pci_name(dev), idx, r); pr = pci_find_parent_resource(dev, r); if (!pr || (pr->flags & IORESOURCE_UNSET) || @@ -1265,11 +1243,7 @@ static inline void alloc_resource(struct pci_dev *dev, int idx) printk(KERN_WARNING "PCI: Cannot allocate resource region %d" " of device %s, will remap\n", idx, pci_name(dev)); if (pr) - pr_debug("PCI: parent is %p: %016llx-%016llx [%x]\n", - pr, - (unsigned long long)pr->start, - (unsigned long long)pr->end, - (unsigned int)pr->flags); + pr_debug("PCI: parent is %p: %pR\n", pr, pr); /* We'll assign a new address later */ r->flags |= IORESOURCE_UNSET; r->end -= r->start; @@ -1431,12 +1405,8 @@ void pcibios_claim_one_bus(struct pci_bus *bus) if (r->parent || !r->start || !r->flags) continue; - pr_debug("PCI: Claiming %s: " - "Resource %d: %016llx..%016llx [%x]\n", - pci_name(dev), i, - (unsigned long long)r->start, - (unsigned long long)r->end, - (unsigned int)r->flags); + pr_debug("PCI: Claiming %s: Resource %d: %pR\n", + pci_name(dev), i, r); if (pci_claim_resource(dev, i) == 0) continue; @@ -1520,11 +1490,8 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose, } else { offset = pcibios_io_space_offset(hose); - pr_debug("PCI: PHB IO resource = %08llx-%08llx [%lx] off 0x%08llx\n", - (unsigned long long)res->start, - (unsigned long long)res->end, - (unsigned long)res->flags, - (unsigned long long)offset); + pr_debug("PCI: PHB IO resource = %pR off 0x%08llx\n", + res, (unsigned long long)offset); pci_add_resource_offset(resources, res, offset); } @@ -1541,11 +1508,8 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose, offset = hose->mem_offset[i]; - pr_debug("PCI: PHB MEM resource %d = %08llx-%08llx [%lx] off 0x%08llx\n", i, - (unsigned long long)res->start, - (unsigned long long)res->end, - (unsigned long)res->flags, - (unsigned long long)offset); + pr_debug("PCI: PHB MEM resource %d = %pR off 0x%08llx\n", i, + res, (unsigned long long)offset); pci_add_resource_offset(resources, res, offset); } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 8b888b12a475..bef76c5033e4 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -218,22 +218,18 @@ static void __init check_cpu_pa_features(unsigned long node) } #ifdef CONFIG_PPC_STD_MMU_64 -static void __init check_cpu_slb_size(unsigned long node) +static void __init init_mmu_slb_size(unsigned long node) { const __be32 *slb_size_ptr; - slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL); - if (slb_size_ptr != NULL) { - mmu_slb_size = be32_to_cpup(slb_size_ptr); - return; - } - slb_size_ptr = of_get_flat_dt_prop(node, "ibm,slb-size", NULL); - if (slb_size_ptr != NULL) { + slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL) ? : + of_get_flat_dt_prop(node, "ibm,slb-size", NULL); + + if (slb_size_ptr) mmu_slb_size = be32_to_cpup(slb_size_ptr); - } } #else -#define check_cpu_slb_size(node) do { } while(0) +#define init_mmu_slb_size(node) do { } while(0) #endif static struct feature_property { @@ -380,7 +376,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, check_cpu_feature_properties(node); check_cpu_pa_features(node); - check_cpu_slb_size(node); + init_mmu_slb_size(node); #ifdef CONFIG_PPC64 if (nthreads > 1) @@ -476,9 +472,10 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node) flags = of_read_number(&dm[3], 1); /* skip DRC index, pad, assoc. list index, flags */ dm += 4; - /* skip this block if the reserved bit is set in flags (0x80) - or if the block is not assigned to this partition (0x8) */ - if ((flags & 0x80) || !(flags & 0x8)) + /* skip this block if the reserved bit is set in flags + or if the block is not assigned to this partition */ + if ((flags & DRCONF_MEM_RESERVED) || + !(flags & DRCONF_MEM_ASSIGNED)) continue; size = memblock_size; rngs = 1; diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index f21897b42057..737c0d0b53ac 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1762,26 +1762,81 @@ long arch_ptrace(struct task_struct *child, long request, return ret; } -/* - * We must return the syscall number to actually look up in the table. - * This can be -1L to skip running any syscall at all. +#ifdef CONFIG_SECCOMP +static int do_seccomp(struct pt_regs *regs) +{ + if (!test_thread_flag(TIF_SECCOMP)) + return 0; + + /* + * The ABI we present to seccomp tracers is that r3 contains + * the syscall return value and orig_gpr3 contains the first + * syscall parameter. This is different to the ptrace ABI where + * both r3 and orig_gpr3 contain the first syscall parameter. + */ + regs->gpr[3] = -ENOSYS; + + /* + * We use the __ version here because we have already checked + * TIF_SECCOMP. If this fails, there is nothing left to do, we + * have already loaded -ENOSYS into r3, or seccomp has put + * something else in r3 (via SECCOMP_RET_ERRNO/TRACE). + */ + if (__secure_computing()) + return -1; + + /* + * The syscall was allowed by seccomp, restore the register + * state to what ptrace and audit expect. + * Note that we use orig_gpr3, which means a seccomp tracer can + * modify the first syscall parameter (in orig_gpr3) and also + * allow the syscall to proceed. + */ + regs->gpr[3] = regs->orig_gpr3; + + return 0; +} +#else +static inline int do_seccomp(struct pt_regs *regs) { return 0; } +#endif /* CONFIG_SECCOMP */ + +/** + * do_syscall_trace_enter() - Do syscall tracing on kernel entry. + * @regs: the pt_regs of the task to trace (current) + * + * Performs various types of tracing on syscall entry. This includes seccomp, + * ptrace, syscall tracepoints and audit. + * + * The pt_regs are potentially visible to userspace via ptrace, so their + * contents is ABI. + * + * One or more of the tracers may modify the contents of pt_regs, in particular + * to modify arguments or even the syscall number itself. + * + * It's also possible that a tracer can choose to reject the system call. In + * that case this function will return an illegal syscall number, and will put + * an appropriate return value in regs->r3. + * + * Return: the (possibly changed) syscall number. */ long do_syscall_trace_enter(struct pt_regs *regs) { - long ret = 0; + bool abort = false; user_exit(); - secure_computing_strict(regs->gpr[0]); + if (do_seccomp(regs)) + return -1; - if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) + if (test_thread_flag(TIF_SYSCALL_TRACE)) { /* - * Tracing decided this syscall should not happen. - * We'll return a bogus call number to get an ENOSYS - * error, but leave the original number in regs->gpr[0]. + * The tracer may decide to abort the syscall, if so tracehook + * will return !0. Note that the tracer may also just change + * regs->gpr[0] to an invalid syscall number, that is handled + * below on the exit path. */ - ret = -1L; + abort = tracehook_report_syscall_entry(regs) != 0; + } if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->gpr[0]); @@ -1798,7 +1853,17 @@ long do_syscall_trace_enter(struct pt_regs *regs) regs->gpr[5] & 0xffffffff, regs->gpr[6] & 0xffffffff); - return ret ?: regs->gpr[0]; + if (abort || regs->gpr[0] >= NR_syscalls) { + /* + * If we are aborting explicitly, or if the syscall number is + * now invalid, set the return value to -ENOSYS. + */ + regs->gpr[3] = -ENOSYS; + return -1; + } + + /* Return the possibly modified but valid syscall number */ + return regs->gpr[0]; } void do_syscall_trace_leave(struct pt_regs *regs) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 7a488c108410..84bf934cf748 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -478,8 +478,9 @@ unsigned int rtas_busy_delay_time(int status) if (status == RTAS_BUSY) { ms = 1; - } else if (status >= 9900 && status <= 9905) { - order = status - 9900; + } else if (status >= RTAS_EXTENDED_DELAY_MIN && + status <= RTAS_EXTENDED_DELAY_MAX) { + order = status - RTAS_EXTENDED_DELAY_MIN; for (ms = 1; order > 0; order--) ms *= 10; } @@ -584,6 +585,23 @@ int rtas_get_sensor(int sensor, int index, int *state) } EXPORT_SYMBOL(rtas_get_sensor); +int rtas_get_sensor_fast(int sensor, int index, int *state) +{ + int token = rtas_token("get-sensor-state"); + int rc; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + rc = rtas_call(token, 2, 2, state, sensor, index); + WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN && + rc <= RTAS_EXTENDED_DELAY_MAX)); + + if (rc < 0) + return rtas_error_rc(rc); + return rc; +} + bool rtas_indicator_present(int token, int *maxindex) { int proplen, count, i; @@ -641,7 +659,8 @@ int rtas_set_indicator_fast(int indicator, int index, int new_value) rc = rtas_call(token, 3, 1, NULL, indicator, index, new_value); - WARN_ON(rc == -2 || (rc >= 9900 && rc <= 9905)); + WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN && + rc <= RTAS_EXTENDED_DELAY_MAX)); if (rc < 0) return rtas_error_rc(rc); diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index d3a831ac0f92..77f97284124e 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -949,6 +949,11 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s) err |= __put_user(s->si_overrun, &d->si_overrun); err |= __put_user(s->si_int, &d->si_int); break; + case __SI_SYS >> 16: + err |= __put_user(ptr_to_compat(s->si_call_addr), &d->si_call_addr); + err |= __put_user(s->si_syscall, &d->si_syscall); + err |= __put_user(s->si_arch, &d->si_arch); + break; case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ case __SI_MESGQ >> 16: err |= __put_user(s->si_int, &d->si_int); diff --git a/arch/powerpc/kernel/trace_clock.c b/arch/powerpc/kernel/trace_clock.c new file mode 100644 index 000000000000..49170690946d --- /dev/null +++ b/arch/powerpc/kernel/trace_clock.c @@ -0,0 +1,15 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * Copyright (C) 2015 Naveen N. Rao, IBM Corporation + */ + +#include <asm/trace_clock.h> +#include <asm/time.h> + +u64 notrace trace_clock_ppc_tb(void) +{ + return get_tb(); +} diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 463174a4a647..3b49e3295901 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -701,7 +701,7 @@ htab_pte_insert_failure: #endif /* CONFIG_PPC_64K_PAGES */ -#ifdef CONFIG_PPC_HAS_HASH_64K +#ifdef CONFIG_PPC_64K_PAGES /***************************************************************************** * * @@ -993,7 +993,7 @@ ht64_pte_insert_failure: b ht64_bail -#endif /* CONFIG_PPC_HAS_HASH_64K */ +#endif /* CONFIG_PPC_64K_PAGES */ /***************************************************************************** diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 5ec987f65b2c..aee70171355b 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -640,7 +640,7 @@ extern u32 ht64_call_hpte_updatepp[]; static void __init htab_finish_init(void) { -#ifdef CONFIG_PPC_HAS_HASH_64K +#ifdef CONFIG_PPC_64K_PAGES patch_branch(ht64_call_hpte_insert1, ppc_function_entry(ppc_md.hpte_insert), BRANCH_SET_LINK); @@ -653,7 +653,7 @@ static void __init htab_finish_init(void) patch_branch(ht64_call_hpte_updatepp, ppc_function_entry(ppc_md.hpte_updatepp), BRANCH_SET_LINK); -#endif /* CONFIG_PPC_HAS_HASH_64K */ +#endif /* CONFIG_PPC_64K_PAGES */ patch_branch(htab_call_hpte_insert1, ppc_function_entry(ppc_md.hpte_insert), @@ -1151,12 +1151,12 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, check_paca_psize(ea, mm, psize, user_region); #endif /* CONFIG_PPC_64K_PAGES */ -#ifdef CONFIG_PPC_HAS_HASH_64K +#ifdef CONFIG_PPC_64K_PAGES if (psize == MMU_PAGE_64K) rc = __hash_page_64K(ea, access, vsid, ptep, trap, flags, ssize); else -#endif /* CONFIG_PPC_HAS_HASH_64K */ +#endif /* CONFIG_PPC_64K_PAGES */ { int spp = subpage_protection(mm, ea); if (access & spp) @@ -1264,12 +1264,12 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, update_flags |= HPTE_LOCAL_UPDATE; /* Hash it in */ -#ifdef CONFIG_PPC_HAS_HASH_64K +#ifdef CONFIG_PPC_64K_PAGES if (mm->context.user_psize == MMU_PAGE_64K) rc = __hash_page_64K(ea, access, vsid, ptep, trap, update_flags, ssize); else -#endif /* CONFIG_PPC_HAS_HASH_64K */ +#endif /* CONFIG_PPC_64K_PAGES */ rc = __hash_page_4K(ea, access, vsid, ptep, trap, update_flags, ssize, subpage_protection(mm, ea)); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index bb0bd7025cb8..06c14523b787 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -808,14 +808,6 @@ static int __init add_huge_page_size(unsigned long long size) if ((mmu_psize = shift_to_mmu_psize(shift)) < 0) return -EINVAL; -#ifdef CONFIG_SPU_FS_64K_LS - /* Disable support for 64K huge pages when 64K SPU local store - * support is enabled as the current implementation conflicts. - */ - if (shift == PAGE_SHIFT_64K) - return -EINVAL; -#endif /* CONFIG_SPU_FS_64K_LS */ - BUG_ON(mmu_psize_defs[mmu_psize].shift != shift); /* Return if huge page size has already been setup */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 5e80621d9324..8b9502adaf79 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -225,7 +225,7 @@ static void initialize_distance_lookup_table(int nid, for (i = 0; i < distance_ref_points_depth; i++) { const __be32 *entry; - entry = &associativity[be32_to_cpu(distance_ref_points[i])]; + entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1]; distance_lookup_table[nid][i] = of_read_number(entry, 1); } } @@ -248,8 +248,12 @@ static int associativity_to_nid(const __be32 *associativity) nid = -1; if (nid > 0 && - of_read_number(associativity, 1) >= distance_ref_points_depth) - initialize_distance_lookup_table(nid, associativity); + of_read_number(associativity, 1) >= distance_ref_points_depth) { + /* + * Skip the length field and send start of associativity array + */ + initialize_distance_lookup_table(nid, associativity + 1); + } out: return nid; @@ -507,6 +511,12 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, if (nid == 0xffff || nid >= MAX_NUMNODES) nid = default_nid; + + if (nid > 0) { + index = drmem->aa_index * aa->array_sz; + initialize_distance_lookup_table(nid, + &aa->arrays[index]); + } } return nid; diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 6e450ca66526..8a32a2be3c53 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -41,9 +41,9 @@ static void slb_allocate(unsigned long ea) (((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T) static inline unsigned long mk_esid_data(unsigned long ea, int ssize, - unsigned long slot) + unsigned long entry) { - return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot; + return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | entry; } static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, @@ -249,11 +249,24 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) static inline void patch_slb_encoding(unsigned int *insn_addr, unsigned int immed) { - int insn = (*insn_addr & 0xffff0000) | immed; + + /* + * This function patches either an li or a cmpldi instruction with + * a new immediate value. This relies on the fact that both li + * (which is actually addi) and cmpldi both take a 16-bit immediate + * value, and it is situated in the same location in the instruction, + * ie. bits 16-31 (Big endian bit order) or the lower 16 bits. + * The signedness of the immediate operand differs between the two + * instructions however this code is only ever patching a small value, + * much less than 1 << 15, so we can get away with it. + * To patch the value we read the existing instruction, clear the + * immediate value, and or in our new value, then write the instruction + * back. + */ + unsigned int insn = (*insn_addr & 0xffff0000) | immed; patch_instruction(insn_addr, insn); } -extern u32 slb_compare_rr_to_size[]; extern u32 slb_miss_kernel_load_linear[]; extern u32 slb_miss_kernel_load_io[]; extern u32 slb_compare_rr_to_size[]; @@ -309,12 +322,11 @@ void slb_initialize(void) lflags = SLB_VSID_KERNEL | linear_llp; vflags = SLB_VSID_KERNEL | vmalloc_llp; - /* Invalidate the entire SLB (even slot 0) & all the ERATS */ + /* Invalidate the entire SLB (even entry 0) & all the ERATS */ asm volatile("isync":::"memory"); asm volatile("slbmte %0,%0"::"r" (0) : "memory"); asm volatile("isync; slbia; isync":::"memory"); create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, 0); - create_shadowed_slbe(VMALLOC_START, mmu_kernel_ssize, vflags, 1); /* For the boot cpu, we're running on the stack in init_thread_union, diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index d90893b76e7c..b0382f3f1095 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -53,7 +53,7 @@ struct cpu_hw_events { /* BHRB bits */ u64 bhrb_filter; /* BHRB HW branch filter */ - int bhrb_users; + unsigned int bhrb_users; void *bhrb_context; struct perf_branch_stack bhrb_stack; struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES]; @@ -369,8 +369,8 @@ static void power_pmu_bhrb_disable(struct perf_event *event) if (!ppmu->bhrb_nr) return; + WARN_ON_ONCE(!cpuhw->bhrb_users); cpuhw->bhrb_users--; - WARN_ON_ONCE(cpuhw->bhrb_users < 0); perf_sched_cb_dec(event->ctx->pmu); if (!cpuhw->disabled && !cpuhw->bhrb_users) { diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index df956295c2a7..527c8b98e97e 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -416,7 +416,7 @@ out_val: } static struct attribute *event_to_desc_attr(struct hv_24x7_event_data *event, - int nonce) + int nonce) { int nl, dl; char *name = event_name(event, &nl); @@ -444,7 +444,7 @@ event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce) } static ssize_t event_data_to_attrs(unsigned ix, struct attribute **attrs, - struct hv_24x7_event_data *event, int nonce) + struct hv_24x7_event_data *event, int nonce) { unsigned i; @@ -512,7 +512,7 @@ static int memord(const void *d1, size_t s1, const void *d2, size_t s2) } static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2, - size_t s2, unsigned d2) + size_t s2, unsigned d2) { int r = memord(v1, s1, v2, s2); @@ -526,7 +526,7 @@ static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2, } static int event_uniq_add(struct rb_root *root, const char *name, int nl, - unsigned domain) + unsigned domain) { struct rb_node **new = &(root->rb_node), *parent = NULL; struct event_uniq *data; @@ -650,8 +650,8 @@ static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event, #define MAX_4K (SIZE_MAX / 4096) static int create_events_from_catalog(struct attribute ***events_, - struct attribute ***event_descs_, - struct attribute ***event_long_descs_) + struct attribute ***event_descs_, + struct attribute ***event_long_descs_) { unsigned long hret; size_t catalog_len, catalog_page_len, event_entry_count, @@ -1008,8 +1008,8 @@ static const struct attribute_group *attr_groups[] = { }; static void log_24x7_hcall(struct hv_24x7_request_buffer *request_buffer, - struct hv_24x7_data_result_buffer *result_buffer, - unsigned long ret) + struct hv_24x7_data_result_buffer *result_buffer, + unsigned long ret) { struct hv_24x7_request *req; @@ -1026,7 +1026,7 @@ static void log_24x7_hcall(struct hv_24x7_request_buffer *request_buffer, * Start the process for a new H_GET_24x7_DATA hcall. */ static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer, - struct hv_24x7_data_result_buffer *result_buffer) + struct hv_24x7_data_result_buffer *result_buffer) { memset(request_buffer, 0, 4096); @@ -1041,7 +1041,7 @@ static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer, * by 'init_24x7_request()' and 'add_event_to_24x7_request()'. */ static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer, - struct hv_24x7_data_result_buffer *result_buffer) + struct hv_24x7_data_result_buffer *result_buffer) { unsigned long ret; @@ -1104,7 +1104,6 @@ static unsigned long single_24x7_request(struct perf_event *event, u64 *count) unsigned long ret; struct hv_24x7_request_buffer *request_buffer; struct hv_24x7_data_result_buffer *result_buffer; - struct hv_24x7_result *resb; BUILD_BUG_ON(sizeof(*request_buffer) > 4096); BUILD_BUG_ON(sizeof(*result_buffer) > 4096); @@ -1125,8 +1124,7 @@ static unsigned long single_24x7_request(struct perf_event *event, u64 *count) } /* process result from hcall */ - resb = &result_buffer->results[0]; - *count = be64_to_cpu(resb->elements[0].element_data[0]); + *count = be64_to_cpu(result_buffer->results[0].elements[0].element_data[0]); out: put_cpu_var(hv_24x7_reqb); diff --git a/arch/powerpc/platforms/512x/Kconfig b/arch/powerpc/platforms/512x/Kconfig index 5aa3f4b5332c..48bf38d0de35 100644 --- a/arch/powerpc/platforms/512x/Kconfig +++ b/arch/powerpc/platforms/512x/Kconfig @@ -7,8 +7,8 @@ config PPC_MPC512x select PPC_PCI_CHOICE select FSL_PCI if PCI select ARCH_WANT_OPTIONAL_GPIOLIB - select USB_EHCI_BIG_ENDIAN_MMIO - select USB_EHCI_BIG_ENDIAN_DESC + select USB_EHCI_BIG_ENDIAN_MMIO if USB_EHCI_HCD + select USB_EHCI_BIG_ENDIAN_DESC if USB_EHCI_HCD config MPC5121_ADS bool "Freescale MPC5121E ADS" diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 2f23133ab3d1..b0ac1773cea6 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -57,21 +57,6 @@ config SPU_FS Units on machines implementing the Broadband Processor Architecture. -config SPU_FS_64K_LS - bool "Use 64K pages to map SPE local store" - # we depend on PPC_MM_SLICES for now rather than selecting - # it because we depend on hugetlbfs hooks being present. We - # will fix that when the generic code has been improved to - # not require hijacking hugetlbfs hooks. - depends on SPU_FS && PPC_MM_SLICES && !PPC_64K_PAGES - default y - select PPC_HAS_HASH_64K - help - This option causes SPE local stores to be mapped in process - address spaces using 64K pages while the rest of the kernel - uses 4K pages. This can improve performances of applications - using multiple SPEs by lowering the TLB pressure on them. - config SPU_BASE bool default n diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index d966bbe58b8f..5038fd578e65 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -239,23 +239,6 @@ spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) unsigned long address = (unsigned long)vmf->virtual_address; unsigned long pfn, offset; -#ifdef CONFIG_SPU_FS_64K_LS - struct spu_state *csa = &ctx->csa; - int psize; - - /* Check what page size we are using */ - psize = get_slice_psize(vma->vm_mm, address); - - /* Some sanity checking */ - BUG_ON(csa->use_big_pages != (psize == MMU_PAGE_64K)); - - /* Wow, 64K, cool, we need to align the address though */ - if (csa->use_big_pages) { - BUG_ON(vma->vm_start & 0xffff); - address &= ~0xfffful; - } -#endif /* CONFIG_SPU_FS_64K_LS */ - offset = vmf->pgoff << PAGE_SHIFT; if (offset >= LS_SIZE) return VM_FAULT_SIGBUS; @@ -310,22 +293,6 @@ static const struct vm_operations_struct spufs_mem_mmap_vmops = { static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) { -#ifdef CONFIG_SPU_FS_64K_LS - struct spu_context *ctx = file->private_data; - struct spu_state *csa = &ctx->csa; - - /* Sanity check VMA alignment */ - if (csa->use_big_pages) { - pr_debug("spufs_mem_mmap 64K, start=0x%lx, end=0x%lx," - " pgoff=0x%lx\n", vma->vm_start, vma->vm_end, - vma->vm_pgoff); - if (vma->vm_start & 0xffff) - return -EINVAL; - if (vma->vm_pgoff & 0xf) - return -EINVAL; - } -#endif /* CONFIG_SPU_FS_64K_LS */ - if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; @@ -336,25 +303,6 @@ static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -#ifdef CONFIG_SPU_FS_64K_LS -static unsigned long spufs_get_unmapped_area(struct file *file, - unsigned long addr, unsigned long len, unsigned long pgoff, - unsigned long flags) -{ - struct spu_context *ctx = file->private_data; - struct spu_state *csa = &ctx->csa; - - /* If not using big pages, fallback to normal MM g_u_a */ - if (!csa->use_big_pages) - return current->mm->get_unmapped_area(file, addr, len, - pgoff, flags); - - /* Else, try to obtain a 64K pages slice */ - return slice_get_unmapped_area(addr, len, flags, - MMU_PAGE_64K, 1); -} -#endif /* CONFIG_SPU_FS_64K_LS */ - static const struct file_operations spufs_mem_fops = { .open = spufs_mem_open, .release = spufs_mem_release, @@ -362,9 +310,6 @@ static const struct file_operations spufs_mem_fops = { .write = spufs_mem_write, .llseek = generic_file_llseek, .mmap = spufs_mem_mmap, -#ifdef CONFIG_SPU_FS_64K_LS - .get_unmapped_area = spufs_get_unmapped_area, -#endif }; static int spufs_ps_fault(struct vm_area_struct *vma, diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c index 147069938cfe..b847e9403566 100644 --- a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c +++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c @@ -31,7 +31,7 @@ #include "spufs.h" -static int spu_alloc_lscsa_std(struct spu_state *csa) +int spu_alloc_lscsa(struct spu_state *csa) { struct spu_lscsa *lscsa; unsigned char *p; @@ -48,7 +48,7 @@ static int spu_alloc_lscsa_std(struct spu_state *csa) return 0; } -static void spu_free_lscsa_std(struct spu_state *csa) +void spu_free_lscsa(struct spu_state *csa) { /* Clear reserved bit before vfree. */ unsigned char *p; @@ -61,123 +61,3 @@ static void spu_free_lscsa_std(struct spu_state *csa) vfree(csa->lscsa); } - -#ifdef CONFIG_SPU_FS_64K_LS - -#define SPU_64K_PAGE_SHIFT 16 -#define SPU_64K_PAGE_ORDER (SPU_64K_PAGE_SHIFT - PAGE_SHIFT) -#define SPU_64K_PAGE_COUNT (1ul << SPU_64K_PAGE_ORDER) - -int spu_alloc_lscsa(struct spu_state *csa) -{ - struct page **pgarray; - unsigned char *p; - int i, j, n_4k; - - /* Check availability of 64K pages */ - if (!spu_64k_pages_available()) - goto fail; - - csa->use_big_pages = 1; - - pr_debug("spu_alloc_lscsa(csa=0x%p), trying to allocate 64K pages\n", - csa); - - /* First try to allocate our 64K pages. We need 5 of them - * with the current implementation. In the future, we should try - * to separate the lscsa with the actual local store image, thus - * allowing us to require only 4 64K pages per context - */ - for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) { - /* XXX This is likely to fail, we should use a special pool - * similar to what hugetlbfs does. - */ - csa->lscsa_pages[i] = alloc_pages(GFP_KERNEL, - SPU_64K_PAGE_ORDER); - if (csa->lscsa_pages[i] == NULL) - goto fail; - } - - pr_debug(" success ! creating vmap...\n"); - - /* Now we need to create a vmalloc mapping of these for the kernel - * and SPU context switch code to use. Currently, we stick to a - * normal kernel vmalloc mapping, which in our case will be 4K - */ - n_4k = SPU_64K_PAGE_COUNT * SPU_LSCSA_NUM_BIG_PAGES; - pgarray = kmalloc(sizeof(struct page *) * n_4k, GFP_KERNEL); - if (pgarray == NULL) - goto fail; - for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) - for (j = 0; j < SPU_64K_PAGE_COUNT; j++) - /* We assume all the struct page's are contiguous - * which should be hopefully the case for an order 4 - * allocation.. - */ - pgarray[i * SPU_64K_PAGE_COUNT + j] = - csa->lscsa_pages[i] + j; - csa->lscsa = vmap(pgarray, n_4k, VM_USERMAP, PAGE_KERNEL); - kfree(pgarray); - if (csa->lscsa == NULL) - goto fail; - - memset(csa->lscsa, 0, sizeof(struct spu_lscsa)); - - /* Set LS pages reserved to allow for user-space mapping. - * - * XXX isn't that a bit obsolete ? I think we should just - * make sure the page count is high enough. Anyway, won't harm - * for now - */ - for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) - SetPageReserved(vmalloc_to_page(p)); - - pr_debug(" all good !\n"); - - return 0; -fail: - pr_debug("spufs: failed to allocate lscsa 64K pages, falling back\n"); - spu_free_lscsa(csa); - return spu_alloc_lscsa_std(csa); -} - -void spu_free_lscsa(struct spu_state *csa) -{ - unsigned char *p; - int i; - - if (!csa->use_big_pages) { - spu_free_lscsa_std(csa); - return; - } - csa->use_big_pages = 0; - - if (csa->lscsa == NULL) - goto free_pages; - - for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) - ClearPageReserved(vmalloc_to_page(p)); - - vunmap(csa->lscsa); - csa->lscsa = NULL; - - free_pages: - - for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) - if (csa->lscsa_pages[i]) - __free_pages(csa->lscsa_pages[i], SPU_64K_PAGE_ORDER); -} - -#else /* CONFIG_SPU_FS_64K_LS */ - -int spu_alloc_lscsa(struct spu_state *csa) -{ - return spu_alloc_lscsa_std(csa); -} - -void spu_free_lscsa(struct spu_state *csa) -{ - spu_free_lscsa_std(csa); -} - -#endif /* !defined(CONFIG_SPU_FS_64K_LS) */ diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c index a8f49d380449..d000f4e21981 100644 --- a/arch/powerpc/platforms/powernv/opal-hmi.c +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -35,9 +35,134 @@ struct OpalHmiEvtNode { struct list_head list; struct OpalHMIEvent hmi_evt; }; + +struct xstop_reason { + uint32_t xstop_reason; + const char *unit_failed; + const char *description; +}; + static LIST_HEAD(opal_hmi_evt_list); static DEFINE_SPINLOCK(opal_hmi_evt_lock); +static void print_core_checkstop_reason(const char *level, + struct OpalHMIEvent *hmi_evt) +{ + int i; + static const struct xstop_reason xstop_reason[] = { + { CORE_CHECKSTOP_IFU_REGFILE, "IFU", + "RegFile core check stop" }, + { CORE_CHECKSTOP_IFU_LOGIC, "IFU", "Logic core check stop" }, + { CORE_CHECKSTOP_PC_DURING_RECOV, "PC", + "Core checkstop during recovery" }, + { CORE_CHECKSTOP_ISU_REGFILE, "ISU", + "RegFile core check stop (mapper error)" }, + { CORE_CHECKSTOP_ISU_LOGIC, "ISU", "Logic core check stop" }, + { CORE_CHECKSTOP_FXU_LOGIC, "FXU", "Logic core check stop" }, + { CORE_CHECKSTOP_VSU_LOGIC, "VSU", "Logic core check stop" }, + { CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE, "PC", + "Recovery in maintenance mode" }, + { CORE_CHECKSTOP_LSU_REGFILE, "LSU", + "RegFile core check stop" }, + { CORE_CHECKSTOP_PC_FWD_PROGRESS, "PC", + "Forward Progress Error" }, + { CORE_CHECKSTOP_LSU_LOGIC, "LSU", "Logic core check stop" }, + { CORE_CHECKSTOP_PC_LOGIC, "PC", "Logic core check stop" }, + { CORE_CHECKSTOP_PC_HYP_RESOURCE, "PC", + "Hypervisor Resource error - core check stop" }, + { CORE_CHECKSTOP_PC_HANG_RECOV_FAILED, "PC", + "Hang Recovery Failed (core check stop)" }, + { CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED, "PC", + "Ambiguous Hang Detected (unknown source)" }, + { CORE_CHECKSTOP_PC_DEBUG_TRIG_ERR_INJ, "PC", + "Debug Trigger Error inject" }, + { CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ, "PC", + "Hypervisor check stop via SPRC/SPRD" }, + }; + + /* Validity check */ + if (!hmi_evt->u.xstop_error.xstop_reason) { + printk("%s Unknown Core check stop.\n", level); + return; + } + + printk("%s CPU PIR: %08x\n", level, + be32_to_cpu(hmi_evt->u.xstop_error.u.pir)); + for (i = 0; i < ARRAY_SIZE(xstop_reason); i++) + if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) & + xstop_reason[i].xstop_reason) + printk("%s [Unit: %-3s] %s\n", level, + xstop_reason[i].unit_failed, + xstop_reason[i].description); +} + +static void print_nx_checkstop_reason(const char *level, + struct OpalHMIEvent *hmi_evt) +{ + int i; + static const struct xstop_reason xstop_reason[] = { + { NX_CHECKSTOP_SHM_INVAL_STATE_ERR, "DMA & Engine", + "SHM invalid state error" }, + { NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1, "DMA & Engine", + "DMA invalid state error bit 15" }, + { NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2, "DMA & Engine", + "DMA invalid state error bit 16" }, + { NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR, "DMA & Engine", + "Channel 0 invalid state error" }, + { NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR, "DMA & Engine", + "Channel 1 invalid state error" }, + { NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR, "DMA & Engine", + "Channel 2 invalid state error" }, + { NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR, "DMA & Engine", + "Channel 3 invalid state error" }, + { NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR, "DMA & Engine", + "Channel 4 invalid state error" }, + { NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR, "DMA & Engine", + "Channel 5 invalid state error" }, + { NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR, "DMA & Engine", + "Channel 6 invalid state error" }, + { NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR, "DMA & Engine", + "Channel 7 invalid state error" }, + { NX_CHECKSTOP_DMA_CRB_UE, "DMA & Engine", + "UE error on CRB(CSB address, CCB)" }, + { NX_CHECKSTOP_DMA_CRB_SUE, "DMA & Engine", + "SUE error on CRB(CSB address, CCB)" }, + { NX_CHECKSTOP_PBI_ISN_UE, "PowerBus Interface", + "CRB Kill ISN received while holding ISN with UE error" }, + }; + + /* Validity check */ + if (!hmi_evt->u.xstop_error.xstop_reason) { + printk("%s Unknown NX check stop.\n", level); + return; + } + + printk("%s NX checkstop on CHIP ID: %x\n", level, + be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id)); + for (i = 0; i < ARRAY_SIZE(xstop_reason); i++) + if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) & + xstop_reason[i].xstop_reason) + printk("%s [Unit: %-3s] %s\n", level, + xstop_reason[i].unit_failed, + xstop_reason[i].description); +} + +static void print_checkstop_reason(const char *level, + struct OpalHMIEvent *hmi_evt) +{ + switch (hmi_evt->u.xstop_error.xstop_type) { + case CHECKSTOP_TYPE_CORE: + print_core_checkstop_reason(level, hmi_evt); + break; + case CHECKSTOP_TYPE_NX: + print_nx_checkstop_reason(level, hmi_evt); + break; + case CHECKSTOP_TYPE_UNKNOWN: + printk("%s Unknown Malfunction Alert.\n", level); + break; + } +} + static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) { const char *level, *sevstr, *error_info; @@ -95,6 +220,13 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY)) printk("%s TFMR: %016llx\n", level, be64_to_cpu(hmi_evt->tfmr)); + + if (hmi_evt->version < OpalHMIEvt_V2) + return; + + /* OpalHMIEvt_V2 and above provides reason for malfunction alert. */ + if (hmi_evt->type == OpalHMI_ERROR_MALFUNC_ALERT) + print_checkstop_reason(level, hmi_evt); } static void hmi_event_handler(struct work_struct *work) @@ -103,6 +235,8 @@ static void hmi_event_handler(struct work_struct *work) struct OpalHMIEvent *hmi_evt; struct OpalHmiEvtNode *msg_node; uint8_t disposition; + struct opal_msg msg; + int unrecoverable = 0; spin_lock_irqsave(&opal_hmi_evt_lock, flags); while (!list_empty(&opal_hmi_evt_list)) { @@ -118,14 +252,53 @@ static void hmi_event_handler(struct work_struct *work) /* * Check if HMI event has been recovered or not. If not - * then we can't continue, invoke panic. + * then kernel can't continue, we need to panic. + * But before we do that, display all the HMI event + * available on the list and set unrecoverable flag to 1. */ if (disposition != OpalHMI_DISPOSITION_RECOVERED) - panic("Unrecoverable HMI exception"); + unrecoverable = 1; spin_lock_irqsave(&opal_hmi_evt_lock, flags); } spin_unlock_irqrestore(&opal_hmi_evt_lock, flags); + + if (unrecoverable) { + int ret; + + /* Pull all HMI events from OPAL before we panic. */ + while (opal_get_msg(__pa(&msg), sizeof(msg)) == OPAL_SUCCESS) { + u32 type; + + type = be32_to_cpu(msg.msg_type); + + /* skip if not HMI event */ + if (type != OPAL_MSG_HMI_EVT) + continue; + + /* HMI event info starts from param[0] */ + hmi_evt = (struct OpalHMIEvent *)&msg.params[0]; + print_hmi_event_info(hmi_evt); + } + + /* + * Unrecoverable HMI exception. We need to inform BMC/OCC + * about this error so that it can collect relevant data + * for error analysis before rebooting. + */ + ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, + "Unrecoverable HMI exception"); + if (ret == OPAL_UNSUPPORTED) { + pr_emerg("Reboot type %d not supported\n", + OPAL_REBOOT_PLATFORM_ERROR); + } + + /* + * Fall through and panic if opal_cec_reboot2() returns + * OPAL_UNSUPPORTED. + */ + panic("Unrecoverable HMI exception"); + } } static DECLARE_WORK(hmi_event_work, hmi_event_handler); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 88e433357371..b7a464fef7a7 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -202,6 +202,7 @@ OPAL_CALL(opal_rtc_read, OPAL_RTC_READ); OPAL_CALL(opal_rtc_write, OPAL_RTC_WRITE); OPAL_CALL(opal_cec_power_down, OPAL_CEC_POWER_DOWN); OPAL_CALL(opal_cec_reboot, OPAL_CEC_REBOOT); +OPAL_CALL(opal_cec_reboot2, OPAL_CEC_REBOOT2); OPAL_CALL(opal_read_nvram, OPAL_READ_NVRAM); OPAL_CALL(opal_write_nvram, OPAL_WRITE_NVRAM); OPAL_CALL(opal_handle_interrupt, OPAL_HANDLE_INTERRUPT); @@ -298,3 +299,5 @@ OPAL_CALL(opal_flash_read, OPAL_FLASH_READ); OPAL_CALL(opal_flash_write, OPAL_FLASH_WRITE); OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE); OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG); +OPAL_CALL(opal_leds_get_ind, OPAL_LEDS_GET_INDICATOR); +OPAL_CALL(opal_leds_set_ind, OPAL_LEDS_SET_INDICATOR); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index f084afa0e3ba..230f3a7cdea4 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -441,6 +441,7 @@ static int opal_recover_mce(struct pt_regs *regs, int opal_machine_check(struct pt_regs *regs) { struct machine_check_event evt; + int ret; if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return 0; @@ -455,6 +456,40 @@ int opal_machine_check(struct pt_regs *regs) if (opal_recover_mce(regs, &evt)) return 1; + + /* + * Unrecovered machine check, we are heading to panic path. + * + * We may have hit this MCE in very early stage of kernel + * initialization even before opal-prd has started running. If + * this is the case then this MCE error may go un-noticed or + * un-analyzed if we go down panic path. We need to inform + * BMC/OCC about this error so that they can collect relevant + * data for error analysis before rebooting. + * Use opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR) to do so. + * This function may not return on BMC based system. + */ + ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, + "Unrecoverable Machine Check exception"); + if (ret == OPAL_UNSUPPORTED) { + pr_emerg("Reboot type %d not supported\n", + OPAL_REBOOT_PLATFORM_ERROR); + } + + /* + * We reached here. There can be three possibilities: + * 1. We are running on a firmware level that do not support + * opal_cec_reboot2() + * 2. We are running on a firmware level that do not support + * OPAL_REBOOT_PLATFORM_ERROR reboot type. + * 3. We are running on FSP based system that does not need opal + * to trigger checkstop explicitly for error analysis. The FSP + * PRD component would have already got notified about this + * error through other channels. + * + * In any case, let us just fall through. We anyway heading + * down to panic path. + */ return 0; } @@ -648,7 +683,7 @@ static void opal_init_heartbeat(void) static int __init opal_init(void) { - struct device_node *np, *consoles; + struct device_node *np, *consoles, *leds; int rc; opal_node = of_find_node_by_path("/ibm,opal"); @@ -689,6 +724,13 @@ static int __init opal_init(void) /* Setup a heatbeat thread if requested by OPAL */ opal_init_heartbeat(); + /* Create leds platform devices */ + leds = of_find_node_by_path("/ibm,opal/leds"); + if (leds) { + of_platform_device_create(leds, "opal_leds", NULL); + of_node_put(leds); + } + /* Create "opal" kobject under /sys/firmware */ rc = opal_sysfs_init(); if (rc == 0) { @@ -841,3 +883,6 @@ EXPORT_SYMBOL_GPL(opal_rtc_write); EXPORT_SYMBOL_GPL(opal_tpo_read); EXPORT_SYMBOL_GPL(opal_tpo_write); EXPORT_SYMBOL_GPL(opal_i2c_request); +/* Export these symbols for PowerNV LED class driver */ +EXPORT_SYMBOL_GPL(opal_leds_get_ind); +EXPORT_SYMBOL_GPL(opal_leds_set_ind); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index fdafbaccafbe..4b8d3bd72a78 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -915,8 +915,9 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) res2 = *res; res->start += size * offset; - dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (enabling %d VFs shifted by %d)\n", - i, &res2, res, num_vfs, offset); + dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n", + i, &res2, res, (offset > 0) ? "En" : "Dis", + num_vfs, offset); pci_update_resource(dev, i + PCI_IOV_RESOURCES); } return 0; @@ -1559,6 +1560,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev pe = &phb->ioda.pe_array[pdn->pe_number]; WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); + set_dma_offset(&pdev->dev, pe->tce_bypass_base); set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]); /* * Note: iommu_add_device() will fail here as @@ -1597,9 +1599,10 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) return 0; } -static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb, - struct pci_dev *pdev) +static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev) { + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *pe; u64 end, mask; @@ -3024,6 +3027,7 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .window_alignment = pnv_pci_window_alignment, .reset_secondary_bus = pnv_pci_reset_secondary_bus, .dma_set_mask = pnv_pci_ioda_dma_set_mask, + .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask, .shutdown = pnv_pci_ioda_shutdown, }; @@ -3170,7 +3174,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Setup TCEs */ phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; - phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask; /* Setup MSI support */ pnv_pci_init_ioda_msis(phb); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 765d8ed558d0..3e7f6fda00cf 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -761,17 +761,6 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev) phb->dma_dev_setup(phb, pdev); } -u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) -{ - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; - - if (phb && phb->dma_get_required_mask) - return phb->dma_get_required_mask(phb, pdev); - - return __dma_get_required_mask(&pdev->dev); -} - void pnv_pci_shutdown(void) { struct pci_controller *hose; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index e891ff48d7e6..c8ff50e90766 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -105,8 +105,6 @@ struct pnv_phb { unsigned int hwirq, unsigned int virq, unsigned int is_64, struct msi_msg *msg); void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev); - u64 (*dma_get_required_mask)(struct pnv_phb *phb, - struct pci_dev *pdev); void (*fixup_phb)(struct pci_controller *hose); u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); int (*init_m64)(struct pnv_phb *phb); diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 9269e30e4ca0..6dbc0a1da1f6 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -12,15 +12,9 @@ struct pci_dev; #ifdef CONFIG_PCI extern void pnv_pci_init(void); extern void pnv_pci_shutdown(void); -extern u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev); #else static inline void pnv_pci_init(void) { } static inline void pnv_pci_shutdown(void) { } - -static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) -{ - return 0; -} #endif extern u32 pnv_get_supported_cpuidle_states(void); diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 6eb808ff637e..5dcbdea1afac 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -128,7 +128,7 @@ static __init int rng_create(struct device_node *dn) pr_info_once("Registering arch random hook.\n"); - ppc_md.get_random_long = powernv_get_random_long; + ppc_md.get_random_seed = powernv_get_random_long; return 0; } diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 53737e019ae3..685b3cbe1362 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -165,14 +165,6 @@ static void pnv_progress(char *s, unsigned short hex) { } -static u64 pnv_dma_get_required_mask(struct device *dev) -{ - if (dev_is_pci(dev)) - return pnv_pci_dma_get_required_mask(to_pci_dev(dev)); - - return __dma_get_required_mask(dev); -} - static void pnv_shutdown(void) { /* Let the PCI code clear up IODA tables */ @@ -243,6 +235,13 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) } else { /* Primary waits for the secondaries to have reached OPAL */ pnv_kexec_wait_secondaries_down(); + + /* + * We might be running as little-endian - now that interrupts + * are disabled, reset the HILE bit to big-endian so we don't + * take interrupts in the wrong endian later + */ + opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE); } } #endif /* CONFIG_KEXEC */ @@ -314,7 +313,6 @@ define_machine(powernv) { .machine_shutdown = pnv_shutdown, .power_save = power7_idle, .calibrate_decr = generic_calibrate_decr, - .dma_get_required_mask = pnv_dma_get_required_mask, #ifdef CONFIG_KEXEC .kexec_cpu_down = pnv_kexec_cpu_down, #endif diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index f60f80ada903..503a73f59359 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -190,7 +190,7 @@ static void unsplit_core(void) hid0 = mfspr(SPRN_HID0); hid0 &= ~HID0_POWER8_DYNLPARDIS; - mtspr(SPRN_HID0, hid0); + update_power8_hid0(hid0); update_hid_in_slw(hid0); while (mfspr(SPRN_HID0) & mask) @@ -227,7 +227,7 @@ static void split_core(int new_mode) /* Write new mode */ hid0 = mfspr(SPRN_HID0); hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value; - mtspr(SPRN_HID0, hid0); + update_power8_hid0(hid0); update_hid_in_slw(hid0); /* Wait for it to happen */ diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 0ced387e1463..e9ff44cd5d86 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -92,13 +92,12 @@ static struct property *dlpar_clone_drconf_property(struct device_node *dn) return NULL; new_prop->name = kstrdup(prop->name, GFP_KERNEL); - new_prop->value = kmalloc(prop->length, GFP_KERNEL); + new_prop->value = kmemdup(prop->value, prop->length, GFP_KERNEL); if (!new_prop->name || !new_prop->value) { dlpar_free_drconf_property(new_prop); return NULL; } - memcpy(new_prop->value, prop->value, prop->length); new_prop->length = prop->length; /* Convert the property to cpu endian-ness */ diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 02e4a1745516..3b6647e574b6 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -189,7 +189,8 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) int state; int critical; - status = rtas_get_sensor(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); + status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, + &state); if (state > 3) critical = 1; /* Time Critical */ diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c index e09608770909..31ca557af60b 100644 --- a/arch/powerpc/platforms/pseries/rng.c +++ b/arch/powerpc/platforms/pseries/rng.c @@ -38,7 +38,7 @@ static __init int rng_init(void) pr_info("Registering arch random hook.\n"); - ppc_md.get_random_long = pseries_get_random_long; + ppc_md.get_random_seed = pseries_get_random_long; return 0; } diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c index 87f9623ca805..502f08cfce61 100644 --- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c @@ -132,7 +132,7 @@ static int hsta_msi_probe(struct platform_device *pdev) struct pci_controller *phb; mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (IS_ERR(mem)) { + if (!mem) { dev_err(dev, "Unable to get mmio space\n"); return -EINVAL; } @@ -157,7 +157,7 @@ static int hsta_msi_probe(struct platform_device *pdev) goto out; ppc4xx_hsta_msi.irq_map = kmalloc(sizeof(int) * irq_count, GFP_KERNEL); - if (IS_ERR(ppc4xx_hsta_msi.irq_map)) { + if (!ppc4xx_hsta_msi.irq_map) { ret = -ENOMEM; goto out1; } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index e599259d84fc..6ef1231c6e9c 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1987,7 +1987,6 @@ memex(void) case '^': adrs -= size; break; - break; case '/': if (nslash > 0) adrs -= 1 << nslash; @@ -2731,7 +2730,7 @@ static void xmon_print_symbol(unsigned long address, const char *mid, void dump_segments(void) { int i; - unsigned long esid,vsid,valid; + unsigned long esid,vsid; unsigned long llp; printf("SLB contents of cpu 0x%x\n", smp_processor_id()); @@ -2739,10 +2738,9 @@ void dump_segments(void) for (i = 0; i < mmu_slb_size; i++) { asm volatile("slbmfee %0,%1" : "=r" (esid) : "r" (i)); asm volatile("slbmfev %0,%1" : "=r" (vsid) : "r" (i)); - valid = (esid & SLB_ESID_V); - if (valid | esid | vsid) { + if (esid || vsid) { printf("%02d %016lx %016lx", i, esid, vsid); - if (valid) { + if (esid & SLB_ESID_V) { llp = vsid & SLB_VSID_LLP; if (vsid & SLB_VSID_B_1T) { printf(" 1T ESID=%9lx VSID=%13lx LLP:%3lx \n", diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 9ad35f72ab4c..f218cc3acc10 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -560,6 +560,17 @@ config LEDS_BLINKM This option enables support for the BlinkM RGB LED connected through I2C. Say Y to enable support for the BlinkM LED. +config LEDS_POWERNV + tristate "LED support for PowerNV Platform" + depends on LEDS_CLASS + depends on PPC_POWERNV + depends on OF + help + This option enables support for the system LEDs present on + PowerNV platforms. Say 'y' to enable this support in kernel. + To compile this driver as a module, choose 'm' here: the module + will be called leds-powernv. + config LEDS_SYSCON bool "LED support for LEDs on system controllers" depends on LEDS_CLASS=y diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index 8d6a24a2f513..6a943d16ecab 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_LEDS_VERSATILE) += leds-versatile.o obj-$(CONFIG_LEDS_MENF21BMC) += leds-menf21bmc.o obj-$(CONFIG_LEDS_PM8941_WLED) += leds-pm8941-wled.o obj-$(CONFIG_LEDS_KTD2692) += leds-ktd2692.o +obj-$(CONFIG_LEDS_POWERNV) += leds-powernv.o # LED SPI Drivers obj-$(CONFIG_LEDS_DAC124S085) += leds-dac124s085.o diff --git a/drivers/leds/leds-powernv.c b/drivers/leds/leds-powernv.c new file mode 100644 index 000000000000..2c5c5b12ab64 --- /dev/null +++ b/drivers/leds/leds-powernv.c @@ -0,0 +1,345 @@ +/* + * PowerNV LED Driver + * + * Copyright IBM Corp. 2015 + * + * Author: Vasant Hegde <[email protected]> + * Author: Anshuman Khandual <[email protected]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/leds.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/types.h> + +#include <asm/opal.h> + +/* Map LED type to description. */ +struct led_type_map { + const int type; + const char *desc; +}; +static const struct led_type_map led_type_map[] = { + {OPAL_SLOT_LED_TYPE_ID, "identify"}, + {OPAL_SLOT_LED_TYPE_FAULT, "fault"}, + {OPAL_SLOT_LED_TYPE_ATTN, "attention"}, + {-1, NULL}, +}; + +struct powernv_led_common { + /* + * By default unload path resets all the LEDs. But on PowerNV + * platform we want to retain LED state across reboot as these + * are controlled by firmware. Also service processor can modify + * the LEDs independent of OS. Hence avoid resetting LEDs in + * unload path. + */ + bool led_disabled; + + /* Max supported LED type */ + __be64 max_led_type; + + /* glabal lock */ + struct mutex lock; +}; + +/* PowerNV LED data */ +struct powernv_led_data { + struct led_classdev cdev; + char *loc_code; /* LED location code */ + int led_type; /* OPAL_SLOT_LED_TYPE_* */ + + struct powernv_led_common *common; +}; + + +/* Returns OPAL_SLOT_LED_TYPE_* for given led type string */ +static int powernv_get_led_type(const char *led_type_desc) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(led_type_map); i++) + if (!strcmp(led_type_map[i].desc, led_type_desc)) + return led_type_map[i].type; + + return -1; +} + +/* + * This commits the state change of the requested LED through an OPAL call. + * This function is called from work queue task context when ever it gets + * scheduled. This function can sleep at opal_async_wait_response call. + */ +static void powernv_led_set(struct powernv_led_data *powernv_led, + enum led_brightness value) +{ + int rc, token; + u64 led_mask, led_value = 0; + __be64 max_type; + struct opal_msg msg; + struct device *dev = powernv_led->cdev.dev; + struct powernv_led_common *powernv_led_common = powernv_led->common; + + /* Prepare for the OPAL call */ + max_type = powernv_led_common->max_led_type; + led_mask = OPAL_SLOT_LED_STATE_ON << powernv_led->led_type; + if (value) + led_value = led_mask; + + /* OPAL async call */ + token = opal_async_get_token_interruptible(); + if (token < 0) { + if (token != -ERESTARTSYS) + dev_err(dev, "%s: Couldn't get OPAL async token\n", + __func__); + return; + } + + rc = opal_leds_set_ind(token, powernv_led->loc_code, + led_mask, led_value, &max_type); + if (rc != OPAL_ASYNC_COMPLETION) { + dev_err(dev, "%s: OPAL set LED call failed for %s [rc=%d]\n", + __func__, powernv_led->loc_code, rc); + goto out_token; + } + + rc = opal_async_wait_response(token, &msg); + if (rc) { + dev_err(dev, + "%s: Failed to wait for the async response [rc=%d]\n", + __func__, rc); + goto out_token; + } + + rc = be64_to_cpu(msg.params[1]); + if (rc != OPAL_SUCCESS) + dev_err(dev, "%s : OAPL async call returned failed [rc=%d]\n", + __func__, rc); + +out_token: + opal_async_release_token(token); +} + +/* + * This function fetches the LED state for a given LED type for + * mentioned LED classdev structure. + */ +static enum led_brightness powernv_led_get(struct powernv_led_data *powernv_led) +{ + int rc; + __be64 mask, value, max_type; + u64 led_mask, led_value; + struct device *dev = powernv_led->cdev.dev; + struct powernv_led_common *powernv_led_common = powernv_led->common; + + /* Fetch all LED status */ + mask = cpu_to_be64(0); + value = cpu_to_be64(0); + max_type = powernv_led_common->max_led_type; + + rc = opal_leds_get_ind(powernv_led->loc_code, + &mask, &value, &max_type); + if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL) { + dev_err(dev, "%s: OPAL get led call failed [rc=%d]\n", + __func__, rc); + return LED_OFF; + } + + led_mask = be64_to_cpu(mask); + led_value = be64_to_cpu(value); + + /* LED status available */ + if (!((led_mask >> powernv_led->led_type) & OPAL_SLOT_LED_STATE_ON)) { + dev_err(dev, "%s: LED status not available for %s\n", + __func__, powernv_led->cdev.name); + return LED_OFF; + } + + /* LED status value */ + if ((led_value >> powernv_led->led_type) & OPAL_SLOT_LED_STATE_ON) + return LED_FULL; + + return LED_OFF; +} + +/* + * LED classdev 'brightness_get' function. This schedules work + * to update LED state. + */ +static void powernv_brightness_set(struct led_classdev *led_cdev, + enum led_brightness value) +{ + struct powernv_led_data *powernv_led = + container_of(led_cdev, struct powernv_led_data, cdev); + struct powernv_led_common *powernv_led_common = powernv_led->common; + + /* Do not modify LED in unload path */ + if (powernv_led_common->led_disabled) + return; + + mutex_lock(&powernv_led_common->lock); + powernv_led_set(powernv_led, value); + mutex_unlock(&powernv_led_common->lock); +} + +/* LED classdev 'brightness_get' function */ +static enum led_brightness powernv_brightness_get(struct led_classdev *led_cdev) +{ + struct powernv_led_data *powernv_led = + container_of(led_cdev, struct powernv_led_data, cdev); + + return powernv_led_get(powernv_led); +} + +/* + * This function registers classdev structure for any given type of LED on + * a given child LED device node. + */ +static int powernv_led_create(struct device *dev, + struct powernv_led_data *powernv_led, + const char *led_type_desc) +{ + int rc; + + /* Make sure LED type is supported */ + powernv_led->led_type = powernv_get_led_type(led_type_desc); + if (powernv_led->led_type == -1) { + dev_warn(dev, "%s: No support for led type : %s\n", + __func__, led_type_desc); + return -EINVAL; + } + + /* Create the name for classdev */ + powernv_led->cdev.name = devm_kasprintf(dev, GFP_KERNEL, "%s:%s", + powernv_led->loc_code, + led_type_desc); + if (!powernv_led->cdev.name) { + dev_err(dev, + "%s: Memory allocation failed for classdev name\n", + __func__); + return -ENOMEM; + } + + powernv_led->cdev.brightness_set = powernv_brightness_set; + powernv_led->cdev.brightness_get = powernv_brightness_get; + powernv_led->cdev.brightness = LED_OFF; + powernv_led->cdev.max_brightness = LED_FULL; + + /* Register the classdev */ + rc = devm_led_classdev_register(dev, &powernv_led->cdev); + if (rc) { + dev_err(dev, "%s: Classdev registration failed for %s\n", + __func__, powernv_led->cdev.name); + } + + return rc; +} + +/* Go through LED device tree node and register LED classdev structure */ +static int powernv_led_classdev(struct platform_device *pdev, + struct device_node *led_node, + struct powernv_led_common *powernv_led_common) +{ + const char *cur = NULL; + int rc = -1; + struct property *p; + struct device_node *np; + struct powernv_led_data *powernv_led; + struct device *dev = &pdev->dev; + + for_each_child_of_node(led_node, np) { + p = of_find_property(np, "led-types", NULL); + if (!p) + continue; + + while ((cur = of_prop_next_string(p, cur)) != NULL) { + powernv_led = devm_kzalloc(dev, sizeof(*powernv_led), + GFP_KERNEL); + if (!powernv_led) + return -ENOMEM; + + powernv_led->common = powernv_led_common; + powernv_led->loc_code = (char *)np->name; + + rc = powernv_led_create(dev, powernv_led, cur); + if (rc) + return rc; + } /* while end */ + } + + return rc; +} + +/* Platform driver probe */ +static int powernv_led_probe(struct platform_device *pdev) +{ + struct device_node *led_node; + struct powernv_led_common *powernv_led_common; + struct device *dev = &pdev->dev; + + led_node = of_find_node_by_path("/ibm,opal/leds"); + if (!led_node) { + dev_err(dev, "%s: LED parent device node not found\n", + __func__); + return -EINVAL; + } + + powernv_led_common = devm_kzalloc(dev, sizeof(*powernv_led_common), + GFP_KERNEL); + if (!powernv_led_common) + return -ENOMEM; + + mutex_init(&powernv_led_common->lock); + powernv_led_common->max_led_type = cpu_to_be64(OPAL_SLOT_LED_TYPE_MAX); + + platform_set_drvdata(pdev, powernv_led_common); + + return powernv_led_classdev(pdev, led_node, powernv_led_common); +} + +/* Platform driver remove */ +static int powernv_led_remove(struct platform_device *pdev) +{ + struct powernv_led_common *powernv_led_common; + + /* Disable LED operation */ + powernv_led_common = platform_get_drvdata(pdev); + powernv_led_common->led_disabled = true; + + /* Destroy lock */ + mutex_destroy(&powernv_led_common->lock); + + dev_info(&pdev->dev, "PowerNV led module unregistered\n"); + return 0; +} + +/* Platform driver property match */ +static const struct of_device_id powernv_led_match[] = { + { + .compatible = "ibm,opal-v3-led", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, powernv_led_match); + +static struct platform_driver powernv_led_driver = { + .probe = powernv_led_probe, + .remove = powernv_led_remove, + .driver = { + .name = "powernv-led-driver", + .of_match_table = powernv_led_match, + }, +}; + +module_platform_driver(powernv_led_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("PowerNV LED driver"); +MODULE_AUTHOR("Vasant Hegde <[email protected]>"); diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c index 109dcaa15934..68dcbcb4fc5b 100644 --- a/drivers/macintosh/therm_windtunnel.c +++ b/drivers/macintosh/therm_windtunnel.c @@ -408,6 +408,7 @@ static const struct i2c_device_id therm_windtunnel_id[] = { { "therm_adm1030", adm1030 }, { } }; +MODULE_DEVICE_TABLE(i2c, therm_windtunnel_id); static int do_probe(struct i2c_client *cl, const struct i2c_device_id *id) @@ -459,6 +460,7 @@ static const struct of_device_id therm_of_match[] = {{ .compatible = "adm1030" }, {} }; +MODULE_DEVICE_TABLE(of, therm_of_match); static struct platform_driver therm_of_driver = { .driver = { diff --git a/drivers/macintosh/windfarm.h b/drivers/macintosh/windfarm.h index 028cdac2d33d..901c42f71b5a 100644 --- a/drivers/macintosh/windfarm.h +++ b/drivers/macintosh/windfarm.h @@ -53,11 +53,9 @@ struct wf_control { * the kref and wf_unregister_control will decrement it, thus the * object creating/disposing a given control shouldn't assume it * still exists after wf_unregister_control has been called. - * wf_find_control will inc the refcount for you */ extern int wf_register_control(struct wf_control *ct); extern void wf_unregister_control(struct wf_control *ct); -extern struct wf_control * wf_find_control(const char *name); extern int wf_get_control(struct wf_control *ct); extern void wf_put_control(struct wf_control *ct); @@ -117,7 +115,6 @@ struct wf_sensor { /* Same lifetime rules as controls */ extern int wf_register_sensor(struct wf_sensor *sr); extern void wf_unregister_sensor(struct wf_sensor *sr); -extern struct wf_sensor * wf_find_sensor(const char *name); extern int wf_get_sensor(struct wf_sensor *sr); extern void wf_put_sensor(struct wf_sensor *sr); @@ -144,7 +141,6 @@ extern int wf_unregister_client(struct notifier_block *nb); /* Overtemp conditions. Those are refcounted */ extern void wf_set_overtemp(void); extern void wf_clear_overtemp(void); -extern int wf_is_overtemp(void); #define WF_EVENT_NEW_CONTROL 0 /* param is wf_control * */ #define WF_EVENT_NEW_SENSOR 1 /* param is wf_sensor * */ diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c index 3ee198b65843..465d770ab0bb 100644 --- a/drivers/macintosh/windfarm_core.c +++ b/drivers/macintosh/windfarm_core.c @@ -72,7 +72,7 @@ static inline void wf_notify(int event, void *param) blocking_notifier_call_chain(&wf_client_list, event, param); } -int wf_critical_overtemp(void) +static int wf_critical_overtemp(void) { static char * critical_overtemp_path = "/sbin/critical_overtemp"; char *argv[] = { critical_overtemp_path, NULL }; @@ -84,7 +84,6 @@ int wf_critical_overtemp(void) return call_usermodehelper(critical_overtemp_path, argv, envp, UMH_WAIT_EXEC); } -EXPORT_SYMBOL_GPL(wf_critical_overtemp); static int wf_thread_func(void *data) { @@ -255,24 +254,6 @@ void wf_unregister_control(struct wf_control *ct) } EXPORT_SYMBOL_GPL(wf_unregister_control); -struct wf_control * wf_find_control(const char *name) -{ - struct wf_control *ct; - - mutex_lock(&wf_lock); - list_for_each_entry(ct, &wf_controls, link) { - if (!strcmp(ct->name, name)) { - if (wf_get_control(ct)) - ct = NULL; - mutex_unlock(&wf_lock); - return ct; - } - } - mutex_unlock(&wf_lock); - return NULL; -} -EXPORT_SYMBOL_GPL(wf_find_control); - int wf_get_control(struct wf_control *ct) { if (!try_module_get(ct->ops->owner)) @@ -368,24 +349,6 @@ void wf_unregister_sensor(struct wf_sensor *sr) } EXPORT_SYMBOL_GPL(wf_unregister_sensor); -struct wf_sensor * wf_find_sensor(const char *name) -{ - struct wf_sensor *sr; - - mutex_lock(&wf_lock); - list_for_each_entry(sr, &wf_sensors, link) { - if (!strcmp(sr->name, name)) { - if (wf_get_sensor(sr)) - sr = NULL; - mutex_unlock(&wf_lock); - return sr; - } - } - mutex_unlock(&wf_lock); - return NULL; -} -EXPORT_SYMBOL_GPL(wf_find_sensor); - int wf_get_sensor(struct wf_sensor *sr) { if (!try_module_get(sr->ops->owner)) @@ -435,7 +398,7 @@ int wf_unregister_client(struct notifier_block *nb) { mutex_lock(&wf_lock); blocking_notifier_chain_unregister(&wf_client_list, nb); - wf_client_count++; + wf_client_count--; if (wf_client_count == 0) wf_stop_thread(); mutex_unlock(&wf_lock); @@ -474,12 +437,6 @@ void wf_clear_overtemp(void) } EXPORT_SYMBOL_GPL(wf_clear_overtemp); -int wf_is_overtemp(void) -{ - return (wf_overtemp != 0); -} -EXPORT_SYMBOL_GPL(wf_is_overtemp); - static int __init windfarm_core_init(void) { DBG("wf: core loaded\n"); diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig index b6db9ebd52c2..8756d06e2bb8 100644 --- a/drivers/misc/cxl/Kconfig +++ b/drivers/misc/cxl/Kconfig @@ -11,11 +11,16 @@ config CXL_KERNEL_API bool default n +config CXL_EEH + bool + default n + config CXL tristate "Support for IBM Coherent Accelerators (CXL)" - depends on PPC_POWERNV && PCI_MSI + depends on PPC_POWERNV && PCI_MSI && EEH select CXL_BASE select CXL_KERNEL_API + select CXL_EEH default m help Select this option to enable driver support for IBM Coherent diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile index 14e3f8219a11..6f484dfe78f9 100644 --- a/drivers/misc/cxl/Makefile +++ b/drivers/misc/cxl/Makefile @@ -1,3 +1,5 @@ +ccflags-y := -Werror + cxl-y += main.o file.o irq.o fault.o native.o cxl-y += context.o sysfs.o debugfs.o pci.o trace.o cxl-y += vphb.o api.o diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 729e0851167d..f49e3e5db58d 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -59,7 +59,7 @@ EXPORT_SYMBOL_GPL(cxl_get_phys_dev); int cxl_release_context(struct cxl_context *ctx) { - if (ctx->status != CLOSED) + if (ctx->status >= STARTED) return -EBUSY; put_device(&ctx->afu->dev); @@ -327,3 +327,10 @@ int cxl_afu_reset(struct cxl_context *ctx) return cxl_afu_check_and_enable(afu); } EXPORT_SYMBOL_GPL(cxl_afu_reset); + +void cxl_perst_reloads_same_image(struct cxl_afu *afu, + bool perst_reloads_same_image) +{ + afu->adapter->perst_same_image = perst_reloads_same_image; +} +EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image); diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 1287148629c0..941fda04aa9a 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -126,6 +126,18 @@ static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (ctx->status != STARTED) { mutex_unlock(&ctx->status_mutex); pr_devel("%s: Context not started, failing problem state access\n", __func__); + if (ctx->mmio_err_ff) { + if (!ctx->ff_page) { + ctx->ff_page = alloc_page(GFP_USER); + if (!ctx->ff_page) + return VM_FAULT_OOM; + memset(page_address(ctx->ff_page), 0xff, PAGE_SIZE); + } + get_page(ctx->ff_page); + vmf->page = ctx->ff_page; + vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); + return 0; + } return VM_FAULT_SIGBUS; } @@ -193,7 +205,11 @@ int __detach_context(struct cxl_context *ctx) if (status != STARTED) return -EBUSY; - WARN_ON(cxl_detach_process(ctx)); + /* Only warn if we detached while the link was OK. + * If detach fails when hw is down, we don't care. + */ + WARN_ON(cxl_detach_process(ctx) && + cxl_adapter_link_ok(ctx->afu->adapter)); flush_work(&ctx->fault_work); /* Only needed for dedicated process */ put_pid(ctx->pid); cxl_ctx_put(); @@ -253,6 +269,8 @@ static void reclaim_ctx(struct rcu_head *rcu) struct cxl_context *ctx = container_of(rcu, struct cxl_context, rcu); free_page((u64)ctx->sstp); + if (ctx->ff_page) + __free_page(ctx->ff_page); ctx->sstp = NULL; kfree(ctx); diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 4fd66cabde1e..e7af256f60c5 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -34,7 +34,7 @@ extern uint cxl_verbose; * Bump version each time a user API change is made, whether it is * backwards compatible ot not. */ -#define CXL_API_VERSION 1 +#define CXL_API_VERSION 2 #define CXL_API_VERSION_COMPATIBLE 1 /* @@ -418,6 +418,8 @@ struct cxl_context { /* Used to unmap any mmaps when force detaching */ struct address_space *mapping; struct mutex mapping_lock; + struct page *ff_page; + bool mmio_err_ff; spinlock_t sste_lock; /* Protects segment table entries */ struct cxl_sste *sstp; @@ -493,6 +495,7 @@ struct cxl { bool user_image_loaded; bool perst_loads_image; bool perst_select_user; + bool perst_same_image; }; int cxl_alloc_one_irq(struct cxl *adapter); @@ -531,16 +534,33 @@ struct cxl_process_element { __be32 software_state; } __packed; +static inline bool cxl_adapter_link_ok(struct cxl *cxl) +{ + struct pci_dev *pdev; + + pdev = to_pci_dev(cxl->dev.parent); + return !pci_channel_offline(pdev); +} + static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg) { WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE)); return cxl->p1_mmio + cxl_reg_off(reg); } -#define cxl_p1_write(cxl, reg, val) \ - out_be64(_cxl_p1_addr(cxl, reg), val) -#define cxl_p1_read(cxl, reg) \ - in_be64(_cxl_p1_addr(cxl, reg)) +static inline void cxl_p1_write(struct cxl *cxl, cxl_p1_reg_t reg, u64 val) +{ + if (likely(cxl_adapter_link_ok(cxl))) + out_be64(_cxl_p1_addr(cxl, reg), val); +} + +static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg) +{ + if (likely(cxl_adapter_link_ok(cxl))) + return in_be64(_cxl_p1_addr(cxl, reg)); + else + return ~0ULL; +} static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg) { @@ -548,26 +568,56 @@ static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg return afu->p1n_mmio + cxl_reg_off(reg); } -#define cxl_p1n_write(afu, reg, val) \ - out_be64(_cxl_p1n_addr(afu, reg), val) -#define cxl_p1n_read(afu, reg) \ - in_be64(_cxl_p1n_addr(afu, reg)) +static inline void cxl_p1n_write(struct cxl_afu *afu, cxl_p1n_reg_t reg, u64 val) +{ + if (likely(cxl_adapter_link_ok(afu->adapter))) + out_be64(_cxl_p1n_addr(afu, reg), val); +} + +static inline u64 cxl_p1n_read(struct cxl_afu *afu, cxl_p1n_reg_t reg) +{ + if (likely(cxl_adapter_link_ok(afu->adapter))) + return in_be64(_cxl_p1n_addr(afu, reg)); + else + return ~0ULL; +} static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg) { return afu->p2n_mmio + cxl_reg_off(reg); } -#define cxl_p2n_write(afu, reg, val) \ - out_be64(_cxl_p2n_addr(afu, reg), val) -#define cxl_p2n_read(afu, reg) \ - in_be64(_cxl_p2n_addr(afu, reg)) +static inline void cxl_p2n_write(struct cxl_afu *afu, cxl_p2n_reg_t reg, u64 val) +{ + if (likely(cxl_adapter_link_ok(afu->adapter))) + out_be64(_cxl_p2n_addr(afu, reg), val); +} + +static inline u64 cxl_p2n_read(struct cxl_afu *afu, cxl_p2n_reg_t reg) +{ + if (likely(cxl_adapter_link_ok(afu->adapter))) + return in_be64(_cxl_p2n_addr(afu, reg)); + else + return ~0ULL; +} +static inline u64 cxl_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off) +{ + if (likely(cxl_adapter_link_ok(afu->adapter))) + return in_le64((afu)->afu_desc_mmio + (afu)->crs_offset + + ((cr) * (afu)->crs_len) + (off)); + else + return ~0ULL; +} -#define cxl_afu_cr_read64(afu, cr, off) \ - in_le64((afu)->afu_desc_mmio + (afu)->crs_offset + ((cr) * (afu)->crs_len) + (off)) -#define cxl_afu_cr_read32(afu, cr, off) \ - in_le32((afu)->afu_desc_mmio + (afu)->crs_offset + ((cr) * (afu)->crs_len) + (off)) +static inline u32 cxl_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off) +{ + if (likely(cxl_adapter_link_ok(afu->adapter))) + return in_le32((afu)->afu_desc_mmio + (afu)->crs_offset + + ((cr) * (afu)->crs_len) + (off)); + else + return 0xffffffff; +} u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off); u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off); @@ -585,6 +635,9 @@ void unregister_cxl_calls(struct cxl_calls *calls); int cxl_alloc_adapter_nr(struct cxl *adapter); void cxl_remove_adapter_nr(struct cxl *adapter); +int cxl_alloc_spa(struct cxl_afu *afu); +void cxl_release_spa(struct cxl_afu *afu); + int cxl_file_init(void); void cxl_file_exit(void); int cxl_register_adapter(struct cxl *adapter); @@ -675,6 +728,7 @@ int cxl_psl_purge(struct cxl_afu *afu); void cxl_stop_trace(struct cxl *cxl); int cxl_pci_vphb_add(struct cxl_afu *afu); +void cxl_pci_vphb_reconfigure(struct cxl_afu *afu); void cxl_pci_vphb_remove(struct cxl_afu *afu); extern struct pci_driver cxl_pci_driver; diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c index 825c412580bc..18df6f44af2a 100644 --- a/drivers/misc/cxl/debugfs.c +++ b/drivers/misc/cxl/debugfs.c @@ -48,7 +48,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_io_x64, debugfs_io_u64_get, debugfs_io_u64_set, "0x static struct dentry *debugfs_create_io_x64(const char *name, umode_t mode, struct dentry *parent, u64 __iomem *value) { - return debugfs_create_file(name, mode, parent, (void *)value, &fops_io_x64); + return debugfs_create_file(name, mode, parent, (void __force *)value, &fops_io_x64); } int cxl_debugfs_adapter_add(struct cxl *adapter) diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index e3f4b69527a9..a30bf285b5bd 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -73,6 +73,11 @@ static int __afu_open(struct inode *inode, struct file *file, bool master) if (!afu->current_mode) goto err_put_afu; + if (!cxl_adapter_link_ok(adapter)) { + rc = -EIO; + goto err_put_afu; + } + if (!(ctx = cxl_context_alloc())) { rc = -ENOMEM; goto err_put_afu; @@ -179,6 +184,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, if (work.flags & CXL_START_WORK_AMR) amr = work.amr & mfspr(SPRN_UAMOR); + ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF); + /* * We grab the PID here and not in the file open to allow for the case * where a process (master, some daemon, etc) has opened the chardev on @@ -238,6 +245,9 @@ long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (ctx->status == CLOSED) return -EIO; + if (!cxl_adapter_link_ok(ctx->afu->adapter)) + return -EIO; + pr_devel("afu_ioctl\n"); switch (cmd) { case CXL_IOCTL_START_WORK: @@ -251,7 +261,7 @@ long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return -EINVAL; } -long afu_compat_ioctl(struct file *file, unsigned int cmd, +static long afu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return afu_ioctl(file, cmd, arg); @@ -265,6 +275,9 @@ int afu_mmap(struct file *file, struct vm_area_struct *vm) if (ctx->status != STARTED) return -EIO; + if (!cxl_adapter_link_ok(ctx->afu->adapter)) + return -EIO; + return cxl_context_iomap(ctx, vm); } @@ -309,6 +322,9 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, int rc; DEFINE_WAIT(wait); + if (!cxl_adapter_link_ok(ctx->afu->adapter)) + return -EIO; + if (count < CXL_READ_MIN_SIZE) return -EINVAL; @@ -319,6 +335,11 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, if (ctx_event_pending(ctx)) break; + if (!cxl_adapter_link_ok(ctx->afu->adapter)) { + rc = -EIO; + goto out; + } + if (file->f_flags & O_NONBLOCK) { rc = -EAGAIN; goto out; @@ -396,7 +417,7 @@ const struct file_operations afu_fops = { .mmap = afu_mmap, }; -const struct file_operations afu_master_fops = { +static const struct file_operations afu_master_fops = { .owner = THIS_MODULE, .open = afu_master_open, .poll = afu_poll, @@ -519,7 +540,7 @@ int __init cxl_file_init(void) * If these change we really need to update API. Either change some * flags or update API version number CXL_API_VERSION. */ - BUILD_BUG_ON(CXL_API_VERSION != 1); + BUILD_BUG_ON(CXL_API_VERSION != 2); BUILD_BUG_ON(sizeof(struct cxl_ioctl_start_work) != 64); BUILD_BUG_ON(sizeof(struct cxl_event_header) != 8); BUILD_BUG_ON(sizeof(struct cxl_event_afu_interrupt) != 8); diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index abfce494ca6b..583b42afeda2 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -341,6 +341,9 @@ int cxl_register_psl_err_irq(struct cxl *adapter) void cxl_release_psl_err_irq(struct cxl *adapter) { + if (adapter->err_virq != irq_find_mapping(NULL, adapter->err_hwirq)) + return; + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000); cxl_unmap_irq(adapter->err_virq, adapter); cxl_release_one_irq(adapter, adapter->err_hwirq); @@ -374,6 +377,9 @@ int cxl_register_serr_irq(struct cxl_afu *afu) void cxl_release_serr_irq(struct cxl_afu *afu) { + if (afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq)) + return; + cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000); cxl_unmap_irq(afu->serr_virq, afu); cxl_release_one_irq(afu->adapter, afu->serr_hwirq); @@ -400,12 +406,15 @@ int cxl_register_psl_irq(struct cxl_afu *afu) void cxl_release_psl_irq(struct cxl_afu *afu) { + if (afu->psl_virq != irq_find_mapping(NULL, afu->psl_hwirq)) + return; + cxl_unmap_irq(afu->psl_virq, afu); cxl_release_one_irq(afu->adapter, afu->psl_hwirq); kfree(afu->psl_irq_name); } -void afu_irq_name_free(struct cxl_context *ctx) +static void afu_irq_name_free(struct cxl_context *ctx) { struct cxl_irq_name *irq_name, *tmp; @@ -421,6 +430,9 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32 count) int rc, r, i, j = 1; struct cxl_irq_name *irq_name; + /* Initialize the list head to hold irq names */ + INIT_LIST_HEAD(&ctx->irq_names); + if ((rc = cxl_alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, count))) return rc; @@ -432,13 +444,12 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32 count) ctx->irq_bitmap = kcalloc(BITS_TO_LONGS(count), sizeof(*ctx->irq_bitmap), GFP_KERNEL); if (!ctx->irq_bitmap) - return -ENOMEM; + goto out; /* * Allocate names first. If any fail, bail out before allocating * actual hardware IRQs. */ - INIT_LIST_HEAD(&ctx->irq_names); for (r = 1; r < CXL_IRQ_RANGES; r++) { for (i = 0; i < ctx->irqs.range[r]; i++) { irq_name = kmalloc(sizeof(struct cxl_irq_name), @@ -460,11 +471,12 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32 count) return 0; out: + cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); afu_irq_name_free(ctx); return -ENOMEM; } -void afu_register_hwirqs(struct cxl_context *ctx) +static void afu_register_hwirqs(struct cxl_context *ctx) { irq_hw_number_t hwirq; struct cxl_irq_name *irq_name; @@ -511,4 +523,8 @@ void afu_release_irqs(struct cxl_context *ctx, void *cookie) afu_irq_name_free(ctx); cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); + + kfree(ctx->irq_bitmap); + ctx->irq_bitmap = NULL; + ctx->irq_count = 0; } diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index fc9310dd2367..b37f2e8004f5 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -41,6 +41,13 @@ static int afu_control(struct cxl_afu *afu, u64 command, rc = -EBUSY; goto out; } + + if (!cxl_adapter_link_ok(afu->adapter)) { + afu->enabled = enabled; + rc = -EIO; + goto out; + } + pr_devel_ratelimited("AFU control... (0x%016llx)\n", AFU_Cntl | command); cpu_relax(); @@ -85,6 +92,10 @@ int __cxl_afu_reset(struct cxl_afu *afu) int cxl_afu_check_and_enable(struct cxl_afu *afu) { + if (!cxl_adapter_link_ok(afu->adapter)) { + WARN(1, "Refusing to enable afu while link down!\n"); + return -EIO; + } if (afu->enabled) return 0; return afu_enable(afu); @@ -103,6 +114,12 @@ int cxl_psl_purge(struct cxl_afu *afu) pr_devel("PSL purge request\n"); + if (!cxl_adapter_link_ok(afu->adapter)) { + dev_warn(&afu->dev, "PSL Purge called with link down, ignoring\n"); + rc = -EIO; + goto out; + } + if ((AFU_Cntl & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { WARN(1, "psl_purge request while AFU not disabled!\n"); cxl_afu_disable(afu); @@ -119,6 +136,11 @@ int cxl_psl_purge(struct cxl_afu *afu) rc = -EBUSY; goto out; } + if (!cxl_adapter_link_ok(afu->adapter)) { + rc = -EIO; + goto out; + } + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); pr_devel_ratelimited("PSL purging... PSL_CNTL: 0x%016llx PSL_DSISR: 0x%016llx\n", PSL_CNTL, dsisr); if (dsisr & CXL_PSL_DSISR_TRANS) { @@ -161,10 +183,8 @@ static int spa_max_procs(int spa_size) return ((spa_size / 8) - 96) / 17; } -static int alloc_spa(struct cxl_afu *afu) +int cxl_alloc_spa(struct cxl_afu *afu) { - u64 spap; - /* Work out how many pages to allocate */ afu->spa_order = 0; do { @@ -183,6 +203,13 @@ static int alloc_spa(struct cxl_afu *afu) pr_devel("spa pages: %i afu->spa_max_procs: %i afu->num_procs: %i\n", 1<<afu->spa_order, afu->spa_max_procs, afu->num_procs); + return 0; +} + +static void attach_spa(struct cxl_afu *afu) +{ + u64 spap; + afu->sw_command_status = (__be64 *)((char *)afu->spa + ((afu->spa_max_procs + 3) * 128)); @@ -191,14 +218,19 @@ static int alloc_spa(struct cxl_afu *afu) spap |= CXL_PSL_SPAP_V; pr_devel("cxl: SPA allocated at 0x%p. Max processes: %i, sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n", afu->spa, afu->spa_max_procs, afu->sw_command_status, spap); cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap); - - return 0; } -static void release_spa(struct cxl_afu *afu) +static inline void detach_spa(struct cxl_afu *afu) { cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0); - free_pages((unsigned long) afu->spa, afu->spa_order); +} + +void cxl_release_spa(struct cxl_afu *afu) +{ + if (afu->spa) { + free_pages((unsigned long) afu->spa, afu->spa_order); + afu->spa = NULL; + } } int cxl_tlb_slb_invalidate(struct cxl *adapter) @@ -215,6 +247,8 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter) dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n"); return -EBUSY; } + if (!cxl_adapter_link_ok(adapter)) + return -EIO; cpu_relax(); } @@ -224,6 +258,8 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter) dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n"); return -EBUSY; } + if (!cxl_adapter_link_ok(adapter)) + return -EIO; cpu_relax(); } return 0; @@ -240,6 +276,11 @@ int cxl_afu_slbia(struct cxl_afu *afu) dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n"); return -EBUSY; } + /* If the adapter has gone down, we can assume that we + * will PERST it and that will invalidate everything. + */ + if (!cxl_adapter_link_ok(afu->adapter)) + return -EIO; cpu_relax(); } return 0; @@ -279,6 +320,8 @@ static void slb_invalid(struct cxl_context *ctx) cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID); while (1) { + if (!cxl_adapter_link_ok(adapter)) + break; slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA); if (!(slbia & CXL_TLB_SLB_P)) break; @@ -308,6 +351,11 @@ static int do_process_element_cmd(struct cxl_context *ctx, rc = -EBUSY; goto out; } + if (!cxl_adapter_link_ok(ctx->afu->adapter)) { + dev_warn(&ctx->afu->dev, "WARNING: Device link down, aborting Process Element Command!\n"); + rc = -EIO; + goto out; + } state = be64_to_cpup(ctx->afu->sw_command_status); if (state == ~0ULL) { pr_err("cxl: Error adding process element to AFU\n"); @@ -355,8 +403,13 @@ static int terminate_process_element(struct cxl_context *ctx) mutex_lock(&ctx->afu->spa_mutex); pr_devel("%s Terminate pe: %i started\n", __func__, ctx->pe); - rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE, - CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T); + /* We could be asked to terminate when the hw is down. That + * should always succeed: it's not running if the hw has gone + * away and is being reset. + */ + if (cxl_adapter_link_ok(ctx->afu->adapter)) + rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE, + CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T); ctx->elem->software_state = 0; /* Remove Valid bit */ pr_devel("%s Terminate pe: %i finished\n", __func__, ctx->pe); mutex_unlock(&ctx->afu->spa_mutex); @@ -369,7 +422,14 @@ static int remove_process_element(struct cxl_context *ctx) mutex_lock(&ctx->afu->spa_mutex); pr_devel("%s Remove pe: %i started\n", __func__, ctx->pe); - if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0))) + + /* We could be asked to remove when the hw is down. Again, if + * the hw is down, the PE is gone, so we succeed. + */ + if (cxl_adapter_link_ok(ctx->afu->adapter)) + rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0); + + if (!rc) ctx->pe_inserted = false; slb_invalid(ctx); pr_devel("%s Remove pe: %i finished\n", __func__, ctx->pe); @@ -397,8 +457,11 @@ static int activate_afu_directed(struct cxl_afu *afu) dev_info(&afu->dev, "Activating AFU directed mode\n"); - if (alloc_spa(afu)) - return -ENOMEM; + if (afu->spa == NULL) { + if (cxl_alloc_spa(afu)) + return -ENOMEM; + } + attach_spa(afu); cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_AFU); cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL); @@ -492,9 +555,7 @@ static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr) if ((result = cxl_afu_check_and_enable(ctx->afu))) return result; - add_process_element(ctx); - - return 0; + return add_process_element(ctx); } static int deactivate_afu_directed(struct cxl_afu *afu) @@ -511,8 +572,6 @@ static int deactivate_afu_directed(struct cxl_afu *afu) cxl_afu_disable(afu); cxl_psl_purge(afu); - release_spa(afu); - return 0; } @@ -614,6 +673,11 @@ int cxl_afu_activate_mode(struct cxl_afu *afu, int mode) if (!(mode & afu->modes_supported)) return -EINVAL; + if (!cxl_adapter_link_ok(afu->adapter)) { + WARN(1, "Device link is down, refusing to activate!\n"); + return -EIO; + } + if (mode == CXL_MODE_DIRECTED) return activate_afu_directed(afu); if (mode == CXL_MODE_DEDICATED) @@ -624,6 +688,11 @@ int cxl_afu_activate_mode(struct cxl_afu *afu, int mode) int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr) { + if (!cxl_adapter_link_ok(ctx->afu->adapter)) { + WARN(1, "Device link is down, refusing to attach process!\n"); + return -EIO; + } + ctx->kernel = kernel; if (ctx->afu->current_mode == CXL_MODE_DIRECTED) return attach_afu_directed(ctx, wed, amr); @@ -668,6 +737,12 @@ int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info) { u64 pidtid; + /* If the adapter has gone away, we can't get any meaningful + * information. + */ + if (!cxl_adapter_link_ok(afu->adapter)) + return -EIO; + info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); info->dar = cxl_p2n_read(afu, CXL_PSL_DAR_An); info->dsr = cxl_p2n_read(afu, CXL_PSL_DSR_An); diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 1d314f1f95fe..6ca7c3d0ef9b 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -24,6 +24,7 @@ #include <asm/io.h> #include "cxl.h" +#include <misc/cxl.h> #define CXL_PCI_VSEC_ID 0x1280 @@ -133,7 +134,7 @@ u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off) return (val >> ((off & 0x3) * 8)) & 0xff; } -static DEFINE_PCI_DEVICE_TABLE(cxl_pci_tbl) = { +static const struct pci_device_id cxl_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0477), }, { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), }, { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x04cf), }, @@ -539,10 +540,18 @@ err: static void cxl_unmap_slice_regs(struct cxl_afu *afu) { - if (afu->p2n_mmio) + if (afu->p2n_mmio) { iounmap(afu->p2n_mmio); - if (afu->p1n_mmio) + afu->p2n_mmio = NULL; + } + if (afu->p1n_mmio) { iounmap(afu->p1n_mmio); + afu->p1n_mmio = NULL; + } + if (afu->afu_desc_mmio) { + iounmap(afu->afu_desc_mmio); + afu->afu_desc_mmio = NULL; + } } static void cxl_release_afu(struct device *dev) @@ -552,6 +561,8 @@ static void cxl_release_afu(struct device *dev) pr_devel("cxl_release_afu\n"); idr_destroy(&afu->contexts_idr); + cxl_release_spa(afu); + kfree(afu); } @@ -743,45 +754,70 @@ ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf, return count; } -static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) +static int cxl_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev) { - struct cxl_afu *afu; - bool free = true; int rc; - if (!(afu = cxl_alloc_afu(adapter, slice))) - return -ENOMEM; - - if ((rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice))) - goto err1; - if ((rc = cxl_map_slice_regs(afu, adapter, dev))) - goto err1; + return rc; if ((rc = sanitise_afu_regs(afu))) - goto err2; + goto err1; /* We need to reset the AFU before we can read the AFU descriptor */ if ((rc = __cxl_afu_reset(afu))) - goto err2; + goto err1; if (cxl_verbose) dump_afu_descriptor(afu); if ((rc = cxl_read_afu_descriptor(afu))) - goto err2; + goto err1; if ((rc = cxl_afu_descriptor_looks_ok(afu))) - goto err2; + goto err1; if ((rc = init_implementation_afu_regs(afu))) - goto err2; + goto err1; if ((rc = cxl_register_serr_irq(afu))) - goto err2; + goto err1; if ((rc = cxl_register_psl_irq(afu))) - goto err3; + goto err2; + + return 0; + +err2: + cxl_release_serr_irq(afu); +err1: + cxl_unmap_slice_regs(afu); + return rc; +} + +static void cxl_deconfigure_afu(struct cxl_afu *afu) +{ + cxl_release_psl_irq(afu); + cxl_release_serr_irq(afu); + cxl_unmap_slice_regs(afu); +} + +static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) +{ + struct cxl_afu *afu; + int rc; + + afu = cxl_alloc_afu(adapter, slice); + if (!afu) + return -ENOMEM; + + rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice); + if (rc) + goto err_free; + + rc = cxl_configure_afu(afu, adapter, dev); + if (rc) + goto err_free; /* Don't care if this fails */ cxl_debugfs_afu_add(afu); @@ -796,10 +832,6 @@ static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) if ((rc = cxl_sysfs_afu_add(afu))) goto err_put1; - - if ((rc = cxl_afu_select_best_mode(afu))) - goto err_put2; - adapter->afu[afu->slice] = afu; if ((rc = cxl_pci_vphb_add(afu))) @@ -807,21 +839,16 @@ static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) return 0; -err_put2: - cxl_sysfs_afu_remove(afu); err_put1: - device_unregister(&afu->dev); - free = false; + cxl_deconfigure_afu(afu); cxl_debugfs_afu_remove(afu); - cxl_release_psl_irq(afu); -err3: - cxl_release_serr_irq(afu); -err2: - cxl_unmap_slice_regs(afu); -err1: - if (free) - kfree(afu); + device_unregister(&afu->dev); + return rc; + +err_free: + kfree(afu); return rc; + } static void cxl_remove_afu(struct cxl_afu *afu) @@ -841,10 +868,7 @@ static void cxl_remove_afu(struct cxl_afu *afu) cxl_context_detach_all(afu); cxl_afu_deactivate_mode(afu); - cxl_release_psl_irq(afu); - cxl_release_serr_irq(afu); - cxl_unmap_slice_regs(afu); - + cxl_deconfigure_afu(afu); device_unregister(&afu->dev); } @@ -852,16 +876,15 @@ int cxl_reset(struct cxl *adapter) { struct pci_dev *dev = to_pci_dev(adapter->dev.parent); int rc; - int i; - u32 val; - - dev_info(&dev->dev, "CXL reset\n"); - for (i = 0; i < adapter->slices; i++) { - cxl_pci_vphb_remove(adapter->afu[i]); - cxl_remove_afu(adapter->afu[i]); + if (adapter->perst_same_image) { + dev_warn(&dev->dev, + "cxl: refusing to reset/reflash when perst_reloads_same_image is set.\n"); + return -EINVAL; } + dev_info(&dev->dev, "CXL reset\n"); + /* pcie_warm_reset requests a fundamental pci reset which includes a * PERST assert/deassert. PERST triggers a loading of the image * if "user" or "factory" is selected in sysfs */ @@ -870,20 +893,6 @@ int cxl_reset(struct cxl *adapter) return rc; } - /* the PERST done above fences the PHB. So, reset depends on EEH - * to unbind the driver, tell Sapphire to reinit the PHB, and rebind - * the driver. Do an mmio read explictly to ensure EEH notices the - * fenced PHB. Retry for a few seconds before giving up. */ - i = 0; - while (((val = mmio_read32be(adapter->p1_mmio)) != 0xffffffff) && - (i < 5)) { - msleep(500); - i++; - } - - if (val != 0xffffffff) - dev_err(&dev->dev, "cxl: PERST failed to trigger EEH\n"); - return rc; } @@ -918,10 +927,16 @@ err1: static void cxl_unmap_adapter_regs(struct cxl *adapter) { - if (adapter->p1_mmio) + if (adapter->p1_mmio) { iounmap(adapter->p1_mmio); - if (adapter->p2_mmio) + adapter->p1_mmio = NULL; + pci_release_region(to_pci_dev(adapter->dev.parent), 2); + } + if (adapter->p2_mmio) { iounmap(adapter->p2_mmio); + adapter->p2_mmio = NULL; + pci_release_region(to_pci_dev(adapter->dev.parent), 0); + } } static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev) @@ -950,7 +965,6 @@ static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev) CXL_READ_VSEC_BASE_IMAGE(dev, vsec, &adapter->base_image); CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state); adapter->user_image_loaded = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED); - adapter->perst_loads_image = true; adapter->perst_select_user = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED); CXL_READ_VSEC_NAFUS(dev, vsec, &adapter->slices); @@ -1010,22 +1024,33 @@ static void cxl_release_adapter(struct device *dev) pr_devel("cxl_release_adapter\n"); + cxl_remove_adapter_nr(adapter); + kfree(adapter); } -static struct cxl *cxl_alloc_adapter(struct pci_dev *dev) +static struct cxl *cxl_alloc_adapter(void) { struct cxl *adapter; if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL))) return NULL; - adapter->dev.parent = &dev->dev; - adapter->dev.release = cxl_release_adapter; - pci_set_drvdata(dev, adapter); spin_lock_init(&adapter->afu_list_lock); + if (cxl_alloc_adapter_nr(adapter)) + goto err1; + + if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num)) + goto err2; + return adapter; + +err2: + cxl_remove_adapter_nr(adapter); +err1: + kfree(adapter); + return NULL; } static int sanitise_adapter_regs(struct cxl *adapter) @@ -1034,57 +1059,97 @@ static int sanitise_adapter_regs(struct cxl *adapter) return cxl_tlb_slb_invalidate(adapter); } -static struct cxl *cxl_init_adapter(struct pci_dev *dev) +/* This should contain *only* operations that can safely be done in + * both creation and recovery. + */ +static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) { - struct cxl *adapter; - bool free = true; int rc; + adapter->dev.parent = &dev->dev; + adapter->dev.release = cxl_release_adapter; + pci_set_drvdata(dev, adapter); - if (!(adapter = cxl_alloc_adapter(dev))) - return ERR_PTR(-ENOMEM); + rc = pci_enable_device(dev); + if (rc) { + dev_err(&dev->dev, "pci_enable_device failed: %i\n", rc); + return rc; + } if ((rc = cxl_read_vsec(adapter, dev))) - goto err1; + return rc; if ((rc = cxl_vsec_looks_ok(adapter, dev))) - goto err1; + return rc; if ((rc = setup_cxl_bars(dev))) - goto err1; + return rc; if ((rc = switch_card_to_cxl(dev))) - goto err1; - - if ((rc = cxl_alloc_adapter_nr(adapter))) - goto err1; - - if ((rc = dev_set_name(&adapter->dev, "card%i", adapter->adapter_num))) - goto err2; + return rc; if ((rc = cxl_update_image_control(adapter))) - goto err2; + return rc; if ((rc = cxl_map_adapter_regs(adapter, dev))) - goto err2; + return rc; if ((rc = sanitise_adapter_regs(adapter))) - goto err2; + goto err; if ((rc = init_implementation_adapter_regs(adapter, dev))) - goto err3; + goto err; if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_CAPI))) - goto err3; + goto err; /* If recovery happened, the last step is to turn on snooping. * In the non-recovery case this has no effect */ - if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON))) { - goto err3; - } + if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON))) + goto err; if ((rc = cxl_register_psl_err_irq(adapter))) - goto err3; + goto err; + + return 0; + +err: + cxl_unmap_adapter_regs(adapter); + return rc; + +} + +static void cxl_deconfigure_adapter(struct cxl *adapter) +{ + struct pci_dev *pdev = to_pci_dev(adapter->dev.parent); + + cxl_release_psl_err_irq(adapter); + cxl_unmap_adapter_regs(adapter); + + pci_disable_device(pdev); +} + +static struct cxl *cxl_init_adapter(struct pci_dev *dev) +{ + struct cxl *adapter; + int rc; + + adapter = cxl_alloc_adapter(); + if (!adapter) + return ERR_PTR(-ENOMEM); + + /* Set defaults for parameters which need to persist over + * configure/reconfigure + */ + adapter->perst_loads_image = true; + adapter->perst_same_image = false; + + rc = cxl_configure_adapter(adapter, dev); + if (rc) { + pci_disable_device(dev); + cxl_release_adapter(&adapter->dev); + return ERR_PTR(rc); + } /* Don't care if this one fails: */ cxl_debugfs_adapter_add(adapter); @@ -1102,37 +1167,25 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev) return adapter; err_put1: - device_unregister(&adapter->dev); - free = false; + /* This should mirror cxl_remove_adapter, except without the + * sysfs parts + */ cxl_debugfs_adapter_remove(adapter); - cxl_release_psl_err_irq(adapter); -err3: - cxl_unmap_adapter_regs(adapter); -err2: - cxl_remove_adapter_nr(adapter); -err1: - if (free) - kfree(adapter); + cxl_deconfigure_adapter(adapter); + device_unregister(&adapter->dev); return ERR_PTR(rc); } static void cxl_remove_adapter(struct cxl *adapter) { - struct pci_dev *pdev = to_pci_dev(adapter->dev.parent); - - pr_devel("cxl_release_adapter\n"); + pr_devel("cxl_remove_adapter\n"); cxl_sysfs_adapter_remove(adapter); cxl_debugfs_adapter_remove(adapter); - cxl_release_psl_err_irq(adapter); - cxl_unmap_adapter_regs(adapter); - cxl_remove_adapter_nr(adapter); - device_unregister(&adapter->dev); + cxl_deconfigure_adapter(adapter); - pci_release_region(pdev, 0); - pci_release_region(pdev, 2); - pci_disable_device(pdev); + device_unregister(&adapter->dev); } static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) @@ -1146,21 +1199,21 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) if (cxl_verbose) dump_cxl_config_space(dev); - if ((rc = pci_enable_device(dev))) { - dev_err(&dev->dev, "pci_enable_device failed: %i\n", rc); - return rc; - } - adapter = cxl_init_adapter(dev); if (IS_ERR(adapter)) { dev_err(&dev->dev, "cxl_init_adapter failed: %li\n", PTR_ERR(adapter)); - pci_disable_device(dev); return PTR_ERR(adapter); } for (slice = 0; slice < adapter->slices; slice++) { - if ((rc = cxl_init_afu(adapter, slice, dev))) + if ((rc = cxl_init_afu(adapter, slice, dev))) { dev_err(&dev->dev, "AFU %i failed to initialise: %i\n", slice, rc); + continue; + } + + rc = cxl_afu_select_best_mode(adapter->afu[slice]); + if (rc) + dev_err(&dev->dev, "AFU %i failed to start: %i\n", slice, rc); } return 0; @@ -1184,10 +1237,262 @@ static void cxl_remove(struct pci_dev *dev) cxl_remove_adapter(adapter); } +static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu, + pci_channel_state_t state) +{ + struct pci_dev *afu_dev; + pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET; + pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET; + + /* There should only be one entry, but go through the list + * anyway + */ + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + if (!afu_dev->driver) + continue; + + afu_dev->error_state = state; + + if (afu_dev->driver->err_handler) + afu_result = afu_dev->driver->err_handler->error_detected(afu_dev, + state); + /* Disconnect trumps all, NONE trumps NEED_RESET */ + if (afu_result == PCI_ERS_RESULT_DISCONNECT) + result = PCI_ERS_RESULT_DISCONNECT; + else if ((afu_result == PCI_ERS_RESULT_NONE) && + (result == PCI_ERS_RESULT_NEED_RESET)) + result = PCI_ERS_RESULT_NONE; + } + return result; +} + +static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct cxl *adapter = pci_get_drvdata(pdev); + struct cxl_afu *afu; + pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET; + int i; + + /* At this point, we could still have an interrupt pending. + * Let's try to get them out of the way before they do + * anything we don't like. + */ + schedule(); + + /* If we're permanently dead, give up. */ + if (state == pci_channel_io_perm_failure) { + /* Tell the AFU drivers; but we don't care what they + * say, we're going away. + */ + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + cxl_vphb_error_detected(afu, state); + } + return PCI_ERS_RESULT_DISCONNECT; + } + + /* Are we reflashing? + * + * If we reflash, we could come back as something entirely + * different, including a non-CAPI card. As such, by default + * we don't participate in the process. We'll be unbound and + * the slot re-probed. (TODO: check EEH doesn't blindly rebind + * us!) + * + * However, this isn't the entire story: for reliablity + * reasons, we usually want to reflash the FPGA on PERST in + * order to get back to a more reliable known-good state. + * + * This causes us a bit of a problem: if we reflash we can't + * trust that we'll come back the same - we could have a new + * image and been PERSTed in order to load that + * image. However, most of the time we actually *will* come + * back the same - for example a regular EEH event. + * + * Therefore, we allow the user to assert that the image is + * indeed the same and that we should continue on into EEH + * anyway. + */ + if (adapter->perst_loads_image && !adapter->perst_same_image) { + /* TODO take the PHB out of CXL mode */ + dev_info(&pdev->dev, "reflashing, so opting out of EEH!\n"); + return PCI_ERS_RESULT_NONE; + } + + /* + * At this point, we want to try to recover. We'll always + * need a complete slot reset: we don't trust any other reset. + * + * Now, we go through each AFU: + * - We send the driver, if bound, an error_detected callback. + * We expect it to clean up, but it can also tell us to give + * up and permanently detach the card. To simplify things, if + * any bound AFU driver doesn't support EEH, we give up on EEH. + * + * - We detach all contexts associated with the AFU. This + * does not free them, but puts them into a CLOSED state + * which causes any the associated files to return useful + * errors to userland. It also unmaps, but does not free, + * any IRQs. + * + * - We clean up our side: releasing and unmapping resources we hold + * so we can wire them up again when the hardware comes back up. + * + * Driver authors should note: + * + * - Any contexts you create in your kernel driver (except + * those associated with anonymous file descriptors) are + * your responsibility to free and recreate. Likewise with + * any attached resources. + * + * - We will take responsibility for re-initialising the + * device context (the one set up for you in + * cxl_pci_enable_device_hook and accessed through + * cxl_get_context). If you've attached IRQs or other + * resources to it, they remains yours to free. + * + * You can call the same functions to release resources as you + * normally would: we make sure that these functions continue + * to work when the hardware is down. + * + * Two examples: + * + * 1) If you normally free all your resources at the end of + * each request, or if you use anonymous FDs, your + * error_detected callback can simply set a flag to tell + * your driver not to start any new calls. You can then + * clear the flag in the resume callback. + * + * 2) If you normally allocate your resources on startup: + * * Set a flag in error_detected as above. + * * Let CXL detach your contexts. + * * In slot_reset, free the old resources and allocate new ones. + * * In resume, clear the flag to allow things to start. + */ + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + + result = cxl_vphb_error_detected(afu, state); + + /* Only continue if everyone agrees on NEED_RESET */ + if (result != PCI_ERS_RESULT_NEED_RESET) + return result; + + cxl_context_detach_all(afu); + cxl_afu_deactivate_mode(afu); + cxl_deconfigure_afu(afu); + } + cxl_deconfigure_adapter(adapter); + + return result; +} + +static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) +{ + struct cxl *adapter = pci_get_drvdata(pdev); + struct cxl_afu *afu; + struct cxl_context *ctx; + struct pci_dev *afu_dev; + pci_ers_result_t afu_result = PCI_ERS_RESULT_RECOVERED; + pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED; + int i; + + if (cxl_configure_adapter(adapter, pdev)) + goto err; + + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + + if (cxl_configure_afu(afu, adapter, pdev)) + goto err; + + if (cxl_afu_select_best_mode(afu)) + goto err; + + cxl_pci_vphb_reconfigure(afu); + + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + /* Reset the device context. + * TODO: make this less disruptive + */ + ctx = cxl_get_context(afu_dev); + + if (ctx && cxl_release_context(ctx)) + goto err; + + ctx = cxl_dev_context_init(afu_dev); + if (!ctx) + goto err; + + afu_dev->dev.archdata.cxl_ctx = ctx; + + if (cxl_afu_check_and_enable(afu)) + goto err; + + afu_dev->error_state = pci_channel_io_normal; + + /* If there's a driver attached, allow it to + * chime in on recovery. Drivers should check + * if everything has come back OK, but + * shouldn't start new work until we call + * their resume function. + */ + if (!afu_dev->driver) + continue; + + if (afu_dev->driver->err_handler && + afu_dev->driver->err_handler->slot_reset) + afu_result = afu_dev->driver->err_handler->slot_reset(afu_dev); + + if (afu_result == PCI_ERS_RESULT_DISCONNECT) + result = PCI_ERS_RESULT_DISCONNECT; + } + } + return result; + +err: + /* All the bits that happen in both error_detected and cxl_remove + * should be idempotent, so we don't need to worry about leaving a mix + * of unconfigured and reconfigured resources. + */ + dev_err(&pdev->dev, "EEH recovery failed. Asking to be disconnected.\n"); + return PCI_ERS_RESULT_DISCONNECT; +} + +static void cxl_pci_resume(struct pci_dev *pdev) +{ + struct cxl *adapter = pci_get_drvdata(pdev); + struct cxl_afu *afu; + struct pci_dev *afu_dev; + int i; + + /* Everything is back now. Drivers should restart work now. + * This is not the place to be checking if everything came back up + * properly, because there's no return value: do that in slot_reset. + */ + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + if (afu_dev->driver && afu_dev->driver->err_handler && + afu_dev->driver->err_handler->resume) + afu_dev->driver->err_handler->resume(afu_dev); + } + } +} + +static const struct pci_error_handlers cxl_err_handler = { + .error_detected = cxl_pci_error_detected, + .slot_reset = cxl_pci_slot_reset, + .resume = cxl_pci_resume, +}; + struct pci_driver cxl_pci_driver = { .name = "cxl-pci", .id_table = cxl_pci_tbl, .probe = cxl_probe, .remove = cxl_remove, .shutdown = cxl_remove, + .err_handler = &cxl_err_handler, }; diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c index 31f38bc71a3d..6619cf1f6e1f 100644 --- a/drivers/misc/cxl/sysfs.c +++ b/drivers/misc/cxl/sysfs.c @@ -112,12 +112,38 @@ static ssize_t load_image_on_perst_store(struct device *device, return count; } +static ssize_t perst_reloads_same_image_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->perst_same_image); +} + +static ssize_t perst_reloads_same_image_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl *adapter = to_cxl_adapter(device); + int rc; + int val; + + rc = sscanf(buf, "%i", &val); + if ((rc != 1) || !(val == 1 || val == 0)) + return -EINVAL; + + adapter->perst_same_image = (val == 1 ? true : false); + return count; +} + static struct device_attribute adapter_attrs[] = { __ATTR_RO(caia_version), __ATTR_RO(psl_revision), __ATTR_RO(base_image), __ATTR_RO(image_loaded), __ATTR_RW(load_image_on_perst), + __ATTR_RW(perst_reloads_same_image), __ATTR(reset, S_IWUSR, NULL, reset_adapter_store), }; diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 2eba002b580b..6dd16a6d153f 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -138,6 +138,26 @@ static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, return 0; } + +static inline bool cxl_config_link_ok(struct pci_bus *bus) +{ + struct pci_controller *phb; + struct cxl_afu *afu; + + /* Config space IO is based on phb->cfg_addr, which is based on + * afu_desc_mmio. This isn't safe to read/write when the link + * goes down, as EEH tears down MMIO space. + * + * Check if the link is OK before proceeding. + */ + + phb = pci_bus_to_host(bus); + if (phb == NULL) + return false; + afu = (struct cxl_afu *)phb->private_data; + return cxl_adapter_link_ok(afu->adapter); +} + static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, int offset, int len, u32 *val) { @@ -150,6 +170,9 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, if (rc) return rc; + if (!cxl_config_link_ok(bus)) + return PCIBIOS_DEVICE_NOT_FOUND; + /* Can only read 32 bits */ *val = (in_le32(ioaddr) >> shift) & mask; return PCIBIOS_SUCCESSFUL; @@ -167,6 +190,9 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, if (rc) return rc; + if (!cxl_config_link_ok(bus)) + return PCIBIOS_DEVICE_NOT_FOUND; + /* Can only write 32 bits so do read-modify-write */ mask <<= shift; val <<= shift; @@ -240,6 +266,14 @@ int cxl_pci_vphb_add(struct cxl_afu *afu) return 0; } +void cxl_pci_vphb_reconfigure(struct cxl_afu *afu) +{ + /* When we are reconfigured, the AFU's MMIO space is unmapped + * and remapped. We need to reflect this in the PHB's view of + * the world. + */ + afu->phb->cfg_addr = afu->afu_desc_mmio + afu->crs_offset; +} void cxl_pci_vphb_remove(struct cxl_afu *afu) { diff --git a/drivers/tty/hvc/hvsi.c b/drivers/tty/hvc/hvsi.c index 41901997c0d6..a75146f600cb 100644 --- a/drivers/tty/hvc/hvsi.c +++ b/drivers/tty/hvc/hvsi.c @@ -240,9 +240,9 @@ static void hvsi_recv_control(struct hvsi_struct *hp, uint8_t *packet, { struct hvsi_control *header = (struct hvsi_control *)packet; - switch (header->verb) { + switch (be16_to_cpu(header->verb)) { case VSV_MODEM_CTL_UPDATE: - if ((header->word & HVSI_TSCD) == 0) { + if ((be32_to_cpu(header->word) & HVSI_TSCD) == 0) { /* CD went away; no more connection */ pr_debug("hvsi%i: CD dropped\n", hp->index); hp->mctrl &= TIOCM_CD; @@ -267,6 +267,7 @@ static void hvsi_recv_control(struct hvsi_struct *hp, uint8_t *packet, static void hvsi_recv_response(struct hvsi_struct *hp, uint8_t *packet) { struct hvsi_query_response *resp = (struct hvsi_query_response *)packet; + uint32_t mctrl_word; switch (hp->state) { case HVSI_WAIT_FOR_VER_RESPONSE: @@ -274,9 +275,10 @@ static void hvsi_recv_response(struct hvsi_struct *hp, uint8_t *packet) break; case HVSI_WAIT_FOR_MCTRL_RESPONSE: hp->mctrl = 0; - if (resp->u.mctrl_word & HVSI_TSDTR) + mctrl_word = be32_to_cpu(resp->u.mctrl_word); + if (mctrl_word & HVSI_TSDTR) hp->mctrl |= TIOCM_DTR; - if (resp->u.mctrl_word & HVSI_TSCD) + if (mctrl_word & HVSI_TSCD) hp->mctrl |= TIOCM_CD; __set_state(hp, HVSI_OPEN); break; @@ -295,10 +297,10 @@ static int hvsi_version_respond(struct hvsi_struct *hp, uint16_t query_seqno) packet.hdr.type = VS_QUERY_RESPONSE_PACKET_HEADER; packet.hdr.len = sizeof(struct hvsi_query_response); - packet.hdr.seqno = atomic_inc_return(&hp->seqno); - packet.verb = VSV_SEND_VERSION_NUMBER; + packet.hdr.seqno = cpu_to_be16(atomic_inc_return(&hp->seqno)); + packet.verb = cpu_to_be16(VSV_SEND_VERSION_NUMBER); packet.u.version = HVSI_VERSION; - packet.query_seqno = query_seqno+1; + packet.query_seqno = cpu_to_be16(query_seqno+1); pr_debug("%s: sending %i bytes\n", __func__, packet.hdr.len); dbg_dump_hex((uint8_t*)&packet, packet.hdr.len); @@ -319,7 +321,7 @@ static void hvsi_recv_query(struct hvsi_struct *hp, uint8_t *packet) switch (hp->state) { case HVSI_WAIT_FOR_VER_QUERY: - hvsi_version_respond(hp, query->hdr.seqno); + hvsi_version_respond(hp, be16_to_cpu(query->hdr.seqno)); __set_state(hp, HVSI_OPEN); break; default: @@ -555,8 +557,8 @@ static int hvsi_query(struct hvsi_struct *hp, uint16_t verb) packet.hdr.type = VS_QUERY_PACKET_HEADER; packet.hdr.len = sizeof(struct hvsi_query); - packet.hdr.seqno = atomic_inc_return(&hp->seqno); - packet.verb = verb; + packet.hdr.seqno = cpu_to_be16(atomic_inc_return(&hp->seqno)); + packet.verb = cpu_to_be16(verb); pr_debug("%s: sending %i bytes\n", __func__, packet.hdr.len); dbg_dump_hex((uint8_t*)&packet, packet.hdr.len); @@ -596,14 +598,14 @@ static int hvsi_set_mctrl(struct hvsi_struct *hp, uint16_t mctrl) struct hvsi_control packet __ALIGNED__; int wrote; - packet.hdr.type = VS_CONTROL_PACKET_HEADER, - packet.hdr.seqno = atomic_inc_return(&hp->seqno); + packet.hdr.type = VS_CONTROL_PACKET_HEADER; + packet.hdr.seqno = cpu_to_be16(atomic_inc_return(&hp->seqno)); packet.hdr.len = sizeof(struct hvsi_control); - packet.verb = VSV_SET_MODEM_CTL; - packet.mask = HVSI_TSDTR; + packet.verb = cpu_to_be16(VSV_SET_MODEM_CTL); + packet.mask = cpu_to_be32(HVSI_TSDTR); if (mctrl & TIOCM_DTR) - packet.word = HVSI_TSDTR; + packet.word = cpu_to_be32(HVSI_TSDTR); pr_debug("%s: sending %i bytes\n", __func__, packet.hdr.len); dbg_dump_hex((uint8_t*)&packet, packet.hdr.len); @@ -680,7 +682,7 @@ static int hvsi_put_chars(struct hvsi_struct *hp, const char *buf, int count) BUG_ON(count > HVSI_MAX_OUTGOING_DATA); packet.hdr.type = VS_DATA_PACKET_HEADER; - packet.hdr.seqno = atomic_inc_return(&hp->seqno); + packet.hdr.seqno = cpu_to_be16(atomic_inc_return(&hp->seqno)); packet.hdr.len = count + sizeof(struct hvsi_header); memcpy(&packet.data, buf, count); @@ -697,9 +699,9 @@ static void hvsi_close_protocol(struct hvsi_struct *hp) struct hvsi_control packet __ALIGNED__; packet.hdr.type = VS_CONTROL_PACKET_HEADER; - packet.hdr.seqno = atomic_inc_return(&hp->seqno); + packet.hdr.seqno = cpu_to_be16(atomic_inc_return(&hp->seqno)); packet.hdr.len = 6; - packet.verb = VSV_CLOSE_PROTOCOL; + packet.verb = cpu_to_be16(VSV_CLOSE_PROTOCOL); pr_debug("%s: sending %i bytes\n", __func__, packet.hdr.len); dbg_dump_hex((uint8_t*)&packet, packet.hdr.len); @@ -1180,7 +1182,7 @@ static int __init hvsi_console_init(void) /* search device tree for vty nodes */ for_each_compatible_node(vty, "serial", "hvterm-protocol") { struct hvsi_struct *hp; - const uint32_t *vtermno, *irq; + const __be32 *vtermno, *irq; vtermno = of_get_property(vty, "reg", NULL); irq = of_get_property(vty, "interrupts", NULL); @@ -1202,11 +1204,11 @@ static int __init hvsi_console_init(void) hp->index = hvsi_count; hp->inbuf_end = hp->inbuf; hp->state = HVSI_CLOSED; - hp->vtermno = *vtermno; - hp->virq = irq_create_mapping(NULL, irq[0]); + hp->vtermno = be32_to_cpup(vtermno); + hp->virq = irq_create_mapping(NULL, be32_to_cpup(irq)); if (hp->virq == 0) { printk(KERN_ERR "%s: couldn't create irq mapping for 0x%x\n", - __func__, irq[0]); + __func__, be32_to_cpup(irq)); tty_port_destroy(&hp->port); continue; } diff --git a/include/misc/cxl.h b/include/misc/cxl.h index 7a6c1d6cc173..f2ffe5bd720d 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -200,4 +200,14 @@ unsigned int cxl_fd_poll(struct file *file, struct poll_table_struct *poll); ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count, loff_t *off); +/* + * For EEH, a driver may want to assert a PERST will reload the same image + * from flash into the FPGA. + * + * This is a property of the entire adapter, not a single AFU, so drivers + * should set this property with care! + */ +void cxl_perst_reloads_same_image(struct cxl_afu *afu, + bool perst_reloads_same_image); + #endif /* _MISC_CXL_H */ diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h index 99a8ca15fe64..1e889aa8a36e 100644 --- a/include/uapi/misc/cxl.h +++ b/include/uapi/misc/cxl.h @@ -29,8 +29,10 @@ struct cxl_ioctl_start_work { #define CXL_START_WORK_AMR 0x0000000000000001ULL #define CXL_START_WORK_NUM_IRQS 0x0000000000000002ULL +#define CXL_START_WORK_ERR_FF 0x0000000000000004ULL #define CXL_START_WORK_ALL (CXL_START_WORK_AMR |\ - CXL_START_WORK_NUM_IRQS) + CXL_START_WORK_NUM_IRQS |\ + CXL_START_WORK_ERR_FF) /* Possible modes that an afu can be in */ diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 41cc3ed66818..ee179e22308c 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -2,8 +2,9 @@ noarg: $(MAKE) -C ../ TEST_PROGS := hugetlb_vs_thp_test subpage_prot +TEST_FILES := tempfile -all: $(TEST_PROGS) tempfile +all: $(TEST_PROGS) $(TEST_FILES) $(TEST_PROGS): ../harness.c diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index c5abe7fd7590..a004b4cce99e 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -14,6 +14,7 @@ #include <linux/filter.h> #include <sys/prctl.h> #include <sys/ptrace.h> +#include <sys/types.h> #include <sys/user.h> #include <linux/prctl.h> #include <linux/ptrace.h> @@ -82,7 +83,13 @@ struct seccomp_data { }; #endif +#if __BYTE_ORDER == __LITTLE_ENDIAN #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) +#elif __BYTE_ORDER == __BIG_ENDIAN +#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32)) +#else +#error "wut? Unknown __BYTE_ORDER?!" +#endif #define SIBLING_EXIT_UNKILLED 0xbadbeef #define SIBLING_EXIT_FAILURE 0xbadface @@ -1199,6 +1206,10 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define ARCH_REGS struct user_pt_regs # define SYSCALL_NUM regs[8] # define SYSCALL_RET regs[0] +#elif defined(__powerpc__) +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM gpr[0] +# define SYSCALL_RET gpr[3] #else # error "Do not know how to find your architecture's registers and syscalls" #endif @@ -1232,7 +1243,7 @@ void change_syscall(struct __test_metadata *_metadata, ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); EXPECT_EQ(0, ret); -#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || defined(__powerpc__) { regs.SYSCALL_NUM = syscall; } @@ -1396,6 +1407,8 @@ TEST_F(TRACE_syscall, syscall_dropped) # define __NR_seccomp 383 # elif defined(__aarch64__) # define __NR_seccomp 277 +# elif defined(__powerpc__) +# define __NR_seccomp 358 # else # warning "seccomp syscall number unknown for this architecture" # define __NR_seccomp 0xffff |