diff options
Diffstat (limited to 'arch/powerpc')
184 files changed, 4112 insertions, 3899 deletions
diff --git a/arch/powerpc/Kbuild b/arch/powerpc/Kbuild index 571f260b0842..b010ccb071b6 100644 --- a/arch/powerpc/Kbuild +++ b/arch/powerpc/Kbuild @@ -19,4 +19,4 @@ obj-$(CONFIG_KEXEC_CORE) += kexec/ obj-$(CONFIG_KEXEC_FILE) += purgatory/ # for cleaning -subdir- += boot +subdir- += boot tools diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8094a01974cc..a0ce777f9706 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -234,6 +234,8 @@ config PPC select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_ARGS if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32 + select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS if PPC_FTRACE_OUT_OF_LINE || (PPC32 && ARCH_USING_PATCHABLE_FUNCTION_ENTRY) + select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS if HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS select HAVE_DYNAMIC_FTRACE_WITH_REGS if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32 select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS @@ -243,7 +245,7 @@ config PPC select HAVE_FUNCTION_DESCRIPTORS if PPC64_ELF_ABI_V1 select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_TRACER if PPC64 || (PPC32 && CC_IS_GCC) + select HAVE_FUNCTION_TRACER if !COMPILE_TEST && (PPC64 || (PPC32 && CC_IS_GCC)) select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC select HAVE_GENERIC_VDSO select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC_BOOK3S_64 && SMP @@ -273,10 +275,12 @@ config PPC select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE select HAVE_RSEQ + select HAVE_SAMPLE_FTRACE_DIRECT if HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + select HAVE_SAMPLE_FTRACE_DIRECT_MULTI if HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_SETUP_PER_CPU_AREA if PPC64 select HAVE_SOFTIRQ_ON_OWN_STACK - select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2) - select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13) + select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,$(m32-flag) -mstack-protector-guard=tls -mstack-protector-guard-reg=r2 -mstack-protector-guard-offset=0) + select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,$(m64-flag) -mstack-protector-guard=tls -mstack-protector-guard-reg=r13 -mstack-protector-guard-offset=0) select HAVE_STATIC_CALL if PPC32 select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING @@ -569,6 +573,22 @@ config ARCH_USING_PATCHABLE_FUNCTION_ENTRY def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mlittle-endian) if PPC64 && CPU_LITTLE_ENDIAN def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mbig-endian) if PPC64 && CPU_BIG_ENDIAN +config PPC_FTRACE_OUT_OF_LINE + def_bool PPC64 && ARCH_USING_PATCHABLE_FUNCTION_ENTRY + select ARCH_WANTS_PRE_LINK_VMLINUX + +config PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE + int "Number of ftrace out-of-line stubs to reserve within .text" + depends on PPC_FTRACE_OUT_OF_LINE + default 32768 + help + Number of stubs to reserve for use by ftrace. This space is + reserved within .text, and is distinct from any additional space + added at the end of .text before the final vmlinux link. Set to + zero to have stubs only be generated at the end of vmlinux (only + if the size of vmlinux is less than 32MB). Set to a higher value + if building vmlinux larger than 48MB. 
+ config HOTPLUG_CPU bool "Support for enabling/disabling CPUs" depends on SMP && (PPC_PSERIES || \ @@ -684,6 +704,10 @@ config RELOCATABLE_TEST config ARCH_SUPPORTS_CRASH_DUMP def_bool PPC64 || PPC_BOOK3S_32 || PPC_85xx || (44x && !SMP) +config ARCH_DEFAULT_CRASH_DUMP + bool + default y if !PPC_BOOK3S_32 + config ARCH_SELECTS_CRASH_DUMP def_bool y depends on CRASH_DUMP @@ -1298,6 +1322,14 @@ config MODULES_SIZE endmenu +config PPC64_PROC_SYSTEMCFG + def_bool y + depends on PPC64 && PROC_FS + help + This option enables the presence of /proc/ppc64/systemcfg through + which the systemcfg page can be accessed. + This interface only exists for backwards-compatibility. + if PPC64 # This value must have zeroes in the bottom 60 bits otherwise lots will break config PAGE_OFFSET diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 0bbec4afc0d5..20d05605fa83 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -223,12 +223,6 @@ config PPC_EARLY_DEBUG_RTAS_CONSOLE help Select this to enable early debugging via the RTAS console. -config PPC_EARLY_DEBUG_MAPLE - bool "Maple real mode" - depends on PPC_MAPLE - help - Select this to enable early debugging for Maple. - config PPC_EARLY_DEBUG_PAS_REALMODE bool "PA Semi real mode" depends on PPC_PASEMI diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index bbfe4a1f06ef..f3804103c56c 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -62,14 +62,14 @@ KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o endif ifdef CONFIG_CPU_LITTLE_ENDIAN -KBUILD_CFLAGS += -mlittle-endian +KBUILD_CPPFLAGS += -mlittle-endian KBUILD_LDFLAGS += -EL LDEMULATION := lppc GNUTARGET := powerpcle MULTIPLEWORD := -mno-multiple KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect) else -KBUILD_CFLAGS += $(call cc-option,-mbig-endian) +KBUILD_CPPFLAGS += $(call cc-option,-mbig-endian) KBUILD_LDFLAGS += -EB LDEMULATION := ppc GNUTARGET := powerpc @@ -95,18 +95,11 @@ aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian ifeq ($(HAS_BIARCH),y) -KBUILD_CFLAGS += -m$(BITS) +KBUILD_CPPFLAGS += -m$(BITS) KBUILD_AFLAGS += -m$(BITS) KBUILD_LDFLAGS += -m elf$(BITS)$(LDEMULATION) endif -cflags-$(CONFIG_STACKPROTECTOR) += -mstack-protector-guard=tls -ifdef CONFIG_PPC64 -cflags-$(CONFIG_STACKPROTECTOR) += -mstack-protector-guard-reg=r13 -else -cflags-$(CONFIG_STACKPROTECTOR) += -mstack-protector-guard-reg=r2 -endif - LDFLAGS_vmlinux-y := -Bstatic LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) += -z notext @@ -155,7 +148,15 @@ CC_FLAGS_NO_FPU := $(call cc-option,-msoft-float) ifdef CONFIG_FUNCTION_TRACER ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY +ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +CC_FLAGS_FTRACE := -fpatchable-function-entry=1 +else +ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS # PPC32 only +CC_FLAGS_FTRACE := -fpatchable-function-entry=3,1 +else CC_FLAGS_FTRACE := -fpatchable-function-entry=2 +endif +endif else CC_FLAGS_FTRACE := -pg ifdef CONFIG_MPROFILE_KERNEL @@ -175,7 +176,6 @@ KBUILD_CPPFLAGS += -I $(srctree)/arch/powerpc $(asinstr) KBUILD_AFLAGS += $(AFLAGS-y) KBUILD_CFLAGS += $(CC_FLAGS_NO_FPU) KBUILD_CFLAGS += $(CFLAGS-y) -CPP = $(CC) -E $(KBUILD_CFLAGS) CHECKFLAGS += -m$(BITS) -D__powerpc__ -D__powerpc$(BITS)__ ifdef CONFIG_CPU_BIG_ENDIAN @@ -359,7 +359,7 @@ define archhelp echo ' install - Install kernel using' echo ' (your) ~/bin/$(INSTALLKERNEL) 
or' echo ' (distribution) /sbin/$(INSTALLKERNEL) or' - echo ' install to $$(INSTALL_PATH) and run lilo' + echo ' install to $$(INSTALL_PATH)' echo ' *_defconfig - Select default config from arch/powerpc/configs' echo '' echo ' Targets with <dt> embed a device tree blob inside the image' @@ -402,9 +402,13 @@ prepare: stack_protector_prepare PHONY += stack_protector_prepare stack_protector_prepare: prepare0 ifdef CONFIG_PPC64 - $(eval KBUILD_CFLAGS += -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "PACA_CANARY") print $$3;}' include/generated/asm-offsets.h)) + $(eval KBUILD_CFLAGS += -mstack-protector-guard=tls -mstack-protector-guard-reg=r13 \ + -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "PACA_CANARY") print $$3;}' \ + $(objtree)/include/generated/asm-offsets.h)) else - $(eval KBUILD_CFLAGS += -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "TASK_CANARY") print $$3;}' include/generated/asm-offsets.h)) + $(eval KBUILD_CFLAGS += -mstack-protector-guard=tls -mstack-protector-guard-reg=r2 \ + -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "TASK_CANARY") print $$3;}' \ + $(objtree)/include/generated/asm-offsets.h)) endif endif diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink index ae5a4256b03d..bb601be36173 100644 --- a/arch/powerpc/Makefile.postlink +++ b/arch/powerpc/Makefile.postlink @@ -24,6 +24,9 @@ else $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" endif +quiet_cmd_ftrace_check = CHKFTRC $@ + cmd_ftrace_check = $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/ftrace_check.sh "$(NM)" "$@" + # `@true` prevents complaint when there is nothing to be done vmlinux: FORCE @@ -34,6 +37,11 @@ endif ifdef CONFIG_RELOCATABLE $(call if_changed,relocs_check) endif +ifdef CONFIG_FUNCTION_TRACER +ifndef CONFIG_PPC64_ELF_ABI_V1 + $(call cmd,ftrace_check) +endif +endif clean: rm -f .tmp_symbols.txt diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore index a4716d138cfc..5a867f23fe7f 100644 --- a/arch/powerpc/boot/.gitignore +++ b/arch/powerpc/boot/.gitignore @@ -30,7 +30,6 @@ zImage.coff zImage.epapr zImage.holly zImage.*lds -zImage.maple zImage.miboot zImage.pmac zImage.pseries diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index fa8518067d38..1ff6ad4f6cd2 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -276,7 +276,6 @@ quiet_cmd_wrap = WRAP $@ image-$(CONFIG_PPC_PSERIES) += zImage.pseries image-$(CONFIG_PPC_POWERNV) += zImage.pseries -image-$(CONFIG_PPC_MAPLE) += zImage.maple image-$(CONFIG_PPC_IBM_CELL_BLADE) += zImage.pseries image-$(CONFIG_PPC_PS3) += dtbImage.ps3 image-$(CONFIG_PPC_CHRP) += zImage.chrp @@ -444,7 +443,7 @@ $(obj)/zImage.initrd: $(addprefix $(obj)/, $(initrd-y)) clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \ zImage zImage.initrd zImage.chrp zImage.coff zImage.holly \ zImage.miboot zImage.pmac zImage.pseries \ - zImage.maple simpleImage.* otheros.bld + simpleImage.* otheros.bld # clean up files cached by wrapper clean-kernel-base := vmlinux.strip vmlinux.bin diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index b1f5549a3c9c..1db60fe13802 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -271,11 +271,6 @@ pseries) fi make_space=n ;; -maple) - platformo="$object/of.o $object/epapr.o" - link_address='0x400000' - make_space=n - ;; pmac|chrp) platformo="$object/of.o $object/epapr.o" make_space=n @@ -517,7 +512,7 @@ fi # post-processing needed for some 
platforms case "$platform" in -pseries|chrp|maple) +pseries|chrp) $objbin/addnote "$ofile" ;; coff) diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig deleted file mode 100644 index c821a97f4a89..000000000000 --- a/arch/powerpc/configs/maple_defconfig +++ /dev/null @@ -1,111 +0,0 @@ -CONFIG_PPC64=y -CONFIG_SMP=y -CONFIG_NR_CPUS=4 -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -# CONFIG_COMPAT_BRK is not set -CONFIG_PROFILING=y -CONFIG_KPROBES=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_MODULE_SRCVERSION_ALL=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAC_PARTITION=y -# CONFIG_PPC_POWERNV is not set -# CONFIG_PPC_PSERIES is not set -# CONFIG_PPC_PMAC is not set -CONFIG_PPC_MAPLE=y -CONFIG_UDBG_RTAS_CONSOLE=y -CONFIG_GEN_RTC=y -CONFIG_KEXEC=y -CONFIG_IRQ_ALL_CPUS=y -CONFIG_PPC_4K_PAGES=y -CONFIG_PCI_MSI=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_XFRM_USER=m -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IPV6 is not set -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=8192 -# CONFIG_SCSI_PROC_FS is not set -CONFIG_BLK_DEV_SD=y -CONFIG_BLK_DEV_SR=y -CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_IPR=y -CONFIG_ATA=y -CONFIG_PATA_AMD=y -CONFIG_ATA_GENERIC=y -CONFIG_NETDEVICES=y -CONFIG_AMD8111_ETH=y -CONFIG_TIGON3=y -CONFIG_E1000=y -CONFIG_USB_PEGASUS=y -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_HVC_RTAS=y -# CONFIG_HW_RANDOM is not set -CONFIG_I2C=y -CONFIG_I2C_CHARDEV=y -CONFIG_I2C_AMD8111=y -# CONFIG_VGA_CONSOLE is not set -CONFIG_HID_GYRATION=y -CONFIG_HID_PANTHERLORD=y -CONFIG_HID_PETALYNX=y -CONFIG_HID_SAMSUNG=y -CONFIG_HID_SUNPLUS=y -CONFIG_USB=y -CONFIG_USB_MON=y -CONFIG_USB_EHCI_HCD=y -CONFIG_USB_EHCI_ROOT_HUB_TT=y -# CONFIG_USB_EHCI_HCD_PPC_OF is not set -CONFIG_USB_OHCI_HCD=y -CONFIG_USB_UHCI_HCD=y -CONFIG_USB_SERIAL=y -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_CYPRESS_M8=m -CONFIG_USB_SERIAL_GARMIN=m -CONFIG_USB_SERIAL_IPW=m -CONFIG_USB_SERIAL_KEYSPAN=y -CONFIG_USB_SERIAL_TI=m -CONFIG_EXT2_FS=y -CONFIG_EXT4_FS=y -CONFIG_FS_DAX=y -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_HUGETLBFS=y -CONFIG_CRAMFS=y -CONFIG_NFS_FS=y -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=y -CONFIG_ROOT_NFS=y -CONFIG_NLS_DEFAULT="utf-8" -CONFIG_NLS_UTF8=y -CONFIG_CRC_CCITT=y -CONFIG_CRC_T10DIF=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_STACK_USAGE=y -CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_XMON=y -CONFIG_XMON_DEFAULT=y -CONFIG_BOOTX_TEXT=y -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_PCBC=m -# CONFIG_CRYPTO_HW is not set -CONFIG_PRINTK_TIME=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 57ded82c2840..e8b3f67bf3f5 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -208,6 +208,7 @@ CONFIG_FB_ATY=y CONFIG_FB_ATY_CT=y CONFIG_FB_ATY_GX=y CONFIG_FB_3DFX=y +CONFIG_BACKLIGHT_CLASS_DEVICE=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index a5e3e7f97f4d..f39c0d000c43 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -44,7 +44,6 @@ CONFIG_PPC_SMLPAR=y CONFIG_IBMEBUS=y CONFIG_PAPR_SCM=m CONFIG_PPC_SVM=y 
-CONFIG_PPC_MAPLE=y CONFIG_PPC_PASEMI=y CONFIG_PPC_PASEMI_IOMMU=y CONFIG_PPC_PS3=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index c06344db0eb3..ca0c90e95837 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -435,7 +435,6 @@ CONFIG_DM9102=m CONFIG_ULI526X=m CONFIG_PCMCIA_XIRCOM=m CONFIG_DL2K=m -CONFIG_SUNDANCE=m CONFIG_S2IO=m CONFIG_FEC_MPC52xx=m CONFIG_GIANFAR=m @@ -717,6 +716,7 @@ CONFIG_FB_TRIDENT=m CONFIG_FB_SM501=m CONFIG_FB_IBM_GXT4500=y CONFIG_LCD_PLATFORM=m +CONFIG_BACKLIGHT_CLASS_DEVICE=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y CONFIG_LOGO=y diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig index 46a4c85e85e2..951a43726461 100644 --- a/arch/powerpc/crypto/Kconfig +++ b/arch/powerpc/crypto/Kconfig @@ -107,12 +107,12 @@ config CRYPTO_AES_PPC_SPE config CRYPTO_AES_GCM_P10 tristate "Stitched AES/GCM acceleration support on P10 or later CPU (PPC)" - depends on BROKEN depends on PPC64 && CPU_LITTLE_ENDIAN && VSX select CRYPTO_LIB_AES select CRYPTO_ALGAPI select CRYPTO_AEAD select CRYPTO_SKCIPHER + select CRYPTO_SIMD help AEAD cipher: AES cipher algorithms (FIPS-197) GCM (Galois/Counter Mode) authenticated encryption mode (NIST SP800-38D) diff --git a/arch/powerpc/crypto/aes-gcm-p10-glue.c b/arch/powerpc/crypto/aes-gcm-p10-glue.c index f66ad56e765f..f37b3d13fc53 100644 --- a/arch/powerpc/crypto/aes-gcm-p10-glue.c +++ b/arch/powerpc/crypto/aes-gcm-p10-glue.c @@ -8,6 +8,7 @@ #include <linux/unaligned.h> #include <asm/simd.h> #include <asm/switch_to.h> +#include <crypto/gcm.h> #include <crypto/aes.h> #include <crypto/algapi.h> #include <crypto/b128ops.h> @@ -24,6 +25,7 @@ #define PPC_ALIGN 16 #define GCM_IV_SIZE 12 +#define RFC4106_NONCE_SIZE 4 MODULE_DESCRIPTION("PPC64le AES-GCM with Stitched implementation"); MODULE_AUTHOR("Danny Tsen <[email protected]"); @@ -31,7 +33,7 @@ MODULE_LICENSE("GPL v2"); MODULE_ALIAS_CRYPTO("aes"); asmlinkage int aes_p10_set_encrypt_key(const u8 *userKey, const int bits, - void *key); + void *key); asmlinkage void aes_p10_encrypt(const u8 *in, u8 *out, const void *key); asmlinkage void aes_p10_gcm_encrypt(u8 *in, u8 *out, size_t len, void *rkey, u8 *iv, void *Xi); @@ -39,7 +41,8 @@ asmlinkage void aes_p10_gcm_decrypt(u8 *in, u8 *out, size_t len, void *rkey, u8 *iv, void *Xi); asmlinkage void gcm_init_htable(unsigned char htable[], unsigned char Xi[]); asmlinkage void gcm_ghash_p10(unsigned char *Xi, unsigned char *Htable, - unsigned char *aad, unsigned int alen); + unsigned char *aad, unsigned int alen); +asmlinkage void gcm_update(u8 *iv, void *Xi); struct aes_key { u8 key[AES_MAX_KEYLENGTH]; @@ -52,6 +55,7 @@ struct gcm_ctx { u8 aad_hash[16]; u64 aadLen; u64 Plen; /* offset 56 - used in aes_p10_gcm_{en/de}crypt */ + u8 pblock[16]; }; struct Hash_ctx { u8 H[16]; /* subkey */ @@ -60,17 +64,20 @@ struct Hash_ctx { struct p10_aes_gcm_ctx { struct aes_key enc_key; + u8 nonce[RFC4106_NONCE_SIZE]; }; static void vsx_begin(void) { preempt_disable(); + pagefault_disable(); enable_kernel_vsx(); } static void vsx_end(void) { disable_kernel_vsx(); + pagefault_enable(); preempt_enable(); } @@ -185,7 +192,7 @@ static int set_authsize(struct crypto_aead *tfm, unsigned int authsize) } static int p10_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key, - unsigned int keylen) + unsigned int keylen) { struct crypto_tfm *tfm = crypto_aead_tfm(aead); struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm); @@ -198,7 +205,8 @@ static int 
p10_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key, return ret ? -EINVAL : 0; } -static int p10_aes_gcm_crypt(struct aead_request *req, int enc) +static int p10_aes_gcm_crypt(struct aead_request *req, u8 *riv, + int assoclen, int enc) { struct crypto_tfm *tfm = req->base.tfm; struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm); @@ -210,7 +218,6 @@ static int p10_aes_gcm_crypt(struct aead_request *req, int enc) struct skcipher_walk walk; u8 *assocmem = NULL; u8 *assoc; - unsigned int assoclen = req->assoclen; unsigned int cryptlen = req->cryptlen; unsigned char ivbuf[AES_BLOCK_SIZE+PPC_ALIGN]; unsigned char *iv = PTR_ALIGN((void *)ivbuf, PPC_ALIGN); @@ -218,11 +225,12 @@ static int p10_aes_gcm_crypt(struct aead_request *req, int enc) unsigned long auth_tag_len = crypto_aead_authsize(__crypto_aead_cast(tfm)); u8 otag[16]; int total_processed = 0; + int nbytes; memset(databuf, 0, sizeof(databuf)); memset(hashbuf, 0, sizeof(hashbuf)); memset(ivbuf, 0, sizeof(ivbuf)); - memcpy(iv, req->iv, GCM_IV_SIZE); + memcpy(iv, riv, GCM_IV_SIZE); /* Linearize assoc, if not already linear */ if (req->src->length >= assoclen && req->src->length) { @@ -257,19 +265,25 @@ static int p10_aes_gcm_crypt(struct aead_request *req, int enc) if (ret) return ret; - while (walk.nbytes > 0 && ret == 0) { + while ((nbytes = walk.nbytes) > 0 && ret == 0) { + u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + u8 buf[AES_BLOCK_SIZE]; + + if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) + src = dst = memcpy(buf, src, nbytes); vsx_begin(); if (enc) - aes_p10_gcm_encrypt(walk.src.virt.addr, - walk.dst.virt.addr, - walk.nbytes, + aes_p10_gcm_encrypt(src, dst, nbytes, &ctx->enc_key, gctx->iv, hash->Htable); else - aes_p10_gcm_decrypt(walk.src.virt.addr, - walk.dst.virt.addr, - walk.nbytes, + aes_p10_gcm_decrypt(src, dst, nbytes, &ctx->enc_key, gctx->iv, hash->Htable); + + if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, buf, nbytes); + vsx_end(); total_processed += walk.nbytes; @@ -281,6 +295,7 @@ static int p10_aes_gcm_crypt(struct aead_request *req, int enc) /* Finalize hash */ vsx_begin(); + gcm_update(gctx->iv, hash->Htable); finish_tag(gctx, hash, total_processed); vsx_end(); @@ -302,17 +317,63 @@ static int p10_aes_gcm_crypt(struct aead_request *req, int enc) return 0; } +static int rfc4106_setkey(struct crypto_aead *tfm, const u8 *inkey, + unsigned int keylen) +{ + struct p10_aes_gcm_ctx *ctx = crypto_aead_ctx(tfm); + int err; + + keylen -= RFC4106_NONCE_SIZE; + err = p10_aes_gcm_setkey(tfm, inkey, keylen); + if (err) + return err; + + memcpy(ctx->nonce, inkey + keylen, RFC4106_NONCE_SIZE); + return 0; +} + +static int rfc4106_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + return crypto_rfc4106_check_authsize(authsize); +} + +static int rfc4106_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct p10_aes_gcm_ctx *ctx = crypto_aead_ctx(aead); + u8 iv[AES_BLOCK_SIZE]; + + memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE); + memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE); + + return crypto_ipsec_check_assoclen(req->assoclen) ?: + p10_aes_gcm_crypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE, 1); +} + +static int rfc4106_decrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct p10_aes_gcm_ctx *ctx = crypto_aead_ctx(aead); + u8 iv[AES_BLOCK_SIZE]; + + memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE); + memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE); 
+ + return crypto_ipsec_check_assoclen(req->assoclen) ?: + p10_aes_gcm_crypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE, 0); +} + static int p10_aes_gcm_encrypt(struct aead_request *req) { - return p10_aes_gcm_crypt(req, 1); + return p10_aes_gcm_crypt(req, req->iv, req->assoclen, 1); } static int p10_aes_gcm_decrypt(struct aead_request *req) { - return p10_aes_gcm_crypt(req, 0); + return p10_aes_gcm_crypt(req, req->iv, req->assoclen, 0); } -static struct aead_alg gcm_aes_alg = { +static struct aead_alg gcm_aes_algs[] = {{ .ivsize = GCM_IV_SIZE, .maxauthsize = 16, @@ -321,23 +382,57 @@ static struct aead_alg gcm_aes_alg = { .encrypt = p10_aes_gcm_encrypt, .decrypt = p10_aes_gcm_decrypt, - .base.cra_name = "gcm(aes)", - .base.cra_driver_name = "aes_gcm_p10", + .base.cra_name = "__gcm(aes)", + .base.cra_driver_name = "__aes_gcm_p10", .base.cra_priority = 2100, .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct p10_aes_gcm_ctx), + .base.cra_ctxsize = sizeof(struct p10_aes_gcm_ctx)+ + 4 * sizeof(u64[2]), .base.cra_module = THIS_MODULE, -}; + .base.cra_flags = CRYPTO_ALG_INTERNAL, +}, { + .ivsize = GCM_RFC4106_IV_SIZE, + .maxauthsize = 16, + .setkey = rfc4106_setkey, + .setauthsize = rfc4106_setauthsize, + .encrypt = rfc4106_encrypt, + .decrypt = rfc4106_decrypt, + + .base.cra_name = "__rfc4106(gcm(aes))", + .base.cra_driver_name = "__rfc4106_aes_gcm_p10", + .base.cra_priority = 2100, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct p10_aes_gcm_ctx) + + 4 * sizeof(u64[2]), + .base.cra_module = THIS_MODULE, + .base.cra_flags = CRYPTO_ALG_INTERNAL, +}}; + +static struct simd_aead_alg *p10_simd_aeads[ARRAY_SIZE(gcm_aes_algs)]; static int __init p10_init(void) { - return crypto_register_aead(&gcm_aes_alg); + int ret; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return 0; + + ret = simd_register_aeads_compat(gcm_aes_algs, + ARRAY_SIZE(gcm_aes_algs), + p10_simd_aeads); + if (ret) { + simd_unregister_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs), + p10_simd_aeads); + return ret; + } + return 0; } static void __exit p10_exit(void) { - crypto_unregister_aead(&gcm_aes_alg); + simd_unregister_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs), + p10_simd_aeads); } -module_cpu_feature_match(PPC_MODULE_FEATURE_P10, p10_init); +module_init(p10_init); module_exit(p10_exit); diff --git a/arch/powerpc/crypto/aes-gcm-p10.S b/arch/powerpc/crypto/aes-gcm-p10.S index a51f4b265308..89f50eef3512 100644 --- a/arch/powerpc/crypto/aes-gcm-p10.S +++ b/arch/powerpc/crypto/aes-gcm-p10.S @@ -1,42 +1,42 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ - # - # Accelerated AES-GCM stitched implementation for ppc64le. - # - # Copyright 2022- IBM Inc. All rights reserved - # - #=================================================================================== - # Written by Danny Tsen <[email protected]> - # - # GHASH is based on the Karatsuba multiplication method. - # - # Xi xor X1 - # - # X1 * H^4 + X2 * H^3 + x3 * H^2 + X4 * H = - # (X1.h * H4.h + xX.l * H4.l + X1 * H4) + - # (X2.h * H3.h + X2.l * H3.l + X2 * H3) + - # (X3.h * H2.h + X3.l * H2.l + X3 * H2) + - # (X4.h * H.h + X4.l * H.l + X4 * H) - # - # Xi = v0 - # H Poly = v2 - # Hash keys = v3 - v14 - # ( H.l, H, H.h) - # ( H^2.l, H^2, H^2.h) - # ( H^3.l, H^3, H^3.h) - # ( H^4.l, H^4, H^4.h) - # - # v30 is IV - # v31 - counter 1 - # - # AES used, - # vs0 - vs14 for round keys - # v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted) - # - # This implementation uses stitched AES-GCM approach to improve overall performance. 
- # AES is implemented with 8x blocks and GHASH is using 2 4x blocks. - # - # =================================================================================== - # +# +# Accelerated AES-GCM stitched implementation for ppc64le. +# +# Copyright 2024- IBM Inc. +# +#=================================================================================== +# Written by Danny Tsen <[email protected]> +# +# GHASH is based on the Karatsuba multiplication method. +# +# Xi xor X1 +# +# X1 * H^4 + X2 * H^3 + x3 * H^2 + X4 * H = +# (X1.h * H4.h + xX.l * H4.l + X1 * H4) + +# (X2.h * H3.h + X2.l * H3.l + X2 * H3) + +# (X3.h * H2.h + X3.l * H2.l + X3 * H2) + +# (X4.h * H.h + X4.l * H.l + X4 * H) +# +# Xi = v0 +# H Poly = v2 +# Hash keys = v3 - v14 +# ( H.l, H, H.h) +# ( H^2.l, H^2, H^2.h) +# ( H^3.l, H^3, H^3.h) +# ( H^4.l, H^4, H^4.h) +# +# v30 is IV +# v31 - counter 1 +# +# AES used, +# vs0 - round key 0 +# v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted) +# +# This implementation uses stitched AES-GCM approach to improve overall performance. +# AES is implemented with 8x blocks and GHASH is using 2 4x blocks. +# +# =================================================================================== +# #include <asm/ppc_asm.h> #include <linux/linkage.h> @@ -44,483 +44,224 @@ .machine "any" .text - # 4x loops - # v15 - v18 - input states - # vs1 - vs9 - round keys - # -.macro Loop_aes_middle4x - xxlor 19+32, 1, 1 - xxlor 20+32, 2, 2 - xxlor 21+32, 3, 3 - xxlor 22+32, 4, 4 - - vcipher 15, 15, 19 - vcipher 16, 16, 19 - vcipher 17, 17, 19 - vcipher 18, 18, 19 - - vcipher 15, 15, 20 - vcipher 16, 16, 20 - vcipher 17, 17, 20 - vcipher 18, 18, 20 - - vcipher 15, 15, 21 - vcipher 16, 16, 21 - vcipher 17, 17, 21 - vcipher 18, 18, 21 - - vcipher 15, 15, 22 - vcipher 16, 16, 22 - vcipher 17, 17, 22 - vcipher 18, 18, 22 - - xxlor 19+32, 5, 5 - xxlor 20+32, 6, 6 - xxlor 21+32, 7, 7 - xxlor 22+32, 8, 8 - - vcipher 15, 15, 19 - vcipher 16, 16, 19 - vcipher 17, 17, 19 - vcipher 18, 18, 19 - - vcipher 15, 15, 20 - vcipher 16, 16, 20 - vcipher 17, 17, 20 - vcipher 18, 18, 20 - - vcipher 15, 15, 21 - vcipher 16, 16, 21 - vcipher 17, 17, 21 - vcipher 18, 18, 21 - - vcipher 15, 15, 22 - vcipher 16, 16, 22 - vcipher 17, 17, 22 - vcipher 18, 18, 22 - - xxlor 23+32, 9, 9 - vcipher 15, 15, 23 - vcipher 16, 16, 23 - vcipher 17, 17, 23 - vcipher 18, 18, 23 +.macro SAVE_GPR GPR OFFSET FRAME + std \GPR,\OFFSET(\FRAME) .endm - # 8x loops - # v15 - v22 - input states - # vs1 - vs9 - round keys - # -.macro Loop_aes_middle8x - xxlor 23+32, 1, 1 - xxlor 24+32, 2, 2 - xxlor 25+32, 3, 3 - xxlor 26+32, 4, 4 - - vcipher 15, 15, 23 - vcipher 16, 16, 23 - vcipher 17, 17, 23 - vcipher 18, 18, 23 - vcipher 19, 19, 23 - vcipher 20, 20, 23 - vcipher 21, 21, 23 - vcipher 22, 22, 23 - - vcipher 15, 15, 24 - vcipher 16, 16, 24 - vcipher 17, 17, 24 - vcipher 18, 18, 24 - vcipher 19, 19, 24 - vcipher 20, 20, 24 - vcipher 21, 21, 24 - vcipher 22, 22, 24 - - vcipher 15, 15, 25 - vcipher 16, 16, 25 - vcipher 17, 17, 25 - vcipher 18, 18, 25 - vcipher 19, 19, 25 - vcipher 20, 20, 25 - vcipher 21, 21, 25 - vcipher 22, 22, 25 - - vcipher 15, 15, 26 - vcipher 16, 16, 26 - vcipher 17, 17, 26 - vcipher 18, 18, 26 - vcipher 19, 19, 26 - vcipher 20, 20, 26 - vcipher 21, 21, 26 - vcipher 22, 22, 26 - - xxlor 23+32, 5, 5 - xxlor 24+32, 6, 6 - xxlor 25+32, 7, 7 - xxlor 26+32, 8, 8 - - vcipher 15, 15, 23 - vcipher 16, 16, 23 - vcipher 17, 17, 23 - vcipher 18, 18, 23 - vcipher 19, 19, 23 - vcipher 20, 20, 23 - vcipher 21, 21, 23 - vcipher 22, 22, 23 - - 
vcipher 15, 15, 24 - vcipher 16, 16, 24 - vcipher 17, 17, 24 - vcipher 18, 18, 24 - vcipher 19, 19, 24 - vcipher 20, 20, 24 - vcipher 21, 21, 24 - vcipher 22, 22, 24 - - vcipher 15, 15, 25 - vcipher 16, 16, 25 - vcipher 17, 17, 25 - vcipher 18, 18, 25 - vcipher 19, 19, 25 - vcipher 20, 20, 25 - vcipher 21, 21, 25 - vcipher 22, 22, 25 - - vcipher 15, 15, 26 - vcipher 16, 16, 26 - vcipher 17, 17, 26 - vcipher 18, 18, 26 - vcipher 19, 19, 26 - vcipher 20, 20, 26 - vcipher 21, 21, 26 - vcipher 22, 22, 26 - - xxlor 23+32, 9, 9 - vcipher 15, 15, 23 - vcipher 16, 16, 23 - vcipher 17, 17, 23 - vcipher 18, 18, 23 - vcipher 19, 19, 23 - vcipher 20, 20, 23 - vcipher 21, 21, 23 - vcipher 22, 22, 23 +.macro SAVE_VRS VRS OFFSET FRAME + stxv \VRS+32, \OFFSET(\FRAME) .endm -.macro Loop_aes_middle_1x - xxlor 19+32, 1, 1 - xxlor 20+32, 2, 2 - xxlor 21+32, 3, 3 - xxlor 22+32, 4, 4 - - vcipher 15, 15, 19 - vcipher 15, 15, 20 - vcipher 15, 15, 21 - vcipher 15, 15, 22 - - xxlor 19+32, 5, 5 - xxlor 20+32, 6, 6 - xxlor 21+32, 7, 7 - xxlor 22+32, 8, 8 - - vcipher 15, 15, 19 - vcipher 15, 15, 20 - vcipher 15, 15, 21 - vcipher 15, 15, 22 - - xxlor 19+32, 9, 9 - vcipher 15, 15, 19 +.macro RESTORE_GPR GPR OFFSET FRAME + ld \GPR,\OFFSET(\FRAME) .endm - # - # Compute 4x hash values based on Karatsuba method. - # -.macro ppc_aes_gcm_ghash - vxor 15, 15, 0 - - vpmsumd 23, 12, 15 # H4.L * X.L - vpmsumd 24, 9, 16 - vpmsumd 25, 6, 17 - vpmsumd 26, 3, 18 - - vxor 23, 23, 24 - vxor 23, 23, 25 - vxor 23, 23, 26 # L - - vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L - vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L - vpmsumd 26, 7, 17 - vpmsumd 27, 4, 18 - - vxor 24, 24, 25 - vxor 24, 24, 26 - vxor 24, 24, 27 # M - - # sum hash and reduction with H Poly - vpmsumd 28, 23, 2 # reduction - - vxor 29, 29, 29 - vsldoi 26, 24, 29, 8 # mL - vsldoi 29, 29, 24, 8 # mH - vxor 23, 23, 26 # mL + L - - vsldoi 23, 23, 23, 8 # swap - vxor 23, 23, 28 - - vpmsumd 24, 14, 15 # H4.H * X.H - vpmsumd 25, 11, 16 - vpmsumd 26, 8, 17 - vpmsumd 27, 5, 18 - - vxor 24, 24, 25 - vxor 24, 24, 26 - vxor 24, 24, 27 - - vxor 24, 24, 29 - - # sum hash and reduction with H Poly - vsldoi 27, 23, 23, 8 # swap - vpmsumd 23, 23, 2 - vxor 27, 27, 24 - vxor 23, 23, 27 - - xxlor 32, 23+32, 23+32 # update hash - +.macro RESTORE_VRS VRS OFFSET FRAME + lxv \VRS+32, \OFFSET(\FRAME) .endm - # - # Combine two 4x ghash - # v15 - v22 - input blocks - # -.macro ppc_aes_gcm_ghash2_4x - # first 4x hash - vxor 15, 15, 0 # Xi + X - - vpmsumd 23, 12, 15 # H4.L * X.L - vpmsumd 24, 9, 16 - vpmsumd 25, 6, 17 - vpmsumd 26, 3, 18 - - vxor 23, 23, 24 - vxor 23, 23, 25 - vxor 23, 23, 26 # L - - vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L - vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L - vpmsumd 26, 7, 17 - vpmsumd 27, 4, 18 - - vxor 24, 24, 25 - vxor 24, 24, 26 - - # sum hash and reduction with H Poly - vpmsumd 28, 23, 2 # reduction - - vxor 29, 29, 29 - - vxor 24, 24, 27 # M - vsldoi 26, 24, 29, 8 # mL - vsldoi 29, 29, 24, 8 # mH - vxor 23, 23, 26 # mL + L - - vsldoi 23, 23, 23, 8 # swap - vxor 23, 23, 28 +.macro SAVE_REGS + mflr 0 + std 0, 16(1) + stdu 1,-512(1) + + SAVE_GPR 14, 112, 1 + SAVE_GPR 15, 120, 1 + SAVE_GPR 16, 128, 1 + SAVE_GPR 17, 136, 1 + SAVE_GPR 18, 144, 1 + SAVE_GPR 19, 152, 1 + SAVE_GPR 20, 160, 1 + SAVE_GPR 21, 168, 1 + SAVE_GPR 22, 176, 1 + SAVE_GPR 23, 184, 1 + SAVE_GPR 24, 192, 1 + + addi 9, 1, 256 + SAVE_VRS 20, 0, 9 + SAVE_VRS 21, 16, 9 + SAVE_VRS 22, 32, 9 + SAVE_VRS 23, 48, 9 + SAVE_VRS 24, 64, 9 + SAVE_VRS 25, 80, 9 + SAVE_VRS 26, 96, 9 + SAVE_VRS 27, 112, 9 + 
SAVE_VRS 28, 128, 9 + SAVE_VRS 29, 144, 9 + SAVE_VRS 30, 160, 9 + SAVE_VRS 31, 176, 9 +.endm # SAVE_REGS - vpmsumd 24, 14, 15 # H4.H * X.H - vpmsumd 25, 11, 16 - vpmsumd 26, 8, 17 - vpmsumd 27, 5, 18 +.macro RESTORE_REGS + addi 9, 1, 256 + RESTORE_VRS 20, 0, 9 + RESTORE_VRS 21, 16, 9 + RESTORE_VRS 22, 32, 9 + RESTORE_VRS 23, 48, 9 + RESTORE_VRS 24, 64, 9 + RESTORE_VRS 25, 80, 9 + RESTORE_VRS 26, 96, 9 + RESTORE_VRS 27, 112, 9 + RESTORE_VRS 28, 128, 9 + RESTORE_VRS 29, 144, 9 + RESTORE_VRS 30, 160, 9 + RESTORE_VRS 31, 176, 9 + + RESTORE_GPR 14, 112, 1 + RESTORE_GPR 15, 120, 1 + RESTORE_GPR 16, 128, 1 + RESTORE_GPR 17, 136, 1 + RESTORE_GPR 18, 144, 1 + RESTORE_GPR 19, 152, 1 + RESTORE_GPR 20, 160, 1 + RESTORE_GPR 21, 168, 1 + RESTORE_GPR 22, 176, 1 + RESTORE_GPR 23, 184, 1 + RESTORE_GPR 24, 192, 1 + + addi 1, 1, 512 + ld 0, 16(1) + mtlr 0 +.endm # RESTORE_REGS + +# 4x loops +.macro AES_CIPHER_4x _VCIPHER ST r + \_VCIPHER \ST, \ST, \r + \_VCIPHER \ST+1, \ST+1, \r + \_VCIPHER \ST+2, \ST+2, \r + \_VCIPHER \ST+3, \ST+3, \r +.endm - vxor 24, 24, 25 - vxor 24, 24, 26 - vxor 24, 24, 27 # H +# 8x loops +.macro AES_CIPHER_8x _VCIPHER ST r + \_VCIPHER \ST, \ST, \r + \_VCIPHER \ST+1, \ST+1, \r + \_VCIPHER \ST+2, \ST+2, \r + \_VCIPHER \ST+3, \ST+3, \r + \_VCIPHER \ST+4, \ST+4, \r + \_VCIPHER \ST+5, \ST+5, \r + \_VCIPHER \ST+6, \ST+6, \r + \_VCIPHER \ST+7, \ST+7, \r +.endm - vxor 24, 24, 29 # H + mH +.macro LOOP_8AES_STATE + xxlor 32+23, 1, 1 + xxlor 32+24, 2, 2 + xxlor 32+25, 3, 3 + xxlor 32+26, 4, 4 + AES_CIPHER_8x vcipher, 15, 23 + AES_CIPHER_8x vcipher, 15, 24 + AES_CIPHER_8x vcipher, 15, 25 + AES_CIPHER_8x vcipher, 15, 26 + xxlor 32+23, 5, 5 + xxlor 32+24, 6, 6 + xxlor 32+25, 7, 7 + xxlor 32+26, 8, 8 + AES_CIPHER_8x vcipher, 15, 23 + AES_CIPHER_8x vcipher, 15, 24 + AES_CIPHER_8x vcipher, 15, 25 + AES_CIPHER_8x vcipher, 15, 26 +.endm - # sum hash and reduction with H Poly - vsldoi 27, 23, 23, 8 # swap - vpmsumd 23, 23, 2 - vxor 27, 27, 24 - vxor 27, 23, 27 # 1st Xi - - # 2nd 4x hash - vpmsumd 24, 9, 20 - vpmsumd 25, 6, 21 - vpmsumd 26, 3, 22 - vxor 19, 19, 27 # Xi + X - vpmsumd 23, 12, 19 # H4.L * X.L - - vxor 23, 23, 24 - vxor 23, 23, 25 - vxor 23, 23, 26 # L - - vpmsumd 24, 13, 19 # H4.L * X.H + H4.H * X.L - vpmsumd 25, 10, 20 # H3.L * X1.H + H3.H * X1.L - vpmsumd 26, 7, 21 - vpmsumd 27, 4, 22 - - vxor 24, 24, 25 - vxor 24, 24, 26 +# +# PPC_GHASH4x(H, S1, S2, S3, S4): Compute 4x hash values based on Karatsuba method. 
+# H: returning digest +# S#: states +# +# S1 should xor with the previous digest +# +# Xi = v0 +# H Poly = v2 +# Hash keys = v3 - v14 +# Scratch: v23 - v29 +# +.macro PPC_GHASH4x H S1 S2 S3 S4 + + vpmsumd 23, 12, \S1 # H4.L * X.L + vpmsumd 24, 9, \S2 + vpmsumd 25, 6, \S3 + vpmsumd 26, 3, \S4 + + vpmsumd 27, 13, \S1 # H4.L * X.H + H4.H * X.L + vpmsumd 28, 10, \S2 # H3.L * X1.H + H3.H * X1.L + + vxor 23, 23, 24 + vxor 23, 23, 25 + vxor 23, 23, 26 # L + + vxor 24, 27, 28 + vpmsumd 25, 7, \S3 + vpmsumd 26, 4, \S4 + + vxor 24, 24, 25 + vxor 24, 24, 26 # M # sum hash and reduction with H Poly - vpmsumd 28, 23, 2 # reduction - - vxor 29, 29, 29 + vpmsumd 28, 23, 2 # reduction - vxor 24, 24, 27 # M - vsldoi 26, 24, 29, 8 # mL - vsldoi 29, 29, 24, 8 # mH - vxor 23, 23, 26 # mL + L + vxor 1, 1, 1 + vsldoi 25, 24, 1, 8 # mL + vsldoi 1, 1, 24, 8 # mH + vxor 23, 23, 25 # mL + L - vsldoi 23, 23, 23, 8 # swap - vxor 23, 23, 28 + # This performs swap and xor like, + # vsldoi 23, 23, 23, 8 # swap + # vxor 23, 23, 28 + xxlor 32+25, 10, 10 + vpermxor 23, 23, 28, 25 - vpmsumd 24, 14, 19 # H4.H * X.H - vpmsumd 25, 11, 20 - vpmsumd 26, 8, 21 - vpmsumd 27, 5, 22 + vpmsumd 26, 14, \S1 # H4.H * X.H + vpmsumd 27, 11, \S2 + vpmsumd 28, 8, \S3 + vpmsumd 29, 5, \S4 - vxor 24, 24, 25 - vxor 24, 24, 26 - vxor 24, 24, 27 # H + vxor 24, 26, 27 + vxor 24, 24, 28 + vxor 24, 24, 29 - vxor 24, 24, 29 # H + mH + vxor 24, 24, 1 # sum hash and reduction with H Poly - vsldoi 27, 23, 23, 8 # swap - vpmsumd 23, 23, 2 - vxor 27, 27, 24 - vxor 23, 23, 27 - - xxlor 32, 23+32, 23+32 # update hash - + vsldoi 25, 23, 23, 8 # swap + vpmsumd 23, 23, 2 + vxor 27, 25, 24 + vxor \H, 23, 27 .endm - # - # Compute update single hash - # -.macro ppc_update_hash_1x - vxor 28, 28, 0 - - vxor 19, 19, 19 +# +# Compute update single ghash +# scratch: v1, v22..v27 +# +.macro PPC_GHASH1x H S1 - vpmsumd 22, 3, 28 # L - vpmsumd 23, 4, 28 # M - vpmsumd 24, 5, 28 # H + vxor 1, 1, 1 - vpmsumd 27, 22, 2 # reduction + vpmsumd 22, 3, \S1 # L + vpmsumd 23, 4, \S1 # M + vpmsumd 24, 5, \S1 # H - vsldoi 25, 23, 19, 8 # mL - vsldoi 26, 19, 23, 8 # mH - vxor 22, 22, 25 # LL + LL - vxor 24, 24, 26 # HH + HH + vpmsumd 27, 22, 2 # reduction - vsldoi 22, 22, 22, 8 # swap - vxor 22, 22, 27 + vsldoi 25, 23, 1, 8 # mL + vsldoi 26, 1, 23, 8 # mH + vxor 22, 22, 25 # LL + LL + vxor 24, 24, 26 # HH + HH - vsldoi 20, 22, 22, 8 # swap - vpmsumd 22, 22, 2 # reduction - vxor 20, 20, 24 - vxor 22, 22, 20 + xxlor 32+25, 10, 10 + vpermxor 22, 22, 27, 25 - vmr 0, 22 # update hash - -.endm - -.macro SAVE_REGS - stdu 1,-640(1) - mflr 0 - - std 14,112(1) - std 15,120(1) - std 16,128(1) - std 17,136(1) - std 18,144(1) - std 19,152(1) - std 20,160(1) - std 21,168(1) - li 9, 256 - stvx 20, 9, 1 - addi 9, 9, 16 - stvx 21, 9, 1 - addi 9, 9, 16 - stvx 22, 9, 1 - addi 9, 9, 16 - stvx 23, 9, 1 - addi 9, 9, 16 - stvx 24, 9, 1 - addi 9, 9, 16 - stvx 25, 9, 1 - addi 9, 9, 16 - stvx 26, 9, 1 - addi 9, 9, 16 - stvx 27, 9, 1 - addi 9, 9, 16 - stvx 28, 9, 1 - addi 9, 9, 16 - stvx 29, 9, 1 - addi 9, 9, 16 - stvx 30, 9, 1 - addi 9, 9, 16 - stvx 31, 9, 1 - stxv 14, 464(1) - stxv 15, 480(1) - stxv 16, 496(1) - stxv 17, 512(1) - stxv 18, 528(1) - stxv 19, 544(1) - stxv 20, 560(1) - stxv 21, 576(1) - stxv 22, 592(1) - std 0, 656(1) -.endm - -.macro RESTORE_REGS - lxv 14, 464(1) - lxv 15, 480(1) - lxv 16, 496(1) - lxv 17, 512(1) - lxv 18, 528(1) - lxv 19, 544(1) - lxv 20, 560(1) - lxv 21, 576(1) - lxv 22, 592(1) - li 9, 256 - lvx 20, 9, 1 - addi 9, 9, 16 - lvx 21, 9, 1 - addi 9, 9, 16 - lvx 22, 9, 1 - addi 9, 
9, 16 - lvx 23, 9, 1 - addi 9, 9, 16 - lvx 24, 9, 1 - addi 9, 9, 16 - lvx 25, 9, 1 - addi 9, 9, 16 - lvx 26, 9, 1 - addi 9, 9, 16 - lvx 27, 9, 1 - addi 9, 9, 16 - lvx 28, 9, 1 - addi 9, 9, 16 - lvx 29, 9, 1 - addi 9, 9, 16 - lvx 30, 9, 1 - addi 9, 9, 16 - lvx 31, 9, 1 - - ld 0, 656(1) - ld 14,112(1) - ld 15,120(1) - ld 16,128(1) - ld 17,136(1) - ld 18,144(1) - ld 19,152(1) - ld 20,160(1) - ld 21,168(1) - - mtlr 0 - addi 1, 1, 640 + vsldoi 23, 22, 22, 8 # swap + vpmsumd 22, 22, 2 # reduction + vxor 23, 23, 24 + vxor \H, 22, 23 .endm +# +# LOAD_HASH_TABLE +# Xi = v0 +# H Poly = v2 +# Hash keys = v3 - v14 +# .macro LOAD_HASH_TABLE # Load Xi lxvb16x 32, 0, 8 # load Xi @@ -557,657 +298,434 @@ lxvd2x 14+32, 10, 8 # H^4h .endm - # - # aes_p10_gcm_encrypt (const void *inp, void *out, size_t len, - # const char *rk, unsigned char iv[16], void *Xip); - # - # r3 - inp - # r4 - out - # r5 - len - # r6 - AES round keys - # r7 - iv and other data - # r8 - Xi, HPoli, hash keys - # - # rounds is at offset 240 in rk - # Xi is at 0 in gcm_table (Xip). - # -_GLOBAL(aes_p10_gcm_encrypt) -.align 5 - - SAVE_REGS - - LOAD_HASH_TABLE - - # initialize ICB: GHASH( IV ), IV - r7 - lxvb16x 30+32, 0, 7 # load IV - v30 - - mr 12, 5 # length - li 11, 0 # block index - - # counter 1 - vxor 31, 31, 31 - vspltisb 22, 1 - vsldoi 31, 31, 22,1 # counter 1 - - # load round key to VSR - lxv 0, 0(6) - lxv 1, 0x10(6) - lxv 2, 0x20(6) - lxv 3, 0x30(6) - lxv 4, 0x40(6) - lxv 5, 0x50(6) - lxv 6, 0x60(6) - lxv 7, 0x70(6) - lxv 8, 0x80(6) - lxv 9, 0x90(6) - lxv 10, 0xa0(6) - - # load rounds - 10 (128), 12 (192), 14 (256) - lwz 9,240(6) - - # - # vxor state, state, w # addroundkey - xxlor 32+29, 0, 0 - vxor 15, 30, 29 # IV + round key - add round key 0 - - cmpdi 9, 10 - beq Loop_aes_gcm_8x - - # load 2 more round keys (v11, v12) - lxv 11, 0xb0(6) - lxv 12, 0xc0(6) - - cmpdi 9, 12 - beq Loop_aes_gcm_8x - - # load 2 more round keys (v11, v12, v13, v14) - lxv 13, 0xd0(6) - lxv 14, 0xe0(6) - cmpdi 9, 14 - beq Loop_aes_gcm_8x - - b aes_gcm_out - -.align 5 -Loop_aes_gcm_8x: - mr 14, 3 - mr 9, 4 - - # - # check partial block - # -Continue_partial_check: - ld 15, 56(7) - cmpdi 15, 0 - beq Continue - bgt Final_block - cmpdi 15, 16 - blt Final_block - -Continue: - # n blcoks - li 10, 128 - divdu 10, 12, 10 # n 128 bytes-blocks - cmpdi 10, 0 - beq Loop_last_block - - vaddudm 30, 30, 31 # IV + counter - vxor 16, 30, 29 - vaddudm 30, 30, 31 - vxor 17, 30, 29 - vaddudm 30, 30, 31 - vxor 18, 30, 29 - vaddudm 30, 30, 31 - vxor 19, 30, 29 - vaddudm 30, 30, 31 - vxor 20, 30, 29 - vaddudm 30, 30, 31 - vxor 21, 30, 29 - vaddudm 30, 30, 31 - vxor 22, 30, 29 - - mtctr 10 - - li 15, 16 - li 16, 32 - li 17, 48 - li 18, 64 - li 19, 80 - li 20, 96 - li 21, 112 - - lwz 10, 240(6) - -Loop_8x_block: - - lxvb16x 15, 0, 14 # load block - lxvb16x 16, 15, 14 # load block - lxvb16x 17, 16, 14 # load block - lxvb16x 18, 17, 14 # load block - lxvb16x 19, 18, 14 # load block - lxvb16x 20, 19, 14 # load block - lxvb16x 21, 20, 14 # load block - lxvb16x 22, 21, 14 # load block - addi 14, 14, 128 - - Loop_aes_middle8x - - xxlor 23+32, 10, 10 - - cmpdi 10, 10 - beq Do_next_ghash - - # 192 bits - xxlor 24+32, 11, 11 - - vcipher 15, 15, 23 - vcipher 16, 16, 23 - vcipher 17, 17, 23 - vcipher 18, 18, 23 - vcipher 19, 19, 23 - vcipher 20, 20, 23 - vcipher 21, 21, 23 - vcipher 22, 22, 23 - - vcipher 15, 15, 24 - vcipher 16, 16, 24 - vcipher 17, 17, 24 - vcipher 18, 18, 24 - vcipher 19, 19, 24 - vcipher 20, 20, 24 - vcipher 21, 21, 24 - vcipher 22, 22, 24 - - xxlor 23+32, 12, 12 - - 
cmpdi 10, 12 - beq Do_next_ghash - - # 256 bits - xxlor 24+32, 13, 13 - - vcipher 15, 15, 23 - vcipher 16, 16, 23 - vcipher 17, 17, 23 - vcipher 18, 18, 23 - vcipher 19, 19, 23 - vcipher 20, 20, 23 - vcipher 21, 21, 23 - vcipher 22, 22, 23 - - vcipher 15, 15, 24 - vcipher 16, 16, 24 - vcipher 17, 17, 24 - vcipher 18, 18, 24 - vcipher 19, 19, 24 - vcipher 20, 20, 24 - vcipher 21, 21, 24 - vcipher 22, 22, 24 - - xxlor 23+32, 14, 14 - - cmpdi 10, 14 - beq Do_next_ghash - b aes_gcm_out - -Do_next_ghash: - - # - # last round - vcipherlast 15, 15, 23 - vcipherlast 16, 16, 23 - - xxlxor 47, 47, 15 - stxvb16x 47, 0, 9 # store output - xxlxor 48, 48, 16 - stxvb16x 48, 15, 9 # store output - - vcipherlast 17, 17, 23 - vcipherlast 18, 18, 23 - - xxlxor 49, 49, 17 - stxvb16x 49, 16, 9 # store output - xxlxor 50, 50, 18 - stxvb16x 50, 17, 9 # store output - - vcipherlast 19, 19, 23 - vcipherlast 20, 20, 23 - - xxlxor 51, 51, 19 - stxvb16x 51, 18, 9 # store output - xxlxor 52, 52, 20 - stxvb16x 52, 19, 9 # store output - - vcipherlast 21, 21, 23 - vcipherlast 22, 22, 23 - - xxlxor 53, 53, 21 - stxvb16x 53, 20, 9 # store output - xxlxor 54, 54, 22 - stxvb16x 54, 21, 9 # store output - - addi 9, 9, 128 - - # ghash here - ppc_aes_gcm_ghash2_4x - - xxlor 27+32, 0, 0 - vaddudm 30, 30, 31 # IV + counter - vmr 29, 30 - vxor 15, 30, 27 # add round key - vaddudm 30, 30, 31 - vxor 16, 30, 27 - vaddudm 30, 30, 31 - vxor 17, 30, 27 - vaddudm 30, 30, 31 - vxor 18, 30, 27 - vaddudm 30, 30, 31 - vxor 19, 30, 27 - vaddudm 30, 30, 31 - vxor 20, 30, 27 - vaddudm 30, 30, 31 - vxor 21, 30, 27 - vaddudm 30, 30, 31 - vxor 22, 30, 27 - - addi 12, 12, -128 - addi 11, 11, 128 - - bdnz Loop_8x_block - - vmr 30, 29 - stxvb16x 30+32, 0, 7 # update IV - -Loop_last_block: - cmpdi 12, 0 - beq aes_gcm_out - - # loop last few blocks +################################################################################ +# Compute AES and ghash one block at a time. +# r23: AES rounds +# v30: current IV +# vs0: roundkey 0 +# +################################################################################ +SYM_FUNC_START_LOCAL(aes_gcm_crypt_1x) + + cmpdi 5, 16 + bge __More_1x + blr +__More_1x: li 10, 16 - divdu 10, 12, 10 - - mtctr 10 - - lwz 10, 240(6) - - cmpdi 12, 16 - blt Final_block - -Next_rem_block: - lxvb16x 15, 0, 14 # load block - - Loop_aes_middle_1x - - xxlor 23+32, 10, 10 - - cmpdi 10, 10 - beq Do_next_1x - - # 192 bits - xxlor 24+32, 11, 11 - - vcipher 15, 15, 23 - vcipher 15, 15, 24 - - xxlor 23+32, 12, 12 + divdu 12, 5, 10 + + xxlxor 32+15, 32+30, 0 + + # Pre-load 8 AES rounds to scratch vectors. + xxlor 32+16, 1, 1 + xxlor 32+17, 2, 2 + xxlor 32+18, 3, 3 + xxlor 32+19, 4, 4 + xxlor 32+20, 5, 5 + xxlor 32+21, 6, 6 + xxlor 32+28, 7, 7 + xxlor 32+29, 8, 8 + lwz 23, 240(6) # n rounds + addi 22, 23, -9 # remaing AES rounds - cmpdi 10, 12 - beq Do_next_1x - - # 256 bits - xxlor 24+32, 13, 13 - - vcipher 15, 15, 23 - vcipher 15, 15, 24 - - xxlor 23+32, 14, 14 - - cmpdi 10, 14 - beq Do_next_1x - -Do_next_1x: - vcipherlast 15, 15, 23 - - xxlxor 47, 47, 15 - stxvb16x 47, 0, 9 # store output - addi 14, 14, 16 - addi 9, 9, 16 - - vmr 28, 15 - ppc_update_hash_1x - - addi 12, 12, -16 - addi 11, 11, 16 - xxlor 19+32, 0, 0 - vaddudm 30, 30, 31 # IV + counter - vxor 15, 30, 19 # add round key - - bdnz Next_rem_block - - li 15, 0 - std 15, 56(7) # clear partial? 
- stxvb16x 30+32, 0, 7 # update IV cmpdi 12, 0 - beq aes_gcm_out - -Final_block: - lwz 10, 240(6) - Loop_aes_middle_1x - - xxlor 23+32, 10, 10 - - cmpdi 10, 10 - beq Do_final_1x - - # 192 bits - xxlor 24+32, 11, 11 - - vcipher 15, 15, 23 - vcipher 15, 15, 24 - - xxlor 23+32, 12, 12 - - cmpdi 10, 12 - beq Do_final_1x - - # 256 bits - xxlor 24+32, 13, 13 - - vcipher 15, 15, 23 - vcipher 15, 15, 24 + bgt __Loop_1x + blr - xxlor 23+32, 14, 14 +__Loop_1x: + mtctr 22 + addi 10, 6, 144 + vcipher 15, 15, 16 + vcipher 15, 15, 17 + vcipher 15, 15, 18 + vcipher 15, 15, 19 + vcipher 15, 15, 20 + vcipher 15, 15, 21 + vcipher 15, 15, 28 + vcipher 15, 15, 29 - cmpdi 10, 14 - beq Do_final_1x +__Loop_aes_1state: + lxv 32+1, 0(10) + vcipher 15, 15, 1 + addi 10, 10, 16 + bdnz __Loop_aes_1state + lxv 32+1, 0(10) # last round key + lxvb16x 11, 0, 14 # load input block + vcipherlast 15, 15, 1 + + xxlxor 32+15, 32+15, 11 + stxvb16x 32+15, 0, 9 # store output + addi 14, 14, 16 + addi 9, 9, 16 -Do_final_1x: - vcipherlast 15, 15, 23 + cmpdi 24, 0 # decrypt? + bne __Encrypt_1x + xxlor 15+32, 11, 11 +__Encrypt_1x: + vxor 15, 15, 0 + PPC_GHASH1x 0, 15 - # check partial block - li 21, 0 # encrypt - ld 15, 56(7) # partial? - cmpdi 15, 0 - beq Normal_block - bl Do_partial_block + addi 5, 5, -16 + addi 11, 11, 16 + vadduwm 30, 30, 31 # IV + counter + xxlxor 32+15, 32+30, 0 + addi 12, 12, -1 cmpdi 12, 0 - ble aes_gcm_out + bgt __Loop_1x - b Continue_partial_check - -Normal_block: - lxvb16x 15, 0, 14 # load last block - xxlxor 47, 47, 15 - - # create partial block mask - li 15, 16 - sub 15, 15, 12 # index to the mask - - vspltisb 16, -1 # first 16 bytes - 0xffff...ff - vspltisb 17, 0 # second 16 bytes - 0x0000...00 - li 10, 192 - stvx 16, 10, 1 + stxvb16x 32+30, 0, 7 # update IV + stxvb16x 32+0, 0, 8 # update Xi + blr +SYM_FUNC_END(aes_gcm_crypt_1x) + +################################################################################ +# Process a normal partial block when we come here. +# Compute partial mask, Load and store partial block to stack. +# Update partial_len and pblock. +# pblock is (encrypted ^ AES state) for encrypt +# and (input ^ AES state) for decrypt. +# +################################################################################ +SYM_FUNC_START_LOCAL(__Process_partial) + + # create partial mask + vspltisb 16, -1 + li 12, 16 + sub 12, 12, 5 + sldi 12, 12, 3 + mtvsrdd 32+17, 0, 12 + vslo 16, 16, 17 # partial block mask + + lxvb16x 11, 0, 14 # load partial block + xxland 11, 11, 32+16 + + # AES crypt partial + xxlxor 32+15, 32+30, 0 + lwz 23, 240(6) # n rounds + addi 22, 23, -1 # loop - 1 + mtctr 22 + addi 10, 6, 16 + +__Loop_aes_pstate: + lxv 32+1, 0(10) + vcipher 15, 15, 1 addi 10, 10, 16 - stvx 17, 10, 1 - - addi 10, 1, 192 - lxvb16x 16, 15, 10 # load partial block mask - xxland 47, 47, 16 - - vmr 28, 15 - ppc_update_hash_1x + bdnz __Loop_aes_pstate + lxv 32+1, 0(10) # last round key + vcipherlast 15, 15, 1 - # * should store only the remaining bytes. - bl Write_partial_block - - stxvb16x 30+32, 0, 7 # update IV - std 12, 56(7) # update partial? 
- li 16, 16 + xxlxor 32+15, 32+15, 11 + vand 15, 15, 16 - stxvb16x 32, 0, 8 # write out Xi - stxvb16x 32, 16, 8 # write out Xi - b aes_gcm_out - - # - # Compute data mask - # -.macro GEN_MASK _mask _start _end - vspltisb 16, -1 # first 16 bytes - 0xffff...ff - vspltisb 17, 0 # second 16 bytes - 0x0000...00 - li 10, 192 - stxvb16x 17+32, 10, 1 - add 10, 10, \_start - stxvb16x 16+32, 10, 1 - add 10, 10, \_end - stxvb16x 17+32, 10, 1 - - addi 10, 1, 192 - lxvb16x \_mask, 0, 10 # load partial block mask -.endm + # AES crypt output v15 + # Write partial + li 10, 224 + stxvb16x 15+32, 10, 1 # write v15 to stack + addi 10, 1, 223 + addi 12, 9, -1 + mtctr 5 # partial block len +__Write_partial: + lbzu 22, 1(10) + stbu 22, 1(12) + bdnz __Write_partial + + cmpdi 24, 0 # decrypt? + bne __Encrypt_partial + xxlor 32+15, 11, 11 # decrypt using the input block +__Encrypt_partial: + #vxor 15, 15, 0 # ^ previous hash + #PPC_GHASH1x 0, 15 + + add 14, 14, 5 + add 9, 9, 5 + std 5, 56(7) # update partial + sub 11, 11, 5 + li 5, 0 # done last byte - # - # Handle multiple partial blocks for encrypt and decrypt - # operations. - # -SYM_FUNC_START_LOCAL(Do_partial_block) - add 17, 15, 5 - cmpdi 17, 16 - bgt Big_block - GEN_MASK 18, 15, 5 - b _Partial -SYM_FUNC_END(Do_partial_block) -Big_block: + # + # Don't increase IV since this is the last partial. + # It should get updated in gcm_update if no more data blocks. + #vadduwm 30, 30, 31 # increase IV + stxvb16x 32+30, 0, 7 # update IV + li 10, 64 + stxvb16x 32+0, 0, 8 # Update X1 + stxvb16x 32+15, 10, 7 # Update pblock + blr +SYM_FUNC_END(__Process_partial) + +################################################################################ +# Combine partial blocks and ghash when we come here. +# +# The partial block has to be shifted to the right location to encrypt/decrypt +# and compute ghash if combing the previous partial block is needed. +# - Compute ghash for a full block. Clear Partial_len and pblock. Update IV. +# Write Xi. +# - Don't compute ghash if not full block. gcm_update will take care of it +# is the last block. Update Partial_len and pblock. +# +################################################################################ +SYM_FUNC_START_LOCAL(__Combine_partial) + + ld 12, 56(7) + mr 21, 5 # these bytes to be processed + + li 17, 0 li 16, 16 - GEN_MASK 18, 15, 16 - -_Partial: - lxvb16x 17+32, 0, 14 # load last block - sldi 16, 15, 3 - mtvsrdd 32+16, 0, 16 - vsro 17, 17, 16 - xxlxor 47, 47, 17+32 - xxland 47, 47, 18 - - vxor 0, 0, 0 # clear Xi - vmr 28, 15 - - cmpdi 21, 0 # encrypt/decrypt ops? - beq Skip_decrypt - xxland 32+28, 32+17, 18 - -Skip_decrypt: - - ppc_update_hash_1x + sub 22, 16, 12 # bytes to complete a block + sub 17, 22, 5 # remaining bytes in a block + cmpdi 5, 16 + ble __Inp_msg_less16 + li 17, 0 + mr 21, 22 + b __Combine_continue +__Inp_msg_less16: + cmpd 22, 5 + bgt __Combine_continue + li 17, 0 + mr 21, 22 # these bytes to be processed + +__Combine_continue: + # load msg and shift to the proper location and mask + vspltisb 16, -1 + sldi 15, 12, 3 + mtvsrdd 32+17, 0, 15 + vslo 16, 16, 17 + vsro 16, 16, 17 + sldi 15, 17, 3 + mtvsrdd 32+17, 0, 15 + vsro 16, 16, 17 + vslo 16, 16, 17 # mask + + lxvb16x 32+19, 0, 14 # load partial block + sldi 15, 12, 3 + mtvsrdd 32+17, 0, 15 + vsro 19, 19, 17 # 0x00..xxxx??..?? 
+ sldi 15, 17, 3 + mtvsrdd 32+17, 0, 15 + vsro 19, 19, 17 # 0x00..xxxx + vslo 19, 19, 17 # shift back to form 0x00..xxxx00..00 + + # AES crypt partial + xxlxor 32+15, 32+30, 0 + lwz 23, 240(6) # n rounds + addi 22, 23, -1 # loop - 1 + mtctr 22 + addi 10, 6, 16 + +__Loop_aes_cpstate: + lxv 32+1, 0(10) + vcipher 15, 15, 1 + addi 10, 10, 16 + bdnz __Loop_aes_cpstate + lxv 32+1, 0(10) # last round key + vcipherlast 15, 15, 1 - li 16, 16 - lxvb16x 32+29, 16, 8 - vxor 0, 0, 29 - stxvb16x 32, 0, 8 # save Xi - stxvb16x 32, 16, 8 # save Xi - - # store partial block - # loop the rest of the stream if any - sldi 16, 15, 3 - mtvsrdd 32+16, 0, 16 - vslo 15, 15, 16 - #stxvb16x 15+32, 0, 9 # last block + vxor 15, 15, 19 + vand 15, 15, 16 - li 16, 16 - sub 17, 16, 15 # 16 - partial - - add 16, 15, 5 - cmpdi 16, 16 - bgt Larger_16 - mr 17, 5 -Larger_16: - - # write partial - li 10, 192 - stxvb16x 15+32, 10, 1 # save current block - - addi 10, 9, -1 - addi 16, 1, 191 - mtctr 17 # move partial byte count - -Write_last_partial: - lbzu 18, 1(16) - stbu 18, 1(10) - bdnz Write_last_partial - # Complete loop partial - - add 14, 14, 17 - add 9, 9, 17 - sub 12, 12, 17 - add 11, 11, 17 - - add 15, 15, 5 - cmpdi 15, 16 - blt Save_partial - - vaddudm 30, 30, 31 - stxvb16x 30+32, 0, 7 # update IV - xxlor 32+29, 0, 0 - vxor 15, 30, 29 # IV + round key - add round key 0 - li 15, 0 - std 15, 56(7) # partial done - clear - b Partial_done -Save_partial: - std 15, 56(7) # partial - -Partial_done: + # AES crypt output v15 + # Write partial + li 10, 224 + stxvb16x 15+32, 10, 1 # write v15 to stack + addi 10, 1, 223 + add 10, 10, 12 # add offset + addi 15, 9, -1 + mtctr 21 # partial block len +__Write_combine_partial: + lbzu 22, 1(10) + stbu 22, 1(15) + bdnz __Write_combine_partial + + add 14, 14, 21 + add 11, 11, 21 + add 9, 9, 21 + sub 5, 5, 21 + + # Encrypt/Decrypt? + cmpdi 24, 0 # decrypt? + bne __Encrypt_combine_partial + vmr 15, 19 # decrypt using the input block + +__Encrypt_combine_partial: + # + # Update partial flag and combine ghash. +__Update_partial_ghash: + li 10, 64 + lxvb16x 32+17, 10, 7 # load previous pblock + add 12, 12, 21 # combined pprocessed + vxor 15, 15, 17 # combined pblock + + cmpdi 12, 16 + beq __Clear_partial_flag + std 12, 56(7) # update partial len + stxvb16x 32+15, 10, 7 # Update current pblock blr - # - # Write partial block - # r9 - output - # r12 - remaining bytes - # v15 - partial input data - # -SYM_FUNC_START_LOCAL(Write_partial_block) - li 10, 192 - stxvb16x 15+32, 10, 1 # last block - - addi 10, 9, -1 - addi 16, 1, 191 - - mtctr 12 # remaining bytes - li 15, 0 - -Write_last_byte: - lbzu 14, 1(16) - stbu 14, 1(10) - bdnz Write_last_byte +__Clear_partial_flag: + li 12, 0 + std 12, 56(7) + # Update IV and ghash here + vadduwm 30, 30, 31 # increase IV + stxvb16x 32+30, 0, 7 # update IV + + # v15 either is either (input blockor encrypted)^(AES state) + vxor 15, 15, 0 + PPC_GHASH1x 0, 15 + stxvb16x 32+0, 10, 7 # update pblock for debug? 
+ stxvb16x 32+0, 0, 8 # update Xi blr -SYM_FUNC_END(Write_partial_block) +SYM_FUNC_END(__Combine_partial) -aes_gcm_out: - # out = state - stxvb16x 32, 0, 8 # write out Xi - add 3, 11, 12 # return count +################################################################################ +# gcm_update(iv, Xi) - compute last hash +# +################################################################################ +SYM_FUNC_START(gcm_update) - RESTORE_REGS - blr + ld 10, 56(3) + cmpdi 10, 0 + beq __no_update - # - # 8x Decrypt - # -_GLOBAL(aes_p10_gcm_decrypt) -.align 5 + lxvb16x 32, 0, 4 # load Xi + # load Hash - h^4, h^3, h^2, h + li 10, 32 + lxvd2x 2+32, 10, 4 # H Poli + li 10, 48 + lxvd2x 3+32, 10, 4 # Hl + li 10, 64 + lxvd2x 4+32, 10, 4 # H + li 10, 80 + lxvd2x 5+32, 10, 4 # Hh + + addis 11, 2, permx@toc@ha + addi 11, 11, permx@toc@l + lxv 10, 0(11) # vs10: vpermxor vector + + li 9, 64 + lxvb16x 32+6, 9, 3 # load pblock + vxor 6, 6, 0 + + vxor 1, 1, 1 + vpmsumd 12, 3, 6 # L + vpmsumd 13, 4, 6 # M + vpmsumd 14, 5, 6 # H + vpmsumd 17, 12, 2 # reduction + vsldoi 15, 13, 1, 8 # mL + vsldoi 16, 1, 13, 8 # mH + vxor 12, 12, 15 # LL + LL + vxor 14, 14, 16 # HH + HH + xxlor 32+15, 10, 10 + vpermxor 12, 12, 17, 15 + vsldoi 13, 12, 12, 8 # swap + vpmsumd 12, 12, 2 # reduction + vxor 13, 13, 14 + vxor 7, 12, 13 + + #vxor 0, 0, 0 + #stxvb16x 32+0, 9, 3 + li 10, 0 + std 10, 56(3) + stxvb16x 32+7, 0, 4 + +__no_update: + blr +SYM_FUNC_END(gcm_update) + +################################################################################ +# aes_p10_gcm_encrypt (const void *inp, void *out, size_t len, +# const char *rk, unsigned char iv[16], void *Xip); +# +# r3 - inp +# r4 - out +# r5 - len +# r6 - AES round keys +# r7 - iv and other data +# r8 - Xi, HPoli, hash keys +# +# rounds is at offset 240 in rk +# Xi is at 0 in gcm_table (Xip). 
+# +################################################################################ +SYM_FUNC_START(aes_p10_gcm_encrypt) + + cmpdi 5, 0 + ble __Invalid_msg_len SAVE_REGS - LOAD_HASH_TABLE # initialize ICB: GHASH( IV ), IV - r7 lxvb16x 30+32, 0, 7 # load IV - v30 - mr 12, 5 # length - li 11, 0 # block index + mr 14, 3 + mr 9, 4 # counter 1 vxor 31, 31, 31 vspltisb 22, 1 vsldoi 31, 31, 22,1 # counter 1 - # load round key to VSR - lxv 0, 0(6) - lxv 1, 0x10(6) - lxv 2, 0x20(6) - lxv 3, 0x30(6) - lxv 4, 0x40(6) - lxv 5, 0x50(6) - lxv 6, 0x60(6) - lxv 7, 0x70(6) - lxv 8, 0x80(6) - lxv 9, 0x90(6) - lxv 10, 0xa0(6) + addis 11, 2, permx@toc@ha + addi 11, 11, permx@toc@l + lxv 10, 0(11) # vs10: vpermxor vector + li 11, 0 + + # load 9 round keys to VSR + lxv 0, 0(6) # round key 0 + lxv 1, 16(6) # round key 1 + lxv 2, 32(6) # round key 2 + lxv 3, 48(6) # round key 3 + lxv 4, 64(6) # round key 4 + lxv 5, 80(6) # round key 5 + lxv 6, 96(6) # round key 6 + lxv 7, 112(6) # round key 7 + lxv 8, 128(6) # round key 8 # load rounds - 10 (128), 12 (192), 14 (256) - lwz 9,240(6) + lwz 23, 240(6) # n rounds + li 24, 1 # encrypt +__Process_encrypt: # - # vxor state, state, w # addroundkey - xxlor 32+29, 0, 0 - vxor 15, 30, 29 # IV + round key - add round key 0 - - cmpdi 9, 10 - beq Loop_aes_gcm_8x_dec - - # load 2 more round keys (v11, v12) - lxv 11, 0xb0(6) - lxv 12, 0xc0(6) - - cmpdi 9, 12 - beq Loop_aes_gcm_8x_dec - - # load 2 more round keys (v11, v12, v13, v14) - lxv 13, 0xd0(6) - lxv 14, 0xe0(6) - cmpdi 9, 14 - beq Loop_aes_gcm_8x_dec + # Process different blocks + # + ld 12, 56(7) + cmpdi 12, 0 + bgt __Do_combine_enc + cmpdi 5, 128 + blt __Process_more_enc + +# +# Process 8x AES/GCM blocks +# +__Process_8x_enc: + # 8x blcoks + li 10, 128 + divdu 12, 5, 10 # n 128 bytes-blocks - b aes_gcm_out + addi 12, 12, -1 # loop - 1 -.align 5 -Loop_aes_gcm_8x_dec: - mr 14, 3 - mr 9, 4 + vmr 15, 30 # first state: IV + vadduwm 16, 15, 31 # state + counter + vadduwm 17, 16, 31 + vadduwm 18, 17, 31 + vadduwm 19, 18, 31 + vadduwm 20, 19, 31 + vadduwm 21, 20, 31 + vadduwm 22, 21, 31 + xxlor 9, 32+22, 32+22 # save last state - # - # check partial block - # -Continue_partial_check_dec: - ld 15, 56(7) - cmpdi 15, 0 - beq Continue_dec - bgt Final_block_dec - cmpdi 15, 16 - blt Final_block_dec - -Continue_dec: - # n blcoks - li 10, 128 - divdu 10, 12, 10 # n 128 bytes-blocks - cmpdi 10, 0 - beq Loop_last_block_dec - - vaddudm 30, 30, 31 # IV + counter - vxor 16, 30, 29 - vaddudm 30, 30, 31 - vxor 17, 30, 29 - vaddudm 30, 30, 31 - vxor 18, 30, 29 - vaddudm 30, 30, 31 - vxor 19, 30, 29 - vaddudm 30, 30, 31 - vxor 20, 30, 29 - vaddudm 30, 30, 31 - vxor 21, 30, 29 - vaddudm 30, 30, 31 - vxor 22, 30, 29 - - mtctr 10 + # vxor state, state, w # addroundkey + xxlor 32+29, 0, 0 + vxor 15, 15, 29 # IV + round key - add round key 0 + vxor 16, 16, 29 + vxor 17, 17, 29 + vxor 18, 18, 29 + vxor 19, 19, 29 + vxor 20, 20, 29 + vxor 21, 21, 29 + vxor 22, 22, 29 li 15, 16 li 16, 32 @@ -1217,305 +735,502 @@ Continue_dec: li 20, 96 li 21, 112 - lwz 10, 240(6) - -Loop_8x_block_dec: - - lxvb16x 15, 0, 14 # load block - lxvb16x 16, 15, 14 # load block - lxvb16x 17, 16, 14 # load block - lxvb16x 18, 17, 14 # load block - lxvb16x 19, 18, 14 # load block - lxvb16x 20, 19, 14 # load block - lxvb16x 21, 20, 14 # load block - lxvb16x 22, 21, 14 # load block - addi 14, 14, 128 - - Loop_aes_middle8x - - xxlor 23+32, 10, 10 - - cmpdi 10, 10 - beq Do_next_ghash_dec - - # 192 bits - xxlor 24+32, 11, 11 - - vcipher 15, 15, 23 - vcipher 16, 16, 23 - vcipher 17, 
17, 23 - vcipher 18, 18, 23 - vcipher 19, 19, 23 - vcipher 20, 20, 23 - vcipher 21, 21, 23 - vcipher 22, 22, 23 - - vcipher 15, 15, 24 - vcipher 16, 16, 24 - vcipher 17, 17, 24 - vcipher 18, 18, 24 - vcipher 19, 19, 24 - vcipher 20, 20, 24 - vcipher 21, 21, 24 - vcipher 22, 22, 24 - - xxlor 23+32, 12, 12 - - cmpdi 10, 12 - beq Do_next_ghash_dec - - # 256 bits - xxlor 24+32, 13, 13 - - vcipher 15, 15, 23 - vcipher 16, 16, 23 - vcipher 17, 17, 23 - vcipher 18, 18, 23 - vcipher 19, 19, 23 - vcipher 20, 20, 23 - vcipher 21, 21, 23 - vcipher 22, 22, 23 - - vcipher 15, 15, 24 - vcipher 16, 16, 24 - vcipher 17, 17, 24 - vcipher 18, 18, 24 - vcipher 19, 19, 24 - vcipher 20, 20, 24 - vcipher 21, 21, 24 - vcipher 22, 22, 24 - - xxlor 23+32, 14, 14 - - cmpdi 10, 14 - beq Do_next_ghash_dec - b aes_gcm_out + # + # Pre-compute first 8 AES state and leave 1/3/5 more rounds + # for the loop. + # + addi 22, 23, -9 # process 8 keys + mtctr 22 # AES key loop + addi 10, 6, 144 -Do_next_ghash_dec: + LOOP_8AES_STATE # process 8 AES keys - # - # last round - vcipherlast 15, 15, 23 - vcipherlast 16, 16, 23 - - xxlxor 47, 47, 15 - stxvb16x 47, 0, 9 # store output - xxlxor 48, 48, 16 - stxvb16x 48, 15, 9 # store output - - vcipherlast 17, 17, 23 - vcipherlast 18, 18, 23 - - xxlxor 49, 49, 17 - stxvb16x 49, 16, 9 # store output - xxlxor 50, 50, 18 - stxvb16x 50, 17, 9 # store output - - vcipherlast 19, 19, 23 - vcipherlast 20, 20, 23 - - xxlxor 51, 51, 19 - stxvb16x 51, 18, 9 # store output - xxlxor 52, 52, 20 - stxvb16x 52, 19, 9 # store output - - vcipherlast 21, 21, 23 - vcipherlast 22, 22, 23 - - xxlxor 53, 53, 21 - stxvb16x 53, 20, 9 # store output - xxlxor 54, 54, 22 - stxvb16x 54, 21, 9 # store output - - addi 9, 9, 128 - - xxlor 15+32, 15, 15 - xxlor 16+32, 16, 16 - xxlor 17+32, 17, 17 - xxlor 18+32, 18, 18 - xxlor 19+32, 19, 19 - xxlor 20+32, 20, 20 - xxlor 21+32, 21, 21 - xxlor 22+32, 22, 22 +__PreLoop_aes_state: + lxv 32+1, 0(10) # round key + AES_CIPHER_8x vcipher 15 1 + addi 10, 10, 16 + bdnz __PreLoop_aes_state + lxv 32+1, 0(10) # last round key (v1) + + cmpdi 12, 0 # Only one loop (8 block) + beq __Finish_ghash + +# +# Loop 8x blocks and compute ghash +# +__Loop_8x_block_enc: + vcipherlast 15, 15, 1 + vcipherlast 16, 16, 1 + vcipherlast 17, 17, 1 + vcipherlast 18, 18, 1 + vcipherlast 19, 19, 1 + vcipherlast 20, 20, 1 + vcipherlast 21, 21, 1 + vcipherlast 22, 22, 1 + + lxvb16x 32+23, 0, 14 # load block + lxvb16x 32+24, 15, 14 # load block + lxvb16x 32+25, 16, 14 # load block + lxvb16x 32+26, 17, 14 # load block + lxvb16x 32+27, 18, 14 # load block + lxvb16x 32+28, 19, 14 # load block + lxvb16x 32+29, 20, 14 # load block + lxvb16x 32+30, 21, 14 # load block + addi 14, 14, 128 + + vxor 15, 15, 23 + vxor 16, 16, 24 + vxor 17, 17, 25 + vxor 18, 18, 26 + vxor 19, 19, 27 + vxor 20, 20, 28 + vxor 21, 21, 29 + vxor 22, 22, 30 + + stxvb16x 47, 0, 9 # store output + stxvb16x 48, 15, 9 # store output + stxvb16x 49, 16, 9 # store output + stxvb16x 50, 17, 9 # store output + stxvb16x 51, 18, 9 # store output + stxvb16x 52, 19, 9 # store output + stxvb16x 53, 20, 9 # store output + stxvb16x 54, 21, 9 # store output + addi 9, 9, 128 # ghash here - ppc_aes_gcm_ghash2_4x - - xxlor 27+32, 0, 0 - vaddudm 30, 30, 31 # IV + counter - vmr 29, 30 - vxor 15, 30, 27 # add round key - vaddudm 30, 30, 31 - vxor 16, 30, 27 - vaddudm 30, 30, 31 - vxor 17, 30, 27 - vaddudm 30, 30, 31 - vxor 18, 30, 27 - vaddudm 30, 30, 31 - vxor 19, 30, 27 - vaddudm 30, 30, 31 - vxor 20, 30, 27 - vaddudm 30, 30, 31 - vxor 21, 30, 27 - vaddudm 30, 
30, 31 - vxor 22, 30, 27 - - addi 12, 12, -128 + vxor 15, 15, 0 + PPC_GHASH4x 0, 15, 16, 17, 18 + + vxor 19, 19, 0 + PPC_GHASH4x 0, 19, 20, 21, 22 + + xxlor 32+15, 9, 9 # last state + vadduwm 15, 15, 31 # state + counter + vadduwm 16, 15, 31 + vadduwm 17, 16, 31 + vadduwm 18, 17, 31 + vadduwm 19, 18, 31 + vadduwm 20, 19, 31 + vadduwm 21, 20, 31 + vadduwm 22, 21, 31 + xxlor 9, 32+22, 32+22 # save last state + + xxlor 32+27, 0, 0 # restore roundkey 0 + vxor 15, 15, 27 # IV + round key - add round key 0 + vxor 16, 16, 27 + vxor 17, 17, 27 + vxor 18, 18, 27 + vxor 19, 19, 27 + vxor 20, 20, 27 + vxor 21, 21, 27 + vxor 22, 22, 27 + + addi 5, 5, -128 addi 11, 11, 128 - bdnz Loop_8x_block_dec - - vmr 30, 29 - stxvb16x 30+32, 0, 7 # update IV - -Loop_last_block_dec: - cmpdi 12, 0 - beq aes_gcm_out - - # loop last few blocks - li 10, 16 - divdu 10, 12, 10 - - mtctr 10 - - lwz 10, 240(6) - - cmpdi 12, 16 - blt Final_block_dec - -Next_rem_block_dec: - lxvb16x 15, 0, 14 # load block - - Loop_aes_middle_1x - - xxlor 23+32, 10, 10 + LOOP_8AES_STATE # process 8 AES keys + mtctr 22 # AES key loop + addi 10, 6, 144 +__LastLoop_aes_state: + lxv 32+1, 0(10) # round key + AES_CIPHER_8x vcipher 15 1 + addi 10, 10, 16 + bdnz __LastLoop_aes_state + lxv 32+1, 0(10) # last round key (v1) - cmpdi 10, 10 - beq Do_next_1x_dec + addi 12, 12, -1 + cmpdi 12, 0 + bne __Loop_8x_block_enc + +__Finish_ghash: + vcipherlast 15, 15, 1 + vcipherlast 16, 16, 1 + vcipherlast 17, 17, 1 + vcipherlast 18, 18, 1 + vcipherlast 19, 19, 1 + vcipherlast 20, 20, 1 + vcipherlast 21, 21, 1 + vcipherlast 22, 22, 1 + + lxvb16x 32+23, 0, 14 # load block + lxvb16x 32+24, 15, 14 # load block + lxvb16x 32+25, 16, 14 # load block + lxvb16x 32+26, 17, 14 # load block + lxvb16x 32+27, 18, 14 # load block + lxvb16x 32+28, 19, 14 # load block + lxvb16x 32+29, 20, 14 # load block + lxvb16x 32+30, 21, 14 # load block + addi 14, 14, 128 + + vxor 15, 15, 23 + vxor 16, 16, 24 + vxor 17, 17, 25 + vxor 18, 18, 26 + vxor 19, 19, 27 + vxor 20, 20, 28 + vxor 21, 21, 29 + vxor 22, 22, 30 + + stxvb16x 47, 0, 9 # store output + stxvb16x 48, 15, 9 # store output + stxvb16x 49, 16, 9 # store output + stxvb16x 50, 17, 9 # store output + stxvb16x 51, 18, 9 # store output + stxvb16x 52, 19, 9 # store output + stxvb16x 53, 20, 9 # store output + stxvb16x 54, 21, 9 # store output + addi 9, 9, 128 + + vxor 15, 15, 0 + PPC_GHASH4x 0, 15, 16, 17, 18 + + vxor 19, 19, 0 + PPC_GHASH4x 0, 19, 20, 21, 22 + + xxlor 30+32, 9, 9 # last ctr + vadduwm 30, 30, 31 # increase ctr + stxvb16x 32+30, 0, 7 # update IV + stxvb16x 32+0, 0, 8 # update Xi + + addi 5, 5, -128 + addi 11, 11, 128 - # 192 bits - xxlor 24+32, 11, 11 + # + # Done 8x blocks + # - vcipher 15, 15, 23 - vcipher 15, 15, 24 + cmpdi 5, 0 + beq aes_gcm_out - xxlor 23+32, 12, 12 +__Process_more_enc: + li 24, 1 # encrypt + bl aes_gcm_crypt_1x + cmpdi 5, 0 + beq aes_gcm_out - cmpdi 10, 12 - beq Do_next_1x_dec + bl __Process_partial + cmpdi 5, 0 + beq aes_gcm_out +__Do_combine_enc: + bl __Combine_partial + cmpdi 5, 0 + bgt __Process_encrypt + b aes_gcm_out - # 256 bits - xxlor 24+32, 13, 13 +SYM_FUNC_END(aes_p10_gcm_encrypt) - vcipher 15, 15, 23 - vcipher 15, 15, 24 +################################################################################ +# aes_p10_gcm_decrypt (const void *inp, void *out, size_t len, +# const char *rk, unsigned char iv[16], void *Xip); +# 8x Decrypt +# +################################################################################ +SYM_FUNC_START(aes_p10_gcm_decrypt) - xxlor 23+32, 14, 14 + cmpdi 5, 0 
+ ble __Invalid_msg_len - cmpdi 10, 14 - beq Do_next_1x_dec + SAVE_REGS + LOAD_HASH_TABLE -Do_next_1x_dec: - vcipherlast 15, 15, 23 + # initialize ICB: GHASH( IV ), IV - r7 + lxvb16x 30+32, 0, 7 # load IV - v30 - xxlxor 47, 47, 15 - stxvb16x 47, 0, 9 # store output - addi 14, 14, 16 - addi 9, 9, 16 + mr 14, 3 + mr 9, 4 - xxlor 28+32, 15, 15 - #vmr 28, 15 - ppc_update_hash_1x + # counter 1 + vxor 31, 31, 31 + vspltisb 22, 1 + vsldoi 31, 31, 22,1 # counter 1 - addi 12, 12, -16 - addi 11, 11, 16 - xxlor 19+32, 0, 0 - vaddudm 30, 30, 31 # IV + counter - vxor 15, 30, 19 # add round key + addis 11, 2, permx@toc@ha + addi 11, 11, permx@toc@l + lxv 10, 0(11) # vs10: vpermxor vector + li 11, 0 + + # load 9 round keys to VSR + lxv 0, 0(6) # round key 0 + lxv 1, 16(6) # round key 1 + lxv 2, 32(6) # round key 2 + lxv 3, 48(6) # round key 3 + lxv 4, 64(6) # round key 4 + lxv 5, 80(6) # round key 5 + lxv 6, 96(6) # round key 6 + lxv 7, 112(6) # round key 7 + lxv 8, 128(6) # round key 8 - bdnz Next_rem_block_dec + # load rounds - 10 (128), 12 (192), 14 (256) + lwz 23, 240(6) # n rounds + li 24, 0 # decrypt - li 15, 0 - std 15, 56(7) # clear partial? - stxvb16x 30+32, 0, 7 # update IV +__Process_decrypt: + # + # Process different blocks + # + ld 12, 56(7) cmpdi 12, 0 - beq aes_gcm_out - -Final_block_dec: - lwz 10, 240(6) - Loop_aes_middle_1x - - xxlor 23+32, 10, 10 - - cmpdi 10, 10 - beq Do_final_1x_dec + bgt __Do_combine_dec + cmpdi 5, 128 + blt __Process_more_dec + +# +# Process 8x AES/GCM blocks +# +__Process_8x_dec: + # 8x blcoks + li 10, 128 + divdu 12, 5, 10 # n 128 bytes-blocks - # 192 bits - xxlor 24+32, 11, 11 + addi 12, 12, -1 # loop - 1 - vcipher 15, 15, 23 - vcipher 15, 15, 24 + vmr 15, 30 # first state: IV + vadduwm 16, 15, 31 # state + counter + vadduwm 17, 16, 31 + vadduwm 18, 17, 31 + vadduwm 19, 18, 31 + vadduwm 20, 19, 31 + vadduwm 21, 20, 31 + vadduwm 22, 21, 31 + xxlor 9, 32+22, 32+22 # save last state - xxlor 23+32, 12, 12 + # vxor state, state, w # addroundkey + xxlor 32+29, 0, 0 + vxor 15, 15, 29 # IV + round key - add round key 0 + vxor 16, 16, 29 + vxor 17, 17, 29 + vxor 18, 18, 29 + vxor 19, 19, 29 + vxor 20, 20, 29 + vxor 21, 21, 29 + vxor 22, 22, 29 - cmpdi 10, 12 - beq Do_final_1x_dec + li 15, 16 + li 16, 32 + li 17, 48 + li 18, 64 + li 19, 80 + li 20, 96 + li 21, 112 - # 256 bits - xxlor 24+32, 13, 13 + # + # Pre-compute first 8 AES state and leave 1/3/5 more rounds + # for the loop. 
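The scheduling comment above ("leave 1/3/5 more rounds for the loop") is worth unpacking: round key 0 is folded into the initial XOR, keys 1 through 8 are applied from VSRs by LOOP_8AES_STATE, the CTR-counted loop then walks the remaining middle keys from memory, and the final key is consumed by vcipherlast. A hedged C model of that arithmetic, with an invented helper name:

    /* For rounds = 10, 12 or 14 there are rounds + 1 round keys.
     * One goes to the initial XOR, eight to LOOP_8AES_STATE and one
     * to vcipherlast, leaving rounds - 9 for the counted loop (the
     * "1/3/5 more rounds" above; compare addi 22, 23, -9).
     */
    static int middle_rounds(int rounds)
    {
            return rounds - 9;   /* 1, 3 or 5 loop iterations */
    }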
+ # + addi 22, 23, -9 # process 8 keys + mtctr 22 # AES key loop + addi 10, 6, 144 - vcipher 15, 15, 23 - vcipher 15, 15, 24 + LOOP_8AES_STATE # process 8 AES keys - xxlor 23+32, 14, 14 +__PreLoop_aes_state_dec: + lxv 32+1, 0(10) # round key + AES_CIPHER_8x vcipher 15 1 + addi 10, 10, 16 + bdnz __PreLoop_aes_state_dec + lxv 32+1, 0(10) # last round key (v1) + + cmpdi 12, 0 # Only one loop (8 block) + beq __Finish_ghash_dec + +# +# Loop 8x blocks and compute ghash +# +__Loop_8x_block_dec: + vcipherlast 15, 15, 1 + vcipherlast 16, 16, 1 + vcipherlast 17, 17, 1 + vcipherlast 18, 18, 1 + vcipherlast 19, 19, 1 + vcipherlast 20, 20, 1 + vcipherlast 21, 21, 1 + vcipherlast 22, 22, 1 + + lxvb16x 32+23, 0, 14 # load block + lxvb16x 32+24, 15, 14 # load block + lxvb16x 32+25, 16, 14 # load block + lxvb16x 32+26, 17, 14 # load block + lxvb16x 32+27, 18, 14 # load block + lxvb16x 32+28, 19, 14 # load block + lxvb16x 32+29, 20, 14 # load block + lxvb16x 32+30, 21, 14 # load block + addi 14, 14, 128 + + vxor 15, 15, 23 + vxor 16, 16, 24 + vxor 17, 17, 25 + vxor 18, 18, 26 + vxor 19, 19, 27 + vxor 20, 20, 28 + vxor 21, 21, 29 + vxor 22, 22, 30 + + stxvb16x 47, 0, 9 # store output + stxvb16x 48, 15, 9 # store output + stxvb16x 49, 16, 9 # store output + stxvb16x 50, 17, 9 # store output + stxvb16x 51, 18, 9 # store output + stxvb16x 52, 19, 9 # store output + stxvb16x 53, 20, 9 # store output + stxvb16x 54, 21, 9 # store output + + addi 9, 9, 128 + + vmr 15, 23 + vmr 16, 24 + vmr 17, 25 + vmr 18, 26 + vmr 19, 27 + vmr 20, 28 + vmr 21, 29 + vmr 22, 30 - cmpdi 10, 14 - beq Do_final_1x_dec + # ghash here + vxor 15, 15, 0 + PPC_GHASH4x 0, 15, 16, 17, 18 + + vxor 19, 19, 0 + PPC_GHASH4x 0, 19, 20, 21, 22 + + xxlor 32+15, 9, 9 # last state + vadduwm 15, 15, 31 # state + counter + vadduwm 16, 15, 31 + vadduwm 17, 16, 31 + vadduwm 18, 17, 31 + vadduwm 19, 18, 31 + vadduwm 20, 19, 31 + vadduwm 21, 20, 31 + vadduwm 22, 21, 31 + xxlor 9, 32+22, 32+22 # save last state + + xxlor 32+27, 0, 0 # restore roundkey 0 + vxor 15, 15, 27 # IV + round key - add round key 0 + vxor 16, 16, 27 + vxor 17, 17, 27 + vxor 18, 18, 27 + vxor 19, 19, 27 + vxor 20, 20, 27 + vxor 21, 21, 27 + vxor 22, 22, 27 + + addi 5, 5, -128 + addi 11, 11, 128 -Do_final_1x_dec: - vcipherlast 15, 15, 23 + LOOP_8AES_STATE # process 8 AES keys + mtctr 22 # AES key loop + addi 10, 6, 144 +__LastLoop_aes_state_dec: + lxv 32+1, 0(10) # round key + AES_CIPHER_8x vcipher 15 1 + addi 10, 10, 16 + bdnz __LastLoop_aes_state_dec + lxv 32+1, 0(10) # last round key (v1) - # check partial block - li 21, 1 # decrypt - ld 15, 56(7) # partial? 
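One subtlety in the 8x decrypt loop above: after the keystream XOR, the code moves the loaded ciphertext blocks (v23 to v30) back into v15 to v22 before PPC_GHASH4x, because GCM authenticates ciphertext on both paths, its own output when encrypting and its input when decrypting. A simplified single-block model in C, with invented names, not the kernel's code:

    #include <string.h>

    typedef void (*ghash_fn)(unsigned char Xi[16], const unsigned char blk[16]);

    static void gcm_block(unsigned char *out, const unsigned char *in,
                          const unsigned char *keystream, unsigned char Xi[16],
                          int encrypt, ghash_fn ghash)
    {
            unsigned char c[16];
            int i;

            for (i = 0; i < 16; i++)
                    out[i] = in[i] ^ keystream[i];
            /* the ciphertext is the output when encrypting, the input otherwise */
            memcpy(c, encrypt ? out : in, 16);
            ghash(Xi, c);
    }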
- cmpdi 15, 0 - beq Normal_block_dec - bl Do_partial_block + addi 12, 12, -1 cmpdi 12, 0 - ble aes_gcm_out - - b Continue_partial_check_dec + bne __Loop_8x_block_dec + +__Finish_ghash_dec: + vcipherlast 15, 15, 1 + vcipherlast 16, 16, 1 + vcipherlast 17, 17, 1 + vcipherlast 18, 18, 1 + vcipherlast 19, 19, 1 + vcipherlast 20, 20, 1 + vcipherlast 21, 21, 1 + vcipherlast 22, 22, 1 + + lxvb16x 32+23, 0, 14 # load block + lxvb16x 32+24, 15, 14 # load block + lxvb16x 32+25, 16, 14 # load block + lxvb16x 32+26, 17, 14 # load block + lxvb16x 32+27, 18, 14 # load block + lxvb16x 32+28, 19, 14 # load block + lxvb16x 32+29, 20, 14 # load block + lxvb16x 32+30, 21, 14 # load block + addi 14, 14, 128 + + vxor 15, 15, 23 + vxor 16, 16, 24 + vxor 17, 17, 25 + vxor 18, 18, 26 + vxor 19, 19, 27 + vxor 20, 20, 28 + vxor 21, 21, 29 + vxor 22, 22, 30 + + stxvb16x 47, 0, 9 # store output + stxvb16x 48, 15, 9 # store output + stxvb16x 49, 16, 9 # store output + stxvb16x 50, 17, 9 # store output + stxvb16x 51, 18, 9 # store output + stxvb16x 52, 19, 9 # store output + stxvb16x 53, 20, 9 # store output + stxvb16x 54, 21, 9 # store output + addi 9, 9, 128 + + #vmr 15, 23 + vxor 15, 23, 0 + vmr 16, 24 + vmr 17, 25 + vmr 18, 26 + vmr 19, 27 + vmr 20, 28 + vmr 21, 29 + vmr 22, 30 + + #vxor 15, 15, 0 + PPC_GHASH4x 0, 15, 16, 17, 18 + + vxor 19, 19, 0 + PPC_GHASH4x 0, 19, 20, 21, 22 + + xxlor 30+32, 9, 9 # last ctr + vadduwm 30, 30, 31 # increase ctr + stxvb16x 32+30, 0, 7 # update IV + stxvb16x 32+0, 0, 8 # update Xi + + addi 5, 5, -128 + addi 11, 11, 128 -Normal_block_dec: - lxvb16x 15, 0, 14 # load last block - xxlxor 47, 47, 15 + # + # Done 8x blocks + # - # create partial block mask - li 15, 16 - sub 15, 15, 12 # index to the mask + cmpdi 5, 0 + beq aes_gcm_out - vspltisb 16, -1 # first 16 bytes - 0xffff...ff - vspltisb 17, 0 # second 16 bytes - 0x0000...00 - li 10, 192 - stvx 16, 10, 1 - addi 10, 10, 16 - stvx 17, 10, 1 +__Process_more_dec: + li 24, 0 # decrypt + bl aes_gcm_crypt_1x + cmpdi 5, 0 + beq aes_gcm_out - addi 10, 1, 192 - lxvb16x 16, 15, 10 # load partial block mask - xxland 47, 47, 16 + bl __Process_partial + cmpdi 5, 0 + beq aes_gcm_out +__Do_combine_dec: + bl __Combine_partial + cmpdi 5, 0 + bgt __Process_decrypt + b aes_gcm_out +SYM_FUNC_END(aes_p10_gcm_decrypt) - xxland 32+28, 15, 16 - #vmr 28, 15 - ppc_update_hash_1x +SYM_FUNC_START_LOCAL(aes_gcm_out) - # * should store only the remaining bytes. - bl Write_partial_block + mr 3, 11 # return count - stxvb16x 30+32, 0, 7 # update IV - std 12, 56(7) # update partial? 
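Several hunks here index a per-request state block by hand: the IV is loaded from offset 0 of the buffer passed in r7 ("iv and other data"), the partial-block byte count is kept at offset 56 (the "partial?" loads and stores, and the ld 10, 56(3) in gcm_update), and the buffered partial block sits at offset 64 (the pblock load). A hypothetical C view of that layout, assuming only those offsets; the real glue structure may differ and the middle field is a placeholder:

    struct p10_gcm_state {                 /* name is an assumption */
            unsigned char iv[16];          /* +0:  current counter block        */
            unsigned char unused[40];      /* +16: not referenced in this diff  */
            unsigned long partial;         /* +56: bytes buffered (8-byte ld)   */
            unsigned char pblock[16];      /* +64: partial-block buffer         */
    };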
- li 16, 16 + RESTORE_REGS + blr - stxvb16x 32, 0, 8 # write out Xi - stxvb16x 32, 16, 8 # write out Xi - b aes_gcm_out +__Invalid_msg_len: + li 3, 0 + blr +SYM_FUNC_END(aes_gcm_out) + +SYM_DATA_START_LOCAL(PERMX) +.align 4 +# for vector permute and xor +permx: +.long 0x4c5d6e7f, 0x08192a3b, 0xc4d5e6f7, 0x8091a2b3 +SYM_DATA_END(permx) diff --git a/arch/powerpc/crypto/vmx.c b/arch/powerpc/crypto/vmx.c index 7eb713cc87c8..0b725e826388 100644 --- a/arch/powerpc/crypto/vmx.c +++ b/arch/powerpc/crypto/vmx.c @@ -74,4 +74,4 @@ MODULE_DESCRIPTION("IBM VMX cryptographic acceleration instructions " "support on Power 8"); MODULE_LICENSE("GPL"); MODULE_VERSION("1.0.0"); -MODULE_IMPORT_NS(CRYPTO_INTERNAL); +MODULE_IMPORT_NS("CRYPTO_INTERNAL"); diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 201218faed61..29a529d2ab8b 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -193,6 +193,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_ARCH_31 LONG_ASM_CONST(0x0004000000000000) #define CPU_FTR_DAWR1 LONG_ASM_CONST(0x0008000000000000) #define CPU_FTR_DEXCR_NPHIE LONG_ASM_CONST(0x0010000000000000) +#define CPU_FTR_P11_PVR LONG_ASM_CONST(0x0020000000000000) #ifndef __ASSEMBLY__ @@ -454,7 +455,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_DAWR | CPU_FTR_DAWR1 | \ CPU_FTR_DEXCR_NPHIE) -#define CPU_FTRS_POWER11 CPU_FTRS_POWER10 +#define CPU_FTRS_POWER11 (CPU_FTRS_POWER10 | CPU_FTR_P11_PVR) #define CPU_FTRS_CELL (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ @@ -475,7 +476,7 @@ static inline void cpu_feature_keys_init(void) { } (CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_VSX_COMP | CPU_FTRS_POWER9 | \ CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | \ - CPU_FTRS_POWER9_DD2_3 | CPU_FTRS_POWER10) + CPU_FTRS_POWER9_DD2_3 | CPU_FTRS_POWER10 | CPU_FTRS_POWER11) #else #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \ @@ -483,7 +484,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTRS_POWER8 | CPU_FTRS_CELL | CPU_FTRS_PA6T | \ CPU_FTR_VSX_COMP | CPU_FTR_ALTIVEC_COMP | CPU_FTRS_POWER9 | \ CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | \ - CPU_FTRS_POWER9_DD2_3 | CPU_FTRS_POWER10) + CPU_FTRS_POWER9_DD2_3 | CPU_FTRS_POWER10 | CPU_FTRS_POWER11) #endif /* CONFIG_CPU_LITTLE_ENDIAN */ #endif #else @@ -547,7 +548,7 @@ enum { (CPU_FTRS_POSSIBLE & ~CPU_FTR_HVMODE & ~CPU_FTR_DBELL & \ CPU_FTRS_POWER7 & CPU_FTRS_POWER8E & CPU_FTRS_POWER8 & \ CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD2_1 & CPU_FTRS_POWER9_DD2_2 & \ - CPU_FTRS_POWER10 & CPU_FTRS_DT_CPU_BASE) + CPU_FTRS_POWER10 & CPU_FTRS_POWER11 & CPU_FTRS_DT_CPU_BASE) #else #define CPU_FTRS_ALWAYS \ (CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \ @@ -555,7 +556,7 @@ enum { CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \ ~CPU_FTR_HVMODE & ~CPU_FTR_DBELL & CPU_FTRS_POSSIBLE & \ CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD2_1 & CPU_FTRS_POWER9_DD2_2 & \ - CPU_FTRS_POWER10 & CPU_FTRS_DT_CPU_BASE) + CPU_FTRS_POWER10 & CPU_FTRS_POWER11 & CPU_FTRS_DT_CPU_BASE) #endif /* CONFIG_CPU_LITTLE_ENDIAN */ #endif #else diff --git a/arch/powerpc/include/asm/dtl.h b/arch/powerpc/include/asm/dtl.h index d6f43d149f8d..a5c21bc623cb 100644 --- a/arch/powerpc/include/asm/dtl.h +++ b/arch/powerpc/include/asm/dtl.h @@ -1,8 +1,8 @@ #ifndef _ASM_POWERPC_DTL_H #define _ASM_POWERPC_DTL_H +#include <linux/rwsem.h> #include <asm/lppaca.h> -#include <linux/spinlock_types.h> /* * Layout of entries in the hypervisor's 
dispatch trace log buffer. @@ -35,7 +35,7 @@ struct dtl_entry { #define DTL_LOG_ALL (DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT) extern struct kmem_cache *dtl_cache; -extern rwlock_t dtl_access_lock; +extern struct rw_semaphore dtl_access_lock; extern void register_dtl_buffer(int cpu); extern void alloc_dtl_buffers(unsigned long *time_limit); diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index ef40c9b6972a..a48f54dde4f6 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -19,6 +19,7 @@ extern int is_fadump_active(void); extern int should_fadump_crash(void); extern void crash_fadump(struct pt_regs *, const char *); extern void fadump_cleanup(void); +void fadump_setup_param_area(void); extern void fadump_append_bootargs(void); #else /* CONFIG_FA_DUMP */ @@ -26,6 +27,7 @@ static inline int is_fadump_active(void) { return 0; } static inline int should_fadump_crash(void) { return 0; } static inline void crash_fadump(struct pt_regs *regs, const char *str) { } static inline void fadump_cleanup(void) { } +static inline void fadump_setup_param_area(void) { } static inline void fadump_append_bootargs(void) { } #endif /* !CONFIG_FA_DUMP */ @@ -34,4 +36,11 @@ extern int early_init_dt_scan_fw_dump(unsigned long node, const char *uname, int depth, void *data); extern int fadump_reserve_mem(void); #endif + +#if defined(CONFIG_FA_DUMP) && defined(CONFIG_CMA) +void fadump_cma_init(void); +#else +static inline void fadump_cma_init(void) { } +#endif + #endif /* _ASM_POWERPC_FADUMP_H */ diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 559560286e6d..db481b336bca 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -24,7 +24,10 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, struct module; struct dyn_ftrace; struct dyn_arch_ftrace { - struct module *mod; +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* pointer to the associated out-of-line stub */ + unsigned long ool_stub; +#endif }; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS @@ -32,42 +35,21 @@ struct dyn_arch_ftrace { int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #define ftrace_init_nop ftrace_init_nop -struct ftrace_regs { - struct pt_regs regs; -}; +#include <linux/ftrace_regs.h> static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { /* We clear regs.msr in ftrace_call */ - return fregs->regs.msr ? &fregs->regs : NULL; + return arch_ftrace_regs(fregs)->regs.msr ? 
&arch_ftrace_regs(fregs)->regs : NULL; } static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip) { - regs_set_return_ip(&fregs->regs, ip); + regs_set_return_ip(&arch_ftrace_regs(fregs)->regs, ip); } -static __always_inline unsigned long -ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs) -{ - return instruction_pointer(&fregs->regs); -} - -#define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(&(fregs)->regs, n) -#define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(&(fregs)->regs) -#define ftrace_regs_return_value(fregs) \ - regs_return_value(&(fregs)->regs) -#define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(&(fregs)->regs, ret) -#define ftrace_override_function_with_return(fregs) \ - override_function_with_return(&(fregs)->regs) -#define ftrace_regs_query_register_offset(name) \ - regs_query_register_offset(name) - struct ftrace_ops; #define ftrace_graph_func ftrace_graph_func @@ -131,8 +113,36 @@ static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; } #ifdef CONFIG_FUNCTION_TRACER extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +struct ftrace_ool_stub { +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + struct ftrace_ops *ftrace_op; +#endif + u32 insn[4]; +} __aligned(sizeof(unsigned long)); +extern struct ftrace_ool_stub ftrace_ool_stub_text_end[], ftrace_ool_stub_text[], + ftrace_ool_stub_inittext[]; +extern unsigned int ftrace_ool_stub_text_end_count, ftrace_ool_stub_text_count, + ftrace_ool_stub_inittext_count; +#endif void ftrace_free_init_tramp(void); unsigned long ftrace_call_adjust(unsigned long addr); + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +/* + * When an ftrace registered caller is tracing a function that is also set by a + * register_ftrace_direct() call, it needs to be differentiated in the + * ftrace_caller trampoline so that the direct call can be invoked after the + * other ftrace ops. To do this, place the direct caller in the orig_gpr3 field + * of pt_regs. This tells ftrace_caller that there's a direct caller. + */ +static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr) +{ + struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs; + + regs->orig_gpr3 = addr; +} +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #else static inline void ftrace_free_init_tramp(void) { } static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; } diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 7a8495660c2f..65d1f291393d 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -495,6 +495,7 @@ #define H_GUEST_CAP_COPY_MEM (1UL<<(63-0)) #define H_GUEST_CAP_POWER9 (1UL<<(63-1)) #define H_GUEST_CAP_POWER10 (1UL<<(63-2)) +#define H_GUEST_CAP_POWER11 (1UL<<(63-3)) #define H_GUEST_CAP_BITMAP2 (1UL<<(63-63)) #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 52e1b1d15ff6..fd92ac450169 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -970,18 +970,6 @@ static inline void * phys_to_virt(unsigned long address) #define phys_to_virt phys_to_virt /* - * Change "struct page" to physical address. 
- */ -static inline phys_addr_t page_to_phys(struct page *page) -{ - unsigned long pfn = page_to_pfn(page); - - WARN_ON(IS_ENABLED(CONFIG_DEBUG_VIRTUAL) && !pfn_valid(pfn)); - - return PFN_PHYS(pfn); -} - -/* * 32 bits still uses virt_to_bus() for its implementation of DMA * mappings se we have to keep it defined here. We also have some old * drivers (shame shame shame) that use bus_to_virt() and haven't been diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h index fab124ada1c7..1f7cab58ab2c 100644 --- a/arch/powerpc/include/asm/kfence.h +++ b/arch/powerpc/include/asm/kfence.h @@ -15,7 +15,7 @@ #define ARCH_FUNC_PREFIX "." #endif -#ifdef CONFIG_KFENCE +extern bool kfence_early_init; extern bool kfence_disabled; static inline void disable_kfence(void) @@ -27,7 +27,11 @@ static inline bool arch_kfence_init_pool(void) { return !kfence_disabled; } -#endif + +static inline bool kfence_early_init_enabled(void) +{ + return IS_ENABLED(CONFIG_KFENCE) && kfence_early_init; +} #ifdef CONFIG_PPC64 static inline bool kfence_protect_page(unsigned long addr, bool protect) diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index 4525a9c68260..dfe2e5ad3b21 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -21,7 +21,7 @@ #include <linux/percpu.h> #include <linux/module.h> #include <asm/probes.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #ifdef CONFIG_KPROBES #define __ARCH_WANT_KPROBES_INSN_SLOT diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 10618622d7ef..e1ff291ba891 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -203,7 +203,7 @@ extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, extern int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, unsigned long gpa, struct kvm_memory_slot *memslot, - bool writing, bool kvm_ro, + bool writing, pte_t *inserted_pte, unsigned int *levelp); extern int kvmppc_init_vm_radix(struct kvm *kvm); extern void kvmppc_free_radix(struct kvm *kvm); @@ -235,7 +235,7 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); extern int kvmppc_emulate_paired_single(struct kvm_vcpu *vcpu); extern kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, - bool writing, bool *writable); + bool writing, bool *writable, struct page **page); extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, unsigned long *rmap, long pte_index, int realmode); extern void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot, diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 2ef9a5f4e5d1..b936e174eefd 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -684,10 +684,16 @@ int kvmhv_nestedv2_set_ptbl_entry(unsigned long lpid, u64 dw0, u64 dw1); int kvmhv_nestedv2_parse_output(struct kvm_vcpu *vcpu); int kvmhv_nestedv2_set_vpa(struct kvm_vcpu *vcpu, unsigned long vpa); -int kmvhv_counters_tracepoint_regfunc(void); -void kmvhv_counters_tracepoint_unregfunc(void); +int kvmhv_counters_tracepoint_regfunc(void); +void kvmhv_counters_tracepoint_unregfunc(void); int kvmhv_get_l2_counters_status(void); void kvmhv_set_l2_counters_status(int cpu, bool status); +u64 kvmhv_get_l1_to_l2_cs_time(void); +u64 kvmhv_get_l2_to_l1_cs_time(void); +u64 
kvmhv_get_l2_runtime_agg(void); +u64 kvmhv_get_l1_to_l2_cs_time_vcpu(void); +u64 kvmhv_get_l2_to_l1_cs_time_vcpu(void); +u64 kvmhv_get_l2_runtime_agg_vcpu(void); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 37e581c5b201..6e1108f8fce6 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -871,6 +871,11 @@ struct kvm_vcpu_arch { struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */ #endif #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + u64 l1_to_l2_cs; + u64 l2_to_l1_cs; + u64 l2_runtime_agg; +#endif }; #define VCPU_FPR(vcpu, i) (vcpu)->arch.fp.fpr[i][TS_FPROFFSET] diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 1862f94335ee..3298eec123a3 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -4,20 +4,24 @@ #ifdef __KERNEL__ #include <linux/compiler.h> -#include <linux/seq_file.h> #include <linux/init.h> -#include <linux/dma-mapping.h> #include <linux/export.h> +#include <linux/time64.h> + +#include <asm/page.h> struct pt_regs; struct pci_bus; +struct device; struct device_node; struct iommu_table; struct rtc_time; struct file; +struct pci_dev; struct pci_controller; struct kimage; struct pci_host_bridge; +struct seq_file; struct machdep_calls { const char *name; diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 300c777cc307..e1ee5026ac4a 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -35,9 +35,11 @@ struct mod_arch_specific { bool toc_fixed; /* Have we fixed up .TOC.? */ #endif +#ifdef CONFIG_PPC64_ELF_ABI_V1 /* For module function descriptor dereference */ unsigned long start_opd; unsigned long end_opd; +#endif #else /* powerpc64 */ /* Indices of PLT sections within module. */ unsigned int core_plt_section; @@ -47,6 +49,11 @@ struct mod_arch_specific { #ifdef CONFIG_DYNAMIC_FTRACE unsigned long tramp; unsigned long tramp_regs; +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + struct ftrace_ool_stub *ool_stubs; + unsigned int ool_stub_count; + unsigned int ool_stub_index; +#endif #endif }; diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 83d0a4fc5f75..af9a2628d1df 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -21,8 +21,7 @@ * page size. When using 64K pages however, whether we are really supporting * 64K pages in HW or not is irrelevant to those definitions. */ -#define PAGE_SHIFT CONFIG_PAGE_SHIFT -#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) +#include <vdso/page.h> #ifndef __ASSEMBLY__ #ifndef CONFIG_HUGETLB_PAGE @@ -42,13 +41,6 @@ extern unsigned int hpage_shift; #endif /* - * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we - * assign PAGE_MASK to a larger type it gets extended the way we want - * (i.e. with 1s in the high bits) - */ -#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) - -/* * KERNELBASE is the virtual address of the start of the kernel, it's often * the same as PAGE_OFFSET, but _might not be_. 
* diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 5995614e9062..af0f46e2373b 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -102,8 +102,8 @@ struct power_pmu { int __init register_power_pmu(struct power_pmu *pmu); struct pt_regs; -extern unsigned long perf_misc_flags(struct pt_regs *regs); -extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_arch_misc_flags(struct pt_regs *regs); +extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs); extern unsigned long int read_bhrb(int n); /* @@ -111,7 +111,7 @@ extern unsigned long int read_bhrb(int n); * if we have hardware PMU support. */ #ifdef CONFIG_PPC_PERF_CTRS -#define perf_misc_flags(regs) perf_misc_flags(regs) +#define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs) #endif /* diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index b98a9e982c03..4312bcb913a4 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -587,12 +587,26 @@ #define PPC_RAW_MTSPR(spr, d) (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr)) #define PPC_RAW_EIEIO() (0x7c0006ac) +/* bcl 20,31,$+4 */ +#define PPC_RAW_BCL4() (0x429f0005) #define PPC_RAW_BRANCH(offset) (0x48000000 | PPC_LI(offset)) #define PPC_RAW_BL(offset) (0x48000001 | PPC_LI(offset)) #define PPC_RAW_TW(t0, a, b) (0x7c000008 | ___PPC_RS(t0) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_TRAP() PPC_RAW_TW(31, 0, 0) #define PPC_RAW_SETB(t, bfa) (0x7c000100 | ___PPC_RT(t) | ___PPC_RA((bfa) << 2)) +#ifdef CONFIG_PPC32 +#define PPC_RAW_STL PPC_RAW_STW +#define PPC_RAW_STLU PPC_RAW_STWU +#define PPC_RAW_LL PPC_RAW_LWZ +#define PPC_RAW_CMPLI PPC_RAW_CMPWI +#else +#define PPC_RAW_STL PPC_RAW_STD +#define PPC_RAW_STLU PPC_RAW_STDU +#define PPC_RAW_LL PPC_RAW_LD +#define PPC_RAW_CMPLI PPC_RAW_CMPDI +#endif + /* Deal with instructions that older assemblers aren't aware of */ #define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) #define PPC_CP_ABORT stringify_in_c(.long PPC_RAW_CP_ABORT) diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h index 9a025b776a4b..9c8d5747755d 100644 --- a/arch/powerpc/include/asm/set_memory.h +++ b/arch/powerpc/include/asm/set_memory.h @@ -12,37 +12,37 @@ int change_memory_attr(unsigned long addr, int numpages, long action); -static inline int set_memory_ro(unsigned long addr, int numpages) +static inline int __must_check set_memory_ro(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_RO); } -static inline int set_memory_rw(unsigned long addr, int numpages) +static inline int __must_check set_memory_rw(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_RW); } -static inline int set_memory_nx(unsigned long addr, int numpages) +static inline int __must_check set_memory_nx(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_NX); } -static inline int set_memory_x(unsigned long addr, int numpages) +static inline int __must_check set_memory_x(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_X); } -static inline int set_memory_np(unsigned long addr, int numpages) +static inline int __must_check set_memory_np(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_NP); } -static inline int 
set_memory_p(unsigned long addr, int numpages) +static inline int __must_check set_memory_p(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_P); } -static inline int set_memory_rox(unsigned long addr, int numpages) +static inline int __must_check set_memory_rox(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_ROX); } diff --git a/arch/powerpc/include/asm/simple_spinlock_types.h b/arch/powerpc/include/asm/simple_spinlock_types.h index 08243338069d..391fc19f7272 100644 --- a/arch/powerpc/include/asm/simple_spinlock_types.h +++ b/arch/powerpc/include/asm/simple_spinlock_types.h @@ -3,7 +3,7 @@ #define _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H #ifndef __LINUX_SPINLOCK_TYPES_RAW_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif typedef struct { diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h index 40b01446cf75..569765fa16bc 100644 --- a/arch/powerpc/include/asm/spinlock_types.h +++ b/arch/powerpc/include/asm/spinlock_types.h @@ -3,7 +3,7 @@ #define _ASM_POWERPC_SPINLOCK_TYPES_H #ifndef __LINUX_SPINLOCK_TYPES_RAW_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif #ifdef CONFIG_PPC_QUEUED_SPINLOCKS diff --git a/arch/powerpc/include/asm/spu_priv1.h b/arch/powerpc/include/asm/spu_priv1.h index 2167d756e6d5..6fee411d973d 100644 --- a/arch/powerpc/include/asm/spu_priv1.h +++ b/arch/powerpc/include/asm/spu_priv1.h @@ -216,7 +216,6 @@ spu_disable_spu (struct spu_context *ctx) */ extern const struct spu_priv1_ops spu_priv1_mmio_ops; -extern const struct spu_priv1_ops spu_priv1_beat_ops; extern const struct spu_management_ops spu_management_of_ops; diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 50950deedb87..e3d0e714ff28 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -173,9 +173,4 @@ int emulate_step(struct pt_regs *regs, ppc_inst_t instr); */ extern int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op); -extern void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, - const void *mem, bool cross_endian); -extern void emulate_vsx_store(struct instruction_op *op, - const union vsx_reg *reg, void *mem, - bool cross_endian); extern int emulate_dcbz(unsigned long ea, struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/systemcfg.h b/arch/powerpc/include/asm/systemcfg.h new file mode 100644 index 000000000000..2f9b1d6a5c98 --- /dev/null +++ b/arch/powerpc/include/asm/systemcfg.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _SYSTEMCFG_H +#define _SYSTEMCFG_H + +/* + * Copyright (C) 2002 Peter Bergner <[email protected]>, IBM + * Copyright (C) 2005 Benjamin Herrenschmidy <[email protected]>, + * IBM Corp. + */ + +#ifdef CONFIG_PPC64 + +/* + * If the major version changes we are incompatible. + * Minor version changes are a hint. + */ +#define SYSTEMCFG_MAJOR 1 +#define SYSTEMCFG_MINOR 1 + +#include <linux/types.h> + +struct systemcfg { + __u8 eye_catcher[16]; /* Eyecatcher: SYSTEMCFG:PPC64 0x00 */ + struct { /* Systemcfg version numbers */ + __u32 major; /* Major number 0x10 */ + __u32 minor; /* Minor number 0x14 */ + } version; + + /* Note about the platform flags: it now only contains the lpar + * bit. 
The actual platform number is dead and buried + */ + __u32 platform; /* Platform flags 0x18 */ + __u32 processor; /* Processor type 0x1C */ + __u64 processorCount; /* # of physical processors 0x20 */ + __u64 physicalMemorySize; /* Size of real memory(B) 0x28 */ + __u64 tb_orig_stamp; /* (NU) Timebase at boot 0x30 */ + __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */ + __u64 tb_to_xs; /* (NU) Inverse of TB to 2^20 0x40 */ + __u64 stamp_xsec; /* (NU) 0x48 */ + __u64 tb_update_count; /* (NU) Timebase atomicity ctr 0x50 */ + __u32 tz_minuteswest; /* (NU) Min. west of Greenwich 0x58 */ + __u32 tz_dsttime; /* (NU) Type of dst correction 0x5C */ + __u32 dcache_size; /* L1 d-cache size 0x60 */ + __u32 dcache_line_size; /* L1 d-cache line size 0x64 */ + __u32 icache_size; /* L1 i-cache size 0x68 */ + __u32 icache_line_size; /* L1 i-cache line size 0x6C */ +}; + +extern struct systemcfg *systemcfg; + +#endif /* CONFIG_PPC64 */ +#endif /* _SYSTEMCFG_H */ diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/text-patching.h index e7f14720f630..e7f14720f630 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/text-patching.h diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h index 289023f7a656..a8681b12864f 100644 --- a/arch/powerpc/include/asm/udbg.h +++ b/arch/powerpc/include/asm/udbg.h @@ -38,7 +38,6 @@ void __init udbg_early_init(void); void __init udbg_init_debug_lpar(void); void __init udbg_init_debug_lpar_hvsi(void); void __init udbg_init_pmac_realmode(void); -void __init udbg_init_maple_realmode(void); void __init udbg_init_pas_realmode(void); void __init udbg_init_rtas_panel(void); void __init udbg_init_rtas_console(void); diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h index 7650b6ce14c8..8d972bc98b55 100644 --- a/arch/powerpc/include/asm/vdso.h +++ b/arch/powerpc/include/asm/vdso.h @@ -25,6 +25,7 @@ int vdso_getcpu_init(void); #ifdef __VDSO64__ #define V_FUNCTION_BEGIN(name) \ .globl name; \ + .type name,@function; \ name: \ #define V_FUNCTION_END(name) \ diff --git a/arch/powerpc/include/asm/vdso/getrandom.h b/arch/powerpc/include/asm/vdso/getrandom.h index 501d6bb14e8a..80ce0709725e 100644 --- a/arch/powerpc/include/asm/vdso/getrandom.h +++ b/arch/powerpc/include/asm/vdso/getrandom.h @@ -7,6 +7,8 @@ #ifndef __ASSEMBLY__ +#include <asm/vdso_datapage.h> + static __always_inline int do_syscall_3(const unsigned long _r0, const unsigned long _r3, const unsigned long _r4, const unsigned long _r5) { @@ -43,11 +45,21 @@ static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsig static __always_inline struct vdso_rng_data *__arch_get_vdso_rng_data(void) { - return NULL; + struct vdso_arch_data *data; + + asm ( + " bcl 20, 31, .+4 ;" + "0: mflr %0 ;" + " addis %0, %0, (_vdso_datapage - 0b)@ha ;" + " addi %0, %0, (_vdso_datapage - 0b)@l ;" + : "=r" (data) : : "lr" + ); + + return &data->rng_data; } ssize_t __c_kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, - size_t opaque_len, const struct vdso_rng_data *vd); + size_t opaque_len); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/vdso/vsyscall.h b/arch/powerpc/include/asm/vdso/vsyscall.h index 92f480d8cc6d..48560a119559 100644 --- a/arch/powerpc/include/asm/vdso/vsyscall.h +++ b/arch/powerpc/include/asm/vdso/vsyscall.h @@ -4,12 +4,8 @@ #ifndef __ASSEMBLY__ -#include <linux/timekeeper_internal.h> #include <asm/vdso_datapage.h> -/* - * Update the vDSO data 
page to keep in sync with kernel timekeeping. - */ static __always_inline struct vdso_data *__arch_get_k_vdso_data(void) { diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index 248dee138f7b..a202f5b63479 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -9,29 +9,6 @@ * IBM Corp. */ - -/* - * Note about this structure: - * - * This structure was historically called systemcfg and exposed to - * userland via /proc/ppc64/systemcfg. Unfortunately, this became an - * ABI issue as some proprietary software started relying on being able - * to mmap() it, thus we have to keep the base layout at least for a - * few kernel versions. - * - * However, since ppc32 doesn't suffer from this backward handicap, - * a simpler version of the data structure is used there with only the - * fields actually used by the vDSO. - * - */ - -/* - * If the major version changes we are incompatible. - * Minor version changes are a hint. - */ -#define SYSTEMCFG_MAJOR 1 -#define SYSTEMCFG_MINOR 1 - #ifndef __ASSEMBLY__ #include <linux/unistd.h> @@ -40,41 +17,10 @@ #define SYSCALL_MAP_SIZE ((NR_syscalls + 31) / 32) -/* - * So here is the ppc64 backward compatible version - */ - #ifdef CONFIG_PPC64 struct vdso_arch_data { - __u8 eye_catcher[16]; /* Eyecatcher: SYSTEMCFG:PPC64 0x00 */ - struct { /* Systemcfg version numbers */ - __u32 major; /* Major number 0x10 */ - __u32 minor; /* Minor number 0x14 */ - } version; - - /* Note about the platform flags: it now only contains the lpar - * bit. The actual platform number is dead and buried - */ - __u32 platform; /* Platform flags 0x18 */ - __u32 processor; /* Processor type 0x1C */ - __u64 processorCount; /* # of physical processors 0x20 */ - __u64 physicalMemorySize; /* Size of real memory(B) 0x28 */ - __u64 tb_orig_stamp; /* (NU) Timebase at boot 0x30 */ - __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */ - __u64 tb_to_xs; /* (NU) Inverse of TB to 2^20 0x40 */ - __u64 stamp_xsec; /* (NU) 0x48 */ - __u64 tb_update_count; /* (NU) Timebase atomicity ctr 0x50 */ - __u32 tz_minuteswest; /* (NU) Min. 
west of Greenwich 0x58 */ - __u32 tz_dsttime; /* (NU) Type of dst correction 0x5C */ - __u32 dcache_size; /* L1 d-cache size 0x60 */ - __u32 dcache_line_size; /* L1 d-cache line size 0x64 */ - __u32 icache_size; /* L1 i-cache size 0x68 */ - __u32 icache_line_size; /* L1 i-cache line size 0x6C */ - - /* those additional ones don't have to be located anywhere - * special as they were not part of the original systemcfg - */ + __u64 tb_ticks_per_sec; /* Timebase tics / sec */ __u32 dcache_block_size; /* L1 d-cache block size */ __u32 icache_block_size; /* L1 i-cache block size */ __u32 dcache_log_block_size; /* L1 d-cache log block size */ @@ -82,21 +28,20 @@ struct vdso_arch_data { __u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */ __u32 compat_syscall_map[SYSCALL_MAP_SIZE]; /* Map of compat syscalls */ - struct vdso_data data[CS_BASES]; struct vdso_rng_data rng_data; + + struct vdso_data data[CS_BASES] __aligned(1 << CONFIG_PAGE_SHIFT); }; #else /* CONFIG_PPC64 */ -/* - * And here is the simpler 32 bits version - */ struct vdso_arch_data { - __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */ + __u64 tb_ticks_per_sec; /* Timebase tics / sec */ __u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */ __u32 compat_syscall_map[0]; /* No compat syscalls on PPC32 */ - struct vdso_data data[CS_BASES]; struct vdso_rng_data rng_data; + + struct vdso_data data[CS_BASES] __aligned(1 << CONFIG_PAGE_SHIFT); }; #endif /* CONFIG_PPC64 */ @@ -105,29 +50,17 @@ extern struct vdso_arch_data *vdso_data; #else /* __ASSEMBLY__ */ -.macro get_datapage ptr +.macro get_datapage ptr offset=0 bcl 20, 31, .+4 999: mflr \ptr - addis \ptr, \ptr, (_vdso_datapage - 999b)@ha - addi \ptr, \ptr, (_vdso_datapage - 999b)@l + addis \ptr, \ptr, (_vdso_datapage - 999b + \offset)@ha + addi \ptr, \ptr, (_vdso_datapage - 999b + \offset)@l .endm #include <asm/asm-offsets.h> #include <asm/page.h> -.macro get_realdatapage ptr scratch - get_datapage \ptr -#ifdef CONFIG_TIME_NS - lwz \scratch, VDSO_CLOCKMODE_OFFSET(\ptr) - xoris \scratch, \scratch, VDSO_CLOCKMODE_TIMENS@h - xori \scratch, \scratch, VDSO_CLOCKMODE_TIMENS@l - cntlzw \scratch, \scratch - rlwinm \scratch, \scratch, PAGE_SHIFT - 5, 1 << PAGE_SHIFT - add \ptr, \ptr, \scratch -#endif -.endm - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/vga.h b/arch/powerpc/include/asm/vga.h index fcf721682a71..f2dc40e1c52a 100644 --- a/arch/powerpc/include/asm/vga.h +++ b/arch/powerpc/include/asm/vga.h @@ -40,11 +40,6 @@ static inline void scr_memsetw(u16 *s, u16 v, unsigned int n) memset16(s, cpu_to_le16(v), n / 2); } -#define VT_BUF_HAVE_MEMCPYW -#define VT_BUF_HAVE_MEMMOVEW -#define scr_memcpyw memcpy -#define scr_memmovew memmove - #endif /* !CONFIG_VGA_CONSOLE && !CONFIG_MDA_CONSOLE */ #ifdef __powerpc64__ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 131a8cc10dbe..7a390bd4f4af 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -335,7 +335,6 @@ int main(void) /* datapage offsets for use by vdso */ OFFSET(VDSO_DATA_OFFSET, vdso_arch_data, data); - OFFSET(VDSO_RNG_DATA_OFFSET, vdso_arch_data, rng_data); OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec); #ifdef CONFIG_PPC64 OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size); @@ -347,8 +346,6 @@ int main(void) #else OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, syscall_map); #endif - OFFSET(VDSO_CLOCKMODE_OFFSET, vdso_arch_data, data[0].clock_mode); - DEFINE(VDSO_CLOCKMODE_TIMENS, 
VDSO_CLOCKMODE_TIMENS); #ifdef CONFIG_BUG DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); @@ -597,7 +594,6 @@ int main(void) HSTATE_FIELD(HSTATE_DABR, dabr); HSTATE_FIELD(HSTATE_DECEXP, dec_expires); HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode); - DEFINE(IPI_PRIORITY, IPI_PRIORITY); OFFSET(KVM_SPLIT_RPR, kvm_split_mode, rpr); OFFSET(KVM_SPLIT_PMMAR, kvm_split_mode, pmmar); OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar); @@ -677,5 +673,16 @@ int main(void) DEFINE(BPT_SIZE, BPT_SIZE); #endif +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + DEFINE(FTRACE_OOL_STUB_SIZE, sizeof(struct ftrace_ool_stub)); +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + OFFSET(FTRACE_OPS_FUNC, ftrace_ops, func); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + OFFSET(FTRACE_OPS_DIRECT_CALL, ftrace_ops, direct_call); +#endif +#endif + return 0; } diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 2086fa6cdc25..103b6605dd68 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -13,7 +13,7 @@ #include <linux/io.h> #include <linux/memblock.h> #include <linux/of.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/kdump.h> #include <asm/firmware.h> #include <linux/uio.h> diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index af4263594eb2..1bee15c013e7 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -867,7 +867,7 @@ bool __init dt_cpu_ftrs_init(void *fdt) using_dt_cpu_ftrs = false; /* Setup and verify the FDT, if it fails we just bail */ - if (!early_init_dt_verify(fdt)) + if (!early_init_dt_verify(fdt, __pa(fdt))) return false; if (!of_scan_flat_dt(fdt_find_cpu_features, NULL)) diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index d4b8aff20815..247ab2acaccc 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -9,7 +9,7 @@ #include <linux/of_fdt.h> #include <asm/epapr_hcalls.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/machdep.h> #include <asm/inst.h> diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index a612e7513a4f..4b371c738213 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -78,26 +78,38 @@ static struct cma *fadump_cma; * But for some reason even if it fails we still have the memory reservation * with us and we can still continue doing fadump. */ -static int __init fadump_cma_init(void) +void __init fadump_cma_init(void) { - unsigned long long base, size; + unsigned long long base, size, end; int rc; - if (!fw_dump.fadump_enabled) - return 0; - + if (!fw_dump.fadump_supported || !fw_dump.fadump_enabled || + fw_dump.dump_active) + return; /* * Do not use CMA if user has provided fadump=nocma kernel parameter. - * Return 1 to continue with fadump old behaviour. */ - if (fw_dump.nocma) - return 1; + if (fw_dump.nocma || !fw_dump.boot_memory_size) + return; + /* + * [base, end) should be reserved during early init in + * fadump_reserve_mem(). No need to check this here as + * cma_init_reserved_mem() already checks for overlap. + * Here we give the aligned chunk of this reserved memory to CMA. 
+ */ base = fw_dump.reserve_dump_area_start; size = fw_dump.boot_memory_size; + end = base + size; - if (!size) - return 0; + base = ALIGN(base, CMA_MIN_ALIGNMENT_BYTES); + end = ALIGN_DOWN(end, CMA_MIN_ALIGNMENT_BYTES); + size = end - base; + + if (end <= base) { + pr_warn("%s: Too less memory to give to CMA\n", __func__); + return; + } rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma); if (rc) { @@ -108,7 +120,7 @@ static int __init fadump_cma_init(void) * blocked from production system usage. Hence return 1, * so that we can continue with fadump. */ - return 1; + return; } /* @@ -120,15 +132,13 @@ static int __init fadump_cma_init(void) /* * So we now have successfully initialized cma area for fadump. */ - pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx " + pr_info("Initialized [0x%llx, %luMB] cma area from [0x%lx, %luMB] " "bytes of memory reserved for firmware-assisted dump\n", - cma_get_size(fadump_cma), - (unsigned long)cma_get_base(fadump_cma) >> 20, - fw_dump.reserve_dump_area_size); - return 1; + cma_get_base(fadump_cma), cma_get_size(fadump_cma) >> 20, + fw_dump.reserve_dump_area_start, + fw_dump.boot_memory_size >> 20); + return; } -#else -static int __init fadump_cma_init(void) { return 1; } #endif /* CONFIG_CMA */ /* @@ -143,7 +153,7 @@ void __init fadump_append_bootargs(void) if (!fw_dump.dump_active || !fw_dump.param_area_supported || !fw_dump.param_area) return; - if (fw_dump.param_area >= fw_dump.boot_mem_top) { + if (fw_dump.param_area < fw_dump.boot_mem_top) { if (memblock_reserve(fw_dump.param_area, COMMAND_LINE_SIZE)) { pr_warn("WARNING: Can't use additional parameters area!\n"); fw_dump.param_area = 0; @@ -558,13 +568,6 @@ int __init fadump_reserve_mem(void) if (!fw_dump.dump_active) { fw_dump.boot_memory_size = PAGE_ALIGN(fadump_calculate_reserve_size()); -#ifdef CONFIG_CMA - if (!fw_dump.nocma) { - fw_dump.boot_memory_size = - ALIGN(fw_dump.boot_memory_size, - CMA_MIN_ALIGNMENT_BYTES); - } -#endif bootmem_min = fw_dump.ops->fadump_get_bootmem_min(); if (fw_dump.boot_memory_size < bootmem_min) { @@ -637,8 +640,6 @@ int __init fadump_reserve_mem(void) pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n", (size >> 20), base, (memblock_phys_mem_size() >> 20)); - - ret = fadump_cma_init(); } return ret; @@ -1586,6 +1587,12 @@ static void __init fadump_init_files(void) return; } + if (fw_dump.param_area) { + rc = sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr); + if (rc) + pr_err("unable to create bootargs_append sysfs file (%d)\n", rc); + } + debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL, &fadump_region_fops); @@ -1740,7 +1747,7 @@ err_out: * Reserve memory to store additional parameters to be passed * for fadump/capture kernel. */ -static void __init fadump_setup_param_area(void) +void __init fadump_setup_param_area(void) { phys_addr_t range_start, range_end; @@ -1748,7 +1755,7 @@ static void __init fadump_setup_param_area(void) return; /* This memory can't be used by PFW or bootloader as it is shared across kernels */ - if (radix_enabled()) { + if (early_radix_enabled()) { /* * Anywhere in the upper half should be good enough as all memory * is accessible in real mode. 
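The radix_enabled() to early_radix_enabled() switch in fadump_setup_param_area() fits the rest of this series, which moves the param area setup into early boot; presumably mmu_has_feature() is not yet reliable at that point because the feature static keys have not been patched. For reference, the early_ variant, paraphrased here from asm/mmu.h, simply tests the raw feature mask:

    static inline bool early_radix_enabled(void)
    {
            return early_mmu_has_feature(MMU_FTR_TYPE_RADIX);
    }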
@@ -1776,12 +1783,12 @@ static void __init fadump_setup_param_area(void) COMMAND_LINE_SIZE, range_start, range_end); - if (!fw_dump.param_area || sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr)) { + if (!fw_dump.param_area) { pr_warn("WARNING: Could not setup area to pass additional parameters!\n"); return; } - memset(phys_to_virt(fw_dump.param_area), 0, COMMAND_LINE_SIZE); + memset((void *)fw_dump.param_area, 0, COMMAND_LINE_SIZE); } /* @@ -1807,7 +1814,6 @@ int __init setup_fadump(void) } /* Initialize the kernel dump memory structure and register with f/w */ else if (fw_dump.reserve_dump_area_size) { - fadump_setup_param_area(); fw_dump.ops->fadump_init_mem_struct(&fw_dump); register_fadump(); } diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 2e1600a8bbbb..a0e8b998c9b5 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -89,69 +89,69 @@ int arch_show_interrupts(struct seq_file *p, int prec) #if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT) if (tau_initialized) { - seq_printf(p, "%*s: ", prec, "TAU"); + seq_printf(p, "%*s:", prec, "TAU"); for_each_online_cpu(j) - seq_printf(p, "%10u ", tau_interrupts(j)); + seq_put_decimal_ull_width(p, " ", tau_interrupts(j), 10); seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n"); } #endif /* CONFIG_PPC32 && CONFIG_TAU_INT */ - seq_printf(p, "%*s: ", prec, "LOC"); + seq_printf(p, "%*s:", prec, "LOC"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).timer_irqs_event, 10); seq_printf(p, " Local timer interrupts for timer event device\n"); - seq_printf(p, "%*s: ", prec, "BCT"); + seq_printf(p, "%*s:", prec, "BCT"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).broadcast_irqs_event); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).broadcast_irqs_event, 10); seq_printf(p, " Broadcast timer interrupts for timer event device\n"); - seq_printf(p, "%*s: ", prec, "LOC"); + seq_printf(p, "%*s:", prec, "LOC"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_others); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).timer_irqs_others, 10); seq_printf(p, " Local timer interrupts for others\n"); - seq_printf(p, "%*s: ", prec, "SPU"); + seq_printf(p, "%*s:", prec, "SPU"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).spurious_irqs, 10); seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "%*s: ", prec, "PMI"); + seq_printf(p, "%*s:", prec, "PMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).pmu_irqs, 10); seq_printf(p, " Performance monitoring interrupts\n"); - seq_printf(p, "%*s: ", prec, "MCE"); + seq_printf(p, "%*s:", prec, "MCE"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).mce_exceptions, 10); seq_printf(p, " Machine check exceptions\n"); #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_HVMODE)) { - seq_printf(p, "%*s: ", prec, "HMI"); + seq_printf(p, "%*s:", prec, "HMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", paca_ptrs[j]->hmi_irqs); + seq_put_decimal_ull_width(p, " ", paca_ptrs[j]->hmi_irqs, 10); seq_printf(p, " Hypervisor Maintenance Interrupts\n"); } #endif - seq_printf(p, "%*s: ", prec, "NMI"); + seq_printf(p, "%*s:", prec, 
"NMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).sreset_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).sreset_irqs, 10); seq_printf(p, " System Reset interrupts\n"); #ifdef CONFIG_PPC_WATCHDOG - seq_printf(p, "%*s: ", prec, "WDG"); + seq_printf(p, "%*s:", prec, "WDG"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).soft_nmi_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).soft_nmi_irqs, 10); seq_printf(p, " Watchdog soft-NMI interrupts\n"); #endif #ifdef CONFIG_PPC_DOORBELL if (cpu_has_feature(CPU_FTR_DBELL)) { - seq_printf(p, "%*s: ", prec, "DBL"); + seq_printf(p, "%*s:", prec, "DBL"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).doorbell_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).doorbell_irqs, 10); seq_printf(p, " Doorbell interrupts\n"); } #endif diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index 5277cf582c16..2659e1ac8604 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -5,7 +5,7 @@ #include <linux/kernel.h> #include <linux/jump_label.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/inst.h> void arch_jump_label_transform(struct jump_entry *entry, diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 7a8bc03a00af..5081334b7bd2 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -21,7 +21,7 @@ #include <asm/processor.h> #include <asm/machdep.h> #include <asm/debug.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <linux/slab.h> #include <asm/inst.h> diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index f8aa91bc3b17..c0d9f12cb441 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -21,7 +21,7 @@ #include <linux/slab.h> #include <linux/set_memory.h> #include <linux/execmem.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/cacheflush.h> #include <asm/sstep.h> #include <asm/sections.h> @@ -105,24 +105,22 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) return addr; } -static bool arch_kprobe_on_func_entry(unsigned long offset) +static bool arch_kprobe_on_func_entry(unsigned long addr, unsigned long offset) { -#ifdef CONFIG_PPC64_ELF_ABI_V2 -#ifdef CONFIG_KPROBES_ON_FTRACE - return offset <= 16; -#else - return offset <= 8; -#endif -#else + unsigned long ip = ftrace_location(addr); + + if (ip) + return offset <= (ip - addr); + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + return offset <= 8; return !offset; -#endif } /* XXX try and fold the magic of kprobe_lookup_name() in this */ kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry) { - *on_func_entry = arch_kprobe_on_func_entry(offset); + *on_func_entry = arch_kprobe_on_func_entry(addr, offset); return (kprobe_opcode_t *)(addr + offset); } diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 91123e102db4..a997c7f43dc0 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -74,7 +74,7 @@ _GLOBAL(rmci_off) blr #endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */ -#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) +#ifdef CONFIG_PPC_PMAC /* * Do an IO access in real mode @@ -137,7 +137,7 @@ _GLOBAL(real_writeb) sync isync blr -#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */ 
+#endif // CONFIG_PPC_PMAC #ifdef CONFIG_PPC_PASEMI @@ -174,7 +174,7 @@ _GLOBAL(real_205_writeb) #endif /* CONFIG_PPC_PASEMI */ -#if defined(CONFIG_CPU_FREQ_PMAC64) || defined(CONFIG_CPU_FREQ_MAPLE) +#ifdef CONFIG_CPU_FREQ_PMAC64 /* * SCOM access functions for 970 (FX only for now) * @@ -243,7 +243,7 @@ _GLOBAL(scom970_write) /* restore interrupts */ mtmsrd r5,1 blr -#endif /* CONFIG_CPU_FREQ_PMAC64 || CONFIG_CPU_FREQ_MAPLE */ +#endif // CONFIG_CPU_FREQ_PMAC64 /* kexec_wait(phys_cpu) * diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 816a63fd71fb..f930e3395a7f 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -18,7 +18,7 @@ #include <linux/bug.h> #include <linux/sort.h> #include <asm/setup.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> /* Count how many different relocations (different symbol, different addend) */ diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index e9bab599d0c2..45dac7b46aa3 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -17,7 +17,7 @@ #include <linux/kernel.h> #include <asm/module.h> #include <asm/firmware.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <linux/sort.h> #include <asm/setup.h> #include <asm/sections.h> @@ -205,7 +205,9 @@ static int relacmp(const void *_x, const void *_y) /* Get size of potential trampolines required. */ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, - const Elf64_Shdr *sechdrs) + const Elf64_Shdr *sechdrs, + char *secstrings, + struct module *me) { /* One extra reloc so it's always 0-addr terminated */ unsigned long relocs = 1; @@ -241,13 +243,25 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, } } -#ifdef CONFIG_DYNAMIC_FTRACE - /* make the trampoline to the ftrace_caller */ - relocs++; -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - /* an additional one for ftrace_regs_caller */ - relocs++; -#endif + /* stubs for ftrace_caller and ftrace_regs_caller */ + relocs += IS_ENABLED(CONFIG_DYNAMIC_FTRACE) + IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS); + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* stubs for the function tracer */ + for (i = 1; i < hdr->e_shnum; i++) { + if (!strcmp(secstrings + sechdrs[i].sh_name, "__patchable_function_entries")) { + me->arch.ool_stub_count = sechdrs[i].sh_size / sizeof(unsigned long); + me->arch.ool_stub_index = 0; + relocs += roundup(me->arch.ool_stub_count * sizeof(struct ftrace_ool_stub), + sizeof(struct ppc64_stub_entry)) / + sizeof(struct ppc64_stub_entry); + break; + } + } + if (i == hdr->e_shnum) { + pr_err("%s: doesn't contain __patchable_function_entries.\n", me->name); + return -ENOEXEC; + } #endif pr_debug("Looks like a total of %lu stubs, max\n", relocs); @@ -460,7 +474,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, #endif /* Override the stubs size */ - sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs); + sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs, secstrings, me); return 0; } @@ -1085,6 +1099,37 @@ int module_trampoline_target(struct module *mod, unsigned long addr, return 0; } +static int setup_ftrace_ool_stubs(const Elf64_Shdr *sechdrs, unsigned long addr, struct module *me) +{ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + unsigned int i, total_stubs, num_stubs; + struct ppc64_stub_entry *stub; + + total_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stub); + num_stubs = roundup(me->arch.ool_stub_count * sizeof(struct 
ftrace_ool_stub), + sizeof(struct ppc64_stub_entry)) / sizeof(struct ppc64_stub_entry); + + /* Find the next available entry */ + stub = (void *)sechdrs[me->arch.stubs_section].sh_addr; + for (i = 0; stub_func_addr(stub[i].funcdata); i++) + if (WARN_ON(i >= total_stubs)) + return -1; + + if (WARN_ON(i + num_stubs > total_stubs)) + return -1; + + stub += i; + me->arch.ool_stubs = (struct ftrace_ool_stub *)stub; + + /* reserve stubs */ + for (i = 0; i < num_stubs; i++) + if (patch_u32((void *)&stub->funcdata, PPC_RAW_NOP())) + return -1; +#endif + + return 0; +} + int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) { mod->arch.tramp = stub_for_addr(sechdrs, @@ -1103,6 +1148,9 @@ int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) if (!mod->arch.tramp) return -ENOENT; + if (setup_ftrace_ool_stubs(sechdrs, mod->arch.tramp, mod)) + return -ENOENT; + return 0; } #endif diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index c0b351d61058..2e83702bf9ba 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -13,7 +13,7 @@ #include <asm/kprobes.h> #include <asm/ptrace.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/sstep.h> #include <asm/ppc-opcode.h> #include <asm/inst.h> diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index b109cd7b5d01..3816a2bf2b84 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -4,6 +4,7 @@ */ #include <linux/init.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/kernel.h> @@ -12,9 +13,10 @@ #include <asm/machdep.h> #include <asm/vdso_datapage.h> #include <asm/rtas.h> +#include <asm/systemcfg.h> #include <linux/uaccess.h> -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG static loff_t page_map_seek(struct file *file, loff_t off, int whence) { @@ -33,10 +35,9 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) return -EINVAL; - remap_pfn_range(vma, vma->vm_start, - __pa(pde_data(file_inode(file))) >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot); - return 0; + return remap_pfn_range(vma, vma->vm_start, + __pa(pde_data(file_inode(file))) >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot); } static const struct proc_ops page_map_proc_ops = { @@ -45,13 +46,35 @@ static const struct proc_ops page_map_proc_ops = { .proc_mmap = page_map_mmap, }; +static union { + struct systemcfg data; + u8 page[PAGE_SIZE]; +} systemcfg_data_store __page_aligned_data; +struct systemcfg *systemcfg = &systemcfg_data_store.data; static int __init proc_ppc64_init(void) { struct proc_dir_entry *pde; + strcpy((char *)systemcfg->eye_catcher, "SYSTEMCFG:PPC64"); + systemcfg->version.major = SYSTEMCFG_MAJOR; + systemcfg->version.minor = SYSTEMCFG_MINOR; + systemcfg->processor = mfspr(SPRN_PVR); + /* + * Fake the old platform number for pSeries and add + * in LPAR bit if necessary + */ + systemcfg->platform = 0x100; + if (firmware_has_feature(FW_FEATURE_LPAR)) + systemcfg->platform |= 1; + systemcfg->physicalMemorySize = memblock_phys_mem_size(); + systemcfg->dcache_size = ppc64_caches.l1d.size; + systemcfg->dcache_line_size = ppc64_caches.l1d.line_size; + systemcfg->icache_size = ppc64_caches.l1i.size; + systemcfg->icache_line_size = ppc64_caches.l1i.line_size; + pde = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL, - &page_map_proc_ops, 
vdso_data); + &page_map_proc_ops, systemcfg); if (!pde) return 1; proc_set_size(pde, PAGE_SIZE); @@ -60,7 +83,7 @@ static int __init proc_ppc64_init(void) } __initcall(proc_ppc64_init); -#endif /* CONFIG_PPC64 */ +#endif /* CONFIG_PPC64_PROC_SYSTEMCFG */ /* * Create the ppc64 and ppc64/rtas directories early. This allows us to diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ff61a3e7984c..7b739b9a91ab 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -54,7 +54,7 @@ #include <asm/firmware.h> #include <asm/hw_irq.h> #endif -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/exec.h> #include <asm/livepatch.h> #include <asm/cpu_has_feature.h> diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 0be07ed407c7..e0059842a1c6 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -791,7 +791,7 @@ void __init early_init_devtree(void *params) DBG(" -> early_init_devtree(%px)\n", params); /* Too early to BUG_ON(), do it by hand */ - if (!early_init_dt_verify(params)) + if (!early_init_dt_verify(params, __pa(params))) panic("BUG: Failed verifying flat device tree, bad version?"); of_scan_flat_dt(early_init_dt_scan_model, NULL); @@ -908,6 +908,9 @@ void __init early_init_devtree(void *params) mmu_early_init_devtree(); + /* Setup param area for passing additional parameters to fadump capture kernel. */ + fadump_setup_param_area(); + #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index fbb68fc28ed3..8e776ba39497 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2792,90 +2792,6 @@ static void __init flatten_device_tree(void) dt_struct_start, dt_struct_end); } -#ifdef CONFIG_PPC_MAPLE -/* PIBS Version 1.05.0000 04/26/2005 has an incorrect /ht/isa/ranges property. - * The values are bad, and it doesn't even have the right number of cells. 
*/ -static void __init fixup_device_tree_maple(void) -{ - phandle isa; - u32 rloc = 0x01002000; /* IO space; PCI device = 4 */ - u32 isa_ranges[6]; - char *name; - - name = "/ht@0/isa@4"; - isa = call_prom("finddevice", 1, 1, ADDR(name)); - if (!PHANDLE_VALID(isa)) { - name = "/ht@0/isa@6"; - isa = call_prom("finddevice", 1, 1, ADDR(name)); - rloc = 0x01003000; /* IO space; PCI device = 6 */ - } - if (!PHANDLE_VALID(isa)) - return; - - if (prom_getproplen(isa, "ranges") != 12) - return; - if (prom_getprop(isa, "ranges", isa_ranges, sizeof(isa_ranges)) - == PROM_ERROR) - return; - - if (isa_ranges[0] != 0x1 || - isa_ranges[1] != 0xf4000000 || - isa_ranges[2] != 0x00010000) - return; - - prom_printf("Fixing up bogus ISA range on Maple/Apache...\n"); - - isa_ranges[0] = 0x1; - isa_ranges[1] = 0x0; - isa_ranges[2] = rloc; - isa_ranges[3] = 0x0; - isa_ranges[4] = 0x0; - isa_ranges[5] = 0x00010000; - prom_setprop(isa, name, "ranges", - isa_ranges, sizeof(isa_ranges)); -} - -#define CPC925_MC_START 0xf8000000 -#define CPC925_MC_LENGTH 0x1000000 -/* The values for memory-controller don't have right number of cells */ -static void __init fixup_device_tree_maple_memory_controller(void) -{ - phandle mc; - u32 mc_reg[4]; - char *name = "/hostbridge@f8000000"; - u32 ac, sc; - - mc = call_prom("finddevice", 1, 1, ADDR(name)); - if (!PHANDLE_VALID(mc)) - return; - - if (prom_getproplen(mc, "reg") != 8) - return; - - prom_getprop(prom.root, "#address-cells", &ac, sizeof(ac)); - prom_getprop(prom.root, "#size-cells", &sc, sizeof(sc)); - if ((ac != 2) || (sc != 2)) - return; - - if (prom_getprop(mc, "reg", mc_reg, sizeof(mc_reg)) == PROM_ERROR) - return; - - if (mc_reg[0] != CPC925_MC_START || mc_reg[1] != CPC925_MC_LENGTH) - return; - - prom_printf("Fixing up bogus hostbridge on Maple...\n"); - - mc_reg[0] = 0x0; - mc_reg[1] = CPC925_MC_START; - mc_reg[2] = 0x0; - mc_reg[3] = CPC925_MC_LENGTH; - prom_setprop(mc, name, "reg", mc_reg, sizeof(mc_reg)); -} -#else -#define fixup_device_tree_maple() -#define fixup_device_tree_maple_memory_controller() -#endif - #ifdef CONFIG_PPC_CHRP /* * Pegasos and BriQ lacks the "ranges" property in the isa node @@ -2932,7 +2848,7 @@ static void __init fixup_device_tree_chrp(void) #endif #if defined(CONFIG_PPC64) && defined(CONFIG_PPC_PMAC) -static void __init fixup_device_tree_pmac(void) +static void __init fixup_device_tree_pmac64(void) { phandle u3, i2c, mpic; u32 u3_rev; @@ -2972,7 +2888,31 @@ static void __init fixup_device_tree_pmac(void) &parent, sizeof(parent)); } #else -#define fixup_device_tree_pmac() +#define fixup_device_tree_pmac64() +#endif + +#ifdef CONFIG_PPC_PMAC +static void __init fixup_device_tree_pmac(void) +{ + __be32 val = 1; + char type[8]; + phandle node; + + // Some pmacs are missing #size-cells on escc nodes + for (node = 0; prom_next_node(&node); ) { + type[0] = '\0'; + prom_getprop(node, "device_type", type, sizeof(type)); + if (prom_strcmp(type, "escc")) + continue; + + if (prom_getproplen(node, "#size-cells") != PROM_ERROR) + continue; + + prom_setprop(node, NULL, "#size-cells", &val, sizeof(val)); + } +} +#else +static inline void fixup_device_tree_pmac(void) { } #endif #ifdef CONFIG_PPC_EFIKA @@ -3193,10 +3133,9 @@ static inline void fixup_device_tree_pasemi(void) { } static void __init fixup_device_tree(void) { - fixup_device_tree_maple(); - fixup_device_tree_maple_memory_controller(); fixup_device_tree_chrp(); fixup_device_tree_pmac(); + fixup_device_tree_pmac64(); fixup_device_tree_efika(); fixup_device_tree_pasemi(); } diff --git 
a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index f7e86e09c49f..d31c9799cab2 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1390,21 +1390,14 @@ bool __ref rtas_busy_delay(int status) */ ms = clamp(ms, 1U, 1000U); /* - * The delay hint is an order-of-magnitude suggestion, not - * a minimum. It is fine, possibly even advantageous, for - * us to pause for less time than hinted. For small values, - * use usleep_range() to ensure we don't sleep much longer - * than actually needed. - * - * See Documentation/timers/timers-howto.rst for - * explanation of the threshold used here. In effect we use - * usleep_range() for 9900 and 9901, msleep() for - * 9902-9905. + * The delay hint is an order-of-magnitude suggestion, not a + * minimum. It is fine, possibly even advantageous, for us to + * pause for less time than hinted. To make sure pause time will + * not be way longer than requested independent of HZ + * configuration, use fsleep(). See fsleep() for details of + * used sleeping functions. */ - if (ms <= 20) - usleep_range(ms * 100, ms * 1000); - else - msleep(ms); + fsleep(ms * 1000); break; case RTAS_BUSY: ret = true; diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c index 9e0efb657f39..3a28795b4ed8 100644 --- a/arch/powerpc/kernel/secure_boot.c +++ b/arch/powerpc/kernel/secure_boot.c @@ -5,6 +5,7 @@ */ #include <linux/types.h> #include <linux/of.h> +#include <linux/string_choices.h> #include <asm/secure_boot.h> static struct device_node *get_ppc_fw_sb_node(void) @@ -38,7 +39,7 @@ bool is_ppc_secureboot_enabled(void) of_node_put(node); out: - pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled"); + pr_info("Secure boot mode %s\n", str_enabled_disabled(enabled)); return enabled; } @@ -62,7 +63,7 @@ bool is_ppc_trustedboot_enabled(void) of_node_put(node); out: - pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled"); + pr_info("Trusted boot mode %s\n", str_enabled_disabled(enabled)); return enabled; } diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 4856e1a5161c..fbb7ebd8aa08 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -14,7 +14,7 @@ #include <linux/debugfs.h> #include <asm/asm-prototypes.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/security_features.h> #include <asm/sections.h> #include <asm/setup.h> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 943430077375..6fa179448c33 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -67,6 +67,7 @@ #include <asm/cpu_has_feature.h> #include <asm/kasan.h> #include <asm/mce.h> +#include <asm/systemcfg.h> #include "setup.h" @@ -560,7 +561,9 @@ void __init smp_setup_cpu_maps(void) out: of_node_put(dn); } - vdso_data->processorCount = num_present_cpus(); +#endif +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->processorCount = num_present_cpus(); #endif /* CONFIG_PPC64 */ /* Initialize CPU <=> thread mapping/ @@ -997,9 +1000,11 @@ void __init setup_arch(char **cmdline_p) initmem_init(); /* - * Reserve large chunks of memory for use by CMA for KVM and hugetlb. These must - * be called after initmem_init(), so that pageblock_order is initialised. + * Reserve large chunks of memory for use by CMA for fadump, KVM and + * hugetlb. These must be called after initmem_init(), so that + * pageblock_order is initialised. 
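+ * (CMA_MIN_ALIGNMENT_BYTES is pageblock_nr_pages * PAGE_SIZE, per
+ * include/linux/cma.h, so none of these CMA areas can be carved out
+ * before the pageblock size is known.)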
*/ + fadump_cma_init(); kvm_cma_reserve(); gigantic_hugetlb_cma_reserve(); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index e515c1f7d8d3..75dbf3e0d9c4 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -40,7 +40,7 @@ #include <asm/time.h> #include <asm/serial.h> #include <asm/udbg.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/cpu_has_feature.h> #include <asm/asm-prototypes.h> #include <asm/kdump.h> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 22f83fbbc762..e67f3048611f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -60,7 +60,7 @@ #include <asm/xmon.h> #include <asm/udbg.h> #include <asm/kexec.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/ftrace.h> #include <asm/opal.h> #include <asm/cputhreads.h> @@ -920,6 +920,7 @@ static int __init disable_hardlockup_detector(void) hardlockup_detector_disable(); #else if (firmware_has_feature(FW_FEATURE_LPAR)) { + check_kvm_guest(); if (is_kvm_guest()) hardlockup_detector_disable(); } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 4ab9b8cee77a..5ac7084eebc0 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -61,6 +61,7 @@ #include <asm/ftrace.h> #include <asm/kup.h> #include <asm/fadump.h> +#include <asm/systemcfg.h> #include <trace/events/ipi.h> @@ -1186,8 +1187,8 @@ int generic_cpu_disable(void) return -EBUSY; set_cpu_online(cpu, false); -#ifdef CONFIG_PPC64 - vdso_data->processorCount--; +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->processorCount--; #endif /* Update affinity of all IRQs previously aimed at this CPU */ irq_migrate_all_off_this_cpu(); @@ -1642,10 +1643,12 @@ void start_secondary(void *unused) secondary_cpu_time_init(); -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG if (system_state == SYSTEM_RUNNING) - vdso_data->processorCount++; + systemcfg->processorCount++; +#endif +#ifdef CONFIG_PPC64 vdso_getcpu_init(); #endif set_numa_node(numa_cpu_lookup_table[cpu]); diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c index 1502b7e439ca..7cfd0710e757 100644 --- a/arch/powerpc/kernel/static_call.c +++ b/arch/powerpc/kernel/static_call.c @@ -2,7 +2,7 @@ #include <linux/memory.h> #include <linux/static_call.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) { diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index ebae8415dfbb..d8b4ab78bef0 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -553,3 +553,7 @@ 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules 462 common mseal sys_mseal +463 common setxattrat sys_setxattrat +464 common getxattrat sys_getxattrat +465 common listxattrat sys_listxattrat +466 common removexattrat sys_removexattrat diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index b842c83ab497..6b3dd6decdf9 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -17,6 +17,7 @@ #include <asm/hvcall.h> #include <asm/machdep.h> #include <asm/smp.h> +#include <asm/time.h> #include <asm/pmc.h> #include <asm/firmware.h> #include <asm/idle.h> diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 0ff9f038e800..0727332ad86f 100644 --- 
a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -71,11 +71,11 @@ #include <asm/vdso_datapage.h> #include <asm/firmware.h> #include <asm/mce.h> +#include <asm/systemcfg.h> /* powerpc clocksource/clockevent code */ #include <linux/clockchips.h> -#include <linux/timekeeper_internal.h> static u64 timebase_read(struct clocksource *); static struct clocksource clocksource_timebase = { @@ -951,6 +951,9 @@ void __init time_init(void) } vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->tb_ticks_per_sec = tb_ticks_per_sec; +#endif /* initialise and enable the large decrementer (if we have one) */ set_decrementer_max(); diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile index 125f4ca588b9..d6c3885453bd 100644 --- a/arch/powerpc/kernel/trace/Makefile +++ b/arch/powerpc/kernel/trace/Makefile @@ -9,12 +9,15 @@ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_ftrace_64_pg.o = $(CC_FLAGS_FTRACE) endif -obj32-$(CONFIG_FUNCTION_TRACER) += ftrace.o ftrace_entry.o -ifdef CONFIG_MPROFILE_KERNEL -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace.o ftrace_entry.o +ifdef CONFIG_FUNCTION_TRACER +obj32-y += ftrace.o ftrace_entry.o +ifeq ($(CONFIG_MPROFILE_KERNEL)$(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY),) +obj64-y += ftrace_64_pg.o ftrace_64_pg_entry.o else -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o ftrace_64_pg_entry.o +obj64-y += ftrace.o ftrace_entry.o +endif endif + obj-$(CONFIG_TRACING) += trace_clock.o obj-$(CONFIG_PPC64) += $(obj64-y) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index d8d6b4fd9a14..5ccd791761e8 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -23,7 +23,7 @@ #include <linux/list.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/ftrace.h> #include <asm/syscall.h> #include <asm/inst.h> @@ -37,8 +37,12 @@ unsigned long ftrace_call_adjust(unsigned long addr) if (addr >= (unsigned long)__exittext_begin && addr < (unsigned long)__exittext_end) return 0; - if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) + if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY) && + !IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { addr += MCOUNT_INSN_SIZE; + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + addr += MCOUNT_INSN_SIZE; + } return addr; } @@ -82,7 +86,7 @@ static inline int ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_ { int ret = ftrace_validate_inst(ip, old); - if (!ret) + if (!ret && !ppc_inst_equal(old, new)) ret = patch_instruction((u32 *)ip, new); return ret; } @@ -106,28 +110,68 @@ static unsigned long find_ftrace_tramp(unsigned long ip) return 0; } +#ifdef CONFIG_MODULES +static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long addr) +{ + struct module *mod = NULL; + + preempt_disable(); + mod = __module_text_address(ip); + preempt_enable(); + + if (!mod) { + pr_err("No module loaded at addr=%lx\n", ip); + return 0; + } + + return (addr == (unsigned long)ftrace_caller ?
mod->arch.tramp : mod->arch.tramp_regs); +} +#else +static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long addr) +{ + return 0; +} +#endif + +static unsigned long ftrace_get_ool_stub(struct dyn_ftrace *rec) +{ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + return rec->arch.ool_stub; +#else + BUILD_BUG(); +#endif +} + static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_inst_t *call_inst) { - unsigned long ip = rec->ip; + unsigned long ip; unsigned long stub; - if (is_offset_in_branch_range(addr - ip)) { + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; /* second instruction in stub */ + else + ip = rec->ip; + + if (!is_offset_in_branch_range(addr - ip) && addr != FTRACE_ADDR && + addr != FTRACE_REGS_ADDR) { + /* This can only happen with ftrace direct */ + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS)) { + pr_err("0x%lx (0x%lx): Unexpected target address 0x%lx\n", + ip, rec->ip, addr); + return -EINVAL; + } + addr = FTRACE_ADDR; + } + + if (is_offset_in_branch_range(addr - ip)) /* Within range */ stub = addr; -#ifdef CONFIG_MODULES - } else if (rec->arch.mod) { - /* Module code would be going to one of the module stubs */ - stub = (addr == (unsigned long)ftrace_caller ? rec->arch.mod->arch.tramp : - rec->arch.mod->arch.tramp_regs); -#endif - } else if (core_kernel_text(ip)) { + else if (core_kernel_text(ip)) /* We would be branching to one of our ftrace stubs */ stub = find_ftrace_tramp(ip); - if (!stub) { - pr_err("0x%lx: No ftrace stubs reachable\n", ip); - return -EINVAL; - } - } else { + else + stub = ftrace_lookup_module_stub(ip, addr); + + if (!stub) { + pr_err("0x%lx (0x%lx): No ftrace stubs reachable\n", ip, rec->ip); return -EINVAL; } @@ -135,6 +179,145 @@ static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_ return 0; } +static int ftrace_init_ool_stub(struct module *mod, struct dyn_ftrace *rec) +{ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + static int ool_stub_text_index, ool_stub_text_end_index, ool_stub_inittext_index; + int ret = 0, ool_stub_count, *ool_stub_index; + ppc_inst_t inst; + /* + * See ftrace_entry.S if changing the below instruction sequence, as we rely on + * decoding the last branch instruction here to recover the correct function ip. + */ + struct ftrace_ool_stub *ool_stub, ool_stub_template = { + .insn = { + PPC_RAW_MFLR(_R0), + PPC_RAW_NOP(), /* bl ftrace_caller */ + PPC_RAW_MTLR(_R0), + PPC_RAW_NOP() /* b rec->ip + 4 */ + } + }; + + WARN_ON(rec->arch.ool_stub); + + if (is_kernel_inittext(rec->ip)) { + ool_stub = ftrace_ool_stub_inittext; + ool_stub_index = &ool_stub_inittext_index; + ool_stub_count = ftrace_ool_stub_inittext_count; + } else if (is_kernel_text(rec->ip)) { + /* + * ftrace records are sorted, so we first use up the stub area within .text + * (ftrace_ool_stub_text) before using the area at the end of .text + * (ftrace_ool_stub_text_end), unless the stub is out of range of the record. 
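+ * (A relative 'b'/'bl' on powerpc reaches +/- 32MB, so a record that is
+ * farther than that from the next free in-.text stub spills over to the
+ * pool placed at the end of .text.)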
+ */ + if (ool_stub_text_index >= ftrace_ool_stub_text_count || + !is_offset_in_branch_range((long)rec->ip - + (long)&ftrace_ool_stub_text[ool_stub_text_index])) { + ool_stub = ftrace_ool_stub_text_end; + ool_stub_index = &ool_stub_text_end_index; + ool_stub_count = ftrace_ool_stub_text_end_count; + } else { + ool_stub = ftrace_ool_stub_text; + ool_stub_index = &ool_stub_text_index; + ool_stub_count = ftrace_ool_stub_text_count; + } +#ifdef CONFIG_MODULES + } else if (mod) { + ool_stub = mod->arch.ool_stubs; + ool_stub_index = &mod->arch.ool_stub_index; + ool_stub_count = mod->arch.ool_stub_count; +#endif + } else { + return -EINVAL; + } + + ool_stub += (*ool_stub_index)++; + + if (WARN_ON(*ool_stub_index > ool_stub_count)) + return -EINVAL; + + if (!is_offset_in_branch_range((long)rec->ip - (long)&ool_stub->insn[0]) || + !is_offset_in_branch_range((long)(rec->ip + MCOUNT_INSN_SIZE) - + (long)&ool_stub->insn[3])) { + pr_err("%s: ftrace ool stub out of range (%p -> %p).\n", + __func__, (void *)rec->ip, (void *)&ool_stub->insn[0]); + return -EINVAL; + } + + rec->arch.ool_stub = (unsigned long)&ool_stub->insn[0]; + + /* bl ftrace_caller */ + if (!mod) + ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &inst); +#ifdef CONFIG_MODULES + else + /* + * We can't use ftrace_get_call_inst() since that uses + * __module_text_address(rec->ip) to look up the module. + * But, since the module is not fully formed at this stage, + * the lookup fails. We know the target though, so generate + * the branch inst directly. + */ + inst = ftrace_create_branch_inst(ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE, + mod->arch.tramp, 1); +#endif + ool_stub_template.insn[1] = ppc_inst_val(inst); + + /* b rec->ip + 4 */ + if (!ret && create_branch(&inst, &ool_stub->insn[3], rec->ip + MCOUNT_INSN_SIZE, 0)) + return -EINVAL; + ool_stub_template.insn[3] = ppc_inst_val(inst); + + if (!ret) + ret = patch_instructions((u32 *)ool_stub, (u32 *)&ool_stub_template, + sizeof(ool_stub_template), false); + + return ret; +#else /* !CONFIG_PPC_FTRACE_OUT_OF_LINE */ + BUILD_BUG(); +#endif +} + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS +static const struct ftrace_ops *powerpc_rec_get_ops(struct dyn_ftrace *rec) +{ + const struct ftrace_ops *ops = NULL; + + if (rec->flags & FTRACE_FL_CALL_OPS_EN) { + ops = ftrace_find_unique_ops(rec); + WARN_ON_ONCE(!ops); + } + + if (!ops) + ops = &ftrace_list_ops; + + return ops; +} + +static int ftrace_rec_set_ops(struct dyn_ftrace *rec, const struct ftrace_ops *ops) +{ + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + return patch_ulong((void *)(ftrace_get_ool_stub(rec) - sizeof(unsigned long)), + (unsigned long)ops); + else + return patch_ulong((void *)(rec->ip - MCOUNT_INSN_SIZE - sizeof(unsigned long)), + (unsigned long)ops); +} + +static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) +{ + return ftrace_rec_set_ops(rec, &ftrace_nop_ops); +} + +static int ftrace_rec_update_ops(struct dyn_ftrace *rec) +{ + return ftrace_rec_set_ops(rec, powerpc_rec_get_ops(rec)); +} +#else +static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) { return 0; } +static int ftrace_rec_update_ops(struct dyn_ftrace *rec) { return 0; } +#endif + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { @@ -147,18 +330,33 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { ppc_inst_t old, new; - int ret; + unsigned long ip = rec->ip; 
+ int ret = 0; /* This can only ever be called during module load */ - if (WARN_ON(!IS_ENABLED(CONFIG_MODULES) || core_kernel_text(rec->ip))) + if (WARN_ON(!IS_ENABLED(CONFIG_MODULES) || core_kernel_text(ip))) return -EINVAL; old = ppc_inst(PPC_RAW_NOP()); - ret = ftrace_get_call_inst(rec, addr, &new); + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { + ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; /* second instruction in stub */ + ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &old); + } + + ret |= ftrace_get_call_inst(rec, addr, &new); + + if (!ret) + ret = ftrace_modify_code(ip, old, new); + + ret = ftrace_rec_update_ops(rec); if (ret) return ret; - return ftrace_modify_code(rec->ip, old, new); + if (!ret && IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + ret = ftrace_modify_code(rec->ip, ppc_inst(PPC_RAW_NOP()), + ppc_inst(PPC_RAW_BRANCH((long)ftrace_get_ool_stub(rec) - (long)rec->ip))); + + return ret; } int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) @@ -191,6 +389,13 @@ void ftrace_replace_code(int enable) new_addr = ftrace_get_addr_new(rec); update = ftrace_update_record(rec, enable); + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) && update != FTRACE_UPDATE_IGNORE) { + ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; + ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &nop_inst); + if (ret) + goto out; + } + switch (update) { case FTRACE_UPDATE_IGNORE: default: @@ -198,16 +403,19 @@ void ftrace_replace_code(int enable) case FTRACE_UPDATE_MODIFY_CALL: ret = ftrace_get_call_inst(rec, new_addr, &new_call_inst); ret |= ftrace_get_call_inst(rec, addr, &call_inst); + ret |= ftrace_rec_update_ops(rec); old = call_inst; new = new_call_inst; break; case FTRACE_UPDATE_MAKE_NOP: ret = ftrace_get_call_inst(rec, addr, &call_inst); + ret |= ftrace_rec_set_nop_ops(rec); old = call_inst; new = nop_inst; break; case FTRACE_UPDATE_MAKE_CALL: ret = ftrace_get_call_inst(rec, new_addr, &call_inst); + ret |= ftrace_rec_update_ops(rec); old = nop_inst; new = call_inst; break; @@ -215,6 +423,24 @@ void ftrace_replace_code(int enable) if (!ret) ret = ftrace_modify_code(ip, old, new); + + if (!ret && IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) && + (update == FTRACE_UPDATE_MAKE_NOP || update == FTRACE_UPDATE_MAKE_CALL)) { + /* Update the actual ftrace location */ + call_inst = ppc_inst(PPC_RAW_BRANCH((long)ftrace_get_ool_stub(rec) - + (long)rec->ip)); + nop_inst = ppc_inst(PPC_RAW_NOP()); + ip = rec->ip; + + if (update == FTRACE_UPDATE_MAKE_NOP) + ret = ftrace_modify_code(ip, call_inst, nop_inst); + else + ret = ftrace_modify_code(ip, nop_inst, call_inst); + + if (ret) + goto out; + } + if (ret) goto out; } @@ -234,20 +460,27 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) /* Verify instructions surrounding the ftrace location */ if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) { /* Expect nops */ - ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_NOP())); + if (!IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_NOP())); if (!ret) ret = ftrace_validate_inst(ip, ppc_inst(PPC_RAW_NOP())); } else if (IS_ENABLED(CONFIG_PPC32)) { /* Expected sequence: 'mflr r0', 'stw r0,4(r1)', 'bl _mcount' */ ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0))); - if (!ret) - ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4))); + if (ret) + return ret; + ret = ftrace_modify_code(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4)), + ppc_inst(PPC_RAW_NOP())); } 
else if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) { /* Expected sequence: 'mflr r0', ['std r0,16(r1)'], 'bl _mcount' */ ret = ftrace_read_inst(ip - 4, &old); if (!ret && !ppc_inst_equal(old, ppc_inst(PPC_RAW_MFLR(_R0)))) { + /* Gcc v5.x emits the additional 'std' instruction, gcc v6.x doesn't */ ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0))); - ret |= ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16))); + if (ret) + return ret; + ret = ftrace_modify_code(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16)), + ppc_inst(PPC_RAW_NOP())); } } else { return -EINVAL; @@ -256,13 +489,9 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) if (ret) return ret; - if (!core_kernel_text(ip)) { - if (!mod) { - pr_err("0x%lx: No module provided for non-kernel address\n", ip); - return -EFAULT; - } - rec->arch.mod = mod; - } + /* Set up out-of-line stub */ + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + return ftrace_init_ool_stub(mod, rec); /* Nop-out the ftrace location */ new = ppc_inst(PPC_RAW_NOP()); @@ -302,6 +531,13 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ppc_inst_t old, new; int ret; + /* + * When using CALL_OPS, the function to call is associated with the + * call site, and we don't have a global function pointer to update. + */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + return 0; + old = ppc_inst_read((u32 *)&ftrace_call); new = ftrace_create_branch_inst(ip, ppc_function_entry(func), 1); ret = ftrace_modify_code(ip, old, new); @@ -421,7 +657,7 @@ int __init ftrace_dyn_arch_init(void) void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - unsigned long sp = fregs->regs.gpr[1]; + unsigned long sp = arch_ftrace_regs(fregs)->regs.gpr[1]; int bit; if (unlikely(ftrace_graph_is_dead())) @@ -439,6 +675,6 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, ftrace_test_recursion_unlock(bit); out: - fregs->regs.link = parent_ip; + arch_ftrace_regs(fregs)->regs.link = parent_ip; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c index 12fab1803bcf..98787376eb87 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.c +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c @@ -23,7 +23,7 @@ #include <linux/list.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/ftrace.h> #include <asm/syscall.h> #include <asm/inst.h> @@ -116,6 +116,20 @@ static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op) } #ifdef CONFIG_MODULES +static struct module *ftrace_lookup_module(struct dyn_ftrace *rec) +{ + struct module *mod; + + preempt_disable(); + mod = __module_text_address(rec->ip); + preempt_enable(); + + if (!mod) + pr_err("No module loaded at addr=%lx\n", rec->ip); + + return mod; +} + static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) @@ -124,6 +138,12 @@ __ftrace_make_nop(struct module *mod, unsigned long ip = rec->ip; ppc_inst_t op, pop; + if (!mod) { + mod = ftrace_lookup_module(rec); + if (!mod) + return -EINVAL; + } + /* read where this goes */ if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); @@ -366,27 +386,6 @@ int ftrace_make_nop(struct module *mod, return -EINVAL; } - /* - * Out of range jumps are called from modules. - * We should either already have a pointer to the module - * or it has been passed in.
- */ - if (!rec->arch.mod) { - if (!mod) { - pr_err("No module loaded addr=%lx\n", addr); - return -EFAULT; - } - rec->arch.mod = mod; - } else if (mod) { - if (mod != rec->arch.mod) { - pr_err("Record mod %p not equal to passed in mod %p\n", - rec->arch.mod, mod); - return -EINVAL; - } - /* nothing to do if mod == rec->arch.mod */ - } else - mod = rec->arch.mod; - return __ftrace_make_nop(mod, rec, addr); } @@ -411,7 +410,10 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) ppc_inst_t op[2]; void *ip = (void *)rec->ip; unsigned long entry, ptr, tramp; - struct module *mod = rec->arch.mod; + struct module *mod = ftrace_lookup_module(rec); + + if (!mod) + return -EINVAL; /* read where this goes */ if (copy_inst_from_kernel_nofault(op, ip)) @@ -533,16 +535,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return -EINVAL; } - /* - * Out of range jumps are called from modules. - * Being that we are converting from nop, it had better - * already have a module defined. - */ - if (!rec->arch.mod) { - pr_err("No module loaded\n"); - return -EINVAL; - } - return __ftrace_make_call(rec, addr); } @@ -555,7 +547,10 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, ppc_inst_t op; unsigned long ip = rec->ip; unsigned long entry, ptr, tramp; - struct module *mod = rec->arch.mod; + struct module *mod = ftrace_lookup_module(rec); + + if (!mod) + return -EINVAL; /* If we never set up ftrace trampolines, then bail */ if (!mod->arch.tramp || !mod->arch.tramp_regs) { @@ -668,14 +663,6 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, return -EINVAL; } - /* - * Out of range jumps are called from modules. - */ - if (!rec->arch.mod) { - pr_err("No module loaded\n"); - return -EINVAL; - } - return __ftrace_modify_call(rec, old_addr, addr); } #endif @@ -829,7 +816,7 @@ out: void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - fregs->regs.link = __prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]); + arch_ftrace_regs(fregs)->regs.link = __prepare_ftrace_return(parent_ip, ip, arch_ftrace_regs(fregs)->regs.gpr[1]); } #else unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index 76dbe9fd2c0f..2c1b24100eca 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -39,13 +39,37 @@ /* Create our stack frame + pt_regs */ PPC_STLU r1,-SWITCH_FRAME_SIZE(r1) + .if \allregs == 1 + SAVE_GPRS(11, 12, r1) + .endif + + /* Get the _mcount() call site out of LR */ + mflr r11 + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* Load the ftrace_op */ + PPC_LL r12, -(MCOUNT_INSN_SIZE*2 + SZL)(r11) + + /* Load direct_call from the ftrace_op */ + PPC_LL r12, FTRACE_OPS_DIRECT_CALL(r12) + PPC_LCMPI r12, 0 + .if \allregs == 1 + bne .Lftrace_direct_call_regs + .else + bne .Lftrace_direct_call + .endif +#endif + + /* Save the previous LR in pt_regs->link */ + PPC_STL r0, _LINK(r1) + /* Also save it in A's stack frame */ + PPC_STL r0, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE+LRSAVE(r1) + /* Save all gprs to pt_regs */ SAVE_GPR(0, r1) SAVE_GPRS(3, 10, r1) #ifdef CONFIG_PPC64 - /* Save the original return address in A's stack frame */ - std r0, LRSAVE+SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE(r1) /* Ok to continue? 
*/ lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 @@ -54,9 +78,9 @@ .if \allregs == 1 SAVE_GPR(2, r1) - SAVE_GPRS(11, 31, r1) + SAVE_GPRS(13, 31, r1) .else -#ifdef CONFIG_LIVEPATCH_64 +#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) SAVE_GPR(14, r1) #endif .endif @@ -67,80 +91,143 @@ .if \allregs == 1 /* Load special regs for save below */ + mfcr r7 mfmsr r8 mfctr r9 mfxer r10 - mfcr r11 .else /* Clear MSR to flag as ftrace_caller versus frace_regs_caller */ li r8, 0 .endif - /* Get the _mcount() call site out of LR */ - mflr r7 - /* Save it as pt_regs->nip */ - PPC_STL r7, _NIP(r1) - /* Also save it in B's stackframe header for proper unwind */ - PPC_STL r7, LRSAVE+SWITCH_FRAME_SIZE(r1) - /* Save the read LR in pt_regs->link */ - PPC_STL r0, _LINK(r1) - #ifdef CONFIG_PPC64 /* Save callee's TOC in the ABI compliant location */ std r2, STK_GOT(r1) LOAD_PACA_TOC() /* get kernel TOC in r2 */ +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + /* r11 points to the instruction following the call to ftrace */ + PPC_LL r5, -(MCOUNT_INSN_SIZE*2 + SZL)(r11) + PPC_LL r12, FTRACE_OPS_FUNC(r5) + mtctr r12 +#else /* !CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS */ +#ifdef CONFIG_PPC64 LOAD_REG_ADDR(r3, function_trace_op) ld r5,0(r3) #else lis r3,function_trace_op@ha lwz r5,function_trace_op@l(r3) #endif - -#ifdef CONFIG_LIVEPATCH_64 - mr r14, r7 /* remember old NIP */ #endif - /* Calculate ip from nip-4 into r3 for call below */ - subi r3, r7, MCOUNT_INSN_SIZE - - /* Put the original return address in r4 as parent_ip */ - mr r4, r0 - /* Save special regs */ PPC_STL r8, _MSR(r1) .if \allregs == 1 + PPC_STL r7, _CCR(r1) PPC_STL r9, _CTR(r1) PPC_STL r10, _XER(r1) - PPC_STL r11, _CCR(r1) .endif +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* Clear orig_gpr3 to later detect ftrace_direct call */ + li r7, 0 + PPC_STL r7, ORIG_GPR3(r1) +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* Save our real return address in nvr for return */ + .if \allregs == 0 + SAVE_GPR(15, r1) + .endif + mr r15, r11 + /* + * We want the ftrace location in the function, but our lr (in r11) + * points at the 'mtlr r0' instruction in the out of line stub. To + * recover the ftrace location, we read the branch instruction in the + * stub, and adjust our lr by the branch offset. + * + * See ftrace_init_ool_stub() for the profile sequence. + */ + lwz r8, MCOUNT_INSN_SIZE(r11) + slwi r8, r8, 6 + srawi r8, r8, 6 + add r3, r11, r8 + /* + * Override our nip to point past the branch in the original function. + * This allows reliable stack trace and the ftrace stack tracer to work as-is. 
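+	 * (The lwz/slwi/srawi sequence above sign-extends the 26-bit branch
+	 * offset, roughly r3 = r11 + ((s32)(insn << 6) >> 6) in C terms; this
+	 * lands on the ftrace location because the 'b' at r11 + 4 targets
+	 * rec->ip + 4.)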
+ */ + addi r11, r3, MCOUNT_INSN_SIZE +#else + /* Calculate ip from nip-4 into r3 for call below */ + subi r3, r11, MCOUNT_INSN_SIZE +#endif + + /* Save NIP as pt_regs->nip */ + PPC_STL r11, _NIP(r1) + /* Also save it in B's stackframe header for proper unwind */ + PPC_STL r11, LRSAVE+SWITCH_FRAME_SIZE(r1) +#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) + mr r14, r11 /* remember old NIP */ +#endif + + /* Put the original return address in r4 as parent_ip */ + mr r4, r0 + /* Load &pt_regs in r6 for call below */ addi r6, r1, STACK_INT_FRAME_REGS .endm .macro ftrace_regs_exit allregs - /* Load ctr with the possibly modified NIP */ - PPC_LL r3, _NIP(r1) +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* Check orig_gpr3 to detect ftrace_direct call */ + PPC_LL r3, ORIG_GPR3(r1) + PPC_LCMPI cr1, r3, 0 mtctr r3 +#endif + /* Restore possibly modified LR */ + PPC_LL r0, _LINK(r1) + +#ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* Load ctr with the possibly modified NIP */ + PPC_LL r3, _NIP(r1) #ifdef CONFIG_LIVEPATCH_64 cmpd r14, r3 /* has NIP been altered? */ #endif +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + beq cr1,2f + mtlr r3 + b 3f +#endif +2: mtctr r3 + mtlr r0 +3: + +#else /* !CONFIG_PPC_FTRACE_OUT_OF_LINE */ + /* Load LR with the possibly modified NIP */ + PPC_LL r3, _NIP(r1) + cmpd r14, r3 /* has NIP been altered? */ + bne- 1f + + mr r3, r15 + .if \allregs == 0 + REST_GPR(15, r1) + .endif +1: mtlr r3 +#endif + /* Restore gprs */ .if \allregs == 1 REST_GPRS(2, 31, r1) .else REST_GPRS(3, 10, r1) -#ifdef CONFIG_LIVEPATCH_64 +#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) REST_GPR(14, r1) #endif .endif - /* Restore possibly modified LR */ - PPC_LL r0, _LINK(r1) - mtlr r0 - #ifdef CONFIG_PPC64 /* Restore callee's TOC */ ld r2, STK_GOT(r1) @@ -153,23 +240,46 @@ /* Based on the cmpd above, if the NIP was altered handle livepatch */ bne- livepatch_handler #endif - bctr /* jump after _mcount site */ + + /* jump after _mcount site */ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + bnectr cr1 +#endif + /* + * Return with blr to keep the link stack balanced. The function profiling sequence + * uses 'mtlr r0' to restore LR. 
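+	 * (Every 'bl' pushes a return address onto the hardware link stack and
+	 * every 'blr' pops one; returning with bctr here would leave the 'bl'
+	 * from the out-of-line stub unpaired and degrade return prediction.)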
+ */ + blr +#else + bctr +#endif .endm -_GLOBAL(ftrace_regs_caller) - ftrace_regs_entry 1 - /* ftrace_call(r3, r4, r5, r6) */ +.macro ftrace_regs_func allregs +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + bctrl +#else + .if \allregs == 1 .globl ftrace_regs_call ftrace_regs_call: + .else +.globl ftrace_call +ftrace_call: + .endif + /* ftrace_call(r3, r4, r5, r6) */ bl ftrace_stub +#endif +.endm + +_GLOBAL(ftrace_regs_caller) + ftrace_regs_entry 1 + ftrace_regs_func 1 ftrace_regs_exit 1 _GLOBAL(ftrace_caller) ftrace_regs_entry 0 - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_call -ftrace_call: - bl ftrace_stub + ftrace_regs_func 0 ftrace_regs_exit 0 _GLOBAL(ftrace_stub) @@ -177,6 +287,11 @@ _GLOBAL(ftrace_stub) #ifdef CONFIG_PPC64 ftrace_no_trace: +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + REST_GPR(3, r1) + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + blr +#else mflr r3 mtctr r3 REST_GPR(3, r1) @@ -184,6 +299,22 @@ ftrace_no_trace: mtlr r0 bctr #endif +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +.Lftrace_direct_call_regs: + mtctr r12 + REST_GPRS(11, 12, r1) + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + bctr +.Lftrace_direct_call: + mtctr r12 + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + bctr +SYM_FUNC_START(ftrace_stub_direct_tramp) + blr +SYM_FUNC_END(ftrace_stub_direct_tramp) +#endif #ifdef CONFIG_LIVEPATCH_64 /* @@ -194,11 +325,17 @@ ftrace_no_trace: * We get here when a function A, calls another function B, but B has * been live patched with a new function C. * - * On entry: - * - we have no stack frame and can not allocate one + * On entry, we have no stack frame and can not allocate one. + * + * With PPC_FTRACE_OUT_OF_LINE=n, on entry: * - LR points back to the original caller (in A) * - CTR holds the new NIP in C * - r0, r11 & r12 are free + * + * With PPC_FTRACE_OUT_OF_LINE=y, on entry: + * - r0 points back to the original caller (in A) + * - LR holds the new NIP in C + * - r11 & r12 are free */ livepatch_handler: ld r12, PACA_THREAD_INFO(r13) @@ -208,18 +345,23 @@ livepatch_handler: addi r11, r11, 24 std r11, TI_livepatch_sp(r12) - /* Save toc & real LR on livepatch stack */ - std r2, -24(r11) - mflr r12 - std r12, -16(r11) - /* Store stack end marker */ lis r12, STACK_END_MAGIC@h ori r12, r12, STACK_END_MAGIC@l std r12, -8(r11) - /* Put ctr in r12 for global entry and branch there */ + /* Save toc & real LR on livepatch stack */ + std r2, -24(r11) +#ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE + mflr r12 + std r12, -16(r11) mfctr r12 +#else + std r0, -16(r11) + mflr r12 + /* Put ctr in r12 for global entry and branch there */ + mtctr r12 +#endif bctrl /* @@ -308,6 +450,14 @@ _GLOBAL(return_to_handler) blr #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +SYM_DATA(ftrace_ool_stub_text_count, .long CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE) + +SYM_START(ftrace_ool_stub_text, SYM_L_GLOBAL, .balign SZL) + .space CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE * FTRACE_OOL_STUB_SIZE +SYM_CODE_END(ftrace_ool_stub_text) +#endif + .pushsection ".tramp.ftrace.text","aw",@progbits; .globl ftrace_tramp_text ftrace_tramp_text: diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index 4b99208f5adc..0a72a537f879 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -39,9 +39,6 @@ void __init udbg_early_init(void) #elif defined(CONFIG_PPC_EARLY_DEBUG_RTAS_CONSOLE) /* RTAS console debug */ udbg_init_rtas_console(); -#elif defined(CONFIG_PPC_EARLY_DEBUG_MAPLE) - /* Maple real mode debug */ - 
udbg_init_maple_realmode(); #elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE) udbg_init_pas_realmode(); #elif defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 313802aff571..dfe8ed2192e8 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -205,29 +205,6 @@ void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride) udbg_use_uart(); } -#ifdef CONFIG_PPC_MAPLE - -#define UDBG_UART_MAPLE_ADDR ((void __iomem *)0xf40003f8) - -static u8 udbg_uart_in_maple(unsigned int reg) -{ - return real_readb(UDBG_UART_MAPLE_ADDR + reg); -} - -static void udbg_uart_out_maple(unsigned int reg, u8 val) -{ - real_writeb(val, UDBG_UART_MAPLE_ADDR + reg); -} - -void __init udbg_init_maple_realmode(void) -{ - udbg_uart_in = udbg_uart_in_maple; - udbg_uart_out = udbg_uart_out_maple; - udbg_use_uart(); -} - -#endif /* CONFIG_PPC_MAPLE */ - #ifdef CONFIG_PPC_PASEMI #define UDBG_UART_PAS_ADDR ((void __iomem *)0xfcff03f8UL) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index ee4b9d676cff..43379365ce1b 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -16,7 +16,6 @@ #include <linux/user.h> #include <linux/elf.h> #include <linux/security.h> -#include <linux/memblock.h> #include <linux/syscalls.h> #include <linux/time_namespace.h> #include <vdso/datapage.h> @@ -48,12 +47,13 @@ long sys_ni_syscall(void); */ static union { struct vdso_arch_data data; - u8 page[PAGE_SIZE]; + u8 page[2 * PAGE_SIZE]; } vdso_data_store __page_aligned_data; struct vdso_arch_data *vdso_data = &vdso_data_store.data; enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, + VVAR_BASE_PAGE_OFFSET, + VVAR_TIME_PAGE_OFFSET, VVAR_TIMENS_PAGE_OFFSET, VVAR_NR_PAGES, }; @@ -119,7 +119,7 @@ static struct vm_special_mapping vdso64_spec __ro_after_init = { #ifdef CONFIG_TIME_NS struct vdso_data *arch_get_vdso_data(void *vvar_page) { - return ((struct vdso_arch_data *)vvar_page)->data; + return vvar_page; } /* @@ -153,11 +153,14 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, unsigned long pfn; switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: + case VVAR_BASE_PAGE_OFFSET: + pfn = virt_to_pfn(vdso_data); + break; + case VVAR_TIME_PAGE_OFFSET: if (timens_page) pfn = page_to_pfn(timens_page); else - pfn = virt_to_pfn(vdso_data); + pfn = virt_to_pfn(vdso_data->data); break; #ifdef CONFIG_TIME_NS case VVAR_TIMENS_PAGE_OFFSET: @@ -170,7 +173,7 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, */ if (!timens_page) return VM_FAULT_SIGBUS; - pfn = virt_to_pfn(vdso_data); + pfn = virt_to_pfn(vdso_data->data); break; #endif /* CONFIG_TIME_NS */ default: @@ -349,25 +352,6 @@ static struct page ** __init vdso_setup_pages(void *start, void *end) static int __init vdso_init(void) { #ifdef CONFIG_PPC64 - /* - * Fill up the "systemcfg" stuff for backward compatibility - */ - strcpy((char *)vdso_data->eye_catcher, "SYSTEMCFG:PPC64"); - vdso_data->version.major = SYSTEMCFG_MAJOR; - vdso_data->version.minor = SYSTEMCFG_MINOR; - vdso_data->processor = mfspr(SPRN_PVR); - /* - * Fake the old platform number for pSeries and add - * in LPAR bit if necessary - */ - vdso_data->platform = 0x100; - if (firmware_has_feature(FW_FEATURE_LPAR)) - vdso_data->platform |= 1; - vdso_data->physicalMemorySize = memblock_phys_mem_size(); - vdso_data->dcache_size = ppc64_caches.l1d.size; - vdso_data->dcache_line_size = ppc64_caches.l1d.line_size; - vdso_data->icache_size = ppc64_caches.l1i.size; - 
diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile
index 31ca5a547004..0e3ed6fb199f 100644
--- a/arch/powerpc/kernel/vdso/Makefile
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -50,14 +50,18 @@ ldflags-$(CONFIG_LD_IS_LLD) += $(call cc-option,--ld-path=$(LD),-fuse-ld=lld)
 ldflags-$(CONFIG_LD_ORPHAN_WARN) += -Wl,--orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
 
 # Filter flags that clang will warn are unused for linking
-ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CFLAGS))
+ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS))
 
 CC32FLAGS := -m32
 CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc
-# This flag is supported by clang for 64-bit but not 32-bit so it will cause
-# an unused command line flag warning for this file.
 ifdef CONFIG_CC_IS_CLANG
+# This flag is supported by clang for 64-bit but not 32-bit so it will cause
+# an unused command line flag warning for this file.
 CC32FLAGSREMOVE += -fno-stack-clash-protection
+# -mstack-protector-guard values from the 64-bit build are not valid for the
+# 32-bit one. clang validates the values passed to these arguments during
+# parsing, even when -fno-stack-protector is passed afterwards.
+CC32FLAGSREMOVE += -mstack-protector-guard%
 endif
 LD32FLAGS := -Wl,-soname=linux-vdso32.so.1
 AS32FLAGS := -D__VDSO32__
diff --git a/arch/powerpc/kernel/vdso/cacheflush.S b/arch/powerpc/kernel/vdso/cacheflush.S
index 3b2479bd2f9a..0085ae464dac 100644
--- a/arch/powerpc/kernel/vdso/cacheflush.S
+++ b/arch/powerpc/kernel/vdso/cacheflush.S
@@ -30,7 +30,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
 #ifdef CONFIG_PPC64
 	mflr	r12
   .cfi_register lr,r12
-	get_realdatapage	r10, r11
+	get_datapage	r10
 	mtlr	r12
   .cfi_restore lr
 #endif
diff --git a/arch/powerpc/kernel/vdso/datapage.S b/arch/powerpc/kernel/vdso/datapage.S
index 2b19b6201a33..db8e167f0166 100644
--- a/arch/powerpc/kernel/vdso/datapage.S
+++ b/arch/powerpc/kernel/vdso/datapage.S
@@ -28,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
 	mflr	r12
   .cfi_register lr,r12
 	mr.	r4,r3
-	get_realdatapage	r3, r11
+	get_datapage	r3
 	mtlr	r12
 #ifdef __powerpc64__
 	addi	r3,r3,CFG_SYSCALL_MAP64
@@ -52,7 +52,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
   .cfi_startproc
 	mflr	r12
   .cfi_register lr,r12
-	get_realdatapage	r3, r11
+	get_datapage	r3
 #ifndef __powerpc64__
 	lwz	r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
 #endif
diff --git a/arch/powerpc/kernel/vdso/getrandom.S b/arch/powerpc/kernel/vdso/getrandom.S
index f3bbf931931c..a80d9fb436f7 100644
--- a/arch/powerpc/kernel/vdso/getrandom.S
+++ b/arch/powerpc/kernel/vdso/getrandom.S
@@ -31,8 +31,6 @@
 	PPC_STL		r2, PPC_MIN_STKFRM + STK_GOT(r1)
   .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT
 #endif
-	get_realdatapage	r8, r11
-	addi		r8, r8, VDSO_RNG_DATA_OFFSET
 	bl		CFUNC(DOTSYM(\funct))
 	PPC_LL		r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
 #ifdef __powerpc64__
diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S
index 5540d7021fa2..5333848322ca 100644
--- a/arch/powerpc/kernel/vdso/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso/gettimeofday.S
@@ -32,11 +32,10 @@
 	PPC_STL		r2, PPC_MIN_STKFRM + STK_GOT(r1)
   .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT
 #endif
-	get_datapage	r5
 	.ifeq	\call_time
-	addi		r5, r5, VDSO_DATA_OFFSET
+	get_datapage	r5 VDSO_DATA_OFFSET
 	.else
-	addi		r4, r5, VDSO_DATA_OFFSET
+	get_datapage	r4 VDSO_DATA_OFFSET
 	.endif
 	bl		CFUNC(DOTSYM(\funct))
 	PPC_LL		r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
diff --git a/arch/powerpc/kernel/vdso/vdso32.lds.S b/arch/powerpc/kernel/vdso/vdso32.lds.S
index 7b41d5d256e8..1a1b0b6d681a 100644
--- a/arch/powerpc/kernel/vdso/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso/vdso32.lds.S
@@ -16,7 +16,7 @@ OUTPUT_ARCH(powerpc:common)
 
 SECTIONS
 {
-	PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE);
+	PROVIDE(_vdso_datapage = . - 3 * PAGE_SIZE);
 	. = SIZEOF_HEADERS;
 
 	.hash		: { *(.hash) }			:text
diff --git a/arch/powerpc/kernel/vdso/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S
index 9481e4b892ed..e21b5506cad6 100644
--- a/arch/powerpc/kernel/vdso/vdso64.lds.S
+++ b/arch/powerpc/kernel/vdso/vdso64.lds.S
@@ -16,7 +16,7 @@ OUTPUT_ARCH(powerpc:common64)
 
 SECTIONS
 {
-	PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE);
+	PROVIDE(_vdso_datapage = . - 3 * PAGE_SIZE);
 	. = SIZEOF_HEADERS;
 
 	.hash		: { *(.hash) }			:text
diff --git a/arch/powerpc/kernel/vdso/vgetrandom.c b/arch/powerpc/kernel/vdso/vgetrandom.c
index 5f855d45fb7b..cc79b960a541 100644
--- a/arch/powerpc/kernel/vdso/vgetrandom.c
+++ b/arch/powerpc/kernel/vdso/vgetrandom.c
@@ -8,7 +8,7 @@
 #include <linux/types.h>
 
 ssize_t __c_kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state,
-			     size_t opaque_len, const struct vdso_rng_data *vd)
+			     size_t opaque_len)
 {
-	return __cvdso_getrandom_data(vd, buffer, len, flags, opaque_state, opaque_len);
+	return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
 }
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 7ab4e2fb28b1..b4c9decc7a75 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -265,14 +265,13 @@ SECTIONS
 	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
 		_sinittext = .;
 		INIT_TEXT
-
+		*(.tramp.ftrace.init);
 		/*
 		 *.init.text might be RO so we must ensure this section ends on
 		 * a page boundary.
 		 */
 		. = ALIGN(PAGE_SIZE);
 		_einittext = .;
-		*(.tramp.ftrace.init);
 	} :text
 
 	/* .exit.text is discarded at runtime, not link time,
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index 9738adabeb1f..dc65c1391157 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -736,13 +736,18 @@ int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
 	if (dn) {
 		u64 val;
 
-		of_property_read_u64(dn, "opal-base-address", &val);
+		ret = of_property_read_u64(dn, "opal-base-address", &val);
+		if (ret)
+			goto out;
+
 		ret = kexec_purgatory_get_set_symbol(image, "opal_base", &val,
 						     sizeof(val), false);
 		if (ret)
 			goto out;
 
-		of_property_read_u64(dn, "opal-entry-address", &val);
+		ret = of_property_read_u64(dn, "opal-entry-address", &val);
+		if (ret)
+			goto out;
 		ret = kexec_purgatory_get_set_symbol(image, "opal_entry", &val,
 						     sizeof(val), false);
 	}
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index ff6c38373957..d79c5d1098c0 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -422,7 +422,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter);
 
 kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing,
-			bool *writable)
+			bool *writable, struct page **page)
 {
 	ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
@@ -437,13 +437,14 @@ kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing,
 		kvm_pfn_t pfn;
 
 		pfn = (kvm_pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT;
-		get_page(pfn_to_page(pfn));
+		*page = pfn_to_page(pfn);
+		get_page(*page);
 		if (writable)
 			*writable = true;
 		return pfn;
 	}
 
-	return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable);
+	return kvm_faultin_pfn(vcpu, gfn, writing, writable, page);
 }
 EXPORT_SYMBOL_GPL(kvmppc_gpa_to_pfn);
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 4b3a8d80cfa3..5b7212edbb13 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -130,6 +130,7 @@ extern char etext[];
 int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
 			bool iswrite)
 {
+	struct page *page;
 	kvm_pfn_t hpaddr;
 	u64 vpn;
 	u64 vsid;
@@ -145,7 +146,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
 	bool writable;
 
 	/* Get host physical address for gpa */
-	hpaddr = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable);
+	hpaddr = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable, &page);
 	if (is_error_noslot_pfn(hpaddr)) {
 		printk(KERN_INFO "Couldn't get guest page for gpa %lx!\n",
 				 orig_pte->raddr);
@@ -232,7 +233,7 @@ next_pteg:
 
 	pte = kvmppc_mmu_hpte_cache_next(vcpu);
 	if (!pte) {
-		kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
+		kvm_release_page_unused(page);
 		r = -EAGAIN;
 		goto out;
 	}
@@ -250,7 +251,7 @@ next_pteg:
 
 	kvmppc_mmu_hpte_cache_map(vcpu, pte);
 
-	kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
+	kvm_release_page_clean(page);
 out:
 	return r;
 }
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index bc6a381b5346..be20aee6fd7d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -88,13 +88,14 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
 	struct hpte_cache *cpte;
 	unsigned long gfn = orig_pte->raddr >> PAGE_SHIFT;
 	unsigned long pfn;
+	struct page *page;
 
 	/* used to check for invalidations in progress */
 	mmu_seq = kvm->mmu_invalidate_seq;
 	smp_rmb();
 
 	/* Get host physical address for gpa */
-	pfn = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable);
+	pfn = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable, &page);
 	if (is_error_noslot_pfn(pfn)) {
 		printk(KERN_INFO "Couldn't get guest page for gpa %lx!\n",
 		       orig_pte->raddr);
@@ -121,13 +122,10 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
 
 	vpn = hpt_vpn(orig_pte->eaddr, map->host_vsid, MMU_SEGSIZE_256M);
 
-	kvm_set_pfn_accessed(pfn);
 	if (!orig_pte->may_write || !writable)
 		rflags |= PP_RXRX;
-	else {
+	else
 		mark_page_dirty(vcpu->kvm, gfn);
-		kvm_set_pfn_dirty(pfn);
-	}
 
 	if (!orig_pte->may_execute)
 		rflags |= HPTE_R_N;
@@ -202,8 +200,10 @@ map_again:
 	}
 
 out_unlock:
+	/* FIXME: Don't unconditionally pass unused=false. */
+	kvm_release_faultin_page(kvm, page, false,
+				 orig_pte->may_write && writable);
 	spin_unlock(&kvm->mmu_lock);
-	kvm_release_pfn_clean(pfn);
 	if (cpte)
 		kvmppc_mmu_hpte_cache_free(cpte);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 1b51b1c4713b..f305395cf26e 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -603,27 +603,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
 		write_ok = writing;
 	hva = gfn_to_hva_memslot(memslot, gfn);
 
-	/*
-	 * Do a fast check first, since __gfn_to_pfn_memslot doesn't
-	 * do it with !atomic && !async, which is how we call it.
-	 * We always ask for write permission since the common case
-	 * is that the page is writable.
-	 */
-	if (get_user_page_fast_only(hva, FOLL_WRITE, &page)) {
-		write_ok = true;
-	} else {
-		/* Call KVM generic code to do the slow-path check */
-		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL,
-					   writing, &write_ok, NULL);
-		if (is_error_noslot_pfn(pfn))
-			return -EFAULT;
-		page = NULL;
-		if (pfn_valid(pfn)) {
-			page = pfn_to_page(pfn);
-			if (PageReserved(page))
-				page = NULL;
-		}
-	}
+	pfn = __kvm_faultin_pfn(memslot, gfn, writing ? FOLL_WRITE : 0,
+				&write_ok, &page);
+	if (is_error_noslot_pfn(pfn))
+		return -EFAULT;
 
 	/*
 	 * Read the PTE from the process' radix tree and use that
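The KVM MMU conversions above all share one shape: __kvm_faultin_pfn() (or the kvmppc_gpa_to_pfn() wrapper) hands back both a pfn and the backing struct page, and kvm_release_faultin_page() replaces the old kvm_set_pfn_accessed()/kvm_set_pfn_dirty()/kvm_release_pfn_clean() trio. A condensed sketch of the pattern as used in these hunks (error paths trimmed, names taken from the diff, not a complete function):

    struct page *page;
    bool writable;
    kvm_pfn_t pfn;

    pfn = __kvm_faultin_pfn(memslot, gfn, writing ? FOLL_WRITE : 0,
                            &writable, &page);
    if (is_error_noslot_pfn(pfn))
            return -EFAULT;

    /* ... validate mmu_seq and insert the translation under mmu_lock ... */

    /* unused=false; dirty only if the mapping was actually writable */
    kvm_release_faultin_page(kvm, page, false, writing && writable);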
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 408d98f8a514..b3e6e73d6a08 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -821,7 +821,7 @@ bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, bool writing,
 int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
 				   unsigned long gpa,
 				   struct kvm_memory_slot *memslot,
-				   bool writing, bool kvm_ro,
+				   bool writing,
 				   pte_t *inserted_pte, unsigned int *levelp)
 {
 	struct kvm *kvm = vcpu->kvm;
@@ -829,40 +829,21 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
 	unsigned long mmu_seq;
 	unsigned long hva, gfn = gpa >> PAGE_SHIFT;
 	bool upgrade_write = false;
-	bool *upgrade_p = &upgrade_write;
 	pte_t pte, *ptep;
 	unsigned int shift, level;
 	int ret;
 	bool large_enable;
+	kvm_pfn_t pfn;
 
 	/* used to check for invalidations in progress */
 	mmu_seq = kvm->mmu_invalidate_seq;
 	smp_rmb();
 
-	/*
-	 * Do a fast check first, since __gfn_to_pfn_memslot doesn't
-	 * do it with !atomic && !async, which is how we call it.
-	 * We always ask for write permission since the common case
-	 * is that the page is writable.
-	 */
 	hva = gfn_to_hva_memslot(memslot, gfn);
-	if (!kvm_ro && get_user_page_fast_only(hva, FOLL_WRITE, &page)) {
-		upgrade_write = true;
-	} else {
-		unsigned long pfn;
-
-		/* Call KVM generic code to do the slow-path check */
-		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL,
-					   writing, upgrade_p, NULL);
-		if (is_error_noslot_pfn(pfn))
-			return -EFAULT;
-		page = NULL;
-		if (pfn_valid(pfn)) {
-			page = pfn_to_page(pfn);
-			if (PageReserved(page))
-				page = NULL;
-		}
-	}
+	pfn = __kvm_faultin_pfn(memslot, gfn, writing ? FOLL_WRITE : 0,
+				&upgrade_write, &page);
+	if (is_error_noslot_pfn(pfn))
+		return -EFAULT;
 
 	/*
 	 * Read the PTE from the process' radix tree and use that
@@ -950,7 +931,6 @@ int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
 	struct kvm_memory_slot *memslot;
 	long ret;
 	bool writing = !!(dsisr & DSISR_ISSTORE);
-	bool kvm_ro = false;
 
 	/* Check for unusual errors */
 	if (dsisr & DSISR_UNSUPP_MMU) {
@@ -1003,7 +983,6 @@ int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
 					      ea, DSISR_ISSTORE | DSISR_PROTFAULT);
 			return RESUME_GUEST;
 		}
-		kvm_ro = true;
 	}
 
 	/* Failed to set the reference/change bits */
@@ -1021,7 +1000,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
 
 	/* Try to insert a pte */
 	ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot, writing,
-					     kvm_ro, NULL, NULL);
+					     NULL, NULL);
 
 	if (ret == 0 || ret == -EAGAIN)
 		ret = RESUME_GUEST;
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 34c0adb9fdbf..742aa58a7c7e 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -115,10 +115,9 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 	struct iommu_table_group *table_group;
 	long i;
 	struct kvmppc_spapr_tce_iommu_table *stit;
-	struct fd f;
+	CLASS(fd, f)(tablefd);
 
-	f = fdget(tablefd);
-	if (!fd_file(f))
+	if (fd_empty(f))
 		return -EBADF;
 
 	rcu_read_lock();
@@ -130,16 +129,12 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 	}
 	rcu_read_unlock();
 
-	if (!found) {
-		fdput(f);
+	if (!found)
 		return -EINVAL;
-	}
 
 	table_group = iommu_group_get_iommudata(grp);
-	if (WARN_ON(!table_group)) {
-		fdput(f);
+	if (WARN_ON(!table_group))
 		return -EFAULT;
-	}
 
 	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
 		struct iommu_table *tbltmp = table_group->tables[i];
@@ -160,10 +155,8 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 			break;
 		}
 	}
-	if (!tbl) {
-		fdput(f);
+	if (!tbl)
 		return -EINVAL;
-	}
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
@@ -174,7 +167,6 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 			/* stit is being destroyed */
 			iommu_tce_table_put(tbl);
 			rcu_read_unlock();
-			fdput(f);
 			return -ENOTTY;
 		}
 		/*
@@ -182,7 +174,6 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 		 * its KVM reference counter and can return.
 		 */
 		rcu_read_unlock();
-		fdput(f);
 		return 0;
 	}
 	rcu_read_unlock();
@@ -190,7 +181,6 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 	stit = kzalloc(sizeof(*stit), GFP_KERNEL);
 	if (!stit) {
 		iommu_tce_table_put(tbl);
-		fdput(f);
 		return -ENOMEM;
 	}
 
@@ -199,7 +189,6 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 
 	list_add_rcu(&stit->next, &stt->iommu_tables);
 
-	fdput(f);
 	return 0;
 }
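The fdget()/fdput() pairs removed above are subsumed by the scope-based CLASS(fd, ...) helper: the descriptor reference is dropped automatically when f goes out of scope, which is what lets every early return shed its explicit fdput(). A minimal sketch of the idiom (hypothetical wrapper function, same helpers as the hunk):

    long tce_attach_example(int tablefd)
    {
            CLASS(fd, f)(tablefd);          /* fdput() runs on every return path */

            if (fd_empty(f))
                    return -EBADF;

            /* ... use fd_file(f) freely; no cleanup needed on early returns ... */
            return 0;
    }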
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ad8dc4ccdaab..25429905ae90 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -400,7 +400,10 @@ static inline unsigned long map_pcr_to_cap(unsigned long pcr)
 		cap = H_GUEST_CAP_POWER9;
 		break;
 	case PCR_ARCH_31:
-		cap = H_GUEST_CAP_POWER10;
+		if (cpu_has_feature(CPU_FTR_P11_PVR))
+			cap = H_GUEST_CAP_POWER11;
+		else
+			cap = H_GUEST_CAP_POWER10;
 		break;
 	default:
 		break;
@@ -415,7 +418,7 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
 	/* We can (emulate) our own architecture version and anything older */
-	if (cpu_has_feature(CPU_FTR_ARCH_31))
+	if (cpu_has_feature(CPU_FTR_P11_PVR) || cpu_has_feature(CPU_FTR_ARCH_31))
 		host_pcr_bit = PCR_ARCH_31;
 	else if (cpu_has_feature(CPU_FTR_ARCH_300))
 		host_pcr_bit = PCR_ARCH_300;
@@ -2060,36 +2063,9 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
 		fallthrough; /* go to facility unavailable handler */
 #endif
 
-	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: {
-		u64 cause = vcpu->arch.hfscr >> 56;
-
-		/*
-		 * Only pass HFU interrupts to the L1 if the facility is
-		 * permitted but disabled by the L1's HFSCR, otherwise
-		 * the interrupt does not make sense to the L1 so turn
-		 * it into a HEAI.
-		 */
-		if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) ||
-		    (vcpu->arch.nested_hfscr & (1UL << cause))) {
-			ppc_inst_t pinst;
-			vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST;
-
-			/*
-			 * If the fetch failed, return to guest and
-			 * try executing it again.
-			 */
-			r = kvmppc_get_last_inst(vcpu, INST_GENERIC, &pinst);
-			vcpu->arch.emul_inst = ppc_inst_val(pinst);
-			if (r != EMULATE_DONE)
-				r = RESUME_GUEST;
-			else
-				r = RESUME_HOST;
-		} else {
-			r = RESUME_HOST;
-		}
-
+	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+		r = RESUME_HOST;
 		break;
-	}
 
 	case BOOK3S_INTERRUPT_HV_RM_HARD:
 		vcpu->arch.trap = 0;
@@ -4153,8 +4129,9 @@ void kvmhv_set_l2_counters_status(int cpu, bool status)
 	else
 		lppaca_of(cpu).l2_counters_enable = 0;
 }
+EXPORT_SYMBOL(kvmhv_set_l2_counters_status);
 
-int kmvhv_counters_tracepoint_regfunc(void)
+int kvmhv_counters_tracepoint_regfunc(void)
 {
 	int cpu;
 
@@ -4164,7 +4141,7 @@ int kmvhv_counters_tracepoint_regfunc(void)
 	return 0;
 }
 
-void kmvhv_counters_tracepoint_unregfunc(void)
+void kvmhv_counters_tracepoint_unregfunc(void)
 {
 	int cpu;
 
@@ -4190,7 +4167,73 @@ static void do_trace_nested_cs_time(struct kvm_vcpu *vcpu)
 	*l1_to_l2_cs_ptr = l1_to_l2_ns;
 	*l2_to_l1_cs_ptr = l2_to_l1_ns;
 	*l2_runtime_agg_ptr = l2_runtime_ns;
+	vcpu->arch.l1_to_l2_cs = l1_to_l2_ns;
+	vcpu->arch.l2_to_l1_cs = l2_to_l1_ns;
+	vcpu->arch.l2_runtime_agg = l2_runtime_ns;
+}
+
+u64 kvmhv_get_l1_to_l2_cs_time(void)
+{
+	return tb_to_ns(be64_to_cpu(get_lppaca()->l1_to_l2_cs_tb));
+}
+EXPORT_SYMBOL(kvmhv_get_l1_to_l2_cs_time);
+
+u64 kvmhv_get_l2_to_l1_cs_time(void)
+{
+	return tb_to_ns(be64_to_cpu(get_lppaca()->l2_to_l1_cs_tb));
+}
+EXPORT_SYMBOL(kvmhv_get_l2_to_l1_cs_time);
+
+u64 kvmhv_get_l2_runtime_agg(void)
+{
+	return tb_to_ns(be64_to_cpu(get_lppaca()->l2_runtime_tb));
+}
+EXPORT_SYMBOL(kvmhv_get_l2_runtime_agg);
+
+u64 kvmhv_get_l1_to_l2_cs_time_vcpu(void)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu_arch *arch;
+
+	vcpu = local_paca->kvm_hstate.kvm_vcpu;
+	if (vcpu) {
+		arch = &vcpu->arch;
+		return arch->l1_to_l2_cs;
+	} else {
+		return 0;
+	}
 }
+EXPORT_SYMBOL(kvmhv_get_l1_to_l2_cs_time_vcpu);
+
+u64 kvmhv_get_l2_to_l1_cs_time_vcpu(void)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu_arch *arch;
+
+	vcpu = local_paca->kvm_hstate.kvm_vcpu;
+	if (vcpu) {
+		arch = &vcpu->arch;
+		return arch->l2_to_l1_cs;
+	} else {
+		return 0;
+	}
+}
+EXPORT_SYMBOL(kvmhv_get_l2_to_l1_cs_time_vcpu);
+
+u64 kvmhv_get_l2_runtime_agg_vcpu(void)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu_arch *arch;
+
+	vcpu = local_paca->kvm_hstate.kvm_vcpu;
+	if (vcpu) {
+		arch = &vcpu->arch;
+		return arch->l2_runtime_agg;
+	} else {
+		return 0;
+	}
+}
+EXPORT_SYMBOL(kvmhv_get_l2_runtime_agg_vcpu);
 
 #else
 int kvmhv_get_l2_counters_status(void)
@@ -4310,6 +4353,15 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, uns
 	hvregs.hdec_expiry = time_limit;
 
 	/*
+	 * hvregs has the doorbell status, so zero it here which
+	 * enables us to receive doorbells when H_ENTER_NESTED is
+	 * in progress for this vCPU
+	 */
+
+	if (vcpu->arch.doorbell_request)
+		vcpu->arch.doorbell_request = 0;
+
+	/*
 	 * When setting DEC, we must always deal with irq_work_raise
 	 * via NMI vs setting DEC. The problem occurs right as we
 	 * switch into guest mode if a NMI hits and sets pending work
@@ -4912,7 +4964,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 			lpcr &= ~LPCR_MER;
 		}
 	} else if (vcpu->arch.pending_exceptions ||
-		   vcpu->arch.doorbell_request ||
 		   xive_interrupt_pending(vcpu)) {
 		vcpu->arch.ret = RESUME_HOST;
 		goto out;
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 05f5220960c6..5f8c2321cfb5 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -32,7 +32,7 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
 	hr->pcr = vc->pcr | PCR_MASK;
-	hr->dpdes = vc->dpdes;
+	hr->dpdes = vcpu->arch.doorbell_request;
 	hr->hfscr = vcpu->arch.hfscr;
 	hr->tb_offset = vc->tb_offset;
 	hr->dawr0 = vcpu->arch.dawr0;
@@ -105,7 +105,7 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu,
 {
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
-	hr->dpdes = vc->dpdes;
+	hr->dpdes = vcpu->arch.doorbell_request;
 	hr->purr = vcpu->arch.purr;
 	hr->spurr = vcpu->arch.spurr;
 	hr->ic = vcpu->arch.ic;
@@ -143,7 +143,7 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, const struct hv_guest_state *hr)
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
 	vc->pcr = hr->pcr | PCR_MASK;
-	vc->dpdes = hr->dpdes;
+	vcpu->arch.doorbell_request = hr->dpdes;
 	vcpu->arch.hfscr = hr->hfscr;
 	vcpu->arch.dawr0 = hr->dawr0;
 	vcpu->arch.dawrx0 = hr->dawrx0;
@@ -170,7 +170,13 @@ void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
 {
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
-	vc->dpdes = hr->dpdes;
+	/*
+	 * This L2 vCPU might have received a doorbell while H_ENTER_NESTED was being handled.
+	 * Make sure we preserve the doorbell if it was either:
+	 *   a) Sent after H_ENTER_NESTED was called on this vCPU (arch.doorbell_request would be 1)
+	 *   b) Doorbell was not handled and L2 exited for some other reason (hr->dpdes would be 1)
+	 */
+	vcpu->arch.doorbell_request = vcpu->arch.doorbell_request | hr->dpdes;
 	vcpu->arch.hfscr = hr->hfscr;
 	vcpu->arch.purr = hr->purr;
 	vcpu->arch.spurr = hr->spurr;
@@ -445,6 +451,8 @@ long kvmhv_nested_init(void)
 	if (rc == H_SUCCESS) {
 		unsigned long capabilities = 0;
 
+		if (cpu_has_feature(CPU_FTR_P11_PVR))
+			capabilities |= H_GUEST_CAP_POWER11;
 		if (cpu_has_feature(CPU_FTR_ARCH_31))
 			capabilities |= H_GUEST_CAP_POWER10;
 		if (cpu_has_feature(CPU_FTR_ARCH_300))
@@ -1527,7 +1535,6 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
 	unsigned long n_gpa, gpa, gfn, perm = 0UL;
 	unsigned int shift, l1_shift, level;
 	bool writing = !!(dsisr & DSISR_ISSTORE);
-	bool kvm_ro = false;
 	long int ret;
 
 	if (!gp->l1_gr_to_hr) {
@@ -1607,7 +1614,6 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
 					      ea, DSISR_ISSTORE | DSISR_PROTFAULT);
 			return RESUME_GUEST;
 		}
-		kvm_ro = true;
 	}
 
 	/* 2. Find the host pte for this L1 guest real address */
@@ -1629,7 +1635,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
 	if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
 		/* No suitable pte found -> try to insert a mapping */
 		ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
-					writing, kvm_ro, &pte, &level);
+					writing, &pte, &level);
 		if (ret == -EAGAIN)
 			return RESUME_GUEST;
 		else if (ret)
diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c
index eeecea8f202b..e5c7ce1fb761 100644
--- a/arch/powerpc/kvm/book3s_hv_nestedv2.c
+++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c
@@ -370,7 +370,9 @@ static int gs_msg_ops_vcpu_fill_info(struct kvmppc_gs_buff *gsb,
 		 * default to L1's PVR.
 		 */
 		if (!vcpu->arch.vcore->arch_compat) {
-			if (cpu_has_feature(CPU_FTR_ARCH_31))
+			if (cpu_has_feature(CPU_FTR_P11_PVR))
+				arch_compat = PVR_ARCH_31_P11;
+			else if (cpu_has_feature(CPU_FTR_ARCH_31))
 				arch_compat = PVR_ARCH_31;
 			else if (cpu_has_feature(CPU_FTR_ARCH_300))
 				arch_compat = PVR_ARCH_300;
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 92f33115144b..3a6592a31a10 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -879,9 +879,8 @@ static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
 {
 
 	int ret = H_PARAMETER;
-	struct page *uvmem_page;
+	struct page *page, *uvmem_page;
 	struct kvmppc_uvmem_page_pvt *pvt;
-	unsigned long pfn;
 	unsigned long gfn = gpa >> page_shift;
 	int srcu_idx;
 	unsigned long uvmem_pfn;
@@ -901,8 +900,8 @@ static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
 
 retry:
 	mutex_unlock(&kvm->arch.uvmem_lock);
-	pfn = gfn_to_pfn(kvm, gfn);
-	if (is_error_noslot_pfn(pfn))
+	page = gfn_to_page(kvm, gfn);
+	if (!page)
 		goto out;
 
 	mutex_lock(&kvm->arch.uvmem_lock);
@@ -911,16 +910,16 @@ retry:
 		pvt = uvmem_page->zone_device_data;
 		pvt->skip_page_out = true;
 		pvt->remove_gfn = false; /* it continues to be a valid GFN */
-		kvm_release_pfn_clean(pfn);
+		kvm_release_page_unused(page);
 		goto retry;
 	}
 
-	if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
+	if (!uv_page_in(kvm->arch.lpid, page_to_pfn(page) << page_shift, gpa, 0,
 			page_shift)) {
 		kvmppc_gfn_shared(gfn, kvm);
 		ret = H_SUCCESS;
 	}
-	kvm_release_pfn_clean(pfn);
+	kvm_release_page_clean(page);
 	mutex_unlock(&kvm->arch.uvmem_lock);
 out:
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
@@ -1083,21 +1082,21 @@ out:
 
 int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
 {
-	unsigned long pfn;
+	struct page *page;
 	int ret = U_SUCCESS;
 
-	pfn = gfn_to_pfn(kvm, gfn);
-	if (is_error_noslot_pfn(pfn))
+	page = gfn_to_page(kvm, gfn);
+	if (!page)
 		return -EFAULT;
 
 	mutex_lock(&kvm->arch.uvmem_lock);
 	if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
 		goto out;
 
-	ret = uv_page_in(kvm->arch.lpid, pfn << PAGE_SHIFT, gfn << PAGE_SHIFT,
-			 0, PAGE_SHIFT);
+	ret = uv_page_in(kvm->arch.lpid, page_to_pfn(page) << PAGE_SHIFT,
+			 gfn << PAGE_SHIFT, 0, PAGE_SHIFT);
 out:
-	kvm_release_pfn_clean(pfn);
+	kvm_release_page_clean(page);
 	mutex_unlock(&kvm->arch.uvmem_lock);
 	return (ret == U_SUCCESS) ? RESUME_GUEST : -EFAULT;
 }
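Note the API shift in book3s_hv_uvmem.c above: gfn_to_page() now yields a struct page pointer (NULL on failure) rather than a pfn that must be screened with is_error_noslot_pfn(), and the release side distinguishes pages that were consumed from ones that were not. The shape, per the hunks:

    struct page *page = gfn_to_page(kvm, gfn);

    if (!page)
            return -EFAULT;
    /* ... feed page_to_pfn(page) to uv_page_in() ... */
    kvm_release_page_clean(page);   /* kvm_release_page_unused(page) on the retry path */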
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index ce79ac33e8d3..d904e13e069b 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -92,12 +92,6 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 	spin_unlock(&vcpu3s->mmu_lock);
 }
 
-static void free_pte_rcu(struct rcu_head *head)
-{
-	struct hpte_cache *pte = container_of(head, struct hpte_cache, rcu_head);
-	kmem_cache_free(hpte_cache, pte);
-}
-
 static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 {
 	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
@@ -126,7 +120,7 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 
 	spin_unlock(&vcpu3s->mmu_lock);
 
-	call_rcu(&pte->rcu_head, free_pte_rcu);
+	kfree_rcu(pte, rcu_head);
 }
 
 static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 7b8ae509328f..83bcdc80ce51 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -639,29 +639,27 @@ static void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
  */
 static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
 {
-	struct page *hpage;
+	struct kvm_host_map map;
 	u64 hpage_offset;
 	u32 *page;
-	int i;
+	int i, r;
 
-	hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
-	if (is_error_page(hpage))
+	r = kvm_vcpu_map(vcpu, pte->raddr >> PAGE_SHIFT, &map);
+	if (r)
 		return;
 
 	hpage_offset = pte->raddr & ~PAGE_MASK;
 	hpage_offset &= ~0xFFFULL;
 	hpage_offset /= 4;
 
-	get_page(hpage);
-	page = kmap_atomic(hpage);
+	page = map.hva;
 
 	/* patch dcbz into reserved instruction, so we trap */
 	for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
 		if ((be32_to_cpu(page[i]) & 0xff0007ff) == INS_DCBZ)
 			page[i] &= cpu_to_be32(0xfffffff7);
 
-	kunmap_atomic(page);
-	put_page(hpage);
+	kvm_vcpu_unmap(vcpu, &map);
 }
 
 static bool kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 6e2ebbd8aaac..d9bf1bc3ff61 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -654,7 +654,7 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
 	}
 
 	page = gfn_to_page(kvm, gfn);
-	if (is_error_page(page)) {
+	if (!page) {
 		srcu_read_unlock(&kvm->srcu, srcu_idx);
 		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
 		return -EINVAL;
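On the book3s_mmu_hpte.c hunk above: when an RCU callback does nothing but free the object, kfree_rcu(ptr, rcu_head_field) can replace the call_rcu() + container_of() boilerplate outright; kfree() is able to return slab objects to their originating kmem_cache (for caches without a constructor), which is presumably what makes dropping the dedicated kmem_cache_free() callback safe here. Reduced to its shape:

    struct hpte_cache {
            /* ... */
            struct rcu_head rcu_head;
    };

    /* before: call_rcu(&pte->rcu_head, free_pte_rcu); */
    kfree_rcu(pte, rcu_head);   /* frees the containing object after a grace period */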
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index c664fdec75b1..e5a145b578a4 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -242,7 +242,7 @@ static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
 	return tlbe->mas7_3 & (MAS3_SW|MAS3_UW);
 }
 
-static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
+static inline bool kvmppc_e500_ref_setup(struct tlbe_ref *ref,
 					 struct kvm_book3e_206_tlb_entry *gtlbe,
 					 kvm_pfn_t pfn, unsigned int wimg)
 {
@@ -252,11 +252,7 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
 	/* Use guest supplied MAS2_G and MAS2_E */
 	ref->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg;
 
-	/* Mark the page accessed */
-	kvm_set_pfn_accessed(pfn);
-
-	if (tlbe_is_writable(gtlbe))
-		kvm_set_pfn_dirty(pfn);
+	return tlbe_is_writable(gtlbe);
 }
 
 static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
@@ -326,6 +322,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 {
 	struct kvm_memory_slot *slot;
 	unsigned long pfn = 0; /* silence GCC warning */
+	struct page *page = NULL;
 	unsigned long hva;
 	int pfnmap = 0;
 	int tsize = BOOK3E_PAGESZ_4K;
@@ -337,6 +334,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	unsigned int wimg = 0;
 	pgd_t *pgdir;
 	unsigned long flags;
+	bool writable = false;
 
 	/* used to check for invalidations in progress */
 	mmu_seq = kvm->mmu_invalidate_seq;
@@ -446,7 +444,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 
 	if (likely(!pfnmap)) {
 		tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT);
-		pfn = gfn_to_pfn_memslot(slot, gfn);
+		pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, NULL, &page);
 		if (is_error_noslot_pfn(pfn)) {
 			if (printk_ratelimit())
 				pr_err("%s: real page not found for gfn %lx\n",
@@ -490,7 +488,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 			goto out;
 		}
 	}
-	kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
+	writable = kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
 
 	kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
 				ref, gvaddr, stlbe);
@@ -499,11 +497,8 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	kvmppc_mmu_flush_icache(pfn);
 
 out:
+	kvm_release_faultin_page(kvm, page, !!ret, writable);
 	spin_unlock(&kvm->mmu_lock);
-
-	/* Drop refcount on page, so that mmu notifiers can clear it */
-	kvm_release_pfn_clean(pfn);
-
 	return ret;
 }
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index f14329989e9a..ce1d91eed231 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -612,9 +612,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 			r = 8 | 4 | 2 | 1;
 		}
 		break;
-	case KVM_CAP_PPC_RMA:
-		r = 0;
-		break;
 	case KVM_CAP_PPC_HWRNG:
 		r = kvmppc_hwrng_present();
 		break;
@@ -1933,12 +1930,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 #endif
 #ifdef CONFIG_KVM_MPIC
 	case KVM_CAP_IRQ_MPIC: {
-		struct fd f;
+		CLASS(fd, f)(cap->args[0]);
 		struct kvm_device *dev;
 
 		r = -EBADF;
-		f = fdget(cap->args[0]);
-		if (!fd_file(f))
+		if (fd_empty(f))
 			break;
 
 		r = -EPERM;
@@ -1946,18 +1942,16 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		if (dev)
 			r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]);
 
-		fdput(f);
 		break;
 	}
 #endif
 #ifdef CONFIG_KVM_XICS
 	case KVM_CAP_IRQ_XICS: {
-		struct fd f;
+		CLASS(fd, f)(cap->args[0]);
 		struct kvm_device *dev;
 
 		r = -EBADF;
-		f = fdget(cap->args[0]);
-		if (!fd_file(f))
+		if (fd_empty(f))
 			break;
 
 		r = -EPERM;
@@ -1968,34 +1962,27 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 			else
 				r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
 		}
-
-		fdput(f);
 		break;
 	}
 #endif /* CONFIG_KVM_XICS */
 #ifdef CONFIG_KVM_XIVE
 	case KVM_CAP_PPC_IRQ_XIVE: {
-		struct fd f;
+		CLASS(fd, f)(cap->args[0]);
 		struct kvm_device *dev;
 
 		r = -EBADF;
-		f = fdget(cap->args[0]);
-		if (!fd_file(f))
+		if (fd_empty(f))
 			break;
 
 		r = -ENXIO;
-		if (!xive_enabled()) {
-			fdput(f);
+		if (!xive_enabled())
 			break;
-		}
 
 		r = -EPERM;
 		dev = kvm_device_from_filp(fd_file(f));
 		if (dev)
 			r = kvmppc_xive_native_connect_vcpu(dev, vcpu,
 							    cap->args[1]);
-
-		fdput(f);
 		break;
 	}
 #endif /* CONFIG_KVM_XIVE */
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
index 77ebc724e6cd..35fccaa575cc 100644
--- a/arch/powerpc/kvm/trace_hv.h
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -538,7 +538,7 @@ TRACE_EVENT_FN_COND(kvmppc_vcpu_stats,
 	TP_printk("VCPU %d: l1_to_l2_cs_time=%llu ns l2_to_l1_cs_time=%llu ns l2_runtime=%llu ns",
 		__entry->vcpu_id, __entry->l1_to_l2_cs,
 		__entry->l2_to_l1_cs, __entry->l2_runtime),
-	kmvhv_counters_tracepoint_regfunc, kmvhv_counters_tracepoint_unregfunc
+	kvmhv_counters_tracepoint_regfunc, kvmhv_counters_tracepoint_unregfunc
 );
 #endif
 #endif /* _TRACE_KVM_HV_H */
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index acdab294b340..af97fbb3c257 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -17,7 +17,7 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/page.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/inst.h>
 
 static int __patch_mem(void *exec_addr, unsigned long val, void *patch_addr, bool is_dword)
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index b7201ba50b2e..587c8cf1230f 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -16,7 +16,7 @@
 #include <linux/sched/mm.h>
 #include <linux/stop_machine.h>
 #include <asm/cputable.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/interrupt.h>
 #include <asm/page.h>
 #include <asm/sections.h>
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index e65f3fb68d06..ac3ee19531d8 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -780,8 +780,8 @@ static nokprobe_inline int emulate_stq(struct pt_regs *regs, unsigned long ea,
 #endif /* __powerpc64 */
 
 #ifdef CONFIG_VSX
-void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
-		      const void *mem, bool rev)
+static nokprobe_inline void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
+					     const void *mem, bool rev)
 {
 	int size, read_size;
 	int i, j;
@@ -863,11 +863,9 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
 		break;
 	}
 }
-EXPORT_SYMBOL_GPL(emulate_vsx_load);
-NOKPROBE_SYMBOL(emulate_vsx_load);
 
-void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg,
-		       void *mem, bool rev)
+static nokprobe_inline void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg,
+					      void *mem, bool rev)
 {
 	int size, write_size;
 	int i, j;
@@ -955,8 +953,6 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg,
 		break;
 	}
 }
-EXPORT_SYMBOL_GPL(emulate_vsx_store);
-NOKPROBE_SYMBOL(emulate_vsx_store);
 
 static nokprobe_inline int do_vsx_load(struct instruction_op *op,
 				       unsigned long ea, struct pt_regs *regs,
diff --git a/arch/powerpc/lib/test-code-patching.c b/arch/powerpc/lib/test-code-patching.c
index 8cd3b32f805b..1440d99630b3 100644
--- a/arch/powerpc/lib/test-code-patching.c
+++ b/arch/powerpc/lib/test-code-patching.c
@@ -6,7 +6,7 @@
 #include <linux/vmalloc.h>
 #include <linux/init.h>
 
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 
 static int __init instr_is_branch_to_addr(const u32 *instr, unsigned long addr)
 {
diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c
index 23c7805fb7b3..66b5b4fa1686 100644
--- a/arch/powerpc/lib/test_emulate_step.c
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -11,7 +11,7 @@
 #include <asm/cpu_has_feature.h>
 #include <asm/sstep.h>
 #include <asm/ppc-opcode.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/inst.h>
 
 #define MAX_SUBTESTS	16
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 2db167f4233f..6978344edcb4 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -25,7 +25,7 @@
 #include <asm/mmu.h>
 #include <asm/machdep.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/sections.h>
 
 #include <mm/mmu_decl.h>
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index e1eadd03f133..c8b4fa71d4a7 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -40,6 +40,7 @@
 #include <linux/random.h>
 #include <linux/elf-randomize.h>
 #include <linux/of_fdt.h>
+#include <linux/kfence.h>
 
 #include <asm/interrupt.h>
 #include <asm/processor.h>
@@ -57,7 +58,7 @@
 #include <asm/sections.h>
 #include <asm/copro.h>
 #include <asm/udbg.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/fadump.h>
 #include <asm/firmware.h>
 #include <asm/tm.h>
@@ -66,6 +67,7 @@
 #include <asm/pte-walk.h>
 #include <asm/asm-prototypes.h>
 #include <asm/ultravisor.h>
+#include <asm/kfence.h>
 
 #include <mm/mmu_decl.h>
 
@@ -123,8 +125,6 @@ EXPORT_SYMBOL_GPL(mmu_slb_size);
 #ifdef CONFIG_PPC_64K_PAGES
 int mmu_ci_restrictions;
 #endif
-static u8 *linear_map_hash_slots;
-static unsigned long linear_map_hash_count;
 struct mmu_hash_ops mmu_hash_ops __ro_after_init;
 EXPORT_SYMBOL(mmu_hash_ops);
 
@@ -273,6 +273,270 @@ void hash__tlbiel_all(unsigned int action)
 		WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
 }
 
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
+static void kernel_map_linear_page(unsigned long vaddr, unsigned long idx,
+				   u8 *slots, raw_spinlock_t *lock)
+{
+	unsigned long hash;
+	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
+	unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
+	unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY);
+	long ret;
+
+	hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
+
+	/* Don't create HPTE entries for bad address */
+	if (!vsid)
+		return;
+
+	if (slots[idx] & 0x80)
+		return;
+
+	ret = hpte_insert_repeating(hash, vpn, __pa(vaddr), mode,
+				    HPTE_V_BOLTED,
+				    mmu_linear_psize, mmu_kernel_ssize);
+
+	BUG_ON (ret < 0);
+	raw_spin_lock(lock);
+	BUG_ON(slots[idx] & 0x80);
+	slots[idx] = ret | 0x80;
+	raw_spin_unlock(lock);
+}
+
+static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long idx,
+				     u8 *slots, raw_spinlock_t *lock)
+{
+	unsigned long hash, hslot, slot;
+	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
+	unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
+
+	hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
+	raw_spin_lock(lock);
+	if (!(slots[idx] & 0x80)) {
+		raw_spin_unlock(lock);
+		return;
+	}
+	hslot = slots[idx] & 0x7f;
+	slots[idx] = 0;
+	raw_spin_unlock(lock);
+	if (hslot & _PTEIDX_SECONDARY)
+		hash = ~hash;
+	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+	slot += hslot & _PTEIDX_GROUP_IX;
+	mmu_hash_ops.hpte_invalidate(slot, vpn, mmu_linear_psize,
+				     mmu_linear_psize,
+				     mmu_kernel_ssize, 0);
+}
+#endif
+
+static inline bool hash_supports_debug_pagealloc(void)
+{
+	unsigned long max_hash_count = ppc64_rma_size / 4;
+	unsigned long linear_map_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
+
+	if (!debug_pagealloc_enabled() || linear_map_count > max_hash_count)
+		return false;
+	return true;
+}
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static u8 *linear_map_hash_slots;
+static unsigned long linear_map_hash_count;
+static DEFINE_RAW_SPINLOCK(linear_map_hash_lock);
+static void hash_debug_pagealloc_alloc_slots(void)
+{
+	if (!hash_supports_debug_pagealloc())
+		return;
+
+	linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
+	linear_map_hash_slots = memblock_alloc_try_nid(
+			linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT,
+			ppc64_rma_size, NUMA_NO_NODE);
+	if (!linear_map_hash_slots)
+		panic("%s: Failed to allocate %lu bytes max_addr=%pa\n",
+		      __func__, linear_map_hash_count, &ppc64_rma_size);
+}
+
+static inline void hash_debug_pagealloc_add_slot(phys_addr_t paddr,
+						 int slot)
+{
+	if (!debug_pagealloc_enabled() || !linear_map_hash_count)
+		return;
+	if ((paddr >> PAGE_SHIFT) < linear_map_hash_count)
+		linear_map_hash_slots[paddr >> PAGE_SHIFT] = slot | 0x80;
+}
+
+static int hash_debug_pagealloc_map_pages(struct page *page, int numpages,
+					  int enable)
+{
+	unsigned long flags, vaddr, lmi;
+	int i;
+
+	if (!debug_pagealloc_enabled() || !linear_map_hash_count)
+		return 0;
+
+	local_irq_save(flags);
+	for (i = 0; i < numpages; i++, page++) {
+		vaddr = (unsigned long)page_address(page);
+		lmi = __pa(vaddr) >> PAGE_SHIFT;
+		if (lmi >= linear_map_hash_count)
+			continue;
+		if (enable)
+			kernel_map_linear_page(vaddr, lmi,
+					       linear_map_hash_slots, &linear_map_hash_lock);
+		else
+			kernel_unmap_linear_page(vaddr, lmi,
+						 linear_map_hash_slots, &linear_map_hash_lock);
+	}
+	local_irq_restore(flags);
+	return 0;
+}
+
+#else /* CONFIG_DEBUG_PAGEALLOC */
+static inline void hash_debug_pagealloc_alloc_slots(void) {}
+static inline void hash_debug_pagealloc_add_slot(phys_addr_t paddr, int slot) {}
+static int __maybe_unused
+hash_debug_pagealloc_map_pages(struct page *page, int numpages, int enable)
+{
+	return 0;
+}
+#endif /* CONFIG_DEBUG_PAGEALLOC */
+
+#ifdef CONFIG_KFENCE
+static u8 *linear_map_kf_hash_slots;
+static unsigned long linear_map_kf_hash_count;
+static DEFINE_RAW_SPINLOCK(linear_map_kf_hash_lock);
+
+static phys_addr_t kfence_pool;
+
+static inline void hash_kfence_alloc_pool(void)
+{
+	if (!kfence_early_init_enabled())
+		goto err;
+
+	/* allocate linear map for kfence within RMA region */
+	linear_map_kf_hash_count = KFENCE_POOL_SIZE >> PAGE_SHIFT;
+	linear_map_kf_hash_slots = memblock_alloc_try_nid(
+			linear_map_kf_hash_count, 1,
+			MEMBLOCK_LOW_LIMIT, ppc64_rma_size,
+			NUMA_NO_NODE);
+	if (!linear_map_kf_hash_slots) {
+		pr_err("%s: memblock for linear map (%lu) failed\n", __func__,
+		       linear_map_kf_hash_count);
+		goto err;
+	}
+
+	/* allocate kfence pool early */
+	kfence_pool = memblock_phys_alloc_range(KFENCE_POOL_SIZE, PAGE_SIZE,
+						MEMBLOCK_LOW_LIMIT, MEMBLOCK_ALLOC_ANYWHERE);
+	if (!kfence_pool) {
+		pr_err("%s: memblock for kfence pool (%lu) failed\n", __func__,
+		       KFENCE_POOL_SIZE);
+		memblock_free(linear_map_kf_hash_slots,
+			      linear_map_kf_hash_count);
+		linear_map_kf_hash_count = 0;
+		goto err;
+	}
+	memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
+
+	return;
+err:
+	pr_info("Disabling kfence\n");
+	disable_kfence();
+}
+
+static inline void hash_kfence_map_pool(void)
+{
+	unsigned long kfence_pool_start, kfence_pool_end;
+	unsigned long prot = pgprot_val(PAGE_KERNEL);
+
+	if (!kfence_pool)
+		return;
+
+	kfence_pool_start = (unsigned long) __va(kfence_pool);
+	kfence_pool_end = kfence_pool_start + KFENCE_POOL_SIZE;
+	__kfence_pool = (char *) kfence_pool_start;
+	BUG_ON(htab_bolt_mapping(kfence_pool_start, kfence_pool_end,
+				 kfence_pool, prot, mmu_linear_psize,
+				 mmu_kernel_ssize));
+	memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
+}
+
+static inline void hash_kfence_add_slot(phys_addr_t paddr, int slot)
+{
+	unsigned long vaddr = (unsigned long) __va(paddr);
+	unsigned long lmi = (vaddr - (unsigned long)__kfence_pool)
+					>> PAGE_SHIFT;
+
+	if (!kfence_pool)
+		return;
+	BUG_ON(!is_kfence_address((void *)vaddr));
+	BUG_ON(lmi >= linear_map_kf_hash_count);
+	linear_map_kf_hash_slots[lmi] = slot | 0x80;
+}
+
+static int hash_kfence_map_pages(struct page *page, int numpages, int enable)
+{
+	unsigned long flags, vaddr, lmi;
+	int i;
+
+	WARN_ON_ONCE(!linear_map_kf_hash_count);
+	local_irq_save(flags);
+	for (i = 0; i < numpages; i++, page++) {
+		vaddr = (unsigned long)page_address(page);
+		lmi = (vaddr - (unsigned long)__kfence_pool) >> PAGE_SHIFT;
+
+		/* Ideally this should never happen */
+		if (lmi >= linear_map_kf_hash_count) {
+			WARN_ON_ONCE(1);
+			continue;
+		}
+
+		if (enable)
+			kernel_map_linear_page(vaddr, lmi,
+					       linear_map_kf_hash_slots,
+					       &linear_map_kf_hash_lock);
+		else
+			kernel_unmap_linear_page(vaddr, lmi,
+						 linear_map_kf_hash_slots,
+						 &linear_map_kf_hash_lock);
+	}
+	local_irq_restore(flags);
+	return 0;
+}
+#else
+static inline void hash_kfence_alloc_pool(void) {}
+static inline void hash_kfence_map_pool(void) {}
+static inline void hash_kfence_add_slot(phys_addr_t paddr, int slot) {}
+static int __maybe_unused
+hash_kfence_map_pages(struct page *page, int numpages, int enable)
+{
+	return 0;
+}
+#endif
+
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
+int hash__kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	void *vaddr = page_address(page);
+
+	if (is_kfence_address(vaddr))
+		return hash_kfence_map_pages(page, numpages, enable);
+	else
+		return hash_debug_pagealloc_map_pages(page, numpages, enable);
+}
+
+static void hash_linear_map_add_slot(phys_addr_t paddr, int slot)
+{
+	if (is_kfence_address(__va(paddr)))
+		hash_kfence_add_slot(paddr, slot);
+	else
+		hash_debug_pagealloc_add_slot(paddr, slot);
+}
+#else
+static void hash_linear_map_add_slot(phys_addr_t paddr, int slot) {}
+#endif
+
 /*
  * 'R' and 'C' update notes:
  *  - Under pHyp or KVM, the updatepp path will not set C, thus it *will*
@@ -431,9 +695,8 @@ repeat:
 			break;
 
 		cond_resched();
-		if (debug_pagealloc_enabled_or_kfence() &&
-		    (paddr >> PAGE_SHIFT) < linear_map_hash_count)
-			linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80;
+		/* add slot info in debug_pagealloc / kfence linear map */
+		hash_linear_map_add_slot(paddr, ret);
 	}
 	return ret < 0 ? ret : 0;
 }
@@ -814,7 +1077,7 @@ static void __init htab_init_page_sizes(void)
 	bool aligned = true;
 	init_hpte_page_sizes();
 
-	if (!debug_pagealloc_enabled_or_kfence()) {
+	if (!hash_supports_debug_pagealloc() && !kfence_early_init_enabled()) {
 		/*
 		 * Pick a size for the linear mapping. Currently, we only
 		 * support 16M, 1M and 4K which is the default
@@ -1134,16 +1397,8 @@ static void __init htab_initialize(void)
 
 	prot = pgprot_val(PAGE_KERNEL);
 
-	if (debug_pagealloc_enabled_or_kfence()) {
-		linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
-		linear_map_hash_slots = memblock_alloc_try_nid(
-				linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT,
-				ppc64_rma_size, NUMA_NO_NODE);
-		if (!linear_map_hash_slots)
-			panic("%s: Failed to allocate %lu bytes max_addr=%pa\n",
-			      __func__, linear_map_hash_count, &ppc64_rma_size);
-	}
-
+	hash_debug_pagealloc_alloc_slots();
+	hash_kfence_alloc_pool();
 	/* create bolted the linear mapping in the hash table */
 	for_each_mem_range(i, &base, &end) {
 		size = end - base;
@@ -1160,6 +1415,7 @@ static void __init htab_initialize(void)
 		BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
 				prot, mmu_linear_psize, mmu_kernel_ssize));
 	}
+	hash_kfence_map_pool();
 	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
 
 	/*
@@ -2120,82 +2376,6 @@ void hpt_do_stress(unsigned long ea, unsigned long hpte_group)
 	}
 }
 
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
-static DEFINE_RAW_SPINLOCK(linear_map_hash_lock);
-
-static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
-{
-	unsigned long hash;
-	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
-	unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
-	unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY);
-	long ret;
-
-	hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
-
-	/* Don't create HPTE entries for bad address */
-	if (!vsid)
-		return;
-
-	if (linear_map_hash_slots[lmi] & 0x80)
-		return;
-
-	ret = hpte_insert_repeating(hash, vpn, __pa(vaddr), mode,
-				    HPTE_V_BOLTED,
-				    mmu_linear_psize, mmu_kernel_ssize);
-
-	BUG_ON (ret < 0);
-	raw_spin_lock(&linear_map_hash_lock);
-	BUG_ON(linear_map_hash_slots[lmi] & 0x80);
-	linear_map_hash_slots[lmi] = ret | 0x80;
-	raw_spin_unlock(&linear_map_hash_lock);
-}
-
-static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
-{
-	unsigned long hash, hidx, slot;
-	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
-	unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
-
-	hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
-	raw_spin_lock(&linear_map_hash_lock);
-	if (!(linear_map_hash_slots[lmi] & 0x80)) {
-		raw_spin_unlock(&linear_map_hash_lock);
-		return;
-	}
-	hidx = linear_map_hash_slots[lmi] & 0x7f;
-	linear_map_hash_slots[lmi] = 0;
-	raw_spin_unlock(&linear_map_hash_lock);
-	if (hidx & _PTEIDX_SECONDARY)
-		hash = ~hash;
-	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-	slot += hidx & _PTEIDX_GROUP_IX;
-	mmu_hash_ops.hpte_invalidate(slot, vpn, mmu_linear_psize,
-				     mmu_linear_psize,
-				     mmu_kernel_ssize, 0);
-}
-
-int hash__kernel_map_pages(struct page *page, int numpages, int enable)
-{
-	unsigned long flags, vaddr, lmi;
-	int i;
-
-	local_irq_save(flags);
-	for (i = 0; i < numpages; i++, page++) {
-		vaddr = (unsigned long)page_address(page);
-		lmi = __pa(vaddr) >> PAGE_SHIFT;
-		if (lmi >= linear_map_hash_count)
-			continue;
-		if (enable)
-			kernel_map_linear_page(vaddr, lmi);
-		else
-			kernel_unmap_linear_page(vaddr, lmi);
-	}
-	local_irq_restore(flags);
-	return 0;
-}
-#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_KFENCE */
-
 void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
 				      phys_addr_t first_memblock_size)
 {
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 5a4a75369043..374542528080 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -37,6 +37,19 @@ EXPORT_SYMBOL(__pmd_frag_nr);
 unsigned long __pmd_frag_size_shift;
 EXPORT_SYMBOL(__pmd_frag_size_shift);
 
+#ifdef CONFIG_KFENCE
+extern bool kfence_early_init;
+static int __init parse_kfence_early_init(char *arg)
+{
+	int val;
+
+	if (get_option(&arg, &val))
+		kfence_early_init = !!val;
+	return 0;
+}
+early_param("kfence.sample_interval", parse_kfence_early_init);
+#endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /*
  * This is called when relaxing access to a hugepage. It's also called in the page
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index b0d927009af8..311e2112d782 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -363,18 +363,6 @@ static int __meminit create_physical_mapping(unsigned long start,
 }
 
 #ifdef CONFIG_KFENCE
-static bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL;
-
-static int __init parse_kfence_early_init(char *arg)
-{
-	int val;
-
-	if (get_option(&arg, &val))
-		kfence_early_init = !!val;
-	return 0;
-}
-early_param("kfence.sample_interval", parse_kfence_early_init);
-
 static inline phys_addr_t alloc_kfence_pool(void)
 {
 	phys_addr_t kfence_pool;
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index f2708c8629a5..6b783552403c 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -24,7 +24,7 @@
 #include <linux/pgtable.h>
 
 #include <asm/udbg.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 
 #include "internal.h"
 
diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c
index 87307d0fc3b8..bc9a39821d1c 100644
--- a/arch/powerpc/mm/book3s64/slice.c
+++ b/arch/powerpc/mm/book3s64/slice.c
@@ -633,6 +633,20 @@ return_addr:
 }
 EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
 
+#ifdef CONFIG_HUGETLB_PAGE
+static int file_to_psize(struct file *file)
+{
+	struct hstate *hstate = hstate_file(file);
+
+	return shift_to_mmu_psize(huge_page_shift(hstate));
+}
+#else
+static int file_to_psize(struct file *file)
+{
+	return 0;
+}
+#endif
+
 unsigned long arch_get_unmapped_area(struct file *filp,
 				     unsigned long addr,
 				     unsigned long len,
@@ -640,11 +654,17 @@ unsigned long arch_get_unmapped_area(struct file *filp,
 				     unsigned long flags,
 				     vm_flags_t vm_flags)
 {
+	unsigned int psize;
+
 	if (radix_enabled())
 		return generic_get_unmapped_area(filp, addr, len, pgoff, flags, vm_flags);
 
-	return slice_get_unmapped_area(addr, len, flags,
-				       mm_ctx_user_psize(&current->mm->context), 0);
+	if (filp && is_file_hugepages(filp))
+		psize = file_to_psize(filp);
+	else
+		psize = mm_ctx_user_psize(&current->mm->context);
+
+	return slice_get_unmapped_area(addr, len, flags, psize, 0);
 }
 
 unsigned long arch_get_unmapped_area_topdown(struct file *filp,
@@ -654,11 +674,17 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
 					     const unsigned long flags,
 					     vm_flags_t vm_flags)
 {
+	unsigned int psize;
+
 	if (radix_enabled())
 		return generic_get_unmapped_area_topdown(filp, addr0, len, pgoff, flags, vm_flags);
 
-	return slice_get_unmapped_area(addr0, len, flags,
-				       mm_ctx_user_psize(&current->mm->context), 1);
+	if (filp && is_file_hugepages(filp))
+		psize = file_to_psize(filp);
+	else
+		psize = mm_ctx_user_psize(&current->mm->context);
+
+	return slice_get_unmapped_area(addr0, len, flags, psize, 1);
 }
 
 unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr)
@@ -788,20 +814,4 @@ unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
 
 	return 1UL << mmu_psize_to_shift(get_slice_psize(vma->vm_mm, vma->vm_start));
 }
-
-static int file_to_psize(struct file *file)
-{
-	struct hstate *hstate = hstate_file(file);
-	return shift_to_mmu_psize(huge_page_shift(hstate));
-}
-
-unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-					unsigned long len, unsigned long pgoff,
-					unsigned long flags)
-{
-	if (radix_enabled())
-		return generic_hugetlb_get_unmapped_area(file, addr, len, pgoff, flags);
-
-	return slice_get_unmapped_area(addr, len, flags, file_to_psize(file), 1);
-}
 #endif
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 81c77ddce2e3..c156fe0d53c3 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -439,10 +439,16 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 	/*
 	 * The kernel should never take an execute fault nor should it
 	 * take a page fault to a kernel address or a page fault to a user
-	 * address outside of dedicated places
+	 * address outside of dedicated places.
+	 *
+	 * Rather than kfence directly reporting false negatives, search whether
+	 * the NIP belongs to the fixup table for cases where fault could come
+	 * from functions like copy_from_kernel_nofault().
 	 */
 	if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) {
-		if (kfence_handle_page_fault(address, is_write, regs))
+		if (is_kfence_address((void *)address) &&
+		    !search_exception_tables(instruction_pointer(regs)) &&
+		    kfence_handle_page_fault(address, is_write, regs))
 			return 0;
 
 		return SIGSEGV;
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index 2978fcbe307e..745097554bea 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -33,6 +33,7 @@ bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
 bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
 #ifdef CONFIG_KFENCE
 bool __ro_after_init kfence_disabled;
+bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL;
 #endif
 
 static int __init parse_nosmep(char *p)
diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c
index aa9aa11927b2..03666d790a53 100644
--- a/arch/powerpc/mm/kasan/init_32.c
+++ b/arch/powerpc/mm/kasan/init_32.c
@@ -7,7 +7,7 @@
 #include <linux/memblock.h>
 #include <linux/sched/task.h>
 #include <asm/pgalloc.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <mm/mmu_decl.h>
 
 static pgprot_t __init kasan_prot_ro(void)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 1221c561b43a..c7708c8fad29 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -26,7 +26,7 @@
 #include <asm/svm.h>
 #include <asm/mmzone.h>
 #include <asm/ftrace.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/setup.h>
 #include <asm/fixmap.h>
 
diff --git a/arch/powerpc/mm/nohash/44x.c b/arch/powerpc/mm/nohash/44x.c
index 1beae802bb1c..6d10c6d8be71 100644
--- a/arch/powerpc/mm/nohash/44x.c
+++ b/arch/powerpc/mm/nohash/44x.c
@@ -24,7 +24,7 @@
 #include <asm/mmu.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/smp.h>
 
 #include <mm/mmu_decl.h>
diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c
index ad2a7c26f2a0..062e8785c1bb 100644
--- a/arch/powerpc/mm/nohash/book3e_pgtable.c
+++ b/arch/powerpc/mm/nohash/book3e_pgtable.c
@@ -10,7 +10,7 @@
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/dma.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 
 #include <mm/mmu_decl.h>
 
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index b653a7be4cb1..0a650742f3a0 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -37,7 +37,7 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/cputhreads.h>
 #include <asm/hugetlb.h>
 #include <asm/paca.h>
diff --git a/arch/powerpc/mm/nohash/tlb_64e.c b/arch/powerpc/mm/nohash/tlb_64e.c
index d26656b07b72..4f925adf2695 100644
--- a/arch/powerpc/mm/nohash/tlb_64e.c
+++ b/arch/powerpc/mm/nohash/tlb_64e.c
@@ -24,7 +24,7 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/cputhreads.h>
 
 #include <mm/mmu_decl.h>
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 7316396e452d..61df5aed7989 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -398,7 +398,7 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
 	 */
 	if (pmd_none(*pmd))
 		return;
-	pte = pte_offset_map_nolock(mm, pmd, addr, &ptl);
+	pte = pte_offset_map_ro_nolock(mm, pmd, addr, &ptl);
 	BUG_ON(!pte);
 	assert_spin_locked(ptl);
 	pte_unmap(pte);
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index cdea5dccaefe..6beacaec63d3 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -12,6 +12,7 @@
 
 #include <asm/types.h>
 #include <asm/ppc-opcode.h>
+#include <linux/build_bug.h>
 
 #ifdef CONFIG_PPC64_ELF_ABI_V1
 #define FUNCTION_DESCR_SIZE	24
@@ -21,6 +22,9 @@
 
 #define CTX_NIA(ctx) ((unsigned long)ctx->idx * 4)
 
+#define SZL			sizeof(unsigned long)
+#define BPF_INSN_SAFETY		64
+
 #define PLANT_INSTR(d, idx, instr)					      \
 	do { if (d) { (d)[idx] = instr; } idx++; } while (0)
 #define EMIT(instr)		PLANT_INSTR(image, ctx->idx, instr)
@@ -81,6 +85,18 @@
 				EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) &       \
 							0xffff));             \
 		} } while (0)
+#define PPC_LI_ADDR	PPC_LI64
+
+#ifndef CONFIG_PPC_KERNEL_PCREL
+#define PPC64_LOAD_PACA()						      \
+	EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc)))
+#else
+#define PPC64_LOAD_PACA() do {} while (0)
+#endif
+#else
+#define PPC_LI64(d, i)	BUILD_BUG()
+#define PPC_LI_ADDR	PPC_LI32
+#define PPC64_LOAD_PACA() BUILD_BUG()
 #endif
 
 /*
@@ -165,6 +181,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
 		       u32 *addrs, int pass, bool extra_pass);
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx);
 void bpf_jit_realloc_regs(struct codegen_context *ctx);
 int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr);
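On the bpf_jit.h additions above: the PPC32 fallbacks turn any accidental use of a 64-bit-only emitter into a compile-time failure rather than a silent miscompile, and PPC64_LOAD_PACA() re-establishes the kernel TOC in r2 (from paca_struct.kernel_toc via r13) before JITed code calls into the kernel, except under CONFIG_PPC_KERNEL_PCREL where no TOC reload is needed. The guard pattern, reduced to its shape:

    #ifdef CONFIG_PPC64
    #define PPC_LI_ADDR       PPC_LI64
    #else
    #define PPC_LI64(d, i)    BUILD_BUG()   /* must never be reachable on ppc32 */
    #define PPC_LI_ADDR       PPC_LI32
    #define PPC64_LOAD_PACA() BUILD_BUG()
    #endif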
} +void dummy_tramp(void); + +asm ( +" .pushsection .text, \"ax\", @progbits ;" +" .global dummy_tramp ;" +" .type dummy_tramp, @function ;" +"dummy_tramp: ;" +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +" blr ;" +#else +/* LR is always in r11, so we don't need a 'mflr r11' here */ +" mtctr 11 ;" +" mtlr 0 ;" +" bctr ;" +#endif +" .size dummy_tramp, .-dummy_tramp ;" +" .popsection ;" +); + +void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx) +{ + int ool_stub_idx, long_branch_stub_idx; + + /* + * Out-of-line stub: + * mflr r0 + * [b|bl] tramp + * mtlr r0 // only with CONFIG_PPC_FTRACE_OUT_OF_LINE + * b bpf_func + 4 + */ + ool_stub_idx = ctx->idx; + EMIT(PPC_RAW_MFLR(_R0)); + EMIT(PPC_RAW_NOP()); + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + EMIT(PPC_RAW_MTLR(_R0)); + WARN_ON_ONCE(!is_offset_in_branch_range(4 - (long)ctx->idx * 4)); + EMIT(PPC_RAW_BRANCH(4 - (long)ctx->idx * 4)); + + /* + * Long branch stub: + * .long <dummy_tramp_addr> + * mflr r11 + * bcl 20,31,$+4 + * mflr r12 + * ld r12, -8-SZL(r12) + * mtctr r12 + * mtlr r11 // needed to retain ftrace ABI + * bctr + */ + if (image) + *((unsigned long *)&image[ctx->idx]) = (unsigned long)dummy_tramp; + ctx->idx += SZL / 4; + long_branch_stub_idx = ctx->idx; + EMIT(PPC_RAW_MFLR(_R11)); + EMIT(PPC_RAW_BCL4()); + EMIT(PPC_RAW_MFLR(_R12)); + EMIT(PPC_RAW_LL(_R12, _R12, -8-SZL)); + EMIT(PPC_RAW_MTCTR(_R12)); + EMIT(PPC_RAW_MTLR(_R11)); + EMIT(PPC_RAW_BCTR()); + + if (!bpf_jit_ool_stub) { + bpf_jit_ool_stub = (ctx->idx - ool_stub_idx) * 4; + bpf_jit_long_branch_stub = (ctx->idx - long_branch_stub_idx) * 4; + } +} + int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr) { if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) { @@ -222,7 +292,7 @@ skip_init_ctx: fp->bpf_func = (void *)fimage; fp->jited = 1; - fp->jited_len = proglen + FUNCTION_DESCR_SIZE; + fp->jited_len = cgctx.idx * 4 + FUNCTION_DESCR_SIZE; if (!fp->is_func || extra_pass) { if (bpf_jit_binary_pack_finalize(fhdr, hdr)) { @@ -369,3 +439,778 @@ bool bpf_jit_supports_far_kfunc_call(void) { return IS_ENABLED(CONFIG_PPC64); } + +void *arch_alloc_bpf_trampoline(unsigned int size) +{ + return bpf_prog_pack_alloc(size, bpf_jit_fill_ill_insns); +} + +void arch_free_bpf_trampoline(void *image, unsigned int size) +{ + bpf_prog_pack_free(image, size); +} + +int arch_protect_bpf_trampoline(void *image, unsigned int size) +{ + return 0; +} + +static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ctx, + struct bpf_tramp_link *l, int regs_off, int retval_off, + int run_ctx_off, bool save_ret) +{ + struct bpf_prog *p = l->link.prog; + ppc_inst_t branch_insn; + u32 jmp_idx; + int ret = 0; + + /* Save cookie */ + if (IS_ENABLED(CONFIG_PPC64)) { + PPC_LI64(_R3, l->cookie); + EMIT(PPC_RAW_STD(_R3, _R1, run_ctx_off + offsetof(struct bpf_tramp_run_ctx, + bpf_cookie))); + } else { + PPC_LI32(_R3, l->cookie >> 32); + PPC_LI32(_R4, l->cookie); + EMIT(PPC_RAW_STW(_R3, _R1, + run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie))); + EMIT(PPC_RAW_STW(_R4, _R1, + run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie) + 4)); + } + + /* __bpf_prog_enter(p, &bpf_tramp_run_ctx) */ + PPC_LI_ADDR(_R3, p); + EMIT(PPC_RAW_MR(_R25, _R3)); + EMIT(PPC_RAW_ADDI(_R4, _R1, run_ctx_off)); + ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx, + (unsigned long)bpf_trampoline_enter(p)); + if (ret) + return ret; + + /* Remember prog start time returned by __bpf_prog_enter */ + EMIT(PPC_RAW_MR(_R26, _R3)); 
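Note the range check in bpf_jit_build_fentry_stubs() above: the stub's closing branch back to bpf_func + 4 must fit a plain PowerPC b instruction. As a hedged, host-side illustration of what is_offset_in_branch_range() accepts for unconditional branches (a 26-bit signed, word-aligned displacement; this is a sketch, not the kernel helper itself):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative only: a PowerPC I-form branch (b/bl) encodes a 26-bit
 * signed, word-aligned displacement, i.e. roughly +/- 32MB. */
static bool offset_in_branch_range(long offset)
{
	return offset >= -0x2000000 && offset <= 0x1fffffc && !(offset & 0x3);
}

int main(void)
{
	printf("%d\n", offset_in_branch_range(4 - 4 * 4L));	/* 1: ool stub back to bpf_func + 4 */
	printf("%d\n", offset_in_branch_range(0x2000000));	/* 0: one word out of range */
	return 0;
}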
+ + /* + * if (__bpf_prog_enter(p) == 0) + * goto skip_exec_of_prog; + * + * Emit a nop to be later patched with conditional branch, once offset is known + */ + EMIT(PPC_RAW_CMPLI(_R3, 0)); + jmp_idx = ctx->idx; + EMIT(PPC_RAW_NOP()); + + /* p->bpf_func(ctx) */ + EMIT(PPC_RAW_ADDI(_R3, _R1, regs_off)); + if (!p->jited) + PPC_LI_ADDR(_R4, (unsigned long)p->insnsi); + if (!create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx], (unsigned long)p->bpf_func, + BRANCH_SET_LINK)) { + if (image) + image[ctx->idx] = ppc_inst_val(branch_insn); + ctx->idx++; + } else { + EMIT(PPC_RAW_LL(_R12, _R25, offsetof(struct bpf_prog, bpf_func))); + EMIT(PPC_RAW_MTCTR(_R12)); + EMIT(PPC_RAW_BCTRL()); + } + + if (save_ret) + EMIT(PPC_RAW_STL(_R3, _R1, retval_off)); + + /* Fix up branch */ + if (image) { + if (create_cond_branch(&branch_insn, &image[jmp_idx], + (unsigned long)&image[ctx->idx], COND_EQ << 16)) + return -EINVAL; + image[jmp_idx] = ppc_inst_val(branch_insn); + } + + /* __bpf_prog_exit(p, start_time, &bpf_tramp_run_ctx) */ + EMIT(PPC_RAW_MR(_R3, _R25)); + EMIT(PPC_RAW_MR(_R4, _R26)); + EMIT(PPC_RAW_ADDI(_R5, _R1, run_ctx_off)); + ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx, + (unsigned long)bpf_trampoline_exit(p)); + + return ret; +} + +static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context *ctx, + struct bpf_tramp_links *tl, int regs_off, int retval_off, + int run_ctx_off, u32 *branches) +{ + int i; + + /* + * The first fmod_ret program will receive a garbage return value. + * Set this to 0 to avoid confusing the program. + */ + EMIT(PPC_RAW_LI(_R3, 0)); + EMIT(PPC_RAW_STL(_R3, _R1, retval_off)); + for (i = 0; i < tl->nr_links; i++) { + if (invoke_bpf_prog(image, ro_image, ctx, tl->links[i], regs_off, retval_off, + run_ctx_off, true)) + return -EINVAL; + + /* + * mod_ret prog stored return value after prog ctx. Emit: + * if (*(u64 *)(ret_val) != 0) + * goto do_fexit; + */ + EMIT(PPC_RAW_LL(_R3, _R1, retval_off)); + EMIT(PPC_RAW_CMPLI(_R3, 0)); + + /* + * Save the location of the branch and generate a nop, which is + * replaced with a conditional jump once do_fexit (i.e. the + * start of the fexit invocation) is finalized. 
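The nop-then-patch idiom used here (and again for the fmod_ret branches below) is the standard two-pass trick: remember the index of a placeholder, keep emitting, then overwrite the placeholder once the forward target is known. A minimal host-side sketch of the bookkeeping, with a hand-rolled bne encoding standing in for the kernel's create_cond_branch():

#include <stdint.h>
#include <stdio.h>

#define PPC_NOP	0x60000000u		/* ori r0,r0,0 */

int main(void)
{
	uint32_t image[8];
	unsigned int idx = 0, jmp_idx;

	jmp_idx = idx;			/* slot to fix up later */
	image[idx++] = PPC_NOP;		/* placeholder for the cond. branch */
	image[idx++] = PPC_NOP;		/* ...code that may be skipped... */

	/* Target now known: branch (idx - jmp_idx) words forward. */
	int32_t disp = (idx - jmp_idx) * 4;
	image[jmp_idx] = 0x40820000u | (disp & 0xfffc);	/* bne cr0, $+disp */
	printf("slot %u patched to 0x%08x\n", jmp_idx, image[jmp_idx]);
	return 0;
}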
+ */ + branches[i] = ctx->idx; + EMIT(PPC_RAW_NOP()); + } + + return 0; +} + +static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_context *ctx, + int func_frame_offset, int r4_off) +{ + if (IS_ENABLED(CONFIG_PPC64)) { + /* See bpf_jit_stack_tailcallcnt() */ + int tailcallcnt_offset = 6 * 8; + + EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset - tailcallcnt_offset)); + EMIT(PPC_RAW_STL(_R3, _R1, -tailcallcnt_offset)); + } else { + /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */ + EMIT(PPC_RAW_LL(_R4, _R1, r4_off)); + } +} + +static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_context *ctx, + int func_frame_offset, int r4_off) +{ + if (IS_ENABLED(CONFIG_PPC64)) { + /* See bpf_jit_stack_tailcallcnt() */ + int tailcallcnt_offset = 6 * 8; + + EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset)); + EMIT(PPC_RAW_STL(_R3, _R1, func_frame_offset - tailcallcnt_offset)); + } else { + /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */ + EMIT(PPC_RAW_STL(_R4, _R1, r4_off)); + } +} + +static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx, int func_frame_offset, + int nr_regs, int regs_off) +{ + int param_save_area_offset; + + param_save_area_offset = func_frame_offset; /* the two frames we allotted */ + param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */ + + for (int i = 0; i < nr_regs; i++) { + if (i < 8) { + EMIT(PPC_RAW_STL(_R3 + i, _R1, regs_off + i * SZL)); + } else { + EMIT(PPC_RAW_LL(_R3, _R1, param_save_area_offset + i * SZL)); + EMIT(PPC_RAW_STL(_R3, _R1, regs_off + i * SZL)); + } + } +} + +/* Used when restoring just the register parameters when returning */ +static void bpf_trampoline_restore_args_regs(u32 *image, struct codegen_context *ctx, + int nr_regs, int regs_off) +{ + for (int i = 0; i < nr_regs && i < 8; i++) + EMIT(PPC_RAW_LL(_R3 + i, _R1, regs_off + i * SZL)); +} + +/* Used when we call into the traced function.
Replicate parameter save area */ +static void bpf_trampoline_restore_args_stack(u32 *image, struct codegen_context *ctx, + int func_frame_offset, int nr_regs, int regs_off) +{ + int param_save_area_offset; + + param_save_area_offset = func_frame_offset; /* the two frames we allotted */ + param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */ + + for (int i = 8; i < nr_regs; i++) { + EMIT(PPC_RAW_LL(_R3, _R1, param_save_area_offset + i * SZL)); + EMIT(PPC_RAW_STL(_R3, _R1, STACK_FRAME_MIN_SIZE + i * SZL)); + } + bpf_trampoline_restore_args_regs(image, ctx, nr_regs, regs_off); +} + +static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image, + void *rw_image_end, void *ro_image, + const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, + void *func_addr) +{ + int regs_off, nregs_off, ip_off, run_ctx_off, retval_off, nvr_off, alt_lr_off, r4_off = 0; + int i, ret, nr_regs, bpf_frame_size = 0, bpf_dummy_frame_size = 0, func_frame_offset; + struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; + struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; + struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; + struct codegen_context codegen_ctx, *ctx; + u32 *image = (u32 *)rw_image; + ppc_inst_t branch_insn; + u32 *branches = NULL; + bool save_ret; + + if (IS_ENABLED(CONFIG_PPC32)) + return -EOPNOTSUPP; + + nr_regs = m->nr_args; + /* Extra registers for struct arguments */ + for (i = 0; i < m->nr_args; i++) + if (m->arg_size[i] > SZL) + nr_regs += round_up(m->arg_size[i], SZL) / SZL - 1; + + if (nr_regs > MAX_BPF_FUNC_ARGS) + return -EOPNOTSUPP; + + ctx = &codegen_ctx; + memset(ctx, 0, sizeof(*ctx)); + + /* + * Generated stack layout: + * + * func prev back chain [ back chain ] + * [ ] + * bpf prog redzone/tailcallcnt [ ... ] 64 bytes (64-bit powerpc) + * [ ] -- + * LR save area [ r0 save (64-bit) ] | header + * [ r0 save (32-bit) ] | + * dummy frame for unwind [ back chain 1 ] -- + * [ padding ] align stack frame + * r4_off [ r4 (tailcallcnt) ] optional - 32-bit powerpc + * alt_lr_off [ real lr (ool stub)] optional - actual lr + * [ r26 ] + * nvr_off [ r25 ] nvr save area + * retval_off [ return value ] + * [ reg argN ] + * [ ... ] + * regs_off [ reg_arg1 ] bpf prog ctx + * nregs_off [ args count ] + * ip_off [ traced function ] + * [ ... ] + * run_ctx_off [ bpf_tramp_run_ctx ] + * [ reg argN ] + * [ ... ] + * param_save_area [ reg_arg1 ] min 8 doublewords, per ABI + * [ TOC save (64-bit) ] -- + * [ LR save (64-bit) ] | header + * [ LR save (32-bit) ] | + * bpf trampoline frame [ back chain 2 ] -- + * + */ + + /* Minimum stack frame header */ + bpf_frame_size = STACK_FRAME_MIN_SIZE; + + /* + * Room for parameter save area. + * + * As per the ABI, this is required if we call into the traced + * function (BPF_TRAMP_F_CALL_ORIG): + * - if the function takes more than 8 arguments, so the rest spill onto the stack + * - or, if the function has variadic arguments + * - or, if this function's prototype was not available to the caller + * + * Reserve space for at least 8 registers for now. This can be optimized later. + */ + bpf_frame_size += (nr_regs > 8 ?
nr_regs : 8) * SZL; + + /* Room for struct bpf_tramp_run_ctx */ + run_ctx_off = bpf_frame_size; + bpf_frame_size += round_up(sizeof(struct bpf_tramp_run_ctx), SZL); + + /* Room for IP address argument */ + ip_off = bpf_frame_size; + if (flags & BPF_TRAMP_F_IP_ARG) + bpf_frame_size += SZL; + + /* Room for args count */ + nregs_off = bpf_frame_size; + bpf_frame_size += SZL; + + /* Room for args */ + regs_off = bpf_frame_size; + bpf_frame_size += nr_regs * SZL; + + /* Room for return value of func_addr or fentry prog */ + retval_off = bpf_frame_size; + save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); + if (save_ret) + bpf_frame_size += SZL; + + /* Room for nvr save area */ + nvr_off = bpf_frame_size; + bpf_frame_size += 2 * SZL; + + /* Optional save area for actual LR in case of ool ftrace */ + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { + alt_lr_off = bpf_frame_size; + bpf_frame_size += SZL; + } + + if (IS_ENABLED(CONFIG_PPC32)) { + if (nr_regs < 2) { + r4_off = bpf_frame_size; + bpf_frame_size += SZL; + } else { + r4_off = regs_off + SZL; + } + } + + /* Padding to align stack frame, if any */ + bpf_frame_size = round_up(bpf_frame_size, SZL * 2); + + /* Dummy frame size for proper unwind - includes 64-byte red zone for 64-bit powerpc */ + bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64; + + /* Offset to the traced function's stack frame */ + func_frame_offset = bpf_dummy_frame_size + bpf_frame_size; + + /* Create dummy frame for unwind, store original return value */ + EMIT(PPC_RAW_STL(_R0, _R1, PPC_LR_STKOFF)); + /* Protect red zone where tail call count goes */ + EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_dummy_frame_size)); + + /* Create our stack frame */ + EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_frame_size)); + + /* 64-bit: Save TOC and load kernel TOC */ + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { + EMIT(PPC_RAW_STD(_R2, _R1, 24)); + PPC64_LOAD_PACA(); + } + + /* 32-bit: save tail call count in r4 */ + if (IS_ENABLED(CONFIG_PPC32) && nr_regs < 2) + EMIT(PPC_RAW_STL(_R4, _R1, r4_off)); + + bpf_trampoline_save_args(image, ctx, func_frame_offset, nr_regs, regs_off); + + /* Save our return address */ + EMIT(PPC_RAW_MFLR(_R3)); + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + EMIT(PPC_RAW_STL(_R3, _R1, alt_lr_off)); + else + EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); + + /* + * Save ip address of the traced function. + * We could recover this from LR, but we would need to adjust for the OOL trampoline + * and the optional GEP area.
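To make the bookkeeping above concrete, here is a small host-side sketch (not kernel code) replaying the offset accumulation for a hypothetical 64-bit trampoline over a two-argument traced function, with BPF_TRAMP_F_IP_ARG set, save_ret true, and out-of-line ftrace disabled; sizeof(struct bpf_tramp_run_ctx) is assumed to be 16 here:

#include <stdio.h>

#define SZL			8	/* 64-bit */
#define STACK_FRAME_MIN_SIZE	32	/* ELFv2 frame header */

int main(void)
{
	int nr_regs = 2, frame = STACK_FRAME_MIN_SIZE;

	frame += (nr_regs > 8 ? nr_regs : 8) * SZL;		/* param save area */
	int run_ctx_off = frame; frame += 16;			/* bpf_tramp_run_ctx (assumed size) */
	int ip_off = frame; frame += SZL;			/* BPF_TRAMP_F_IP_ARG */
	int nregs_off = frame; frame += SZL;			/* args count */
	int regs_off = frame; frame += nr_regs * SZL;		/* args */
	int retval_off = frame; frame += SZL;			/* save_ret */
	int nvr_off = frame; frame += 2 * SZL;			/* r25/r26 */
	frame = (frame + 2 * SZL - 1) & ~(2 * SZL - 1);		/* round_up(frame, SZL * 2) */

	printf("run_ctx=%d ip=%d nregs=%d regs=%d retval=%d nvr=%d frame=%d\n",
	       run_ctx_off, ip_off, nregs_off, regs_off, retval_off, nvr_off, frame);
	return 0;
}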
+ */ + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) || flags & BPF_TRAMP_F_IP_ARG) { + EMIT(PPC_RAW_LWZ(_R4, _R3, 4)); + EMIT(PPC_RAW_SLWI(_R4, _R4, 6)); + EMIT(PPC_RAW_SRAWI(_R4, _R4, 6)); + EMIT(PPC_RAW_ADD(_R3, _R3, _R4)); + EMIT(PPC_RAW_ADDI(_R3, _R3, 4)); + } + + if (flags & BPF_TRAMP_F_IP_ARG) + EMIT(PPC_RAW_STL(_R3, _R1, ip_off)); + + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + /* Fake our LR for unwind */ + EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); + + /* Save function arg count -- see bpf_get_func_arg_cnt() */ + EMIT(PPC_RAW_LI(_R3, nr_regs)); + EMIT(PPC_RAW_STL(_R3, _R1, nregs_off)); + + /* Save nv regs */ + EMIT(PPC_RAW_STL(_R25, _R1, nvr_off)); + EMIT(PPC_RAW_STL(_R26, _R1, nvr_off + SZL)); + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + PPC_LI_ADDR(_R3, (unsigned long)im); + ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx, + (unsigned long)__bpf_tramp_enter); + if (ret) + return ret; + } + + for (i = 0; i < fentry->nr_links; i++) + if (invoke_bpf_prog(image, ro_image, ctx, fentry->links[i], regs_off, retval_off, + run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET)) + return -EINVAL; + + if (fmod_ret->nr_links) { + branches = kcalloc(fmod_ret->nr_links, sizeof(u32), GFP_KERNEL); + if (!branches) + return -ENOMEM; + + if (invoke_bpf_mod_ret(image, ro_image, ctx, fmod_ret, regs_off, retval_off, + run_ctx_off, branches)) { + ret = -EINVAL; + goto cleanup; + } + } + + /* Call the traced function */ + if (flags & BPF_TRAMP_F_CALL_ORIG) { + /* + * The address in LR save area points to the correct point in the original function + * with both PPC_FTRACE_OUT_OF_LINE as well as with traditional ftrace instruction + * sequence + */ + EMIT(PPC_RAW_LL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); + EMIT(PPC_RAW_MTCTR(_R3)); + + /* Replicate tail_call_cnt before calling the original BPF prog */ + if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) + bpf_trampoline_setup_tail_call_cnt(image, ctx, func_frame_offset, r4_off); + + /* Restore args */ + bpf_trampoline_restore_args_stack(image, ctx, func_frame_offset, nr_regs, regs_off); + + /* Restore TOC for 64-bit */ + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + EMIT(PPC_RAW_LD(_R2, _R1, 24)); + EMIT(PPC_RAW_BCTRL()); + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + PPC64_LOAD_PACA(); + + /* Store return value for bpf prog to access */ + EMIT(PPC_RAW_STL(_R3, _R1, retval_off)); + + /* Restore updated tail_call_cnt */ + if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) + bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off); + + /* Reserve space to patch branch instruction to skip fexit progs */ + im->ip_after_call = &((u32 *)ro_image)[ctx->idx]; + EMIT(PPC_RAW_NOP()); + } + + /* Update branches saved in invoke_bpf_mod_ret with address of do_fexit */ + for (i = 0; i < fmod_ret->nr_links && image; i++) { + if (create_cond_branch(&branch_insn, &image[branches[i]], + (unsigned long)&image[ctx->idx], COND_NE << 16)) { + ret = -EINVAL; + goto cleanup; + } + + image[branches[i]] = ppc_inst_val(branch_insn); + } + + for (i = 0; i < fexit->nr_links; i++) + if (invoke_bpf_prog(image, ro_image, ctx, fexit->links[i], regs_off, retval_off, + run_ctx_off, false)) { + ret = -EINVAL; + goto cleanup; + } + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + im->ip_epilogue = &((u32 *)ro_image)[ctx->idx]; + PPC_LI_ADDR(_R3, im); + ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx, + (unsigned long)__bpf_tramp_exit); + if (ret) + goto cleanup; + } + + if (flags & 
BPF_TRAMP_F_RESTORE_REGS) + bpf_trampoline_restore_args_regs(image, ctx, nr_regs, regs_off); + + /* Restore return value of func_addr or fentry prog */ + if (save_ret) + EMIT(PPC_RAW_LL(_R3, _R1, retval_off)); + + /* Restore nv regs */ + EMIT(PPC_RAW_LL(_R26, _R1, nvr_off + SZL)); + EMIT(PPC_RAW_LL(_R25, _R1, nvr_off)); + + /* Epilogue */ + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + EMIT(PPC_RAW_LD(_R2, _R1, 24)); + if (flags & BPF_TRAMP_F_SKIP_FRAME) { + /* Skip the traced function and return to parent */ + EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); + EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); + EMIT(PPC_RAW_MTLR(_R0)); + EMIT(PPC_RAW_BLR()); + } else { + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { + EMIT(PPC_RAW_LL(_R0, _R1, alt_lr_off)); + EMIT(PPC_RAW_MTLR(_R0)); + EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); + EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); + EMIT(PPC_RAW_BLR()); + } else { + EMIT(PPC_RAW_LL(_R0, _R1, bpf_frame_size + PPC_LR_STKOFF)); + EMIT(PPC_RAW_MTCTR(_R0)); + EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); + EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); + EMIT(PPC_RAW_MTLR(_R0)); + EMIT(PPC_RAW_BCTR()); + } + } + + /* Make sure the trampoline generation logic doesn't overflow */ + if (image && WARN_ON_ONCE(&image[ctx->idx] > (u32 *)rw_image_end - BPF_INSN_SAFETY)) { + ret = -EFAULT; + goto cleanup; + } + ret = ctx->idx * 4 + BPF_INSN_SAFETY * 4; + +cleanup: + kfree(branches); + return ret; +} + +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, void *func_addr) +{ + struct bpf_tramp_image im; + void *image; + int ret; + + /* + * Allocate a temporary buffer for __arch_prepare_bpf_trampoline(). + * This will NOT cause fragmentation in direct map, as we do not + * call set_memory_*() on this buffer. + * + * We cannot use kvmalloc here, because we need image to be in + * module memory range. + */ + image = bpf_jit_alloc_exec(PAGE_SIZE); + if (!image) + return -ENOMEM; + + ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image, + m, flags, tlinks, func_addr); + bpf_jit_free_exec(image); + + return ret; +} + +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, + const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, + void *func_addr) +{ + u32 size = image_end - image; + void *rw_image, *tmp; + int ret; + + /* + * rw_image doesn't need to be in module memory range, so we can + * use kvmalloc. 
+ */ + rw_image = kvmalloc(size, GFP_KERNEL); + if (!rw_image) + return -ENOMEM; + + ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m, + flags, tlinks, func_addr); + if (ret < 0) + goto out; + + if (bpf_jit_enable > 1) + bpf_jit_dump(1, ret - BPF_INSN_SAFETY * 4, 1, rw_image); + + tmp = bpf_arch_text_copy(image, rw_image, size); + if (IS_ERR(tmp)) + ret = PTR_ERR(tmp); + +out: + kvfree(rw_image); + return ret; +} + +static int bpf_modify_inst(void *ip, ppc_inst_t old_inst, ppc_inst_t new_inst) +{ + ppc_inst_t org_inst; + + if (copy_inst_from_kernel_nofault(&org_inst, ip)) { + pr_err("0x%lx: fetching instruction failed\n", (unsigned long)ip); + return -EFAULT; + } + + if (!ppc_inst_equal(org_inst, old_inst)) { + pr_err("0x%lx: expected (%08lx) != found (%08lx)\n", + (unsigned long)ip, ppc_inst_as_ulong(old_inst), ppc_inst_as_ulong(org_inst)); + return -EINVAL; + } + + if (ppc_inst_equal(old_inst, new_inst)) + return 0; + + return patch_instruction(ip, new_inst); +} + +static void do_isync(void *info __maybe_unused) +{ + isync(); +} + +/* + * A 3-step process for bpf prog entry: + * 1. At bpf prog entry, a single nop/b: + * bpf_func: + * [nop|b] ool_stub + * 2. Out-of-line stub: + * ool_stub: + * mflr r0 + * [b|bl] <bpf_prog>/<long_branch_stub> + * mtlr r0 // CONFIG_PPC_FTRACE_OUT_OF_LINE only + * b bpf_func + 4 + * 3. Long branch stub: + * long_branch_stub: + * .long <branch_addr>/<dummy_tramp> + * mflr r11 + * bcl 20,31,$+4 + * mflr r12 + * ld r12, -16(r12) + * mtctr r12 + * mtlr r11 // needed to retain ftrace ABI + * bctr + * + * dummy_tramp is used to reduce synchronization requirements. + * + * When attaching a bpf trampoline to a bpf prog, we do not need any + * synchronization here since we always have a valid branch target regardless + * of the order in which the above stores are seen. dummy_tramp ensures that + * the long_branch stub goes to a valid destination on other cpus, even when + * the branch to the long_branch stub is seen before the updated trampoline + * address. + * + * However, when detaching a bpf trampoline from a bpf prog, or if changing + * the bpf trampoline address, we need synchronization to ensure that other + * cpus can no longer branch into the older trampoline so that it can be + * safely freed. bpf_tramp_image_put() uses rcu_tasks to ensure all cpus + * make forward progress, but we still need to ensure that other cpus + * execute isync (or some CSI) so that they don't go back into the + * trampoline again. + */ +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, + void *old_addr, void *new_addr) +{ + unsigned long bpf_func, bpf_func_end, size, offset; + ppc_inst_t old_inst, new_inst; + int ret = 0, branch_flags; + char name[KSYM_NAME_LEN]; + + if (IS_ENABLED(CONFIG_PPC32)) + return -EOPNOTSUPP; + + bpf_func = (unsigned long)ip; + branch_flags = poke_type == BPF_MOD_CALL ? 
BRANCH_SET_LINK : 0; + + /* We currently only support poking bpf programs */ + if (!__bpf_address_lookup(bpf_func, &size, &offset, name)) { + pr_err("%s (0x%lx): kernel/modules are not supported\n", __func__, bpf_func); + return -EOPNOTSUPP; + } + + /* + * If we are not poking at bpf prog entry, then we are simply patching in/out + * an unconditional branch instruction at im->ip_after_call + */ + if (offset) { + if (poke_type != BPF_MOD_JUMP) { + pr_err("%s (0x%lx): calls are not supported in bpf prog body\n", __func__, + bpf_func); + return -EOPNOTSUPP; + } + old_inst = ppc_inst(PPC_RAW_NOP()); + if (old_addr) + if (create_branch(&old_inst, ip, (unsigned long)old_addr, 0)) + return -ERANGE; + new_inst = ppc_inst(PPC_RAW_NOP()); + if (new_addr) + if (create_branch(&new_inst, ip, (unsigned long)new_addr, 0)) + return -ERANGE; + mutex_lock(&text_mutex); + ret = bpf_modify_inst(ip, old_inst, new_inst); + mutex_unlock(&text_mutex); + + /* Make sure all cpus see the new instruction */ + smp_call_function(do_isync, NULL, 1); + return ret; + } + + bpf_func_end = bpf_func + size; + + /* Address of the jmp/call instruction in the out-of-line stub */ + ip = (void *)(bpf_func_end - bpf_jit_ool_stub + 4); + + if (!is_offset_in_branch_range((long)ip - 4 - bpf_func)) { + pr_err("%s (0x%lx): bpf prog too large, ool stub out of branch range\n", __func__, + bpf_func); + return -ERANGE; + } + + old_inst = ppc_inst(PPC_RAW_NOP()); + if (old_addr) { + if (is_offset_in_branch_range(ip - old_addr)) + create_branch(&old_inst, ip, (unsigned long)old_addr, branch_flags); + else + create_branch(&old_inst, ip, bpf_func_end - bpf_jit_long_branch_stub, + branch_flags); + } + new_inst = ppc_inst(PPC_RAW_NOP()); + if (new_addr) { + if (is_offset_in_branch_range(ip - new_addr)) + create_branch(&new_inst, ip, (unsigned long)new_addr, branch_flags); + else + create_branch(&new_inst, ip, bpf_func_end - bpf_jit_long_branch_stub, + branch_flags); + } + + mutex_lock(&text_mutex); + + /* + * 1. Update the address in the long branch stub: + * If new_addr is out of range, we will have to use the long branch stub, so patch new_addr + * here. Otherwise, revert to dummy_tramp, but only if we had patched old_addr here. + */ + if ((new_addr && !is_offset_in_branch_range(new_addr - ip)) || + (old_addr && !is_offset_in_branch_range(old_addr - ip))) + ret = patch_ulong((void *)(bpf_func_end - bpf_jit_long_branch_stub - SZL), + (new_addr && !is_offset_in_branch_range(new_addr - ip)) ? + (unsigned long)new_addr : (unsigned long)dummy_tramp); + if (ret) + goto out; + + /* 2. Update the branch/call in the out-of-line stub */ + ret = bpf_modify_inst(ip, old_inst, new_inst); + if (ret) + goto out; + + /* 3. Update instruction at bpf prog entry */ + ip = (void *)bpf_func; + if (!old_addr || !new_addr) { + if (!old_addr) { + old_inst = ppc_inst(PPC_RAW_NOP()); + create_branch(&new_inst, ip, bpf_func_end - bpf_jit_ool_stub, 0); + } else { + new_inst = ppc_inst(PPC_RAW_NOP()); + create_branch(&old_inst, ip, bpf_func_end - bpf_jit_ool_stub, 0); + } + ret = bpf_modify_inst(ip, old_inst, new_inst); + } + +out: + mutex_unlock(&text_mutex); + + /* + * Sync only if we are not attaching a trampoline to a bpf prog so the older + * trampoline can be freed safely. 
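bpf_modify_inst() above never patches blind: it re-reads the site and bails out unless the instruction currently there is exactly the expected old one, which is what makes the patch steps below safe to verify. A host-side analogue of that compare-before-patch discipline (plain memory standing in for kernel text; illustrative only):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int modify_inst(uint32_t *ip, uint32_t old, uint32_t repl)
{
	uint32_t cur;

	memcpy(&cur, ip, sizeof(cur));		/* fetch what is really there */
	if (cur != old) {
		fprintf(stderr, "expected %08x != found %08x\n", old, cur);
		return -1;			/* refuse to patch blind */
	}
	if (old == repl)
		return 0;			/* nothing to do */
	memcpy(ip, &repl, sizeof(repl));	/* patch_instruction() in the kernel */
	return 0;
}

int main(void)
{
	uint32_t site = 0x60000000;		/* nop */

	printf("%d\n", modify_inst(&site, 0x60000000, 0x48000010));	/* ok */
	printf("%d\n", modify_inst(&site, 0x60000000, 0x48000020));	/* stale 'old' */
	return 0;
}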
+ */ + if (old_addr) + smp_call_function(do_isync, NULL, 1); + + return ret; +} diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index a0c4f1bde83e..c4db278dae36 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -127,13 +127,16 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; + /* Instruction for trampoline attach */ + EMIT(PPC_RAW_NOP()); + /* Initialize tail_call_cnt, to be skipped if we do tail calls. */ if (ctx->seen & SEEN_TAILCALL) EMIT(PPC_RAW_LI(_R4, 0)); else EMIT(PPC_RAW_NOP()); -#define BPF_TAILCALL_PROLOGUE_SIZE 4 +#define BPF_TAILCALL_PROLOGUE_SIZE 8 if (bpf_has_stack_frame(ctx)) EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); @@ -198,6 +201,8 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) bpf_jit_emit_common_epilogue(image, ctx); EMIT(PPC_RAW_BLR()); + + bpf_jit_build_fentry_stubs(image, ctx); } /* Relative offset needs to be calculated based on final image location */ diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 2cbcdf93cc19..233703b06d7c 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -84,7 +84,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) } /* - * When not setting up our own stackframe, the redzone usage is: + * When not setting up our own stackframe, the redzone (288 bytes) usage is: * * [ prev sp ] <------------- * [ ... ] | @@ -92,7 +92,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * [ nv gpr save area ] 5*8 * [ tail_call_cnt ] 8 * [ local_tmp_var ] 16 - * [ unused red zone ] 208 bytes protected + * [ unused red zone ] 224 */ static int bpf_jit_stack_local(struct codegen_context *ctx) { @@ -126,6 +126,9 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; + /* Instruction for trampoline attach */ + EMIT(PPC_RAW_NOP()); + #ifndef CONFIG_PPC_KERNEL_PCREL if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))); @@ -200,16 +203,26 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); EMIT(PPC_RAW_BLR()); + + bpf_jit_build_fentry_stubs(image, ctx); } -static int -bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) +int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) { unsigned long func_addr = func ? ppc_function_entry((void *)func) : 0; long reladdr; - if (WARN_ON_ONCE(!kernel_text_address(func_addr))) - return -EINVAL; + /* bpf to bpf call, func is not known in the initial pass. Emit 5 nops as a placeholder */ + if (!func) { + for (int i = 0; i < 5; i++) + EMIT(PPC_RAW_NOP()); + /* elfv1 needs an additional instruction to load addr from descriptor */ + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1)) + EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_MTCTR(_R12)); + EMIT(PPC_RAW_BCTRL()); + return 0; + } #ifdef CONFIG_PPC_KERNEL_PCREL reladdr = func_addr - local_paca->kernelbase; @@ -266,7 +279,8 @@ bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, * We can clobber r2 since we get called through a * function pointer (so caller will save/restore r2). 
*/ - EMIT(PPC_RAW_LD(_R2, bpf_to_ppc(TMP_REG_2), 8)); + if (is_module_text_address(func_addr)) + EMIT(PPC_RAW_LD(_R2, bpf_to_ppc(TMP_REG_2), 8)); } else { PPC_LI64(_R12, func); EMIT(PPC_RAW_MTCTR(_R12)); @@ -276,46 +290,14 @@ bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, * Load r2 with kernel TOC as kernel TOC is used if function address falls * within core kernel text. */ - EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))); + if (is_module_text_address(func_addr)) + EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))); } #endif return 0; } -int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) -{ - unsigned int i, ctx_idx = ctx->idx; - - if (WARN_ON_ONCE(func && is_module_text_address(func))) - return -EINVAL; - - /* skip past descriptor if elf v1 */ - func += FUNCTION_DESCR_SIZE; - - /* Load function address into r12 */ - PPC_LI64(_R12, func); - - /* For bpf-to-bpf function calls, the callee's address is unknown - * until the last extra pass. As seen above, we use PPC_LI64() to - * load the callee's address, but this may optimize the number of - * instructions required based on the nature of the address. - * - * Since we don't want the number of instructions emitted to increase, - * we pad the optimized PPC_LI64() call with NOPs to guarantee that - * we always have a five-instruction sequence, which is the maximum - * that PPC_LI64() can emit. - */ - if (!image) - for (i = ctx->idx - ctx_idx; i < 5; i++) - EMIT(PPC_RAW_NOP()); - - EMIT(PPC_RAW_MTCTR(_R12)); - EMIT(PPC_RAW_BCTRL()); - - return 0; -} - static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) { /* @@ -326,7 +308,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o */ int b2p_bpf_array = bpf_to_ppc(BPF_REG_2); int b2p_index = bpf_to_ppc(BPF_REG_3); - int bpf_tailcall_prologue_size = 8; + int bpf_tailcall_prologue_size = 12; if (!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL) && IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) bpf_tailcall_prologue_size += 4; /* skip past the toc load */ @@ -1102,11 +1084,7 @@ emit_clear: if (ret < 0) return ret; - if (func_addr_fixed) - ret = bpf_jit_emit_func_call_hlp(image, fimage, ctx, func_addr); - else - ret = bpf_jit_emit_func_call_rel(image, fimage, ctx, func_addr); - + ret = bpf_jit_emit_func_call_rel(image, fimage, ctx, func_addr); if (ret) return ret; diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c index 308a2e40d7be..1d2972229e3a 100644 --- a/arch/powerpc/perf/8xx-pmu.c +++ b/arch/powerpc/perf/8xx-pmu.c @@ -14,7 +14,7 @@ #include <asm/machdep.h> #include <asm/firmware.h> #include <asm/ptrace.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/inst.h> #define PERF_8xx_ID_CPU_CYCLES 1 diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index 4f53d0b97539..ac2cf58d62db 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -16,6 +16,8 @@ obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o +obj-$(CONFIG_VPA_PMU) += vpa-pmu.o + obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o obj-$(CONFIG_PPC64) += $(obj64-y) diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 6b4434dd0ff3..26aa26482c9a 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -51,7 +51,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re 
lr = regs->link; sp = regs->gpr[1]; - perf_callchain_store(entry, perf_instruction_pointer(regs)); + perf_callchain_store(entry, perf_arch_instruction_pointer(regs)); if (!validate_sp(sp, current)) return; diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c index ea8cfe3806dc..ddcc2d8aa64a 100644 --- a/arch/powerpc/perf/callchain_32.c +++ b/arch/powerpc/perf/callchain_32.c @@ -139,7 +139,7 @@ void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, long level = 0; unsigned int __user *fp, *uregs; - next_ip = perf_instruction_pointer(regs); + next_ip = perf_arch_instruction_pointer(regs); lr = regs->link; sp = regs->gpr[1]; perf_callchain_store(entry, next_ip); diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index 488e8a21a11e..115d1c105e8a 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -74,7 +74,7 @@ void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, struct signal_frame_64 __user *sigframe; unsigned long __user *fp, *uregs; - next_ip = perf_instruction_pointer(regs); + next_ip = perf_arch_instruction_pointer(regs); lr = regs->link; sp = regs->gpr[1]; perf_callchain_store(entry, next_ip); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 42867469752d..2b79171ee185 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -16,7 +16,7 @@ #include <asm/machdep.h> #include <asm/firmware.h> #include <asm/ptrace.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/hw_irq.h> #include <asm/interrupt.h> @@ -2332,7 +2332,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * Called from generic code to get the misc flags (i.e. processor mode) * for an event_id. */ -unsigned long perf_misc_flags(struct pt_regs *regs) +unsigned long perf_arch_misc_flags(struct pt_regs *regs) { u32 flags = perf_get_misc_flags(regs); @@ -2346,7 +2346,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs) * Called from generic code to get the instruction pointer * for an event_id. 
*/ -unsigned long perf_instruction_pointer(struct pt_regs *regs) +unsigned long perf_arch_instruction_pointer(struct pt_regs *regs) { unsigned long siar = mfspr(SPRN_SIAR); diff --git a/arch/powerpc/perf/vpa-pmu.c b/arch/powerpc/perf/vpa-pmu.c new file mode 100644 index 000000000000..6a5bfd2a13b5 --- /dev/null +++ b/arch/powerpc/perf/vpa-pmu.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Performance monitoring support for Virtual Processor Area (VPA) based counters + * + * Copyright (C) 2024 IBM Corporation + */ +#define pr_fmt(fmt) "vpa_pmu: " fmt + +#include <linux/module.h> +#include <linux/perf_event.h> +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s_64.h> + +#define MODULE_VERS "1.0" +#define MODULE_NAME "pseries_vpa_pmu" + +#define EVENT(_name, _code) enum { _name = _code } + +#define VPA_PMU_EVENT_VAR(_id) event_attr_##_id +#define VPA_PMU_EVENT_PTR(_id) (&event_attr_##_id.attr.attr) + +static ssize_t vpa_pmu_events_sysfs_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + + return sprintf(page, "event=0x%02llx\n", pmu_attr->id); +} + +#define VPA_PMU_EVENT_ATTR(_name, _id) \ + PMU_EVENT_ATTR(_name, VPA_PMU_EVENT_VAR(_id), _id, \ + vpa_pmu_events_sysfs_show) + +EVENT(L1_TO_L2_CS_LAT, 0x1); +EVENT(L2_TO_L1_CS_LAT, 0x2); +EVENT(L2_RUNTIME_AGG, 0x3); + +VPA_PMU_EVENT_ATTR(l1_to_l2_lat, L1_TO_L2_CS_LAT); +VPA_PMU_EVENT_ATTR(l2_to_l1_lat, L2_TO_L1_CS_LAT); +VPA_PMU_EVENT_ATTR(l2_runtime_agg, L2_RUNTIME_AGG); + +static struct attribute *vpa_pmu_events_attr[] = { + VPA_PMU_EVENT_PTR(L1_TO_L2_CS_LAT), + VPA_PMU_EVENT_PTR(L2_TO_L1_CS_LAT), + VPA_PMU_EVENT_PTR(L2_RUNTIME_AGG), + NULL +}; + +static const struct attribute_group vpa_pmu_events_group = { + .name = "events", + .attrs = vpa_pmu_events_attr, +}; + +PMU_FORMAT_ATTR(event, "config:0-31"); +static struct attribute *vpa_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group vpa_pmu_format_group = { + .name = "format", + .attrs = vpa_pmu_format_attr, +}; + +static const struct attribute_group *vpa_pmu_attr_groups[] = { + &vpa_pmu_events_group, + &vpa_pmu_format_group, + NULL +}; + +static int vpa_pmu_event_init(struct perf_event *event) +{ + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* event sampling mode is not supported */ + if (is_sampling_event(event)) + return -EOPNOTSUPP; + + /* no branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + /* Invalid event code */ + if ((event->attr.config <= 0) || (event->attr.config > 3)) + return -EINVAL; + + return 0; +} + +static unsigned long get_counter_data(struct perf_event *event) +{ + unsigned int config = event->attr.config; + u64 data; + + switch (config) { + case L1_TO_L2_CS_LAT: + if (event->attach_state & PERF_ATTACH_TASK) + data = kvmhv_get_l1_to_l2_cs_time_vcpu(); + else + data = kvmhv_get_l1_to_l2_cs_time(); + break; + case L2_TO_L1_CS_LAT: + if (event->attach_state & PERF_ATTACH_TASK) + data = kvmhv_get_l2_to_l1_cs_time_vcpu(); + else + data = kvmhv_get_l2_to_l1_cs_time(); + break; + case L2_RUNTIME_AGG: + if (event->attach_state & PERF_ATTACH_TASK) + data = kvmhv_get_l2_runtime_agg_vcpu(); + else + data = kvmhv_get_l2_runtime_agg(); + break; + default: + data = 0; + break; + } + + return data; + } + +static int vpa_pmu_add(struct perf_event *event, int flags) +{ + u64 data; + + kvmhv_set_l2_counters_status(smp_processor_id(), true); + + data
= get_counter_data(event); + local64_set(&event->hw.prev_count, data); + + return 0; +} + +static void vpa_pmu_read(struct perf_event *event) +{ + u64 prev_data, new_data, final_data; + + prev_data = local64_read(&event->hw.prev_count); + new_data = get_counter_data(event); + final_data = new_data - prev_data; + + local64_add(final_data, &event->count); +} + +static void vpa_pmu_del(struct perf_event *event, int flags) +{ + vpa_pmu_read(event); + + /* + * Disable VPA counter accumulation + */ + kvmhv_set_l2_counters_status(smp_processor_id(), false); +} + +static struct pmu vpa_pmu = { + .task_ctx_nr = perf_sw_context, + .name = "vpa_pmu", + .event_init = vpa_pmu_event_init, + .add = vpa_pmu_add, + .del = vpa_pmu_del, + .read = vpa_pmu_read, + .attr_groups = vpa_pmu_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, +}; + +static int __init pseries_vpa_pmu_init(void) +{ + /* + * Of the current Linux on Power platforms, this + * driver is supported only on the PowerVM LPAR + * (L1) platform. + * + * Enabled Linux on Power Platforms + * ---------------------------------------- + * [X] PowerVM LPAR (L1) + * [ ] KVM Guest on PowerVM KoP (L2) + * [ ] Baremetal (PowerNV) + * [ ] KVM Guest on PowerNV + */ + if (!firmware_has_feature(FW_FEATURE_LPAR) || is_kvm_guest()) + return -ENODEV; + + perf_pmu_register(&vpa_pmu, vpa_pmu.name, -1); + pr_info("Virtual Processor Area PMU registered.\n"); + + return 0; +} + +static void __exit pseries_vpa_pmu_cleanup(void) +{ + perf_pmu_unregister(&vpa_pmu); + pr_info("Virtual Processor Area PMU unregistered.\n"); +} + +module_init(pseries_vpa_pmu_init); +module_exit(pseries_vpa_pmu_cleanup); +MODULE_DESCRIPTION("Perf driver for pSeries VPA PMU counters"); +MODULE_AUTHOR("Kajol Jain <[email protected]>"); +MODULE_AUTHOR("Madhavan Srinivasan <[email protected]>"); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/44x/pci.c b/arch/powerpc/platforms/44x/pci.c index db6d33ca753f..364aeb86ab64 100644 --- a/arch/powerpc/platforms/44x/pci.c +++ b/arch/powerpc/platforms/44x/pci.c @@ -94,10 +94,8 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose, struct resource *res) { u64 size; - const u32 *ranges; - int rlen; - int pna = of_n_addr_cells(hose->dn); - int np = pna + 5; + struct of_range_parser parser; + struct of_range range; /* Default */ res->start = 0; @@ -105,18 +103,15 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose, res->end = size - 1; res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH; - /* Get dma-ranges property */ - ranges = of_get_property(hose->dn, "dma-ranges", &rlen); - if (ranges == NULL) + if (of_pci_dma_range_parser_init(&parser, hose->dn)) goto out; - /* Walk it */ - while ((rlen -= np * 4) >= 0) { - u32 pci_space = ranges[0]; - u64 pci_addr = of_read_number(ranges + 1, 2); - u64 cpu_addr = of_translate_dma_address(hose->dn, ranges + 3); - size = of_read_number(ranges + pna + 3, 2); - ranges += np; + for_each_of_range(&parser, &range) { + u32 pci_space = range.flags; + u64 pci_addr = range.bus_addr; + u64 cpu_addr = range.cpu_addr; + size = range.size; + if (cpu_addr == OF_BAD_ADDR || size == 0) continue; diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 37a67120f257..a7172f9ebaad 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -13,6 +13,7 @@ #include <generated/utsrelease.h> #include <linux/pci.h> #include <linux/of.h> +#include <linux/seq_file.h> #include <asm/dma.h> #include
<asm/time.h> #include <asm/machdep.h> diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c index 3dc65ce1f175..8f918916e631 100644 --- a/arch/powerpc/platforms/82xx/ep8248e.c +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -128,7 +128,7 @@ static int ep8248e_mdio_probe(struct platform_device *ofdev) bus->name = "ep8248e-mdio-bitbang"; bus->parent = &ofdev->dev; - snprintf(bus->id, MII_BUS_ID_SIZE, "%x", res.start); + snprintf(bus->id, MII_BUS_ID_SIZE, "%pa", &res.start); ret = of_mdiobus_register(bus, ofdev->dev.of_node); if (ret) diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c index c86da3f2b74b..99f0f0f41876 100644 --- a/arch/powerpc/platforms/82xx/km82xx.c +++ b/arch/powerpc/platforms/82xx/km82xx.c @@ -27,15 +27,15 @@ static void __init km82xx_pic_init(void) { - struct device_node *np = of_find_compatible_node(NULL, NULL, - "fsl,pq2-pic"); + struct device_node *np __free(device_node); + np = of_find_compatible_node(NULL, NULL, "fsl,pq2-pic"); + if (!np) { pr_err("PIC init: can not find cpm-pic node\n"); return; } cpm2_pic_init(np); - of_node_put(np); } struct cpm_pin { diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 9315a3b69d6d..604c1b4b6d45 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -40,27 +40,6 @@ config BSC9132_QDS and dual StarCore SC3850 DSP cores. Manufacturer : Freescale Semiconductor, Inc -config MPC8540_ADS - bool "Freescale MPC8540 ADS" - select DEFAULT_UIMAGE - help - This option enables support for the MPC 8540 ADS board - -config MPC8560_ADS - bool "Freescale MPC8560 ADS" - select DEFAULT_UIMAGE - select CPM2 - help - This option enables support for the MPC 8560 ADS board - -config MPC85xx_CDS - bool "Freescale MPC85xx CDS" - select DEFAULT_UIMAGE - select PPC_I8259 - select HAVE_RAPIDIO - help - This option enables support for the MPC85xx CDS board - config MPC85xx_MDS bool "Freescale MPC8568 MDS / MPC8569 MDS / P1021 MDS" select DEFAULT_UIMAGE diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index e52b848b64b7..32fa5fb557c0 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -23,7 +23,7 @@ #include <asm/mpic.h> #include <asm/cacheflush.h> #include <asm/dbell.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/cputhreads.h> #include <asm/fsl_pm.h> diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c index 8a7e55acf090..9be33e41af6d 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c @@ -12,7 +12,7 @@ #include <linux/delay.h> #include <linux/pgtable.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/page.h> #include <asm/pci-bridge.h> #include <asm/mpic.h> diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 1112a5831619..a454149ae02f 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -7,7 +7,6 @@ source "arch/powerpc/platforms/chrp/Kconfig" source "arch/powerpc/platforms/512x/Kconfig" source "arch/powerpc/platforms/52xx/Kconfig" source "arch/powerpc/platforms/powermac/Kconfig" -source "arch/powerpc/platforms/maple/Kconfig" source "arch/powerpc/platforms/pasemi/Kconfig" source "arch/powerpc/platforms/ps3/Kconfig" source "arch/powerpc/platforms/cell/Kconfig" diff --git a/arch/powerpc/platforms/Makefile 
b/arch/powerpc/platforms/Makefile index 786d374bff31..3cee4a842736 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -14,7 +14,6 @@ obj-$(CONFIG_FSL_SOC_BOOKE) += 85xx/ obj-$(CONFIG_PPC_86xx) += 86xx/ obj-$(CONFIG_PPC_POWERNV) += powernv/ obj-$(CONFIG_PPC_PSERIES) += pseries/ -obj-$(CONFIG_PPC_MAPLE) += maple/ obj-$(CONFIG_PPC_PASEMI) += pasemi/ obj-$(CONFIG_PPC_CELL) += cell/ obj-$(CONFIG_PPC_PS3) += ps3/ diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index f381b177ea06..0b6365d85d11 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -464,7 +464,43 @@ static vm_fault_t vas_mmap_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS; } +/* + * When the paste address is mmap()ed, the mapping VMA is saved in the + * VAS window struct and is used to unmap during migration if the window + * is still open. But user space can remove this mapping with munmap() + * before closing the window, leaving the saved VMA address invalid. + * Clear the VAS window VMA in this function, which is called before the + * VMA is freed. + */ +static void vas_mmap_close(struct vm_area_struct *vma) +{ + struct file *fp = vma->vm_file; + struct coproc_instance *cp_inst = fp->private_data; + struct vas_window *txwin; + + /* Should not happen */ + if (!cp_inst || !cp_inst->txwin) { + pr_err("No attached VAS window for the paste address mmap\n"); + return; + } + + txwin = cp_inst->txwin; + /* + * task_ref.vma is set in coproc_mmap() when the paste address is + * mmap()ed, so it has to be the same VMA that is getting freed. + */ + if (WARN_ON(txwin->task_ref.vma != vma)) { + pr_err("Invalid paste address mmapping\n"); + return; + } + + mutex_lock(&txwin->task_ref.mmap_mutex); + txwin->task_ref.vma = NULL; + mutex_unlock(&txwin->task_ref.mmap_mutex); +} + static const struct vm_operations_struct vas_vm_ops = { + .close = vas_mmap_close, .fault = vas_mmap_fault, }; diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 28dc86744cac..d243f7fd8982 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -112,7 +112,7 @@ static void axon_msi_cascade(struct irq_desc *desc) pr_devel("axon_msi: woff %x roff %x msi %x\n", write_offset, msic->read_offset, msi); - if (msi < nr_irqs && irq_get_chip_data(msi) == msic) { + if (msi < irq_get_nr_irqs() && irq_get_chip_data(msi) == msic) { generic_handle_irq(msi); msic->fifo_virt[idx] = cpu_to_le32(0xffffffff); } else { diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 4cd9c0de22c2..62c9679b8ca3 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -779,58 +779,41 @@ static int __init cell_iommu_init_disabled(void) static u64 cell_iommu_get_fixed_address(struct device *dev) { - u64 cpu_addr, size, best_size, dev_addr = OF_BAD_ADDR; + u64 best_size, dev_addr = OF_BAD_ADDR; struct device_node *np; - const u32 *ranges = NULL; - int i, len, best, naddr, nsize, pna, range_size; + struct of_range_parser parser; + struct of_range range; /* We can be called for platform devices that have no of_node */ np = of_node_get(dev->of_node); if (!np) goto out; - while (1) { - naddr = of_n_addr_cells(np); - nsize = of_n_size_cells(np); - np = of_get_next_parent(np); - if (!np) - break; - - ranges = of_get_property(np, "dma-ranges", &len); + while ((np = of_get_next_parent(np))) { + if (of_pci_dma_range_parser_init(&parser, np)) +
continue; - /* Ignore empty ranges, they imply no translation required */ - if (ranges && len > 0) + if (of_range_count(&parser)) break; } - if (!ranges) { + if (!np) { dev_dbg(dev, "iommu: no dma-ranges found\n"); goto out; } - len /= sizeof(u32); - - pna = of_n_addr_cells(np); - range_size = naddr + nsize + pna; - - /* dma-ranges format: - * child addr : naddr cells - * parent addr : pna cells - * size : nsize cells - */ - for (i = 0, best = -1, best_size = 0; i < len; i += range_size) { - cpu_addr = of_translate_dma_address(np, ranges + i + naddr); - size = of_read_number(ranges + i + naddr + pna, nsize); + best_size = 0; + for_each_of_range(&parser, &range) { + if (!range.cpu_addr) + continue; - if (cpu_addr == 0 && size > best_size) { - best = i; - best_size = size; + if (range.size > best_size) { + best_size = range.size; + dev_addr = range.bus_addr; } } - if (best >= 0) { - dev_addr = of_read_number(ranges + best, naddr); - } else + if (!best_size) dev_dbg(dev, "iommu: no suitable range found!\n"); out: diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index fee638fd8970..0e8f20ecca08 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -35,7 +35,7 @@ #include <asm/firmware.h> #include <asm/rtas.h> #include <asm/cputhreads.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include "interrupt.h" #include <asm/udbg.h> diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c index cd7d42fc12a6..000894e07b02 100644 --- a/arch/powerpc/platforms/cell/spu_syscalls.c +++ b/arch/powerpc/platforms/cell/spu_syscalls.c @@ -36,6 +36,9 @@ static inline struct spufs_calls *spufs_calls_get(void) static inline void spufs_calls_put(struct spufs_calls *calls) { + if (!calls) + return; + BUG_ON(calls != spufs_calls); /* we don't need to rcu this, as we hold a reference to the module */ @@ -53,82 +56,55 @@ static inline void spufs_calls_put(struct spufs_calls *calls) { } #endif /* CONFIG_SPU_FS_MODULE */ +DEFINE_CLASS(spufs_calls, struct spufs_calls *, spufs_calls_put(_T), spufs_calls_get(), void) + SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags, umode_t, mode, int, neighbor_fd) { - long ret; - struct spufs_calls *calls; - - calls = spufs_calls_get(); + CLASS(spufs_calls, calls)(); if (!calls) return -ENOSYS; if (flags & SPU_CREATE_AFFINITY_SPU) { - struct fd neighbor = fdget(neighbor_fd); - ret = -EBADF; - if (fd_file(neighbor)) { - ret = calls->create_thread(name, flags, mode, fd_file(neighbor)); - fdput(neighbor); - } - } else - ret = calls->create_thread(name, flags, mode, NULL); - - spufs_calls_put(calls); - return ret; + CLASS(fd, neighbor)(neighbor_fd); + if (fd_empty(neighbor)) + return -EBADF; + return calls->create_thread(name, flags, mode, fd_file(neighbor)); + } else { + return calls->create_thread(name, flags, mode, NULL); + } } SYSCALL_DEFINE3(spu_run,int, fd, __u32 __user *, unpc, __u32 __user *, ustatus) { - long ret; - struct fd arg; - struct spufs_calls *calls; - - calls = spufs_calls_get(); + CLASS(spufs_calls, calls)(); if (!calls) return -ENOSYS; - ret = -EBADF; - arg = fdget(fd); - if (fd_file(arg)) { - ret = calls->spu_run(fd_file(arg), unpc, ustatus); - fdput(arg); - } + CLASS(fd, arg)(fd); + if (fd_empty(arg)) + return -EBADF; - spufs_calls_put(calls); - return ret; + return calls->spu_run(fd_file(arg), unpc, ustatus); } #ifdef CONFIG_COREDUMP int elf_coredump_extra_notes_size(void) { - struct spufs_calls *calls; - int 
ret; - - calls = spufs_calls_get(); + CLASS(spufs_calls, calls)(); if (!calls) return 0; - ret = calls->coredump_extra_notes_size(); - - spufs_calls_put(calls); - - return ret; + return calls->coredump_extra_notes_size(); } int elf_coredump_extra_notes_write(struct coredump_params *cprm) { - struct spufs_calls *calls; - int ret; - - calls = spufs_calls_get(); + CLASS(spufs_calls, calls)(); if (!calls) return 0; - ret = calls->coredump_extra_notes_write(cprm); - - spufs_calls_put(calls); - - return ret; + return calls->coredump_extra_notes_write(cprm); } #endif diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 18daafbe2e65..301ee7d8b7df 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -73,9 +73,7 @@ static struct spu_context *coredump_next_context(int *fd) return NULL; *fd = n - 1; - rcu_read_lock(); - file = lookup_fdget_rcu(*fd); - rcu_read_unlock(); + file = fget_raw(*fd); if (file) { ctx = SPUFS_I(file_inode(file))->i_ctx; get_spu_context(ctx); diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index e265f026eee2..4012f206ec63 100644 --- a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/initrd.h> #include <linux/of_platform.h> +#include <linux/seq_file.h> #include <asm/time.h> #include <asm/mpic.h> diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c index 00bec0f051be..5ca41972ef22 100644 --- a/arch/powerpc/platforms/embedded6xx/mvme5100.c +++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c @@ -14,6 +14,7 @@ #include <linux/of_irq.h> #include <linux/of_platform.h> +#include <linux/seq_file.h> #include <asm/i8259.h> #include <asm/pci-bridge.h> diff --git a/arch/powerpc/platforms/maple/Kconfig b/arch/powerpc/platforms/maple/Kconfig deleted file mode 100644 index 4c058cc57c90..000000000000 --- a/arch/powerpc/platforms/maple/Kconfig +++ /dev/null @@ -1,19 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -config PPC_MAPLE - depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN - bool "Maple 970FX Evaluation Board" - select FORCE_PCI - select MPIC - select U3_DART - select MPIC_U3_HT_IRQS - select GENERIC_TBSYNC - select PPC_UDBG_16550 - select PPC_970_NAP - select PPC_64S_HASH_MMU - select PPC_HASH_MMU_NATIVE - select PPC_RTAS - select MMIO_NVRAM - select ATA_NONSTANDARD if ATA - help - This option enables support for the Maple 970FX Evaluation Board. - For more information, refer to <http://www.970eval.com> diff --git a/arch/powerpc/platforms/maple/maple.h b/arch/powerpc/platforms/maple/maple.h deleted file mode 100644 index 8ddbaa4ebd0b..000000000000 --- a/arch/powerpc/platforms/maple/maple.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Declarations for maple-specific code. - * - * Maple is the name of a PPC970 evaluation board. 
- */ -extern int maple_set_rtc_time(struct rtc_time *tm); -extern void maple_get_rtc_time(struct rtc_time *tm); -extern time64_t maple_get_boot_time(void); -extern void maple_pci_init(void); -extern void maple_pci_irq_fixup(struct pci_dev *dev); -extern int maple_pci_get_legacy_ide_irq(struct pci_dev *dev, int channel); - -extern struct pci_controller_ops maple_pci_controller_ops; diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c deleted file mode 100644 index b9ff37c7f6f0..000000000000 --- a/arch/powerpc/platforms/maple/pci.c +++ /dev/null @@ -1,672 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2004 Benjamin Herrenschmuidt ([email protected]), - * IBM Corp. - */ - -#undef DEBUG - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/irq.h> -#include <linux/of_irq.h> - -#include <asm/sections.h> -#include <asm/io.h> -#include <asm/pci-bridge.h> -#include <asm/machdep.h> -#include <asm/iommu.h> -#include <asm/ppc-pci.h> -#include <asm/isa-bridge.h> - -#include "maple.h" - -#ifdef DEBUG -#define DBG(x...) printk(x) -#else -#define DBG(x...) -#endif - -static struct pci_controller *u3_agp, *u3_ht, *u4_pcie; - -static int __init fixup_one_level_bus_range(struct device_node *node, int higher) -{ - for (; node; node = node->sibling) { - const int *bus_range; - const unsigned int *class_code; - int len; - - /* For PCI<->PCI bridges or CardBus bridges, we go down */ - class_code = of_get_property(node, "class-code", NULL); - if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI && - (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) - continue; - bus_range = of_get_property(node, "bus-range", &len); - if (bus_range != NULL && len > 2 * sizeof(int)) { - if (bus_range[1] > higher) - higher = bus_range[1]; - } - higher = fixup_one_level_bus_range(node->child, higher); - } - return higher; -} - -/* This routine fixes the "bus-range" property of all bridges in the - * system since they tend to have their "last" member wrong on macs - * - * Note that the bus numbers manipulated here are OF bus numbers, they - * are not Linux bus numbers. - */ -static void __init fixup_bus_range(struct device_node *bridge) -{ - int *bus_range; - struct property *prop; - int len; - - /* Lookup the "bus-range" property for the hose */ - prop = of_find_property(bridge, "bus-range", &len); - if (prop == NULL || prop->value == NULL || len < 2 * sizeof(int)) { - printk(KERN_WARNING "Can't get bus-range for %pOF\n", - bridge); - return; - } - bus_range = prop->value; - bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]); -} - - -static unsigned long u3_agp_cfa0(u8 devfn, u8 off) -{ - return (1 << (unsigned long)PCI_SLOT(devfn)) | - ((unsigned long)PCI_FUNC(devfn) << 8) | - ((unsigned long)off & 0xFCUL); -} - -static unsigned long u3_agp_cfa1(u8 bus, u8 devfn, u8 off) -{ - return ((unsigned long)bus << 16) | - ((unsigned long)devfn << 8) | - ((unsigned long)off & 0xFCUL) | - 1UL; -} - -static volatile void __iomem *u3_agp_cfg_access(struct pci_controller* hose, - u8 bus, u8 dev_fn, u8 offset) -{ - unsigned int caddr; - - if (bus == hose->first_busno) { - if (dev_fn < (11 << 3)) - return NULL; - caddr = u3_agp_cfa0(dev_fn, offset); - } else - caddr = u3_agp_cfa1(bus, dev_fn, offset); - - /* Uninorth will return garbage if we don't read back the value ! 
*/ - do { - out_le32(hose->cfg_addr, caddr); - } while (in_le32(hose->cfg_addr) != caddr); - - offset &= 0x07; - return hose->cfg_data + offset; -} - -static int u3_agp_read_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 *val) -{ - struct pci_controller *hose; - volatile void __iomem *addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - - addr = u3_agp_cfg_access(hose, bus->number, devfn, offset); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - switch (len) { - case 1: - *val = in_8(addr); - break; - case 2: - *val = in_le16(addr); - break; - default: - *val = in_le32(addr); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static int u3_agp_write_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 val) -{ - struct pci_controller *hose; - volatile void __iomem *addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - - addr = u3_agp_cfg_access(hose, bus->number, devfn, offset); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - switch (len) { - case 1: - out_8(addr, val); - break; - case 2: - out_le16(addr, val); - break; - default: - out_le32(addr, val); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static struct pci_ops u3_agp_pci_ops = -{ - .read = u3_agp_read_config, - .write = u3_agp_write_config, -}; - -static unsigned long u3_ht_cfa0(u8 devfn, u8 off) -{ - return (devfn << 8) | off; -} - -static unsigned long u3_ht_cfa1(u8 bus, u8 devfn, u8 off) -{ - return u3_ht_cfa0(devfn, off) + (bus << 16) + 0x01000000UL; -} - -static volatile void __iomem *u3_ht_cfg_access(struct pci_controller* hose, - u8 bus, u8 devfn, u8 offset) -{ - if (bus == hose->first_busno) { - if (PCI_SLOT(devfn) == 0) - return NULL; - return hose->cfg_data + u3_ht_cfa0(devfn, offset); - } else - return hose->cfg_data + u3_ht_cfa1(bus, devfn, offset); -} - -static int u3_ht_root_read_config(struct pci_controller *hose, u8 offset, - int len, u32 *val) -{ - volatile void __iomem *addr; - - addr = hose->cfg_addr; - addr += ((offset & ~3) << 2) + (4 - len - (offset & 3)); - - switch (len) { - case 1: - *val = in_8(addr); - break; - case 2: - *val = in_be16(addr); - break; - default: - *val = in_be32(addr); - break; - } - - return PCIBIOS_SUCCESSFUL; -} - -static int u3_ht_root_write_config(struct pci_controller *hose, u8 offset, - int len, u32 val) -{ - volatile void __iomem *addr; - - addr = hose->cfg_addr + ((offset & ~3) << 2) + (4 - len - (offset & 3)); - - if (offset >= PCI_BASE_ADDRESS_0 && offset < PCI_CAPABILITY_LIST) - return PCIBIOS_SUCCESSFUL; - - switch (len) { - case 1: - out_8(addr, val); - break; - case 2: - out_be16(addr, val); - break; - default: - out_be32(addr, val); - break; - } - - return PCIBIOS_SUCCESSFUL; -} - -static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 *val) -{ - struct pci_controller *hose; - volatile void __iomem *addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - - if (bus->number == hose->first_busno && devfn == PCI_DEVFN(0, 0)) - return u3_ht_root_read_config(hose, offset, len, val); - - if (offset > 0xff) - return PCIBIOS_BAD_REGISTER_NUMBER; - - addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); - if (!addr) - return 
PCIBIOS_DEVICE_NOT_FOUND; - - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - switch (len) { - case 1: - *val = in_8(addr); - break; - case 2: - *val = in_le16(addr); - break; - default: - *val = in_le32(addr); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 val) -{ - struct pci_controller *hose; - volatile void __iomem *addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - - if (bus->number == hose->first_busno && devfn == PCI_DEVFN(0, 0)) - return u3_ht_root_write_config(hose, offset, len, val); - - if (offset > 0xff) - return PCIBIOS_BAD_REGISTER_NUMBER; - - addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - switch (len) { - case 1: - out_8(addr, val); - break; - case 2: - out_le16(addr, val); - break; - default: - out_le32(addr, val); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static struct pci_ops u3_ht_pci_ops = -{ - .read = u3_ht_read_config, - .write = u3_ht_write_config, -}; - -static unsigned int u4_pcie_cfa0(unsigned int devfn, unsigned int off) -{ - return (1 << PCI_SLOT(devfn)) | - (PCI_FUNC(devfn) << 8) | - ((off >> 8) << 28) | - (off & 0xfcu); -} - -static unsigned int u4_pcie_cfa1(unsigned int bus, unsigned int devfn, - unsigned int off) -{ - return (bus << 16) | - (devfn << 8) | - ((off >> 8) << 28) | - (off & 0xfcu) | 1u; -} - -static volatile void __iomem *u4_pcie_cfg_access(struct pci_controller* hose, - u8 bus, u8 dev_fn, int offset) -{ - unsigned int caddr; - - if (bus == hose->first_busno) - caddr = u4_pcie_cfa0(dev_fn, offset); - else - caddr = u4_pcie_cfa1(bus, dev_fn, offset); - - /* Uninorth will return garbage if we don't read back the value ! */ - do { - out_le32(hose->cfg_addr, caddr); - } while (in_le32(hose->cfg_addr) != caddr); - - offset &= 0x03; - return hose->cfg_data + offset; -} - -static int u4_pcie_read_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 *val) -{ - struct pci_controller *hose; - volatile void __iomem *addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - if (offset >= 0x1000) - return PCIBIOS_BAD_REGISTER_NUMBER; - addr = u4_pcie_cfg_access(hose, bus->number, devfn, offset); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - switch (len) { - case 1: - *val = in_8(addr); - break; - case 2: - *val = in_le16(addr); - break; - default: - *val = in_le32(addr); - break; - } - return PCIBIOS_SUCCESSFUL; -} -static int u4_pcie_write_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 val) -{ - struct pci_controller *hose; - volatile void __iomem *addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - if (offset >= 0x1000) - return PCIBIOS_BAD_REGISTER_NUMBER; - addr = u4_pcie_cfg_access(hose, bus->number, devfn, offset); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. 
- */ - switch (len) { - case 1: - out_8(addr, val); - break; - case 2: - out_le16(addr, val); - break; - default: - out_le32(addr, val); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static struct pci_ops u4_pcie_pci_ops = -{ - .read = u4_pcie_read_config, - .write = u4_pcie_write_config, -}; - -static void __init setup_u3_agp(struct pci_controller* hose) -{ - /* On G5, we move AGP up to high bus number so we don't need - * to reassign bus numbers for HT. If we ever have P2P bridges - * on AGP, we'll have to move pci_assign_all_buses to the - * pci_controller structure so we enable it for AGP and not for - * HT childs. - * We hard code the address because of the different size of - * the reg address cell, we shall fix that by killing struct - * reg_property and using some accessor functions instead - */ - hose->first_busno = 0xf0; - hose->last_busno = 0xff; - hose->ops = &u3_agp_pci_ops; - hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); - hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); - - u3_agp = hose; -} - -static void __init setup_u4_pcie(struct pci_controller* hose) -{ - /* We currently only implement the "non-atomic" config space, to - * be optimised later. - */ - hose->ops = &u4_pcie_pci_ops; - hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); - hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); - - u4_pcie = hose; -} - -static void __init setup_u3_ht(struct pci_controller* hose) -{ - hose->ops = &u3_ht_pci_ops; - - /* We hard code the address because of the different size of - * the reg address cell, we shall fix that by killing struct - * reg_property and using some accessor functions instead - */ - hose->cfg_data = ioremap(0xf2000000, 0x02000000); - hose->cfg_addr = ioremap(0xf8070000, 0x1000); - - hose->first_busno = 0; - hose->last_busno = 0xef; - - u3_ht = hose; -} - -static int __init maple_add_bridge(struct device_node *dev) -{ - int len; - struct pci_controller *hose; - char* disp_name; - const int *bus_range; - int primary = 1; - - DBG("Adding PCI host bridge %pOF\n", dev); - - bus_range = of_get_property(dev, "bus-range", &len); - if (bus_range == NULL || len < 2 * sizeof(int)) { - printk(KERN_WARNING "Can't get bus-range for %pOF, assume bus 0\n", - dev); - } - - hose = pcibios_alloc_controller(dev); - if (hose == NULL) - return -ENOMEM; - hose->first_busno = bus_range ? bus_range[0] : 0; - hose->last_busno = bus_range ? bus_range[1] : 0xff; - hose->controller_ops = maple_pci_controller_ops; - - disp_name = NULL; - if (of_device_is_compatible(dev, "u3-agp")) { - setup_u3_agp(hose); - disp_name = "U3-AGP"; - primary = 0; - } else if (of_device_is_compatible(dev, "u3-ht")) { - setup_u3_ht(hose); - disp_name = "U3-HT"; - primary = 1; - } else if (of_device_is_compatible(dev, "u4-pcie")) { - setup_u4_pcie(hose); - disp_name = "U4-PCIE"; - primary = 0; - } - printk(KERN_INFO "Found %s PCI host bridge. 
Firmware bus number: %d->%d\n", - disp_name, hose->first_busno, hose->last_busno); - - /* Interpret the "ranges" property */ - /* This also maps the I/O region and sets isa_io/mem_base */ - pci_process_bridge_OF_ranges(hose, dev, primary); - - /* Fixup "bus-range" OF property */ - fixup_bus_range(dev); - - /* Check for legacy IOs */ - isa_bridge_find_early(hose); - - /* create pci_dn's for DT nodes under this PHB */ - pci_devs_phb_init_dynamic(hose); - - return 0; -} - - -void maple_pci_irq_fixup(struct pci_dev *dev) -{ - DBG(" -> maple_pci_irq_fixup\n"); - - /* Fixup IRQ for PCIe host */ - if (u4_pcie != NULL && dev->bus->number == 0 && - pci_bus_to_host(dev->bus) == u4_pcie) { - printk(KERN_DEBUG "Fixup U4 PCIe IRQ\n"); - dev->irq = irq_create_mapping(NULL, 1); - if (dev->irq) - irq_set_irq_type(dev->irq, IRQ_TYPE_LEVEL_LOW); - } - - /* Hide AMD8111 IDE interrupt when in legacy mode so - * the driver calls pci_get_legacy_ide_irq() - */ - if (dev->vendor == PCI_VENDOR_ID_AMD && - dev->device == PCI_DEVICE_ID_AMD_8111_IDE && - (dev->class & 5) != 5) { - dev->irq = 0; - } - - DBG(" <- maple_pci_irq_fixup\n"); -} - -static int maple_pci_root_bridge_prepare(struct pci_host_bridge *bridge) -{ - struct pci_controller *hose = pci_bus_to_host(bridge->bus); - struct device_node *np, *child; - - if (hose != u3_agp) - return 0; - - /* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We - * assume there is no P2P bridge on the AGP bus, which should be a - * safe assumptions hopefully. - */ - np = hose->dn; - PCI_DN(np)->busno = 0xf0; - for_each_child_of_node(np, child) - PCI_DN(child)->busno = 0xf0; - - return 0; -} - -void __init maple_pci_init(void) -{ - struct device_node *np, *root; - struct device_node *ht = NULL; - - /* Probe root PCI hosts, that is on U3 the AGP host and the - * HyperTransport host. That one is actually "kept" around - * and actually added last as its resource management relies - * on the AGP resources to have been setup first - */ - root = of_find_node_by_path("/"); - if (root == NULL) { - printk(KERN_CRIT "maple_find_bridges: can't find root of device tree\n"); - return; - } - for_each_child_of_node(root, np) { - if (!of_node_is_type(np, "pci") && !of_node_is_type(np, "ht")) - continue; - if ((of_device_is_compatible(np, "u4-pcie") || - of_device_is_compatible(np, "u3-agp")) && - maple_add_bridge(np) == 0) - of_node_get(np); - - if (of_device_is_compatible(np, "u3-ht")) { - of_node_get(np); - ht = np; - } - } - of_node_put(root); - - /* Now setup the HyperTransport host if we found any - */ - if (ht && maple_add_bridge(ht) != 0) - of_node_put(ht); - - ppc_md.pcibios_root_bridge_prepare = maple_pci_root_bridge_prepare; - - /* Tell pci.c to not change any resource allocations. */ - pci_add_flags(PCI_PROBE_ONLY); -} - -int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel) -{ - struct device_node *np; - unsigned int defirq = channel ? 15 : 14; - unsigned int irq; - - if (pdev->vendor != PCI_VENDOR_ID_AMD || - pdev->device != PCI_DEVICE_ID_AMD_8111_IDE) - return defirq; - - np = pci_device_to_OF_node(pdev); - if (np == NULL) { - printk("Failed to locate OF node for IDE %s\n", - pci_name(pdev)); - return defirq; - } - irq = irq_of_parse_and_map(np, channel & 0x1); - if (!irq) { - printk("Failed to map onboard IDE interrupt for channel %d\n", - channel); - return defirq; - } - return irq; -} - -static void quirk_ipr_msi(struct pci_dev *dev) -{ - /* Something prevents MSIs from the IPR from working on Bimini, - * and the driver has no smarts to recover. 
So disable MSI - * on it for now. */ - - if (machine_is(maple)) { - dev->no_msi = 1; - dev_info(&dev->dev, "Quirk disabled MSI\n"); - } -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_OBSIDIAN, - quirk_ipr_msi); - -struct pci_controller_ops maple_pci_controller_ops = { -}; diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c deleted file mode 100644 index f329a03edf4a..000000000000 --- a/arch/powerpc/platforms/maple/setup.c +++ /dev/null @@ -1,363 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Maple (970 eval board) setup code - * - * (c) Copyright 2004 Benjamin Herrenschmidt ([email protected]), - * IBM Corp. - */ - -#undef DEBUG - -#include <linux/init.h> -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/export.h> -#include <linux/mm.h> -#include <linux/stddef.h> -#include <linux/unistd.h> -#include <linux/ptrace.h> -#include <linux/user.h> -#include <linux/tty.h> -#include <linux/string.h> -#include <linux/delay.h> -#include <linux/ioport.h> -#include <linux/major.h> -#include <linux/initrd.h> -#include <linux/vt_kern.h> -#include <linux/console.h> -#include <linux/pci.h> -#include <linux/adb.h> -#include <linux/cuda.h> -#include <linux/pmu.h> -#include <linux/irq.h> -#include <linux/seq_file.h> -#include <linux/root_dev.h> -#include <linux/serial.h> -#include <linux/smp.h> -#include <linux/bitops.h> -#include <linux/of.h> -#include <linux/of_address.h> -#include <linux/platform_device.h> -#include <linux/memblock.h> - -#include <asm/processor.h> -#include <asm/sections.h> -#include <asm/io.h> -#include <asm/pci-bridge.h> -#include <asm/iommu.h> -#include <asm/machdep.h> -#include <asm/dma.h> -#include <asm/cputable.h> -#include <asm/time.h> -#include <asm/mpic.h> -#include <asm/rtas.h> -#include <asm/udbg.h> -#include <asm/nvram.h> - -#include "maple.h" - -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) 
-#endif - -static unsigned long maple_find_nvram_base(void) -{ - struct device_node *rtcs; - unsigned long result = 0; - - /* find NVRAM device */ - rtcs = of_find_compatible_node(NULL, "nvram", "AMD8111"); - if (rtcs) { - struct resource r; - if (of_address_to_resource(rtcs, 0, &r)) { - printk(KERN_EMERG "Maple: Unable to translate NVRAM" - " address\n"); - goto bail; - } - if (!(r.flags & IORESOURCE_IO)) { - printk(KERN_EMERG "Maple: NVRAM address isn't PIO!\n"); - goto bail; - } - result = r.start; - } else - printk(KERN_EMERG "Maple: Unable to find NVRAM\n"); - bail: - of_node_put(rtcs); - return result; -} - -static void __noreturn maple_restart(char *cmd) -{ - unsigned int maple_nvram_base; - const unsigned int *maple_nvram_offset, *maple_nvram_command; - struct device_node *sp; - - maple_nvram_base = maple_find_nvram_base(); - if (maple_nvram_base == 0) - goto fail; - - /* find service processor device */ - sp = of_find_node_by_name(NULL, "service-processor"); - if (!sp) { - printk(KERN_EMERG "Maple: Unable to find Service Processor\n"); - goto fail; - } - maple_nvram_offset = of_get_property(sp, "restart-addr", NULL); - maple_nvram_command = of_get_property(sp, "restart-value", NULL); - of_node_put(sp); - - /* send command */ - outb_p(*maple_nvram_command, maple_nvram_base + *maple_nvram_offset); - for (;;) ; - fail: - printk(KERN_EMERG "Maple: Manual Restart Required\n"); - for (;;) ; -} - -static void __noreturn maple_power_off(void) -{ - unsigned int maple_nvram_base; - const unsigned int *maple_nvram_offset, *maple_nvram_command; - struct device_node *sp; - - maple_nvram_base = maple_find_nvram_base(); - if (maple_nvram_base == 0) - goto fail; - - /* find service processor device */ - sp = of_find_node_by_name(NULL, "service-processor"); - if (!sp) { - printk(KERN_EMERG "Maple: Unable to find Service Processor\n"); - goto fail; - } - maple_nvram_offset = of_get_property(sp, "power-off-addr", NULL); - maple_nvram_command = of_get_property(sp, "power-off-value", NULL); - of_node_put(sp); - - /* send command */ - outb_p(*maple_nvram_command, maple_nvram_base + *maple_nvram_offset); - for (;;) ; - fail: - printk(KERN_EMERG "Maple: Manual Power-Down Required\n"); - for (;;) ; -} - -static void __noreturn maple_halt(void) -{ - maple_power_off(); -} - -#ifdef CONFIG_SMP -static struct smp_ops_t maple_smp_ops = { - .probe = smp_mpic_probe, - .message_pass = smp_mpic_message_pass, - .kick_cpu = smp_generic_kick_cpu, - .setup_cpu = smp_mpic_setup_cpu, - .give_timebase = smp_generic_give_timebase, - .take_timebase = smp_generic_take_timebase, -}; -#endif /* CONFIG_SMP */ - -static void __init maple_use_rtas_reboot_and_halt_if_present(void) -{ - if (rtas_function_implemented(RTAS_FN_SYSTEM_REBOOT) && - rtas_function_implemented(RTAS_FN_POWER_OFF)) { - ppc_md.restart = rtas_restart; - pm_power_off = rtas_power_off; - ppc_md.halt = rtas_halt; - } -} - -static void __init maple_setup_arch(void) -{ - /* init to some ~sane value until calibrate_delay() runs */ - loops_per_jiffy = 50000000; - - /* Setup SMP callback */ -#ifdef CONFIG_SMP - smp_ops = &maple_smp_ops; -#endif - maple_use_rtas_reboot_and_halt_if_present(); - - printk(KERN_DEBUG "Using native/NAP idle loop\n"); - - mmio_nvram_init(); -} - -/* - * This is almost identical to pSeries and CHRP. 
We need to make that - * code generic at one point, with appropriate bits in the device-tree to - * identify the presence of an HT APIC - */ -static void __init maple_init_IRQ(void) -{ - struct device_node *root, *np, *mpic_node = NULL; - const unsigned int *opprop; - unsigned long openpic_addr = 0; - int naddr, n, i, opplen, has_isus = 0; - struct mpic *mpic; - unsigned int flags = 0; - - /* Locate MPIC in the device-tree. Note that there is a bug - * in Maple device-tree where the type of the controller is - * open-pic and not interrupt-controller - */ - - for_each_node_by_type(np, "interrupt-controller") - if (of_device_is_compatible(np, "open-pic")) { - mpic_node = np; - break; - } - if (mpic_node == NULL) - for_each_node_by_type(np, "open-pic") { - mpic_node = np; - break; - } - if (mpic_node == NULL) { - printk(KERN_ERR - "Failed to locate the MPIC interrupt controller\n"); - return; - } - - /* Find address list in /platform-open-pic */ - root = of_find_node_by_path("/"); - naddr = of_n_addr_cells(root); - opprop = of_get_property(root, "platform-open-pic", &opplen); - if (opprop) { - openpic_addr = of_read_number(opprop, naddr); - has_isus = (opplen > naddr); - printk(KERN_DEBUG "OpenPIC addr: %lx, has ISUs: %d\n", - openpic_addr, has_isus); - } - - BUG_ON(openpic_addr == 0); - - /* Check for a big endian MPIC */ - if (of_property_read_bool(np, "big-endian")) - flags |= MPIC_BIG_ENDIAN; - - /* XXX Maple specific bits */ - flags |= MPIC_U3_HT_IRQS; - /* All U3/U4 are big-endian, older SLOF firmware doesn't encode this */ - flags |= MPIC_BIG_ENDIAN; - - /* Setup the openpic driver. More device-tree junks, we hard code no - * ISUs for now. I'll have to revisit some stuffs with the folks doing - * the firmware for those - */ - mpic = mpic_alloc(mpic_node, openpic_addr, flags, - /*has_isus ? 16 :*/ 0, 0, " MPIC "); - BUG_ON(mpic == NULL); - - /* Add ISUs */ - opplen /= sizeof(u32); - for (n = 0, i = naddr; i < opplen; i += naddr, n++) { - unsigned long isuaddr = of_read_number(opprop + i, naddr); - mpic_assign_isu(mpic, n, isuaddr); - } - - /* All ISUs are setup, complete initialization */ - mpic_init(mpic); - ppc_md.get_irq = mpic_get_irq; - of_node_put(mpic_node); - of_node_put(root); -} - -static void __init maple_progress(char *s, unsigned short hex) -{ - printk("*** %04x : %s\n", hex, s ? s : ""); -} - - -/* - * Called very early, MMU is off, device-tree isn't unflattened - */ -static int __init maple_probe(void) -{ - if (!of_machine_is_compatible("Momentum,Maple") && - !of_machine_is_compatible("Momentum,Apache")) - return 0; - - pm_power_off = maple_power_off; - - iommu_init_early_dart(&maple_pci_controller_ops); - - return 1; -} - -#ifdef CONFIG_EDAC -/* - * Register a platform device for CPC925 memory controller on - * all boards with U3H (CPC925) bridge. 
- */ -static int __init maple_cpc925_edac_setup(void) -{ - struct platform_device *pdev; - struct device_node *np = NULL; - struct resource r; - int ret; - volatile void __iomem *mem; - u32 rev; - - np = of_find_node_by_type(NULL, "memory-controller"); - if (!np) { - printk(KERN_ERR "%s: Unable to find memory-controller node\n", - __func__); - return -ENODEV; - } - - ret = of_address_to_resource(np, 0, &r); - of_node_put(np); - - if (ret < 0) { - printk(KERN_ERR "%s: Unable to get memory-controller reg\n", - __func__); - return -ENODEV; - } - - mem = ioremap(r.start, resource_size(&r)); - if (!mem) { - printk(KERN_ERR "%s: Unable to map memory-controller memory\n", - __func__); - return -ENOMEM; - } - - rev = __raw_readl(mem); - iounmap(mem); - - if (rev < 0x34 || rev > 0x3f) { /* U3H */ - printk(KERN_ERR "%s: Non-CPC925(U3H) bridge revision: %02x\n", - __func__, rev); - return 0; - } - - pdev = platform_device_register_simple("cpc925_edac", 0, &r, 1); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); - - printk(KERN_INFO "%s: CPC925 platform device created\n", __func__); - - return 0; -} -machine_device_initcall(maple, maple_cpc925_edac_setup); -#endif - -define_machine(maple) { - .name = "Maple", - .probe = maple_probe, - .setup_arch = maple_setup_arch, - .discover_phbs = maple_pci_init, - .init_IRQ = maple_init_IRQ, - .pci_irq_fixup = maple_pci_irq_fixup, - .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, - .restart = maple_restart, - .halt = maple_halt, - .get_boot_time = maple_get_boot_time, - .set_rtc_time = maple_set_rtc_time, - .get_rtc_time = maple_get_rtc_time, - .progress = maple_progress, - .power_save = power4_idle, -}; diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c deleted file mode 100644 index 91606411d2e0..000000000000 --- a/arch/powerpc/platforms/maple/time.c +++ /dev/null @@ -1,170 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * (c) Copyright 2004 Benjamin Herrenschmidt ([email protected]), - * IBM Corp. - */ - -#undef DEBUG - -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/param.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/init.h> -#include <linux/time.h> -#include <linux/adb.h> -#include <linux/pmu.h> -#include <linux/interrupt.h> -#include <linux/mc146818rtc.h> -#include <linux/bcd.h> -#include <linux/of_address.h> - -#include <asm/sections.h> -#include <asm/io.h> -#include <asm/machdep.h> -#include <asm/time.h> - -#include "maple.h" - -#ifdef DEBUG -#define DBG(x...) printk(x) -#else -#define DBG(x...) 
-#endif - -static int maple_rtc_addr; - -static int maple_clock_read(int addr) -{ - outb_p(addr, maple_rtc_addr); - return inb_p(maple_rtc_addr+1); -} - -static void maple_clock_write(unsigned long val, int addr) -{ - outb_p(addr, maple_rtc_addr); - outb_p(val, maple_rtc_addr+1); -} - -void maple_get_rtc_time(struct rtc_time *tm) -{ - do { - tm->tm_sec = maple_clock_read(RTC_SECONDS); - tm->tm_min = maple_clock_read(RTC_MINUTES); - tm->tm_hour = maple_clock_read(RTC_HOURS); - tm->tm_mday = maple_clock_read(RTC_DAY_OF_MONTH); - tm->tm_mon = maple_clock_read(RTC_MONTH); - tm->tm_year = maple_clock_read(RTC_YEAR); - } while (tm->tm_sec != maple_clock_read(RTC_SECONDS)); - - if (!(maple_clock_read(RTC_CONTROL) & RTC_DM_BINARY) - || RTC_ALWAYS_BCD) { - tm->tm_sec = bcd2bin(tm->tm_sec); - tm->tm_min = bcd2bin(tm->tm_min); - tm->tm_hour = bcd2bin(tm->tm_hour); - tm->tm_mday = bcd2bin(tm->tm_mday); - tm->tm_mon = bcd2bin(tm->tm_mon); - tm->tm_year = bcd2bin(tm->tm_year); - } - if ((tm->tm_year + 1900) < 1970) - tm->tm_year += 100; - - tm->tm_wday = -1; -} - -int maple_set_rtc_time(struct rtc_time *tm) -{ - unsigned char save_control, save_freq_select; - int sec, min, hour, mon, mday, year; - - spin_lock(&rtc_lock); - - save_control = maple_clock_read(RTC_CONTROL); /* tell the clock it's being set */ - - maple_clock_write((save_control|RTC_SET), RTC_CONTROL); - - save_freq_select = maple_clock_read(RTC_FREQ_SELECT); /* stop and reset prescaler */ - - maple_clock_write((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); - - sec = tm->tm_sec; - min = tm->tm_min; - hour = tm->tm_hour; - mon = tm->tm_mon; - mday = tm->tm_mday; - year = tm->tm_year; - - if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { - sec = bin2bcd(sec); - min = bin2bcd(min); - hour = bin2bcd(hour); - mon = bin2bcd(mon); - mday = bin2bcd(mday); - year = bin2bcd(year); - } - maple_clock_write(sec, RTC_SECONDS); - maple_clock_write(min, RTC_MINUTES); - maple_clock_write(hour, RTC_HOURS); - maple_clock_write(mon, RTC_MONTH); - maple_clock_write(mday, RTC_DAY_OF_MONTH); - maple_clock_write(year, RTC_YEAR); - - /* The following flags have to be released exactly in this order, - * otherwise the DS12887 (popular MC146818A clone with integrated - * battery and quartz) will not reset the oscillator and will not - * update precisely 500 ms later. You won't find this mentioned in - * the Dallas Semiconductor data sheets, but who believes data - * sheets anyway ... 
-- Markus Kuhn - */ - maple_clock_write(save_control, RTC_CONTROL); - maple_clock_write(save_freq_select, RTC_FREQ_SELECT); - - spin_unlock(&rtc_lock); - - return 0; -} - -static struct resource rtc_iores = { - .name = "rtc", - .flags = IORESOURCE_IO | IORESOURCE_BUSY, -}; - -time64_t __init maple_get_boot_time(void) -{ - struct rtc_time tm; - struct device_node *rtcs; - - rtcs = of_find_compatible_node(NULL, "rtc", "pnpPNP,b00"); - if (rtcs) { - struct resource r; - if (of_address_to_resource(rtcs, 0, &r)) { - printk(KERN_EMERG "Maple: Unable to translate RTC" - " address\n"); - goto bail; - } - if (!(r.flags & IORESOURCE_IO)) { - printk(KERN_EMERG "Maple: RTC address isn't PIO!\n"); - goto bail; - } - maple_rtc_addr = r.start; - printk(KERN_INFO "Maple: Found RTC at IO 0x%x\n", - maple_rtc_addr); - } - bail: - of_node_put(rtcs); - if (maple_rtc_addr == 0) { - maple_rtc_addr = RTC_PORT(0); /* legacy address */ - printk(KERN_INFO "Maple: No device node for RTC, assuming " - "legacy address (0x%x)\n", maple_rtc_addr); - } - - rtc_iores.start = maple_rtc_addr; - rtc_iores.end = maple_rtc_addr + 7; - request_resource(&ioport_resource, &rtc_iores); - - maple_get_rtc_time(&tm); - return rtc_tm_to_time64(&tm); -} - diff --git a/arch/powerpc/platforms/powermac/backlight.c b/arch/powerpc/platforms/powermac/backlight.c index 12bc01353bd3..79741370c40c 100644 --- a/arch/powerpc/platforms/powermac/backlight.c +++ b/arch/powerpc/platforms/powermac/backlight.c @@ -57,18 +57,10 @@ struct backlight_device *pmac_backlight; int pmac_has_backlight_type(const char *type) { struct device_node* bk_node = of_find_node_by_name(NULL, "backlight"); + int i = of_property_match_string(bk_node, "backlight-control", type); - if (bk_node) { - const char *prop = of_get_property(bk_node, - "backlight-control", NULL); - if (prop && strncmp(prop, type, strlen(type)) == 0) { - of_node_put(bk_node); - return 1; - } - of_node_put(bk_node); - } - - return 0; + of_node_put(bk_node); + return i >= 0; } static void pmac_backlight_key_worker(struct work_struct *work) diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index d21b681f52fb..09e7fe24fac1 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -35,7 +35,7 @@ #include <asm/ptrace.h> #include <linux/atomic.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/irq.h> #include <asm/page.h> #include <asm/sections.h> diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index ad41dffe4d92..d98b933e4984 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -18,7 +18,7 @@ #include <asm/opal.h> #include <asm/cputhreads.h> #include <asm/cpuidle.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/smp.h> #include <asm/runlatch.h> #include <asm/dbell.h> diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 8f14f0581a21..8f41ef364fc6 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -28,7 +28,7 @@ #include <asm/xive.h> #include <asm/opal.h> #include <asm/runlatch.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/dbell.h> #include <asm/kvm_ppc.h> #include <asm/ppc-opcode.h> @@ -36,6 +36,7 @@ #include <asm/kexec.h> #include <asm/reg.h> #include <asm/powernv.h> +#include <asm/systemcfg.h> #include "powernv.h" @@ -136,7 +137,9 @@ static int 
pnv_smp_cpu_disable(void) * the generic fixup_irqs. --BenH. */ set_cpu_online(cpu, false); - vdso_data->processorCount--; +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->processorCount--; +#endif if (cpu == boot_cpuid) boot_cpuid = cpumask_any(cpu_online_mask); if (xive_enabled()) diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index b18e1c92e554..61722133eb2d 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -178,7 +178,7 @@ fail_malloc: return result; } -static int __ref ps3_setup_uhc_device( +static int __init ps3_setup_uhc_device( const struct ps3_repository_device *repo, enum ps3_match_id match_id, enum ps3_interrupt_type interrupt_type, enum ps3_reg_type reg_type) { diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index 49871427f599..af3fe9f04f24 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -378,9 +378,9 @@ int ps3_send_event_locally(unsigned int virq) /** * ps3_sb_event_receive_port_setup - Setup a system bus event receive port. + * @dev: The system bus device instance. * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be * serviced on. - * @dev: The system bus device instance. * @virq: The assigned Linux virq. * * An event irq represents a virtual device interrupt. The interrupt_id diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c index 1abe33fbe529..b8c030eab138 100644 --- a/arch/powerpc/platforms/ps3/repository.c +++ b/arch/powerpc/platforms/ps3/repository.c @@ -940,7 +940,7 @@ int __init ps3_repository_read_vuart_sysmgr_port(unsigned int *port) /** * ps3_repository_read_boot_dat_info - Get address and size of cell_ext_os_area. - * address: lpar address of cell_ext_os_area + * @lpar_addr: lpar address of cell_ext_os_area * @size: size of cell_ext_os_area */ diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index b9a7d9bae687..afbaabf182d0 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -453,10 +453,9 @@ static ssize_t modalias_show(struct device *_dev, struct device_attribute *a, char *buf) { struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); - int len = snprintf(buf, PAGE_SIZE, "ps3:%d:%d\n", dev->match_id, - dev->match_sub_id); - return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len; + return sysfs_emit(buf, "ps3:%d:%d\n", dev->match_id, + dev->match_sub_id); } static DEVICE_ATTR_RO(modalias); diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index afc0f6a61337..42fc66e97539 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -140,6 +140,20 @@ config HV_PERF_CTRS If unsure, select Y. +config VPA_PMU + tristate "VPA PMU events" + depends on KVM_BOOK3S_64_HV && HV_PERF_CTRS + help + Enable access to the VPA PMU counters via perf. This enables + code that supports measurement for the KVM on PowerVM (KoP) + feature. The PAPR hypervisor has introduced three new counters + in the VPA area of LPAR CPUs for KVM L2 guest observability: + two for context switches from host to guest and vice versa, and + one for the total time spent inside the KVM guest. This config + enables code that accesses these software counters via perf. + + If unsure, select N.
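The VPA_PMU counters above are exposed through a dynamic perf PMU, so user space consumes them like any other perf event: resolve the PMU's type id from sysfs, then open a counter with perf_event_open(). Below is a minimal sketch of that pattern; the sysfs directory name "vpa_pmu" and the event config value are placeholders, since this hunk does not show the driver's actual registration.

/*
 * Minimal sketch, assuming a dynamic PMU registered as "vpa_pmu" and an
 * event with config value 1 -- both placeholders; the real names come
 * from the driver that CONFIG_VPA_PMU builds.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <linux/perf_event.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	unsigned int type;
	FILE *f;
	int fd;

	/* A dynamic PMU advertises its type id in sysfs. */
	f = fopen("/sys/bus/event_source/devices/vpa_pmu/type", "r");
	if (!f || fscanf(f, "%u", &type) != 1) {
		perror("reading PMU type");
		return 1;
	}
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;	/* the dynamic PMU, not PERF_TYPE_HARDWARE */
	attr.config = 1;	/* hypothetical event id within that PMU */

	/* Count system-wide on CPU 0; needs CAP_PERFMON or a relaxed
	 * perf_event_paranoid setting. */
	fd = perf_event_open(&attr, -1, 0, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("count: %lld\n", count);
	close(fd);
	return 0;
}

Events registered this way would typically also appear in perf list and be usable directly, e.g. as perf stat -e vpa_pmu/<event>/.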
+ config IBMVIO depends on PPC_PSERIES bool diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 8cb9d36ea491..f293588b8c7b 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -191,7 +191,7 @@ static int dtl_enable(struct dtl *dtl) return -EBUSY; /* ensure there are no other conflicting dtl users */ - if (!read_trylock(&dtl_access_lock)) + if (!down_read_trylock(&dtl_access_lock)) return -EBUSY; n_entries = dtl_buf_entries; @@ -199,7 +199,7 @@ static int dtl_enable(struct dtl *dtl) if (!buf) { printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n", __func__, dtl->cpu); - read_unlock(&dtl_access_lock); + up_read(&dtl_access_lock); return -ENOMEM; } @@ -217,7 +217,7 @@ static int dtl_enable(struct dtl *dtl) spin_unlock(&dtl->lock); if (rc) { - read_unlock(&dtl_access_lock); + up_read(&dtl_access_lock); kmem_cache_free(dtl_cache, buf); } @@ -232,7 +232,7 @@ static void dtl_disable(struct dtl *dtl) dtl->buf = NULL; dtl->buf_entries = 0; spin_unlock(&dtl->lock); - read_unlock(&dtl_access_lock); + up_read(&dtl_access_lock); } /* file interface */ diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 6838a0fcda29..bc6926dbf148 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -33,6 +33,7 @@ #include <asm/xive.h> #include <asm/plpar_wrappers.h> #include <asm/topology.h> +#include <asm/systemcfg.h> #include "pseries.h" @@ -83,7 +84,9 @@ static int pseries_cpu_disable(void) int cpu = smp_processor_id(); set_cpu_online(cpu, false); - vdso_data->processorCount--; +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->processorCount--; +#endif /*fix boot_cpuid here*/ if (cpu == boot_cpuid) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index c1d8bee8f701..6a415febc53b 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -16,6 +16,7 @@ #include <linux/export.h> #include <linux/jump_label.h> #include <linux/delay.h> +#include <linux/seq_file.h> #include <linux/stop_machine.h> #include <linux/spinlock.h> #include <linux/cpuhotplug.h> @@ -169,7 +170,7 @@ struct vcpu_dispatch_data { */ #define NR_CPUS_H NR_CPUS -DEFINE_RWLOCK(dtl_access_lock); +DECLARE_RWSEM(dtl_access_lock); static DEFINE_PER_CPU(struct vcpu_dispatch_data, vcpu_disp_data); static DEFINE_PER_CPU(u64, dtl_entry_ridx); static DEFINE_PER_CPU(struct dtl_worker, dtl_workers); @@ -463,7 +464,7 @@ static int dtl_worker_enable(unsigned long *time_limit) { int rc = 0, state; - if (!write_trylock(&dtl_access_lock)) { + if (!down_write_trylock(&dtl_access_lock)) { rc = -EBUSY; goto out; } @@ -479,7 +480,7 @@ static int dtl_worker_enable(unsigned long *time_limit) pr_err("vcpudispatch_stats: unable to setup workqueue for DTL processing\n"); free_dtl_buffers(time_limit); reset_global_dtl_mask(); - write_unlock(&dtl_access_lock); + up_write(&dtl_access_lock); rc = -EINVAL; goto out; } @@ -494,7 +495,7 @@ static void dtl_worker_disable(unsigned long *time_limit) cpuhp_remove_state(dtl_worker_state); free_dtl_buffers(time_limit); reset_global_dtl_mask(); - write_unlock(&dtl_access_lock); + up_write(&dtl_access_lock); } static ssize_t vcpudispatch_stats_write(struct file *file, const char __user *p, diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index 62da20f9700a..cc22924f159f 100644 --- 
a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -29,7 +29,6 @@ #include <asm/firmware.h> #include <asm/rtas.h> #include <asm/time.h> -#include <asm/vdso_datapage.h> #include <asm/vio.h> #include <asm/mmu.h> #include <asm/machdep.h> @@ -530,7 +529,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) lrdrp = of_get_property(rtas_node, "ibm,lrdr-capacity", NULL); if (lrdrp == NULL) { - partition_potential_processors = vdso_data->processorCount; + partition_potential_processors = num_possible_cpus(); } else { partition_potential_processors = be32_to_cpup(lrdrp + 4); } @@ -553,7 +552,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) } else { /* non SPLPAR case */ seq_printf(m, "system_active_processors=%d\n", - partition_potential_processors); + partition_active_processors); seq_printf(m, "system_potential_processors=%d\n", partition_potential_processors); diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 6dfb55b52d36..fdc2f7f38dc9 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -9,6 +9,7 @@ #include <linux/irq.h> #include <linux/irqdomain.h> #include <linux/msi.h> +#include <linux/seq_file.h> #include <asm/rtas.h> #include <asm/hw_irq.h> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 9e297f88adc5..f84ac9fbe203 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -6,6 +6,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/ioport.h> +#include <linux/seq_file.h> #include <linux/slab.h> #include <linux/ndctl.h> #include <linux/sched.h> diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index 4a595493d28a..b1667ed05f98 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -683,7 +683,7 @@ void __init plpks_early_init_devtree(void) out: fdt_nop_property(fdt, chosen_node, "ibm,plpks-pw"); // Since we've cleared the password, we must update the FDT checksum - early_init_dt_verify(fdt); + early_init_dt_verify(fdt, __pa(fdt)); } static __init int pseries_plpks_init(void) diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index c597711ef20a..db99725e752b 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -39,7 +39,7 @@ #include <asm/xive.h> #include <asm/dbell.h> #include <asm/plpar_wrappers.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/svm.h> #include <asm/kvm_guest.h> diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index 3b4045d508ec..384c9dc1899a 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -8,6 +8,7 @@ #include <linux/mm.h> #include <linux/memblock.h> +#include <linux/mem_encrypt.h> #include <linux/cc_platform.h> #include <asm/machdep.h> #include <asm/svm.h> diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index fa01818c1972..a6c388bdf5d0 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -726,7 +726,7 @@ static int xive_irq_set_affinity(struct irq_data *d, pr_debug("%s: irq %d/0x%x\n", __func__, d->irq, hw_irq); /* Is this valid ? 
*/ - if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) + if (!cpumask_intersects(cpumask, cpu_online_mask)) return -EINVAL; /* diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index f2fa985a2c77..5aedbe3e8e6a 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -7,6 +7,7 @@ #include <linux/types.h> #include <linux/irq.h> +#include <linux/seq_file.h> #include <linux/smp.h> #include <linux/interrupt.h> #include <linux/init.h> diff --git a/arch/powerpc/platforms/maple/Makefile b/arch/powerpc/tools/.gitignore index 19f35ab828a7..ec380a14a09a 100644 --- a/arch/powerpc/platforms/maple/Makefile +++ b/arch/powerpc/tools/.gitignore @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += setup.o pci.o time.o +/vmlinux.arch.S diff --git a/arch/powerpc/tools/Makefile b/arch/powerpc/tools/Makefile new file mode 100644 index 000000000000..e1f7afcd9fdf --- /dev/null +++ b/arch/powerpc/tools/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +quiet_cmd_gen_ftrace_ool_stubs = GEN $@ + cmd_gen_ftrace_ool_stubs = $< "$(CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE)" "$(CONFIG_64BIT)" \ + "$(OBJDUMP)" vmlinux.o $@ + +$(obj)/vmlinux.arch.S: $(src)/ftrace-gen-ool-stubs.sh vmlinux.o FORCE + $(call if_changed,gen_ftrace_ool_stubs) + +targets += vmlinux.arch.S diff --git a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh new file mode 100755 index 000000000000..bac186bdf64a --- /dev/null +++ b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh @@ -0,0 +1,52 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-or-later + +# Error out on error +set -e + +num_ool_stubs_text_builtin="$1" +is_64bit="$2" +objdump="$3" +vmlinux_o="$4" +arch_vmlinux_S="$5" + +RELOCATION=R_PPC64_ADDR64 +if [ -z "$is_64bit" ]; then + RELOCATION=R_PPC_ADDR32 +fi + +num_ool_stubs_total=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | + grep -c "$RELOCATION") +num_ool_stubs_inittext=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | + grep -e ".init.text" -e ".text.startup" | grep -c "$RELOCATION") +num_ool_stubs_text=$((num_ool_stubs_total - num_ool_stubs_inittext)) + +if [ "$num_ool_stubs_text" -gt "$num_ool_stubs_text_builtin" ]; then + num_ool_stubs_text_end=$((num_ool_stubs_text - num_ool_stubs_text_builtin)) +else + num_ool_stubs_text_end=0 +fi + +cat > "$arch_vmlinux_S" <<EOF +#include <asm/asm-offsets.h> +#include <asm/ppc_asm.h> +#include <linux/linkage.h> + +.pushsection .tramp.ftrace.text,"aw" +SYM_DATA(ftrace_ool_stub_text_end_count, .long $num_ool_stubs_text_end) + +SYM_START(ftrace_ool_stub_text_end, SYM_L_GLOBAL, .balign SZL) +#if $num_ool_stubs_text_end + .space $num_ool_stubs_text_end * FTRACE_OOL_STUB_SIZE +#endif +SYM_CODE_END(ftrace_ool_stub_text_end) +.popsection + +.pushsection .tramp.ftrace.init,"aw" +SYM_DATA(ftrace_ool_stub_inittext_count, .long $num_ool_stubs_inittext) + +SYM_START(ftrace_ool_stub_inittext, SYM_L_GLOBAL, .balign SZL) + .space $num_ool_stubs_inittext * FTRACE_OOL_STUB_SIZE +SYM_CODE_END(ftrace_ool_stub_inittext) +.popsection +EOF diff --git a/arch/powerpc/tools/ftrace_check.sh b/arch/powerpc/tools/ftrace_check.sh new file mode 100755 index 000000000000..405e7e306617 --- /dev/null +++ b/arch/powerpc/tools/ftrace_check.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later +# +# This script checks vmlinux to ensure that all functions can call ftrace_caller() either directly, +# or through the stub, ftrace_tramp_text, at the end 
of kernel text. + +# Error out if any command fails +set -e + +# Allow for verbose output +if [ "$V" = "1" ]; then + set -x +fi + +if [ $# -lt 2 ]; then + echo "$0 [path to nm] [path to vmlinux]" 1>&2 + exit 1 +fi + +# Have Kbuild supply the path to nm so we handle cross compilation. +nm="$1" +vmlinux="$2" + +stext_addr=$($nm "$vmlinux" | grep -e " [TA] _stext$" | \ + cut -d' ' -f1 | tr '[:lower:]' '[:upper:]') +ftrace_caller_addr=$($nm "$vmlinux" | grep -e " T ftrace_caller$" | \ + cut -d' ' -f1 | tr '[:lower:]' '[:upper:]') +ftrace_tramp_addr=$($nm "$vmlinux" | grep -e " T ftrace_tramp_text$" | \ + cut -d' ' -f1 | tr '[:lower:]' '[:upper:]') + +ftrace_caller_offset=$(echo "ibase=16;$ftrace_caller_addr - $stext_addr" | bc) +ftrace_tramp_offset=$(echo "ibase=16;$ftrace_tramp_addr - $ftrace_caller_addr" | bc) +sz_32m=$(printf "%d" 0x2000000) +sz_64m=$(printf "%d" 0x4000000) + +# ftrace_caller - _stext < 32M +if [ "$ftrace_caller_offset" -ge "$sz_32m" ]; then + echo "ERROR: ftrace_caller (0x$ftrace_caller_addr) is beyond 32MiB of _stext" 1>&2 + echo "ERROR: consider disabling CONFIG_FUNCTION_TRACER, or reducing the size \ + of kernel text" 1>&2 + exit 1 +fi + +# ftrace_tramp_text - ftrace_caller < 64M +if [ "$ftrace_tramp_offset" -ge "$sz_64m" ]; then + echo "ERROR: kernel text extends beyond 64MiB from ftrace_caller" 1>&2 + echo "ERROR: consider disabling CONFIG_FUNCTION_TRACER, or reducing the size \ + of kernel text" 1>&2 + exit 1 +fi diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index e6cddbb2305f..f4e841a36458 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -50,7 +50,7 @@ #include <asm/xive.h> #include <asm/opal.h> #include <asm/firmware.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/sections.h> #include <asm/inst.h> #include <asm/interrupt.h> @@ -3662,7 +3662,7 @@ symbol_lookup(void) int type = inchar(); unsigned long addr, cpu; void __percpu *ptr = NULL; - static char tmp[64]; + static char tmp[KSYM_NAME_LEN]; switch (type) { case 'a': @@ -3671,7 +3671,7 @@ symbol_lookup(void) termch = 0; break; case 's': - getstring(tmp, 64); + getstring(tmp, KSYM_NAME_LEN); if (setjmp(bus_error_jmp) == 0) { catch_memory_errors = 1; sync(); @@ -3686,7 +3686,7 @@ symbol_lookup(void) termch = 0; break; case 'p': - getstring(tmp, 64); + getstring(tmp, KSYM_NAME_LEN); if (setjmp(bus_error_jmp) == 0) { catch_memory_errors = 1; sync(); |
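Returning to ftrace_check.sh: the 32 MiB and 64 MiB limits it enforces fall out of the PowerPC branch encoding. A relative b/bl instruction carries a 24-bit immediate shifted left by two bits, giving it a reach of +/-32 MiB. Keeping ftrace_caller within 32 MiB of _stext, and ftrace_tramp_text within 64 MiB of ftrace_caller, means every function in between is within direct-branch range of at least one of the two. The sketch below models that reachability test in C; the addresses are hypothetical stand-ins for the nm output the script parses.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PPC_BRANCH_RANGE (32LL * 1024 * 1024)	/* +/-32 MiB reach of b/bl */

/* Can a relative branch at 'from' reach 'to'? */
static bool branch_reachable(uint64_t from, uint64_t to)
{
	int64_t delta = (int64_t)(to - from);

	return delta >= -PPC_BRANCH_RANGE && delta < PPC_BRANCH_RANGE;
}

int main(void)
{
	/* Hypothetical addresses standing in for _stext and ftrace_caller. */
	uint64_t stext = 0xc000000000000000ULL;
	uint64_t ftrace_caller = stext + 0x1f00000;	/* ~31 MiB in: reachable */
	uint64_t too_far = stext + 0x2100000;		/* ~33 MiB in: not */

	printf("ftrace_caller reachable from _stext: %d\n",
	       branch_reachable(stext, ftrace_caller));
	printf("too_far reachable from _stext: %d\n",
	       branch_reachable(stext, too_far));
	return 0;
}

This same range limit is presumably why ftrace-gen-ool-stubs.sh reserves out-of-line stub space both inside .text and at its end: a function too far from ftrace_caller can still reach a nearby stub, which in turn carries the long branch.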