diff options
Diffstat (limited to 'arch')
311 files changed, 6228 insertions, 3413 deletions
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 4a905bd667e2..83e9eee57a55 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -147,7 +147,7 @@ retry: /* If for any reason at all we couldn't handle the fault, make sure we exit gracefully rather than endlessly redo the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 85814e74677d..601ed173080b 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -74,9 +74,7 @@ endif ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE # Generic build system uses -O2, we want -O3 # Note: No need to add to cflags-y as that happens anyways -# -# Disable the false maybe-uninitialized warings gcc spits out at -O3 -ARCH_CFLAGS += -O3 $(call cc-disable-warning,maybe-uninitialized,) +ARCH_CFLAGS += -O3 endif # small data is default for elf32 tool-chain. If not usable, disable it diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 858f98ef7f1b..0f92d97432a2 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -110,7 +110,7 @@ #define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE) /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL) /* More Abbrevaited helpers */ #define PAGE_U_NONE __pgprot(___DEF) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 2ee7a4d758a8..5f8c0b47045a 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/cpu.h> #include <linux/of_fdt.h> +#include <linux/of.h> #include <linux/of_platform.h> #include <linux/cache.h> #include <asm/sections.h> diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index 98f22d2eb563..f927b8dc6edd 
100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -296,30 +296,23 @@ static irqreturn_t timer_irq_handler(int irq, void *dev_id) return IRQ_HANDLED; } -static int arc_timer_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) + +static int arc_timer_starting_cpu(unsigned int cpu) { struct clock_event_device *evt = this_cpu_ptr(&arc_clockevent_device); evt->cpumask = cpumask_of(smp_processor_id()); - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_STARTING: - clockevents_config_and_register(evt, arc_timer_freq, - 0, ULONG_MAX); - enable_percpu_irq(arc_timer_irq, 0); - break; - case CPU_DYING: - disable_percpu_irq(arc_timer_irq); - break; - } - - return NOTIFY_OK; + clockevents_config_and_register(evt, arc_timer_freq, 0, ARC_TIMER_MAX); + enable_percpu_irq(arc_timer_irq, 0); + return 0; } -static struct notifier_block arc_timer_cpu_nb = { - .notifier_call = arc_timer_cpu_notify, -}; +static int arc_timer_dying_cpu(unsigned int cpu) +{ + disable_percpu_irq(arc_timer_irq); + return 0; +} /* * clockevent setup for boot CPU @@ -329,12 +322,6 @@ static int __init arc_clockevent_setup(struct device_node *node) struct clock_event_device *evt = this_cpu_ptr(&arc_clockevent_device); int ret; - ret = register_cpu_notifier(&arc_timer_cpu_nb); - if (ret) { - pr_err("Failed to register cpu notifier"); - return ret; - } - arc_timer_irq = irq_of_parse_and_map(node, 0); if (arc_timer_irq <= 0) { pr_err("clockevent: missing irq"); @@ -347,11 +334,6 @@ static int __init arc_clockevent_setup(struct device_node *node) return ret; } - evt->irq = arc_timer_irq; - evt->cpumask = cpumask_of(smp_processor_id()); - clockevents_config_and_register(evt, arc_timer_freq, - 0, ARC_TIMER_MAX); - /* Needs apriori irq_set_percpu_devid() done in intc map function */ ret = request_percpu_irq(arc_timer_irq, timer_irq_handler, "Timer0 (per-cpu-tick)", evt); @@ -360,8 +342,14 @@ static int __init arc_clockevent_setup(struct device_node *node) return ret; } - 
enable_percpu_irq(arc_timer_irq, 0); - + ret = cpuhp_setup_state(CPUHP_AP_ARC_TIMER_STARTING, + "AP_ARC_TIMER_STARTING", + arc_timer_starting_cpu, + arc_timer_dying_cpu); + if (ret) { + pr_err("Failed to setup hotplug state"); + return ret; + } return 0; } diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 73d7e4c75b7d..ab74b5d9186c 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -92,7 +92,8 @@ static void *arc_dma_alloc(struct device *dev, size_t size, static void arc_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs) { - struct page *page = virt_to_page(dma_handle); + phys_addr_t paddr = plat_dma_to_phys(dev, dma_handle); + struct page *page = virt_to_page(paddr); int is_non_coh = 1; is_non_coh = dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs) || diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index af63f4a13e60..e94e5aa33985 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -137,7 +137,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); /* If Pagefault was interrupted by SIGKILL, exit page fault "early" */ if (unlikely(fatal_signal_pending(current))) { diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c index 49b8abd1115c..f52b7db67fd3 100644 --- a/arch/arc/mm/ioremap.c +++ b/arch/arc/mm/ioremap.c @@ -49,7 +49,7 @@ EXPORT_SYMBOL(ioremap); /* * ioremap with access flags * Cache semantics wise it is same as ioremap - "forced" uncached. 
- * However unline vanilla ioremap which bypasses ARC MMU for addresses in + * However unlike vanilla ioremap which bypasses ARC MMU for addresses in * ARC hardware uncached region, this one still goes thru the MMU as caller * might need finer access control (R/W/X) */ diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index f0636ec94903..4c445fb9c189 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1186,6 +1186,60 @@ config ARM_ERRATA_773022 loop buffer may deliver incorrect instructions. This workaround disables the loop buffer to avoid the erratum. +config ARM_ERRATA_818325_852422 + bool "ARM errata: A12: some seqs of opposed cond code instrs => deadlock or corruption" + depends on CPU_V7 + help + This option enables the workaround for: + - Cortex-A12 818325: Execution of an UNPREDICTABLE STR or STM + instruction might deadlock. Fixed in r0p1. + - Cortex-A12 852422: Execution of a sequence of instructions might + lead to either a data corruption or a CPU deadlock. Not fixed in + any Cortex-A12 cores yet. + This workaround for all both errata involves setting bit[12] of the + Feature Register. This bit disables an optimisation applied to a + sequence of 2 instructions that use opposing condition codes. + +config ARM_ERRATA_821420 + bool "ARM errata: A12: sequence of VMOV to core registers might lead to a dead lock" + depends on CPU_V7 + help + This option enables the workaround for the 821420 Cortex-A12 + (all revs) erratum. In very rare timing conditions, a sequence + of VMOV to Core registers instructions, for which the second + one is in the shadow of a branch or abort, can lead to a + deadlock when the VMOV instructions are issued out-of-order. + +config ARM_ERRATA_825619 + bool "ARM errata: A12: DMB NSHST/ISHST mixed ... might cause deadlock" + depends on CPU_V7 + help + This option enables the workaround for the 825619 Cortex-A12 + (all revs) erratum. 
Within rare timing constraints, executing a + DMB NSHST or DMB ISHST instruction followed by a mix of Cacheable + and Device/Strongly-Ordered loads and stores might cause deadlock + +config ARM_ERRATA_852421 + bool "ARM errata: A17: DMB ST might fail to create order between stores" + depends on CPU_V7 + help + This option enables the workaround for the 852421 Cortex-A17 + (r1p0, r1p1, r1p2) erratum. Under very rare timing conditions, + execution of a DMB ST instruction might fail to properly order + stores from GroupA and stores from GroupB. + +config ARM_ERRATA_852423 + bool "ARM errata: A17: some seqs of opposed cond code instrs => deadlock or corruption" + depends on CPU_V7 + help + This option enables the workaround for: + - Cortex-A17 852423: Execution of a sequence of instructions might + lead to either a data corruption or a CPU deadlock. Not fixed in + any Cortex-A17 cores yet. + This is identical to Cortex-A12 erratum 852422. It is a separate + config option from the A12 erratum due to the way errata are checked + for and handled. 
+ endmenu source "arch/arm/common/Kconfig" diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 274e8a6582f1..229afaf2058b 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -327,6 +327,7 @@ zImage: Image $(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@ + @$(kecho) ' Kernel: $(boot)/$@ is ready' $(INSTALL_TARGETS): $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@ diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index 5be33a2d59a9..bdc1d5af03d2 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile @@ -31,7 +31,7 @@ ifeq ($(CONFIG_XIP_KERNEL),y) $(obj)/xipImage: vmlinux FORCE $(call if_changed,objcopy) - @$(kecho) ' Kernel: $@ is ready (physical address: $(CONFIG_XIP_PHYS_ADDR))' + @$(kecho) ' Physical Address of xipImage: $(CONFIG_XIP_PHYS_ADDR)' $(obj)/Image $(obj)/zImage: FORCE @echo 'Kernel configured for XIP (CONFIG_XIP_KERNEL=y)' @@ -46,14 +46,12 @@ $(obj)/xipImage: FORCE $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) - @$(kecho) ' Kernel: $@ is ready' $(obj)/compressed/vmlinux: $(obj)/Image FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed $@ $(obj)/zImage: $(obj)/compressed/vmlinux FORCE $(call if_changed,objcopy) - @$(kecho) ' Kernel: $@ is ready' endif @@ -78,14 +76,12 @@ fi $(obj)/uImage: $(obj)/zImage FORCE @$(check_for_multiple_loadaddr) $(call if_changed,uimage) - @$(kecho) ' Image $@ is ready' $(obj)/bootp/bootp: $(obj)/zImage initrd FORCE $(Q)$(MAKE) $(build)=$(obj)/bootp $@ $(obj)/bootpImage: $(obj)/bootp/bootp FORCE $(call if_changed,objcopy) - @$(kecho) ' Kernel: $@ is ready' PHONY += initrd install zinstall uinstall initrd: diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 52be48bbd2dd..7fa295155543 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -766,7 +766,6 @@ ale_entries = <1024>; bd_ram_size = <0x2000>; no_bd_ram = <0>; - rx_descs = <64>; mac_control = <0x20>; slaves = <2>; active_slave = <0>; @@ -789,7 
+788,7 @@ status = "disabled"; davinci_mdio: mdio@4a101000 { - compatible = "ti,davinci_mdio"; + compatible = "ti,cpsw-mdio","ti,davinci_mdio"; #address-cells = <1>; #size-cells = <0>; ti,hwmods = "davinci_mdio"; diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index 12fcde4d4d2e..cd81ecf12731 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -626,7 +626,6 @@ ale_entries = <1024>; bd_ram_size = <0x2000>; no_bd_ram = <0>; - rx_descs = <64>; mac_control = <0x20>; slaves = <2>; active_slave = <0>; @@ -636,7 +635,7 @@ syscon = <&scm_conf>; davinci_mdio: mdio@4a101000 { - compatible = "ti,am4372-mdio","ti,davinci_mdio"; + compatible = "ti,am4372-mdio","ti,cpsw-mdio","ti,davinci_mdio"; reg = <0x4a101000 0x100>; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts index 81d6c3033b51..c4d04c5293b9 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15.dts +++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts @@ -86,7 +86,7 @@ led@3 { label = "beagle-x15:usr3"; gpios = <&gpio7 15 GPIO_ACTIVE_HIGH>; - linux,default-trigger = "ide-disk"; + linux,default-trigger = "disk-activity"; default-state = "off"; }; }; diff --git a/arch/arm/boot/dts/dm814x.dtsi b/arch/arm/boot/dts/dm814x.dtsi index d4537dc61497..f23cae0c2179 100644 --- a/arch/arm/boot/dts/dm814x.dtsi +++ b/arch/arm/boot/dts/dm814x.dtsi @@ -509,7 +509,6 @@ ale_entries = <1024>; bd_ram_size = <0x2000>; no_bd_ram = <0>; - rx_descs = <64>; mac_control = <0x20>; slaves = <2>; active_slave = <0>; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 3a8f3976f6f9..de559f6e4fee 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1628,7 +1628,6 @@ ale_entries = <1024>; bd_ram_size = <0x2000>; no_bd_ram = <0>; - rx_descs = <64>; mac_control = <0x20>; slaves = <2>; active_slave = <0>; @@ -1663,7 +1662,7 @@ status = "disabled"; davinci_mdio: mdio@48485000 { 
- compatible = "ti,davinci_mdio"; + compatible = "ti,cpsw-mdio","ti,davinci_mdio"; #address-cells = <1>; #size-cells = <0>; ti,hwmods = "davinci_mdio"; diff --git a/arch/arm/boot/dts/kirkwood-ns2lite.dts b/arch/arm/boot/dts/kirkwood-ns2lite.dts index 1f2ca60d8b3d..2c661add0cc0 100644 --- a/arch/arm/boot/dts/kirkwood-ns2lite.dts +++ b/arch/arm/boot/dts/kirkwood-ns2lite.dts @@ -26,7 +26,7 @@ blue-sata { label = "ns2:blue:sata"; gpios = <&gpio0 30 GPIO_ACTIVE_LOW>; - linux,default-trigger = "ide-disk"; + linux,default-trigger = "disk-activity"; }; }; }; diff --git a/arch/arm/boot/dts/kirkwood-topkick.dts b/arch/arm/boot/dts/kirkwood-topkick.dts index f5c8c0dd41dc..1e9a72100a45 100644 --- a/arch/arm/boot/dts/kirkwood-topkick.dts +++ b/arch/arm/boot/dts/kirkwood-topkick.dts @@ -129,7 +129,7 @@ disk { label = "topkick:yellow:disk"; gpios = <&gpio0 21 GPIO_ACTIVE_LOW>; - linux,default-trigger = "ide-disk"; + linux,default-trigger = "disk-activity"; }; system2 { label = "topkick:red:system"; diff --git a/arch/arm/boot/dts/meson8-minix-neo-x8.dts b/arch/arm/boot/dts/meson8-minix-neo-x8.dts index 4f536bb1f002..8bceb8d343f6 100644 --- a/arch/arm/boot/dts/meson8-minix-neo-x8.dts +++ b/arch/arm/boot/dts/meson8-minix-neo-x8.dts @@ -80,6 +80,7 @@ pmic@32 { compatible = "ricoh,rn5t618"; reg = <0x32>; + system-power-controller; regulators { }; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 3b44ef3cff12..3ebee530f2b0 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -539,8 +539,9 @@ gmac: ethernet@ff290000 { compatible = "rockchip,rk3288-gmac"; reg = <0xff290000 0x10000>; - interrupts = <GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "macirq"; + interrupts = <GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_wake_irq"; rockchip,grf = <&grf>; clocks = <&cru SCLK_MAC>, <&cru SCLK_MAC_RX>, <&cru SCLK_MAC_TX>, diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi 
b/arch/arm/boot/dts/socfpga_arria10.dtsi index 17e81dc9213e..5820b70c95b3 100644 --- a/arch/arm/boot/dts/socfpga_arria10.dtsi +++ b/arch/arm/boot/dts/socfpga_arria10.dtsi @@ -621,6 +621,22 @@ compatible = "altr,socfpga-a10-ocram-ecc"; reg = <0xff8c3000 0x400>; }; + + emac0-rx-ecc@ff8c0800 { + compatible = "altr,socfpga-eth-mac-ecc"; + reg = <0xff8c0800 0x400>; + altr,ecc-parent = <&gmac0>; + interrupts = <4 IRQ_TYPE_LEVEL_HIGH>, + <36 IRQ_TYPE_LEVEL_HIGH>; + }; + + emac0-tx-ecc@ff8c0c00 { + compatible = "altr,socfpga-eth-mac-ecc"; + reg = <0xff8c0c00 0x400>; + altr,ecc-parent = <&gmac0>; + interrupts = <5 IRQ_TYPE_LEVEL_HIGH>, + <37 IRQ_TYPE_LEVEL_HIGH>; + }; }; rst: rstmgr@ffd05000 { diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts index 6c60b7f91104..5c1fcab4a6f7 100644 --- a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts +++ b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts @@ -85,187 +85,199 @@ reg = <1>; #address-cells = <1>; #size-cells = <0>; + + switch0: switch0@0 { + compatible = "marvell,mv88e6085"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + dsa,member = <0 0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + label = "lan0"; + }; + + port@1 { + reg = <1>; + label = "lan1"; + }; + + port@2 { + reg = <2>; + label = "lan2"; + }; + + switch0port5: port@5 { + reg = <5>; + label = "dsa"; + phy-mode = "rgmii-txid"; + link = <&switch1port6 + &switch2port9>; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + + port@6 { + reg = <6>; + label = "cpu"; + ethernet = <&fec1>; + fixed-link { + speed = <100>; + full-duplex; + }; + }; + }; + }; }; mdio_mux_2: mdio@2 { reg = <2>; #address-cells = <1>; #size-cells = <0>; - }; - - mdio_mux_4: mdio@4 { - reg = <4>; - #address-cells = <1>; - #size-cells = <0>; - }; - - mdio_mux_8: mdio@8 { - reg = <8>; - #address-cells = <1>; - #size-cells = <0>; - }; - }; - - dsa { - compatible = "marvell,dsa"; - #address-cells = <2>; - 
#size-cells = <0>; - dsa,ethernet = <&fec1>; - dsa,mii-bus = <&mdio_mux_1>; - - /* 6352 - Primary - 7 ports */ - switch0: switch@0-0 { - #address-cells = <1>; - #size-cells = <0>; - reg = <0x00 0>; - eeprom-length = <512>; - port@0 { + switch1: switch1@0 { + compatible = "marvell,mv88e6085"; + #address-cells = <1>; + #size-cells = <0>; reg = <0>; - label = "lan0"; - }; - - port@1 { - reg = <1>; - label = "lan1"; - }; - - port@2 { - reg = <2>; - label = "lan2"; - }; - - switch0port5: port@5 { - reg = <5>; - label = "dsa"; - phy-mode = "rgmii-txid"; - link = <&switch1port6 - &switch2port9>; - - fixed-link { - speed = <1000>; - full-duplex; + dsa,member = <0 1>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + label = "lan3"; + phy-handle = <&switch1phy0>; + }; + + port@1 { + reg = <1>; + label = "lan4"; + phy-handle = <&switch1phy1>; + }; + + port@2 { + reg = <2>; + label = "lan5"; + phy-handle = <&switch1phy2>; + }; + + switch1port5: port@5 { + reg = <5>; + label = "dsa"; + link = <&switch2port9>; + phy-mode = "rgmii-txid"; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + + switch1port6: port@6 { + reg = <6>; + label = "dsa"; + phy-mode = "rgmii-txid"; + link = <&switch0port5>; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; }; - }; - - port@6 { - reg = <6>; - label = "cpu"; - - fixed-link { - speed = <100>; - full-duplex; + mdio { + #address-cells = <1>; + #size-cells = <0>; + switch1phy0: switch1phy0@0 { + reg = <0>; + }; + switch1phy1: switch1phy0@1 { + reg = <1>; + }; + switch1phy2: switch1phy0@2 { + reg = <2>; + }; }; }; - }; - /* 6352 - Secondary - 7 ports */ - switch1: switch@0-1 { + mdio_mux_4: mdio@4 { #address-cells = <1>; #size-cells = <0>; - reg = <0x00 1>; - eeprom-length = <512>; - mii-bus = <&mdio_mux_2>; + reg = <4>; - port@0 { + switch2: switch2@0 { + compatible = "marvell,mv88e6085"; + #address-cells = <1>; + #size-cells = <0>; reg = <0>; - label = "lan3"; - }; - - port@1 { - reg = <1>; - label 
= "lan4"; - }; - - port@2 { - reg = <2>; - label = "lan5"; - }; - - switch1port5: port@5 { - reg = <5>; - label = "dsa"; - link = <&switch2port9>; - phy-mode = "rgmii-txid"; - - fixed-link { - speed = <1000>; - full-duplex; - }; - }; - - switch1port6: port@6 { - reg = <6>; - label = "dsa"; - phy-mode = "rgmii-txid"; - link = <&switch0port5>; - - fixed-link { - speed = <1000>; - full-duplex; + dsa,member = <0 2>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + label = "lan6"; + }; + + port@1 { + reg = <1>; + label = "lan7"; + }; + + port@2 { + reg = <2>; + label = "lan8"; + }; + + port@3 { + reg = <3>; + label = "optical3"; + fixed-link { + speed = <1000>; + full-duplex; + link-gpios = <&gpio6 2 + GPIO_ACTIVE_HIGH>; + }; + }; + + port@4 { + reg = <4>; + label = "optical4"; + fixed-link { + speed = <1000>; + full-duplex; + link-gpios = <&gpio6 3 + GPIO_ACTIVE_HIGH>; + }; + }; + + switch2port9: port@9 { + reg = <9>; + label = "dsa"; + phy-mode = "rgmii-txid"; + link = <&switch1port5 + &switch0port5>; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; }; }; }; - /* 6185 - 10 ports */ - switch2: switch@0-2 { + mdio_mux_8: mdio@8 { + reg = <8>; #address-cells = <1>; #size-cells = <0>; - reg = <0x00 2>; - mii-bus = <&mdio_mux_4>; - - port@0 { - reg = <0>; - label = "lan6"; - }; - - port@1 { - reg = <1>; - label = "lan7"; - }; - - port@2 { - reg = <2>; - label = "lan8"; - }; - - port@3 { - reg = <3>; - label = "optical3"; - - fixed-link { - speed = <1000>; - full-duplex; - link-gpios = <&gpio6 2 - GPIO_ACTIVE_HIGH>; - }; - }; - - port@4 { - reg = <4>; - label = "optical4"; - - fixed-link { - speed = <1000>; - full-duplex; - link-gpios = <&gpio6 3 - GPIO_ACTIVE_HIGH>; - }; - }; - - switch2port9: port@9 { - reg = <9>; - label = "dsa"; - phy-mode = "rgmii-txid"; - link = <&switch1port5 - &switch0port5>; - - fixed-link { - speed = <1000>; - full-duplex; - }; - }; }; }; diff --git a/arch/arm/configs/collie_defconfig 
b/arch/arm/configs/collie_defconfig index 6c56ad086c7c..52dbad5619e2 100644 --- a/arch/arm/configs/collie_defconfig +++ b/arch/arm/configs/collie_defconfig @@ -76,7 +76,7 @@ CONFIG_LEDS_CLASS=y CONFIG_LEDS_LOCOMO=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_LEDS_TRIGGER_IDE_DISK=y +CONFIG_LEDS_TRIGGER_DISK=y # CONFIG_DNOTIFY is not set CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arm/configs/ixp4xx_defconfig b/arch/arm/configs/ixp4xx_defconfig index 24636cfdf6df..cf4918a2c51f 100644 --- a/arch/arm/configs/ixp4xx_defconfig +++ b/arch/arm/configs/ixp4xx_defconfig @@ -180,7 +180,7 @@ CONFIG_LEDS_FSG=y CONFIG_LEDS_GPIO=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_LEDS_TRIGGER_IDE_DISK=y +CONFIG_LEDS_TRIGGER_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_ISL1208=y diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index b2bc8e11471d..4eaea2173bf8 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -480,13 +480,13 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) .macro uaccess_save, tmp #ifdef CONFIG_CPU_SW_DOMAIN_PAN mrc p15, 0, \tmp, c3, c0, 0 - str \tmp, [sp, #S_FRAME_SIZE] + str \tmp, [sp, #SVC_DACR] #endif .endm .macro uaccess_restore #ifdef CONFIG_CPU_SW_DOMAIN_PAN - ldr r0, [sp, #S_FRAME_SIZE] + ldr r0, [sp, #SVC_DACR] mcr p15, 0, r0, c3, c0, 0 #endif .endm diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 112cc1a5d47f..f5d698182d50 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -44,9 +44,7 @@ extern void arm_heavy_mb(void); #define __arm_heavy_mb(x...) 
dsb(x) #endif -#ifdef CONFIG_ARCH_HAS_BARRIERS -#include <mach/barriers.h> -#elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP) +#if defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP) #define mb() __arm_heavy_mb() #define rmb() dsb() #define wmb() __arm_heavy_mb(st) diff --git a/arch/arm/include/asm/delay.h b/arch/arm/include/asm/delay.h index dff714d886d5..b7a428154355 100644 --- a/arch/arm/include/asm/delay.h +++ b/arch/arm/include/asm/delay.h @@ -10,8 +10,8 @@ #include <asm/param.h> /* HZ */ #define MAX_UDELAY_MS 2 -#define UDELAY_MULT ((UL(2199023) * HZ) >> 11) -#define UDELAY_SHIFT 30 +#define UDELAY_MULT UL(2047 * HZ + 483648 * HZ / 1000000) +#define UDELAY_SHIFT 31 #ifndef __ASSEMBLY__ @@ -34,7 +34,7 @@ extern struct arm_delay_ops { * it, it means that you're calling udelay() with an out of range value. * * With currently imposed limits, this means that we support a max delay - * of 2000us. Further limits: HZ<=1000 and bogomips<=3355 + * of 2000us. Further limits: HZ<=1000 */ extern void __bad_udelay(void); diff --git a/arch/arm/include/asm/floppy.h b/arch/arm/include/asm/floppy.h index f4882553fbb0..85a34cc8316a 100644 --- a/arch/arm/include/asm/floppy.h +++ b/arch/arm/include/asm/floppy.h @@ -17,7 +17,7 @@ #define fd_outb(val,port) \ do { \ - if ((port) == FD_DOR) \ + if ((port) == (u32)FD_DOR) \ fd_setdor((val)); \ else \ outb((val),(port)); \ diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 781ef5fe235d..021692c64de3 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -282,7 +282,7 @@ extern void _memset_io(volatile void __iomem *, int, size_t); * These perform PCI memory accesses via an ioremap region. They don't * take an address as such, but a cookie. * - * Again, this are defined to perform little endian accesses. See the + * Again, these are defined to perform little endian accesses. See the * IO port primitives for more information. 
*/ #ifndef readl diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index 20febb368844..b2902a5cd780 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -57,7 +57,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) static inline void clean_pte_table(pte_t *pte) { diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 51622ba7c4a6..e9c9a117bd25 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -13,10 +13,20 @@ #include <uapi/asm/ptrace.h> #ifndef __ASSEMBLY__ +#include <linux/types.h> + struct pt_regs { unsigned long uregs[18]; }; +struct svc_pt_regs { + struct pt_regs regs; + u32 dacr; + u32 addr_limit; +}; + +#define to_svc_pt_regs(r) container_of(r, struct svc_pt_regs, regs) + #define user_mode(regs) \ (((regs)->ARM_cpsr & 0xf) == 0) diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 3cadb726ec88..1e25cd80589e 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -209,17 +209,38 @@ tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) tlb_flush(tlb); } -static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) +static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { + if (tlb->nr == tlb->max) + return true; tlb->pages[tlb->nr++] = page; - VM_BUG_ON(tlb->nr > tlb->max); - return tlb->max - tlb->nr; + return false; } static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) { - if (!__tlb_remove_page(tlb, page)) + if (__tlb_remove_page(tlb, page)) { tlb_flush_mmu(tlb); + __tlb_remove_page(tlb, page); + } +} + +static inline bool __tlb_remove_page_size(struct 
mmu_gather *tlb, + struct page *page, int page_size) +{ + return __tlb_remove_page(tlb, page); +} + +static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, + struct page *page) +{ + return __tlb_remove_page(tlb, page); +} + +static inline void tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + return tlb_remove_page(tlb, page); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 35c9db857ebe..62a6f65029e6 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -104,14 +104,6 @@ static inline void set_fs(mm_segment_t fs) #define segment_eq(a, b) ((a) == (b)) -#define __addr_ok(addr) ({ \ - unsigned long flag; \ - __asm__("cmp %2, %0; movlo %0, #0" \ - : "=&r" (flag) \ - : "0" (current_thread_info()->addr_limit), "r" (addr) \ - : "cc"); \ - (flag == 0); }) - /* We use 33-bit arithmetic here... */ #define __range_ok(addr, size) ({ \ unsigned long flag, roksum; \ @@ -238,49 +230,23 @@ extern int __put_user_2(void *, unsigned int); extern int __put_user_4(void *, unsigned int); extern int __put_user_8(void *, unsigned long long); -#define __put_user_x(__r2, __p, __e, __l, __s) \ - __asm__ __volatile__ ( \ - __asmeq("%0", "r0") __asmeq("%2", "r2") \ - __asmeq("%3", "r1") \ - "bl __put_user_" #__s \ - : "=&r" (__e) \ - : "0" (__p), "r" (__r2), "r" (__l) \ - : "ip", "lr", "cc") - -#define __put_user_check(x, p) \ +#define __put_user_check(__pu_val, __ptr, __err, __s) \ ({ \ unsigned long __limit = current_thread_info()->addr_limit - 1; \ - const typeof(*(p)) __user *__tmp_p = (p); \ - register const typeof(*(p)) __r2 asm("r2") = (x); \ - register const typeof(*(p)) __user *__p asm("r0") = __tmp_p; \ + register typeof(__pu_val) __r2 asm("r2") = __pu_val; \ + register const void __user *__p asm("r0") = __ptr; \ register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ - 
unsigned int __ua_flags = uaccess_save_and_enable(); \ - switch (sizeof(*(__p))) { \ - case 1: \ - __put_user_x(__r2, __p, __e, __l, 1); \ - break; \ - case 2: \ - __put_user_x(__r2, __p, __e, __l, 2); \ - break; \ - case 4: \ - __put_user_x(__r2, __p, __e, __l, 4); \ - break; \ - case 8: \ - __put_user_x(__r2, __p, __e, __l, 8); \ - break; \ - default: __e = __put_user_bad(); break; \ - } \ - uaccess_restore(__ua_flags); \ - __e; \ + __asm__ __volatile__ ( \ + __asmeq("%0", "r0") __asmeq("%2", "r2") \ + __asmeq("%3", "r1") \ + "bl __put_user_" #__s \ + : "=&r" (__e) \ + : "0" (__p), "r" (__r2), "r" (__l) \ + : "ip", "lr", "cc"); \ + __err = __e; \ }) -#define put_user(x, p) \ - ({ \ - might_fault(); \ - __put_user_check(x, p); \ - }) - #else /* CONFIG_MMU */ /* @@ -298,7 +264,7 @@ static inline void set_fs(mm_segment_t fs) } #define get_user(x, p) __get_user(x, p) -#define put_user(x, p) __put_user(x, p) +#define __put_user_check __put_user_nocheck #endif /* CONFIG_MMU */ @@ -389,36 +355,54 @@ do { \ #define __get_user_asm_word(x, addr, err) \ __get_user_asm(x, addr, err, ldr) + +#define __put_user_switch(x, ptr, __err, __fn) \ + do { \ + const __typeof__(*(ptr)) __user *__pu_ptr = (ptr); \ + __typeof__(*(ptr)) __pu_val = (x); \ + unsigned int __ua_flags; \ + might_fault(); \ + __ua_flags = uaccess_save_and_enable(); \ + switch (sizeof(*(ptr))) { \ + case 1: __fn(__pu_val, __pu_ptr, __err, 1); break; \ + case 2: __fn(__pu_val, __pu_ptr, __err, 2); break; \ + case 4: __fn(__pu_val, __pu_ptr, __err, 4); break; \ + case 8: __fn(__pu_val, __pu_ptr, __err, 8); break; \ + default: __err = __put_user_bad(); break; \ + } \ + uaccess_restore(__ua_flags); \ + } while (0) + +#define put_user(x, ptr) \ +({ \ + int __pu_err = 0; \ + __put_user_switch((x), (ptr), __pu_err, __put_user_check); \ + __pu_err; \ +}) + #define __put_user(x, ptr) \ ({ \ long __pu_err = 0; \ - __put_user_err((x), (ptr), __pu_err); \ + __put_user_switch((x), (ptr), __pu_err, __put_user_nocheck); \ 
__pu_err; \ }) #define __put_user_error(x, ptr, err) \ ({ \ - __put_user_err((x), (ptr), err); \ + __put_user_switch((x), (ptr), (err), __put_user_nocheck); \ (void) 0; \ }) -#define __put_user_err(x, ptr, err) \ -do { \ - unsigned long __pu_addr = (unsigned long)(ptr); \ - unsigned int __ua_flags; \ - __typeof__(*(ptr)) __pu_val = (x); \ - __chk_user_ptr(ptr); \ - might_fault(); \ - __ua_flags = uaccess_save_and_enable(); \ - switch (sizeof(*(ptr))) { \ - case 1: __put_user_asm_byte(__pu_val, __pu_addr, err); break; \ - case 2: __put_user_asm_half(__pu_val, __pu_addr, err); break; \ - case 4: __put_user_asm_word(__pu_val, __pu_addr, err); break; \ - case 8: __put_user_asm_dword(__pu_val, __pu_addr, err); break; \ - default: __put_user_bad(); \ - } \ - uaccess_restore(__ua_flags); \ -} while (0) +#define __put_user_nocheck(x, __pu_ptr, __err, __size) \ + do { \ + unsigned long __pu_addr = (unsigned long)__pu_ptr; \ + __put_user_nocheck_##__size(x, __pu_addr, __err); \ + } while (0) + +#define __put_user_nocheck_1 __put_user_asm_byte +#define __put_user_nocheck_2 __put_user_asm_half +#define __put_user_nocheck_4 __put_user_asm_word +#define __put_user_nocheck_8 __put_user_asm_dword #define __put_user_asm(x, __pu_addr, err, instr) \ __asm__ __volatile__( \ diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h index b6b962d70db9..9d874db13c0e 100644 --- a/arch/arm/include/asm/xen/hypercall.h +++ b/arch/arm/include/asm/xen/hypercall.h @@ -52,6 +52,7 @@ int HYPERVISOR_memory_op(unsigned int cmd, void *arg); int HYPERVISOR_physdev_op(int cmd, void *arg); int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); int HYPERVISOR_tmem_op(void *arg); +int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type); int HYPERVISOR_platform_op_raw(void *arg); static inline int HYPERVISOR_platform_op(struct xen_platform_op *op) { diff --git a/arch/arm/include/asm/xen/xen-ops.h b/arch/arm/include/asm/xen/xen-ops.h new file mode 100644 index 
000000000000..ec154e719b11 --- /dev/null +++ b/arch/arm/include/asm/xen/xen-ops.h @@ -0,0 +1,6 @@ +#ifndef _ASM_XEN_OPS_H +#define _ASM_XEN_OPS_H + +void xen_efi_runtime_setup(void); + +#endif /* _ASM_XEN_OPS_H */ diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 27d05813ff09..608008229c7d 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -107,7 +107,10 @@ int main(void) DEFINE(S_PC, offsetof(struct pt_regs, ARM_pc)); DEFINE(S_PSR, offsetof(struct pt_regs, ARM_cpsr)); DEFINE(S_OLD_R0, offsetof(struct pt_regs, ARM_ORIG_r0)); - DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); + DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs)); + DEFINE(SVC_DACR, offsetof(struct svc_pt_regs, dacr)); + DEFINE(SVC_ADDR_LIMIT, offsetof(struct svc_pt_regs, addr_limit)); + DEFINE(SVC_REGS_SIZE, sizeof(struct svc_pt_regs)); BLANK(); #ifdef CONFIG_CACHE_L2X0 DEFINE(L2X0_R_PHY_BASE, offsetof(struct l2x0_regs, phy_base)); diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c index a44b268e12e1..7dccc964d75f 100644 --- a/arch/arm/kernel/cpuidle.c +++ b/arch/arm/kernel/cpuidle.c @@ -47,18 +47,13 @@ int arm_cpuidle_simple_enter(struct cpuidle_device *dev, * This function calls the underlying arch specific low level PM code as * registered at the init time. * - * Returns -EOPNOTSUPP if no suspend callback is defined, the result of the - * callback otherwise. + * Returns the result of the suspend callback. */ int arm_cpuidle_suspend(int index) { - int ret = -EOPNOTSUPP; int cpu = smp_processor_id(); - if (cpuidle_ops[cpu].suspend) - ret = cpuidle_ops[cpu].suspend(index); - - return ret; + return cpuidle_ops[cpu].suspend(index); } /** @@ -92,7 +87,8 @@ static const struct cpuidle_ops *__init arm_cpuidle_get_ops(const char *method) * process. * * Return 0 on sucess, -ENOENT if no 'enable-method' is defined, -EOPNOTSUPP if - * no cpuidle_ops is registered for the 'enable-method'. 
+ * no cpuidle_ops is registered for the 'enable-method', or if either init or + * suspend callback isn't defined. */ static int __init arm_cpuidle_read_ops(struct device_node *dn, int cpu) { @@ -110,6 +106,12 @@ static int __init arm_cpuidle_read_ops(struct device_node *dn, int cpu) return -EOPNOTSUPP; } + if (!ops->init || !ops->suspend) { + pr_warn("cpuidle_ops '%s': no init or suspend callback\n", + enable_method); + return -EOPNOTSUPP; + } + cpuidle_ops[cpu] = *ops; /* structure copy */ pr_notice("cpuidle: enable-method property '%s'" @@ -129,7 +131,8 @@ static int __init arm_cpuidle_read_ops(struct device_node *dn, int cpu) * Returns: * 0 on success, * -ENODEV if it fails to find the cpu node in the device tree, - * -EOPNOTSUPP if it does not find a registered cpuidle_ops for this cpu, + * -EOPNOTSUPP if it does not find a registered and valid cpuidle_ops for + * this cpu, * -ENOENT if it fails to find an 'enable-method' property, * -ENXIO if the HW reports a failure or a misconfiguration, * -ENOMEM if the HW report an memory allocation failure @@ -143,7 +146,7 @@ int __init arm_cpuidle_init(int cpu) return -ENODEV; ret = arm_cpuidle_read_ops(cpu_node, cpu); - if (!ret && cpuidle_ops[cpu].init) + if (!ret) ret = cpuidle_ops[cpu].init(cpu_node, cpu); of_node_put(cpu_node); diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index 2e26016a91a5..40ecd5f514a2 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -23,6 +23,7 @@ #include <asm/cputype.h> #include <asm/setup.h> #include <asm/page.h> +#include <asm/prom.h> #include <asm/smp_plat.h> #include <asm/mach/arch.h> #include <asm/mach-types.h> @@ -213,6 +214,8 @@ const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) #if defined(CONFIG_ARCH_MULTIPLATFORM) || defined(CONFIG_ARM_SINGLE_ARMV7M) DT_MACHINE_START(GENERIC_DT, "Generic DT based system") + .l2c_aux_val = 0x0, + .l2c_aux_mask = ~0x0, MACHINE_END mdesc_best = &__mach_desc_GENERIC_DT; diff --git 
a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index e2550500486d..bc5f50799d75 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -92,7 +92,7 @@ * Invalid mode handlers */ .macro inv_entry, reason - sub sp, sp, #S_FRAME_SIZE + sub sp, sp, #PT_REGS_SIZE ARM( stmib sp, {r1 - lr} ) THUMB( stmia sp, {r0 - r12} ) THUMB( str sp, [sp, #S_SP] ) @@ -152,7 +152,7 @@ ENDPROC(__und_invalid) .macro svc_entry, stack_hole=0, trace=1, uaccess=1 UNWIND(.fnstart ) UNWIND(.save {r0 - pc} ) - sub sp, sp, #(S_FRAME_SIZE + 8 + \stack_hole - 4) + sub sp, sp, #(SVC_REGS_SIZE + \stack_hole - 4) #ifdef CONFIG_THUMB2_KERNEL SPFIX( str r0, [sp] ) @ temporarily saved SPFIX( mov r0, sp ) @@ -167,7 +167,7 @@ ENDPROC(__und_invalid) ldmia r0, {r3 - r5} add r7, sp, #S_SP - 4 @ here for interlock avoidance mov r6, #-1 @ "" "" "" "" - add r2, sp, #(S_FRAME_SIZE + 8 + \stack_hole - 4) + add r2, sp, #(SVC_REGS_SIZE + \stack_hole - 4) SPFIX( addeq r2, r2, #4 ) str r3, [sp, #-4]! @ save the "real" r0 copied @ from the exception stack @@ -185,6 +185,12 @@ ENDPROC(__und_invalid) @ stmia r7, {r2 - r6} + get_thread_info tsk + ldr r0, [tsk, #TI_ADDR_LIMIT] + mov r1, #TASK_SIZE + str r1, [tsk, #TI_ADDR_LIMIT] + str r0, [sp, #SVC_ADDR_LIMIT] + uaccess_save r0 .if \uaccess uaccess_disable r0 @@ -213,7 +219,6 @@ __irq_svc: irq_handler #ifdef CONFIG_PREEMPT - get_thread_info tsk ldr r8, [tsk, #TI_PREEMPT] @ get preempt count ldr r0, [tsk, #TI_FLAGS] @ get flags teq r8, #0 @ if preempt count != 0 @@ -366,17 +371,17 @@ ENDPROC(__fiq_abt) /* * User mode handlers * - * EABI note: sp_svc is always 64-bit aligned here, so should S_FRAME_SIZE + * EABI note: sp_svc is always 64-bit aligned here, so should PT_REGS_SIZE */ -#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) && (S_FRAME_SIZE & 7) +#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) && (PT_REGS_SIZE & 7) #error "sizeof(struct pt_regs) must be a multiple of 8" #endif .macro usr_entry, trace=1, uaccess=1 
UNWIND(.fnstart ) UNWIND(.cantunwind ) @ don't unwind the user space - sub sp, sp, #S_FRAME_SIZE + sub sp, sp, #PT_REGS_SIZE ARM( stmib sp, {r1 - r12} ) THUMB( stmia sp, {r0 - r12} ) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 30a7228eaceb..10c3283d6c19 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -145,7 +145,7 @@ ENTRY(vector_swi) #ifdef CONFIG_CPU_V7M v7m_exception_entry #else - sub sp, sp, #S_FRAME_SIZE + sub sp, sp, #PT_REGS_SIZE stmia sp, {r0 - r12} @ Calling r0 - r12 ARM( add r8, sp, #S_PC ) ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 0d22ad206d52..6391728c8f03 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -90,7 +90,7 @@ @ Linux expects to have irqs off. Do it here before taking stack space cpsid i - sub sp, #S_FRAME_SIZE-S_IP + sub sp, #PT_REGS_SIZE-S_IP stmdb sp!, {r0-r11} @ load saved r12, lr, return address and xPSR. @@ -160,7 +160,7 @@ ldmia sp!, {r0-r11} @ restore main sp - add sp, sp, #S_FRAME_SIZE-S_IP + add sp, sp, #PT_REGS_SIZE-S_IP cpsie i bx lr @@ -215,7 +215,9 @@ blne trace_hardirqs_off #endif .endif + ldr r1, [sp, #SVC_ADDR_LIMIT] uaccess_restore + str r1, [tsk, #TI_ADDR_LIMIT] #ifndef CONFIG_THUMB2_KERNEL @ ARM mode SVC restore @@ -259,7 +261,9 @@ @ on the stack remains correct). @ .macro svc_exit_via_fiq + ldr r1, [sp, #SVC_ADDR_LIMIT] uaccess_restore + str r1, [tsk, #TI_ADDR_LIMIT] #ifndef CONFIG_THUMB2_KERNEL @ ARM mode restore mov r0, sp @@ -307,7 +311,7 @@ .endif mov r0, r0 @ ARMv5T and earlier require a nop @ after ldm {}^ - add sp, sp, #\offset + S_FRAME_SIZE + add sp, sp, #\offset + PT_REGS_SIZE movs pc, lr @ return & move spsr_svc into cpsr #elif defined(CONFIG_CPU_V7M) @ V7M restore. 
@@ -334,7 +338,7 @@ .else ldmdb sp, {r0 - r12} @ get calling r0 - r12 .endif - add sp, sp, #S_FRAME_SIZE - S_SP + add sp, sp, #PT_REGS_SIZE - S_SP movs pc, lr @ return & move spsr_svc into cpsr #endif /* !CONFIG_THUMB2_KERNEL */ .endm diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S index 907534f97053..abcf47848525 100644 --- a/arch/arm/kernel/entry-v7m.S +++ b/arch/arm/kernel/entry-v7m.S @@ -73,7 +73,7 @@ __irq_entry: @ correctness they don't need to be restored. So only r8-r11 must be @ restored here. The easiest way to do so is to restore r0-r7, too. ldmia sp!, {r0-r11} - add sp, #S_FRAME_SIZE-S_IP + add sp, #PT_REGS_SIZE-S_IP cpsie i bx lr ENDPROC(__irq_entry) diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 4a803c5a1ff7..612eb530f33f 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -96,19 +96,23 @@ void __show_regs(struct pt_regs *regs) unsigned long flags; char buf[64]; #ifndef CONFIG_CPU_V7M - unsigned int domain; + unsigned int domain, fs; #ifdef CONFIG_CPU_SW_DOMAIN_PAN /* * Get the domain register for the parent context. In user * mode, we don't save the DACR, so lets use what it should * be. For other modes, we place it after the pt_regs struct. 
*/ - if (user_mode(regs)) + if (user_mode(regs)) { domain = DACR_UACCESS_ENABLE; - else - domain = *(unsigned int *)(regs + 1); + fs = get_fs(); + } else { + domain = to_svc_pt_regs(regs)->dacr; + fs = to_svc_pt_regs(regs)->addr_limit; + } #else domain = get_domain(); + fs = get_fs(); #endif #endif @@ -144,7 +148,7 @@ void __show_regs(struct pt_regs *regs) if ((domain & domain_mask(DOMAIN_USER)) == domain_val(DOMAIN_USER, DOMAIN_NOACCESS)) segment = "none"; - else if (get_fs() == get_ds()) + else if (fs == get_ds()) segment = "kernel"; else segment = "user"; diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 7b5350060612..85a0bcb1f7ca 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -844,7 +844,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) struct resource *res; kernel_code.start = virt_to_phys(_text); - kernel_code.end = virt_to_phys(_etext - 1); + kernel_code.end = virt_to_phys(__init_begin - 1); kernel_data.start = virt_to_phys(_sdata); kernel_data.end = virt_to_phys(_end - 1); @@ -1064,6 +1064,7 @@ void __init setup_arch(char **cmdline_p) early_paging_init(mdesc); #endif setup_dma_zone(mdesc); + xen_early_init(); efi_init(); sanity_check_meminfo(); arm_memblock_init(mdesc); @@ -1080,7 +1081,6 @@ void __init setup_arch(char **cmdline_p) arm_dt_init_cpu_maps(); psci_dt_init(); - xen_early_init(); #ifdef CONFIG_SMP if (is_smp()) { if (!mdesc->smp_init || !mdesc->smp_init()) { diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index 2e72be4f623e..22313cb53362 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c @@ -93,17 +93,53 @@ void erratum_a15_798181_init(void) unsigned int revidr = read_cpuid(CPUID_REVIDR); /* Brahma-B15 r0p0..r0p2 affected - * Cortex-A15 r0p0..r3p2 w/o ECO fix affected */ - if ((midr & 0xff0ffff0) == 0x420f00f0 && midr <= 0x420f00f2) + * Cortex-A15 r0p0..r3p3 w/o ECO fix affected + * Fixes applied to A15 with respect to the revision 
and revidr are: + * + * r0p0-r2p1: No fixes applied + * r2p2,r2p3: + * REVIDR[4]: 798181 Moving a virtual page that is being accessed + * by an active process can lead to unexpected behavior + * REVIDR[9]: Not defined + * r2p4,r3p0,r3p1,r3p2: + * REVIDR[4]: 798181 Moving a virtual page that is being accessed + * by an active process can lead to unexpected behavior + * REVIDR[9]: 798181 Moving a virtual page that is being accessed + * by an active process can lead to unexpected behavior + * - This is an update to a previously released ECO. + * r3p3: + * REVIDR[4]: Reserved + * REVIDR[9]: 798181 Moving a virtual page that is being accessed + * by an active process can lead to unexpected behavior + * - This is an update to a previously released ECO. + * + * Handling: + * REVIDR[9] set -> No WA + * REVIDR[4] set, REVIDR[9] cleared -> Partial WA + * Both cleared -> Full WA + */ + if ((midr & 0xff0ffff0) == 0x420f00f0 && midr <= 0x420f00f2) { erratum_a15_798181_handler = erratum_a15_798181_broadcast; - else if ((midr & 0xff0ffff0) == 0x410fc0f0 && midr <= 0x413fc0f2 && - (revidr & 0x210) != 0x210) { + } else if ((midr & 0xff0ffff0) == 0x410fc0f0 && midr < 0x412fc0f2) { + erratum_a15_798181_handler = erratum_a15_798181_broadcast; + } else if ((midr & 0xff0ffff0) == 0x410fc0f0 && midr < 0x412fc0f4) { if (revidr & 0x10) erratum_a15_798181_handler = erratum_a15_798181_partial; else erratum_a15_798181_handler = erratum_a15_798181_broadcast; + } else if ((midr & 0xff0ffff0) == 0x410fc0f0 && midr < 0x413fc0f3) { + if ((revidr & 0x210) == 0) + erratum_a15_798181_handler = + erratum_a15_798181_broadcast; + else if (revidr & 0x10) + erratum_a15_798181_handler = + erratum_a15_798181_partial; + } else if ((midr & 0xff0ffff0) == 0x410fc0f0 && midr < 0x414fc0f0) { + if ((revidr & 0x200) == 0) + erratum_a15_798181_handler = + erratum_a15_798181_partial; } } #endif diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index b6ec65e68009..02d5e5e8d44c 100644 --- 
a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -310,24 +310,17 @@ static void twd_timer_setup(void) enable_percpu_irq(clk->irq, 0); } -static int twd_timer_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int twd_timer_starting_cpu(unsigned int cpu) { - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_STARTING: - twd_timer_setup(); - break; - case CPU_DYING: - twd_timer_stop(); - break; - } - - return NOTIFY_OK; + twd_timer_setup(); + return 0; } -static struct notifier_block twd_timer_cpu_nb = { - .notifier_call = twd_timer_cpu_notify, -}; +static int twd_timer_dying_cpu(unsigned int cpu) +{ + twd_timer_stop(); + return 0; +} static int __init twd_local_timer_common_register(struct device_node *np) { @@ -345,9 +338,9 @@ static int __init twd_local_timer_common_register(struct device_node *np) goto out_free; } - err = register_cpu_notifier(&twd_timer_cpu_nb); - if (err) - goto out_irq; + cpuhp_setup_state_nocalls(CPUHP_AP_ARM_TWD_STARTING, + "AP_ARM_TWD_STARTING", + twd_timer_starting_cpu, twd_timer_dying_cpu); twd_get_clock(np); if (!of_property_read_bool(np, "always-on")) @@ -365,8 +358,6 @@ static int __init twd_local_timer_common_register(struct device_node *np) return 0; -out_irq: - free_percpu_irq(twd_ppi, twd_evt); out_free: iounmap(twd_base); twd_base = NULL; diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index e2c6da096cef..99420fc1f066 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -125,6 +125,8 @@ SECTIONS #ifdef CONFIG_DEBUG_ALIGN_RODATA . = ALIGN(1<<SECTION_SHIFT); #endif + _etext = .; /* End of text section */ + RO_DATA(PAGE_SIZE) . = ALIGN(4); @@ -155,8 +157,6 @@ SECTIONS NOTES - _etext = .; /* End of text and rodata section */ - #ifdef CONFIG_DEBUG_RODATA . 
= ALIGN(1<<SECTION_SHIFT); #else diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index d8a780799506..27f4d96258a2 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -29,7 +29,10 @@ else lib-y += io-readsw-armv4.o io-writesw-armv4.o endif -lib-$(CONFIG_ARCH_RPC) += ecard.o io-acorn.o floppydma.o +ifeq ($(CONFIG_ARCH_RPC),y) + lib-y += ecard.o io-acorn.o floppydma.o + AFLAGS_delay-loop.o += -march=armv4 +endif $(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S $(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S index 518bf6e93f78..792c59d885bc 100644 --- a/arch/arm/lib/delay-loop.S +++ b/arch/arm/lib/delay-loop.S @@ -10,6 +10,7 @@ #include <linux/linkage.h> #include <asm/assembler.h> #include <asm/delay.h> + .text .LC0: .word loops_per_jiffy @@ -17,7 +18,6 @@ /* * r0 <= 2000 - * lpj <= 0x01ffffff (max. 3355 bogomips) * HZ <= 1000 */ @@ -25,16 +25,11 @@ ENTRY(__loop_udelay) ldr r2, .LC1 mul r0, r2, r0 ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06 - mov r1, #-1 ldr r2, .LC0 - ldr r2, [r2] @ max = 0x01ffffff - add r0, r0, r1, lsr #32-14 - mov r0, r0, lsr #14 @ max = 0x0001ffff - add r2, r2, r1, lsr #32-10 - mov r2, r2, lsr #10 @ max = 0x00007fff - mul r0, r2, r0 @ max = 2^32-1 - add r0, r0, r1, lsr #32-6 - movs r0, r0, lsr #6 + ldr r2, [r2] + umull r1, r0, r2, r0 + adds r1, r1, #0xffffffff + adcs r0, r0, r0 reteq lr /* diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c index 68cc09907828..ab47b8eb1b15 100644 --- a/arch/arm/mach-davinci/board-dm644x-evm.c +++ b/arch/arm/mach-davinci/board-dm644x-evm.c @@ -288,7 +288,7 @@ static struct gpio_led evm_leds[] = { { .name = "DS2", .active_low = 1, .default_trigger = "mmc0", }, { .name = "DS1", .active_low = 1, - .default_trigger = "ide-disk", }, + .default_trigger = "disk-activity", }, }; static const struct gpio_led_platform_data evm_led_data = { diff --git 
a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index e80f0dde2189..ae2a018b9305 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -111,20 +111,12 @@ static struct notifier_block mvebu_hwcc_pci_nb __maybe_unused = { .notifier_call = mvebu_hwcc_notifier, }; -static int armada_xp_clear_shared_l2_notifier_func(struct notifier_block *nfb, - unsigned long action, void *hcpu) +static int armada_xp_clear_l2_starting(unsigned int cpu) { - if (action == CPU_STARTING || action == CPU_STARTING_FROZEN) - armada_xp_clear_shared_l2(); - - return NOTIFY_OK; + armada_xp_clear_shared_l2(); + return 0; } -static struct notifier_block armada_xp_clear_shared_l2_notifier = { - .notifier_call = armada_xp_clear_shared_l2_notifier_func, - .priority = 100, -}; - static void __init armada_370_coherency_init(struct device_node *np) { struct resource res; @@ -155,8 +147,9 @@ static void __init armada_370_coherency_init(struct device_node *np) of_node_put(cpu_config_np); - register_cpu_notifier(&armada_xp_clear_shared_l2_notifier); - + cpuhp_setup_state_nocalls(CPUHP_AP_ARM_MVEBU_COHERENCY, + "AP_ARM_MVEBU_COHERENCY", + armada_xp_clear_l2_starting, NULL); exit: set_cpu_coherent(); } diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c index 209aecb0df68..4dfb99504810 100644 --- a/arch/arm/mach-omap1/board-osk.c +++ b/arch/arm/mach-omap1/board-osk.c @@ -172,7 +172,7 @@ static struct gpio_led tps_leds[] = { * Also, D9 requires non-battery power. 
*/ { .gpio = OSK_TPS_GPIO_LED_D9, .name = "d9", - .default_trigger = "ide-disk", }, + .default_trigger = "disk-activity", }, { .gpio = OSK_TPS_GPIO_LED_D2, .name = "d2", }, { .gpio = OSK_TPS_GPIO_LED_D3, .name = "d3", .active_low = 1, .default_trigger = "heartbeat", }, diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index bd7cd8b6a286..1080580b1343 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -464,7 +464,7 @@ static struct gpio_led spitz_gpio_leds[] = { }, { .name = "spitz:green:hddactivity", - .default_trigger = "ide-disk", + .default_trigger = "disk-activity", .gpio = SPITZ_GPIO_LED_GREEN, }, }; diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index cb569b65a54d..d15a7fe51618 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -1025,12 +1025,6 @@ config ARM_DMA_MEM_BUFFERABLE You are recommended say 'Y' here and debug any affected drivers. -config ARCH_HAS_BARRIERS - bool - help - This option allows the use of custom mandatory barriers - included via the mach/barriers.h file. 
- config ARM_HEAVY_MB bool diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index c61996c256cc..cc12905ae6f8 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -597,17 +597,16 @@ static void l2c310_configure(void __iomem *base) L310_POWER_CTRL); } -static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, void *data) +static int l2c310_starting_cpu(unsigned int cpu) { - switch (act & ~CPU_TASKS_FROZEN) { - case CPU_STARTING: - set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); - break; - case CPU_DYING: - set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1))); - break; - } - return NOTIFY_OK; + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); + return 0; +} + +static int l2c310_dying_cpu(unsigned int cpu) +{ + set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1))); + return 0; } static void __init l2c310_enable(void __iomem *base, unsigned num_lock) @@ -678,10 +677,10 @@ static void __init l2c310_enable(void __iomem *base, unsigned num_lock) power_ctrl & L310_STNDBY_MODE_EN ? 
"en" : "dis"); } - if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) { - set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); - cpu_notifier(l2c310_cpu_enable_flz, 0); - } + if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) + cpuhp_setup_state(CPUHP_AP_ARM_L2X0_STARTING, + "AP_ARM_L2X0_STARTING", l2c310_starting_cpu, + l2c310_dying_cpu); } static void __init l2c310_fixup(void __iomem *base, u32 cache_id, diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ff7ed5697d3e..b7eed75960fe 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -49,6 +49,7 @@ struct arm_dma_alloc_args { pgprot_t prot; const void *caller; bool want_vaddr; + int coherent_flag; }; struct arm_dma_free_args { @@ -59,6 +60,9 @@ struct arm_dma_free_args { bool want_vaddr; }; +#define NORMAL 0 +#define COHERENT 1 + struct arm_dma_allocator { void *(*alloc)(struct arm_dma_alloc_args *args, struct page **ret_page); @@ -272,7 +276,7 @@ static u64 get_coherent_dma_mask(struct device *dev) return mask; } -static void __dma_clear_buffer(struct page *page, size_t size) +static void __dma_clear_buffer(struct page *page, size_t size, int coherent_flag) { /* * Ensure that the allocated pages are zeroed, and that any data @@ -284,17 +288,21 @@ static void __dma_clear_buffer(struct page *page, size_t size) while (size > 0) { void *ptr = kmap_atomic(page); memset(ptr, 0, PAGE_SIZE); - dmac_flush_range(ptr, ptr + PAGE_SIZE); + if (coherent_flag != COHERENT) + dmac_flush_range(ptr, ptr + PAGE_SIZE); kunmap_atomic(ptr); page++; size -= PAGE_SIZE; } - outer_flush_range(base, end); + if (coherent_flag != COHERENT) + outer_flush_range(base, end); } else { void *ptr = page_address(page); memset(ptr, 0, size); - dmac_flush_range(ptr, ptr + size); - outer_flush_range(__pa(ptr), __pa(ptr) + size); + if (coherent_flag != COHERENT) { + dmac_flush_range(ptr, ptr + size); + outer_flush_range(__pa(ptr), __pa(ptr) + size); + } } } @@ -302,7 +310,8 @@ static void __dma_clear_buffer(struct page *page, size_t 
size) * Allocate a DMA buffer for 'dev' of size 'size' using the * specified gfp mask. Note that 'size' must be page aligned. */ -static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gfp) +static struct page *__dma_alloc_buffer(struct device *dev, size_t size, + gfp_t gfp, int coherent_flag) { unsigned long order = get_order(size); struct page *page, *p, *e; @@ -318,7 +327,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) __free_page(p); - __dma_clear_buffer(page, size); + __dma_clear_buffer(page, size, coherent_flag); return page; } @@ -340,7 +349,8 @@ static void __dma_free_buffer(struct page *page, size_t size) static void *__alloc_from_contiguous(struct device *dev, size_t size, pgprot_t prot, struct page **ret_page, - const void *caller, bool want_vaddr); + const void *caller, bool want_vaddr, + int coherent_flag); static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, pgprot_t prot, struct page **ret_page, @@ -405,10 +415,13 @@ static int __init atomic_pool_init(void) atomic_pool = gen_pool_create(PAGE_SHIFT, -1); if (!atomic_pool) goto out; - + /* + * The atomic pool is only used for non-coherent allocations + * so we must pass NORMAL for coherent_flag. 
+ */ if (dev_get_cma_area(NULL)) ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot, - &page, atomic_pool_init, true); + &page, atomic_pool_init, true, NORMAL); else ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot, &page, atomic_pool_init, true); @@ -522,7 +535,11 @@ static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, { struct page *page; void *ptr = NULL; - page = __dma_alloc_buffer(dev, size, gfp); + /* + * __alloc_remap_buffer is only called when the device is + * non-coherent + */ + page = __dma_alloc_buffer(dev, size, gfp, NORMAL); if (!page) return NULL; if (!want_vaddr) @@ -577,7 +594,8 @@ static int __free_from_pool(void *start, size_t size) static void *__alloc_from_contiguous(struct device *dev, size_t size, pgprot_t prot, struct page **ret_page, - const void *caller, bool want_vaddr) + const void *caller, bool want_vaddr, + int coherent_flag) { unsigned long order = get_order(size); size_t count = size >> PAGE_SHIFT; @@ -588,7 +606,7 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size, if (!page) return NULL; - __dma_clear_buffer(page, size); + __dma_clear_buffer(page, size, coherent_flag); if (!want_vaddr) goto out; @@ -638,7 +656,7 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot) #define __get_dma_pgprot(attrs, prot) __pgprot(0) #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv) NULL #define __alloc_from_pool(size, ret_page) NULL -#define __alloc_from_contiguous(dev, size, prot, ret, c, wv) NULL +#define __alloc_from_contiguous(dev, size, prot, ret, c, wv, coherent_flag) NULL #define __free_from_pool(cpu_addr, size) do { } while (0) #define __free_from_contiguous(dev, page, cpu_addr, size, wv) do { } while (0) #define __dma_free_remap(cpu_addr, size) do { } while (0) @@ -649,7 +667,8 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, struct page **ret_page) { struct page *page; - page = __dma_alloc_buffer(dev, 
size, gfp); + /* __alloc_simple_buffer is only called when the device is coherent */ + page = __dma_alloc_buffer(dev, size, gfp, COHERENT); if (!page) return NULL; @@ -679,7 +698,7 @@ static void *cma_allocator_alloc(struct arm_dma_alloc_args *args, { return __alloc_from_contiguous(args->dev, args->size, args->prot, ret_page, args->caller, - args->want_vaddr); + args->want_vaddr, args->coherent_flag); } static void cma_allocator_free(struct arm_dma_free_args *args) @@ -746,6 +765,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, .prot = prot, .caller = caller, .want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs), + .coherent_flag = is_coherent ? COHERENT : NORMAL, }; #ifdef CONFIG_DMA_API_DEBUG @@ -1253,7 +1273,8 @@ static inline void __free_iova(struct dma_iommu_mapping *mapping, static const int iommu_order_array[] = { 9, 8, 4, 0 }; static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, - gfp_t gfp, struct dma_attrs *attrs) + gfp_t gfp, struct dma_attrs *attrs, + int coherent_flag) { struct page **pages; int count = size >> PAGE_SHIFT; @@ -1277,7 +1298,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, if (!page) goto error; - __dma_clear_buffer(page, size); + __dma_clear_buffer(page, size, coherent_flag); for (i = 0; i < count; i++) pages[i] = page + i; @@ -1327,7 +1348,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, pages[i + j] = pages[i] + j; } - __dma_clear_buffer(pages[i], PAGE_SIZE << order); + __dma_clear_buffer(pages[i], PAGE_SIZE << order, coherent_flag); i += 1 << order; count -= 1 << order; } @@ -1455,13 +1476,16 @@ static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs) return NULL; } -static void *__iommu_alloc_atomic(struct device *dev, size_t size, - dma_addr_t *handle) +static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp, + dma_addr_t *handle, int coherent_flag) { struct page 
*page; void *addr; - addr = __alloc_from_pool(size, &page); + if (coherent_flag == COHERENT) + addr = __alloc_simple_buffer(dev, size, gfp, &page); + else + addr = __alloc_from_pool(size, &page); if (!addr) return NULL; @@ -1477,14 +1501,18 @@ err_mapping: } static void __iommu_free_atomic(struct device *dev, void *cpu_addr, - dma_addr_t handle, size_t size) + dma_addr_t handle, size_t size, int coherent_flag) { __iommu_remove_mapping(dev, handle, size); - __free_from_pool(cpu_addr, size); + if (coherent_flag == COHERENT) + __dma_free_buffer(virt_to_page(cpu_addr), size); + else + __free_from_pool(cpu_addr, size); } -static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, - dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) +static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs, + int coherent_flag) { pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); struct page **pages; @@ -1493,8 +1521,9 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, *handle = DMA_ERROR_CODE; size = PAGE_ALIGN(size); - if (!gfpflags_allow_blocking(gfp)) - return __iommu_alloc_atomic(dev, size, handle); + if (coherent_flag == COHERENT || !gfpflags_allow_blocking(gfp)) + return __iommu_alloc_simple(dev, size, gfp, handle, + coherent_flag); /* * Following is a work-around (a.k.a. 
hack) to prevent pages @@ -1505,7 +1534,7 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, */ gfp &= ~(__GFP_COMP); - pages = __iommu_alloc_buffer(dev, size, gfp, attrs); + pages = __iommu_alloc_buffer(dev, size, gfp, attrs, coherent_flag); if (!pages) return NULL; @@ -1530,7 +1559,19 @@ err_buffer: return NULL; } -static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, +static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) +{ + return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, NORMAL); +} + +static void *arm_coherent_iommu_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) +{ + return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, COHERENT); +} + +static int __arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs) { @@ -1540,8 +1581,6 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned long off = vma->vm_pgoff; - vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); - if (!pages) return -ENXIO; @@ -1562,19 +1601,34 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, return 0; } +static int arm_iommu_mmap_attrs(struct device *dev, + struct vm_area_struct *vma, void *cpu_addr, + dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs) +{ + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); + + return __arm_iommu_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, attrs); +} + +static int arm_coherent_iommu_mmap_attrs(struct device *dev, + struct vm_area_struct *vma, void *cpu_addr, + dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs) +{ + return __arm_iommu_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, attrs); +} /* * free a page as 
defined by the above mapping. * Must not be called with IRQs disabled. */ -void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, struct dma_attrs *attrs) +void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs, int coherent_flag) { struct page **pages; size = PAGE_ALIGN(size); - if (__in_atomic_pool(cpu_addr, size)) { - __iommu_free_atomic(dev, cpu_addr, handle, size); + if (coherent_flag == COHERENT || __in_atomic_pool(cpu_addr, size)) { + __iommu_free_atomic(dev, cpu_addr, handle, size, coherent_flag); return; } @@ -1593,6 +1647,18 @@ void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, __iommu_free_buffer(dev, pages, size, attrs); } +void arm_iommu_free_attrs(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs) +{ + __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, NORMAL); +} + +void arm_coherent_iommu_free_attrs(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs) +{ + __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, COHERENT); +} + static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs) @@ -1997,9 +2063,9 @@ struct dma_map_ops iommu_ops = { }; struct dma_map_ops iommu_coherent_ops = { - .alloc = arm_iommu_alloc_attrs, - .free = arm_iommu_free_attrs, - .mmap = arm_iommu_mmap_attrs, + .alloc = arm_coherent_iommu_alloc_attrs, + .free = arm_coherent_iommu_free_attrs, + .mmap = arm_coherent_iommu_mmap_attrs, .get_sgtable = arm_iommu_get_sgtable, .map_page = arm_coherent_iommu_map_page, diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index ad5841856007..3a2e678b8d30 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -243,7 +243,7 @@ good_area: goto out; } - return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags); + return 
handle_mm_fault(vma, addr & PAGE_MASK, flags); check_stack: /* Don't allow expansion below FIRST_USER_ADDRESS */ diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index b8d477321730..c1c1a5c67da1 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -23,7 +23,7 @@ #define __pgd_alloc() kmalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL) #define __pgd_free(pgd) kfree(pgd) #else -#define __pgd_alloc() (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, 2) +#define __pgd_alloc() (pgd_t *)__get_free_pages(GFP_KERNEL, 2) #define __pgd_free(pgd) free_pages((unsigned long)pgd, 2) #endif diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 6fcaac8e200f..a7123b4e129d 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -362,6 +362,39 @@ __ca15_errata: #endif b __errata_finish +__ca12_errata: +#ifdef CONFIG_ARM_ERRATA_818325_852422 + mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register + orr r10, r10, #1 << 12 @ set bit #12 + mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_821420 + mrc p15, 0, r10, c15, c0, 2 @ read internal feature reg + orr r10, r10, #1 << 1 @ set bit #1 + mcr p15, 0, r10, c15, c0, 2 @ write internal feature reg +#endif +#ifdef CONFIG_ARM_ERRATA_825619 + mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register + orr r10, r10, #1 << 24 @ set bit #24 + mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif + b __errata_finish + +__ca17_errata: +#ifdef CONFIG_ARM_ERRATA_852421 + cmp r6, #0x12 @ only present up to r1p2 + mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register + orrle r10, r10, #1 << 24 @ set bit #24 + mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_852423 + cmp r6, #0x12 @ only present up to r1p2 + mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register + orrle r10, r10, #1 << 12 @ set bit #12 + mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif + b __errata_finish + __v7_pj4b_setup: #ifdef 
CONFIG_CPU_PJ4B @@ -443,6 +476,16 @@ __v7_setup_cont: teq r0, r10 beq __ca9_errata + /* Cortex-A12 Errata */ + ldr r10, =0x00000c0d @ Cortex-A12 primary part number + teq r0, r10 + beq __ca12_errata + + /* Cortex-A17 Errata */ + ldr r10, =0x00000c0e @ Cortex-A17 primary part number + teq r0, r10 + beq __ca17_errata + /* Cortex-A15 Errata */ ldr r10, =0x00000c0f @ Cortex-A15 primary part number teq r0, r10 diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 73085d3482ed..da0b33deba6d 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -643,19 +643,19 @@ int vfp_restore_user_hwstate(struct user_vfp __user *ufp, * hardware state at every thread switch. We clear our held state when * a CPU has been killed, indicating that the VFP hardware doesn't contain * a threads VFP state. When a CPU starts up, we re-enable access to the - * VFP hardware. - * - * Both CPU_DYING and CPU_STARTING are called on the CPU which + * VFP hardware. The callbacks below are called on the CPU which * is being offlined/onlined. */ -static int vfp_hotplug(struct notifier_block *b, unsigned long action, - void *hcpu) +static int vfp_dying_cpu(unsigned int cpu) { - if (action == CPU_DYING || action == CPU_DYING_FROZEN) - vfp_current_hw_state[(long)hcpu] = NULL; - else if (action == CPU_STARTING || action == CPU_STARTING_FROZEN) - vfp_enable(NULL); - return NOTIFY_OK; + vfp_force_reload(cpu, current_thread_info()); + return 0; +} + +static int vfp_starting_cpu(unsigned int unused) +{ + vfp_enable(NULL); + return 0; } void vfp_kmode_exception(void) @@ -732,6 +732,10 @@ static int __init vfp_init(void) unsigned int vfpsid; unsigned int cpu_arch = cpu_architecture(); + /* + * Enable the access to the VFP on all online CPUs so the + * following test on FPSID will succeed. 
+ */ if (cpu_arch >= CPU_ARCH_ARMv6) on_each_cpu(vfp_enable, NULL, 1); @@ -794,7 +798,9 @@ static int __init vfp_init(void) VFP_arch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT; } - hotcpu_notifier(vfp_hotplug, 0); + cpuhp_setup_state_nocalls(CPUHP_AP_ARM_VFP_STARTING, + "AP_ARM_VFP_STARTING", vfp_starting_cpu, + vfp_dying_cpu); vfp_vector = vfp_support_entry; diff --git a/arch/arm/xen/Makefile b/arch/arm/xen/Makefile index 12969523414c..227952103b0b 100644 --- a/arch/arm/xen/Makefile +++ b/arch/arm/xen/Makefile @@ -1 +1,2 @@ obj-y := enlighten.o hypercall.o grant-table.o p2m.o mm.o +obj-$(CONFIG_XEN_EFI) += efi.o diff --git a/arch/arm/xen/efi.c b/arch/arm/xen/efi.c new file mode 100644 index 000000000000..16db419f9e90 --- /dev/null +++ b/arch/arm/xen/efi.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2015, Linaro Limited, Shannon Zhao + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/efi.h> +#include <xen/xen-ops.h> +#include <asm/xen/xen-ops.h> + +/* Set XEN EFI runtime services function pointers. Other fields of struct efi, + * e.g. efi.systab, will be set like normal EFI. 
+ */ +void __init xen_efi_runtime_setup(void) +{ + efi.get_time = xen_efi_get_time; + efi.set_time = xen_efi_set_time; + efi.get_wakeup_time = xen_efi_get_wakeup_time; + efi.set_wakeup_time = xen_efi_set_wakeup_time; + efi.get_variable = xen_efi_get_variable; + efi.get_next_variable = xen_efi_get_next_variable; + efi.set_variable = xen_efi_set_variable; + efi.query_variable_info = xen_efi_query_variable_info; + efi.update_capsule = xen_efi_update_capsule; + efi.query_capsule_caps = xen_efi_query_capsule_caps; + efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; + efi.reset_system = NULL; /* Functionality provided by Xen. */ +} +EXPORT_SYMBOL_GPL(xen_efi_runtime_setup); diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 75cd7345c654..b0b82f5ea338 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -12,14 +12,16 @@ #include <xen/page.h> #include <xen/interface/sched.h> #include <xen/xen-ops.h> -#include <asm/paravirt.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> +#include <asm/xen/xen-ops.h> #include <asm/system_misc.h> +#include <asm/efi.h> #include <linux/interrupt.h> #include <linux/irqreturn.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_fdt.h> #include <linux/of_irq.h> #include <linux/of_address.h> #include <linux/cpuidle.h> @@ -30,6 +32,7 @@ #include <linux/time64.h> #include <linux/timekeeping.h> #include <linux/timekeeper_internal.h> +#include <linux/acpi.h> #include <linux/mm.h> @@ -46,14 +49,16 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); static struct vcpu_info __percpu *xen_vcpu_info; +/* Linux <-> Xen vCPU id mapping */ +DEFINE_PER_CPU(int, xen_vcpu_id) = -1; +EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); + /* These are unused until we support booting "pre-ballooned" */ unsigned long xen_released_pages; struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; static 
__read_mostly unsigned int xen_events_irq; -static __initdata struct device_node *xen_node; - int xen_remap_domain_gfn_array(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *gfn, int nr, @@ -84,19 +89,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range); -static unsigned long long xen_stolen_accounting(int cpu) -{ - struct vcpu_runstate_info state; - - BUG_ON(cpu != smp_processor_id()); - - xen_get_runstate_snapshot(&state); - - WARN_ON(state.state != RUNSTATE_running); - - return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline]; -} - static void xen_read_wallclock(struct timespec64 *ts) { u32 version; @@ -161,12 +153,11 @@ static struct notifier_block xen_pvclock_gtod_notifier = { .notifier_call = xen_pvclock_gtod_notify, }; -static void xen_percpu_init(void) +static int xen_starting_cpu(unsigned int cpu) { struct vcpu_register_vcpu_info info; struct vcpu_info *vcpup; int err; - int cpu = get_cpu(); /* * VCPUOP_register_vcpu_info cannot be called twice for the same @@ -179,10 +170,14 @@ static void xen_percpu_init(void) pr_info("Xen: initializing cpu%d\n", cpu); vcpup = per_cpu_ptr(xen_vcpu_info, cpu); + /* Direct vCPU id mapping for ARM guests. 
*/ + per_cpu(xen_vcpu_id, cpu) = cpu; + info.mfn = virt_to_gfn(vcpup); info.offset = xen_offset_in_page(vcpup); - err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); + err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu), + &info); BUG_ON(err); per_cpu(xen_vcpu, cpu) = vcpup; @@ -190,7 +185,13 @@ static void xen_percpu_init(void) after_register_vcpu_info: enable_percpu_irq(xen_events_irq, 0); - put_cpu(); + return 0; +} + +static int xen_dying_cpu(unsigned int cpu) +{ + disable_percpu_irq(xen_events_irq); + return 0; } static void xen_restart(enum reboot_mode reboot_mode, const char *cmd) @@ -209,32 +210,50 @@ static void xen_power_off(void) BUG_ON(rc); } -static int xen_cpu_notification(struct notifier_block *self, - unsigned long action, - void *hcpu) +static irqreturn_t xen_arm_callback(int irq, void *arg) { - switch (action) { - case CPU_STARTING: - xen_percpu_init(); - break; - case CPU_DYING: - disable_percpu_irq(xen_events_irq); - break; - default: - break; - } - - return NOTIFY_OK; + xen_hvm_evtchn_do_upcall(); + return IRQ_HANDLED; } -static struct notifier_block xen_cpu_notifier = { - .notifier_call = xen_cpu_notification, -}; +static __initdata struct { + const char *compat; + const char *prefix; + const char *version; + bool found; +} hyper_node = {"xen,xen", "xen,xen-", NULL, false}; -static irqreturn_t xen_arm_callback(int irq, void *arg) +static int __init fdt_find_hyper_node(unsigned long node, const char *uname, + int depth, void *data) { - xen_hvm_evtchn_do_upcall(); - return IRQ_HANDLED; + const void *s = NULL; + int len; + + if (depth != 1 || strcmp(uname, "hypervisor") != 0) + return 0; + + if (of_flat_dt_is_compatible(node, hyper_node.compat)) + hyper_node.found = true; + + s = of_get_flat_dt_prop(node, "compatible", &len); + if (strlen(hyper_node.prefix) + 3 < len && + !strncmp(hyper_node.prefix, s, strlen(hyper_node.prefix))) + hyper_node.version = s + strlen(hyper_node.prefix); + + /* + * Check if Xen supports 
EFI by checking whether there is the + * "/hypervisor/uefi" node in DT. If so, runtime services are available + * through proxy functions (e.g. in case of Xen dom0 EFI implementation + * they call special hypercall which executes relevant EFI functions) + * and that is why they are always enabled. + */ + if (IS_ENABLED(CONFIG_XEN_EFI)) { + if ((of_get_flat_dt_subnode_by_name(node, "uefi") > 0) && + !efi_runtime_disabled()) + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + } + + return 0; } /* @@ -244,26 +263,18 @@ static irqreturn_t xen_arm_callback(int irq, void *arg) #define GRANT_TABLE_PHYSADDR 0 void __init xen_early_init(void) { - int len; - const char *s = NULL; - const char *version = NULL; - const char *xen_prefix = "xen,xen-"; - - xen_node = of_find_compatible_node(NULL, NULL, "xen,xen"); - if (!xen_node) { + of_scan_flat_dt(fdt_find_hyper_node, NULL); + if (!hyper_node.found) { pr_debug("No Xen support\n"); return; } - s = of_get_property(xen_node, "compatible", &len); - if (strlen(xen_prefix) + 3 < len && - !strncmp(xen_prefix, s, strlen(xen_prefix))) - version = s + strlen(xen_prefix); - if (version == NULL) { + + if (hyper_node.version == NULL) { pr_debug("Xen version not found\n"); return; } - pr_info("Xen %s support found\n", version); + pr_info("Xen %s support found\n", hyper_node.version); xen_domain_type = XEN_HVM_DOMAIN; @@ -278,28 +289,68 @@ void __init xen_early_init(void) add_preferred_console("hvc", 0, NULL); } +static void __init xen_acpi_guest_init(void) +{ +#ifdef CONFIG_ACPI + struct xen_hvm_param a; + int interrupt, trigger, polarity; + + a.domid = DOMID_SELF; + a.index = HVM_PARAM_CALLBACK_IRQ; + + if (HYPERVISOR_hvm_op(HVMOP_get_param, &a) + || (a.value >> 56) != HVM_PARAM_CALLBACK_TYPE_PPI) { + xen_events_irq = 0; + return; + } + + interrupt = a.value & 0xff; + trigger = ((a.value >> 8) & 0x1) ? ACPI_EDGE_SENSITIVE + : ACPI_LEVEL_SENSITIVE; + polarity = ((a.value >> 8) & 0x2) ? 
ACPI_ACTIVE_LOW + : ACPI_ACTIVE_HIGH; + xen_events_irq = acpi_register_gsi(NULL, interrupt, trigger, polarity); +#endif +} + +static void __init xen_dt_guest_init(void) +{ + struct device_node *xen_node; + + xen_node = of_find_compatible_node(NULL, NULL, "xen,xen"); + if (!xen_node) { + pr_err("Xen support was detected before, but it has disappeared\n"); + return; + } + + xen_events_irq = irq_of_parse_and_map(xen_node, 0); +} + static int __init xen_guest_init(void) { struct xen_add_to_physmap xatp; struct shared_info *shared_info_page = NULL; - struct resource res; - phys_addr_t grant_frames; if (!xen_domain()) return 0; - if (of_address_to_resource(xen_node, GRANT_TABLE_PHYSADDR, &res)) { - pr_err("Xen grant table base address not found\n"); - return -ENODEV; - } - grant_frames = res.start; + if (!acpi_disabled) + xen_acpi_guest_init(); + else + xen_dt_guest_init(); - xen_events_irq = irq_of_parse_and_map(xen_node, 0); if (!xen_events_irq) { pr_err("Xen event channel interrupt not found\n"); return -ENODEV; } + /* + * The fdt parsing codes have set EFI_RUNTIME_SERVICES if Xen EFI + * parameters are found. Force enable runtime services. + */ + if (efi_enabled(EFI_RUNTIME_SERVICES)) + xen_efi_runtime_setup(); + shared_info_page = (struct shared_info *)get_zeroed_page(GFP_KERNEL); if (!shared_info_page) { @@ -328,7 +379,13 @@ static int __init xen_guest_init(void) if (xen_vcpu_info == NULL) return -ENOMEM; - if (gnttab_setup_auto_xlat_frames(grant_frames)) { + /* Direct vCPU id mapping for ARM guests. 
*/ + per_cpu(xen_vcpu_id, 0) = 0; + + xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames(); + if (xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn, + &xen_auto_xlat_grant_frames.vaddr, + xen_auto_xlat_grant_frames.count)) { free_percpu(xen_vcpu_info); return -ENOMEM; } @@ -351,16 +408,14 @@ static int __init xen_guest_init(void) return -EINVAL; } - xen_percpu_init(); + xen_time_setup_guest(); - register_cpu_notifier(&xen_cpu_notifier); - - pv_time_ops.steal_clock = xen_stolen_accounting; - static_key_slow_inc(&paravirt_steal_enabled); if (xen_initial_domain()) pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); - return 0; + return cpuhp_setup_state(CPUHP_AP_ARM_XEN_STARTING, + "AP_ARM_XEN_STARTING", xen_starting_cpu, + xen_dying_cpu); } early_initcall(xen_guest_init); @@ -403,4 +458,5 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op); EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op); EXPORT_SYMBOL_GPL(HYPERVISOR_multicall); +EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist); EXPORT_SYMBOL_GPL(privcmd_call); diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index 9a36f4f49c10..a648dfc3be30 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -91,6 +91,7 @@ HYPERCALL3(vcpu_op); HYPERCALL1(tmem_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); +HYPERCALL2(vm_assist); ENTRY(privcmd_call) stmdb sp!, {r4} diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5a0a691d4220..9f8b99e20557 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -4,9 +4,11 @@ config ARM64 select ACPI_GENERIC_GSI if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_KCOV select ARCH_HAS_SG_CHAIN select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF @@ -85,8 +87,11 @@
config ARM64 select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RCU_TABLE_FREE select HAVE_SYSCALL_TRACEPOINTS + select HAVE_KPROBES + select HAVE_KRETPROBES if HAVE_KPROBES select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING @@ -664,6 +669,16 @@ config PARAVIRT_TIME_ACCOUNTING If in doubt, say N here. +config KEXEC + depends on PM_SLEEP_SMP + select KEXEC_CORE + bool "kexec system call" + ---help--- + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + config XEN_DOM0 def_bool y depends on XEN @@ -872,7 +887,7 @@ config RELOCATABLE config RANDOMIZE_BASE bool "Randomize the address of the kernel image" - select ARM64_MODULE_PLTS + select ARM64_MODULE_PLTS if MODULES select RELOCATABLE help Randomizes the virtual address at which the kernel image is diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 648a32c89541..d59b6908a21a 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -12,7 +12,6 @@ LDFLAGS_vmlinux :=-p --no-undefined -X CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) -OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S GZFLAGS :=-9 ifneq ($(CONFIG_RELOCATABLE),) @@ -121,6 +120,16 @@ archclean: $(Q)$(MAKE) $(clean)=$(boot) $(Q)$(MAKE) $(clean)=$(boot)/dts +# We need to generate vdso-offsets.h before compiling certain files in kernel/. +# In order to do that, we should use the archprepare target, but we can't since +# asm-offsets.h is included in some files used to generate vdso-offsets.h, and +# asm-offsets.h is built in prepare0, for which archprepare is a dependency. +# Therefore we need to generate the header after prepare0 has been made, hence +# this hack. 
+prepare: vdso_prepare +vdso_prepare: prepare0 + $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h + define archhelp echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)' echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 305c552b5ec1..1f012c506434 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -14,6 +14,8 @@ # Based on the ia64 boot/Makefile. # +OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S + targets := Image Image.gz $(obj)/Image: vmlinux FORCE diff --git a/arch/arm64/boot/dts/apm/apm-merlin.dts b/arch/arm64/boot/dts/apm/apm-merlin.dts index 387c6a8d0da9..b0f64414c1b0 100644 --- a/arch/arm64/boot/dts/apm/apm-merlin.dts +++ b/arch/arm64/boot/dts/apm/apm-merlin.dts @@ -83,3 +83,9 @@ status = "ok"; }; }; + +&mdio { + sgenet0phy: phy@0 { + reg = <0x0>; + }; +}; diff --git a/arch/arm64/boot/dts/apm/apm-mustang.dts b/arch/arm64/boot/dts/apm/apm-mustang.dts index 44db32ec5e9c..b7fb5d9295c2 100644 --- a/arch/arm64/boot/dts/apm/apm-mustang.dts +++ b/arch/arm64/boot/dts/apm/apm-mustang.dts @@ -79,3 +79,15 @@ &mmc0 { status = "ok"; }; + +&mdio { + menet0phy: phy@3 { + reg = <0x3>; + }; + sgenet0phy: phy@4 { + reg = <0x4>; + }; + sgenet1phy: phy@5 { + reg = <0x5>; + }; +}; diff --git a/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi b/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi index c569f761d090..2e1e5daa1dc7 100644 --- a/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi +++ b/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi @@ -625,10 +625,18 @@ apm,irq-start = <8>; }; + mdio: mdio@1f610000 { + compatible = "apm,xgene-mdio-xfi"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x1f610000 0x0 0xd100>; + clocks = <&xge0clk 0>; + }; + sgenet0: ethernet@1f610000 { compatible = "apm,xgene2-sgenet"; status = "disabled"; - reg = <0x0 0x1f610000 0x0 0x10000>, + reg = <0x0 0x1f610000 0x0 0xd100>, <0x0 
0x1f600000 0x0 0Xd100>, <0x0 0x20000000 0x0 0X20000>; interrupts = <0 96 4>, @@ -637,6 +645,7 @@ clocks = <&xge0clk 0>; local-mac-address = [00 01 73 00 00 01]; phy-connection-type = "sgmii"; + phy-handle = <&sgenet0phy>; }; xgenet1: ethernet@1f620000 { diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index 5147d7698924..6bf7cbe2e72d 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -237,20 +237,11 @@ clocks = <&socplldiv2 0>; reg = <0x0 0x1f21c000 0x0 0x1000>; reg-names = "csr-reg"; - csr-mask = <0x3>; + csr-mask = <0xa>; + enable-mask = <0xf>; clock-output-names = "sge0clk"; }; - sge1clk: sge1clk@1f21c000 { - compatible = "apm,xgene-device-clock"; - #clock-cells = <1>; - clocks = <&socplldiv2 0>; - reg = <0x0 0x1f21c000 0x0 0x1000>; - reg-names = "csr-reg"; - csr-mask = <0xc>; - clock-output-names = "sge1clk"; - }; - xge0clk: xge0clk@1f61c000 { compatible = "apm,xgene-device-clock"; #clock-cells = <1>; @@ -921,6 +912,14 @@ clocks = <&rtcclk 0>; }; + mdio: mdio@17020000 { + compatible = "apm,xgene-mdio-rgmii"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x17020000 0x0 0xd100>; + clocks = <&menetclk 0>; + }; + menet: ethernet@17020000 { compatible = "apm,xgene-enet"; status = "disabled"; @@ -934,7 +933,7 @@ /* mac address will be overwritten by the bootloader */ local-mac-address = [00 00 00 00 00 00]; phy-connection-type = "rgmii"; - phy-handle = <&menetphy>; + phy-handle = <&menet0phy>,<&menetphy>; mdio { compatible = "apm,xgene-mdio"; #address-cells = <1>; @@ -960,6 +959,7 @@ clocks = <&sge0clk 0>; local-mac-address = [00 00 00 00 00 00]; phy-connection-type = "sgmii"; + phy-handle = <&sgenet0phy>; }; sgenet1: ethernet@1f210030 { @@ -973,9 +973,9 @@ <0x0 0xAD 0x4>; port-id = <1>; dma-coherent; - clocks = <&sge1clk 0>; local-mac-address = [00 00 00 00 00 00]; phy-connection-type = "sgmii"; + phy-handle = <&sgenet1phy>; }; xgenet: ethernet@1f610000 { diff 
--git a/arch/arm64/boot/dts/broadcom/ns2-svk.dts b/arch/arm64/boot/dts/broadcom/ns2-svk.dts index 54ca40c9f711..ea5603fd106a 100644 --- a/arch/arm64/boot/dts/broadcom/ns2-svk.dts +++ b/arch/arm64/boot/dts/broadcom/ns2-svk.dts @@ -52,6 +52,14 @@ }; }; +&pci_phy0 { + status = "ok"; +}; + +&pci_phy1 { + status = "ok"; +}; + &pcie0 { status = "ok"; }; @@ -132,3 +140,11 @@ #size-cells = <1>; }; }; + +&mdio_mux_iproc { + mdio@10 { + gphy0: eth-phy@10 { + reg = <0x10>; + }; + }; +}; diff --git a/arch/arm64/boot/dts/broadcom/ns2.dtsi b/arch/arm64/boot/dts/broadcom/ns2.dtsi index ec68ec1a80c8..46b78fa89f4c 100644 --- a/arch/arm64/boot/dts/broadcom/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/ns2.dtsi @@ -263,6 +263,45 @@ IRQ_TYPE_LEVEL_HIGH)>; }; + mdio_mux_iproc: mdio-mux@6602023c { + compatible = "brcm,mdio-mux-iproc"; + reg = <0x6602023c 0x14>; + #address-cells = <1>; + #size-cells = <0>; + + mdio@0 { + reg = <0x0>; + #address-cells = <1>; + #size-cells = <0>; + + pci_phy0: pci-phy@0 { + compatible = "brcm,ns2-pcie-phy"; + reg = <0x0>; + #phy-cells = <0>; + status = "disabled"; + }; + }; + + mdio@7 { + reg = <0x7>; + #address-cells = <1>; + #size-cells = <0>; + + pci_phy1: pci-phy@0 { + compatible = "brcm,ns2-pcie-phy"; + reg = <0x0>; + #phy-cells = <0>; + status = "disabled"; + }; + }; + + mdio@10 { + reg = <0x10>; + #address-cells = <1>; + #size-cells = <0>; + }; + }; + timer0: timer@66030000 { compatible = "arm,sp804", "arm,primecell"; reg = <0x66030000 0x1000>; diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi index 05f89c4a5413..77b8c4e388ca 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi @@ -168,6 +168,18 @@ }; }; + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + vpu_dma_reserved: vpu_dma_mem_region { + compatible = "shared-dma-pool"; + reg = <0 0xb7000000 0 0x500000>; + alignment = <0x1000>; + no-map; + }; + }; + timer { compatible = 
"arm,armv8-timer"; interrupt-parent = <&gic>; @@ -312,6 +324,17 @@ clock-names = "spi", "wrap"; }; + vpu: vpu@10020000 { + compatible = "mediatek,mt8173-vpu"; + reg = <0 0x10020000 0 0x30000>, + <0 0x10050000 0 0x100>; + reg-names = "tcm", "cfg_reg"; + interrupts = <GIC_SPI 166 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&topckgen CLK_TOP_SCP_SEL>; + clock-names = "main"; + memory-region = <&vpu_dma_reserved>; + }; + sysirq: intpol-controller@10200620 { compatible = "mediatek,mt8173-sysirq", "mediatek,mt6577-sysirq"; @@ -754,6 +777,45 @@ clock-names = "apb", "smi"; }; + vcodec_enc: vcodec@18002000 { + compatible = "mediatek,mt8173-vcodec-enc"; + reg = <0 0x18002000 0 0x1000>, /* VENC_SYS */ + <0 0x19002000 0 0x1000>; /* VENC_LT_SYS */ + interrupts = <GIC_SPI 198 IRQ_TYPE_LEVEL_LOW>, + <GIC_SPI 202 IRQ_TYPE_LEVEL_LOW>; + mediatek,larb = <&larb3>, + <&larb5>; + iommus = <&iommu M4U_PORT_VENC_RCPU>, + <&iommu M4U_PORT_VENC_REC>, + <&iommu M4U_PORT_VENC_BSDMA>, + <&iommu M4U_PORT_VENC_SV_COMV>, + <&iommu M4U_PORT_VENC_RD_COMV>, + <&iommu M4U_PORT_VENC_CUR_LUMA>, + <&iommu M4U_PORT_VENC_CUR_CHROMA>, + <&iommu M4U_PORT_VENC_REF_LUMA>, + <&iommu M4U_PORT_VENC_REF_CHROMA>, + <&iommu M4U_PORT_VENC_NBM_RDMA>, + <&iommu M4U_PORT_VENC_NBM_WDMA>, + <&iommu M4U_PORT_VENC_RCPU_SET2>, + <&iommu M4U_PORT_VENC_REC_FRM_SET2>, + <&iommu M4U_PORT_VENC_BSDMA_SET2>, + <&iommu M4U_PORT_VENC_SV_COMA_SET2>, + <&iommu M4U_PORT_VENC_RD_COMA_SET2>, + <&iommu M4U_PORT_VENC_CUR_LUMA_SET2>, + <&iommu M4U_PORT_VENC_CUR_CHROMA_SET2>, + <&iommu M4U_PORT_VENC_REF_LUMA_SET2>, + <&iommu M4U_PORT_VENC_REC_CHROMA_SET2>; + mediatek,vpu = <&vpu>; + clocks = <&topckgen CLK_TOP_VENCPLL_D2>, + <&topckgen CLK_TOP_VENC_SEL>, + <&topckgen CLK_TOP_UNIVPLL1_D2>, + <&topckgen CLK_TOP_VENC_LT_SEL>; + clock-names = "venc_sel_src", + "venc_sel", + "venc_lt_sel_src", + "venc_lt_sel"; + }; + vencltsys: clock-controller@19000000 { compatible = "mediatek,mt8173-vencltsys", "syscon"; reg = <0 0x19000000 0 0x1000>; diff --git 
a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index fd2d74d0491e..4ed4756dfa97 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -70,6 +70,7 @@ CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CMA=y CONFIG_XEN=y +CONFIG_KEXEC=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y CONFIG_CPU_IDLE=y diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index cff532a6744e..f43d2c44c765 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += bug.h generic-y += bugs.h -generic-y += checksum.h generic-y += clkdev.h generic-y += cputime.h generic-y += current.h diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index aee323b13802..5420cb0fcb3e 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -113,4 +113,14 @@ static inline const char *acpi_get_enable_method(int cpu) pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr); #endif +#ifdef CONFIG_ACPI_NUMA +int arm64_acpi_numa_init(void); +int acpi_numa_get_nid(unsigned int cpu, u64 hwid); +#else +static inline int arm64_acpi_numa_init(void) { return -ENOSYS; } +static inline int acpi_numa_get_nid(unsigned int cpu, u64 hwid) { return NUMA_NO_NODE; } +#endif /* CONFIG_ACPI_NUMA */ + +#define ACPI_TABLE_UPGRADE_MAX_PHYS MEMBLOCK_ALLOC_ACCESSIBLE + #endif /*_ASM_ACPI_H*/ diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index beccbdefa106..8746ff6abd77 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -95,13 +95,11 @@ void apply_alternatives(void *start, size_t length); * The code that follows this macro will be assembled and linked as * normal. There are no restrictions on this code. 
*/ -.macro alternative_if_not cap, enable = 1 - .if \enable +.macro alternative_if_not cap .pushsection .altinstructions, "a" altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f .popsection 661: - .endif .endm /* @@ -118,27 +116,27 @@ void apply_alternatives(void *start, size_t length); * alternative sequence it is defined in (branches into an * alternative sequence are not fixed up). */ -.macro alternative_else, enable = 1 - .if \enable +.macro alternative_else 662: .pushsection .altinstr_replacement, "ax" 663: - .endif .endm /* * Complete an alternative code sequence. */ -.macro alternative_endif, enable = 1 - .if \enable +.macro alternative_endif 664: .popsection .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) - .endif .endm #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) +.macro user_alt, label, oldinstr, newinstr, cond +9999: alternative_insn "\oldinstr", "\newinstr", \cond + _ASM_EXTABLE 9999b, \label +.endm /* * Generate the assembly for UAO alternatives with exception table entries. 
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 10b017c4bdd8..d5025c69ca81 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -24,6 +24,7 @@ #define __ASM_ASSEMBLER_H #include <asm/asm-offsets.h> +#include <asm/cpufeature.h> #include <asm/page.h> #include <asm/pgtable-hwdef.h> #include <asm/ptrace.h> @@ -261,7 +262,16 @@ lr .req x30 // link register add \size, \kaddr, \size sub \tmp2, \tmp1, #1 bic \kaddr, \kaddr, \tmp2 -9998: dc \op, \kaddr +9998: + .if (\op == cvau || \op == cvac) +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE + dc \op, \kaddr +alternative_else + dc civac, \kaddr +alternative_endif + .else + dc \op, \kaddr + .endif add \kaddr, \kaddr, \tmp1 cmp \kaddr, \size b.lo 9998b diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h new file mode 100644 index 000000000000..09f65339d66d --- /dev/null +++ b/arch/arm64/include/asm/checksum.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2016 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +#ifndef __ASM_CHECKSUM_H +#define __ASM_CHECKSUM_H + +#include <linux/types.h> + +static inline __sum16 csum_fold(__wsum csum) +{ + u32 sum = (__force u32)csum; + sum += (sum >> 16) | (sum << 16); + return ~(__force __sum16)(sum >> 16); +} +#define csum_fold csum_fold + +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + __uint128_t tmp; + u64 sum; + + tmp = *(const __uint128_t *)iph; + iph += 16; + ihl -= 4; + tmp += ((tmp >> 64) | (tmp << 64)); + sum = tmp >> 64; + do { + sum += *(const u32 *)iph; + iph += 4; + } while (--ihl); + + sum += ((sum >> 32) | (sum << 32)); + return csum_fold(sum >> 32); +} +#define ip_fast_csum ip_fast_csum + +#include <asm-generic/checksum.h> + +#endif /* __ASM_CHECKSUM_H */ diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 13a6103130cd..889226b4c6e1 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -25,10 +25,12 @@ */ struct cpuinfo_arm64 { struct cpu cpu; + struct kobject kobj; u32 reg_ctr; u32 reg_cntfrq; u32 reg_dczid; u32 reg_midr; + u32 reg_revidr; u64 reg_id_aa64dfr0; u64 reg_id_aa64dfr1; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 224efe730e46..49dd1bd3ea50 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -191,7 +191,9 @@ void __init setup_cpu_features(void); void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); +void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps); void check_local_cpu_errata(void); +void __init enable_errata_workarounds(void); void verify_local_cpu_errata(void); void verify_local_cpu_capabilities(void); diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 2fcb9b7c876c..4b6b3f72a215 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -66,6 +66,11 @@ #define CACHE_FLUSH_IS_SAFE 1 
+/* kprobes BRK opcodes with ESR encoding */ +#define BRK64_ESR_MASK 0xFFFF +#define BRK64_ESR_KPROBES 0x0004 +#define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (BRK64_ESR_KPROBES << 5)) + /* AArch32 */ #define DBG_ESR_EVT_BKPT 0x4 #define DBG_ESR_EVT_VECC 0x5 diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index bd887663689b..a9e54aad15ef 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -14,8 +14,7 @@ extern void efi_init(void); #endif int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); - -#define efi_set_mapping_permissions efi_create_mapping +int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define arch_efi_call_virt_setup() \ ({ \ diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 77eeb2cc648f..f772e15c4766 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -74,6 +74,7 @@ #define ESR_ELx_EC_SHIFT (26) #define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT) +#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) #define ESR_ELx_IL (UL(1) << 25) #define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 30e50eb54a67..1dbaa901d7e5 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -120,6 +120,29 @@ enum aarch64_insn_register { AARCH64_INSN_REG_SP = 31 /* Stack pointer: as load/store base reg */ }; +enum aarch64_insn_special_register { + AARCH64_INSN_SPCLREG_SPSR_EL1 = 0xC200, + AARCH64_INSN_SPCLREG_ELR_EL1 = 0xC201, + AARCH64_INSN_SPCLREG_SP_EL0 = 0xC208, + AARCH64_INSN_SPCLREG_SPSEL = 0xC210, + AARCH64_INSN_SPCLREG_CURRENTEL = 0xC212, + AARCH64_INSN_SPCLREG_DAIF = 0xDA11, + AARCH64_INSN_SPCLREG_NZCV = 0xDA10, + AARCH64_INSN_SPCLREG_FPCR = 0xDA20, + AARCH64_INSN_SPCLREG_DSPSR_EL0 = 0xDA28, + AARCH64_INSN_SPCLREG_DLR_EL0 = 0xDA29, + AARCH64_INSN_SPCLREG_SPSR_EL2 = 0xE200, + 
AARCH64_INSN_SPCLREG_ELR_EL2 = 0xE201, + AARCH64_INSN_SPCLREG_SP_EL1 = 0xE208, + AARCH64_INSN_SPCLREG_SPSR_INQ = 0xE218, + AARCH64_INSN_SPCLREG_SPSR_ABT = 0xE219, + AARCH64_INSN_SPCLREG_SPSR_UND = 0xE21A, + AARCH64_INSN_SPCLREG_SPSR_FIQ = 0xE21B, + AARCH64_INSN_SPCLREG_SPSR_EL3 = 0xF200, + AARCH64_INSN_SPCLREG_ELR_EL3 = 0xF201, + AARCH64_INSN_SPCLREG_SP_EL2 = 0xF210 +}; + enum aarch64_insn_variant { AARCH64_INSN_VARIANT_32BIT, AARCH64_INSN_VARIANT_64BIT @@ -223,8 +246,15 @@ static __always_inline bool aarch64_insn_is_##abbr(u32 code) \ static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \ { return (val); } +__AARCH64_INSN_FUNCS(adr_adrp, 0x1F000000, 0x10000000) +__AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) +__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) +__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) +__AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000) +__AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000) +__AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000) __AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000) __AARCH64_INSN_FUNCS(ldp_post, 0x7FC00000, 0x28C00000) __AARCH64_INSN_FUNCS(stp_pre, 0x7FC00000, 0x29800000) @@ -273,10 +303,15 @@ __AARCH64_INSN_FUNCS(svc, 0xFFE0001F, 0xD4000001) __AARCH64_INSN_FUNCS(hvc, 0xFFE0001F, 0xD4000002) __AARCH64_INSN_FUNCS(smc, 0xFFE0001F, 0xD4000003) __AARCH64_INSN_FUNCS(brk, 0xFFE0001F, 0xD4200000) +__AARCH64_INSN_FUNCS(exception, 0xFF000000, 0xD4000000) __AARCH64_INSN_FUNCS(hint, 0xFFFFF01F, 0xD503201F) __AARCH64_INSN_FUNCS(br, 0xFFFFFC1F, 0xD61F0000) __AARCH64_INSN_FUNCS(blr, 0xFFFFFC1F, 0xD63F0000) __AARCH64_INSN_FUNCS(ret, 0xFFFFFC1F, 0xD65F0000) +__AARCH64_INSN_FUNCS(eret, 0xFFFFFFFF, 0xD69F03E0) +__AARCH64_INSN_FUNCS(mrs, 0xFFF00000, 0xD5300000) +__AARCH64_INSN_FUNCS(msr_imm, 0xFFF8F01F, 0xD500401F) +__AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000) #undef 
__AARCH64_INSN_FUNCS @@ -286,6 +321,8 @@ bool aarch64_insn_is_branch_imm(u32 insn); int aarch64_insn_read(void *addr, u32 *insnp); int aarch64_insn_write(void *addr, u32 insn); enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn); +bool aarch64_insn_uses_literal(u32 insn); +bool aarch64_insn_is_branch(u32 insn); u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn); u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, u32 insn, u64 imm); @@ -367,9 +404,13 @@ bool aarch32_insn_is_wide(u32 insn); #define A32_RT_OFFSET 12 #define A32_RT2_OFFSET 0 +u32 aarch64_insn_extract_system_reg(u32 insn); u32 aarch32_insn_extract_reg_num(u32 insn, int offset); u32 aarch32_insn_mcr_extract_opc2(u32 insn); u32 aarch32_insn_mcr_extract_crm(u32 insn); + +typedef bool (pstate_check_t)(unsigned long); +extern pstate_check_t * const aarch32_opcode_cond_checks[16]; #endif /* __ASSEMBLY__ */ #endif /* __ASM_INSN_H */ diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 11cc941bd107..8c581281fa12 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -110,8 +110,5 @@ static inline int arch_irqs_disabled_flags(unsigned long flags) : : "r" (flags) : "memory"); \ } while (0) -#define local_dbg_enable() asm("msr daifclr, #8" : : : "memory") -#define local_dbg_disable() asm("msr daifset, #8" : : : "memory") - #endif #endif diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h new file mode 100644 index 000000000000..04744dc5fb61 --- /dev/null +++ b/arch/arm64/include/asm/kexec.h @@ -0,0 +1,48 @@ +/* + * kexec for arm64 + * + * Copyright (C) Linaro. + * Copyright (C) Huawei Futurewei Technologies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _ARM64_KEXEC_H +#define _ARM64_KEXEC_H + +/* Maximum physical address we can use pages from */ + +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) + +/* Maximum address we can reach in physical address mode */ + +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) + +/* Maximum address we can use for the control code buffer */ + +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) + +#define KEXEC_CONTROL_PAGE_SIZE 4096 + +#define KEXEC_ARCH KEXEC_ARCH_AARCH64 + +#ifndef __ASSEMBLY__ + +/** + * crash_setup_regs() - save registers for the panic kernel + * + * @newregs: registers are saved here + * @oldregs: registers to be saved (may be %NULL) + */ + +static inline void crash_setup_regs(struct pt_regs *newregs, + struct pt_regs *oldregs) +{ + /* Empty routine needed to avoid build errors. */ +} + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h new file mode 100644 index 000000000000..61b49150dfa3 --- /dev/null +++ b/arch/arm64/include/asm/kprobes.h @@ -0,0 +1,62 @@ +/* + * arch/arm64/include/asm/kprobes.h + * + * Copyright (C) 2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#ifndef _ARM_KPROBES_H +#define _ARM_KPROBES_H + +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/percpu.h> + +#define __ARCH_WANT_KPROBES_INSN_SLOT +#define MAX_INSN_SIZE 1 +#define MAX_STACK_SIZE 128 + +#define flush_insn_slot(p) do { } while (0) +#define kretprobe_blacklist_size 0 + +#include <asm/probes.h> + +struct prev_kprobe { + struct kprobe *kp; + unsigned int status; +}; + +/* Single step context for kprobe */ +struct kprobe_step_ctx { + unsigned long ss_pending; + unsigned long match_addr; +}; + +/* per-cpu kprobe control block */ +struct kprobe_ctlblk { + unsigned int kprobe_status; + unsigned long saved_irqflag; + struct prev_kprobe prev_kprobe; + struct kprobe_step_ctx ss_ctx; + struct pt_regs jprobe_saved_regs; + char jprobes_stack[MAX_STACK_SIZE]; +}; + +void arch_remove_kprobe(struct kprobe *); +int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); +int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); +int kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr); +int kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr); +void kretprobe_trampoline(void); +void __kprobes *trampoline_probe_handler(struct pt_regs *regs); + +#endif /* _ARM_KPROBES_H */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 40bc1681b6d5..4cdeae3b17c6 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -210,7 +210,7 @@ static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) >> ESR_ELx_EC_SHIFT; + return ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); } static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 97b1d8f26b9c..8d9fce037b2f 100644 --- a/arch/arm64/include/asm/mmu.h +++ 
b/arch/arm64/include/asm/mmu.h @@ -34,7 +34,7 @@ extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void init_mem_pgprot(void); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, - pgprot_t prot); + pgprot_t prot, bool allow_block_mappings); extern void *fixmap_remap_fdt(phys_addr_t dt_phys); #endif diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h index e9b4f2942335..600887e491fd 100644 --- a/arch/arm64/include/asm/numa.h +++ b/arch/arm64/include/asm/numa.h @@ -5,6 +5,8 @@ #ifdef CONFIG_NUMA +#define NR_NODE_MEMBLKS (MAX_NUMNODES * 2) + /* currently, arm64 implements flat NUMA topology */ #define parent_node(node) (node) diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h new file mode 100644 index 000000000000..5af574d632fa --- /dev/null +++ b/arch/arm64/include/asm/probes.h @@ -0,0 +1,35 @@ +/* + * arch/arm64/include/asm/probes.h + * + * Copyright (C) 2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#ifndef _ARM_PROBES_H +#define _ARM_PROBES_H + +#include <asm/opcodes.h> + +struct kprobe; +struct arch_specific_insn; + +typedef u32 kprobe_opcode_t; +typedef void (kprobes_handler_t) (u32 opcode, long addr, struct pt_regs *); + +/* architecture specific copy of original instruction */ +struct arch_specific_insn { + kprobe_opcode_t *insn; + pstate_check_t *pstate_cc; + kprobes_handler_t *handler; + /* restore address after step xol */ + unsigned long restore; +}; + +#endif diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index cef1cf398356..ace0a96e7d6e 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -192,5 +192,6 @@ static inline void spin_lock_prefetch(const void *ptr) void cpu_enable_pan(void *__unused); void cpu_enable_uao(void *__unused); +void cpu_enable_cache_maint_trap(void *__unused); #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h new file mode 100644 index 000000000000..07b8ed037dee --- /dev/null +++ b/arch/arm64/include/asm/ptdump.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2014 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +#ifndef __ASM_PTDUMP_H +#define __ASM_PTDUMP_H + +#ifdef CONFIG_ARM64_PTDUMP + +#include <linux/mm_types.h> + +struct addr_marker { + unsigned long start_address; + char *name; +}; + +struct ptdump_info { + struct mm_struct *mm; + const struct addr_marker *markers; + unsigned long base_addr; + unsigned long max_addr; +}; + +int ptdump_register(struct ptdump_info *info, const char *name); + +#else +static inline int ptdump_register(struct ptdump_info *info, const char *name) +{ + return 0; +} +#endif /* CONFIG_ARM64_PTDUMP */ + +#endif /* __ASM_PTDUMP_H */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 7f94755089e2..ada08b5b036d 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -46,7 +46,6 @@ #define COMPAT_PSR_MODE_UND 0x0000001b #define COMPAT_PSR_MODE_SYS 0x0000001f #define COMPAT_PSR_T_BIT 0x00000020 -#define COMPAT_PSR_E_BIT 0x00000200 #define COMPAT_PSR_F_BIT 0x00000040 #define COMPAT_PSR_I_BIT 0x00000080 #define COMPAT_PSR_A_BIT 0x00000100 @@ -74,6 +73,7 @@ #define COMPAT_PT_DATA_ADDR 0x10004 #define COMPAT_PT_TEXT_END_ADDR 0x10008 #ifndef __ASSEMBLY__ +#include <linux/bug.h> /* sizeof(struct user) for AArch32 */ #define COMPAT_USER_SZ 296 @@ -121,6 +121,8 @@ struct pt_regs { u64 unused; // maintain 16 byte alignment }; +#define MAX_REG_OFFSET offsetof(struct pt_regs, pstate) + #define arch_has_single_step() (1) #ifdef CONFIG_COMPAT @@ -146,9 +148,58 @@ struct pt_regs { #define fast_interrupts_enabled(regs) \ (!((regs)->pstate & PSR_F_BIT)) -#define user_stack_pointer(regs) \ +#define GET_USP(regs) \ (!compat_user_mode(regs) ? (regs)->sp : (regs)->compat_sp) +#define SET_USP(ptregs, value) \ + (!compat_user_mode(regs) ? 
((regs)->sp = value) : ((regs)->compat_sp = value)) + +extern int regs_query_register_offset(const char *name); +extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, + unsigned int n); + +/** + * regs_get_register() - get register value from its offset + * @regs: pt_regs from which register value is gotten + * @offset: offset of the register. + * + * regs_get_register returns the value of a register whose offset from @regs. + * The @offset is the offset of the register in struct pt_regs. + * If @offset is bigger than MAX_REG_OFFSET, this returns 0. + */ +static inline u64 regs_get_register(struct pt_regs *regs, unsigned int offset) +{ + u64 val = 0; + + WARN_ON(offset & 7); + + offset >>= 3; + switch (offset) { + case 0 ... 30: + val = regs->regs[offset]; + break; + case offsetof(struct pt_regs, sp) >> 3: + val = regs->sp; + break; + case offsetof(struct pt_regs, pc) >> 3: + val = regs->pc; + break; + case offsetof(struct pt_regs, pstate) >> 3: + val = regs->pstate; + break; + default: + val = 0; + } + + return val; +} + +/* Valid only for Kernel mode traps. 
*/ +static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) +{ + return regs->sp; +} + static inline unsigned long regs_return_value(struct pt_regs *regs) { return regs->regs[0]; @@ -158,8 +209,15 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) struct task_struct; int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task); -#define instruction_pointer(regs) ((unsigned long)(regs)->pc) +#define GET_IP(regs) ((unsigned long)(regs)->pc) +#define SET_IP(regs, value) ((regs)->pc = ((u64) (value))) + +#define GET_FP(ptregs) ((unsigned long)(ptregs)->regs[29]) +#define SET_FP(ptregs, value) ((ptregs)->regs[29] = ((u64) (value))) + +#include <asm-generic/ptrace.h> +#undef profile_pc extern unsigned long profile_pc(struct pt_regs *regs); #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 751e901c8d37..cc06794b7346 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -98,11 +98,11 @@ SCTLR_ELx_SA | SCTLR_ELx_I) /* SCTLR_EL1 specific flags. 
*/ +#define SCTLR_EL1_UCI (1 << 26) #define SCTLR_EL1_SPAN (1 << 23) #define SCTLR_EL1_SED (1 << 8) #define SCTLR_EL1_CP15BEN (1 << 5) - /* id_aa64isar0 */ #define ID_AA64ISAR0_RDM_SHIFT 28 #define ID_AA64ISAR0_ATOMICS_SHIFT 20 diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 0cc2f29bf9da..9cd03f3e812f 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -34,6 +34,8 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); +void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr); + #ifdef CONFIG_FUNCTION_GRAPH_TRACER static inline int __in_irqentry_text(unsigned long ptr) { diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 9e397a542756..5e834d10b291 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -21,6 +21,7 @@ /* * User space memory access functions */ +#include <linux/kasan-checks.h> #include <linux/string.h> #include <linux/thread_info.h> @@ -256,15 +257,29 @@ do { \ -EFAULT; \ }) -extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n); -extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n); +extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); +extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n); extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n); extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); +static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) +{ + kasan_check_write(to, n); + return __arch_copy_from_user(to, from, n); +} + +static inline unsigned long __must_check __copy_to_user(void 
__user *to, const void *from, unsigned long n) +{ + kasan_check_read(from, n); + return __arch_copy_to_user(to, from, n); +} + static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { + kasan_check_write(to, n); + if (access_ok(VERIFY_READ, from, n)) - n = __copy_from_user(to, from, n); + n = __arch_copy_from_user(to, from, n); else /* security hole - plug it */ memset(to, 0, n); return n; @@ -272,8 +287,10 @@ static inline unsigned long __must_check copy_from_user(void *to, const void __u static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { + kasan_check_read(from, n); + if (access_ok(VERIFY_WRITE, to, n)) - n = __copy_to_user(to, from, n); + n = __arch_copy_to_user(to, from, n); return n; } diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h index de66199673d7..2b9a63771eda 100644 --- a/arch/arm64/include/asm/vdso_datapage.h +++ b/arch/arm64/include/asm/vdso_datapage.h @@ -22,6 +22,8 @@ struct vdso_data { __u64 cs_cycle_last; /* Timebase at clocksource init */ + __u64 raw_time_sec; /* Raw time */ + __u64 raw_time_nsec; __u64 xtime_clock_sec; /* Kernel time */ __u64 xtime_clock_nsec; __u64 xtime_coarse_sec; /* Coarse time */ @@ -29,8 +31,10 @@ struct vdso_data { __u64 wtm_clock_sec; /* Wall to monotonic time */ __u64 wtm_clock_nsec; __u32 tb_seq_count; /* Timebase sequence counter */ - __u32 cs_mult; /* Clocksource multiplier */ - __u32 cs_shift; /* Clocksource shift */ + /* cs_* members must be adjacent and in this order (ldp accesses) */ + __u32 cs_mono_mult; /* NTP-adjusted clocksource multiplier */ + __u32 cs_shift; /* Clocksource shift (mono = raw) */ + __u32 cs_raw_mult; /* Raw clocksource multiplier */ __u32 tz_minuteswest; /* Whacky timezone stuff */ __u32 tz_dsttime; __u32 use_syscall; diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index dcbcf8dcbefb..bbc6a8cf83f1 100644 --- 
a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -34,6 +34,11 @@ */ #define HVC_SET_VECTORS 1 +/* + * HVC_SOFT_RESTART - CPU soft reset, used by the cpu_soft_restart routine. + */ +#define HVC_SOFT_RESTART 2 + #define BOOT_CPU_MODE_EL1 (0xe11) #define BOOT_CPU_MODE_EL2 (0xe12) diff --git a/arch/arm64/include/asm/xen/xen-ops.h b/arch/arm64/include/asm/xen/xen-ops.h new file mode 100644 index 000000000000..ec154e719b11 --- /dev/null +++ b/arch/arm64/include/asm/xen/xen-ops.h @@ -0,0 +1,6 @@ +#ifndef _ASM_XEN_OPS_H +#define _ASM_XEN_OPS_H + +void xen_efi_runtime_setup(void); + +#endif /* _ASM_XEN_OPS_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 2173149d8954..14f7b651c787 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -26,8 +26,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE $(call if_changed,objcopy) arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ - sys_compat.o entry32.o \ - ../../arm/kernel/opcodes.o + sys_compat.o entry32.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o @@ -42,16 +41,15 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o +arm64-obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o +arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \ + cpu-reset.o -obj-y += $(arm64-obj-y) vdso/ +obj-y += $(arm64-obj-y) vdso/ probes/ obj-m += $(arm64-obj-m) head-y := head.o extra-y += $(head-y) vmlinux.lds - -# vDSO - this must be built first to generate the symbol offsets -$(call objectify,$(arm64-obj-y)): 
$(obj)/vdso/vdso-offsets.h -$(obj)/vdso/vdso-offsets.h: $(obj)/vdso diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c new file mode 100644 index 000000000000..f85149cc7c71 --- /dev/null +++ b/arch/arm64/kernel/acpi_numa.c @@ -0,0 +1,112 @@ +/* + * ACPI 5.1 based NUMA setup for ARM64 + * Lots of code was borrowed from arch/x86/mm/srat.c + * + * Copyright 2004 Andi Kleen, SuSE Labs. + * Copyright (C) 2013-2016, Linaro Ltd. + * Author: Hanjun Guo <hanjun.guo@linaro.org> + * + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs. + * + * Called from acpi_numa_init while reading the SRAT and SLIT tables. + * Assumes all memory regions belonging to a single proximity domain + * are in one chunk. Holes between them will be included in the node. + */ + +#define pr_fmt(fmt) "ACPI: NUMA: " fmt + +#include <linux/acpi.h> +#include <linux/bitmap.h> +#include <linux/bootmem.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/memblock.h> +#include <linux/mmzone.h> +#include <linux/module.h> +#include <linux/topology.h> + +#include <acpi/processor.h> +#include <asm/numa.h> + +static int cpus_in_srat; + +struct __node_cpu_hwid { + u32 node_id; /* logical node containing this CPU */ + u64 cpu_hwid; /* MPIDR for this CPU */ +}; + +static struct __node_cpu_hwid early_node_cpu_hwid[NR_CPUS] = { +[0 ... 
NR_CPUS - 1] = {NUMA_NO_NODE, PHYS_CPUID_INVALID} }; + +int acpi_numa_get_nid(unsigned int cpu, u64 hwid) +{ + int i; + + for (i = 0; i < cpus_in_srat; i++) { + if (hwid == early_node_cpu_hwid[i].cpu_hwid) + return early_node_cpu_hwid[i].node_id; + } + + return NUMA_NO_NODE; +} + +/* Callback for Proximity Domain -> ACPI processor UID mapping */ +void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) +{ + int pxm, node; + phys_cpuid_t mpidr; + + if (srat_disabled()) + return; + + if (pa->header.length < sizeof(struct acpi_srat_gicc_affinity)) { + pr_err("SRAT: Invalid SRAT header length: %d\n", + pa->header.length); + bad_srat(); + return; + } + + if (!(pa->flags & ACPI_SRAT_GICC_ENABLED)) + return; + + if (cpus_in_srat >= NR_CPUS) { + pr_warn_once("SRAT: cpu_to_node_map[%d] is too small, may not be able to use all cpus\n", + NR_CPUS); + return; + } + + pxm = pa->proximity_domain; + node = acpi_map_pxm_to_node(pxm); + + if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) { + pr_err("SRAT: Too many proximity domains %d\n", pxm); + bad_srat(); + return; + } + + mpidr = acpi_map_madt_entry(pa->acpi_processor_uid); + if (mpidr == PHYS_CPUID_INVALID) { + pr_err("SRAT: PXM %d with ACPI ID %d has no valid MPIDR in MADT\n", + pxm, pa->acpi_processor_uid); + bad_srat(); + return; + } + + early_node_cpu_hwid[cpus_in_srat].node_id = node; + early_node_cpu_hwid[cpus_in_srat].cpu_hwid = mpidr; + node_set(node, numa_nodes_parsed); + cpus_in_srat++; + pr_info("SRAT: PXM %d -> MPIDR 0x%Lx -> Node %d\n", + pxm, mpidr, node); +} + +int __init arm64_acpi_numa_init(void) +{ + int ret; + + ret = acpi_numa_init(); + if (ret) + return ret; + + return srat_disabled() ? 
-EINVAL : 0; +} diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 678f30b05a45..78f368039c79 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -27,6 +27,7 @@ #include <linux/uaccess.h> #include <linux/io.h> #include <linux/arm-smccc.h> +#include <linux/kprobes.h> #include <asm/checksum.h> @@ -34,8 +35,8 @@ EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); /* user mem (segment) */ -EXPORT_SYMBOL(__copy_from_user); -EXPORT_SYMBOL(__copy_to_user); +EXPORT_SYMBOL(__arch_copy_from_user); +EXPORT_SYMBOL(__arch_copy_to_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__copy_in_user); @@ -68,6 +69,7 @@ EXPORT_SYMBOL(test_and_change_bit); #ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(_mcount); +NOKPROBE_SYMBOL(_mcount); #endif /* arm-smccc */ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index c37202c0c838..42ffdb54e162 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -121,7 +121,7 @@ static int run_all_cpu_set_hw_mode(struct insn_emulation *insn, bool enable) * 0 - If all the hooks ran successfully. * -EINVAL - At least one hook is not supported by the CPU. */ -static int run_all_insn_set_hw_mode(unsigned long cpu) +static int run_all_insn_set_hw_mode(unsigned int cpu) { int rc = 0; unsigned long flags; @@ -131,7 +131,7 @@ static int run_all_insn_set_hw_mode(unsigned long cpu) list_for_each_entry(insn, &insn_emulation, node) { bool enable = (insn->current_mode == INSN_HW); if (insn->ops->set_hw_mode && insn->ops->set_hw_mode(enable)) { - pr_warn("CPU[%ld] cannot support the emulation of %s", + pr_warn("CPU[%u] cannot support the emulation of %s", cpu, insn->ops->name); rc = -EINVAL; } @@ -316,28 +316,6 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table) */ #define TYPE_SWPB (1 << 22) -/* - * Set up process info to signal segmentation fault - called on access error. 
- */ -static void set_segfault(struct pt_regs *regs, unsigned long addr) -{ - siginfo_t info; - - down_read(&current->mm->mmap_sem); - if (find_vma(current->mm, addr) == NULL) - info.si_code = SEGV_MAPERR; - else - info.si_code = SEGV_ACCERR; - up_read(&current->mm->mmap_sem); - - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_addr = (void *) instruction_pointer(regs); - - pr_debug("SWP{B} emulation: access caused memory abort!\n"); - arm64_notify_die("Illegal memory access", regs, &info, 0); -} - static int emulate_swpX(unsigned int address, unsigned int *data, unsigned int type) { @@ -366,6 +344,21 @@ static int emulate_swpX(unsigned int address, unsigned int *data, return res; } +#define ARM_OPCODE_CONDITION_UNCOND 0xf + +static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr) +{ + u32 cc_bits = opcode >> 28; + + if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) { + if ((*aarch32_opcode_cond_checks[cc_bits])(psr)) + return ARM_OPCODE_CONDTEST_PASS; + else + return ARM_OPCODE_CONDTEST_FAIL; + } + return ARM_OPCODE_CONDTEST_UNCOND; +} + /* * swp_handler logs the id of calling process, dissects the instruction, sanity * checks the memory location, calls emulate_swpX for the actual operation and @@ -380,7 +373,7 @@ static int swp_handler(struct pt_regs *regs, u32 instr) type = instr & TYPE_SWPB; - switch (arm_check_condition(instr, regs->pstate)) { + switch (aarch32_check_condition(instr, regs->pstate)) { case ARM_OPCODE_CONDTEST_PASS: break; case ARM_OPCODE_CONDTEST_FAIL: @@ -430,7 +423,8 @@ ret: return 0; fault: - set_segfault(regs, address); + pr_debug("SWP{B} emulation: access caused memory abort!\n"); + arm64_notify_segfault(regs, address); return 0; } @@ -461,7 +455,7 @@ static int cp15barrier_handler(struct pt_regs *regs, u32 instr) { perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc); - switch (arm_check_condition(instr, regs->pstate)) { + switch (aarch32_check_condition(instr, regs->pstate)) { case ARM_OPCODE_CONDTEST_PASS: break; 
case ARM_OPCODE_CONDTEST_FAIL: @@ -617,20 +611,6 @@ static struct insn_emulation_ops setend_ops = { .set_hw_mode = setend_set_hw_mode, }; -static int insn_cpu_hotplug_notify(struct notifier_block *b, - unsigned long action, void *hcpu) -{ - int rc = 0; - if ((action & ~CPU_TASKS_FROZEN) == CPU_STARTING) - rc = run_all_insn_set_hw_mode((unsigned long)hcpu); - - return notifier_from_errno(rc); -} - -static struct notifier_block insn_cpu_hotplug_notifier = { - .notifier_call = insn_cpu_hotplug_notify, -}; - /* * Invoked as late_initcall, since not needed before init spawned. */ @@ -649,7 +629,9 @@ static int __init armv8_deprecated_init(void) pr_info("setend instruction emulation is not supported on the system"); } - register_cpu_notifier(&insn_cpu_hotplug_notifier); + cpuhp_setup_state_nocalls(CPUHP_AP_ARM64_ISNDEP_STARTING, + "AP_ARM64_ISNDEP_STARTING", + run_all_insn_set_hw_mode, NULL); register_insn_emulation_sysctl(ctl_abi); return 0; diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 2f4ba774488a..05070b72fc28 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -51,6 +51,17 @@ int main(void) DEFINE(S_X5, offsetof(struct pt_regs, regs[5])); DEFINE(S_X6, offsetof(struct pt_regs, regs[6])); DEFINE(S_X7, offsetof(struct pt_regs, regs[7])); + DEFINE(S_X8, offsetof(struct pt_regs, regs[8])); + DEFINE(S_X10, offsetof(struct pt_regs, regs[10])); + DEFINE(S_X12, offsetof(struct pt_regs, regs[12])); + DEFINE(S_X14, offsetof(struct pt_regs, regs[14])); + DEFINE(S_X16, offsetof(struct pt_regs, regs[16])); + DEFINE(S_X18, offsetof(struct pt_regs, regs[18])); + DEFINE(S_X20, offsetof(struct pt_regs, regs[20])); + DEFINE(S_X22, offsetof(struct pt_regs, regs[22])); + DEFINE(S_X24, offsetof(struct pt_regs, regs[24])); + DEFINE(S_X26, offsetof(struct pt_regs, regs[26])); + DEFINE(S_X28, offsetof(struct pt_regs, regs[28])); DEFINE(S_LR, offsetof(struct pt_regs, regs[30])); DEFINE(S_SP, offsetof(struct pt_regs, 
sp)); #ifdef CONFIG_COMPAT @@ -78,6 +89,7 @@ int main(void) BLANK(); DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW); DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE); @@ -85,6 +97,8 @@ int main(void) DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); BLANK(); DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last)); + DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec)); + DEFINE(VDSO_RAW_TIME_NSEC, offsetof(struct vdso_data, raw_time_nsec)); DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec)); DEFINE(VDSO_XTIME_CLK_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); @@ -92,7 +106,8 @@ int main(void) DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec)); DEFINE(VDSO_WTM_CLK_NSEC, offsetof(struct vdso_data, wtm_clock_nsec)); DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count)); - DEFINE(VDSO_CS_MULT, offsetof(struct vdso_data, cs_mult)); + DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult)); + DEFINE(VDSO_CS_RAW_MULT, offsetof(struct vdso_data, cs_raw_mult)); DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift)); DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest)); DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S new file mode 100644 index 000000000000..65f42d257414 --- /dev/null +++ b/arch/arm64/kernel/cpu-reset.S @@ -0,0 +1,54 @@ +/* + * CPU reset routines + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Copyright (C) 2012 ARM Ltd. + * Copyright (C) 2015 Huawei Futurewei Technologies. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/sysreg.h> +#include <asm/virt.h> + +.text +.pushsection .idmap.text, "ax" + +/* + * __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for + * cpu_soft_restart. + * + * @el2_switch: Flag to indicate a switch to EL2 is needed. + * @entry: Location to jump to for soft reset. + * arg0: First argument passed to @entry. + * arg1: Second argument passed to @entry. + * arg2: Third argument passed to @entry. + * + * Put the CPU into the same state as it would be if it had been reset, and + * branch to what would be the reset vector. It must be executed with the + * flat identity mapping. + */ +ENTRY(__cpu_soft_restart) + /* Clear sctlr_el1 flags. */ + mrs x12, sctlr_el1 + ldr x13, =SCTLR_ELx_FLAGS + bic x12, x12, x13 + msr sctlr_el1, x12 + isb + + cbz x0, 1f // el2_switch? + mov x0, #HVC_SOFT_RESTART + hvc #0 // no return + +1: mov x18, x1 // entry + mov x0, x2 // arg0 + mov x1, x3 // arg1 + mov x2, x4 // arg2 + br x18 +ENDPROC(__cpu_soft_restart) + +.popsection diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h new file mode 100644 index 000000000000..d4e9ecb264f0 --- /dev/null +++ b/arch/arm64/kernel/cpu-reset.h @@ -0,0 +1,34 @@ +/* + * CPU reset routines + * + * Copyright (C) 2015 Huawei Futurewei Technologies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _ARM64_CPU_RESET_H +#define _ARM64_CPU_RESET_H + +#include <asm/virt.h> + +void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry, + unsigned long arg0, unsigned long arg1, unsigned long arg2); + +static inline void __noreturn cpu_soft_restart(unsigned long el2_switch, + unsigned long entry, unsigned long arg0, unsigned long arg1, + unsigned long arg2) +{ + typeof(__cpu_soft_restart) *restart; + + el2_switch = el2_switch && !is_kernel_in_hyp_mode() && + is_hyp_mode_available(); + restart = (void *)virt_to_phys(__cpu_soft_restart); + + cpu_install_idmap(); + restart(el2_switch, entry, arg0, arg1, arg2); + unreachable(); +} + +#endif diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index af716b65110d..82b0fc2e637b 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -46,6 +46,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "ARM errata 826319, 827319, 824069", .capability = ARM64_WORKAROUND_CLEAN_CACHE, MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02), + .enable = cpu_enable_cache_maint_trap, }, #endif #ifdef CONFIG_ARM64_ERRATUM_819472 @@ -54,6 +55,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "ARM errata 819472", .capability = ARM64_WORKAROUND_CLEAN_CACHE, MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x01), + .enable = cpu_enable_cache_maint_trap, }, #endif #ifdef CONFIG_ARM64_ERRATUM_832075 @@ -133,3 +135,8 @@ void check_local_cpu_errata(void) { update_cpu_capabilities(arm64_errata, "enabling workaround for"); } + +void __init enable_errata_workarounds(void) +{ + enable_cpu_capabilities(arm64_errata); +} diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 811773d1c1d0..916d27ad79c1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -913,8 +913,7 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, * Run through the enabled capabilities and enable() it on all active * 
CPUs */ -static void __init -enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { for (; caps->matches; caps++) if (caps->enable && cpus_have_cap(caps->capability)) @@ -1036,6 +1035,7 @@ void __init setup_cpu_features(void) /* Set the CPU feature capabilies */ setup_feature_capabilities(); + enable_errata_workarounds(); setup_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index e11857fce05f..75a0f8acef66 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -9,13 +9,16 @@ * published by the Free Software Foundation. */ +#include <linux/acpi.h> +#include <linux/cpuidle.h> +#include <linux/cpu_pm.h> #include <linux/of.h> #include <linux/of_device.h> #include <asm/cpuidle.h> #include <asm/cpu_ops.h> -int __init arm_cpuidle_init(unsigned int cpu) +int arm_cpuidle_init(unsigned int cpu) { int ret = -EOPNOTSUPP; @@ -39,3 +42,18 @@ int arm_cpuidle_suspend(int index) return cpu_ops[cpu]->cpu_suspend(index); } + +#ifdef CONFIG_ACPI + +#include <acpi/processor.h> + +int acpi_processor_ffh_lpi_probe(unsigned int cpu) +{ + return arm_cpuidle_init(cpu); +} + +int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) +{ + return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, lpi->index); +} +#endif diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index c173d329397f..ed1b84fe6925 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -183,6 +183,123 @@ const struct seq_operations cpuinfo_op = { .show = c_show }; + +static struct kobj_type cpuregs_kobj_type = { + .sysfs_ops = &kobj_sysfs_ops, +}; + +/* + * The ARM ARM uses the phrase "32-bit register" to describe a register + * whose upper 32 bits are RES0 (per C5.1.1, ARM DDI 0487A.i), however + * no statement is made as to whether the upper 32 bits will or will not + * be made use 
of in future, and between ARM DDI 0487A.c and ARM DDI + * 0487A.d CLIDR_EL1 was expanded from 32-bit to 64-bit. + * + * Thus, while both MIDR_EL1 and REVIDR_EL1 are described as 32-bit + * registers, we expose them both as 64 bit values to cater for possible + * future expansion without an ABI break. + */ +#define kobj_to_cpuinfo(kobj) container_of(kobj, struct cpuinfo_arm64, kobj) +#define CPUREGS_ATTR_RO(_name, _field) \ + static ssize_t _name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf) \ + { \ + struct cpuinfo_arm64 *info = kobj_to_cpuinfo(kobj); \ + \ + if (info->reg_midr) \ + return sprintf(buf, "0x%016x\n", info->reg_##_field); \ + else \ + return 0; \ + } \ + static struct kobj_attribute cpuregs_attr_##_name = __ATTR_RO(_name) + +CPUREGS_ATTR_RO(midr_el1, midr); +CPUREGS_ATTR_RO(revidr_el1, revidr); + +static struct attribute *cpuregs_id_attrs[] = { + &cpuregs_attr_midr_el1.attr, + &cpuregs_attr_revidr_el1.attr, + NULL +}; + +static struct attribute_group cpuregs_attr_group = { + .attrs = cpuregs_id_attrs, + .name = "identification" +}; + +static int cpuid_add_regs(int cpu) +{ + int rc; + struct device *dev; + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); + + dev = get_cpu_device(cpu); + if (!dev) { + rc = -ENODEV; + goto out; + } + rc = kobject_add(&info->kobj, &dev->kobj, "regs"); + if (rc) + goto out; + rc = sysfs_create_group(&info->kobj, &cpuregs_attr_group); + if (rc) + kobject_del(&info->kobj); +out: + return rc; +} + +static int cpuid_remove_regs(int cpu) +{ + struct device *dev; + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); + + dev = get_cpu_device(cpu); + if (!dev) + return -ENODEV; + if (info->kobj.parent) { + sysfs_remove_group(&info->kobj, &cpuregs_attr_group); + kobject_del(&info->kobj); + } + + return 0; +} + +static int cpuid_callback(struct notifier_block *nb, + unsigned long action, void *hcpu) +{ + int rc = 0; + unsigned long cpu = (unsigned long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { 
+ case CPU_ONLINE: + rc = cpuid_add_regs(cpu); + break; + case CPU_DEAD: + rc = cpuid_remove_regs(cpu); + break; + } + + return notifier_from_errno(rc); +} + +static int __init cpuinfo_regs_init(void) +{ + int cpu; + + cpu_notifier_register_begin(); + + for_each_possible_cpu(cpu) { + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); + + kobject_init(&info->kobj, &cpuregs_kobj_type); + if (cpu_online(cpu)) + cpuid_add_regs(cpu); + } + __hotcpu_notifier(cpuid_callback, 0); + + cpu_notifier_register_done(); + return 0; +} static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) { unsigned int cpu = smp_processor_id(); @@ -212,6 +329,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_ctr = read_cpuid_cachetype(); info->reg_dczid = read_cpuid(DCZID_EL0); info->reg_midr = read_cpuid_id(); + info->reg_revidr = read_cpuid(REVIDR_EL1); info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1); info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1); @@ -264,3 +382,5 @@ void __init cpuinfo_store_boot_cpu(void) boot_cpu_data = *info; init_cpu_features(&boot_cpu_data); } + +device_initcall(cpuinfo_regs_init); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 4fbf3c54275c..91fff48d0f57 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -23,6 +23,7 @@ #include <linux/hardirq.h> #include <linux/init.h> #include <linux/ptrace.h> +#include <linux/kprobes.h> #include <linux/stat.h> #include <linux/uaccess.h> @@ -48,6 +49,7 @@ static void mdscr_write(u32 mdscr) asm volatile("msr mdscr_el1, %0" :: "r" (mdscr)); local_dbg_restore(flags); } +NOKPROBE_SYMBOL(mdscr_write); static u32 mdscr_read(void) { @@ -55,6 +57,7 @@ static u32 mdscr_read(void) asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr)); return mdscr; } +NOKPROBE_SYMBOL(mdscr_read); /* * Allow root to disable self-hosted debug from userspace. 
@@ -103,6 +106,7 @@ void enable_debug_monitors(enum dbg_active_el el) mdscr_write(mdscr); } } +NOKPROBE_SYMBOL(enable_debug_monitors); void disable_debug_monitors(enum dbg_active_el el) { @@ -123,6 +127,7 @@ void disable_debug_monitors(enum dbg_active_el el) mdscr_write(mdscr); } } +NOKPROBE_SYMBOL(disable_debug_monitors); /* * OS lock clearing. @@ -151,7 +156,6 @@ static int debug_monitors_init(void) /* Clear the OS lock. */ on_each_cpu(clear_os_lock, NULL, 1); isb(); - local_dbg_enable(); /* Register hotplug handler. */ __register_cpu_notifier(&os_lock_nb); @@ -166,22 +170,15 @@ postcore_initcall(debug_monitors_init); */ static void set_regs_spsr_ss(struct pt_regs *regs) { - unsigned long spsr; - - spsr = regs->pstate; - spsr &= ~DBG_SPSR_SS; - spsr |= DBG_SPSR_SS; - regs->pstate = spsr; + regs->pstate |= DBG_SPSR_SS; } +NOKPROBE_SYMBOL(set_regs_spsr_ss); static void clear_regs_spsr_ss(struct pt_regs *regs) { - unsigned long spsr; - - spsr = regs->pstate; - spsr &= ~DBG_SPSR_SS; - regs->pstate = spsr; + regs->pstate &= ~DBG_SPSR_SS; } +NOKPROBE_SYMBOL(clear_regs_spsr_ss); /* EL1 Single Step Handler hooks */ static LIST_HEAD(step_hook); @@ -225,6 +222,7 @@ static int call_step_hook(struct pt_regs *regs, unsigned int esr) return retval; } +NOKPROBE_SYMBOL(call_step_hook); static void send_user_sigtrap(int si_code) { @@ -266,6 +264,10 @@ static int single_step_handler(unsigned long addr, unsigned int esr, */ user_rewind_single_step(current); } else { +#ifdef CONFIG_KPROBES + if (kprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED) + return 0; +#endif if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED) return 0; @@ -279,6 +281,7 @@ static int single_step_handler(unsigned long addr, unsigned int esr, return 0; } +NOKPROBE_SYMBOL(single_step_handler); /* * Breakpoint handler is re-entrant as another breakpoint can @@ -316,19 +319,28 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr) return fn ? 
fn(regs, esr) : DBG_HOOK_ERROR; } +NOKPROBE_SYMBOL(call_break_hook); static int brk_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { if (user_mode(regs)) { send_user_sigtrap(TRAP_BRKPT); - } else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) { - pr_warning("Unexpected kernel BRK exception at EL1\n"); + } +#ifdef CONFIG_KPROBES + else if ((esr & BRK64_ESR_MASK) == BRK64_ESR_KPROBES) { + if (kprobe_breakpoint_handler(regs, esr) != DBG_HOOK_HANDLED) + return -EFAULT; + } +#endif + else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) { + pr_warn("Unexpected kernel BRK exception at EL1\n"); return -EFAULT; } return 0; } +NOKPROBE_SYMBOL(brk_handler); int aarch32_break_handler(struct pt_regs *regs) { @@ -365,6 +377,7 @@ int aarch32_break_handler(struct pt_regs *regs) send_user_sigtrap(TRAP_BRKPT); return 0; } +NOKPROBE_SYMBOL(aarch32_break_handler); static int __init debug_traps_init(void) { @@ -386,6 +399,7 @@ void user_rewind_single_step(struct task_struct *task) if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) set_regs_spsr_ss(task_pt_regs(task)); } +NOKPROBE_SYMBOL(user_rewind_single_step); void user_fastforward_single_step(struct task_struct *task) { @@ -401,6 +415,7 @@ void kernel_enable_single_step(struct pt_regs *regs) mdscr_write(mdscr_read() | DBG_MDSCR_SS); enable_debug_monitors(DBG_ACTIVE_EL1); } +NOKPROBE_SYMBOL(kernel_enable_single_step); void kernel_disable_single_step(void) { @@ -408,12 +423,14 @@ void kernel_disable_single_step(void) mdscr_write(mdscr_read() & ~DBG_MDSCR_SS); disable_debug_monitors(DBG_ACTIVE_EL1); } +NOKPROBE_SYMBOL(kernel_disable_single_step); int kernel_active_single_step(void) { WARN_ON(!irqs_disabled()); return mdscr_read() & DBG_MDSCR_SS; } +NOKPROBE_SYMBOL(kernel_active_single_step); /* ptrace API */ void user_enable_single_step(struct task_struct *task) @@ -421,8 +438,10 @@ void user_enable_single_step(struct task_struct *task) set_ti_thread_flag(task_thread_info(task), 
TIF_SINGLESTEP); set_regs_spsr_ss(task_pt_regs(task)); } +NOKPROBE_SYMBOL(user_enable_single_step); void user_disable_single_step(struct task_struct *task) { clear_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); } +NOKPROBE_SYMBOL(user_disable_single_step); diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 78f52488f9ff..ba9bee389fd5 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -62,13 +62,61 @@ struct screen_info screen_info __section(.data); int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) { pteval_t prot_val = create_mapping_protection(md); + bool allow_block_mappings = (md->type != EFI_RUNTIME_SERVICES_CODE && + md->type != EFI_RUNTIME_SERVICES_DATA); + + if (!PAGE_ALIGNED(md->phys_addr) || + !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT)) { + /* + * If the end address of this region is not aligned to page + * size, the mapping is rounded up, and may end up sharing a + * page frame with the next UEFI memory region. If we create + * a block entry now, we may need to split it again when mapping + * the next region, and support for that is going to be removed + * from the MMU routines. So avoid block mappings altogether in + * that case. 
+ */ + allow_block_mappings = false; + } create_pgd_mapping(mm, md->phys_addr, md->virt_addr, md->num_pages << EFI_PAGE_SHIFT, - __pgprot(prot_val | PTE_NG)); + __pgprot(prot_val | PTE_NG), allow_block_mappings); + return 0; +} + +static int __init set_permissions(pte_t *ptep, pgtable_t token, + unsigned long addr, void *data) +{ + efi_memory_desc_t *md = data; + pte_t pte = *ptep; + + if (md->attribute & EFI_MEMORY_RO) + pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); + if (md->attribute & EFI_MEMORY_XP) + pte = set_pte_bit(pte, __pgprot(PTE_PXN)); + set_pte(ptep, pte); return 0; } +int __init efi_set_mapping_permissions(struct mm_struct *mm, + efi_memory_desc_t *md) +{ + BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE && + md->type != EFI_RUNTIME_SERVICES_DATA); + + /* + * Calling apply_to_page_range() is only safe on regions that are + * guaranteed to be mapped down to pages. Since we are only called + * for regions that have been mapped using efi_create_mapping() above + * (and this is checked by the generic Memory Attributes table parsing + * routines), there is no need to check that again here. + */ + return apply_to_page_range(mm, md->virt_addr, + md->num_pages << EFI_PAGE_SHIFT, + set_permissions, md); +} + static int __init arm64_dmi_init(void) { /* diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 6c3b7345a6c4..96e4a2b64cc1 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -258,6 +258,7 @@ tsk .req x28 // current thread_info /* * Exception vectors. 
*/ + .pushsection ".entry.text", "ax" .align 11 ENTRY(vectors) @@ -466,7 +467,7 @@ el0_sync: cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception b.eq el0_fpsimd_exc cmp x24, #ESR_ELx_EC_SYS64 // configurable trap - b.eq el0_undef + b.eq el0_sys cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception b.eq el0_sp_pc cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception @@ -547,7 +548,7 @@ el0_ia: enable_dbg_and_irq ct_user_exit mov x0, x26 - orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts + mov x1, x25 mov x2, sp bl do_mem_abort b ret_to_user @@ -594,6 +595,16 @@ el0_undef: mov x0, sp bl do_undefinstr b ret_to_user +el0_sys: + /* + * System instructions, for trapped cache maintenance instructions + */ + enable_dbg_and_irq + ct_user_exit + mov x0, x25 + mov x1, sp + bl do_sysinstr + b ret_to_user el0_dbg: /* * Debug exception handling @@ -789,6 +800,8 @@ __ni_sys_trace: bl do_ni_syscall b __sys_trace_return + .popsection // .entry.text + /* * Special system call wrappers. 
*/ diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index ce21aa88263f..26a6bf77d272 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -24,6 +24,7 @@ #include <linux/cpu_pm.h> #include <linux/errno.h> #include <linux/hw_breakpoint.h> +#include <linux/kprobes.h> #include <linux/perf_event.h> #include <linux/ptrace.h> #include <linux/smp.h> @@ -127,6 +128,7 @@ static u64 read_wb_reg(int reg, int n) return val; } +NOKPROBE_SYMBOL(read_wb_reg); static void write_wb_reg(int reg, int n, u64 val) { @@ -140,6 +142,7 @@ static void write_wb_reg(int reg, int n, u64 val) } isb(); } +NOKPROBE_SYMBOL(write_wb_reg); /* * Convert a breakpoint privilege level to the corresponding exception @@ -157,6 +160,7 @@ static enum dbg_active_el debug_exception_level(int privilege) return -EINVAL; } } +NOKPROBE_SYMBOL(debug_exception_level); enum hw_breakpoint_ops { HW_BREAKPOINT_INSTALL, @@ -575,6 +579,7 @@ static void toggle_bp_registers(int reg, enum dbg_active_el el, int enable) write_wb_reg(reg, i, ctrl); } } +NOKPROBE_SYMBOL(toggle_bp_registers); /* * Debug exception handlers. @@ -654,6 +659,7 @@ unlock: return 0; } +NOKPROBE_SYMBOL(breakpoint_handler); static int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) @@ -756,6 +762,7 @@ unlock: return 0; } +NOKPROBE_SYMBOL(watchpoint_handler); /* * Handle single-step exception. @@ -813,6 +820,7 @@ int reinstall_suspended_bps(struct pt_regs *regs) return !handled_exception; } +NOKPROBE_SYMBOL(reinstall_suspended_bps); /* * Context-switcher for restoring suspended breakpoints. 
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 8727f4490772..d3b5f75e652e 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -71,8 +71,16 @@ el1_sync: msr vbar_el2, x1 b 9f +2: cmp x0, #HVC_SOFT_RESTART + b.ne 3f + mov x0, x2 + mov x2, x4 + mov x4, x1 + mov x1, x3 + br x4 // no return + /* Someone called kvm_call_hyp() against the hyp-stub... */ -2: mov x0, #ARM_EXCEPTION_HYP_GONE +3: mov x0, #ARM_EXCEPTION_HYP_GONE 9: eret ENDPROC(el1_sync) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 368c08290dd8..63f9432d05e8 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -30,6 +30,7 @@ #include <asm/cacheflush.h> #include <asm/debug-monitors.h> #include <asm/fixmap.h> +#include <asm/opcodes.h> #include <asm/insn.h> #define AARCH64_INSN_SF_BIT BIT(31) @@ -162,6 +163,32 @@ static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) aarch64_insn_is_nop(insn); } +bool __kprobes aarch64_insn_uses_literal(u32 insn) +{ + /* ldr/ldrsw (literal), prfm */ + + return aarch64_insn_is_ldr_lit(insn) || + aarch64_insn_is_ldrsw_lit(insn) || + aarch64_insn_is_adr_adrp(insn) || + aarch64_insn_is_prfm_lit(insn); +} + +bool __kprobes aarch64_insn_is_branch(u32 insn) +{ + /* b, bl, cb*, tb*, b.cond, br, blr */ + + return aarch64_insn_is_b(insn) || + aarch64_insn_is_bl(insn) || + aarch64_insn_is_cbz(insn) || + aarch64_insn_is_cbnz(insn) || + aarch64_insn_is_tbz(insn) || + aarch64_insn_is_tbnz(insn) || + aarch64_insn_is_ret(insn) || + aarch64_insn_is_br(insn) || + aarch64_insn_is_blr(insn) || + aarch64_insn_is_bcond(insn); +} + /* * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a * Section B2.6.5 "Concurrent modification and execution of instructions": @@ -1175,6 +1202,14 @@ u32 aarch64_set_branch_offset(u32 insn, s32 offset) BUG(); } +/* + * Extract the Op/CR data from a msr/mrs instruction. 
+ */ +u32 aarch64_insn_extract_system_reg(u32 insn) +{ + return (insn & 0x1FFFE0) >> 5; +} + bool aarch32_insn_is_wide(u32 insn) { return insn >= 0xe800; @@ -1200,3 +1235,101 @@ u32 aarch32_insn_mcr_extract_crm(u32 insn) { return insn & CRM_MASK; } + +static bool __kprobes __check_eq(unsigned long pstate) +{ + return (pstate & PSR_Z_BIT) != 0; +} + +static bool __kprobes __check_ne(unsigned long pstate) +{ + return (pstate & PSR_Z_BIT) == 0; +} + +static bool __kprobes __check_cs(unsigned long pstate) +{ + return (pstate & PSR_C_BIT) != 0; +} + +static bool __kprobes __check_cc(unsigned long pstate) +{ + return (pstate & PSR_C_BIT) == 0; +} + +static bool __kprobes __check_mi(unsigned long pstate) +{ + return (pstate & PSR_N_BIT) != 0; +} + +static bool __kprobes __check_pl(unsigned long pstate) +{ + return (pstate & PSR_N_BIT) == 0; +} + +static bool __kprobes __check_vs(unsigned long pstate) +{ + return (pstate & PSR_V_BIT) != 0; +} + +static bool __kprobes __check_vc(unsigned long pstate) +{ + return (pstate & PSR_V_BIT) == 0; +} + +static bool __kprobes __check_hi(unsigned long pstate) +{ + pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ + return (pstate & PSR_C_BIT) != 0; +} + +static bool __kprobes __check_ls(unsigned long pstate) +{ + pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ + return (pstate & PSR_C_BIT) == 0; +} + +static bool __kprobes __check_ge(unsigned long pstate) +{ + pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + return (pstate & PSR_N_BIT) == 0; +} + +static bool __kprobes __check_lt(unsigned long pstate) +{ + pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + return (pstate & PSR_N_BIT) != 0; +} + +static bool __kprobes __check_gt(unsigned long pstate) +{ + /*PSR_N_BIT ^= PSR_V_BIT */ + unsigned long temp = pstate ^ (pstate << 3); + + temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */ + return (temp & PSR_N_BIT) == 0; +} + +static bool __kprobes __check_le(unsigned long pstate) +{ + /*PSR_N_BIT ^= PSR_V_BIT */ + 
unsigned long temp = pstate ^ (pstate << 3); + + temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */ + return (temp & PSR_N_BIT) != 0; +} + +static bool __kprobes __check_al(unsigned long pstate) +{ + return true; +} + +/* + * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that + * it behaves identically to 0b1110 ("al"). + */ +pstate_check_t * const aarch32_opcode_cond_checks[16] = { + __check_eq, __check_ne, __check_cs, __check_cc, + __check_mi, __check_pl, __check_vs, __check_vc, + __check_hi, __check_ls, __check_ge, __check_lt, + __check_gt, __check_le, __check_al, __check_al +}; diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index b5f063e5eff7..8c57f6496e56 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -22,6 +22,7 @@ #include <linux/irq.h> #include <linux/kdebug.h> #include <linux/kgdb.h> +#include <linux/kprobes.h> #include <asm/traps.h> struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { @@ -230,6 +231,7 @@ static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) kgdb_handle_exception(1, SIGTRAP, 0, regs); return 0; } +NOKPROBE_SYMBOL(kgdb_brk_fn) static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) { @@ -238,12 +240,14 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) return 0; } +NOKPROBE_SYMBOL(kgdb_compiled_brk_fn); static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) { kgdb_handle_exception(1, SIGTRAP, 0, regs); return 0; } +NOKPROBE_SYMBOL(kgdb_step_brk_fn); static struct break_hook kgdb_brkpt_hook = { .esr_mask = 0xffffffff, diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c new file mode 100644 index 000000000000..bc96c8a7fc79 --- /dev/null +++ b/arch/arm64/kernel/machine_kexec.c @@ -0,0 +1,212 @@ +/* + * kexec for arm64 + * + * Copyright (C) Linaro. + * Copyright (C) Huawei Futurewei Technologies. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kexec.h> +#include <linux/smp.h> + +#include <asm/cacheflush.h> +#include <asm/cpu_ops.h> +#include <asm/mmu_context.h> + +#include "cpu-reset.h" + +/* Global variables for the arm64_relocate_new_kernel routine. */ +extern const unsigned char arm64_relocate_new_kernel[]; +extern const unsigned long arm64_relocate_new_kernel_size; + +static unsigned long kimage_start; + +/** + * kexec_image_info - For debugging output. + */ +#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i) +static void _kexec_image_info(const char *func, int line, + const struct kimage *kimage) +{ + unsigned long i; + + pr_debug("%s:%d:\n", func, line); + pr_debug(" kexec kimage info:\n"); + pr_debug(" type: %d\n", kimage->type); + pr_debug(" start: %lx\n", kimage->start); + pr_debug(" head: %lx\n", kimage->head); + pr_debug(" nr_segments: %lu\n", kimage->nr_segments); + + for (i = 0; i < kimage->nr_segments; i++) { + pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", + i, + kimage->segment[i].mem, + kimage->segment[i].mem + kimage->segment[i].memsz, + kimage->segment[i].memsz, + kimage->segment[i].memsz / PAGE_SIZE); + } +} + +void machine_kexec_cleanup(struct kimage *kimage) +{ + /* Empty routine needed to avoid build errors. */ +} + +/** + * machine_kexec_prepare - Prepare for a kexec reboot. + * + * Called from the core kexec code when a kernel image is loaded. + * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus + * are stuck in the kernel. This avoids a panic once we hit machine_kexec(). 
+ */ +int machine_kexec_prepare(struct kimage *kimage) +{ + kimage_start = kimage->start; + + kexec_image_info(kimage); + + if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) { + pr_err("Can't kexec: CPUs are stuck in the kernel.\n"); + return -EBUSY; + } + + return 0; +} + +/** + * kexec_list_flush - Helper to flush the kimage list and source pages to PoC. + */ +static void kexec_list_flush(struct kimage *kimage) +{ + kimage_entry_t *entry; + + for (entry = &kimage->head; ; entry++) { + unsigned int flag; + void *addr; + + /* flush the list entries. */ + __flush_dcache_area(entry, sizeof(kimage_entry_t)); + + flag = *entry & IND_FLAGS; + if (flag == IND_DONE) + break; + + addr = phys_to_virt(*entry & PAGE_MASK); + + switch (flag) { + case IND_INDIRECTION: + /* Set entry point just before the new list page. */ + entry = (kimage_entry_t *)addr - 1; + break; + case IND_SOURCE: + /* flush the source pages. */ + __flush_dcache_area(addr, PAGE_SIZE); + break; + case IND_DESTINATION: + break; + default: + BUG(); + } + } +} + +/** + * kexec_segment_flush - Helper to flush the kimage segments to PoC. + */ +static void kexec_segment_flush(const struct kimage *kimage) +{ + unsigned long i; + + pr_debug("%s:\n", __func__); + + for (i = 0; i < kimage->nr_segments; i++) { + pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", + i, + kimage->segment[i].mem, + kimage->segment[i].mem + kimage->segment[i].memsz, + kimage->segment[i].memsz, + kimage->segment[i].memsz / PAGE_SIZE); + + __flush_dcache_area(phys_to_virt(kimage->segment[i].mem), + kimage->segment[i].memsz); + } +} + +/** + * machine_kexec - Do the kexec reboot. + * + * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC. + */ +void machine_kexec(struct kimage *kimage) +{ + phys_addr_t reboot_code_buffer_phys; + void *reboot_code_buffer; + + /* + * New cpus may have become stuck_in_kernel after we loaded the image. 
+ */ + BUG_ON(cpus_are_stuck_in_kernel() || (num_online_cpus() > 1)); + + reboot_code_buffer_phys = page_to_phys(kimage->control_code_page); + reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys); + + kexec_image_info(kimage); + + pr_debug("%s:%d: control_code_page: %p\n", __func__, __LINE__, + kimage->control_code_page); + pr_debug("%s:%d: reboot_code_buffer_phys: %pa\n", __func__, __LINE__, + &reboot_code_buffer_phys); + pr_debug("%s:%d: reboot_code_buffer: %p\n", __func__, __LINE__, + reboot_code_buffer); + pr_debug("%s:%d: relocate_new_kernel: %p\n", __func__, __LINE__, + arm64_relocate_new_kernel); + pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n", + __func__, __LINE__, arm64_relocate_new_kernel_size, + arm64_relocate_new_kernel_size); + + /* + * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use + * after the kernel is shut down. + */ + memcpy(reboot_code_buffer, arm64_relocate_new_kernel, + arm64_relocate_new_kernel_size); + + /* Flush the reboot_code_buffer in preparation for its execution. */ + __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size); + flush_icache_range((uintptr_t)reboot_code_buffer, + arm64_relocate_new_kernel_size); + + /* Flush the kimage list and its buffers. */ + kexec_list_flush(kimage); + + /* Flush the new image if already in place. */ + if (kimage->head & IND_DONE) + kexec_segment_flush(kimage); + + pr_info("Bye!\n"); + + /* Disable all DAIF exceptions. */ + asm volatile ("msr daifset, #0xf" : : : "memory"); + + /* + * cpu_soft_restart will shutdown the MMU, disable data caches, then + * transfer control to the reboot_code_buffer which contains a copy of + * the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel + * uses physical addressing to relocate the new image to its final + * position and transfers control to the image entry point when the + * relocation is complete. 
+ */ + + cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head, + kimage_start, 0); + + BUG(); /* Should never get here. */ +} + +void machine_crash_shutdown(struct pt_regs *regs) +{ + /* Empty routine needed to avoid build errors. */ +} diff --git a/arch/arm64/kernel/probes/Makefile b/arch/arm64/kernel/probes/Makefile new file mode 100644 index 000000000000..ce06312e3d34 --- /dev/null +++ b/arch/arm64/kernel/probes/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o \ + kprobes_trampoline.o \ + simulate-insn.o diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c new file mode 100644 index 000000000000..37e47a9d617e --- /dev/null +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -0,0 +1,174 @@ +/* + * arch/arm64/kernel/probes/decode-insn.c + * + * Copyright (C) 2013 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/module.h> +#include <asm/kprobes.h> +#include <asm/insn.h> +#include <asm/sections.h> + +#include "decode-insn.h" +#include "simulate-insn.h" + +static bool __kprobes aarch64_insn_is_steppable(u32 insn) +{ + /* + * Branch instructions will write a new value into the PC which is + * likely to be relative to the XOL address and therefore invalid. + * Deliberate generation of an exception during stepping is also not + * currently safe. Lastly, MSR instructions can do any number of nasty + * things we can't handle during single-stepping. 
+ */ + if (aarch64_get_insn_class(insn) == AARCH64_INSN_CLS_BR_SYS) { + if (aarch64_insn_is_branch(insn) || + aarch64_insn_is_msr_imm(insn) || + aarch64_insn_is_msr_reg(insn) || + aarch64_insn_is_exception(insn) || + aarch64_insn_is_eret(insn)) + return false; + + /* + * The MRS instruction may not return a correct value when + * executing in the single-stepping environment. We do make one + * exception, for reading the DAIF bits. + */ + if (aarch64_insn_is_mrs(insn)) + return aarch64_insn_extract_system_reg(insn) + != AARCH64_INSN_SPCLREG_DAIF; + + /* + * The HINT instruction is problematic when single-stepping, + * except for the NOP case. + */ + if (aarch64_insn_is_hint(insn)) + return aarch64_insn_is_nop(insn); + + return true; + } + + /* + * Instructions which load PC relative literals are not going to work + * when executed from an XOL slot. Instructions doing an exclusive + * load/store are not going to complete successfully when single-step + * exception handling happens in the middle of the sequence. + */ + if (aarch64_insn_uses_literal(insn) || + aarch64_insn_is_exclusive(insn)) + return false; + + return true; +} + +/* Return: + * INSN_REJECTED If instruction is one not allowed to kprobe, + * INSN_GOOD If instruction is supported and uses instruction slot, + * INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot. + */ +static enum kprobe_insn __kprobes +arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi) +{ + /* + * Instructions reading or modifying the PC won't work from the XOL + * slot.
+ */ + if (aarch64_insn_is_steppable(insn)) + return INSN_GOOD; + + if (aarch64_insn_is_bcond(insn)) { + asi->handler = simulate_b_cond; + } else if (aarch64_insn_is_cbz(insn) || + aarch64_insn_is_cbnz(insn)) { + asi->handler = simulate_cbz_cbnz; + } else if (aarch64_insn_is_tbz(insn) || + aarch64_insn_is_tbnz(insn)) { + asi->handler = simulate_tbz_tbnz; + } else if (aarch64_insn_is_adr_adrp(insn)) { + asi->handler = simulate_adr_adrp; + } else if (aarch64_insn_is_b(insn) || + aarch64_insn_is_bl(insn)) { + asi->handler = simulate_b_bl; + } else if (aarch64_insn_is_br(insn) || + aarch64_insn_is_blr(insn) || + aarch64_insn_is_ret(insn)) { + asi->handler = simulate_br_blr_ret; + } else if (aarch64_insn_is_ldr_lit(insn)) { + asi->handler = simulate_ldr_literal; + } else if (aarch64_insn_is_ldrsw_lit(insn)) { + asi->handler = simulate_ldrsw_literal; + } else { + /* + * Instruction cannot be stepped out-of-line and we don't + * (yet) simulate it. + */ + return INSN_REJECTED; + } + + return INSN_GOOD_NO_SLOT; +} + +static bool __kprobes +is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end) +{ + while (scan_start > scan_end) { + /* + * atomic region starts from exclusive load and ends with + * exclusive store. 
+ */ + if (aarch64_insn_is_store_ex(le32_to_cpu(*scan_start))) + return false; + else if (aarch64_insn_is_load_ex(le32_to_cpu(*scan_start))) + return true; + scan_start--; + } + + return false; +} + +enum kprobe_insn __kprobes +arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi) +{ + enum kprobe_insn decoded; + kprobe_opcode_t insn = le32_to_cpu(*addr); + kprobe_opcode_t *scan_start = addr - 1; + kprobe_opcode_t *scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE; +#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) + struct module *mod; +#endif + + if (addr >= (kprobe_opcode_t *)_text && + scan_end < (kprobe_opcode_t *)_text) + scan_end = (kprobe_opcode_t *)_text; +#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) + else { + preempt_disable(); + mod = __module_address((unsigned long)addr); + if (mod && within_module_init((unsigned long)addr, mod) && + !within_module_init((unsigned long)scan_end, mod)) + scan_end = (kprobe_opcode_t *)mod->init_layout.base; + else if (mod && within_module_core((unsigned long)addr, mod) && + !within_module_core((unsigned long)scan_end, mod)) + scan_end = (kprobe_opcode_t *)mod->core_layout.base; + preempt_enable(); + } +#endif + decoded = arm_probe_decode_insn(insn, asi); + + if (decoded == INSN_REJECTED || + is_probed_address_atomic(scan_start, scan_end)) + return INSN_REJECTED; + + return decoded; +} diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h new file mode 100644 index 000000000000..d438289646a6 --- /dev/null +++ b/arch/arm64/kernel/probes/decode-insn.h @@ -0,0 +1,35 @@ +/* + * arch/arm64/kernel/probes/decode-insn.h + * + * Copyright (C) 2013 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _ARM_KERNEL_KPROBES_ARM64_H +#define _ARM_KERNEL_KPROBES_ARM64_H + +/* + * ARM strongly recommends a limit of 128 bytes between LoadExcl and + * StoreExcl instructions in a single thread of execution. So keep the + * max atomic context size as 32. + */ +#define MAX_ATOMIC_CONTEXT_SIZE (128 / sizeof(kprobe_opcode_t)) + +enum kprobe_insn { + INSN_REJECTED, + INSN_GOOD_NO_SLOT, + INSN_GOOD, +}; + +enum kprobe_insn __kprobes +arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi); + +#endif /* _ARM_KERNEL_KPROBES_ARM64_H */ diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c new file mode 100644 index 000000000000..bf9768588288 --- /dev/null +++ b/arch/arm64/kernel/probes/kprobes.c @@ -0,0 +1,686 @@ +/* + * arch/arm64/kernel/probes/kprobes.c + * + * Kprobes support for ARM64 + * + * Copyright (C) 2013 Linaro Limited. + * Author: Sandeepa Prabhu <sandeepa.prabhu@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ +#include <linux/kasan.h> +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/stop_machine.h> +#include <linux/stringify.h> +#include <asm/traps.h> +#include <asm/ptrace.h> +#include <asm/cacheflush.h> +#include <asm/debug-monitors.h> +#include <asm/system_misc.h> +#include <asm/insn.h> +#include <asm/uaccess.h> +#include <asm/irq.h> +#include <asm-generic/sections.h> + +#include "decode-insn.h" + +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +static void __kprobes +post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); + +static inline unsigned long min_stack_size(unsigned long addr) +{ + unsigned long size; + + if (on_irq_stack(addr, raw_smp_processor_id())) + size = IRQ_STACK_PTR(raw_smp_processor_id()) - addr; + else + size = (unsigned long)current_thread_info() + THREAD_START_SP - addr; + + return min(size, FIELD_SIZEOF(struct kprobe_ctlblk, jprobes_stack)); +} + +static void __kprobes arch_prepare_ss_slot(struct kprobe *p) +{ + /* prepare insn slot */ + p->ainsn.insn[0] = cpu_to_le32(p->opcode); + + flush_icache_range((uintptr_t) (p->ainsn.insn), + (uintptr_t) (p->ainsn.insn) + + MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + + /* + * Needs restoring of return address after stepping xol. + */ + p->ainsn.restore = (unsigned long) p->addr + + sizeof(kprobe_opcode_t); +} + +static void __kprobes arch_prepare_simulate(struct kprobe *p) +{ + /* This instructions is not executed xol. 
No need to adjust the PC */ + p->ainsn.restore = 0; +} + +static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (p->ainsn.handler) + p->ainsn.handler((u32)p->opcode, (long)p->addr, regs); + + /* single step simulated, now go for post processing */ + post_kprobe_handler(kcb, regs); +} + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + unsigned long probe_addr = (unsigned long)p->addr; + extern char __start_rodata[]; + extern char __end_rodata[]; + + if (probe_addr & 0x3) + return -EINVAL; + + /* copy instruction */ + p->opcode = le32_to_cpu(*p->addr); + + if (in_exception_text(probe_addr)) + return -EINVAL; + if (probe_addr >= (unsigned long) __start_rodata && + probe_addr <= (unsigned long) __end_rodata) + return -EINVAL; + + /* decode instruction */ + switch (arm_kprobe_decode_insn(p->addr, &p->ainsn)) { + case INSN_REJECTED: /* insn not supported */ + return -EINVAL; + + case INSN_GOOD_NO_SLOT: /* insn need simulation */ + p->ainsn.insn = NULL; + break; + + case INSN_GOOD: /* instruction uses slot */ + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) + return -ENOMEM; + break; + }; + + /* prepare the instruction */ + if (p->ainsn.insn) + arch_prepare_ss_slot(p); + else + arch_prepare_simulate(p); + + return 0; +} + +static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) +{ + void *addrs[1]; + u32 insns[1]; + + addrs[0] = (void *)addr; + insns[0] = (u32)opcode; + + return aarch64_insn_patch_text(addrs, insns, 1); +} + +/* arm kprobe: install breakpoint in text */ +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + patch_text(p->addr, BRK64_OPCODE_KPROBES); +} + +/* disarm kprobe: remove breakpoint from text */ +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + patch_text(p->addr, p->opcode); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; + } +} + 
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; +} + +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + kcb->kprobe_status = kcb->prev_kprobe.status; +} + +static void __kprobes set_current_kprobe(struct kprobe *p) +{ + __this_cpu_write(current_kprobe, p); +} + +/* + * The D-flag (Debug mask) is set (masked) upon debug exception entry. + * Kprobes needs to clear (unmask) D-flag -ONLY- in case of recursive + * probe i.e. when probe hit from kprobe handler context upon + * executing the pre/post handlers. In this case we return with + * D-flag clear so that single-stepping can be carried-out. + * + * Leave D-flag set in all other cases. + */ +static void __kprobes +spsr_set_debug_flag(struct pt_regs *regs, int mask) +{ + unsigned long spsr = regs->pstate; + + if (mask) + spsr |= PSR_D_BIT; + else + spsr &= ~PSR_D_BIT; + + regs->pstate = spsr; +} + +/* + * Interrupts need to be disabled before single-step mode is set, and not + * reenabled until after single-step mode ends. + * Without disabling interrupt on local CPU, there is a chance of + * interrupt occurrence in the period of exception return and start of + * out-of-line single-step, that result in wrongly single stepping + * into the interrupt handler. 
+ */ +static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb, + struct pt_regs *regs) +{ + kcb->saved_irqflag = regs->pstate; + regs->pstate |= PSR_I_BIT; +} + +static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb, + struct pt_regs *regs) +{ + if (kcb->saved_irqflag & PSR_I_BIT) + regs->pstate |= PSR_I_BIT; + else + regs->pstate &= ~PSR_I_BIT; +} + +static void __kprobes +set_ss_context(struct kprobe_ctlblk *kcb, unsigned long addr) +{ + kcb->ss_ctx.ss_pending = true; + kcb->ss_ctx.match_addr = addr + sizeof(kprobe_opcode_t); +} + +static void __kprobes clear_ss_context(struct kprobe_ctlblk *kcb) +{ + kcb->ss_ctx.ss_pending = false; + kcb->ss_ctx.match_addr = 0; +} + +static void __kprobes setup_singlestep(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb, int reenter) +{ + unsigned long slot; + + if (reenter) { + save_previous_kprobe(kcb); + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_REENTER; + } else { + kcb->kprobe_status = KPROBE_HIT_SS; + } + + + if (p->ainsn.insn) { + /* prepare for single stepping */ + slot = (unsigned long)p->ainsn.insn; + + set_ss_context(kcb, slot); /* mark pending ss */ + + if (kcb->kprobe_status == KPROBE_REENTER) + spsr_set_debug_flag(regs, 0); + else + WARN_ON(regs->pstate & PSR_D_BIT); + + /* IRQs and single stepping do not mix well. 
*/ + kprobes_save_local_irqflag(kcb, regs); + kernel_enable_single_step(regs); + instruction_pointer_set(regs, slot); + } else { + /* insn simulation */ + arch_simulate_insn(p, regs); + } +} + +static int __kprobes reenter_kprobe(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + switch (kcb->kprobe_status) { + case KPROBE_HIT_SSDONE: + case KPROBE_HIT_ACTIVE: + kprobes_inc_nmissed_count(p); + setup_singlestep(p, regs, kcb, 1); + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + pr_warn("Unrecoverable kprobe detected at %p.\n", p->addr); + dump_kprobe(p); + BUG(); + break; + default: + WARN_ON(1); + return 0; + } + + return 1; +} + +static void __kprobes +post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + + if (!cur) + return; + + /* return addr restore if non-branching insn */ + if (cur->ainsn.restore != 0) + instruction_pointer_set(regs, cur->ainsn.restore); + + /* restore back original saved kprobe variables and continue */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + return; + } + /* call post handler */ + kcb->kprobe_status = KPROBE_HIT_SSDONE; + if (cur->post_handler) { + /* post_handler can hit breakpoint and single step + * again, so we enable D-flag for recursive exception. + */ + cur->post_handler(cur, regs, 0); + } + + reset_current_kprobe(); +} + +int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + switch (kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the ip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. 
+ */ + instruction_pointer_set(regs, (unsigned long) cur->addr); + if (!instruction_pointer(regs)) + BUG(); + + kernel_disable_single_step(); + if (kcb->kprobe_status == KPROBE_REENTER) + spsr_set_debug_flag(regs, 1); + + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); + + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accounting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, fsr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if (fixup_exception(regs)) + return 1; + } + return 0; +} + +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return NOTIFY_DONE; +} + +static void __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p, *cur_kprobe; + struct kprobe_ctlblk *kcb; + unsigned long addr = instruction_pointer(regs); + + kcb = get_kprobe_ctlblk(); + cur_kprobe = kprobe_running(); + + p = get_kprobe((kprobe_opcode_t *) addr); + + if (p) { + if (cur_kprobe) { + if (reenter_kprobe(p, regs, kcb)) + return; + } else { + /* Probe hit */ + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + /* + * If we have no pre-handler or it returned 0, we + * continue with normal processing. If we have a + * pre-handler and it returned non-zero, it prepped + * for calling the break_handler below on re-entry, + * so get out doing nothing more here. 
+ * + * pre_handler can hit a breakpoint and can step thru + * before return, keep PSTATE D-flag enabled until + * pre_handler return back. + */ + if (!p->pre_handler || !p->pre_handler(p, regs)) { + setup_singlestep(p, regs, kcb, 0); + return; + } + } + } else if ((le32_to_cpu(*(kprobe_opcode_t *) addr) == + BRK64_OPCODE_KPROBES) && cur_kprobe) { + /* We probably hit a jprobe. Call its break handler. */ + if (cur_kprobe->break_handler && + cur_kprobe->break_handler(cur_kprobe, regs)) { + setup_singlestep(cur_kprobe, regs, kcb, 0); + return; + } + } + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + * Return back to original instruction, and continue. + */ +} + +static int __kprobes +kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr) +{ + if ((kcb->ss_ctx.ss_pending) + && (kcb->ss_ctx.match_addr == addr)) { + clear_ss_context(kcb); /* clear pending ss */ + return DBG_HOOK_HANDLED; + } + /* not ours, kprobes should ignore it */ + return DBG_HOOK_ERROR; +} + +int __kprobes +kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + int retval; + + /* return error if this is not our step */ + retval = kprobe_ss_hit(kcb, instruction_pointer(regs)); + + if (retval == DBG_HOOK_HANDLED) { + kprobes_restore_local_irqflag(kcb, regs); + kernel_disable_single_step(); + + if (kcb->kprobe_status == KPROBE_REENTER) + spsr_set_debug_flag(regs, 1); + + post_kprobe_handler(kcb, regs); + } + + return retval; +} + +int __kprobes +kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) +{ + kprobe_handler(regs); + return DBG_HOOK_HANDLED; +} + +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + struct kprobe_ctlblk *kcb = 
get_kprobe_ctlblk(); + long stack_ptr = kernel_stack_pointer(regs); + + kcb->jprobe_saved_regs = *regs; + /* + * As Linus pointed out, gcc assumes that the callee + * owns the argument space and could overwrite it, e.g. + * tailcall optimization. So, to be absolutely safe + * we also save and restore enough stack bytes to cover + * the argument area. + */ + kasan_disable_current(); + memcpy(kcb->jprobes_stack, (void *)stack_ptr, + min_stack_size(stack_ptr)); + kasan_enable_current(); + + instruction_pointer_set(regs, (unsigned long) jp->entry); + preempt_disable(); + pause_graph_tracing(); + return 1; +} + +void __kprobes jprobe_return(void) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + /* + * Jprobe handler return by entering break exception, + * encoded same as kprobe, but with following conditions + * -a special PC to identify it from the other kprobes. + * -restore stack addr to original saved pt_regs + */ + asm volatile(" mov sp, %0 \n" + "jprobe_return_break: brk %1 \n" + : + : "r" (kcb->jprobe_saved_regs.sp), + "I" (BRK64_ESR_KPROBES) + : "memory"); + + unreachable(); +} + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + long stack_addr = kcb->jprobe_saved_regs.sp; + long orig_sp = kernel_stack_pointer(regs); + struct jprobe *jp = container_of(p, struct jprobe, kp); + extern const char jprobe_return_break[]; + + if (instruction_pointer(regs) != (u64) jprobe_return_break) + return 0; + + if (orig_sp != stack_addr) { + struct pt_regs *saved_regs = + (struct pt_regs *)kcb->jprobe_saved_regs.sp; + pr_err("current sp %lx does not match saved sp %lx\n", + orig_sp, stack_addr); + pr_err("Saved registers for jprobe %p\n", jp); + show_regs(saved_regs); + pr_err("Current registers\n"); + show_regs(regs); + BUG(); + } + unpause_graph_tracing(); + *regs = kcb->jprobe_saved_regs; + kasan_disable_current(); + memcpy((void *)stack_addr, kcb->jprobes_stack, + 
min_stack_size(stack_addr)); + kasan_enable_current(); + preempt_enable_no_resched(); + return 1; +} + +bool arch_within_kprobe_blacklist(unsigned long addr) +{ + extern char __idmap_text_start[], __idmap_text_end[]; + extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; + + if ((addr >= (unsigned long)__kprobes_text_start && + addr < (unsigned long)__kprobes_text_end) || + (addr >= (unsigned long)__entry_text_start && + addr < (unsigned long)__entry_text_end) || + (addr >= (unsigned long)__idmap_text_start && + addr < (unsigned long)__idmap_text_end) || + !!search_exception_tables(addr)) + return true; + + if (!is_kernel_in_hyp_mode()) { + if ((addr >= (unsigned long)__hyp_text_start && + addr < (unsigned long)__hyp_text_end) || + (addr >= (unsigned long)__hyp_idmap_text_start && + addr < (unsigned long)__hyp_idmap_text_end)) + return true; + } + + return false; +} + +void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs) +{ + struct kretprobe_instance *ri = NULL; + struct hlist_head *head, empty_rp; + struct hlist_node *tmp; + unsigned long flags, orig_ret_address = 0; + unsigned long trampoline_address = + (unsigned long)&kretprobe_trampoline; + kprobe_opcode_t *correct_ret_addr = NULL; + + INIT_HLIST_HEAD(&empty_rp); + kretprobe_hash_lock(current, &head, &flags); + + /* + * It is possible to have multiple instances associated with a given + * task either because multiple functions in the call path have + * return probes installed on them, and/or more than one + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always pushed into the head of the list + * - when multiple return probes are registered for the same + * function, the (chronologically) first instance's ret_addr + * will be the real return address, and all the rest will + * point to kretprobe_trampoline. 
+ */ + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + if (ri->rp && ri->rp->handler) { + __this_cpu_write(current_kprobe, &ri->rp->kp); + get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; + ri->ret_addr = correct_ret_addr; + ri->rp->handler(ri, regs); + __this_cpu_write(current_kprobe, NULL); + } + + recycle_rp_inst(ri, &empty_rp); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. 
Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_hash_unlock(current, &flags); + + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { + hlist_del(&ri->hlist); + kfree(ri); + } + return (void *)orig_ret_address; +} + +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *)regs->regs[30]; + + /* replace return addr (x30) with trampoline */ + regs->regs[30] = (long)&kretprobe_trampoline; +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + return 0; +} + +int __init arch_init_kprobes(void) +{ + return 0; +} diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S new file mode 100644 index 000000000000..5d6e7f14638c --- /dev/null +++ b/arch/arm64/kernel/probes/kprobes_trampoline.S @@ -0,0 +1,81 @@ +/* + * trampoline entry and return code for kretprobes. + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/assembler.h> + + .text + + .macro save_all_base_regs + stp x0, x1, [sp, #S_X0] + stp x2, x3, [sp, #S_X2] + stp x4, x5, [sp, #S_X4] + stp x6, x7, [sp, #S_X6] + stp x8, x9, [sp, #S_X8] + stp x10, x11, [sp, #S_X10] + stp x12, x13, [sp, #S_X12] + stp x14, x15, [sp, #S_X14] + stp x16, x17, [sp, #S_X16] + stp x18, x19, [sp, #S_X18] + stp x20, x21, [sp, #S_X20] + stp x22, x23, [sp, #S_X22] + stp x24, x25, [sp, #S_X24] + stp x26, x27, [sp, #S_X26] + stp x28, x29, [sp, #S_X28] + add x0, sp, #S_FRAME_SIZE + stp lr, x0, [sp, #S_LR] + /* + * Construct a useful saved PSTATE + */ + mrs x0, nzcv + mrs x1, daif + orr x0, x0, x1 + mrs x1, CurrentEL + orr x0, x0, x1 + mrs x1, SPSel + orr x0, x0, x1 + stp xzr, x0, [sp, #S_PC] + .endm + + .macro restore_all_base_regs + ldr x0, [sp, #S_PSTATE] + and x0, x0, #(PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT) + msr nzcv, x0 + ldp x0, x1, [sp, #S_X0] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, 
#S_X4] + ldp x6, x7, [sp, #S_X6] + ldp x8, x9, [sp, #S_X8] + ldp x10, x11, [sp, #S_X10] + ldp x12, x13, [sp, #S_X12] + ldp x14, x15, [sp, #S_X14] + ldp x16, x17, [sp, #S_X16] + ldp x18, x19, [sp, #S_X18] + ldp x20, x21, [sp, #S_X20] + ldp x22, x23, [sp, #S_X22] + ldp x24, x25, [sp, #S_X24] + ldp x26, x27, [sp, #S_X26] + ldp x28, x29, [sp, #S_X28] + .endm + +ENTRY(kretprobe_trampoline) + sub sp, sp, #S_FRAME_SIZE + + save_all_base_regs + + mov x0, sp + bl trampoline_probe_handler + /* + * Replace trampoline address in lr with actual orig_ret_addr return + * address. + */ + mov lr, x0 + + restore_all_base_regs + + add sp, sp, #S_FRAME_SIZE + ret + +ENDPROC(kretprobe_trampoline) diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c new file mode 100644 index 000000000000..8977ce9d009d --- /dev/null +++ b/arch/arm64/kernel/probes/simulate-insn.c @@ -0,0 +1,217 @@ +/* + * arch/arm64/kernel/probes/simulate-insn.c + * + * Copyright (C) 2013 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> + +#include "simulate-insn.h" + +#define sign_extend(x, signbit) \ + ((x) | (0 - ((x) & (1 << (signbit))))) + +#define bbl_displacement(insn) \ + sign_extend(((insn) & 0x3ffffff) << 2, 27) + +#define bcond_displacement(insn) \ + sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + +#define cbz_displacement(insn) \ + sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + +#define tbz_displacement(insn) \ + sign_extend(((insn >> 5) & 0x3fff) << 2, 15) + +#define ldr_displacement(insn) \ + sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + +static inline void set_x_reg(struct pt_regs *regs, int reg, u64 val) +{ + if (reg < 31) + regs->regs[reg] = val; +} + +static inline void set_w_reg(struct pt_regs *regs, int reg, u64 val) +{ + if (reg < 31) + regs->regs[reg] = lower_32_bits(val); +} + +static inline u64 get_x_reg(struct pt_regs *regs, int reg) +{ + if (reg < 31) + return regs->regs[reg]; + else + return 0; +} + +static inline u32 get_w_reg(struct pt_regs *regs, int reg) +{ + if (reg < 31) + return lower_32_bits(regs->regs[reg]); + else + return 0; +} + +static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + + return (opcode & (1 << 31)) ? + (get_x_reg(regs, xn) == 0) : (get_w_reg(regs, xn) == 0); +} + +static bool __kprobes check_cbnz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + + return (opcode & (1 << 31)) ? 
+ (get_x_reg(regs, xn) != 0) : (get_w_reg(regs, xn) != 0); +} + +static bool __kprobes check_tbz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + int bit_pos = ((opcode & (1 << 31)) >> 26) | ((opcode >> 19) & 0x1f); + + return ((get_x_reg(regs, xn) >> bit_pos) & 0x1) == 0; +} + +static bool __kprobes check_tbnz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + int bit_pos = ((opcode & (1 << 31)) >> 26) | ((opcode >> 19) & 0x1f); + + return ((get_x_reg(regs, xn) >> bit_pos) & 0x1) != 0; +} + +/* + * instruction simulation functions + */ +void __kprobes +simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs) +{ + long imm, xn, val; + + xn = opcode & 0x1f; + imm = ((opcode >> 3) & 0x1ffffc) | ((opcode >> 29) & 0x3); + imm = sign_extend(imm, 20); + if (opcode & 0x80000000) + val = (imm<<12) + (addr & 0xfffffffffffff000); + else + val = imm + addr; + + set_x_reg(regs, xn, val); + + instruction_pointer_set(regs, instruction_pointer(regs) + 4); +} + +void __kprobes +simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = bbl_displacement(opcode); + + /* Link register is x30 */ + if (opcode & (1 << 31)) + set_x_reg(regs, 30, addr + 4); + + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = 4; + + if (aarch32_opcode_cond_checks[opcode & 0xf](regs->pstate & 0xffffffff)) + disp = bcond_displacement(opcode); + + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs) +{ + int xn = (opcode >> 5) & 0x1f; + + /* update pc first in case we're doing a "blr lr" */ + instruction_pointer_set(regs, get_x_reg(regs, xn)); + + /* Link register is x30 */ + if (((opcode >> 21) & 0x3) == 1) + set_x_reg(regs, 30, addr + 4); +} + +void __kprobes +simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = 4; + + if (opcode & (1 << 24)) { + if 
(check_cbnz(opcode, regs)) + disp = cbz_displacement(opcode); + } else { + if (check_cbz(opcode, regs)) + disp = cbz_displacement(opcode); + } + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = 4; + + if (opcode & (1 << 24)) { + if (check_tbnz(opcode, regs)) + disp = tbz_displacement(opcode); + } else { + if (check_tbz(opcode, regs)) + disp = tbz_displacement(opcode); + } + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs) +{ + u64 *load_addr; + int xn = opcode & 0x1f; + int disp; + + disp = ldr_displacement(opcode); + load_addr = (u64 *) (addr + disp); + + if (opcode & (1 << 30)) /* x0-x30 */ + set_x_reg(regs, xn, *load_addr); + else /* w0-w30 */ + set_w_reg(regs, xn, *load_addr); + + instruction_pointer_set(regs, instruction_pointer(regs) + 4); +} + +void __kprobes +simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs) +{ + s32 *load_addr; + int xn = opcode & 0x1f; + int disp; + + disp = ldr_displacement(opcode); + load_addr = (s32 *) (addr + disp); + + set_x_reg(regs, xn, *load_addr); + + instruction_pointer_set(regs, instruction_pointer(regs) + 4); +} diff --git a/arch/arm64/kernel/probes/simulate-insn.h b/arch/arm64/kernel/probes/simulate-insn.h new file mode 100644 index 000000000000..050bde683c2d --- /dev/null +++ b/arch/arm64/kernel/probes/simulate-insn.h @@ -0,0 +1,28 @@ +/* + * arch/arm64/kernel/probes/simulate-insn.h + * + * Copyright (C) 2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + */ + +#ifndef _ARM_KERNEL_KPROBES_SIMULATE_INSN_H +#define _ARM_KERNEL_KPROBES_SIMULATE_INSN_H + +void simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs); +void simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs); +void simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs); +void simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs); +void simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs); +void simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs); +void simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs); +void simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs); + +#endif /* _ARM_KERNEL_KPROBES_SIMULATE_INSN_H */ diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 3f6cd5c5234f..030c1d5aa46d 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -48,6 +48,107 @@ #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define REG_OFFSET_END {.name = NULL, .offset = 0} +#define GPR_OFFSET_NAME(r) \ + {.name = "x" #r, .offset = offsetof(struct pt_regs, regs[r])} + +static const struct pt_regs_offset regoffset_table[] = { + GPR_OFFSET_NAME(0), + GPR_OFFSET_NAME(1), + GPR_OFFSET_NAME(2), + GPR_OFFSET_NAME(3), + GPR_OFFSET_NAME(4), + GPR_OFFSET_NAME(5), + GPR_OFFSET_NAME(6), + GPR_OFFSET_NAME(7), + GPR_OFFSET_NAME(8), + GPR_OFFSET_NAME(9), + GPR_OFFSET_NAME(10), + GPR_OFFSET_NAME(11), + GPR_OFFSET_NAME(12), + GPR_OFFSET_NAME(13), + GPR_OFFSET_NAME(14), + GPR_OFFSET_NAME(15), + GPR_OFFSET_NAME(16), + GPR_OFFSET_NAME(17), + GPR_OFFSET_NAME(18), + GPR_OFFSET_NAME(19), + GPR_OFFSET_NAME(20), + GPR_OFFSET_NAME(21), + GPR_OFFSET_NAME(22), + GPR_OFFSET_NAME(23), + GPR_OFFSET_NAME(24), + GPR_OFFSET_NAME(25), + GPR_OFFSET_NAME(26), + 
GPR_OFFSET_NAME(27), + GPR_OFFSET_NAME(28), + GPR_OFFSET_NAME(29), + GPR_OFFSET_NAME(30), + {.name = "lr", .offset = offsetof(struct pt_regs, regs[30])}, + REG_OFFSET_NAME(sp), + REG_OFFSET_NAME(pc), + REG_OFFSET_NAME(pstate), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_within_kernel_stack() - check the address in the stack + * @regs: pt_regs which contains kernel stack pointer. + * @addr: address which is checked. + * + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). + * If @addr is within the kernel stack, it returns true. If not, returns false. + */ +static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) +{ + return ((addr & ~(THREAD_SIZE - 1)) == + (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) || + on_irq_stack(addr, raw_smp_processor_id()); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specified by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return *addr; + else + return 0; +} + /* * TODO: does not yet catch signals sent when the child dies. * in exit.c or in signal.c. 
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S new file mode 100644 index 000000000000..51b73cdde287 --- /dev/null +++ b/arch/arm64/kernel/relocate_kernel.S @@ -0,0 +1,130 @@ +/* + * kexec for arm64 + * + * Copyright (C) Linaro. + * Copyright (C) Huawei Futurewei Technologies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kexec.h> +#include <linux/linkage.h> + +#include <asm/assembler.h> +#include <asm/kexec.h> +#include <asm/page.h> +#include <asm/sysreg.h> + +/* + * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it. + * + * The memory that the old kernel occupies may be overwritten when copying the + * new image to its final location. To assure that the + * arm64_relocate_new_kernel routine which does that copy is not overwritten, + * all code and data needed by arm64_relocate_new_kernel must be between the + * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The + * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec + * control_code_page, a special page which has been set up to be preserved + * during the copy operation. + */ +ENTRY(arm64_relocate_new_kernel) + + /* Set up the list loop variables. */ + mov x17, x1 /* x17 = kimage_start */ + mov x16, x0 /* x16 = kimage_head */ + dcache_line_size x15, x0 /* x15 = dcache line size */ + mov x14, xzr /* x14 = entry ptr */ + mov x13, xzr /* x13 = copy dest */ + + /* Clear the sctlr_el2 flags. */ + mrs x0, CurrentEL + cmp x0, #CurrentEL_EL2 + b.ne 1f + mrs x0, sctlr_el2 + ldr x1, =SCTLR_ELx_FLAGS + bic x0, x0, x1 + msr sctlr_el2, x0 + isb +1: + + /* Check if the new image needs relocation. */ + tbnz x16, IND_DONE_BIT, .Ldone + +.Lloop: + and x12, x16, PAGE_MASK /* x12 = addr */ + + /* Test the entry flags. 
*/ +.Ltest_source: + tbz x16, IND_SOURCE_BIT, .Ltest_indirection + + /* Invalidate dest page to PoC. */ + mov x0, x13 + add x20, x0, #PAGE_SIZE + sub x1, x15, #1 + bic x0, x0, x1 +2: dc ivac, x0 + add x0, x0, x15 + cmp x0, x20 + b.lo 2b + dsb sy + + mov x20, x13 + mov x21, x12 + copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7 + + /* dest += PAGE_SIZE */ + add x13, x13, PAGE_SIZE + b .Lnext + +.Ltest_indirection: + tbz x16, IND_INDIRECTION_BIT, .Ltest_destination + + /* ptr = addr */ + mov x14, x12 + b .Lnext + +.Ltest_destination: + tbz x16, IND_DESTINATION_BIT, .Lnext + + /* dest = addr */ + mov x13, x12 + +.Lnext: + /* entry = *ptr++ */ + ldr x16, [x14], #8 + + /* while (!(entry & DONE)) */ + tbz x16, IND_DONE_BIT, .Lloop + +.Ldone: + /* wait for writes from copy_page to finish */ + dsb nsh + ic iallu + dsb nsh + isb + + /* Start new image. */ + mov x0, xzr + mov x1, xzr + mov x2, xzr + mov x3, xzr + br x17 + +ENDPROC(arm64_relocate_new_kernel) + +.ltorg + +.align 3 /* To keep the 64-bit values below naturally aligned. */ + +.Lcopy_end: +.org KEXEC_CONTROL_PAGE_SIZE + +/* + * arm64_relocate_new_kernel_size - Number of bytes to copy to the + * control_code_page. 
+ */ +.globl arm64_relocate_new_kernel_size +arm64_relocate_new_kernel_size: + .quad .Lcopy_end - arm64_relocate_new_kernel diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 3279defabaa2..2981f1bdd073 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -202,7 +202,7 @@ static void __init request_standard_resources(void) struct resource *res; kernel_code.start = virt_to_phys(_text); - kernel_code.end = virt_to_phys(_etext - 1); + kernel_code.end = virt_to_phys(__init_begin - 1); kernel_data.start = virt_to_phys(_sdata); kernel_data.end = virt_to_phys(_end - 1); @@ -257,14 +257,17 @@ void __init setup_arch(char **cmdline_p) */ cpu_uninstall_idmap(); + xen_early_init(); efi_init(); arm64_memblock_init(); + paging_init(); + + acpi_table_upgrade(); + /* Parse the ACPI tables for possible boot-time configuration */ acpi_boot_table_init(); - paging_init(); - if (acpi_disabled) unflatten_device_tree(); @@ -281,8 +284,6 @@ void __init setup_arch(char **cmdline_p) else psci_acpi_init(); - xen_early_init(); - cpu_read_bootcpu_ops(); smp_init_cpus(); smp_build_mpidr_hash(); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 62ff3c0622e2..76a6d9263908 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -267,7 +267,6 @@ asmlinkage void secondary_start_kernel(void) set_cpu_online(cpu, true); complete(&cpu_running); - local_dbg_enable(); local_irq_enable(); local_async_enable(); @@ -437,9 +436,9 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); cpuinfo_store_boot_cpu(); save_boot_cpu_run_el(); - set_my_cpu_offset(per_cpu_offset(smp_processor_id())); } static u64 __init of_get_cpu_mpidr(struct device_node *dn) @@ -560,6 +559,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) */ acpi_set_mailbox_entry(cpu_count, processor); + early_map_cpu_to_node(cpu_count, 
acpi_numa_get_nid(cpu_count, hwid)); + cpu_count++; } @@ -694,6 +695,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) smp_store_cpu_info(smp_processor_id()); /* + * If UP is mandated by "nosmp" (which implies "maxcpus=0"), don't set + * secondary CPUs present. + */ + if (max_cpus == 0) + return; + + /* * Initialise the present map (which describes the set of CPUs * actually populated at the present time) and release the * secondaries from the bootloader. diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 2a43012616b7..e04f83873af7 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -41,6 +41,7 @@ #include <asm/stacktrace.h> #include <asm/exception.h> #include <asm/system_misc.h> +#include <asm/sysreg.h> static const char *handler[]= { "Synchronous Abort", @@ -52,15 +53,14 @@ static const char *handler[]= { int show_unhandled_signals = 1; /* - * Dump out the contents of some memory nicely... + * Dump out the contents of some kernel memory nicely... */ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, - unsigned long top, bool compat) + unsigned long top) { unsigned long first; mm_segment_t fs; int i; - unsigned int width = compat ? 
4 : 8; /* * We need to switch to kernel mode so that we can use __get_user @@ -78,22 +78,15 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, memset(str, ' ', sizeof(str)); str[sizeof(str) - 1] = '\0'; - for (p = first, i = 0; i < (32 / width) - && p < top; i++, p += width) { + for (p = first, i = 0; i < (32 / 8) + && p < top; i++, p += 8) { if (p >= bottom && p < top) { unsigned long val; - if (width == 8) { - if (__get_user(val, (unsigned long *)p) == 0) - sprintf(str + i * 17, " %016lx", val); - else - sprintf(str + i * 17, " ????????????????"); - } else { - if (__get_user(val, (unsigned int *)p) == 0) - sprintf(str + i * 9, " %08lx", val); - else - sprintf(str + i * 9, " ????????"); - } + if (__get_user(val, (unsigned long *)p) == 0) + sprintf(str + i * 17, " %016lx", val); + else + sprintf(str + i * 17, " ????????????????"); } } printk("%s%04lx:%s\n", lvl, first & 0xffff, str); @@ -216,7 +209,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); dump_mem("", "Exception stack", stack, - stack + sizeof(struct pt_regs), false); + stack + sizeof(struct pt_regs)); } } } @@ -254,10 +247,9 @@ static int __die(const char *str, int err, struct thread_info *thread, pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n", TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1); - if (!user_mode(regs) || in_interrupt()) { + if (!user_mode(regs)) { dump_mem(KERN_EMERG, "Stack: ", regs->sp, - THREAD_SIZE + (unsigned long)task_stack_page(tsk), - compat_user_mode(regs)); + THREAD_SIZE + (unsigned long)task_stack_page(tsk)); dump_backtrace(regs, tsk); dump_instr(KERN_EMERG, regs); } @@ -373,11 +365,59 @@ exit: return fn ? 
fn(regs, instr) : 1; } -asmlinkage void __exception do_undefinstr(struct pt_regs *regs) +static void force_signal_inject(int signal, int code, struct pt_regs *regs, + unsigned long address) { siginfo_t info; void __user *pc = (void __user *)instruction_pointer(regs); + const char *desc; + switch (signal) { + case SIGILL: + desc = "undefined instruction"; + break; + case SIGSEGV: + desc = "illegal memory access"; + break; + default: + desc = "bad mode"; + break; + } + + if (unhandled_signal(current, signal) && + show_unhandled_signals_ratelimited()) { + pr_info("%s[%d]: %s: pc=%p\n", + current->comm, task_pid_nr(current), desc, pc); + dump_instr(KERN_INFO, regs); + } + + info.si_signo = signal; + info.si_errno = 0; + info.si_code = code; + info.si_addr = pc; + + arm64_notify_die(desc, regs, &info, 0); +} + +/* + * Set up process info to signal segmentation fault - called on access error. + */ +void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr) +{ + int code; + + down_read(&current->mm->mmap_sem); + if (find_vma(current->mm, addr) == NULL) + code = SEGV_MAPERR; + else + code = SEGV_ACCERR; + up_read(&current->mm->mmap_sem); + + force_signal_inject(SIGSEGV, code, regs, addr); +} + +asmlinkage void __exception do_undefinstr(struct pt_regs *regs) +{ /* check for AArch32 breakpoint instructions */ if (!aarch32_break_handler(regs)) return; @@ -385,18 +425,66 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) if (call_undef_hook(regs) == 0) return; - if (unhandled_signal(current, SIGILL) && show_unhandled_signals_ratelimited()) { - pr_info("%s[%d]: undefined instruction: pc=%p\n", - current->comm, task_pid_nr(current), pc); - dump_instr(KERN_INFO, regs); - } + force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); +} - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_ILLOPC; - info.si_addr = pc; +void cpu_enable_cache_maint_trap(void *__unused) +{ + config_sctlr_el1(SCTLR_EL1_UCI, 0); +} + +#define __user_cache_maint(insn, address, res) \ 
+ asm volatile ( \ + "1: " insn ", %1\n" \ + " mov %w0, #0\n" \ + "2:\n" \ + " .pushsection .fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: mov %w0, %w2\n" \ + " b 2b\n" \ + " .popsection\n" \ + _ASM_EXTABLE(1b, 3b) \ + : "=r" (res) \ + : "r" (address), "i" (-EFAULT) ) + +asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) +{ + unsigned long address; + int ret; - arm64_notify_die("Oops - undefined instruction", regs, &info, 0); + /* if this is a write with: Op0=1, Op2=1, Op1=3, CRn=7 */ + if ((esr & 0x01fffc01) == 0x0012dc00) { + int rt = (esr >> 5) & 0x1f; + int crm = (esr >> 1) & 0x0f; + + address = (rt == 31) ? 0 : regs->regs[rt]; + + switch (crm) { + case 11: /* DC CVAU, gets promoted */ + __user_cache_maint("dc civac", address, ret); + break; + case 10: /* DC CVAC, gets promoted */ + __user_cache_maint("dc civac", address, ret); + break; + case 14: /* DC CIVAC */ + __user_cache_maint("dc civac", address, ret); + break; + case 5: /* IC IVAU */ + __user_cache_maint("ic ivau", address, ret); + break; + default: + force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); + return; + } + } else { + force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); + return; + } + + if (ret) + arm64_notify_segfault(regs, address); + else + regs->pc += 4; } long compat_arm_syscall(struct pt_regs *regs); @@ -465,7 +553,7 @@ static const char *esr_class_str[] = { const char *esr_get_class_string(u32 esr) { - return esr_class_str[esr >> ESR_ELx_EC_SHIFT]; + return esr_class_str[ESR_ELx_EC(esr)]; } /* diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 9fefb005812a..076312b17d4f 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -214,10 +214,16 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { + /* tkr_mono.cycle_last == tkr_raw.cycle_last */ vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; + vdso_data->raw_time_sec = tk->raw_time.tv_sec; + 
vdso_data->raw_time_nsec = tk->raw_time.tv_nsec; vdso_data->xtime_clock_sec = tk->xtime_sec; vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; - vdso_data->cs_mult = tk->tkr_mono.mult; + /* tkr_raw.xtime_nsec == 0 */ + vdso_data->cs_mono_mult = tk->tkr_mono.mult; + vdso_data->cs_raw_mult = tk->tkr_raw.mult; + /* tkr_mono.shift == tkr_raw.shift */ vdso_data->cs_shift = tk->tkr_mono.shift; } diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index b467fd0a384b..62c84f7cb01b 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -23,7 +23,7 @@ GCOV_PROFILE := n ccflags-y += -Wl,-shared obj-y += vdso.o -extra-y += vdso.lds vdso-offsets.h +extra-y += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) # Force dependency (incbin is bad) @@ -42,11 +42,10 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh quiet_cmd_vdsosym = VDSOSYM $@ define cmd_vdsosym - $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ && \ - cp $@ include/generated/ + $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ endef -$(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE +include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) # Assembly rules for the .S files diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index efa79e8d4196..e00b4671bd7c 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -26,24 +26,109 @@ #define NSEC_PER_SEC_HI16 0x3b9a vdso_data .req x6 -use_syscall .req w7 -seqcnt .req w8 +seqcnt .req w7 +w_tmp .req w8 +x_tmp .req x8 + +/* + * Conventions for macro arguments: + * - An argument is write-only if its name starts with "res". + * - All other arguments are read-only, unless otherwise specified. 
+ */ .macro seqcnt_acquire 9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT] tbnz seqcnt, #0, 9999b dmb ishld - ldr use_syscall, [vdso_data, #VDSO_USE_SYSCALL] .endm - .macro seqcnt_read, cnt + .macro seqcnt_check fail dmb ishld - ldr \cnt, [vdso_data, #VDSO_TB_SEQ_COUNT] + ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT] + cmp w_tmp, seqcnt + b.ne \fail .endm - .macro seqcnt_check, cnt, fail - cmp \cnt, seqcnt - b.ne \fail + .macro syscall_check fail + ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL] + cbnz w_tmp, \fail + .endm + + .macro get_nsec_per_sec res + mov \res, #NSEC_PER_SEC_LO16 + movk \res, #NSEC_PER_SEC_HI16, lsl #16 + .endm + + /* + * Returns the clock delta, in nanoseconds left-shifted by the clock + * shift. + */ + .macro get_clock_shifted_nsec res, cycle_last, mult + /* Read the virtual counter. */ + isb + mrs x_tmp, cntvct_el0 + /* Calculate cycle delta and convert to ns. */ + sub \res, x_tmp, \cycle_last + /* We can only guarantee 56 bits of precision. */ + movn x_tmp, #0xff00, lsl #48 + and \res, x_tmp, \res + mul \res, \res, \mult + .endm + + /* + * Returns in res_{sec,nsec} the REALTIME timespec, based on the + * "wall time" (xtime) and the clock_mono delta. + */ + .macro get_ts_realtime res_sec, res_nsec, \ + clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec + add \res_nsec, \clock_nsec, \xtime_nsec + udiv x_tmp, \res_nsec, \nsec_to_sec + add \res_sec, \xtime_sec, x_tmp + msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec + .endm + + /* + * Returns in res_{sec,nsec} the timespec based on the clock_raw delta, + * used for CLOCK_MONOTONIC_RAW. + */ + .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec + udiv \res_sec, \clock_nsec, \nsec_to_sec + msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec + .endm + + /* sec and nsec are modified in place. */ + .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec + /* Add timespec. */ + add \sec, \sec, \ts_sec + add \nsec, \nsec, \ts_nsec + + /* Normalise the new timespec. 
*/ + cmp \nsec, \nsec_to_sec + b.lt 9999f + sub \nsec, \nsec, \nsec_to_sec + add \sec, \sec, #1 +9999: + cmp \nsec, #0 + b.ge 9998f + add \nsec, \nsec, \nsec_to_sec + sub \sec, \sec, #1 +9998: + .endm + + .macro clock_gettime_return, shift=0 + .if \shift == 1 + lsr x11, x11, x12 + .endif + stp x10, x11, [x1, #TSPEC_TV_SEC] + mov x0, xzr + ret + .endm + + .macro jump_slot jumptable, index, label + .if (. - \jumptable) != 4 * (\index) + .error "Jump slot index mismatch" + .endif + b \label .endm .text @@ -51,18 +136,25 @@ seqcnt .req w8 /* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */ ENTRY(__kernel_gettimeofday) .cfi_startproc - mov x2, x30 - .cfi_register x30, x2 - - /* Acquire the sequence counter and get the timespec. */ adr vdso_data, _vdso_data -1: seqcnt_acquire - cbnz use_syscall, 4f - /* If tv is NULL, skip to the timezone code. */ cbz x0, 2f - bl __do_get_tspec - seqcnt_check w9, 1b + + /* Compute the time of day. */ +1: seqcnt_acquire + syscall_check fail=4f + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_mono_mult, w12 = cs_shift */ + ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + seqcnt_check fail=1b + + get_nsec_per_sec res=x9 + lsl x9, x9, x12 + + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_realtime res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 /* Convert ns to us. */ mov x13, #1000 @@ -76,95 +168,126 @@ ENTRY(__kernel_gettimeofday) stp w4, w5, [x1, #TZ_MINWEST] 3: mov x0, xzr - ret x2 + ret 4: /* Syscall fallback. 
*/ mov x8, #__NR_gettimeofday svc #0 - ret x2 + ret .cfi_endproc ENDPROC(__kernel_gettimeofday) +#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE + /* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */ ENTRY(__kernel_clock_gettime) .cfi_startproc - cmp w0, #CLOCK_REALTIME - ccmp w0, #CLOCK_MONOTONIC, #0x4, ne - b.ne 2f + cmp w0, #JUMPSLOT_MAX + b.hi syscall + adr vdso_data, _vdso_data + adr x_tmp, jumptable + add x_tmp, x_tmp, w0, uxtw #2 + br x_tmp + + ALIGN +jumptable: + jump_slot jumptable, CLOCK_REALTIME, realtime + jump_slot jumptable, CLOCK_MONOTONIC, monotonic + b syscall + b syscall + jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw + jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse + jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse + + .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1) + .error "Wrong jumptable size" + .endif + + ALIGN +realtime: + seqcnt_acquire + syscall_check fail=syscall + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_mono_mult, w12 = cs_shift */ + ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + seqcnt_check fail=realtime - mov x2, x30 - .cfi_register x30, x2 + /* All computations are done with left-shifted nsecs. */ + get_nsec_per_sec res=x9 + lsl x9, x9, x12 - /* Get kernel timespec. 
*/ - adr vdso_data, _vdso_data -1: seqcnt_acquire - cbnz use_syscall, 7f + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_realtime res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 + clock_gettime_return, shift=1 - bl __do_get_tspec - seqcnt_check w9, 1b + ALIGN +monotonic: + seqcnt_acquire + syscall_check fail=syscall + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_mono_mult, w12 = cs_shift */ + ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC] + seqcnt_check fail=monotonic - mov x30, x2 + /* All computations are done with left-shifted nsecs. */ + lsl x4, x4, x12 + get_nsec_per_sec res=x9 + lsl x9, x9, x12 - cmp w0, #CLOCK_MONOTONIC - b.ne 6f + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_realtime res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 - /* Get wtm timespec. */ - ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] + add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9 + clock_gettime_return, shift=1 - /* Check the sequence counter. */ - seqcnt_read w9 - seqcnt_check w9, 1b - b 4f -2: - cmp w0, #CLOCK_REALTIME_COARSE - ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne - b.ne 8f + ALIGN +monotonic_raw: + seqcnt_acquire + syscall_check fail=syscall + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_raw_mult, w12 = cs_shift */ + ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT] + ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC] + seqcnt_check fail=monotonic_raw - /* xtime_coarse_nsec is already right-shifted */ - mov x12, #0 + /* All computations are done with left-shifted nsecs. */ + lsl x14, x14, x12 + get_nsec_per_sec res=x9 + lsl x9, x9, x12 - /* Get coarse timespec. 
*/ - adr vdso_data, _vdso_data -3: seqcnt_acquire + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_clock_raw res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, nsec_to_sec=x9 + + add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 + clock_gettime_return, shift=1 + + ALIGN +realtime_coarse: + seqcnt_acquire ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] + seqcnt_check fail=realtime_coarse + clock_gettime_return - /* Get wtm timespec. */ + ALIGN +monotonic_coarse: + seqcnt_acquire + ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] + seqcnt_check fail=monotonic_coarse - /* Check the sequence counter. */ - seqcnt_read w9 - seqcnt_check w9, 3b + /* Computations are done in (non-shifted) nsecs. */ + get_nsec_per_sec res=x9 + add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 + clock_gettime_return - cmp w0, #CLOCK_MONOTONIC_COARSE - b.ne 6f -4: - /* Add on wtm timespec. */ - add x10, x10, x13 - lsl x14, x14, x12 - add x11, x11, x14 - - /* Normalise the new timespec. */ - mov x15, #NSEC_PER_SEC_LO16 - movk x15, #NSEC_PER_SEC_HI16, lsl #16 - lsl x15, x15, x12 - cmp x11, x15 - b.lt 5f - sub x11, x11, x15 - add x10, x10, #1 -5: - cmp x11, #0 - b.ge 6f - add x11, x11, x15 - sub x10, x10, #1 - -6: /* Store to the user timespec. */ - lsr x11, x11, x12 - stp x10, x11, [x1, #TSPEC_TV_SEC] - mov x0, xzr - ret -7: - mov x30, x2 -8: /* Syscall fallback. */ + ALIGN +syscall: /* Syscall fallback. */ mov x8, #__NR_clock_gettime svc #0 ret @@ -176,6 +299,7 @@ ENTRY(__kernel_clock_getres) .cfi_startproc cmp w0, #CLOCK_REALTIME ccmp w0, #CLOCK_MONOTONIC, #0x4, ne + ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne b.ne 1f ldr x2, 5f @@ -203,46 +327,3 @@ ENTRY(__kernel_clock_getres) .quad CLOCK_COARSE_RES .cfi_endproc ENDPROC(__kernel_clock_getres) - -/* - * Read the current time from the architected counter. - * Expects vdso_data to be initialised. - * Clobbers the temporary registers (x9 - x15). 
- * Returns: - * - w9 = vDSO sequence counter - * - (x10, x11) = (ts->tv_sec, shifted ts->tv_nsec) - * - w12 = cs_shift - */ -ENTRY(__do_get_tspec) - .cfi_startproc - - /* Read from the vDSO data page. */ - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - ldp w11, w12, [vdso_data, #VDSO_CS_MULT] - seqcnt_read w9 - - /* Read the virtual counter. */ - isb - mrs x15, cntvct_el0 - - /* Calculate cycle delta and convert to ns. */ - sub x10, x15, x10 - /* We can only guarantee 56 bits of precision. */ - movn x15, #0xff00, lsl #48 - and x10, x15, x10 - mul x10, x10, x11 - - /* Use the kernel time to calculate the new timespec. */ - mov x11, #NSEC_PER_SEC_LO16 - movk x11, #NSEC_PER_SEC_HI16, lsl #16 - lsl x11, x11, x12 - add x15, x10, x14 - udiv x14, x15, x11 - add x10, x13, x14 - mul x13, x14, x11 - sub x11, x15, x13 - - ret - .cfi_endproc -ENDPROC(__do_get_tspec) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 435e820e898d..89d6e177ecbd 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -118,9 +118,11 @@ SECTIONS __exception_text_end = .; IRQENTRY_TEXT SOFTIRQENTRY_TEXT + ENTRY_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT + KPROBES_TEXT HYPERVISOR_TEXT IDMAP_TEXT HIBERNATE_TEXT @@ -131,12 +133,13 @@ SECTIONS } . = ALIGN(SEGMENT_ALIGN); - RO_DATA(PAGE_SIZE) /* everything from this point to */ - EXCEPTION_TABLE(8) /* _etext will be marked RO NX */ + _etext = .; /* End of text section */ + + RO_DATA(PAGE_SIZE) /* everything from this point to */ + EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */ NOTES . 
= ALIGN(SEGMENT_ALIGN); - _etext = .; /* End of text and rodata section */ __init_begin = .; INIT_TEXT_SECTION(8) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 3246c4aba5b1..fa96fe2bd469 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -106,7 +106,7 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) run->exit_reason = KVM_EXIT_DEBUG; run->debug.arch.hsr = hsr; - switch (hsr >> ESR_ELx_EC_SHIFT) { + switch (ESR_ELx_EC(hsr)) { case ESR_ELx_EC_WATCHPT_LOW: run->debug.arch.far = vcpu->arch.fault.far_el2; /* fall through */ @@ -149,7 +149,7 @@ static exit_handle_fn arm_exit_handlers[] = { static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) { u32 hsr = kvm_vcpu_get_hsr(vcpu); - u8 hsr_ec = hsr >> ESR_ELx_EC_SHIFT; + u8 hsr_ec = ESR_ELx_EC(hsr); if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || !arm_exit_handlers[hsr_ec]) { diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 778d0effa2af..0c85febcc1eb 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -17,6 +17,10 @@ obj-$(CONFIG_KVM_ARM_HOST) += tlb.o obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o +# KVM code is run at a different exception code with a different map, so +# compiler instrumentation that inserts callbacks or checks into the code may +# cause crashes. Just disable it. 
GCOV_PROFILE := n KASAN_SANITIZE := n UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 437cfad5e3d8..4373997d1a70 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -198,7 +198,7 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) { u64 esr = read_sysreg_el2(esr); - u8 ec = esr >> ESR_ELx_EC_SHIFT; + u8 ec = ESR_ELx_EC(esr); u64 hpfar, far; vcpu->arch.fault.esr_el2 = esr; diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 17e8306dca29..0b90497d4424 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -66,7 +66,7 @@ .endm end .req x5 -ENTRY(__copy_from_user) +ENTRY(__arch_copy_from_user) ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) add end, x0, x2 @@ -75,7 +75,7 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x0, #0 // Nothing to copy ret -ENDPROC(__copy_from_user) +ENDPROC(__arch_copy_from_user) .section .fixup,"ax" .align 2 diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 21faae60f988..7a7efe255034 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -65,7 +65,7 @@ .endm end .req x5 -ENTRY(__copy_to_user) +ENTRY(__arch_copy_to_user) ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) add end, x0, x2 @@ -74,7 +74,7 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x0, #0 ret -ENDPROC(__copy_to_user) +ENDPROC(__arch_copy_to_user) .section .fixup,"ax" .align 2 diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 50ff9ba3a236..07d7352d7c38 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -52,7 +52,7 @@ ENTRY(__flush_cache_user_range) sub x3, 
x2, #1 bic x4, x0, x3 1: -USER(9f, dc cvau, x4 ) // clean D line to PoU +user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE add x4, x4, x2 cmp x4, x1 b.lo 1b diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index c566ec83719f..f6c55afab3e2 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -19,6 +19,7 @@ #include <linux/gfp.h> #include <linux/acpi.h> +#include <linux/bootmem.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/genalloc.h> @@ -29,6 +30,8 @@ #include <asm/cacheflush.h> +static int swiotlb __read_mostly; + static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, bool coherent) { @@ -341,6 +344,13 @@ static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, return ret; } +static int __swiotlb_dma_supported(struct device *hwdev, u64 mask) +{ + if (swiotlb) + return swiotlb_dma_supported(hwdev, mask); + return 1; +} + static struct dma_map_ops swiotlb_dma_ops = { .alloc = __dma_alloc, .free = __dma_free, @@ -354,7 +364,7 @@ static struct dma_map_ops swiotlb_dma_ops = { .sync_single_for_device = __swiotlb_sync_single_for_device, .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu, .sync_sg_for_device = __swiotlb_sync_sg_for_device, - .dma_supported = swiotlb_dma_supported, + .dma_supported = __swiotlb_dma_supported, .mapping_error = swiotlb_dma_mapping_error, }; @@ -513,6 +523,9 @@ EXPORT_SYMBOL(dummy_dma_ops); static int __init arm64_dma_init(void) { + if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + swiotlb = 1; + return atomic_pool_init(); } arch_initcall(arm64_dma_init); @@ -848,15 +861,16 @@ static int __iommu_attach_notifier(struct notifier_block *nb, { struct iommu_dma_notifier_data *master, *tmp; - if (action != BUS_NOTIFY_ADD_DEVICE) + if (action != BUS_NOTIFY_BIND_DRIVER) return 0; mutex_lock(&iommu_dma_notifier_lock); list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) { - if 
(do_iommu_attach(master->dev, master->ops, - master->dma_base, master->size)) { + if (data == master->dev && do_iommu_attach(master->dev, + master->ops, master->dma_base, master->size)) { list_del(&master->list); kfree(master); + break; } } mutex_unlock(&iommu_dma_notifier_lock); @@ -870,17 +884,8 @@ static int __init register_iommu_dma_ops_notifier(struct bus_type *bus) if (!nb) return -ENOMEM; - /* - * The device must be attached to a domain before the driver probe - * routine gets a chance to start allocating DMA buffers. However, - * the IOMMU driver also needs a chance to configure the iommu_group - * via its add_device callback first, so we need to make the attach - * happen between those two points. Since the IOMMU core uses a bus - * notifier with default priority for add_device, do the same but - * with a lower priority to ensure the appropriate ordering. - */ + nb->notifier_call = __iommu_attach_notifier; - nb->priority = -100; ret = bus_register_notifier(bus, nb); if (ret) { @@ -904,10 +909,6 @@ static int __init __iommu_dma_init(void) if (!ret) ret = register_iommu_dma_ops_notifier(&pci_bus_type); #endif - - /* handle devices queued before this arch_initcall */ - if (!ret) - __iommu_attach_notifier(NULL, BUS_NOTIFY_ADD_DEVICE, NULL); return ret; } arch_initcall(__iommu_dma_init); diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index ccfde237d6e6..f94b80eb295d 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -27,11 +27,7 @@ #include <asm/memory.h> #include <asm/pgtable.h> #include <asm/pgtable-hwdef.h> - -struct addr_marker { - unsigned long start_address; - const char *name; -}; +#include <asm/ptdump.h> static const struct addr_marker address_markers[] = { #ifdef CONFIG_KASAN @@ -290,7 +286,8 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) } } -static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start) +static void walk_pgd(struct pg_state *st, struct mm_struct *mm, + 
unsigned long start) { pgd_t *pgd = pgd_offset(mm, 0UL); unsigned i; @@ -309,12 +306,13 @@ static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long st static int ptdump_show(struct seq_file *m, void *v) { + struct ptdump_info *info = m->private; struct pg_state st = { .seq = m, - .marker = address_markers, + .marker = info->markers, }; - walk_pgd(&st, &init_mm, VA_START); + walk_pgd(&st, info->mm, info->base_addr); note_page(&st, 0, 0, 0); return 0; @@ -322,7 +320,7 @@ static int ptdump_show(struct seq_file *m, void *v) static int ptdump_open(struct inode *inode, struct file *file) { - return single_open(file, ptdump_show, NULL); + return single_open(file, ptdump_show, inode->i_private); } static const struct file_operations ptdump_fops = { @@ -332,7 +330,7 @@ static const struct file_operations ptdump_fops = { .release = single_release, }; -static int ptdump_init(void) +int ptdump_register(struct ptdump_info *info, const char *name) { struct dentry *pe; unsigned i, j; @@ -342,8 +340,18 @@ static int ptdump_init(void) for (j = 0; j < pg_level[i].num; j++) pg_level[i].mask |= pg_level[i].bits[j].mask; - pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, - &ptdump_fops); + pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops); return pe ? 
0 : -ENOMEM; } + +static struct ptdump_info kernel_ptdump_info = { + .mm = &init_mm, + .markers = address_markers, + .base_addr = VA_START, +}; + +static int ptdump_init(void) +{ + return ptdump_register(&kernel_ptdump_info, "kernel_page_tables"); +} device_initcall(ptdump_init); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index b1166d1e5955..c8beaa0da7df 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -41,6 +41,28 @@ static const char *fault_name(unsigned int esr); +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) +{ + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, esr)) + ret = 1; + preempt_enable(); + } + + return ret; +} +#else +static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) +{ + return 0; +} +#endif + /* * Dump out the page tables associated with 'addr' in mm 'mm'. 
*/ @@ -202,8 +224,6 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re #define VM_FAULT_BADMAP 0x010000 #define VM_FAULT_BADACCESS 0x020000 -#define ESR_LNX_EXEC (1 << 24) - static int __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int mm_flags, unsigned long vm_flags, struct task_struct *tsk) @@ -233,7 +253,7 @@ good_area: goto out; } - return handle_mm_fault(mm, vma, addr & PAGE_MASK, mm_flags); + return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags); check_stack: if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) @@ -242,14 +262,19 @@ out: return fault; } -static inline int permission_fault(unsigned int esr) +static inline bool is_permission_fault(unsigned int esr) { - unsigned int ec = (esr & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT; + unsigned int ec = ESR_ELx_EC(esr); unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM); } +static bool is_el0_instruction_abort(unsigned int esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; +} + static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -259,6 +284,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + if (notify_page_fault(regs, esr)) + return 0; + tsk = current; mm = tsk->mm; @@ -272,14 +300,14 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (user_mode(regs)) mm_flags |= FAULT_FLAG_USER; - if (esr & ESR_LNX_EXEC) { + if (is_el0_instruction_abort(esr)) { vm_flags = VM_EXEC; } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) { vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; } - if (permission_fault(esr) && (addr < USER_DS)) { + if (is_permission_fault(esr) && (addr < USER_DS)) { /* regs->orig_addr_limit may be 0 if we entered from EL0 */ if 
(regs->orig_addr_limit == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); @@ -630,6 +658,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, return rv; } +NOKPROBE_SYMBOL(do_debug_exception); #ifdef CONFIG_ARM64_PAN void cpu_enable_pan(void *__unused) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d45f8627012c..bbb7ee76e319 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -160,12 +160,10 @@ static void __init arm64_memory_present(void) static void __init arm64_memory_present(void) { struct memblock_region *reg; - int nid = 0; for_each_memblock(memory, reg) { -#ifdef CONFIG_NUMA - nid = reg->nid; -#endif + int nid = memblock_get_region_node(reg); + memory_present(nid, memblock_region_memory_base_pfn(reg), memblock_region_memory_end_pfn(reg)); } @@ -226,7 +224,7 @@ void __init arm64_memblock_init(void) * via the linear mapping. */ if (memory_limit != (phys_addr_t)ULLONG_MAX) { - memblock_enforce_memory_limit(memory_limit); + memblock_mem_limit_remove_map(memory_limit); memblock_add(__pa(_text), (u64)(_end - _text)); } @@ -403,7 +401,8 @@ static void __init free_unused_memmap(void) */ void __init mem_init(void) { - swiotlb_init(1); + if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + swiotlb_init(1); set_max_mapnr(pfn_to_page(max_pfn) - mem_map); @@ -430,9 +429,9 @@ void __init mem_init(void) pr_cont(" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n", MLG(VMALLOC_START, VMALLOC_END)); pr_cont(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n", - MLK_ROUNDUP(_text, __start_rodata)); + MLK_ROUNDUP(_text, _etext)); pr_cont(" .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n", - MLK_ROUNDUP(__start_rodata, _etext)); + MLK_ROUNDUP(__start_rodata, __init_begin)); pr_cont(" .init : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(__init_begin, __init_end)); pr_cont(" .data : 0x%p" " - 0x%p" " (%6ld KB)\n", diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 0f85a46c3e18..51a558195bb9 100644 --- 
a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -77,7 +77,6 @@ static phys_addr_t __init early_pgtable_alloc(void) void *ptr; phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - BUG_ON(!phys); /* * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE @@ -97,24 +96,6 @@ static phys_addr_t __init early_pgtable_alloc(void) return phys; } -/* - * remap a PMD into pages - */ -static void split_pmd(pmd_t *pmd, pte_t *pte) -{ - unsigned long pfn = pmd_pfn(*pmd); - int i = 0; - - do { - /* - * Need to have the least restrictive permissions available - * permissions will be fixed up later - */ - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); - pfn++; - } while (pte++, i++, i < PTRS_PER_PTE); -} - static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot, @@ -122,15 +103,13 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, { pte_t *pte; - if (pmd_none(*pmd) || pmd_sect(*pmd)) { + BUG_ON(pmd_sect(*pmd)); + if (pmd_none(*pmd)) { phys_addr_t pte_phys; BUG_ON(!pgtable_alloc); pte_phys = pgtable_alloc(); pte = pte_set_fixmap(pte_phys); - if (pmd_sect(*pmd)) - split_pmd(pmd, pte); __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); - flush_tlb_all(); pte_clear_fixmap(); } BUG_ON(pmd_bad(*pmd)); @@ -144,41 +123,10 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, pte_clear_fixmap(); } -static void split_pud(pud_t *old_pud, pmd_t *pmd) -{ - unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT; - pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr); - int i = 0; - - do { - set_pmd(pmd, __pmd(addr | pgprot_val(prot))); - addr += PMD_SIZE; - } while (pmd++, i++, i < PTRS_PER_PMD); -} - -#ifdef CONFIG_DEBUG_PAGEALLOC -static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void)) -{ - - /* - * If debug_page_alloc is enabled we must map the linear map - * using pages. However, other mappings created by - * create_mapping_noalloc must use sections in some cases. 
Allow - * sections to be used in those cases, where no pgtable_alloc - * function is provided. - */ - return !pgtable_alloc || !debug_pagealloc_enabled(); -} -#else -static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void)) -{ - return true; -} -#endif - static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void)) + phys_addr_t (*pgtable_alloc)(void), + bool allow_block_mappings) { pmd_t *pmd; unsigned long next; @@ -186,20 +134,13 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, /* * Check for initial section mappings in the pgd/pud and remove them. */ - if (pud_none(*pud) || pud_sect(*pud)) { + BUG_ON(pud_sect(*pud)); + if (pud_none(*pud)) { phys_addr_t pmd_phys; BUG_ON(!pgtable_alloc); pmd_phys = pgtable_alloc(); pmd = pmd_set_fixmap(pmd_phys); - if (pud_sect(*pud)) { - /* - * need to have the 1G of mappings continue to be - * present - */ - split_pud(pud, pmd); - } __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE); - flush_tlb_all(); pmd_clear_fixmap(); } BUG_ON(pud_bad(*pud)); @@ -209,7 +150,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, next = pmd_addr_end(addr, end); /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0 && - block_mappings_allowed(pgtable_alloc)) { + allow_block_mappings) { pmd_t old_pmd =*pmd; pmd_set_huge(pmd, phys, prot); /* @@ -248,7 +189,8 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void)) + phys_addr_t (*pgtable_alloc)(void), + bool allow_block_mappings) { pud_t *pud; unsigned long next; @@ -268,8 +210,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, /* * For 4K granule only, attempt to put down a 1GB block */ - if (use_1G_block(addr, next, 
phys) && - block_mappings_allowed(pgtable_alloc)) { + if (use_1G_block(addr, next, phys) && allow_block_mappings) { pud_t old_pud = *pud; pud_set_huge(pud, phys, prot); @@ -290,7 +231,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, } } else { alloc_init_pmd(pud, addr, next, phys, prot, - pgtable_alloc); + pgtable_alloc, allow_block_mappings); } phys += next - addr; } while (pud++, addr = next, addr != end); @@ -298,15 +239,14 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, pud_clear_fixmap(); } -/* - * Create the page directory entries and any necessary page tables for the - * mapping specified by 'md'. - */ -static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void)) +static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(void), + bool allow_block_mappings) { unsigned long addr, length, end, next; + pgd_t *pgd = pgd_offset_raw(pgdir, virt); /* * If the virtual and physical address don't have the same offset @@ -322,29 +262,23 @@ static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc); + alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc, + allow_block_mappings); phys += next - addr; } while (pgd++, addr = next, addr != end); } -static phys_addr_t late_pgtable_alloc(void) +static phys_addr_t pgd_pgtable_alloc(void) { void *ptr = (void *)__get_free_page(PGALLOC_GFP); - BUG_ON(!ptr); + if (!ptr || !pgtable_page_ctor(virt_to_page(ptr))) + BUG(); /* Ensure the zeroed page is visible to the page table walker */ dsb(ishst); return __pa(ptr); } -static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - pgprot_t prot, - phys_addr_t 
(*alloc)(void)) -{ - init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc); -} - /* * This function can only be used to modify existing table entries, * without allocating new levels of table. Note that this permits the @@ -358,16 +292,17 @@ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, - NULL); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, true); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, - pgprot_t prot) + pgprot_t prot, bool allow_block_mappings) { + BUG_ON(mm == &init_mm); + __create_pgd_mapping(mm->pgd, phys, virt, size, prot, - late_pgtable_alloc); + pgd_pgtable_alloc, allow_block_mappings); } static void create_mapping_late(phys_addr_t phys, unsigned long virt, @@ -380,51 +315,54 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, } __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, - late_pgtable_alloc); + NULL, !debug_pagealloc_enabled()); } static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end) { unsigned long kernel_start = __pa(_text); - unsigned long kernel_end = __pa(_etext); + unsigned long kernel_end = __pa(__init_begin); /* * Take care not to create a writable alias for the * read-only text and rodata sections of the kernel image. */ - /* No overlap with the kernel text */ + /* No overlap with the kernel text/rodata */ if (end < kernel_start || start >= kernel_end) { __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start, PAGE_KERNEL, - early_pgtable_alloc); + early_pgtable_alloc, + !debug_pagealloc_enabled()); return; } /* - * This block overlaps the kernel text mapping. + * This block overlaps the kernel text/rodata mappings. * Map the portion(s) which don't overlap. 
*/ if (start < kernel_start) __create_pgd_mapping(pgd, start, __phys_to_virt(start), kernel_start - start, PAGE_KERNEL, - early_pgtable_alloc); + early_pgtable_alloc, + !debug_pagealloc_enabled()); if (kernel_end < end) __create_pgd_mapping(pgd, kernel_end, __phys_to_virt(kernel_end), end - kernel_end, PAGE_KERNEL, - early_pgtable_alloc); + early_pgtable_alloc, + !debug_pagealloc_enabled()); /* - * Map the linear alias of the [_text, _etext) interval as + * Map the linear alias of the [_text, __init_begin) interval as * read-only/non-executable. This makes the contents of the * region accessible to subsystems such as hibernate, but * protects it from inadvertent modification or execution. */ __create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start), kernel_end - kernel_start, PAGE_KERNEL_RO, - early_pgtable_alloc); + early_pgtable_alloc, !debug_pagealloc_enabled()); } static void __init map_mem(pgd_t *pgd) @@ -449,14 +387,14 @@ void mark_rodata_ro(void) { unsigned long section_size; - section_size = (unsigned long)__start_rodata - (unsigned long)_text; + section_size = (unsigned long)_etext - (unsigned long)_text; create_mapping_late(__pa(_text), (unsigned long)_text, section_size, PAGE_KERNEL_ROX); /* - * mark .rodata as read only. Use _etext rather than __end_rodata to - * cover NOTES and EXCEPTION_TABLE. + * mark .rodata as read only. Use __init_begin rather than __end_rodata + * to cover NOTES and EXCEPTION_TABLE. 
*/ - section_size = (unsigned long)_etext - (unsigned long)__start_rodata; + section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata; create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata, section_size, PAGE_KERNEL_RO); } @@ -481,7 +419,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, BUG_ON(!PAGE_ALIGNED(size)); __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot, - early_pgtable_alloc); + early_pgtable_alloc, !debug_pagealloc_enabled()); vma->addr = va_start; vma->phys_addr = pa_start; @@ -499,8 +437,8 @@ static void __init map_kernel(pgd_t *pgd) { static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data; - map_kernel_segment(pgd, _text, __start_rodata, PAGE_KERNEL_EXEC, &vmlinux_text); - map_kernel_segment(pgd, __start_rodata, _etext, PAGE_KERNEL, &vmlinux_rodata); + map_kernel_segment(pgd, _text, _etext, PAGE_KERNEL_EXEC, &vmlinux_text); + map_kernel_segment(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata); map_kernel_segment(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC, &vmlinux_init); map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data); diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 98dc1047f2a2..c7fe3ec70774 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/acpi.h> #include <linux/bootmem.h> #include <linux/memblock.h> #include <linux/module.h> @@ -29,7 +30,7 @@ static int cpu_to_node_map[NR_CPUS] = { [0 ... 
NR_CPUS-1] = NUMA_NO_NODE }; static int numa_distance_cnt; static u8 *numa_distance; -static int numa_off; +static bool numa_off; static __init int numa_parse_early_param(char *opt) { @@ -37,7 +38,7 @@ static __init int numa_parse_early_param(char *opt) return -EINVAL; if (!strncmp(opt, "off", 3)) { pr_info("%s\n", "NUMA turned off"); - numa_off = 1; + numa_off = true; } return 0; } @@ -131,25 +132,25 @@ void __init early_map_cpu_to_node(unsigned int cpu, int nid) * numa_add_memblk - Set node id to memblk * @nid: NUMA node ID of the new memblk * @start: Start address of the new memblk - * @size: Size of the new memblk + * @end: End address of the new memblk * * RETURNS: * 0 on success, -errno on failure. */ -int __init numa_add_memblk(int nid, u64 start, u64 size) +int __init numa_add_memblk(int nid, u64 start, u64 end) { int ret; - ret = memblock_set_node(start, size, &memblock.memory, nid); + ret = memblock_set_node(start, (end - start), &memblock.memory, nid); if (ret < 0) { pr_err("NUMA: memblock [0x%llx - 0x%llx] failed to add on node %d\n", - start, (start + size - 1), nid); + start, (end - 1), nid); return ret; } node_set(nid, numa_nodes_parsed); pr_info("NUMA: Adding memblock [0x%llx - 0x%llx] on node %d\n", - start, (start + size - 1), nid); + start, (end - 1), nid); return ret; } @@ -362,12 +363,15 @@ static int __init dummy_numa_init(void) int ret; struct memblock_region *mblk; - pr_info("%s\n", "No NUMA configuration found"); + if (numa_off) + pr_info("NUMA disabled\n"); /* Forced off on command line. 
*/ + else + pr_info("No NUMA configuration found\n"); pr_info("NUMA: Faking a node at [mem %#018Lx-%#018Lx]\n", 0LLU, PFN_PHYS(max_pfn) - 1); for_each_memblock(memory, mblk) { - ret = numa_add_memblk(0, mblk->base, mblk->size); + ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size); if (!ret) continue; @@ -375,7 +379,7 @@ static int __init dummy_numa_init(void) return ret; } - numa_off = 1; + numa_off = true; return 0; } @@ -388,7 +392,9 @@ static int __init dummy_numa_init(void) void __init arm64_numa_init(void) { if (!numa_off) { - if (!numa_init(of_numa_init)) + if (!acpi_disabled && !numa_init(arm64_acpi_numa_init)) + return; + if (acpi_disabled && !numa_init(of_numa_init)) return; } diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c4317879b938..5bb61de23201 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -180,6 +180,8 @@ ENTRY(__cpu_setup) msr cpacr_el1, x0 // Enable FP/ASIMD mov x0, #1 << 12 // Reset mdscr_el1 and disable msr mdscr_el1, x0 // access to the DCC from EL0 + isb // Unmask debug exceptions now, + enable_dbg // since this is per-cpu reset_pmuserenr_el0 x0 // Disable PMU access from EL0 /* * Memory region attributes for LPAE: diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index aee5637ea436..7c16e547ccb2 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -1,7 +1,7 @@ /* * BPF JIT compiler for ARM64 * - * Copyright (C) 2014-2015 Zi Shen Lim <zlim.lnx@gmail.com> + * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -55,6 +55,7 @@ #define A64_BL(imm26) A64_BRANCH((imm26) << 2, LINK) /* Unconditional branch (register) */ +#define A64_BR(Rn) aarch64_insn_gen_branch_reg(Rn, AARCH64_INSN_BRANCH_NOLINK) #define A64_BLR(Rn) aarch64_insn_gen_branch_reg(Rn, AARCH64_INSN_BRANCH_LINK) #define A64_RET(Rn) aarch64_insn_gen_branch_reg(Rn, 
AARCH64_INSN_BRANCH_RETURN) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 49ba37e4bfc0..b2fc97a2c56c 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -18,6 +18,7 @@ #define pr_fmt(fmt) "bpf_jit: " fmt +#include <linux/bpf.h> #include <linux/filter.h> #include <linux/printk.h> #include <linux/skbuff.h> @@ -33,6 +34,7 @@ int bpf_jit_enable __read_mostly; #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) +#define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Map BPF registers to A64 registers */ static const int bpf2a64[] = { @@ -54,6 +56,8 @@ static const int bpf2a64[] = { /* temporary registers for internal BPF JIT */ [TMP_REG_1] = A64_R(10), [TMP_REG_2] = A64_R(11), + /* tail_call_cnt */ + [TCALL_CNT] = A64_R(26), /* temporary register for blinding constants */ [BPF_REG_AX] = A64_R(9), }; @@ -146,13 +150,18 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) #define STACK_SIZE STACK_ALIGN(_STACK_SIZE) -static void build_prologue(struct jit_ctx *ctx) +#define PROLOGUE_OFFSET 8 + +static int build_prologue(struct jit_ctx *ctx) { const u8 r6 = bpf2a64[BPF_REG_6]; const u8 r7 = bpf2a64[BPF_REG_7]; const u8 r8 = bpf2a64[BPF_REG_8]; const u8 r9 = bpf2a64[BPF_REG_9]; const u8 fp = bpf2a64[BPF_REG_FP]; + const u8 tcc = bpf2a64[TCALL_CNT]; + const int idx0 = ctx->idx; + int cur_offset; /* * BPF prog stack layout @@ -162,8 +171,6 @@ static void build_prologue(struct jit_ctx *ctx) * |FP/LR| * current A64_FP => -16:+-----+ * | ... | callee saved registers - * +-----+ - * | | x25/x26 * BPF fp register => -64:+-----+ <= (BPF_FP) * | | * | ... 
| BPF prog stack @@ -183,18 +190,90 @@ static void build_prologue(struct jit_ctx *ctx) emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); emit(A64_MOV(1, A64_FP, A64_SP), ctx); - /* Save callee-saved register */ + /* Save callee-saved registers */ emit(A64_PUSH(r6, r7, A64_SP), ctx); emit(A64_PUSH(r8, r9, A64_SP), ctx); + emit(A64_PUSH(fp, tcc, A64_SP), ctx); - /* Save fp (x25) and x26. SP requires 16 bytes alignment */ - emit(A64_PUSH(fp, A64_R(26), A64_SP), ctx); - - /* Set up BPF prog stack base register (x25) */ + /* Set up BPF prog stack base register */ emit(A64_MOV(1, fp, A64_SP), ctx); + /* Initialize tail_call_cnt */ + emit(A64_MOVZ(1, tcc, 0, 0), ctx); + /* Set up function call stack */ emit(A64_SUB_I(1, A64_SP, A64_SP, STACK_SIZE), ctx); + + cur_offset = ctx->idx - idx0; + if (cur_offset != PROLOGUE_OFFSET) { + pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n", + cur_offset, PROLOGUE_OFFSET); + return -1; + } + return 0; +} + +static int out_offset = -1; /* initialized on the first pass of build_body() */ +static int emit_bpf_tail_call(struct jit_ctx *ctx) +{ + /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ + const u8 r2 = bpf2a64[BPF_REG_2]; + const u8 r3 = bpf2a64[BPF_REG_3]; + + const u8 tmp = bpf2a64[TMP_REG_1]; + const u8 prg = bpf2a64[TMP_REG_2]; + const u8 tcc = bpf2a64[TCALL_CNT]; + const int idx0 = ctx->idx; +#define cur_offset (ctx->idx - idx0) +#define jmp_offset (out_offset - (cur_offset)) + size_t off; + + /* if (index >= array->map.max_entries) + * goto out; + */ + off = offsetof(struct bpf_array, map.max_entries); + emit_a64_mov_i64(tmp, off, ctx); + emit(A64_LDR32(tmp, r2, tmp), ctx); + emit(A64_CMP(0, r3, tmp), ctx); + emit(A64_B_(A64_COND_GE, jmp_offset), ctx); + + /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * goto out; + * tail_call_cnt++; + */ + emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx); + emit(A64_CMP(1, tcc, tmp), ctx); + emit(A64_B_(A64_COND_GT, jmp_offset), ctx); + emit(A64_ADD_I(1, tcc, tcc, 1), ctx); + + 
/* prog = array->ptrs[index]; + * if (prog == NULL) + * goto out; + */ + off = offsetof(struct bpf_array, ptrs); + emit_a64_mov_i64(tmp, off, ctx); + emit(A64_LDR64(tmp, r2, tmp), ctx); + emit(A64_LDR64(prg, tmp, r3), ctx); + emit(A64_CBZ(1, prg, jmp_offset), ctx); + + /* goto *(prog->bpf_func + prologue_size); */ + off = offsetof(struct bpf_prog, bpf_func); + emit_a64_mov_i64(tmp, off, ctx); + emit(A64_LDR64(tmp, prg, tmp), ctx); + emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx); + emit(A64_BR(tmp), ctx); + + /* out: */ + if (out_offset == -1) + out_offset = cur_offset; + if (cur_offset != out_offset) { + pr_err_once("tail_call out_offset = %d, expected %d!\n", + cur_offset, out_offset); + return -1; + } + return 0; +#undef cur_offset +#undef jmp_offset } static void build_epilogue(struct jit_ctx *ctx) @@ -499,13 +578,15 @@ emit_cond_jmp: const u64 func = (u64)__bpf_call_base + imm; emit_a64_mov_i64(tmp, func, ctx); - emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); - emit(A64_MOV(1, A64_FP, A64_SP), ctx); emit(A64_BLR(tmp), ctx); emit(A64_MOV(1, r0, A64_R(0)), ctx); - emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); break; } + /* tail call */ + case BPF_JMP | BPF_CALL | BPF_X: + if (emit_bpf_tail_call(ctx)) + return -EFAULT; + break; /* function return */ case BPF_JMP | BPF_EXIT: /* Optimization: when last instruction is EXIT, @@ -650,11 +731,8 @@ emit_cond_jmp: emit_a64_mov_i64(r3, size, ctx); emit(A64_SUB_I(1, r4, fp, STACK_SIZE), ctx); emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx); - emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); - emit(A64_MOV(1, A64_FP, A64_SP), ctx); emit(A64_BLR(r5), ctx); emit(A64_MOV(1, r0, A64_R(0)), ctx); - emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); jmp_offset = epilogue_offset(ctx); check_imm19(jmp_offset); @@ -780,7 +858,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_off; } - build_prologue(&ctx); + if (build_prologue(&ctx)) { + prog = orig_prog; + goto out_off; + } ctx.epilogue_offset 
= ctx.idx; build_epilogue(&ctx); diff --git a/arch/arm64/xen/Makefile b/arch/arm64/xen/Makefile index 74a8d87e542b..8ff8aa9c6228 100644 --- a/arch/arm64/xen/Makefile +++ b/arch/arm64/xen/Makefile @@ -1,2 +1,3 @@ xen-arm-y += $(addprefix ../../arm/xen/, enlighten.o grant-table.o p2m.o mm.o) obj-y := xen-arm.o hypercall.o +obj-$(CONFIG_XEN_EFI) += $(addprefix ../../arm/xen/, efi.o) diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 70df80e8da2c..329c8027b0a9 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -82,6 +82,7 @@ HYPERCALL3(vcpu_op); HYPERCALL1(tmem_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); +HYPERCALL2(vm_assist); ENTRY(privcmd_call) mov x16, x0 diff --git a/arch/avr32/include/uapi/asm/unistd.h b/arch/avr32/include/uapi/asm/unistd.h index 60c0f3afc1f9..2c8a0d2b6c30 100644 --- a/arch/avr32/include/uapi/asm/unistd.h +++ b/arch/avr32/include/uapi/asm/unistd.h @@ -12,331 +12,333 @@ * This file contains the system call numbers. 
*/ -#define __NR_restart_syscall 0 -#define __NR_exit 1 -#define __NR_fork 2 -#define __NR_read 3 -#define __NR_write 4 -#define __NR_open 5 -#define __NR_close 6 -#define __NR_umask 7 -#define __NR_creat 8 -#define __NR_link 9 -#define __NR_unlink 10 -#define __NR_execve 11 -#define __NR_chdir 12 -#define __NR_time 13 -#define __NR_mknod 14 -#define __NR_chmod 15 -#define __NR_chown 16 -#define __NR_lchown 17 -#define __NR_lseek 18 -#define __NR__llseek 19 -#define __NR_getpid 20 -#define __NR_mount 21 -#define __NR_umount2 22 -#define __NR_setuid 23 -#define __NR_getuid 24 -#define __NR_stime 25 -#define __NR_ptrace 26 -#define __NR_alarm 27 -#define __NR_pause 28 -#define __NR_utime 29 -#define __NR_stat 30 -#define __NR_fstat 31 -#define __NR_lstat 32 -#define __NR_access 33 -#define __NR_chroot 34 -#define __NR_sync 35 -#define __NR_fsync 36 -#define __NR_kill 37 -#define __NR_rename 38 -#define __NR_mkdir 39 -#define __NR_rmdir 40 -#define __NR_dup 41 -#define __NR_pipe 42 -#define __NR_times 43 -#define __NR_clone 44 -#define __NR_brk 45 -#define __NR_setgid 46 -#define __NR_getgid 47 -#define __NR_getcwd 48 -#define __NR_geteuid 49 -#define __NR_getegid 50 -#define __NR_acct 51 -#define __NR_setfsuid 52 -#define __NR_setfsgid 53 -#define __NR_ioctl 54 -#define __NR_fcntl 55 -#define __NR_setpgid 56 -#define __NR_mremap 57 -#define __NR_setresuid 58 -#define __NR_getresuid 59 -#define __NR_setreuid 60 -#define __NR_setregid 61 -#define __NR_ustat 62 -#define __NR_dup2 63 -#define __NR_getppid 64 -#define __NR_getpgrp 65 -#define __NR_setsid 66 -#define __NR_rt_sigaction 67 -#define __NR_rt_sigreturn 68 -#define __NR_rt_sigprocmask 69 -#define __NR_rt_sigpending 70 -#define __NR_rt_sigtimedwait 71 -#define __NR_rt_sigqueueinfo 72 -#define __NR_rt_sigsuspend 73 -#define __NR_sethostname 74 -#define __NR_setrlimit 75 -#define __NR_getrlimit 76 /* SuS compliant getrlimit */ -#define __NR_getrusage 77 -#define __NR_gettimeofday 78 -#define __NR_settimeofday 79 
-#define __NR_getgroups 80 -#define __NR_setgroups 81 -#define __NR_select 82 -#define __NR_symlink 83 -#define __NR_fchdir 84 -#define __NR_readlink 85 -#define __NR_pread 86 -#define __NR_pwrite 87 -#define __NR_swapon 88 -#define __NR_reboot 89 -#define __NR_mmap2 90 -#define __NR_munmap 91 -#define __NR_truncate 92 -#define __NR_ftruncate 93 -#define __NR_fchmod 94 -#define __NR_fchown 95 -#define __NR_getpriority 96 -#define __NR_setpriority 97 -#define __NR_wait4 98 -#define __NR_statfs 99 -#define __NR_fstatfs 100 -#define __NR_vhangup 101 -#define __NR_sigaltstack 102 -#define __NR_syslog 103 -#define __NR_setitimer 104 -#define __NR_getitimer 105 -#define __NR_swapoff 106 -#define __NR_sysinfo 107 +#define __NR_restart_syscall 0 +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_umask 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_time 13 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_chown 16 +#define __NR_lchown 17 +#define __NR_lseek 18 +#define __NR__llseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount2 22 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_pause 28 +#define __NR_utime 29 +#define __NR_stat 30 +#define __NR_fstat 31 +#define __NR_lstat 32 +#define __NR_access 33 +#define __NR_chroot 34 +#define __NR_sync 35 +#define __NR_fsync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_clone 44 +#define __NR_brk 45 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_getcwd 48 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_acct 51 +#define __NR_setfsuid 52 +#define __NR_setfsgid 53 +#define __NR_ioctl 54 
+#define __NR_fcntl 55 +#define __NR_setpgid 56 +#define __NR_mremap 57 +#define __NR_setresuid 58 +#define __NR_getresuid 59 +#define __NR_setreuid 60 +#define __NR_setregid 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_rt_sigaction 67 +#define __NR_rt_sigreturn 68 +#define __NR_rt_sigprocmask 69 +#define __NR_rt_sigpending 70 +#define __NR_rt_sigtimedwait 71 +#define __NR_rt_sigqueueinfo 72 +#define __NR_rt_sigsuspend 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrlimit 76 /* SuS compliant getrlimit */ +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_select 82 +#define __NR_symlink 83 +#define __NR_fchdir 84 +#define __NR_readlink 85 +#define __NR_pread 86 +#define __NR_pwrite 87 +#define __NR_swapon 88 +#define __NR_reboot 89 +#define __NR_mmap2 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_fchown 95 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_wait4 98 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_vhangup 101 +#define __NR_sigaltstack 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_swapoff 106 +#define __NR_sysinfo 107 /* 108 was __NR_ipc for a little while */ -#define __NR_sendfile 109 -#define __NR_setdomainname 110 -#define __NR_uname 111 -#define __NR_adjtimex 112 -#define __NR_mprotect 113 -#define __NR_vfork 114 -#define __NR_init_module 115 -#define __NR_delete_module 116 -#define __NR_quotactl 117 -#define __NR_getpgid 118 -#define __NR_bdflush 119 -#define __NR_sysfs 120 -#define __NR_personality 121 -#define __NR_afs_syscall 122 /* Syscall for Andrew File System */ -#define __NR_getdents 123 -#define __NR_flock 124 -#define __NR_msync 125 -#define __NR_readv 126 
-#define __NR_writev 127 -#define __NR_getsid 128 -#define __NR_fdatasync 129 -#define __NR__sysctl 130 -#define __NR_mlock 131 -#define __NR_munlock 132 -#define __NR_mlockall 133 -#define __NR_munlockall 134 -#define __NR_sched_setparam 135 -#define __NR_sched_getparam 136 -#define __NR_sched_setscheduler 137 -#define __NR_sched_getscheduler 138 -#define __NR_sched_yield 139 -#define __NR_sched_get_priority_max 140 -#define __NR_sched_get_priority_min 141 -#define __NR_sched_rr_get_interval 142 -#define __NR_nanosleep 143 -#define __NR_poll 144 -#define __NR_nfsservctl 145 -#define __NR_setresgid 146 -#define __NR_getresgid 147 +#define __NR_sendfile 109 +#define __NR_setdomainname 110 +#define __NR_uname 111 +#define __NR_adjtimex 112 +#define __NR_mprotect 113 +#define __NR_vfork 114 +#define __NR_init_module 115 +#define __NR_delete_module 116 +#define __NR_quotactl 117 +#define __NR_getpgid 118 +#define __NR_bdflush 119 +#define __NR_sysfs 120 +#define __NR_personality 121 +#define __NR_afs_syscall 122 /* Syscall for Andrew File System */ +#define __NR_getdents 123 +#define __NR_flock 124 +#define __NR_msync 125 +#define __NR_readv 126 +#define __NR_writev 127 +#define __NR_getsid 128 +#define __NR_fdatasync 129 +#define __NR__sysctl 130 +#define __NR_mlock 131 +#define __NR_munlock 132 +#define __NR_mlockall 133 +#define __NR_munlockall 134 +#define __NR_sched_setparam 135 +#define __NR_sched_getparam 136 +#define __NR_sched_setscheduler 137 +#define __NR_sched_getscheduler 138 +#define __NR_sched_yield 139 +#define __NR_sched_get_priority_max 140 +#define __NR_sched_get_priority_min 141 +#define __NR_sched_rr_get_interval 142 +#define __NR_nanosleep 143 +#define __NR_poll 144 +#define __NR_nfsservctl 145 +#define __NR_setresgid 146 +#define __NR_getresgid 147 #define __NR_prctl 148 -#define __NR_socket 149 -#define __NR_bind 150 -#define __NR_connect 151 -#define __NR_listen 152 -#define __NR_accept 153 -#define __NR_getsockname 154 -#define 
__NR_getpeername 155 -#define __NR_socketpair 156 -#define __NR_send 157 -#define __NR_recv 158 -#define __NR_sendto 159 -#define __NR_recvfrom 160 -#define __NR_shutdown 161 -#define __NR_setsockopt 162 -#define __NR_getsockopt 163 -#define __NR_sendmsg 164 -#define __NR_recvmsg 165 -#define __NR_truncate64 166 -#define __NR_ftruncate64 167 -#define __NR_stat64 168 -#define __NR_lstat64 169 -#define __NR_fstat64 170 -#define __NR_pivot_root 171 -#define __NR_mincore 172 -#define __NR_madvise 173 -#define __NR_getdents64 174 -#define __NR_fcntl64 175 -#define __NR_gettid 176 -#define __NR_readahead 177 -#define __NR_setxattr 178 -#define __NR_lsetxattr 179 -#define __NR_fsetxattr 180 -#define __NR_getxattr 181 -#define __NR_lgetxattr 182 -#define __NR_fgetxattr 183 -#define __NR_listxattr 184 -#define __NR_llistxattr 185 -#define __NR_flistxattr 186 -#define __NR_removexattr 187 -#define __NR_lremovexattr 188 -#define __NR_fremovexattr 189 -#define __NR_tkill 190 -#define __NR_sendfile64 191 -#define __NR_futex 192 -#define __NR_sched_setaffinity 193 -#define __NR_sched_getaffinity 194 -#define __NR_capget 195 -#define __NR_capset 196 -#define __NR_io_setup 197 -#define __NR_io_destroy 198 -#define __NR_io_getevents 199 -#define __NR_io_submit 200 -#define __NR_io_cancel 201 -#define __NR_fadvise64 202 -#define __NR_exit_group 203 -#define __NR_lookup_dcookie 204 -#define __NR_epoll_create 205 -#define __NR_epoll_ctl 206 -#define __NR_epoll_wait 207 -#define __NR_remap_file_pages 208 -#define __NR_set_tid_address 209 -#define __NR_timer_create 210 -#define __NR_timer_settime 211 -#define __NR_timer_gettime 212 -#define __NR_timer_getoverrun 213 -#define __NR_timer_delete 214 -#define __NR_clock_settime 215 -#define __NR_clock_gettime 216 -#define __NR_clock_getres 217 -#define __NR_clock_nanosleep 218 -#define __NR_statfs64 219 -#define __NR_fstatfs64 220 -#define __NR_tgkill 221 - /* 222 reserved for tux */ -#define __NR_utimes 223 -#define __NR_fadvise64_64 224 
-#define __NR_cacheflush 225 - -#define __NR_vserver 226 -#define __NR_mq_open 227 -#define __NR_mq_unlink 228 -#define __NR_mq_timedsend 229 -#define __NR_mq_timedreceive 230 -#define __NR_mq_notify 231 -#define __NR_mq_getsetattr 232 -#define __NR_kexec_load 233 -#define __NR_waitid 234 -#define __NR_add_key 235 -#define __NR_request_key 236 -#define __NR_keyctl 237 -#define __NR_ioprio_set 238 -#define __NR_ioprio_get 239 -#define __NR_inotify_init 240 -#define __NR_inotify_add_watch 241 -#define __NR_inotify_rm_watch 242 -#define __NR_openat 243 -#define __NR_mkdirat 244 -#define __NR_mknodat 245 -#define __NR_fchownat 246 -#define __NR_futimesat 247 -#define __NR_fstatat64 248 -#define __NR_unlinkat 249 -#define __NR_renameat 250 -#define __NR_linkat 251 -#define __NR_symlinkat 252 -#define __NR_readlinkat 253 -#define __NR_fchmodat 254 -#define __NR_faccessat 255 -#define __NR_pselect6 256 -#define __NR_ppoll 257 -#define __NR_unshare 258 -#define __NR_set_robust_list 259 -#define __NR_get_robust_list 260 -#define __NR_splice 261 -#define __NR_sync_file_range 262 -#define __NR_tee 263 -#define __NR_vmsplice 264 -#define __NR_epoll_pwait 265 -#define __NR_msgget 266 -#define __NR_msgsnd 267 -#define __NR_msgrcv 268 -#define __NR_msgctl 269 -#define __NR_semget 270 -#define __NR_semop 271 -#define __NR_semctl 272 -#define __NR_semtimedop 273 -#define __NR_shmat 274 -#define __NR_shmget 275 -#define __NR_shmdt 276 -#define __NR_shmctl 277 -#define __NR_utimensat 278 -#define __NR_signalfd 279 +#define __NR_socket 149 +#define __NR_bind 150 +#define __NR_connect 151 +#define __NR_listen 152 +#define __NR_accept 153 +#define __NR_getsockname 154 +#define __NR_getpeername 155 +#define __NR_socketpair 156 +#define __NR_send 157 +#define __NR_recv 158 +#define __NR_sendto 159 +#define __NR_recvfrom 160 +#define __NR_shutdown 161 +#define __NR_setsockopt 162 +#define __NR_getsockopt 163 +#define __NR_sendmsg 164 +#define __NR_recvmsg 165 +#define __NR_truncate64 166 
+#define __NR_ftruncate64 167 +#define __NR_stat64 168 +#define __NR_lstat64 169 +#define __NR_fstat64 170 +#define __NR_pivot_root 171 +#define __NR_mincore 172 +#define __NR_madvise 173 +#define __NR_getdents64 174 +#define __NR_fcntl64 175 +#define __NR_gettid 176 +#define __NR_readahead 177 +#define __NR_setxattr 178 +#define __NR_lsetxattr 179 +#define __NR_fsetxattr 180 +#define __NR_getxattr 181 +#define __NR_lgetxattr 182 +#define __NR_fgetxattr 183 +#define __NR_listxattr 184 +#define __NR_llistxattr 185 +#define __NR_flistxattr 186 +#define __NR_removexattr 187 +#define __NR_lremovexattr 188 +#define __NR_fremovexattr 189 +#define __NR_tkill 190 +#define __NR_sendfile64 191 +#define __NR_futex 192 +#define __NR_sched_setaffinity 193 +#define __NR_sched_getaffinity 194 +#define __NR_capget 195 +#define __NR_capset 196 +#define __NR_io_setup 197 +#define __NR_io_destroy 198 +#define __NR_io_getevents 199 +#define __NR_io_submit 200 +#define __NR_io_cancel 201 +#define __NR_fadvise64 202 +#define __NR_exit_group 203 +#define __NR_lookup_dcookie 204 +#define __NR_epoll_create 205 +#define __NR_epoll_ctl 206 +#define __NR_epoll_wait 207 +#define __NR_remap_file_pages 208 +#define __NR_set_tid_address 209 +#define __NR_timer_create 210 +#define __NR_timer_settime 211 +#define __NR_timer_gettime 212 +#define __NR_timer_getoverrun 213 +#define __NR_timer_delete 214 +#define __NR_clock_settime 215 +#define __NR_clock_gettime 216 +#define __NR_clock_getres 217 +#define __NR_clock_nanosleep 218 +#define __NR_statfs64 219 +#define __NR_fstatfs64 220 +#define __NR_tgkill 221 +/* 222 reserved for tux */ +#define __NR_utimes 223 +#define __NR_fadvise64_64 224 +#define __NR_cacheflush 225 +#define __NR_vserver 226 +#define __NR_mq_open 227 +#define __NR_mq_unlink 228 +#define __NR_mq_timedsend 229 +#define __NR_mq_timedreceive 230 +#define __NR_mq_notify 231 +#define __NR_mq_getsetattr 232 +#define __NR_kexec_load 233 +#define __NR_waitid 234 +#define __NR_add_key 235 
+#define __NR_request_key 236 +#define __NR_keyctl 237 +#define __NR_ioprio_set 238 +#define __NR_ioprio_get 239 +#define __NR_inotify_init 240 +#define __NR_inotify_add_watch 241 +#define __NR_inotify_rm_watch 242 +#define __NR_openat 243 +#define __NR_mkdirat 244 +#define __NR_mknodat 245 +#define __NR_fchownat 246 +#define __NR_futimesat 247 +#define __NR_fstatat64 248 +#define __NR_unlinkat 249 +#define __NR_renameat 250 +#define __NR_linkat 251 +#define __NR_symlinkat 252 +#define __NR_readlinkat 253 +#define __NR_fchmodat 254 +#define __NR_faccessat 255 +#define __NR_pselect6 256 +#define __NR_ppoll 257 +#define __NR_unshare 258 +#define __NR_set_robust_list 259 +#define __NR_get_robust_list 260 +#define __NR_splice 261 +#define __NR_sync_file_range 262 +#define __NR_tee 263 +#define __NR_vmsplice 264 +#define __NR_epoll_pwait 265 +#define __NR_msgget 266 +#define __NR_msgsnd 267 +#define __NR_msgrcv 268 +#define __NR_msgctl 269 +#define __NR_semget 270 +#define __NR_semop 271 +#define __NR_semctl 272 +#define __NR_semtimedop 273 +#define __NR_shmat 274 +#define __NR_shmget 275 +#define __NR_shmdt 276 +#define __NR_shmctl 277 +#define __NR_utimensat 278 +#define __NR_signalfd 279 /* 280 was __NR_timerfd */ -#define __NR_eventfd 281 -#define __NR_setns 283 -#define __NR_pread64 284 -#define __NR_pwrite64 285 -#define __NR_timerfd_create 286 -#define __NR_fallocate 287 -#define __NR_timerfd_settime 288 -#define __NR_timerfd_gettime 289 -#define __NR_signalfd4 290 -#define __NR_eventfd2 291 -#define __NR_epoll_create1 292 -#define __NR_dup3 293 -#define __NR_pipe2 294 -#define __NR_inotify_init1 295 -#define __NR_preadv 296 -#define __NR_pwritev 297 -#define __NR_rt_tgsigqueueinfo 298 -#define __NR_perf_event_open 299 -#define __NR_recvmmsg 300 -#define __NR_fanotify_init 301 -#define __NR_fanotify_mark 302 -#define __NR_prlimit64 303 -#define __NR_name_to_handle_at 304 -#define __NR_open_by_handle_at 305 -#define __NR_clock_adjtime 306 -#define __NR_syncfs 307 
-#define __NR_sendmmsg 308 -#define __NR_process_vm_readv 309 -#define __NR_process_vm_writev 310 -#define __NR_kcmp 311 -#define __NR_finit_module 312 -#define __NR_sched_setattr 313 -#define __NR_sched_getattr 314 -#define __NR_renameat2 315 -#define __NR_seccomp 316 -#define __NR_getrandom 317 -#define __NR_memfd_create 318 -#define __NR_bpf 319 -#define __NR_execveat 320 -#define __NR_accept4 321 -#define __NR_userfaultfd 322 -#define __NR_membarrier 323 -#define __NR_mlock2 324 +#define __NR_eventfd 281 +/* 282 was half-implemented __NR_recvmmsg */ +#define __NR_setns 283 +#define __NR_pread64 284 +#define __NR_pwrite64 285 +#define __NR_timerfd_create 286 +#define __NR_fallocate 287 +#define __NR_timerfd_settime 288 +#define __NR_timerfd_gettime 289 +#define __NR_signalfd4 290 +#define __NR_eventfd2 291 +#define __NR_epoll_create1 292 +#define __NR_dup3 293 +#define __NR_pipe2 294 +#define __NR_inotify_init1 295 +#define __NR_preadv 296 +#define __NR_pwritev 297 +#define __NR_rt_tgsigqueueinfo 298 +#define __NR_perf_event_open 299 +#define __NR_recvmmsg 300 +#define __NR_fanotify_init 301 +#define __NR_fanotify_mark 302 +#define __NR_prlimit64 303 +#define __NR_name_to_handle_at 304 +#define __NR_open_by_handle_at 305 +#define __NR_clock_adjtime 306 +#define __NR_syncfs 307 +#define __NR_sendmmsg 308 +#define __NR_process_vm_readv 309 +#define __NR_process_vm_writev 310 +#define __NR_kcmp 311 +#define __NR_finit_module 312 +#define __NR_sched_setattr 313 +#define __NR_sched_getattr 314 +#define __NR_renameat2 315 +#define __NR_seccomp 316 +#define __NR_getrandom 317 +#define __NR_memfd_create 318 +#define __NR_bpf 319 +#define __NR_execveat 320 +#define __NR_accept4 321 +#define __NR_userfaultfd 322 +#define __NR_membarrier 323 +#define __NR_mlock2 324 #define __NR_copy_file_range 325 +#define __NR_preadv2 326 +#define __NR_pwritev2 327 #endif /* _UAPI__ASM_AVR32_UNISTD_H */ diff --git a/arch/avr32/kernel/syscall-stubs.S b/arch/avr32/kernel/syscall-stubs.S 
index cb3991552f14..cb256534ed92 100644 --- a/arch/avr32/kernel/syscall-stubs.S +++ b/arch/avr32/kernel/syscall-stubs.S @@ -133,3 +133,21 @@ __sys_copy_file_range: call sys_copy_file_range sub sp, -4 popm pc + + .global __sys_preadv2 + .type __sys_preadv2,@function +__sys_preadv2: + pushm lr + st.w --sp, ARG6 + call sys_preadv2 + sub sp, -4 + popm pc + + .global __sys_pwritev2 + .type __sys_pwritev2,@function +__sys_pwritev2: + pushm lr + st.w --sp, ARG6 + call sys_pwritev2 + sub sp, -4 + popm pc diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S index 64d71a781fa8..7b348ba70e41 100644 --- a/arch/avr32/kernel/syscall_table.S +++ b/arch/avr32/kernel/syscall_table.S @@ -9,334 +9,336 @@ */ .section .rodata,"a",@progbits - .type sys_call_table,@object - .global sys_call_table - .align 2 + .type sys_call_table,@object + .global sys_call_table + .align 2 sys_call_table: - .long sys_restart_syscall - .long sys_exit - .long sys_fork - .long sys_read - .long sys_write - .long sys_open /* 5 */ - .long sys_close - .long sys_umask - .long sys_creat - .long sys_link - .long sys_unlink /* 10 */ - .long sys_execve - .long sys_chdir - .long sys_time - .long sys_mknod - .long sys_chmod /* 15 */ - .long sys_chown - .long sys_lchown - .long sys_lseek - .long sys_llseek - .long sys_getpid /* 20 */ - .long sys_mount - .long sys_umount - .long sys_setuid - .long sys_getuid - .long sys_stime /* 25 */ - .long sys_ptrace - .long sys_alarm - .long sys_pause - .long sys_utime - .long sys_newstat /* 30 */ - .long sys_newfstat - .long sys_newlstat - .long sys_access - .long sys_chroot - .long sys_sync /* 35 */ - .long sys_fsync - .long sys_kill - .long sys_rename - .long sys_mkdir - .long sys_rmdir /* 40 */ - .long sys_dup - .long sys_pipe - .long sys_times - .long sys_clone - .long sys_brk /* 45 */ - .long sys_setgid - .long sys_getgid - .long sys_getcwd - .long sys_geteuid - .long sys_getegid /* 50 */ - .long sys_acct - .long sys_setfsuid - .long sys_setfsgid 
- .long sys_ioctl - .long sys_fcntl /* 55 */ - .long sys_setpgid - .long sys_mremap - .long sys_setresuid - .long sys_getresuid - .long sys_setreuid /* 60 */ - .long sys_setregid - .long sys_ustat - .long sys_dup2 - .long sys_getppid - .long sys_getpgrp /* 65 */ - .long sys_setsid - .long sys_rt_sigaction - .long __sys_rt_sigreturn - .long sys_rt_sigprocmask - .long sys_rt_sigpending /* 70 */ - .long sys_rt_sigtimedwait - .long sys_rt_sigqueueinfo - .long __sys_rt_sigsuspend - .long sys_sethostname - .long sys_setrlimit /* 75 */ - .long sys_getrlimit - .long sys_getrusage - .long sys_gettimeofday - .long sys_settimeofday - .long sys_getgroups /* 80 */ - .long sys_setgroups - .long sys_select - .long sys_symlink - .long sys_fchdir - .long sys_readlink /* 85 */ - .long sys_pread64 - .long sys_pwrite64 - .long sys_swapon - .long sys_reboot - .long __sys_mmap2 /* 90 */ - .long sys_munmap - .long sys_truncate - .long sys_ftruncate - .long sys_fchmod - .long sys_fchown /* 95 */ - .long sys_getpriority - .long sys_setpriority - .long sys_wait4 - .long sys_statfs - .long sys_fstatfs /* 100 */ - .long sys_vhangup - .long sys_sigaltstack - .long sys_syslog - .long sys_setitimer - .long sys_getitimer /* 105 */ - .long sys_swapoff - .long sys_sysinfo - .long sys_ni_syscall /* was sys_ipc briefly */ - .long sys_sendfile - .long sys_setdomainname /* 110 */ - .long sys_newuname - .long sys_adjtimex - .long sys_mprotect - .long sys_vfork - .long sys_init_module /* 115 */ - .long sys_delete_module - .long sys_quotactl - .long sys_getpgid - .long sys_bdflush - .long sys_sysfs /* 120 */ - .long sys_personality - .long sys_ni_syscall /* reserved for afs_syscall */ - .long sys_getdents - .long sys_flock - .long sys_msync /* 125 */ - .long sys_readv - .long sys_writev - .long sys_getsid - .long sys_fdatasync - .long sys_sysctl /* 130 */ - .long sys_mlock - .long sys_munlock - .long sys_mlockall - .long sys_munlockall - .long sys_sched_setparam /* 135 */ - .long sys_sched_getparam - 
.long sys_sched_setscheduler - .long sys_sched_getscheduler - .long sys_sched_yield - .long sys_sched_get_priority_max /* 140 */ - .long sys_sched_get_priority_min - .long sys_sched_rr_get_interval - .long sys_nanosleep - .long sys_poll - .long sys_ni_syscall /* 145 was nfsservctl */ - .long sys_setresgid - .long sys_getresgid - .long sys_prctl - .long sys_socket - .long sys_bind /* 150 */ - .long sys_connect - .long sys_listen - .long sys_accept - .long sys_getsockname - .long sys_getpeername /* 155 */ - .long sys_socketpair - .long sys_send - .long sys_recv - .long __sys_sendto - .long __sys_recvfrom /* 160 */ - .long sys_shutdown - .long sys_setsockopt - .long sys_getsockopt - .long sys_sendmsg - .long sys_recvmsg /* 165 */ - .long sys_truncate64 - .long sys_ftruncate64 - .long sys_stat64 - .long sys_lstat64 - .long sys_fstat64 /* 170 */ - .long sys_pivot_root - .long sys_mincore - .long sys_madvise - .long sys_getdents64 - .long sys_fcntl64 /* 175 */ - .long sys_gettid - .long sys_readahead - .long sys_setxattr - .long sys_lsetxattr - .long sys_fsetxattr /* 180 */ - .long sys_getxattr - .long sys_lgetxattr - .long sys_fgetxattr - .long sys_listxattr - .long sys_llistxattr /* 185 */ - .long sys_flistxattr - .long sys_removexattr - .long sys_lremovexattr - .long sys_fremovexattr - .long sys_tkill /* 190 */ - .long sys_sendfile64 - .long sys_futex - .long sys_sched_setaffinity - .long sys_sched_getaffinity - .long sys_capget /* 195 */ - .long sys_capset - .long sys_io_setup - .long sys_io_destroy - .long sys_io_getevents - .long sys_io_submit /* 200 */ - .long sys_io_cancel - .long sys_fadvise64 - .long sys_exit_group - .long sys_lookup_dcookie - .long sys_epoll_create /* 205 */ - .long sys_epoll_ctl - .long sys_epoll_wait - .long sys_remap_file_pages - .long sys_set_tid_address - .long sys_timer_create /* 210 */ - .long sys_timer_settime - .long sys_timer_gettime - .long sys_timer_getoverrun - .long sys_timer_delete - .long sys_clock_settime /* 215 */ - .long 
sys_clock_gettime - .long sys_clock_getres - .long sys_clock_nanosleep - .long sys_statfs64 - .long sys_fstatfs64 /* 220 */ - .long sys_tgkill - .long sys_ni_syscall /* reserved for TUX */ - .long sys_utimes - .long sys_fadvise64_64 - .long sys_cacheflush /* 225 */ - .long sys_ni_syscall /* sys_vserver */ - .long sys_mq_open - .long sys_mq_unlink - .long sys_mq_timedsend - .long sys_mq_timedreceive /* 230 */ - .long sys_mq_notify - .long sys_mq_getsetattr - .long sys_kexec_load - .long sys_waitid - .long sys_add_key /* 235 */ - .long sys_request_key - .long sys_keyctl - .long sys_ioprio_set - .long sys_ioprio_get - .long sys_inotify_init /* 240 */ - .long sys_inotify_add_watch - .long sys_inotify_rm_watch - .long sys_openat - .long sys_mkdirat - .long sys_mknodat /* 245 */ - .long sys_fchownat - .long sys_futimesat - .long sys_fstatat64 - .long sys_unlinkat - .long sys_renameat /* 250 */ - .long sys_linkat - .long sys_symlinkat - .long sys_readlinkat - .long sys_fchmodat - .long sys_faccessat /* 255 */ - .long __sys_pselect6 - .long sys_ppoll - .long sys_unshare - .long sys_set_robust_list - .long sys_get_robust_list /* 260 */ - .long __sys_splice - .long __sys_sync_file_range - .long sys_tee - .long sys_vmsplice - .long __sys_epoll_pwait /* 265 */ - .long sys_msgget - .long sys_msgsnd - .long sys_msgrcv - .long sys_msgctl - .long sys_semget /* 270 */ - .long sys_semop - .long sys_semctl - .long sys_semtimedop - .long sys_shmat - .long sys_shmget /* 275 */ - .long sys_shmdt - .long sys_shmctl - .long sys_utimensat - .long sys_signalfd - .long sys_ni_syscall /* 280, was sys_timerfd */ - .long sys_eventfd - .long sys_recvmmsg - .long sys_setns - .long sys_pread64 - .long sys_pwrite64 /* 285 */ - .long sys_timerfd_create - .long __sys_fallocate - .long sys_timerfd_settime - .long sys_timerfd_gettime - .long sys_signalfd4 /* 290 */ - .long sys_eventfd2 - .long sys_epoll_create1 - .long sys_dup3 - .long sys_pipe2 - .long sys_inotify_init1 /* 295 */ - .long sys_preadv - 
.long sys_pwritev - .long sys_rt_tgsigqueueinfo - .long sys_perf_event_open - .long sys_recvmmsg /* 300 */ - .long sys_fanotify_init - .long __sys_fanotify_mark - .long sys_prlimit64 - .long sys_name_to_handle_at - .long sys_open_by_handle_at /* 305 */ - .long sys_clock_adjtime - .long sys_syncfs - .long sys_sendmmsg - .long __sys_process_vm_readv - .long __sys_process_vm_writev /* 310 */ - .long sys_kcmp - .long sys_finit_module - .long sys_sched_setattr - .long sys_sched_getattr - .long sys_renameat2 /* 315 */ - .long sys_seccomp - .long sys_getrandom - .long sys_memfd_create - .long sys_bpf - .long sys_execveat /* 320 */ - .long sys_accept4 - .long sys_userfaultfd - .long sys_membarrier - .long sys_mlock2 - .long __sys_copy_file_range /* 325 */ - .long sys_ni_syscall /* r8 is saturated at nr_syscalls */ + .long sys_restart_syscall + .long sys_exit + .long sys_fork + .long sys_read + .long sys_write + .long sys_open + .long sys_close + .long sys_umask + .long sys_creat + .long sys_link + .long sys_unlink /* 10 */ + .long sys_execve + .long sys_chdir + .long sys_time + .long sys_mknod + .long sys_chmod + .long sys_chown + .long sys_lchown + .long sys_lseek + .long sys_llseek + .long sys_getpid /* 20 */ + .long sys_mount + .long sys_umount + .long sys_setuid + .long sys_getuid + .long sys_stime + .long sys_ptrace + .long sys_alarm + .long sys_pause + .long sys_utime + .long sys_newstat /* 30 */ + .long sys_newfstat + .long sys_newlstat + .long sys_access + .long sys_chroot + .long sys_sync + .long sys_fsync + .long sys_kill + .long sys_rename + .long sys_mkdir + .long sys_rmdir /* 40 */ + .long sys_dup + .long sys_pipe + .long sys_times + .long sys_clone + .long sys_brk + .long sys_setgid + .long sys_getgid + .long sys_getcwd + .long sys_geteuid + .long sys_getegid /* 50 */ + .long sys_acct + .long sys_setfsuid + .long sys_setfsgid + .long sys_ioctl + .long sys_fcntl + .long sys_setpgid + .long sys_mremap + .long sys_setresuid + .long sys_getresuid + .long 
sys_setreuid /* 60 */ + .long sys_setregid + .long sys_ustat + .long sys_dup2 + .long sys_getppid + .long sys_getpgrp + .long sys_setsid + .long sys_rt_sigaction + .long __sys_rt_sigreturn + .long sys_rt_sigprocmask + .long sys_rt_sigpending /* 70 */ + .long sys_rt_sigtimedwait + .long sys_rt_sigqueueinfo + .long __sys_rt_sigsuspend + .long sys_sethostname + .long sys_setrlimit + .long sys_getrlimit + .long sys_getrusage + .long sys_gettimeofday + .long sys_settimeofday + .long sys_getgroups /* 80 */ + .long sys_setgroups + .long sys_select + .long sys_symlink + .long sys_fchdir + .long sys_readlink + .long sys_pread64 + .long sys_pwrite64 + .long sys_swapon + .long sys_reboot + .long __sys_mmap2 /* 90 */ + .long sys_munmap + .long sys_truncate + .long sys_ftruncate + .long sys_fchmod + .long sys_fchown + .long sys_getpriority + .long sys_setpriority + .long sys_wait4 + .long sys_statfs + .long sys_fstatfs /* 100 */ + .long sys_vhangup + .long sys_sigaltstack + .long sys_syslog + .long sys_setitimer + .long sys_getitimer + .long sys_swapoff + .long sys_sysinfo + .long sys_ni_syscall /* was sys_ipc briefly */ + .long sys_sendfile + .long sys_setdomainname /* 110 */ + .long sys_newuname + .long sys_adjtimex + .long sys_mprotect + .long sys_vfork + .long sys_init_module + .long sys_delete_module + .long sys_quotactl + .long sys_getpgid + .long sys_bdflush + .long sys_sysfs /* 120 */ + .long sys_personality + .long sys_ni_syscall /* reserved for afs_syscall */ + .long sys_getdents + .long sys_flock + .long sys_msync + .long sys_readv + .long sys_writev + .long sys_getsid + .long sys_fdatasync + .long sys_sysctl /* 130 */ + .long sys_mlock + .long sys_munlock + .long sys_mlockall + .long sys_munlockall + .long sys_sched_setparam + .long sys_sched_getparam + .long sys_sched_setscheduler + .long sys_sched_getscheduler + .long sys_sched_yield + .long sys_sched_get_priority_max /* 140 */ + .long sys_sched_get_priority_min + .long sys_sched_rr_get_interval + .long 
sys_nanosleep + .long sys_poll + .long sys_ni_syscall /* 145 was nfsservctl */ + .long sys_setresgid + .long sys_getresgid + .long sys_prctl + .long sys_socket + .long sys_bind /* 150 */ + .long sys_connect + .long sys_listen + .long sys_accept + .long sys_getsockname + .long sys_getpeername + .long sys_socketpair + .long sys_send + .long sys_recv + .long __sys_sendto + .long __sys_recvfrom /* 160 */ + .long sys_shutdown + .long sys_setsockopt + .long sys_getsockopt + .long sys_sendmsg + .long sys_recvmsg + .long sys_truncate64 + .long sys_ftruncate64 + .long sys_stat64 + .long sys_lstat64 + .long sys_fstat64 /* 170 */ + .long sys_pivot_root + .long sys_mincore + .long sys_madvise + .long sys_getdents64 + .long sys_fcntl64 + .long sys_gettid + .long sys_readahead + .long sys_setxattr + .long sys_lsetxattr + .long sys_fsetxattr /* 180 */ + .long sys_getxattr + .long sys_lgetxattr + .long sys_fgetxattr + .long sys_listxattr + .long sys_llistxattr + .long sys_flistxattr + .long sys_removexattr + .long sys_lremovexattr + .long sys_fremovexattr + .long sys_tkill /* 190 */ + .long sys_sendfile64 + .long sys_futex + .long sys_sched_setaffinity + .long sys_sched_getaffinity + .long sys_capget + .long sys_capset + .long sys_io_setup + .long sys_io_destroy + .long sys_io_getevents + .long sys_io_submit /* 200 */ + .long sys_io_cancel + .long sys_fadvise64 + .long sys_exit_group + .long sys_lookup_dcookie + .long sys_epoll_create + .long sys_epoll_ctl + .long sys_epoll_wait + .long sys_remap_file_pages + .long sys_set_tid_address + .long sys_timer_create /* 210 */ + .long sys_timer_settime + .long sys_timer_gettime + .long sys_timer_getoverrun + .long sys_timer_delete + .long sys_clock_settime + .long sys_clock_gettime + .long sys_clock_getres + .long sys_clock_nanosleep + .long sys_statfs64 + .long sys_fstatfs64 /* 220 */ + .long sys_tgkill + .long sys_ni_syscall /* reserved for TUX */ + .long sys_utimes + .long sys_fadvise64_64 + .long sys_cacheflush + .long sys_ni_syscall 
/* sys_vserver */ + .long sys_mq_open + .long sys_mq_unlink + .long sys_mq_timedsend + .long sys_mq_timedreceive /* 230 */ + .long sys_mq_notify + .long sys_mq_getsetattr + .long sys_kexec_load + .long sys_waitid + .long sys_add_key + .long sys_request_key + .long sys_keyctl + .long sys_ioprio_set + .long sys_ioprio_get + .long sys_inotify_init /* 240 */ + .long sys_inotify_add_watch + .long sys_inotify_rm_watch + .long sys_openat + .long sys_mkdirat + .long sys_mknodat + .long sys_fchownat + .long sys_futimesat + .long sys_fstatat64 + .long sys_unlinkat + .long sys_renameat /* 250 */ + .long sys_linkat + .long sys_symlinkat + .long sys_readlinkat + .long sys_fchmodat + .long sys_faccessat + .long __sys_pselect6 + .long sys_ppoll + .long sys_unshare + .long sys_set_robust_list + .long sys_get_robust_list /* 260 */ + .long __sys_splice + .long __sys_sync_file_range + .long sys_tee + .long sys_vmsplice + .long __sys_epoll_pwait + .long sys_msgget + .long sys_msgsnd + .long sys_msgrcv + .long sys_msgctl + .long sys_semget /* 270 */ + .long sys_semop + .long sys_semctl + .long sys_semtimedop + .long sys_shmat + .long sys_shmget + .long sys_shmdt + .long sys_shmctl + .long sys_utimensat + .long sys_signalfd + .long sys_ni_syscall /* 280, was sys_timerfd */ + .long sys_eventfd + .long sys_ni_syscall /* 282, was half-implemented recvmmsg */ + .long sys_setns + .long sys_pread64 + .long sys_pwrite64 + .long sys_timerfd_create + .long __sys_fallocate + .long sys_timerfd_settime + .long sys_timerfd_gettime + .long sys_signalfd4 /* 290 */ + .long sys_eventfd2 + .long sys_epoll_create1 + .long sys_dup3 + .long sys_pipe2 + .long sys_inotify_init1 + .long sys_preadv + .long sys_pwritev + .long sys_rt_tgsigqueueinfo + .long sys_perf_event_open + .long sys_recvmmsg /* 300 */ + .long sys_fanotify_init + .long __sys_fanotify_mark + .long sys_prlimit64 + .long sys_name_to_handle_at + .long sys_open_by_handle_at + .long sys_clock_adjtime + .long sys_syncfs + .long sys_sendmmsg + .long 
__sys_process_vm_readv + .long __sys_process_vm_writev /* 310 */ + .long sys_kcmp + .long sys_finit_module + .long sys_sched_setattr + .long sys_sched_getattr + .long sys_renameat2 + .long sys_seccomp + .long sys_getrandom + .long sys_memfd_create + .long sys_bpf + .long sys_execveat /* 320 */ + .long sys_accept4 + .long sys_userfaultfd + .long sys_membarrier + .long sys_mlock2 + .long __sys_copy_file_range + .long __sys_preadv2 + .long __sys_pwritev2 + .long sys_ni_syscall /* r8 is saturated at nr_syscalls */ diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c index 83c2a0021b56..13d3fc4270b7 100644 --- a/arch/avr32/mach-at32ap/pio.c +++ b/arch/avr32/mach-at32ap/pio.c @@ -435,7 +435,7 @@ void __init at32_init_pio(struct platform_device *pdev) struct resource *regs; struct pio_device *pio; - if (pdev->id > MAX_NR_PIO_DEVICES) { + if (pdev->id >= MAX_NR_PIO_DEVICES) { dev_err(&pdev->dev, "only %d PIO devices supported\n", MAX_NR_PIO_DEVICES); return; diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c index c03533937a9f..a4b7edac8f10 100644 --- a/arch/avr32/mm/fault.c +++ b/arch/avr32/mm/fault.c @@ -134,7 +134,7 @@ good_area: * sure we exit gracefully rather than endlessly redo the * fault. 
*/ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/blackfin/kernel/perf_event.c b/arch/blackfin/kernel/perf_event.c index 170d786807c4..6355e97d22b9 100644 --- a/arch/blackfin/kernel/perf_event.c +++ b/arch/blackfin/kernel/perf_event.c @@ -453,29 +453,13 @@ static struct pmu pmu = { .read = bfin_pmu_read, }; -static void bfin_pmu_setup(int cpu) +static int bfin_pmu_prepare_cpu(unsigned int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); + bfin_write_PFCTL(0); memset(cpuhw, 0, sizeof(struct cpu_hw_events)); -} - -static int -bfin_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) -{ - unsigned int cpu = (long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - bfin_write_PFCTL(0); - bfin_pmu_setup(cpu); - break; - - default: - break; - } - - return NOTIFY_OK; + return 0; } static int __init bfin_pmu_init(void) @@ -491,8 +475,8 @@ static int __init bfin_pmu_init(void) ret = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); if (!ret) - perf_cpu_notifier(bfin_pmu_notifier); - + cpuhp_setup_state(CPUHP_PERF_BFIN, "PERF_BFIN", + bfin_pmu_prepare_cpu, NULL); return ret; } early_initcall(bfin_pmu_init); diff --git a/arch/blackfin/mach-bf609/boards/ezkit.c b/arch/blackfin/mach-bf609/boards/ezkit.c index aad5d7416886..9231e5a72b93 100644 --- a/arch/blackfin/mach-bf609/boards/ezkit.c +++ b/arch/blackfin/mach-bf609/boards/ezkit.c @@ -1002,14 +1002,12 @@ static struct adv7842_output_format adv7842_opf[] = { { .op_ch_sel = ADV7842_OP_CH_SEL_BRG, .op_format_sel = ADV7842_OP_FORMAT_SEL_SDR_ITU656_8, - .op_656_range = 1, .blank_data = 1, .insert_av_codes = 1, }, { .op_ch_sel = ADV7842_OP_CH_SEL_RGB, .op_format_sel = ADV7842_OP_FORMAT_SEL_SDR_ITU656_16, - .op_656_range = 1, .blank_data = 1, }, }; diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index 
3066d40a6db1..112ef26c7f2e 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -168,7 +168,7 @@ retry: * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c index 61d99767fe16..614a46c413d2 100644 --- a/arch/frv/mm/fault.c +++ b/arch/frv/mm/fault.c @@ -164,7 +164,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, ear0, flags); + fault = handle_mm_fault(vma, ear0, flags); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 57298e7b4867..1941e4baaee6 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -8,8 +8,7 @@ config HEXAGON # select HAVE_REGS_AND_STACK_ACCESS_API # select HAVE_HW_BREAKPOINT if PERF_EVENTS # select ARCH_HAS_CPU_IDLE_WAIT - # select ARCH_WANT_OPTIONAL_GPIOLIB - # select ARCH_REQUIRE_GPIOLIB + # select GPIOLIB # select HAVE_CLK # select GENERIC_PENDING_IRQ if SMP select GENERIC_ATOMIC64 diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index 8704c9320032..bd7c251e2bce 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -101,7 +101,7 @@ good_area: break; } - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index e109ee95e919..6a15083cc366 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -39,7 +39,6 @@ config IA64 select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW select GENERIC_IRQ_LEGACY - select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_IOMAP 
select GENERIC_SMP_IDLE_THREAD diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h index aa0fdf125aba..a3d0211970e9 100644 --- a/arch/ia64/include/asm/acpi.h +++ b/arch/ia64/include/asm/acpi.h @@ -140,6 +140,9 @@ static inline void per_cpu_scan_finalize(int min_cpus, int reserve_cpus) } } } + +extern void acpi_numa_fixup(void); + #endif /* CONFIG_ACPI_NUMA */ #endif /*__KERNEL__*/ diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index 39d64e0df1de..77e541cf0e5d 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -205,17 +205,18 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) * must be delayed until after the TLB has been flushed (see comments at the beginning of * this file). */ -static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) +static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { + if (tlb->nr == tlb->max) + return true; + tlb->need_flush = 1; if (!tlb->nr && tlb->pages == tlb->local) __tlb_alloc_page(tlb); tlb->pages[tlb->nr++] = page; - VM_BUG_ON(tlb->nr > tlb->max); - - return tlb->max - tlb->nr; + return false; } static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) @@ -235,8 +236,28 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) { - if (!__tlb_remove_page(tlb, page)) + if (__tlb_remove_page(tlb, page)) { tlb_flush_mmu(tlb); + __tlb_remove_page(tlb, page); + } +} + +static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + return __tlb_remove_page(tlb, page); +} + +static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, + struct page *page) +{ + return __tlb_remove_page(tlb, page); +} + +static inline void tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + return tlb_remove_page(tlb, page); } /* diff --git 
a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index b1698bc042c8..92b7bc956795 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -524,7 +524,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) return 0; } -void __init acpi_numa_arch_fixup(void) +void __init acpi_numa_fixup(void) { int i, j, node_from, node_to; diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 2029a38a72ae..afddb3e80a29 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -552,6 +552,7 @@ setup_arch (char **cmdline_p) early_acpi_boot_init(); # ifdef CONFIG_ACPI_NUMA acpi_numa_init(); + acpi_numa_fixup(); # ifdef CONFIG_ACPI_HOTPLUG_CPU prefill_possible_map(); # endif diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 70b40d1205a6..fa6ad95e992e 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -159,7 +159,7 @@ retry: * sure we exit gracefully rather than endlessly redo the * fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/m32r/kernel/m32r_ksyms.c b/arch/m32r/kernel/m32r_ksyms.c index b727e693c805..23f26f4adfff 100644 --- a/arch/m32r/kernel/m32r_ksyms.c +++ b/arch/m32r/kernel/m32r_ksyms.c @@ -41,6 +41,9 @@ EXPORT_SYMBOL(cpu_data); EXPORT_SYMBOL(smp_flush_tlb_page); #endif +extern int __ucmpdi2(unsigned long long a, unsigned long long b); +EXPORT_SYMBOL(__ucmpdi2); + /* compiler generated symbol */ extern void __ashldi3(void); extern void __ashrdi3(void); diff --git a/arch/m32r/lib/Makefile b/arch/m32r/lib/Makefile index d16b4e40d1ae..5889eb9610b5 100644 --- a/arch/m32r/lib/Makefile +++ b/arch/m32r/lib/Makefile @@ -3,5 +3,5 @@ # lib-y := checksum.o ashxdi3.o memset.o memcpy.o \ - delay.o strlen.o usercopy.o csum_partial_copy.o - + delay.o strlen.o usercopy.o csum_partial_copy.o \ + ucmpdi2.o diff --git a/arch/m32r/lib/libgcc.h 
b/arch/m32r/lib/libgcc.h new file mode 100644 index 000000000000..267aa435bc35 --- /dev/null +++ b/arch/m32r/lib/libgcc.h @@ -0,0 +1,23 @@ +#ifndef __ASM_LIBGCC_H +#define __ASM_LIBGCC_H + +#include <asm/byteorder.h> + +#ifdef __BIG_ENDIAN +struct DWstruct { + int high, low; +}; +#elif defined(__LITTLE_ENDIAN) +struct DWstruct { + int low, high; +}; +#else +#error I feel sick. +#endif + +typedef union { + struct DWstruct s; + long long ll; +} DWunion; + +#endif /* __ASM_LIBGCC_H */ diff --git a/arch/m32r/lib/ucmpdi2.c b/arch/m32r/lib/ucmpdi2.c new file mode 100644 index 000000000000..9d3c682c89b5 --- /dev/null +++ b/arch/m32r/lib/ucmpdi2.c @@ -0,0 +1,17 @@ +#include "libgcc.h" + +int __ucmpdi2(unsigned long long a, unsigned long long b) +{ + const DWunion au = {.ll = a}; + const DWunion bu = {.ll = b}; + + if ((unsigned int)au.s.high < (unsigned int)bu.s.high) + return 0; + else if ((unsigned int)au.s.high > (unsigned int)bu.s.high) + return 2; + if ((unsigned int)au.s.low < (unsigned int)bu.s.low) + return 0; + else if ((unsigned int)au.s.low > (unsigned int)bu.s.low) + return 2; + return 1; +} diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c index 8f9875b7933d..a3785d3644c2 100644 --- a/arch/m32r/mm/fault.c +++ b/arch/m32r/mm/fault.c @@ -196,7 +196,7 @@ good_area: */ addr = (address & PAGE_MASK); set_thread_fault_code(error_code); - fault = handle_mm_fault(mm, vma, addr, flags); + fault = handle_mm_fault(vma, addr, flags); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 6a94cdd0c830..bd66a0b20c6b 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -136,7 +136,7 @@ good_area: * the fault. 
*/ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); pr_debug("handle_mm_fault returns %d\n", fault); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c index 33a365f924be..052cba23708c 100644 --- a/arch/metag/kernel/perf/perf_event.c +++ b/arch/metag/kernel/perf/perf_event.c @@ -806,25 +806,16 @@ static struct metag_pmu _metag_pmu = { }; /* PMU CPU hotplug notifier */ -static int metag_pmu_cpu_notify(struct notifier_block *b, unsigned long action, - void *hcpu) +static int metag_pmu_starting_cpu(unsigned int cpu) { - unsigned int cpu = (unsigned int)hcpu; struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) - return NOTIFY_DONE; - memset(cpuc, 0, sizeof(struct cpu_hw_events)); raw_spin_lock_init(&cpuc->pmu_lock); - return NOTIFY_OK; + return 0; } -static struct notifier_block metag_pmu_notifier = { - .notifier_call = metag_pmu_cpu_notify, -}; - /* PMU Initialisation */ static int __init init_hw_perf_events(void) { @@ -876,16 +867,13 @@ static int __init init_hw_perf_events(void) metag_out32(0, PERF_COUNT(0)); metag_out32(0, PERF_COUNT(1)); - for_each_possible_cpu(cpu) { - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + cpuhp_setup_state(CPUHP_AP_PERF_METAG_STARTING, + "AP_PERF_METAG_STARTING", metag_pmu_starting_cpu, + NULL); - memset(cpuc, 0, sizeof(struct cpu_hw_events)); - raw_spin_lock_init(&cpuc->pmu_lock); - } - - register_cpu_notifier(&metag_pmu_notifier); ret = perf_pmu_register(&pmu, metag_pmu->name, PERF_TYPE_RAW); -out: + if (ret) + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_METAG_STARTING); return ret; } early_initcall(init_hw_perf_events); diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c index f57edca63609..372783a67dda 100644 --- a/arch/metag/mm/fault.c +++ b/arch/metag/mm/fault.c @@ -133,7 +133,7 @@ good_area: * make sure we 
exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return 0; diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 636e0720fb20..86f65721e629 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -3,7 +3,6 @@ config MICROBLAZE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_WANT_IPC_PARSE_VERSION - select ARCH_WANT_OPTIONAL_GPIOLIB select BUILDTIME_EXTABLE_SORT select CLKSRC_OF select CLONE_BACKWARDS3 diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index 177dfc003643..abb678ccde6f 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -216,7 +216,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig index 7f50dd67aa8d..65f140e1e872 100644 --- a/arch/mips/configs/malta_qemu_32r6_defconfig +++ b/arch/mips/configs/malta_qemu_32r6_defconfig @@ -146,7 +146,7 @@ CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_LEDS_TRIGGER_IDE_DISK=y +CONFIG_LEDS_TRIGGER_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_LEDS_TRIGGER_BACKLIGHT=y CONFIG_LEDS_TRIGGER_DEFAULT_ON=y diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig index a9d433a17fcf..799c4338fd5e 100644 --- a/arch/mips/configs/maltaaprp_defconfig +++ b/arch/mips/configs/maltaaprp_defconfig @@ -147,7 +147,7 @@ CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_LEDS_TRIGGER_IDE_DISK=y +CONFIG_LEDS_TRIGGER_DISK=y 
CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_LEDS_TRIGGER_BACKLIGHT=y CONFIG_LEDS_TRIGGER_DEFAULT_ON=y diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig index 2774ef064505..31846000530f 100644 --- a/arch/mips/configs/maltasmvp_eva_defconfig +++ b/arch/mips/configs/maltasmvp_eva_defconfig @@ -152,7 +152,7 @@ CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_LEDS_TRIGGER_IDE_DISK=y +CONFIG_LEDS_TRIGGER_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_LEDS_TRIGGER_BACKLIGHT=y CONFIG_LEDS_TRIGGER_DEFAULT_ON=y diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig index 9bbd2218f0bf..a79107da0675 100644 --- a/arch/mips/configs/maltaup_defconfig +++ b/arch/mips/configs/maltaup_defconfig @@ -146,7 +146,7 @@ CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_LEDS_TRIGGER_IDE_DISK=y +CONFIG_LEDS_TRIGGER_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_LEDS_TRIGGER_BACKLIGHT=y CONFIG_LEDS_TRIGGER_DEFAULT_ON=y diff --git a/arch/mips/configs/rbtx49xx_defconfig b/arch/mips/configs/rbtx49xx_defconfig index f8bf9b4c1343..43d55e5abacb 100644 --- a/arch/mips/configs/rbtx49xx_defconfig +++ b/arch/mips/configs/rbtx49xx_defconfig @@ -90,7 +90,7 @@ CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_GPIO=y CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_IDE_DISK=y +CONFIG_LEDS_TRIGGER_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_RTC_CLASS=y CONFIG_RTC_INTF_DEV_UIE_EMUL=y diff --git a/arch/mips/include/asm/octeon/cvmx-mpi-defs.h b/arch/mips/include/asm/octeon/cvmx-mpi-defs.h deleted file mode 100644 index 4615b102625b..000000000000 --- a/arch/mips/include/asm/octeon/cvmx-mpi-defs.h +++ /dev/null @@ -1,328 +0,0 @@ -/***********************license start*************** - * Author: Cavium Networks - * - * Contact: support@caviumnetworks.com - * This file is part of the OCTEON SDK - * - * Copyright (c) 2003-2012 Cavium Networks - * - * 
This file is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, Version 2, as - * published by the Free Software Foundation. - * - * This file is distributed in the hope that it will be useful, but - * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or - * NONINFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this file; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * or visit http://www.gnu.org/licenses/. - * - * This file may also be available under a different license from Cavium. - * Contact Cavium Networks for more information - ***********************license end**************************************/ - -#ifndef __CVMX_MPI_DEFS_H__ -#define __CVMX_MPI_DEFS_H__ - -#define CVMX_MPI_CFG (CVMX_ADD_IO_SEG(0x0001070000001000ull)) -#define CVMX_MPI_DATX(offset) (CVMX_ADD_IO_SEG(0x0001070000001080ull) + ((offset) & 15) * 8) -#define CVMX_MPI_STS (CVMX_ADD_IO_SEG(0x0001070000001008ull)) -#define CVMX_MPI_TX (CVMX_ADD_IO_SEG(0x0001070000001010ull)) - -union cvmx_mpi_cfg { - uint64_t u64; - struct cvmx_mpi_cfg_s { -#ifdef __BIG_ENDIAN_BITFIELD - uint64_t reserved_29_63:35; - uint64_t clkdiv:13; - uint64_t csena3:1; - uint64_t csena2:1; - uint64_t csena1:1; - uint64_t csena0:1; - uint64_t cslate:1; - uint64_t tritx:1; - uint64_t idleclks:2; - uint64_t cshi:1; - uint64_t csena:1; - uint64_t int_ena:1; - uint64_t lsbfirst:1; - uint64_t wireor:1; - uint64_t clk_cont:1; - uint64_t idlelo:1; - uint64_t enable:1; -#else - uint64_t enable:1; - uint64_t idlelo:1; - uint64_t clk_cont:1; - uint64_t wireor:1; - uint64_t lsbfirst:1; - uint64_t int_ena:1; - uint64_t csena:1; - uint64_t cshi:1; - uint64_t idleclks:2; - uint64_t tritx:1; - uint64_t cslate:1; - uint64_t 
csena0:1; - uint64_t csena1:1; - uint64_t csena2:1; - uint64_t csena3:1; - uint64_t clkdiv:13; - uint64_t reserved_29_63:35; -#endif - } s; - struct cvmx_mpi_cfg_cn30xx { -#ifdef __BIG_ENDIAN_BITFIELD - uint64_t reserved_29_63:35; - uint64_t clkdiv:13; - uint64_t reserved_12_15:4; - uint64_t cslate:1; - uint64_t tritx:1; - uint64_t idleclks:2; - uint64_t cshi:1; - uint64_t csena:1; - uint64_t int_ena:1; - uint64_t lsbfirst:1; - uint64_t wireor:1; - uint64_t clk_cont:1; - uint64_t idlelo:1; - uint64_t enable:1; -#else - uint64_t enable:1; - uint64_t idlelo:1; - uint64_t clk_cont:1; - uint64_t wireor:1; - uint64_t lsbfirst:1; - uint64_t int_ena:1; - uint64_t csena:1; - uint64_t cshi:1; - uint64_t idleclks:2; - uint64_t tritx:1; - uint64_t cslate:1; - uint64_t reserved_12_15:4; - uint64_t clkdiv:13; - uint64_t reserved_29_63:35; -#endif - } cn30xx; - struct cvmx_mpi_cfg_cn31xx { -#ifdef __BIG_ENDIAN_BITFIELD - uint64_t reserved_29_63:35; - uint64_t clkdiv:13; - uint64_t reserved_11_15:5; - uint64_t tritx:1; - uint64_t idleclks:2; - uint64_t cshi:1; - uint64_t csena:1; - uint64_t int_ena:1; - uint64_t lsbfirst:1; - uint64_t wireor:1; - uint64_t clk_cont:1; - uint64_t idlelo:1; - uint64_t enable:1; -#else - uint64_t enable:1; - uint64_t idlelo:1; - uint64_t clk_cont:1; - uint64_t wireor:1; - uint64_t lsbfirst:1; - uint64_t int_ena:1; - uint64_t csena:1; - uint64_t cshi:1; - uint64_t idleclks:2; - uint64_t tritx:1; - uint64_t reserved_11_15:5; - uint64_t clkdiv:13; - uint64_t reserved_29_63:35; -#endif - } cn31xx; - struct cvmx_mpi_cfg_cn30xx cn50xx; - struct cvmx_mpi_cfg_cn61xx { -#ifdef __BIG_ENDIAN_BITFIELD - uint64_t reserved_29_63:35; - uint64_t clkdiv:13; - uint64_t reserved_14_15:2; - uint64_t csena1:1; - uint64_t csena0:1; - uint64_t cslate:1; - uint64_t tritx:1; - uint64_t idleclks:2; - uint64_t cshi:1; - uint64_t reserved_6_6:1; - uint64_t int_ena:1; - uint64_t lsbfirst:1; - uint64_t wireor:1; - uint64_t clk_cont:1; - uint64_t idlelo:1; - uint64_t enable:1; 
-#else - uint64_t enable:1; - uint64_t idlelo:1; - uint64_t clk_cont:1; - uint64_t wireor:1; - uint64_t lsbfirst:1; - uint64_t int_ena:1; - uint64_t reserved_6_6:1; - uint64_t cshi:1; - uint64_t idleclks:2; - uint64_t tritx:1; - uint64_t cslate:1; - uint64_t csena0:1; - uint64_t csena1:1; - uint64_t reserved_14_15:2; - uint64_t clkdiv:13; - uint64_t reserved_29_63:35; -#endif - } cn61xx; - struct cvmx_mpi_cfg_cn66xx { -#ifdef __BIG_ENDIAN_BITFIELD - uint64_t reserved_29_63:35; - uint64_t clkdiv:13; - uint64_t csena3:1; - uint64_t csena2:1; - uint64_t reserved_12_13:2; - uint64_t cslate:1; - uint64_t tritx:1; - uint64_t idleclks:2; - uint64_t cshi:1; - uint64_t reserved_6_6:1; - uint64_t int_ena:1; - uint64_t lsbfirst:1; - uint64_t wireor:1; - uint64_t clk_cont:1; - uint64_t idlelo:1; - uint64_t enable:1; -#else - uint64_t enable:1; - uint64_t idlelo:1; - uint64_t clk_cont:1; - uint64_t wireor:1; - uint64_t lsbfirst:1; - uint64_t int_ena:1; - uint64_t reserved_6_6:1; - uint64_t cshi:1; - uint64_t idleclks:2; - uint64_t tritx:1; - uint64_t cslate:1; - uint64_t reserved_12_13:2; - uint64_t csena2:1; - uint64_t csena3:1; - uint64_t clkdiv:13; - uint64_t reserved_29_63:35; -#endif - } cn66xx; - struct cvmx_mpi_cfg_cn61xx cnf71xx; -}; - -union cvmx_mpi_datx { - uint64_t u64; - struct cvmx_mpi_datx_s { -#ifdef __BIG_ENDIAN_BITFIELD - uint64_t reserved_8_63:56; - uint64_t data:8; -#else - uint64_t data:8; - uint64_t reserved_8_63:56; -#endif - } s; - struct cvmx_mpi_datx_s cn30xx; - struct cvmx_mpi_datx_s cn31xx; - struct cvmx_mpi_datx_s cn50xx; - struct cvmx_mpi_datx_s cn61xx; - struct cvmx_mpi_datx_s cn66xx; - struct cvmx_mpi_datx_s cnf71xx; -}; - -union cvmx_mpi_sts { - uint64_t u64; - struct cvmx_mpi_sts_s { -#ifdef __BIG_ENDIAN_BITFIELD - uint64_t reserved_13_63:51; - uint64_t rxnum:5; - uint64_t reserved_1_7:7; - uint64_t busy:1; -#else - uint64_t busy:1; - uint64_t reserved_1_7:7; - uint64_t rxnum:5; - uint64_t reserved_13_63:51; -#endif - } s; - struct 
cvmx_mpi_sts_s cn30xx; - struct cvmx_mpi_sts_s cn31xx; - struct cvmx_mpi_sts_s cn50xx; - struct cvmx_mpi_sts_s cn61xx; - struct cvmx_mpi_sts_s cn66xx; - struct cvmx_mpi_sts_s cnf71xx; -}; - -union cvmx_mpi_tx { - uint64_t u64; - struct cvmx_mpi_tx_s { -#ifdef __BIG_ENDIAN_BITFIELD - uint64_t reserved_22_63:42; - uint64_t csid:2; - uint64_t reserved_17_19:3; - uint64_t leavecs:1; - uint64_t reserved_13_15:3; - uint64_t txnum:5; - uint64_t reserved_5_7:3; - uint64_t totnum:5; -#else - uint64_t totnum:5; - uint64_t reserved_5_7:3; - uint64_t txnum:5; - uint64_t reserved_13_15:3; - uint64_t leavecs:1; - uint64_t reserved_17_19:3; - uint64_t csid:2; - uint64_t reserved_22_63:42; -#endif - } s; - struct cvmx_mpi_tx_cn30xx { -#ifdef __BIG_ENDIAN_BITFIELD - uint64_t reserved_17_63:47; - uint64_t leavecs:1; - uint64_t reserved_13_15:3; - uint64_t txnum:5; - uint64_t reserved_5_7:3; - uint64_t totnum:5; -#else - uint64_t totnum:5; - uint64_t reserved_5_7:3; - uint64_t txnum:5; - uint64_t reserved_13_15:3; - uint64_t leavecs:1; - uint64_t reserved_17_63:47; -#endif - } cn30xx; - struct cvmx_mpi_tx_cn30xx cn31xx; - struct cvmx_mpi_tx_cn30xx cn50xx; - struct cvmx_mpi_tx_cn61xx { -#ifdef __BIG_ENDIAN_BITFIELD - uint64_t reserved_21_63:43; - uint64_t csid:1; - uint64_t reserved_17_19:3; - uint64_t leavecs:1; - uint64_t reserved_13_15:3; - uint64_t txnum:5; - uint64_t reserved_5_7:3; - uint64_t totnum:5; -#else - uint64_t totnum:5; - uint64_t reserved_5_7:3; - uint64_t txnum:5; - uint64_t reserved_13_15:3; - uint64_t leavecs:1; - uint64_t reserved_17_19:3; - uint64_t csid:1; - uint64_t reserved_21_63:43; -#endif - } cn61xx; - struct cvmx_mpi_tx_s cn66xx; - struct cvmx_mpi_tx_cn61xx cnf71xx; -}; - -#endif diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 4b88fa031891..9560ad731120 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -153,7 +153,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. 
*/ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/mips/oprofile/op_model_loongson3.c b/arch/mips/oprofile/op_model_loongson3.c index 8bcf7fc40f0d..85f3ee4ab456 100644 --- a/arch/mips/oprofile/op_model_loongson3.c +++ b/arch/mips/oprofile/op_model_loongson3.c @@ -168,33 +168,26 @@ static int loongson3_perfcount_handler(void) return handled; } -static int loongson3_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) +static int loongson3_starting_cpu(unsigned int cpu) { - switch (action) { - case CPU_STARTING: - case CPU_STARTING_FROZEN: - write_c0_perflo1(reg.control1); - write_c0_perflo2(reg.control2); - break; - case CPU_DYING: - case CPU_DYING_FROZEN: - write_c0_perflo1(0xc0000000); - write_c0_perflo2(0x40000000); - break; - } - - return NOTIFY_OK; + write_c0_perflo1(reg.control1); + write_c0_perflo2(reg.control2); + return 0; } -static struct notifier_block loongson3_notifier_block = { - .notifier_call = loongson3_cpu_callback -}; +static int loongson3_dying_cpu(unsigned int cpu) +{ + write_c0_perflo1(0xc0000000); + write_c0_perflo2(0x40000000); + return 0; +} static int __init loongson3_init(void) { on_each_cpu(reset_counters, NULL, 1); - register_hotcpu_notifier(&loongson3_notifier_block); + cpuhp_setup_state_nocalls(CPUHP_AP_MIPS_OP_LOONGSON3_STARTING, + "AP_MIPS_OP_LOONGSON3_STARTING", + loongson3_starting_cpu, loongson3_dying_cpu); save_perf_irq = perf_irq; perf_irq = loongson3_perfcount_handler; @@ -204,7 +197,7 @@ static int __init loongson3_init(void) static void loongson3_exit(void) { on_each_cpu(reset_counters, NULL, 1); - unregister_hotcpu_notifier(&loongson3_notifier_block); + cpuhp_remove_state_nocalls(CPUHP_AP_MIPS_OP_LOONGSON3_STARTING); perf_irq = save_perf_irq; } diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index 108f8a8d1640..ada92db92f87 100644 --- 
a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -727,7 +727,7 @@ void __init txx9_iocled_init(unsigned long baseaddr, int i; static char *default_triggers[] __initdata = { "heartbeat", - "ide-disk", + "disk-activity", "nand-disk", NULL, }; diff --git a/arch/mips/txx9/rbtx4939/setup.c b/arch/mips/txx9/rbtx4939/setup.c index 37030409745c..8b937300fb7f 100644 --- a/arch/mips/txx9/rbtx4939/setup.c +++ b/arch/mips/txx9/rbtx4939/setup.c @@ -215,7 +215,7 @@ static int __init rbtx4939_led_probe(struct platform_device *pdev) int i; static char *default_triggers[] __initdata = { "heartbeat", - "ide-disk", + "disk-activity", "nand-disk", }; diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index 4a1d181ed32f..f23781d6bbb3 100644 --- a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c @@ -254,7 +254,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index b51878b0c6b8..affc4eb3f89e 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -131,7 +131,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. 
*/ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 142cb057c41b..489e7f909286 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -10,7 +10,7 @@ config OPENRISC select IRQ_DOMAIN select HANDLE_DOMAIN_IRQ select HAVE_MEMBLOCK - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select HAVE_ARCH_TRACEHOOK select GENERIC_IRQ_CHIP select GENERIC_IRQ_PROBE diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index 230ac20ae794..e94cd225e816 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -163,7 +163,7 @@ good_area: * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index 5b04d703a924..8688ba7f5966 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -214,7 +214,7 @@ CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_LEDS_TRIGGER_IDE_DISK=y +CONFIG_LEDS_TRIGGER_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_DMADEVICES=y diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index e945c08892fa..7e0792658952 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -231,7 +231,7 @@ CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_LEDS_TRIGGER_ONESHOT=y -CONFIG_LEDS_TRIGGER_IDE_DISK=y +CONFIG_LEDS_TRIGGER_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=m CONFIG_LEDS_TRIGGER_BACKLIGHT=m CONFIG_LEDS_TRIGGER_DEFAULT_ON=m diff --git a/arch/parisc/mm/fault.c 
b/arch/parisc/mm/fault.c index 16dbe81c97c9..163af2c31d76 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -239,7 +239,7 @@ good_area: * fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 0a9d439bcda6..d111044f41a2 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -98,7 +98,6 @@ config PPC select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select SYSCTL_EXCEPTION_TRACE - select ARCH_WANT_OPTIONAL_GPIOLIB select VIRT_TO_BUS if !PPC64 select HAVE_IDE select HAVE_IOREMAP_PROT diff --git a/arch/powerpc/boot/dts/mpc8315erdb.dts b/arch/powerpc/boot/dts/mpc8315erdb.dts index 43546844ea5a..ca5139ee5074 100644 --- a/arch/powerpc/boot/dts/mpc8315erdb.dts +++ b/arch/powerpc/boot/dts/mpc8315erdb.dts @@ -472,7 +472,7 @@ hdd { gpios = <&mcu_pio 1 0>; - linux,default-trigger = "ide-disk"; + linux,default-trigger = "disk-activity"; }; }; }; diff --git a/arch/powerpc/boot/dts/mpc8377_rdb.dts b/arch/powerpc/boot/dts/mpc8377_rdb.dts index 2b4b6532d69c..e32613963ab0 100644 --- a/arch/powerpc/boot/dts/mpc8377_rdb.dts +++ b/arch/powerpc/boot/dts/mpc8377_rdb.dts @@ -496,7 +496,7 @@ hdd { gpios = <&mcu_pio 1 0>; - linux,default-trigger = "ide-disk"; + linux,default-trigger = "disk-activity"; }; }; }; diff --git a/arch/powerpc/boot/dts/mpc8378_rdb.dts b/arch/powerpc/boot/dts/mpc8378_rdb.dts index 74b6a535a413..71842fcd621f 100644 --- a/arch/powerpc/boot/dts/mpc8378_rdb.dts +++ b/arch/powerpc/boot/dts/mpc8378_rdb.dts @@ -480,7 +480,7 @@ hdd { gpios = <&mcu_pio 1 0>; - linux,default-trigger = "ide-disk"; + linux,default-trigger = "disk-activity"; }; }; }; diff --git a/arch/powerpc/boot/dts/mpc8379_rdb.dts b/arch/powerpc/boot/dts/mpc8379_rdb.dts index 3b5cbac85368..e442a29b2fe0 100644 --- a/arch/powerpc/boot/dts/mpc8379_rdb.dts +++ 
b/arch/powerpc/boot/dts/mpc8379_rdb.dts @@ -446,7 +446,7 @@ hdd { gpios = <&mcu_pio 1 0>; - linux,default-trigger = "ide-disk"; + linux,default-trigger = "disk-activity"; }; }; }; diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index ea8705ffcd76..3f6c9a6c815c 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -158,7 +158,7 @@ CONFIG_ADB=y CONFIG_ADB_CUDA=y CONFIG_ADB_PMU=y CONFIG_ADB_PMU_LED=y -CONFIG_ADB_PMU_LED_IDE=y +CONFIG_ADB_PMU_LED_DISK=y CONFIG_PMAC_APM_EMU=m CONFIG_PMAC_MEDIABAY=y CONFIG_PMAC_BACKLIGHT=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 99ccbebabfd3..1dde0be2be30 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -442,7 +442,7 @@ CONFIG_ADB=y CONFIG_ADB_CUDA=y CONFIG_ADB_PMU=y CONFIG_ADB_PMU_LED=y -CONFIG_ADB_PMU_LED_IDE=y +CONFIG_ADB_PMU_LED_DISK=y CONFIG_PMAC_APM_EMU=y CONFIG_PMAC_MEDIABAY=y CONFIG_PMAC_BACKLIGHT=y diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index ee09e99097f0..9bd87f269d6d 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -71,10 +71,8 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, bool *is_thp, unsigned *shift) { - if (!arch_irqs_disabled()) { - pr_info("%s called with irq enabled\n", __func__); - dump_stack(); - } + VM_WARN(!arch_irqs_disabled(), + "%s called with irq enabled\n", __func__); return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift); } diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 856f9a7944cd..64174bf95611 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -444,7 +444,8 @@ static int nvram_pstore_write(enum pstore_type_id type, */ static ssize_t nvram_pstore_read(u64 
*id, enum pstore_type_id *type, int *count, struct timespec *time, char **buf, - bool *compressed, struct pstore_info *psi) + bool *compressed, ssize_t *ecc_notice_size, + struct pstore_info *psi) { struct oops_log_info *oops_hdr; unsigned int err_type, id_no, size = 0; @@ -545,6 +546,7 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, return -ENOMEM; kfree(buff); + *ecc_notice_size = 0; if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) *compressed = true; else diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 6527882ce05e..bb0354222b11 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -75,7 +75,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, } ret = 0; - *flt = handle_mm_fault(mm, vma, ea, is_write ? FAULT_FLAG_WRITE : 0); + *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0); if (unlikely(*flt & VM_FAULT_ERROR)) { if (*flt & VM_FAULT_OOM) { ret = -ENOMEM; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index a67c6d781c52..a4db22f65021 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -429,7 +429,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { if (fault & VM_FAULT_SIGSEGV) goto bad_area; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 669a15e7fa76..6dc07ddbfd04 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -581,30 +581,22 @@ static void verify_cpu_node_mapping(int cpu, int node) } } -static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action, - void *hcpu) +/* Must run before sched domains notifier. 
*/ +static int ppc_numa_cpu_prepare(unsigned int cpu) { - unsigned long lcpu = (unsigned long)hcpu; - int ret = NOTIFY_DONE, nid; + int nid; - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - nid = numa_setup_cpu(lcpu); - verify_cpu_node_mapping((int)lcpu, nid); - ret = NOTIFY_OK; - break; + nid = numa_setup_cpu(cpu); + verify_cpu_node_mapping(cpu, nid); + return 0; +} + +static int ppc_numa_cpu_dead(unsigned int cpu) +{ #ifdef CONFIG_HOTPLUG_CPU - case CPU_DEAD: - case CPU_DEAD_FROZEN: - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - unmap_cpu_from_node(lcpu); - ret = NOTIFY_OK; - break; + unmap_cpu_from_node(cpu); #endif - } - return ret; + return 0; } /* @@ -913,11 +905,6 @@ static void __init dump_numa_memory_topology(void) } } -static struct notifier_block ppc64_numa_nb = { - .notifier_call = cpu_numa_callback, - .priority = 1 /* Must run before sched domains notifier. */ -}; - /* Initialize NODE_DATA for a node on the local memory */ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) { @@ -985,15 +972,18 @@ void __init initmem_init(void) setup_node_to_cpumask_map(); reset_numa_cpu_lookup_table(); - register_cpu_notifier(&ppc64_numa_nb); + /* * We need the numa_cpu_lookup_table to be accurate for all CPUs, * even before we online them, so that we can use cpu_to_{node,mem} * early in boot, cf. smp_prepare_cpus(). + * _nocalls() + manual invocation is used because cpuhp is not yet + * initialized for the boot CPU. 
*/ - for_each_present_cpu(cpu) { - numa_setup_cpu((unsigned long)cpu); - } + cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "POWER_NUMA_PREPARE", + ppc_numa_cpu_prepare, ppc_numa_cpu_dead); + for_each_present_cpu(cpu) + numa_setup_cpu(cpu); } static int __init early_numa(char *p) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 97a1d40d8696..ffd61d55fb25 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2158,31 +2158,15 @@ static void perf_event_interrupt(struct pt_regs *regs) irq_exit(); } -static void power_pmu_setup(int cpu) +int power_pmu_prepare_cpu(unsigned int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); - if (!ppmu) - return; - memset(cpuhw, 0, sizeof(*cpuhw)); - cpuhw->mmcr[0] = MMCR0_FC; -} - -static int -power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) -{ - unsigned int cpu = (long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - power_pmu_setup(cpu); - break; - - default: - break; + if (ppmu) { + memset(cpuhw, 0, sizeof(*cpuhw)); + cpuhw->mmcr[0] = MMCR0_FC; } - - return NOTIFY_OK; + return 0; } int register_power_pmu(struct power_pmu *pmu) @@ -2205,7 +2189,7 @@ int register_power_pmu(struct power_pmu *pmu) #endif /* CONFIG_PPC64 */ perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW); - perf_cpu_notifier(power_pmu_notifier); - + cpuhp_setup_state(CPUHP_PERF_POWER, "PERF_POWER", + power_pmu_prepare_cpu, NULL); return 0; } diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig index 6e287f1294fa..e3257f24a8a1 100644 --- a/arch/powerpc/platforms/40x/Kconfig +++ b/arch/powerpc/platforms/40x/Kconfig @@ -137,7 +137,7 @@ config STB03xxx config PPC4xx_GPIO bool "PPC4xx GPIO support" depends on 40x - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB help Enable gpiolib support for ppc40x based boards diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig 
index 5538e57c36c1..48fc18041ff6 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -273,7 +273,7 @@ config PPC44x_SIMPLE config PPC4xx_GPIO bool "PPC4xx GPIO support" depends on 44x - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB help Enable gpiolib support for ppc440 based boards diff --git a/arch/powerpc/platforms/512x/Kconfig b/arch/powerpc/platforms/512x/Kconfig index f09016f6b3a6..bf7ae5cbd07a 100644 --- a/arch/powerpc/platforms/512x/Kconfig +++ b/arch/powerpc/platforms/512x/Kconfig @@ -6,7 +6,6 @@ config PPC_MPC512x select IPIC select PPC_PCI_CHOICE select FSL_PCI if PCI - select ARCH_WANT_OPTIONAL_GPIOLIB select USB_EHCI_BIG_ENDIAN_MMIO if USB_EHCI_HCD select USB_EHCI_BIG_ENDIAN_DESC if USB_EHCI_HCD diff --git a/arch/powerpc/platforms/83xx/Kconfig b/arch/powerpc/platforms/83xx/Kconfig index 2bdc8c862c46..4ef7f1cd05b7 100644 --- a/arch/powerpc/platforms/83xx/Kconfig +++ b/arch/powerpc/platforms/83xx/Kconfig @@ -116,7 +116,6 @@ endif # used for usb & gpio config PPC_MPC831x bool - select ARCH_WANT_OPTIONAL_GPIOLIB # used for math-emu config PPC_MPC832x @@ -125,9 +124,7 @@ config PPC_MPC832x # used for usb & gpio config PPC_MPC834x bool - select ARCH_WANT_OPTIONAL_GPIOLIB # used for usb & gpio config PPC_MPC837x bool - select ARCH_WANT_OPTIONAL_GPIOLIB diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index e626461a63bd..df25a3ed489d 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -225,7 +225,7 @@ config GE_IMP3A select DEFAULT_UIMAGE select SWIOTLB select MMIO_NVRAM - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select GE_FPGA help This option enables support for the GE Intelligent Platforms IMP3A @@ -272,7 +272,7 @@ config CORENET_GENERIC select PPC_E500MC select PHYS_64BIT select SWIOTLB - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select GPIO_MPC8XXX select HAS_RAPIDIO select PPC_EPAPR_HV_PIC diff --git 
a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig index 1afd1e4a2dd2..3988f16e46c1 100644 --- a/arch/powerpc/platforms/86xx/Kconfig +++ b/arch/powerpc/platforms/86xx/Kconfig @@ -4,7 +4,6 @@ menuconfig PPC_86xx depends on 6xx select FSL_SOC select ALTIVEC - select ARCH_WANT_OPTIONAL_GPIOLIB help The Freescale E600 SoCs have 74xx cores. @@ -37,7 +36,7 @@ config GEF_PPC9A bool "GE PPC9A" select DEFAULT_UIMAGE select MMIO_NVRAM - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select GE_FPGA help This option enables support for the GE PPC9A. @@ -46,7 +45,7 @@ config GEF_SBC310 bool "GE SBC310" select DEFAULT_UIMAGE select MMIO_NVRAM - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select GE_FPGA help This option enables support for the GE SBC310. @@ -55,7 +54,7 @@ config GEF_SBC610 bool "GE SBC610" select DEFAULT_UIMAGE select MMIO_NVRAM - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select GE_FPGA select HAS_RAPIDIO help diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index 157250426b56..564d99bb2a26 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -109,7 +109,7 @@ config 8xx_COPYBACK config 8xx_GPIO bool "GPIO API Support" - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB help Saying Y here will cause the ports on an MPC8xx processor to be used with the GPIO API. If you say N here, the kernel needs less memory. diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 46a3533d3acb..3663f71fd913 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -275,7 +275,7 @@ config TAU_AVERAGE config QE_GPIO bool "QE GPIO support" depends on QUICC_ENGINE - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB help Say Y here if you're going to use hardware that connects to the QE GPIOs. 
@@ -285,7 +285,7 @@ config CPM2 depends on (FSL_SOC_BOOKE && PPC32) || 8260 select CPM select PPC_PCI_CHOICE - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB help The CPM2 (Communications Processor Module) is a coprocessor on embedded CPUs made by Freescale. Selecting this option means that @@ -324,7 +324,7 @@ config OF_RTC config SIMPLE_GPIO bool "Support for simple, memory-mapped GPIO controllers" depends on PPC - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB help Say Y here to support simple, memory-mapped GPIO controllers. These are usually BCSRs used to control board's switches, LEDs, @@ -334,7 +334,7 @@ config SIMPLE_GPIO config MCU_MPC8349EMITX bool "MPC8349E-mITX MCU driver" depends on I2C=y && PPC_83xx - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB help Say Y here to enable soft power-off functionality on the Freescale boards with the MPC8349E-mITX-compatible MCU chips. This driver will diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index f9af6461521a..9144204442eb 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -143,12 +143,12 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) */ static long axon_ram_direct_access(struct block_device *device, sector_t sector, - void __pmem **kaddr, pfn_t *pfn, long size) + void **kaddr, pfn_t *pfn, long size) { struct axon_ram_bank *bank = device->bd_disk->private_data; loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT; - *kaddr = (void __pmem __force *) bank->io_addr + offset; + *kaddr = (void *) bank->io_addr + offset; *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV); return bank->size - offset; } diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index edcf2a706942..598df5708501 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -102,7 +102,7 @@ static void appldata_get_mem_data(void *data) mem_data->totalhigh = P2K(val.totalhigh); mem_data->freehigh = 
P2K(val.freehigh); mem_data->bufferram = P2K(val.bufferram); - mem_data->cached = P2K(global_page_state(NR_FILE_PAGES) + mem_data->cached = P2K(global_node_page_state(NR_FILE_PAGES) - val.bufferram); si_swapinfo(&val); diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 7a92e69c50bc..15711de10403 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -87,10 +87,10 @@ static inline void tlb_finish_mmu(struct mmu_gather *tlb, * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page * has already been freed, so just do free_page_and_swap_cache. */ -static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) +static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { free_page_and_swap_cache(page); - return 1; /* avoid calling tlb_flush_mmu */ + return false; /* avoid calling tlb_flush_mmu */ } static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) @@ -98,6 +98,24 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) free_page_and_swap_cache(page); } +static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + return __tlb_remove_page(tlb, page); +} + +static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, + struct page *page) +{ + return __tlb_remove_page(tlb, page); +} + +static inline void tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + return tlb_remove_page(tlb, page); +} + /* * pte_free_tlb frees a pte table and clears the CRSTE for the * page table from the tlb. 
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 7ec63b1d920d..037c2a253ae4 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -664,30 +664,22 @@ static struct pmu cpumf_pmu = { .cancel_txn = cpumf_pmu_cancel_txn, }; -static int cpumf_pmu_notifier(struct notifier_block *self, unsigned long action, - void *hcpu) +static int cpumf_pmf_setup(unsigned int cpu, int flags) { - int flags; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_ONLINE: - case CPU_DOWN_FAILED: - flags = PMC_INIT; - local_irq_disable(); - setup_pmc_cpu(&flags); - local_irq_enable(); - break; - case CPU_DOWN_PREPARE: - flags = PMC_RELEASE; - local_irq_disable(); - setup_pmc_cpu(&flags); - local_irq_enable(); - break; - default: - break; - } + local_irq_disable(); + setup_pmc_cpu(&flags); + local_irq_enable(); + return 0; +} + +static int s390_pmu_online_cpu(unsigned int cpu) +{ + return cpumf_pmf_setup(cpu, PMC_INIT); +} - return NOTIFY_OK; +static int s390_pmu_offline_cpu(unsigned int cpu) +{ + return cpumf_pmf_setup(cpu, PMC_RELEASE); } static int __init cpumf_pmu_init(void) @@ -707,7 +699,7 @@ static int __init cpumf_pmu_init(void) if (rc) { pr_err("Registering for CPU-measurement alerts " "failed with rc=%i\n", rc); - goto out; + return rc; } cpumf_pmu.attr_groups = cpumf_cf_event_group(); @@ -716,10 +708,10 @@ static int __init cpumf_pmu_init(void) pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert); - goto out; + return rc; } - perf_cpu_notifier(cpumf_pmu_notifier); -out: - return rc; + return cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE, + "AP_PERF_S390_CF_ONLINE", + s390_pmu_online_cpu, s390_pmu_offline_cpu); } early_initcall(cpumf_pmu_init); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 9ea26dface38..fcc634c1479a 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c 
@@ -974,12 +974,15 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) struct pt_regs regs; struct perf_sf_sde_regs *sde_regs; struct perf_sample_data data; - struct perf_raw_record raw; + struct perf_raw_record raw = { + .frag = { + .size = sfr->size, + .data = sfr, + }, + }; /* Setup perf sample */ perf_sample_data_init(&data, 0, event->hw.last_period); - raw.size = sfr->size; - raw.data = sfr; data.raw = &raw; /* Setup pt_regs to look like an CPU-measurement external interrupt @@ -1501,37 +1504,28 @@ static void cpumf_measurement_alert(struct ext_code ext_code, sf_disable(); } } - -static int cpumf_pmu_notifier(struct notifier_block *self, - unsigned long action, void *hcpu) +static int cpusf_pmu_setup(unsigned int cpu, int flags) { - int flags; - /* Ignore the notification if no events are scheduled on the PMU. * This might be racy... */ if (!atomic_read(&num_events)) - return NOTIFY_OK; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_ONLINE: - case CPU_DOWN_FAILED: - flags = PMC_INIT; - local_irq_disable(); - setup_pmc_cpu(&flags); - local_irq_enable(); - break; - case CPU_DOWN_PREPARE: - flags = PMC_RELEASE; - local_irq_disable(); - setup_pmc_cpu(&flags); - local_irq_enable(); - break; - default: - break; - } + return 0; - return NOTIFY_OK; + local_irq_disable(); + setup_pmc_cpu(&flags); + local_irq_enable(); + return 0; +} + +static int s390_pmu_sf_online_cpu(unsigned int cpu) +{ + return cpusf_pmu_setup(cpu, PMC_INIT); +} + +static int s390_pmu_sf_offline_cpu(unsigned int cpu) +{ + return cpusf_pmu_setup(cpu, PMC_RELEASE); } static int param_get_sfb_size(char *buffer, const struct kernel_param *kp) @@ -1631,7 +1625,9 @@ static int __init init_cpum_sampling_pmu(void) cpumf_measurement_alert); goto out; } - perf_cpu_notifier(cpumf_pmu_notifier); + + cpuhp_setup_state(CPUHP_AP_PERF_S390_SF_ONLINE, "AP_PERF_S390_SF_ONLINE", + s390_pmu_sf_online_cpu, s390_pmu_sf_offline_cpu); out: return err; } diff --git 
a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 6ad7eff84c82..25783dc3c813 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -456,7 +456,7 @@ retry: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); /* No reason to continue if interrupted by SIGKILL. */ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { fault = VM_FAULT_SIGNAL; diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c index 37a6c2e0e969..995b71e4db4b 100644 --- a/arch/score/mm/fault.c +++ b/arch/score/mm/fault.c @@ -111,7 +111,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index e803a836cb7c..0d5f3a9bb315 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -264,7 +264,6 @@ config CPU_SUBTYPE_SH7203 select CPU_HAS_FPU select SYS_SUPPORTS_SH_CMT select SYS_SUPPORTS_SH_MTU2 - select ARCH_WANT_OPTIONAL_GPIOLIB select PINCTRL config CPU_SUBTYPE_SH7206 @@ -353,7 +352,6 @@ config CPU_SUBTYPE_SH7720 select CPU_SH3 select CPU_HAS_DSP select SYS_SUPPORTS_SH_CMT - select ARCH_WANT_OPTIONAL_GPIOLIB select USB_OHCI_SH if USB_OHCI_HCD select PINCTRL help @@ -419,7 +417,6 @@ config CPU_SUBTYPE_SH7723 select ARCH_SHMOBILE select ARCH_SPARSEMEM_ENABLE select SYS_SUPPORTS_SH_CMT - select ARCH_WANT_OPTIONAL_GPIOLIB select PINCTRL help Select SH7723 if you have an SH-MobileR2 CPU. @@ -431,7 +428,6 @@ config CPU_SUBTYPE_SH7724 select ARCH_SHMOBILE select ARCH_SPARSEMEM_ENABLE select SYS_SUPPORTS_SH_CMT - select ARCH_WANT_OPTIONAL_GPIOLIB select PINCTRL help Select SH7724 if you have an SH-MobileR2R CPU. 
@@ -440,7 +436,6 @@ config CPU_SUBTYPE_SH7734 bool "Support SH7734 processor" select CPU_SH4A select CPU_SHX2 - select ARCH_WANT_OPTIONAL_GPIOLIB select PINCTRL help Select SH7734 if you have a SH4A SH7734 CPU. @@ -449,7 +444,6 @@ config CPU_SUBTYPE_SH7757 bool "Support SH7757 processor" select CPU_SH4A select CPU_SHX2 - select ARCH_WANT_OPTIONAL_GPIOLIB select PINCTRL help Select SH7757 if you have a SH4A SH7757 CPU. @@ -475,7 +469,6 @@ config CPU_SUBTYPE_SH7785 select CPU_SHX2 select ARCH_SPARSEMEM_ENABLE select SYS_SUPPORTS_NUMA - select ARCH_WANT_OPTIONAL_GPIOLIB select PINCTRL config CPU_SUBTYPE_SH7786 @@ -484,7 +477,6 @@ config CPU_SUBTYPE_SH7786 select CPU_SHX3 select CPU_HAS_PTEAEX select GENERIC_CLOCKEVENTS_BROADCAST if SMP - select ARCH_WANT_OPTIONAL_GPIOLIB select USB_OHCI_SH if USB_OHCI_HCD select USB_EHCI_SH if USB_EHCI_HCD select PINCTRL @@ -494,7 +486,7 @@ config CPU_SUBTYPE_SHX3 select CPU_SH4A select CPU_SHX3 select GENERIC_CLOCKEVENTS_BROADCAST if SMP - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select PINCTRL # SH4AL-DSP Processor Support @@ -513,7 +505,6 @@ config CPU_SUBTYPE_SH7722 select ARCH_SPARSEMEM_ENABLE select SYS_SUPPORTS_NUMA select SYS_SUPPORTS_SH_CMT - select ARCH_WANT_OPTIONAL_GPIOLIB select PINCTRL config CPU_SUBTYPE_SH7366 diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig index 5e52d5362292..e0db04664e2e 100644 --- a/arch/sh/boards/Kconfig +++ b/arch/sh/boards/Kconfig @@ -70,7 +70,7 @@ config SH_7724_SOLUTION_ENGINE bool "SolutionEngine7724" select SOLUTION_ENGINE depends on CPU_SUBTYPE_SH7724 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select SND_SOC_AK4642 if SND_SIMPLE_CARD select REGULATOR_FIXED_VOLTAGE if REGULATOR help @@ -174,7 +174,6 @@ config SH_SDK7786 depends on CPU_SUBTYPE_SH7786 select SYS_SUPPORTS_PCI select NO_IOPORT_MAP if !PCI - select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_SRAM_POOL select REGULATOR_FIXED_VOLTAGE if REGULATOR help @@ -190,7 +189,7 @@ config SH_HIGHLANDER config SH_SH7757LCR bool 
"SH7757LCR" depends on CPU_SUBTYPE_SH7757 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select REGULATOR_FIXED_VOLTAGE if REGULATOR config SH_SH7785LCR @@ -217,14 +216,14 @@ config SH_SH7785LCR_PT config SH_URQUELL bool "Urquell" depends on CPU_SUBTYPE_SH7786 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select SYS_SUPPORTS_PCI select NO_IOPORT_MAP if !PCI config SH_MIGOR bool "Migo-R" depends on CPU_SUBTYPE_SH7722 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select REGULATOR_FIXED_VOLTAGE if REGULATOR help Select Migo-R if configuring for the SH7722 Migo-R platform @@ -233,7 +232,7 @@ config SH_MIGOR config SH_AP325RXA bool "AP-325RXA" depends on CPU_SUBTYPE_SH7723 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select REGULATOR_FIXED_VOLTAGE if REGULATOR help Renesas "AP-325RXA" support. @@ -242,7 +241,7 @@ config SH_AP325RXA config SH_KFR2R09 bool "KFR2R09" depends on CPU_SUBTYPE_SH7724 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select REGULATOR_FIXED_VOLTAGE if REGULATOR help "Kit For R2R for 2009" support. @@ -250,7 +249,7 @@ config SH_KFR2R09 config SH_ECOVEC bool "EcoVec" depends on CPU_SUBTYPE_SH7724 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select SND_SOC_DA7210 if SND_SIMPLE_CARD select REGULATOR_FIXED_VOLTAGE if REGULATOR help @@ -327,7 +326,7 @@ config SH_X3PROTO config SH_MAGIC_PANEL_R2 bool "Magic Panel R2" depends on CPU_SUBTYPE_SH7720 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select REGULATOR_FIXED_VOLTAGE if REGULATOR help Select Magic Panel R2 if configuring for Magic Panel R2. 
diff --git a/arch/sh/boards/mach-highlander/Kconfig b/arch/sh/boards/mach-highlander/Kconfig index def49cc0a7b9..42f5589b4bf3 100644 --- a/arch/sh/boards/mach-highlander/Kconfig +++ b/arch/sh/boards/mach-highlander/Kconfig @@ -18,7 +18,7 @@ config SH_R7780MP config SH_R7785RP bool "R7785RP board support" depends on CPU_SUBTYPE_SH7785 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB endchoice diff --git a/arch/sh/boards/mach-rsk/Kconfig b/arch/sh/boards/mach-rsk/Kconfig index 458a11ffd022..0b9b2c4952c1 100644 --- a/arch/sh/boards/mach-rsk/Kconfig +++ b/arch/sh/boards/mach-rsk/Kconfig @@ -10,17 +10,17 @@ config SH_RSK7201 config SH_RSK7203 bool "RSK7203" - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB depends on CPU_SUBTYPE_SH7203 config SH_RSK7264 bool "RSK2+SH7264" - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB depends on CPU_SUBTYPE_SH7264 config SH_RSK7269 bool "RSK2+SH7269" - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB depends on CPU_SUBTYPE_SH7269 endchoice diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index 62f80d2a9df9..025cdb1032f6 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -101,7 +101,7 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { free_page_and_swap_cache(page); - return 1; /* avoid calling tlb_flush_mmu */ + return false; /* avoid calling tlb_flush_mmu */ } static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) @@ -109,6 +109,24 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) __tlb_remove_page(tlb, page); } +static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + return __tlb_remove_page(tlb, page); +} + +static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, + struct page *page) +{ + return __tlb_remove_page(tlb, page); +} + +static inline void tlb_remove_page_size(struct mmu_gather *tlb, + struct 
page *page, int page_size) +{ + return tlb_remove_page(tlb, page); +} + #define pte_free_tlb(tlb, ptep, addr) pte_free((tlb)->mm, ptep) #define pmd_free_tlb(tlb, pmdp, addr) pmd_free((tlb)->mm, pmdp) #define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp) diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 4dca18347ee9..ba3269a8304b 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -352,28 +352,12 @@ static struct pmu pmu = { .read = sh_pmu_read, }; -static void sh_pmu_setup(int cpu) +static int sh_pmu_prepare_cpu(unsigned int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); memset(cpuhw, 0, sizeof(struct cpu_hw_events)); -} - -static int -sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) -{ - unsigned int cpu = (long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - sh_pmu_setup(cpu); - break; - - default: - break; - } - - return NOTIFY_OK; + return 0; } int register_sh_pmu(struct sh_pmu *_pmu) @@ -394,6 +378,7 @@ int register_sh_pmu(struct sh_pmu *_pmu) WARN_ON(_pmu->num_events > MAX_HWEVENTS); perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); - perf_cpu_notifier(sh_pmu_notifier); + cpuhp_setup_state(CPUHP_PERF_SUPERH, "PERF_SUPERH", sh_pmu_prepare_cpu, + NULL); return 0; } diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 79d8276377d1..9bf876780cef 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -487,7 +487,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. 
*/ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR))) if (mm_fault_error(regs, error_code, address, fault)) diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index 139e711ff80c..dcbf985ab243 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -31,14 +31,6 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } -static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, - unsigned long addr, unsigned long end, - unsigned long floor, - unsigned long ceiling) -{ - free_pgd_range(tlb, addr, end, floor, ceiling); -} - static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { @@ -82,4 +74,8 @@ static inline void arch_clear_hugepage_flags(struct page *page) { } +void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, + unsigned long end, unsigned long floor, + unsigned long ceiling); + #endif /* _ASM_SPARC64_HUGETLB_H */ diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h index 70067ce184b1..f7de0dbc38af 100644 --- a/arch/sparc/include/asm/mmu_64.h +++ b/arch/sparc/include/asm/mmu_64.h @@ -92,7 +92,8 @@ struct tsb_config { typedef struct { spinlock_t lock; unsigned long sparc64_ctx_val; - unsigned long huge_pte_count; + unsigned long hugetlb_pte_count; + unsigned long thp_pte_count; struct tsb_config tsb_block[MM_NUM_TSBS]; struct hv_tsb_descr tsb_descr[MM_NUM_TSBS]; } mm_context_t; diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index e7d82803a48f..1fb317fbc0b3 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -395,7 +395,7 @@ static inline unsigned long __pte_huge_mask(void) static inline pte_t pte_mkhuge(pte_t pte) { - return __pte(pte_val(pte) | __pte_huge_mask()); + return __pte(pte_val(pte) | 
_PAGE_PMD_HUGE | __pte_huge_mask()); } static inline bool is_hugetlb_pte(pte_t pte) @@ -403,6 +403,11 @@ static inline bool is_hugetlb_pte(pte_t pte) return !!(pte_val(pte) & __pte_huge_mask()); } +static inline bool is_hugetlb_pmd(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_PMD_HUGE); +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline pmd_t pmd_mkhuge(pmd_t pmd) { diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h index c6a155c3904e..32258e08da03 100644 --- a/arch/sparc/include/asm/tsb.h +++ b/arch/sparc/include/asm/tsb.h @@ -203,7 +203,7 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; * We have to propagate the 4MB bit of the virtual address * because we are fabricating 8MB pages using 4MB hw pages. */ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) #define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \ brz,pn REG1, FAIL_LABEL; \ sethi %uhi(_PAGE_PMD_HUGE), REG2; \ diff --git a/arch/sparc/kernel/dtlb_prot.S b/arch/sparc/kernel/dtlb_prot.S index d668ca149e64..4087a62f96b0 100644 --- a/arch/sparc/kernel/dtlb_prot.S +++ b/arch/sparc/kernel/dtlb_prot.S @@ -25,13 +25,13 @@ /* PROT ** ICACHE line 2: More real fault processing */ ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5 + srlx %g5, PAGE_SHIFT, %g5 + sllx %g5, PAGE_SHIFT, %g5 ! Clear context ID bits bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4 ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault nop nop - nop - nop /* PROT ** ICACHE line 3: Unused... 
*/ nop diff --git a/arch/sparc/kernel/irq_32.c b/arch/sparc/kernel/irq_32.c index a979e99f8751..cac4a5554c0e 100644 --- a/arch/sparc/kernel/irq_32.c +++ b/arch/sparc/kernel/irq_32.c @@ -165,7 +165,7 @@ void irq_link(unsigned int irq) p = &irq_table[irq]; pil = p->pil; - BUG_ON(pil > SUN4D_MAX_IRQ); + BUG_ON(pil >= SUN4D_MAX_IRQ); p->next = irq_map[pil]; irq_map[pil] = p; @@ -182,7 +182,7 @@ void irq_unlink(unsigned int irq) spin_lock_irqsave(&irq_map_lock, flags); p = &irq_table[irq]; - BUG_ON(p->pil > SUN4D_MAX_IRQ); + BUG_ON(p->pil >= SUN4D_MAX_IRQ); pnext = &irq_map[p->pil]; while (*pnext != p) pnext = &(*pnext)->next; diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S index ef0d8e9e1210..f22bec0db645 100644 --- a/arch/sparc/kernel/ktlb.S +++ b/arch/sparc/kernel/ktlb.S @@ -20,6 +20,10 @@ kvmap_itlb: mov TLB_TAG_ACCESS, %g4 ldxa [%g4] ASI_IMMU, %g4 + /* The kernel executes in context zero, therefore we do not + * need to clear the context ID bits out of %g4 here. + */ + /* sun4v_itlb_miss branches here with the missing virtual * address already loaded into %g4 */ @@ -128,6 +132,10 @@ kvmap_dtlb: mov TLB_TAG_ACCESS, %g4 ldxa [%g4] ASI_DMMU, %g4 + /* The kernel executes in context zero, therefore we do not + * need to clear the context ID bits out of %g4 here. + */ + /* sun4v_dtlb_miss branches here with the missing virtual * address already loaded into %g4 */ @@ -251,6 +259,10 @@ kvmap_dtlb_longpath: nop .previous + /* The kernel executes in context zero, therefore we do not + * need to clear the context ID bits out of %g5 here. 
+ */ + be,pt %xcc, sparc64_realfault_common mov FAULT_CODE_DTLB, %g4 ba,pt %xcc, winfix_trampoline diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index be98685c14c6..d568c8207af7 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S @@ -29,13 +29,17 @@ */ tsb_miss_dtlb: mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_DMMU, %g4 + srlx %g4, PAGE_SHIFT, %g4 ba,pt %xcc, tsb_miss_page_table_walk - ldxa [%g4] ASI_DMMU, %g4 + sllx %g4, PAGE_SHIFT, %g4 tsb_miss_itlb: mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_IMMU, %g4 + srlx %g4, PAGE_SHIFT, %g4 ba,pt %xcc, tsb_miss_page_table_walk - ldxa [%g4] ASI_IMMU, %g4 + sllx %g4, PAGE_SHIFT, %g4 /* At this point we have: * %g1 -- PAGE_SIZE TSB entry address @@ -284,6 +288,10 @@ tsb_do_dtlb_fault: nop .previous + /* Clear context ID bits. */ + srlx %g5, PAGE_SHIFT, %g5 + sllx %g5, PAGE_SHIFT, %g5 + be,pt %xcc, sparc64_realfault_common mov FAULT_CODE_DTLB, %g4 ba,pt %xcc, winfix_trampoline diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index b6c559cbd64d..4714061d6cd3 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -241,7 +241,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. 
*/ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; @@ -411,7 +411,7 @@ good_area: if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } - switch (handle_mm_fault(mm, vma, address, flags)) { + switch (handle_mm_fault(vma, address, flags)) { case VM_FAULT_SIGBUS: case VM_FAULT_OOM: goto do_sigbus; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index cb841a33da59..e16fdd28a931 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -111,8 +111,8 @@ static unsigned int get_user_insn(unsigned long tpc) if (pmd_none(*pmdp) || unlikely(pmd_bad(*pmdp))) goto out_irq_enable; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (pmd_trans_huge(*pmdp)) { +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + if (is_hugetlb_pmd(*pmdp)) { pa = pmd_pfn(*pmdp) << PAGE_SHIFT; pa += tpc & ~HPAGE_MASK; @@ -436,7 +436,7 @@ good_area: goto bad_area; } - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) goto exit_exception; @@ -476,14 +476,14 @@ good_area: up_read(&mm->mmap_sem); mm_rss = get_mm_rss(mm); -#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE)); +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) + mm_rss -= (mm->context.thp_pte_count * (HPAGE_SIZE / PAGE_SIZE)); #endif if (unlikely(mm_rss > mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit)) tsb_grow(mm, MM_TSB_BASE, mm_rss); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - mm_rss = mm->context.huge_pte_count; + mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count; if (unlikely(mm_rss > mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) { if (mm->context.tsb_block[MM_TSB_HUGE].tsb) diff --git a/arch/sparc/mm/hugetlbpage.c 
b/arch/sparc/mm/hugetlbpage.c index ba52e6466a82..988acc8b1b80 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -12,6 +12,7 @@ #include <asm/mman.h> #include <asm/pgalloc.h> +#include <asm/pgtable.h> #include <asm/tlb.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> @@ -131,23 +132,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, { pgd_t *pgd; pud_t *pud; - pmd_t *pmd; pte_t *pte = NULL; - /* We must align the address, because our caller will run - * set_huge_pte_at() on whatever we return, which writes out - * all of the sub-ptes for the hugepage range. So we have - * to give it the first such sub-pte. - */ - addr &= HPAGE_MASK; - pgd = pgd_offset(mm, addr); pud = pud_alloc(mm, pgd, addr); - if (pud) { - pmd = pmd_alloc(mm, pud, addr); - if (pmd) - pte = pte_alloc_map(mm, pmd, addr); - } + if (pud) + pte = (pte_t *)pmd_alloc(mm, pud, addr); + return pte; } @@ -155,19 +146,13 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; pud_t *pud; - pmd_t *pmd; pte_t *pte = NULL; - addr &= HPAGE_MASK; - pgd = pgd_offset(mm, addr); if (!pgd_none(*pgd)) { pud = pud_offset(pgd, addr); - if (!pud_none(*pud)) { - pmd = pmd_offset(pud, addr); - if (!pmd_none(*pmd)) - pte = pte_offset_map(pmd, addr); - } + if (!pud_none(*pud)) + pte = (pte_t *)pmd_offset(pud, addr); } return pte; } @@ -175,70 +160,143 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { - int i; - pte_t orig[2]; - unsigned long nptes; + pte_t orig; if (!pte_present(*ptep) && pte_present(entry)) - mm->context.huge_pte_count++; + mm->context.hugetlb_pte_count++; addr &= HPAGE_MASK; - - nptes = 1 << HUGETLB_PAGE_ORDER; - orig[0] = *ptep; - orig[1] = *(ptep + nptes / 2); - for (i = 0; i < nptes; i++) { - *ptep = entry; - ptep++; - addr += PAGE_SIZE; - pte_val(entry) += PAGE_SIZE; - } + orig = *ptep; + *ptep = entry; /* Issue TLB flush at 
REAL_HPAGE_SIZE boundaries */ - addr -= REAL_HPAGE_SIZE; - ptep -= nptes / 2; - maybe_tlb_batch_add(mm, addr, ptep, orig[1], 0); - addr -= REAL_HPAGE_SIZE; - ptep -= nptes / 2; - maybe_tlb_batch_add(mm, addr, ptep, orig[0], 0); + maybe_tlb_batch_add(mm, addr, ptep, orig, 0); + maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0); } pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t entry; - int i; - unsigned long nptes; entry = *ptep; if (pte_present(entry)) - mm->context.huge_pte_count--; + mm->context.hugetlb_pte_count--; addr &= HPAGE_MASK; - nptes = 1 << HUGETLB_PAGE_ORDER; - for (i = 0; i < nptes; i++) { - *ptep = __pte(0UL); - addr += PAGE_SIZE; - ptep++; - } + *ptep = __pte(0UL); /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */ - addr -= REAL_HPAGE_SIZE; - ptep -= nptes / 2; - maybe_tlb_batch_add(mm, addr, ptep, entry, 0); - addr -= REAL_HPAGE_SIZE; - ptep -= nptes / 2; maybe_tlb_batch_add(mm, addr, ptep, entry, 0); + maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0); return entry; } int pmd_huge(pmd_t pmd) { - return 0; + return !pmd_none(pmd) && + (pmd_val(pmd) & (_PAGE_VALID|_PAGE_PMD_HUGE)) != _PAGE_VALID; } int pud_huge(pud_t pud) { return 0; } + +static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, + unsigned long addr) +{ + pgtable_t token = pmd_pgtable(*pmd); + + pmd_clear(pmd); + pte_free_tlb(tlb, token, addr); + atomic_long_dec(&tlb->mm->nr_ptes); +} + +static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, + unsigned long addr, unsigned long end, + unsigned long floor, unsigned long ceiling) +{ + pmd_t *pmd; + unsigned long next; + unsigned long start; + + start = addr; + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + if (pmd_none(*pmd)) + continue; + if (is_hugetlb_pmd(*pmd)) + pmd_clear(pmd); + else + hugetlb_free_pte_range(tlb, pmd, addr); + } while (pmd++, addr = next, addr != end); + + start &= PUD_MASK; + if (start 
< floor) + return; + if (ceiling) { + ceiling &= PUD_MASK; + if (!ceiling) + return; + } + if (end - 1 > ceiling - 1) + return; + + pmd = pmd_offset(pud, start); + pud_clear(pud); + pmd_free_tlb(tlb, pmd, start); + mm_dec_nr_pmds(tlb->mm); +} + +static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, + unsigned long addr, unsigned long end, + unsigned long floor, unsigned long ceiling) +{ + pud_t *pud; + unsigned long next; + unsigned long start; + + start = addr; + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none_or_clear_bad(pud)) + continue; + hugetlb_free_pmd_range(tlb, pud, addr, next, floor, + ceiling); + } while (pud++, addr = next, addr != end); + + start &= PGDIR_MASK; + if (start < floor) + return; + if (ceiling) { + ceiling &= PGDIR_MASK; + if (!ceiling) + return; + } + if (end - 1 > ceiling - 1) + return; + + pud = pud_offset(pgd, start); + pgd_clear(pgd); + pud_free_tlb(tlb, pud, start); +} + +void hugetlb_free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, unsigned long ceiling) +{ + pgd_t *pgd; + unsigned long next; + + pgd = pgd_offset(tlb->mm, addr); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) + continue; + hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); + } while (pgd++, addr = next, addr != end); +} diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index aec508e37490..65457c9f1365 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -346,10 +346,13 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * spin_lock_irqsave(&mm->context.lock, flags); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (mm->context.huge_pte_count && is_hugetlb_pte(pte)) + if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) && + is_hugetlb_pte(pte)) { + /* We are fabricating 8MB pages using 4MB real hw pages. 
*/ + pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT)); __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, address, pte_val(pte)); - else + } else #endif __update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT, address, pte_val(pte)); diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index f81cd9736700..3659d37b4d81 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -175,9 +175,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) { if (pmd_val(pmd) & _PAGE_PMD_HUGE) - mm->context.huge_pte_count++; + mm->context.thp_pte_count++; else - mm->context.huge_pte_count--; + mm->context.thp_pte_count--; /* Do not try to allocate the TSB hash table if we * don't have one already. We have various locks held diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index a0604a493a36..6725ed45580e 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -470,7 +470,7 @@ retry_tsb_alloc: int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - unsigned long huge_pte_count; + unsigned long total_huge_pte_count; #endif unsigned int i; @@ -479,12 +479,14 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) mm->context.sparc64_ctx_val = 0UL; #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - /* We reset it to zero because the fork() page copying + /* We reset them to zero because the fork() page copying * will re-increment the counters as the parent PTEs are * copied into the child address space. 
*/ - huge_pte_count = mm->context.huge_pte_count; - mm->context.huge_pte_count = 0; + total_huge_pte_count = mm->context.hugetlb_pte_count + + mm->context.thp_pte_count; + mm->context.hugetlb_pte_count = 0; + mm->context.thp_pte_count = 0; #endif /* copy_mm() copies over the parent's mm_struct before calling @@ -500,8 +502,8 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm)); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (unlikely(huge_pte_count)) - tsb_grow(mm, MM_TSB_HUGE, huge_pte_count); + if (unlikely(total_huge_pte_count)) + tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count); #endif if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb)) diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h index c505d77e4d06..e9d54a06736f 100644 --- a/arch/tile/include/asm/elf.h +++ b/arch/tile/include/asm/elf.h @@ -129,6 +129,7 @@ extern int dump_task_regs(struct task_struct *, elf_gregset_t *); struct linux_binprm; extern int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack); +/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ #define ARCH_DLINFO \ do { \ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_BASE); \ diff --git a/arch/tile/include/asm/setup.h b/arch/tile/include/asm/setup.h index e98909033e5b..2a0347af0702 100644 --- a/arch/tile/include/asm/setup.h +++ b/arch/tile/include/asm/setup.h @@ -25,7 +25,12 @@ #define MAXMEM_PFN PFN_DOWN(MAXMEM) int tile_console_write(const char *buf, int count); + +#ifdef CONFIG_EARLY_PRINTK void early_panic(const char *fmt, ...); +#else +#define early_panic panic +#endif /* Init-time routine to do tile-specific per-cpu setup. 
*/ void setup_cpu(int boot); diff --git a/arch/tile/include/uapi/asm/auxvec.h b/arch/tile/include/uapi/asm/auxvec.h index c93e92709f14..f497123ed980 100644 --- a/arch/tile/include/uapi/asm/auxvec.h +++ b/arch/tile/include/uapi/asm/auxvec.h @@ -18,4 +18,6 @@ /* The vDSO location. */ #define AT_SYSINFO_EHDR 33 +#define AT_VECTOR_SIZE_ARCH 1 /* entries in ARCH_DLINFO */ + #endif /* _ASM_TILE_AUXVEC_H */ diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index 49120843ff96..bdaf71d31a4a 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -23,42 +23,50 @@ #include <linux/uaccess.h> #include <linux/signal.h> #include <asm/syscalls.h> +#include <asm/byteorder.h> /* * Syscalls that take 64-bit numbers traditionally take them in 32-bit * "high" and "low" value parts on 32-bit architectures. * In principle, one could imagine passing some register arguments as * fully 64-bit on TILE-Gx in 32-bit mode, but it seems easier to - * adapt the usual convention. + * adopt the usual convention. 
*/ +#ifdef __BIG_ENDIAN +#define SYSCALL_PAIR(name) u32, name ## _hi, u32, name ## _lo +#else +#define SYSCALL_PAIR(name) u32, name ## _lo, u32, name ## _hi +#endif + COMPAT_SYSCALL_DEFINE4(truncate64, char __user *, filename, u32, dummy, - u32, low, u32, high) + SYSCALL_PAIR(length)) { - return sys_truncate(filename, ((loff_t)high << 32) | low); + return sys_truncate(filename, ((loff_t)length_hi << 32) | length_lo); } COMPAT_SYSCALL_DEFINE4(ftruncate64, unsigned int, fd, u32, dummy, - u32, low, u32, high) + SYSCALL_PAIR(length)) { - return sys_ftruncate(fd, ((loff_t)high << 32) | low); + return sys_ftruncate(fd, ((loff_t)length_hi << 32) | length_lo); } COMPAT_SYSCALL_DEFINE6(pread64, unsigned int, fd, char __user *, ubuf, - size_t, count, u32, dummy, u32, low, u32, high) + size_t, count, u32, dummy, SYSCALL_PAIR(offset)) { - return sys_pread64(fd, ubuf, count, ((loff_t)high << 32) | low); + return sys_pread64(fd, ubuf, count, + ((loff_t)offset_hi << 32) | offset_lo); } COMPAT_SYSCALL_DEFINE6(pwrite64, unsigned int, fd, char __user *, ubuf, - size_t, count, u32, dummy, u32, low, u32, high) + size_t, count, u32, dummy, SYSCALL_PAIR(offset)) { - return sys_pwrite64(fd, ubuf, count, ((loff_t)high << 32) | low); + return sys_pwrite64(fd, ubuf, count, + ((loff_t)offset_hi << 32) | offset_lo); } COMPAT_SYSCALL_DEFINE6(sync_file_range2, int, fd, unsigned int, flags, - u32, offset_lo, u32, offset_hi, - u32, nbytes_lo, u32, nbytes_hi) + SYSCALL_PAIR(offset), SYSCALL_PAIR(nbytes)) { return sys_sync_file_range(fd, ((loff_t)offset_hi << 32) | offset_lo, ((loff_t)nbytes_hi << 32) | nbytes_lo, @@ -66,8 +74,7 @@ COMPAT_SYSCALL_DEFINE6(sync_file_range2, int, fd, unsigned int, flags, } COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, - u32, offset_lo, u32, offset_hi, - u32, len_lo, u32, len_hi) + SYSCALL_PAIR(offset), SYSCALL_PAIR(len)) { return sys_fallocate(fd, mode, ((loff_t)offset_hi << 32) | offset_lo, ((loff_t)len_hi << 32) | len_lo); @@ -77,6 +84,8 @@ 
COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, * Avoid bug in generic sys_llseek() that specifies offset_high and * offset_low as "unsigned long", thus making it possible to pass * a sign-extended high 32 bits in offset_low. + * Note that we do not use SYSCALL_PAIR here since glibc passes the + * high and low parts explicitly in that order. */ COMPAT_SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned int, offset_high, unsigned int, offset_low, loff_t __user *, result, diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c index 38debe706061..c7418dcbbb08 100644 --- a/arch/tile/kernel/sys.c +++ b/arch/tile/kernel/sys.c @@ -33,6 +33,7 @@ #include <asm/pgtable.h> #include <asm/homecache.h> #include <asm/cachectl.h> +#include <asm/byteorder.h> #include <arch/chip.h> SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, len, @@ -59,13 +60,19 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, len, #if !defined(__tilegx__) || defined(CONFIG_COMPAT) -ssize_t sys32_readahead(int fd, u32 offset_lo, u32 offset_hi, u32 count) +#ifdef __BIG_ENDIAN +#define SYSCALL_PAIR(name) u32 name ## _hi, u32 name ## _lo +#else +#define SYSCALL_PAIR(name) u32 name ## _lo, u32 name ## _hi +#endif + +ssize_t sys32_readahead(int fd, SYSCALL_PAIR(offset), u32 count) { return sys_readahead(fd, ((loff_t)offset_hi << 32) | offset_lo, count); } -int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi, - u32 len_lo, u32 len_hi, int advice) +int sys32_fadvise64_64(int fd, SYSCALL_PAIR(offset), + SYSCALL_PAIR(len), int advice) { return sys_fadvise64_64(fd, ((loff_t)offset_hi << 32) | offset_lo, ((loff_t)len_hi << 32) | len_lo, advice); diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c index 9d171ca4302c..c5369fe643c7 100644 --- a/arch/tile/lib/exports.c +++ b/arch/tile/lib/exports.c @@ -77,7 +77,11 @@ uint64_t __umoddi3(uint64_t dividend, uint64_t divisor); EXPORT_SYMBOL(__umoddi3); int64_t __moddi3(int64_t dividend, int64_t divisor); 
EXPORT_SYMBOL(__moddi3); -#ifndef __tilegx__ +#ifdef __tilegx__ +typedef int TItype __attribute__((mode(TI))); +TItype __multi3(TItype a, TItype b); +EXPORT_SYMBOL(__multi3); /* required for gcc 7 and later */ +#else int64_t __muldi3(int64_t, int64_t); EXPORT_SYMBOL(__muldi3); uint64_t __lshrdi3(uint64_t, unsigned int); diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 26734214818c..beba986589e5 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -434,7 +434,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return 0; diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index c4d5bf841a7f..7cc6ee7f1a58 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -45,20 +45,20 @@ void show_mem(unsigned int filter) struct zone *zone; pr_err("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu pagecache:%lu swap:%lu\n", - (global_page_state(NR_ACTIVE_ANON) + - global_page_state(NR_ACTIVE_FILE)), - (global_page_state(NR_INACTIVE_ANON) + - global_page_state(NR_INACTIVE_FILE)), - global_page_state(NR_FILE_DIRTY), - global_page_state(NR_WRITEBACK), - global_page_state(NR_UNSTABLE_NFS), + (global_node_page_state(NR_ACTIVE_ANON) + + global_node_page_state(NR_ACTIVE_FILE)), + (global_node_page_state(NR_INACTIVE_ANON) + + global_node_page_state(NR_INACTIVE_FILE)), + global_node_page_state(NR_FILE_DIRTY), + global_node_page_state(NR_WRITEBACK), + global_node_page_state(NR_UNSTABLE_NFS), global_page_state(NR_FREE_PAGES), (global_page_state(NR_SLAB_RECLAIMABLE) + global_page_state(NR_SLAB_UNRECLAIMABLE)), - global_page_state(NR_FILE_MAPPED), + global_node_page_state(NR_FILE_MAPPED), global_page_state(NR_PAGETABLE), global_page_state(NR_BOUNCE), - 
global_page_state(NR_FILE_PAGES), + global_node_page_state(NR_FILE_PAGES), get_nr_swap_pages()); for_each_zone(zone) { diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index 16eb63fac57d..821ff0acfe17 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -102,7 +102,7 @@ static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { tlb->need_flush = 1; free_page_and_swap_cache(page); - return 1; /* avoid calling tlb_flush_mmu */ + return false; /* avoid calling tlb_flush_mmu */ } static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) @@ -110,6 +110,24 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) __tlb_remove_page(tlb, page); } +static inline bool __tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + return __tlb_remove_page(tlb, page); +} + +static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, + struct page *page) +{ + return __tlb_remove_page(tlb, page); +} + +static inline void tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + return tlb_remove_page(tlb, page); +} + /** * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation. 
* diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 98783dd0fa2e..ad8f206ab5e8 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -73,7 +73,7 @@ good_area: do { int fault; - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) goto out_nosemaphore; diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index e5602ee9c610..0769066929c6 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -80,7 +80,7 @@ config ARCH_PUV3 select CPU_UCV2 select GENERIC_CLOCKEVENTS select HAVE_CLK - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB # CONFIGs for ARCH_PUV3 diff --git a/arch/unicore32/configs/unicore32_defconfig b/arch/unicore32/configs/unicore32_defconfig index 45f47f88d86a..aebd01fc28e5 100644 --- a/arch/unicore32/configs/unicore32_defconfig +++ b/arch/unicore32/configs/unicore32_defconfig @@ -161,7 +161,7 @@ CONFIG_LEDS_GPIO=y # LED Triggers CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_LEDS_TRIGGER_IDE_DISK=y +CONFIG_LEDS_TRIGGER_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y # Real Time Clock diff --git a/arch/unicore32/kernel/gpio.c b/arch/unicore32/kernel/gpio.c index 49347a0e9288..bf164bb4dba2 100644 --- a/arch/unicore32/kernel/gpio.c +++ b/arch/unicore32/kernel/gpio.c @@ -27,7 +27,7 @@ static const struct gpio_led puv3_gpio_leds[] = { { .name = "cpuhealth", .gpio = GPO_CPU_HEALTH, .active_low = 0, .default_trigger = "heartbeat", }, { .name = "hdd_led", .gpio = GPO_HDD_LED, .active_low = 1, - .default_trigger = "ide-disk", }, + .default_trigger = "disk-activity", }, }; static const struct gpio_led_platform_data puv3_gpio_led_data = { diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index 2ec3d3adcefc..6c7f70bcaae3 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -194,7 +194,7 @@ good_area: * If for any reason at all we couldn't handle the fault, make * sure we exit 
gracefully rather than endlessly redo the fault. */ - fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, flags); + fault = handle_mm_fault(vma, addr & PAGE_MASK, flags); return fault; check_stack: diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5977fea2c8b1..2fa55851d2a9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -22,6 +22,7 @@ config X86 select ANON_INODES select ARCH_CLOCKSOURCE_DATA select ARCH_DISCARD_MEMBLOCK + select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAS_DEVMEM_IS_ALLOWED diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 6fce7f096b88..830ed391e7ef 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -126,14 +126,6 @@ else KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args) endif -# Make sure compiler does not have buggy stack-protector support. -ifdef CONFIG_CC_STACKPROTECTOR - cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh - ifneq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y) - $(warning stack-protector enabled but compiler support broken) - endif -endif - ifdef CONFIG_X86_X32 x32_ld_ok := $(call try-run,\ /bin/echo -e '1: .quad 1b' | \ diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 3329844e3c43..f840766659a8 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -331,15 +331,9 @@ static void vgetcpu_cpu_init(void *arg) write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S); } -static int -vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg) +static int vgetcpu_online(unsigned int cpu) { - long cpu = (long)arg; - - if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) - smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1); - - return NOTIFY_DONE; + return smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1); } static int __init init_vdso(void) @@ -350,15 +344,9 
@@ static int __init init_vdso(void) init_vdso_image(&vdso_image_x32); #endif - cpu_notifier_register_begin(); - - on_each_cpu(vgetcpu_cpu_init, NULL, 1); /* notifier priority > KVM */ - __hotcpu_notifier(vgetcpu_cpu_notifier, 30); - - cpu_notifier_register_done(); - - return 0; + return cpuhp_setup_state(CPUHP_AP_X86_VDSO_VMA_ONLINE, + "AP_X86_VDSO_VMA_ONLINE", vgetcpu_online, NULL); } subsys_initcall(init_vdso); #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index bd3e8421b57c..e07a22bb9308 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -370,13 +370,13 @@ static int amd_pmu_cpu_prepare(int cpu) WARN_ON_ONCE(cpuc->amd_nb); if (!x86_pmu.amd_nb_constraints) - return NOTIFY_OK; + return 0; cpuc->amd_nb = amd_alloc_nb(cpu); if (!cpuc->amd_nb) - return NOTIFY_BAD; + return -ENOMEM; - return NOTIFY_OK; + return 0; } static void amd_pmu_cpu_starting(int cpu) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index feb90f6730e8..155ea5324ae0 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -655,8 +655,12 @@ fail: } if (event->attr.sample_type & PERF_SAMPLE_RAW) { - raw.size = sizeof(u32) + ibs_data.size; - raw.data = ibs_data.data; + raw = (struct perf_raw_record){ + .frag = { + .size = sizeof(u32) + ibs_data.size, + .data = ibs_data.data, + }, + }; data.raw = &raw; } @@ -721,13 +725,10 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) return ret; } -static __init int perf_event_ibs_init(void) +static __init void perf_event_ibs_init(void) { struct attribute **attr = ibs_op_format_attrs; - if (!ibs_caps) - return -ENODEV; /* ibs not supported by the cpu */ - perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); if (ibs_caps & IBS_CAPS_OPCNT) { @@ -738,13 +739,11 @@ static __init int perf_event_ibs_init(void) register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps); - 
- return 0; } #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */ -static __init int perf_event_ibs_init(void) { return 0; } +static __init void perf_event_ibs_init(void) { } #endif @@ -921,7 +920,7 @@ static inline int get_ibs_lvt_offset(void) return val & IBSCTL_LVT_OFFSET_MASK; } -static void setup_APIC_ibs(void *dummy) +static void setup_APIC_ibs(void) { int offset; @@ -936,7 +935,7 @@ failed: smp_processor_id()); } -static void clear_APIC_ibs(void *dummy) +static void clear_APIC_ibs(void) { int offset; @@ -945,18 +944,24 @@ static void clear_APIC_ibs(void *dummy) setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); } +static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu) +{ + setup_APIC_ibs(); + return 0; +} + #ifdef CONFIG_PM static int perf_ibs_suspend(void) { - clear_APIC_ibs(NULL); + clear_APIC_ibs(); return 0; } static void perf_ibs_resume(void) { ibs_eilvt_setup(); - setup_APIC_ibs(NULL); + setup_APIC_ibs(); } static struct syscore_ops perf_ibs_syscore_ops = { @@ -975,27 +980,15 @@ static inline void perf_ibs_pm_init(void) { } #endif -static int -perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) +static int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu) { - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_STARTING: - setup_APIC_ibs(NULL); - break; - case CPU_DYING: - clear_APIC_ibs(NULL); - break; - default: - break; - } - - return NOTIFY_OK; + clear_APIC_ibs(); + return 0; } static __init int amd_ibs_init(void) { u32 caps; - int ret = -EINVAL; caps = __get_ibs_caps(); if (!caps) @@ -1004,22 +997,25 @@ static __init int amd_ibs_init(void) ibs_eilvt_setup(); if (!ibs_eilvt_valid()) - goto out; + return -EINVAL; perf_ibs_pm_init(); - cpu_notifier_register_begin(); + ibs_caps = caps; /* make ibs_caps visible to other cpus: */ smp_mb(); - smp_call_function(setup_APIC_ibs, NULL, 1); - __perf_cpu_notifier(perf_ibs_cpu_notifier); - cpu_notifier_register_done(); + /* + * x86_pmu_amd_ibs_starting_cpu will be 
called from core on + * all online cpus. + */ + cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING, + "AP_PERF_X86_AMD_IBS_STARTING", + x86_pmu_amd_ibs_starting_cpu, + x86_pmu_amd_ibs_dying_cpu); - ret = perf_event_ibs_init(); -out: - if (ret) - pr_err("Failed to setup IBS, %d\n", ret); - return ret; + perf_event_ibs_init(); + + return 0; } /* Since we need the pci subsystem to init ibs we can't do this earlier: */ diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c index 55a3529dbf12..9842270ed2f2 100644 --- a/arch/x86/events/amd/power.c +++ b/arch/x86/events/amd/power.c @@ -228,12 +228,12 @@ static struct pmu pmu_class = { .read = pmu_event_read, }; -static void power_cpu_exit(int cpu) +static int power_cpu_exit(unsigned int cpu) { int target; if (!cpumask_test_and_clear_cpu(cpu, &cpu_mask)) - return; + return 0; /* * Find a new CPU on the same compute unit, if was set in cpumask @@ -245,9 +245,10 @@ static void power_cpu_exit(int cpu) cpumask_set_cpu(target, &cpu_mask); perf_pmu_migrate_context(&pmu_class, cpu, target); } + return 0; } -static void power_cpu_init(int cpu) +static int power_cpu_init(unsigned int cpu) { int target; @@ -255,7 +256,7 @@ static void power_cpu_init(int cpu) * 1) If any CPU is set at cpu_mask in the same compute unit, do * nothing. * 2) If no CPU is set at cpu_mask in the same compute unit, - * set current STARTING CPU. + * set current ONLINE CPU. * * Note: if there is a CPU aside of the new one already in the * sibling mask, then it is also in cpu_mask. 
@@ -263,33 +264,9 @@ static void power_cpu_init(int cpu) target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); if (target >= nr_cpumask_bits) cpumask_set_cpu(cpu, &cpu_mask); + return 0; } -static int -power_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) -{ - unsigned int cpu = (long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DOWN_FAILED: - case CPU_STARTING: - power_cpu_init(cpu); - break; - case CPU_DOWN_PREPARE: - power_cpu_exit(cpu); - break; - default: - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block power_cpu_notifier_nb = { - .notifier_call = power_cpu_notifier, - .priority = CPU_PRI_PERF, -}; - static const struct x86_cpu_id cpu_match[] = { { .vendor = X86_VENDOR_AMD, .family = 0x15 }, {}, @@ -297,7 +274,7 @@ static const struct x86_cpu_id cpu_match[] = { static int __init amd_power_pmu_init(void) { - int cpu, target, ret; + int ret; if (!x86_match_cpu(cpu_match)) return 0; @@ -312,38 +289,25 @@ static int __init amd_power_pmu_init(void) return -ENODEV; } - cpu_notifier_register_begin(); - /* Choose one online core of each compute unit. 
*/ - for_each_online_cpu(cpu) { - target = cpumask_first(topology_sibling_cpumask(cpu)); - if (!cpumask_test_cpu(target, &cpu_mask)) - cpumask_set_cpu(target, &cpu_mask); - } + cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_POWER_ONLINE, + "AP_PERF_X86_AMD_POWER_ONLINE", + power_cpu_init, power_cpu_exit); ret = perf_pmu_register(&pmu_class, "power", -1); if (WARN_ON(ret)) { pr_warn("AMD Power PMU registration failed\n"); - goto out; + return ret; } - __register_cpu_notifier(&power_cpu_notifier_nb); - pr_info("AMD Power PMU detected\n"); - -out: - cpu_notifier_register_done(); - return ret; } module_init(amd_power_pmu_init); static void __exit amd_power_pmu_exit(void) { - cpu_notifier_register_begin(); - __unregister_cpu_notifier(&power_cpu_notifier_nb); - cpu_notifier_register_done(); - + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_AMD_POWER_ONLINE); perf_pmu_unregister(&pmu_class); } module_exit(amd_power_pmu_exit); diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 98ac57381bf9..e6131d4454e6 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -358,7 +358,7 @@ amd_uncore_find_online_sibling(struct amd_uncore *this, return this; } -static void amd_uncore_cpu_starting(unsigned int cpu) +static int amd_uncore_cpu_starting(unsigned int cpu) { unsigned int eax, ebx, ecx, edx; struct amd_uncore *uncore; @@ -384,6 +384,8 @@ static void amd_uncore_cpu_starting(unsigned int cpu) uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2); *per_cpu_ptr(amd_uncore_l2, cpu) = uncore; } + + return 0; } static void uncore_online(unsigned int cpu, @@ -398,13 +400,15 @@ static void uncore_online(unsigned int cpu, cpumask_set_cpu(cpu, uncore->active_mask); } -static void amd_uncore_cpu_online(unsigned int cpu) +static int amd_uncore_cpu_online(unsigned int cpu) { if (amd_uncore_nb) uncore_online(cpu, amd_uncore_nb); if (amd_uncore_l2) uncore_online(cpu, amd_uncore_l2); + + return 0; } static void uncore_down_prepare(unsigned int 
cpu, @@ -433,13 +437,15 @@ static void uncore_down_prepare(unsigned int cpu, } } -static void amd_uncore_cpu_down_prepare(unsigned int cpu) +static int amd_uncore_cpu_down_prepare(unsigned int cpu) { if (amd_uncore_nb) uncore_down_prepare(cpu, amd_uncore_nb); if (amd_uncore_l2) uncore_down_prepare(cpu, amd_uncore_l2); + + return 0; } static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores) @@ -454,74 +460,19 @@ static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores) *per_cpu_ptr(uncores, cpu) = NULL; } -static void amd_uncore_cpu_dead(unsigned int cpu) +static int amd_uncore_cpu_dead(unsigned int cpu) { if (amd_uncore_nb) uncore_dead(cpu, amd_uncore_nb); if (amd_uncore_l2) uncore_dead(cpu, amd_uncore_l2); -} - -static int -amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action, - void *hcpu) -{ - unsigned int cpu = (long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - if (amd_uncore_cpu_up_prepare(cpu)) - return notifier_from_errno(-ENOMEM); - break; - - case CPU_STARTING: - amd_uncore_cpu_starting(cpu); - break; - - case CPU_ONLINE: - amd_uncore_cpu_online(cpu); - break; - - case CPU_DOWN_PREPARE: - amd_uncore_cpu_down_prepare(cpu); - break; - - case CPU_UP_CANCELED: - case CPU_DEAD: - amd_uncore_cpu_dead(cpu); - break; - - default: - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block amd_uncore_cpu_notifier_block = { - .notifier_call = amd_uncore_cpu_notifier, - .priority = CPU_PRI_PERF + 1, -}; - -static void __init init_cpu_already_online(void *dummy) -{ - unsigned int cpu = smp_processor_id(); - - amd_uncore_cpu_starting(cpu); - amd_uncore_cpu_online(cpu); -} -static void cleanup_cpu_online(void *dummy) -{ - unsigned int cpu = smp_processor_id(); - - amd_uncore_cpu_dead(cpu); + return 0; } static int __init amd_uncore_init(void) { - unsigned int cpu, cpu2; int ret = -ENODEV; if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) @@ -558,38 +509,29 @@ static 
int __init amd_uncore_init(void) ret = 0; } - if (ret) - goto fail_nodev; - - cpu_notifier_register_begin(); - - /* init cpus already online before registering for hotplug notifier */ - for_each_online_cpu(cpu) { - ret = amd_uncore_cpu_up_prepare(cpu); - if (ret) - goto fail_online; - smp_call_function_single(cpu, init_cpu_already_online, NULL, 1); - } - - __register_cpu_notifier(&amd_uncore_cpu_notifier_block); - cpu_notifier_register_done(); - + /* + * Install callbacks. Core will call them for each online cpu. + */ + if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP, + "PERF_X86_AMD_UNCORE_PREP", + amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead)) + goto fail_l2; + + if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, + "AP_PERF_X86_AMD_UNCORE_STARTING", + amd_uncore_cpu_starting, NULL)) + goto fail_prep; + if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE, + "AP_PERF_X86_AMD_UNCORE_ONLINE", + amd_uncore_cpu_online, + amd_uncore_cpu_down_prepare)) + goto fail_start; return 0; - -fail_online: - for_each_online_cpu(cpu2) { - if (cpu2 == cpu) - break; - smp_call_function_single(cpu, cleanup_cpu_online, NULL, 1); - } - cpu_notifier_register_done(); - - /* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */ - amd_uncore_nb = amd_uncore_l2 = NULL; - - if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) - perf_pmu_unregister(&amd_l2_pmu); +fail_start: + cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); +fail_prep: + cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); fail_l2: if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) perf_pmu_unregister(&amd_nb_pmu); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index dfebbde2a4cc..c17f0de5fd39 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1477,49 +1477,49 @@ NOKPROBE_SYMBOL(perf_event_nmi_handler); struct event_constraint emptyconstraint; struct event_constraint unconstrained; -static int -x86_pmu_notifier(struct notifier_block *self, unsigned long action, void 
*hcpu) +static int x86_pmu_prepare_cpu(unsigned int cpu) { - unsigned int cpu = (long)hcpu; struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - int i, ret = NOTIFY_OK; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) - cpuc->kfree_on_online[i] = NULL; - if (x86_pmu.cpu_prepare) - ret = x86_pmu.cpu_prepare(cpu); - break; - - case CPU_STARTING: - if (x86_pmu.cpu_starting) - x86_pmu.cpu_starting(cpu); - break; + int i; - case CPU_ONLINE: - for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) { - kfree(cpuc->kfree_on_online[i]); - cpuc->kfree_on_online[i] = NULL; - } - break; + for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) + cpuc->kfree_on_online[i] = NULL; + if (x86_pmu.cpu_prepare) + return x86_pmu.cpu_prepare(cpu); + return 0; +} - case CPU_DYING: - if (x86_pmu.cpu_dying) - x86_pmu.cpu_dying(cpu); - break; +static int x86_pmu_dead_cpu(unsigned int cpu) +{ + if (x86_pmu.cpu_dead) + x86_pmu.cpu_dead(cpu); + return 0; +} - case CPU_UP_CANCELED: - case CPU_DEAD: - if (x86_pmu.cpu_dead) - x86_pmu.cpu_dead(cpu); - break; +static int x86_pmu_online_cpu(unsigned int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + int i; - default: - break; + for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) { + kfree(cpuc->kfree_on_online[i]); + cpuc->kfree_on_online[i] = NULL; } + return 0; +} - return ret; +static int x86_pmu_starting_cpu(unsigned int cpu) +{ + if (x86_pmu.cpu_starting) + x86_pmu.cpu_starting(cpu); + return 0; +} + +static int x86_pmu_dying_cpu(unsigned int cpu) +{ + if (x86_pmu.cpu_dying) + x86_pmu.cpu_dying(cpu); + return 0; } static void __init pmu_check_apic(void) @@ -1787,10 +1787,39 @@ static int __init init_hw_perf_events(void) pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); - perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); - perf_cpu_notifier(x86_pmu_notifier); + /* + * Install callbacks. 
Core will call them for each online + * cpu. + */ + err = cpuhp_setup_state(CPUHP_PERF_X86_PREPARE, "PERF_X86_PREPARE", + x86_pmu_prepare_cpu, x86_pmu_dead_cpu); + if (err) + return err; + + err = cpuhp_setup_state(CPUHP_AP_PERF_X86_STARTING, + "AP_PERF_X86_STARTING", x86_pmu_starting_cpu, + x86_pmu_dying_cpu); + if (err) + goto out; + + err = cpuhp_setup_state(CPUHP_AP_PERF_X86_ONLINE, "AP_PERF_X86_ONLINE", + x86_pmu_online_cpu, NULL); + if (err) + goto out1; + + err = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); + if (err) + goto out2; return 0; + +out2: + cpuhp_remove_state(CPUHP_AP_PERF_X86_ONLINE); +out1: + cpuhp_remove_state(CPUHP_AP_PERF_X86_STARTING); +out: + cpuhp_remove_state(CPUHP_PERF_X86_PREPARE); + return err; } early_initcall(init_hw_perf_events); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 0974ba11e954..2cbde2f449aa 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3109,7 +3109,7 @@ static int intel_pmu_cpu_prepare(int cpu) cpuc->excl_thread_id = 0; } - return NOTIFY_OK; + return 0; err_constraint_list: kfree(cpuc->constraint_list); @@ -3120,7 +3120,7 @@ err_shared_regs: cpuc->shared_regs = NULL; err: - return NOTIFY_BAD; + return -ENOMEM; } static void intel_pmu_cpu_starting(int cpu) diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c index 7b5fd811ef45..783c49ddef29 100644 --- a/arch/x86/events/intel/cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -1577,7 +1577,7 @@ static inline void cqm_pick_event_reader(int cpu) cpumask_set_cpu(cpu, &cqm_cpumask); } -static void intel_cqm_cpu_starting(unsigned int cpu) +static int intel_cqm_cpu_starting(unsigned int cpu) { struct intel_pqr_state *state = &per_cpu(pqr_state, cpu); struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -1588,39 +1588,26 @@ static void intel_cqm_cpu_starting(unsigned int cpu) WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid); WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale); + + cqm_pick_event_reader(cpu); + return 
0; } -static void intel_cqm_cpu_exit(unsigned int cpu) +static int intel_cqm_cpu_exit(unsigned int cpu) { int target; /* Is @cpu the current cqm reader for this package ? */ if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask)) - return; + return 0; /* Find another online reader in this package */ target = cpumask_any_but(topology_core_cpumask(cpu), cpu); if (target < nr_cpu_ids) cpumask_set_cpu(target, &cqm_cpumask); -} - -static int intel_cqm_cpu_notifier(struct notifier_block *nb, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DOWN_PREPARE: - intel_cqm_cpu_exit(cpu); - break; - case CPU_STARTING: - intel_cqm_cpu_starting(cpu); - cqm_pick_event_reader(cpu); - break; - } - return NOTIFY_OK; + return 0; } static const struct x86_cpu_id intel_cqm_match[] = { @@ -1682,7 +1669,7 @@ out: static int __init intel_cqm_init(void) { char *str = NULL, scale[20]; - int i, cpu, ret; + int cpu, ret; if (x86_match_cpu(intel_cqm_match)) cqm_enabled = true; @@ -1705,8 +1692,7 @@ static int __init intel_cqm_init(void) * * Also, check that the scales match on all cpus. */ - cpu_notifier_register_begin(); - + get_online_cpus(); for_each_online_cpu(cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -1743,11 +1729,6 @@ static int __init intel_cqm_init(void) if (ret) goto out; - for_each_online_cpu(i) { - intel_cqm_cpu_starting(i); - cqm_pick_event_reader(i); - } - if (mbm_enabled) ret = intel_mbm_init(); if (ret && !cqm_enabled) @@ -1772,12 +1753,18 @@ static int __init intel_cqm_init(void) pr_info("Intel MBM enabled\n"); /* - * Register the hot cpu notifier once we are sure cqm + * Setup the hot cpu notifier once we are sure cqm * is enabled to avoid notifier leak. 
*/ - __perf_cpu_notifier(intel_cqm_cpu_notifier); + cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_STARTING, + "AP_PERF_X86_CQM_STARTING", + intel_cqm_cpu_starting, NULL); + cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_ONLINE, "AP_PERF_X86_CQM_ONLINE", + NULL, intel_cqm_cpu_exit); + out: - cpu_notifier_register_done(); + put_online_cpus(); + if (ret) { kfree(str); cqm_cleanup(); diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 4c7638b91fa5..3ca87b5a8677 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -366,7 +366,7 @@ static int cstate_pmu_event_add(struct perf_event *event, int mode) * Check if exiting cpu is the designated reader. If so migrate the * events when there is a valid target available */ -static void cstate_cpu_exit(int cpu) +static int cstate_cpu_exit(unsigned int cpu) { unsigned int target; @@ -391,9 +391,10 @@ static void cstate_cpu_exit(int cpu) perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target); } } + return 0; } -static void cstate_cpu_init(int cpu) +static int cstate_cpu_init(unsigned int cpu) { unsigned int target; @@ -415,31 +416,10 @@ static void cstate_cpu_init(int cpu) topology_core_cpumask(cpu)); if (has_cstate_pkg && target >= nr_cpu_ids) cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask); -} -static int cstate_cpu_notifier(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_STARTING: - cstate_cpu_init(cpu); - break; - case CPU_DOWN_PREPARE: - cstate_cpu_exit(cpu); - break; - default: - break; - } - return NOTIFY_OK; + return 0; } -static struct notifier_block cstate_cpu_nb = { - .notifier_call = cstate_cpu_notifier, - .priority = CPU_PRI_PERF + 1, -}; - static struct pmu cstate_core_pmu = { .attr_groups = core_attr_groups, .name = "cstate_core", @@ -600,18 +580,20 @@ static inline void cstate_cleanup(void) static int __init cstate_init(void) { - int cpu, err; + int err; - 
cpu_notifier_register_begin(); - for_each_online_cpu(cpu) - cstate_cpu_init(cpu); + cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING, + "AP_PERF_X86_CSTATE_STARTING", cstate_cpu_init, + NULL); + cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE, + "AP_PERF_X86_CSTATE_ONLINE", NULL, cstate_cpu_exit); if (has_cstate_core) { err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1); if (err) { has_cstate_core = false; pr_info("Failed to register cstate core pmu\n"); - goto out; + return err; } } @@ -621,12 +603,10 @@ static int __init cstate_init(void) has_cstate_pkg = false; pr_info("Failed to register cstate pkg pmu\n"); cstate_cleanup(); - goto out; + return err; } } - __register_cpu_notifier(&cstate_cpu_nb); -out: - cpu_notifier_register_done(); + return err; } @@ -652,9 +632,8 @@ module_init(cstate_pmu_init); static void __exit cstate_pmu_exit(void) { - cpu_notifier_register_begin(); - __unregister_cpu_notifier(&cstate_cpu_nb); + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE); + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING); cstate_cleanup(); - cpu_notifier_register_done(); } module_exit(cstate_pmu_exit); diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index d0c58b35155f..28865938aadf 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -556,14 +556,14 @@ const struct attribute_group *rapl_attr_groups[] = { NULL, }; -static void rapl_cpu_exit(int cpu) +static int rapl_cpu_offline(unsigned int cpu) { struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); int target; /* Check if exiting cpu is used for collecting rapl events */ if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask)) - return; + return 0; pmu->cpu = -1; /* Find a new cpu to collect rapl events */ @@ -575,9 +575,10 @@ static void rapl_cpu_exit(int cpu) pmu->cpu = target; perf_pmu_migrate_context(pmu->pmu, cpu, target); } + return 0; } -static void rapl_cpu_init(int cpu) +static int rapl_cpu_online(unsigned int cpu) { 
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); int target; @@ -588,13 +589,14 @@ static void rapl_cpu_init(int cpu) */ target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu)); if (target < nr_cpu_ids) - return; + return 0; cpumask_set_cpu(cpu, &rapl_cpu_mask); pmu->cpu = cpu; + return 0; } -static int rapl_cpu_prepare(int cpu) +static int rapl_cpu_prepare(unsigned int cpu) { struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); @@ -615,33 +617,6 @@ static int rapl_cpu_prepare(int cpu) return 0; } -static int rapl_cpu_notifier(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - rapl_cpu_prepare(cpu); - break; - - case CPU_DOWN_FAILED: - case CPU_ONLINE: - rapl_cpu_init(cpu); - break; - - case CPU_DOWN_PREPARE: - rapl_cpu_exit(cpu); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block rapl_cpu_nb = { - .notifier_call = rapl_cpu_notifier, - .priority = CPU_PRI_PERF + 1, -}; - static int rapl_check_hw_unit(bool apply_quirk) { u64 msr_rapl_power_unit_bits; @@ -692,24 +667,6 @@ static void __init rapl_advertise(void) } } -static int __init rapl_prepare_cpus(void) -{ - unsigned int cpu, pkg; - int ret; - - for_each_online_cpu(cpu) { - pkg = topology_logical_package_id(cpu); - if (rapl_pmus->pmus[pkg]) - continue; - - ret = rapl_cpu_prepare(cpu); - if (ret) - return ret; - rapl_cpu_init(cpu); - } - return 0; -} - static void cleanup_rapl_pmus(void) { int i; @@ -837,35 +794,44 @@ static int __init rapl_pmu_init(void) if (ret) return ret; - cpu_notifier_register_begin(); + /* + * Install callbacks. Core will call them for each online cpu. 
+ */ - ret = rapl_prepare_cpus(); + ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "PERF_X86_RAPL_PREP", + rapl_cpu_prepare, NULL); if (ret) goto out; + ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE, + "AP_PERF_X86_RAPL_ONLINE", + rapl_cpu_online, rapl_cpu_offline); + if (ret) + goto out1; + ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); if (ret) - goto out; + goto out2; - __register_cpu_notifier(&rapl_cpu_nb); - cpu_notifier_register_done(); rapl_advertise(); return 0; +out2: + cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); +out1: + cpuhp_remove_state(CPUHP_PERF_X86_RAPL_PREP); out: pr_warn("Initialization failed (%d), disabled\n", ret); cleanup_rapl_pmus(); - cpu_notifier_register_done(); return ret; } module_init(rapl_pmu_init); static void __exit intel_rapl_exit(void) { - cpu_notifier_register_begin(); - __unregister_cpu_notifier(&rapl_cpu_nb); + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE); + cpuhp_remove_state_nocalls(CPUHP_PERF_X86_RAPL_PREP); perf_pmu_unregister(&rapl_pmus->pmu); cleanup_rapl_pmus(); - cpu_notifier_register_done(); } module_exit(intel_rapl_exit); diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 59b4974c697f..3f3d0d67749b 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1052,7 +1052,7 @@ static void uncore_pci_exit(void) } } -static void uncore_cpu_dying(int cpu) +static int uncore_cpu_dying(unsigned int cpu) { struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; @@ -1069,16 +1069,19 @@ static void uncore_cpu_dying(int cpu) uncore_box_exit(box); } } + return 0; } -static void uncore_cpu_starting(int cpu, bool init) +static int first_init; + +static int uncore_cpu_starting(unsigned int cpu) { struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; int i, pkg, ncpus = 1; - if (init) { + if (first_init) { /* * On init we get the number 
of online cpus in the package * and set refcount for all of them. @@ -1099,9 +1102,11 @@ static void uncore_cpu_starting(int cpu, bool init) uncore_box_init(box); } } + + return 0; } -static int uncore_cpu_prepare(int cpu) +static int uncore_cpu_prepare(unsigned int cpu) { struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; @@ -1164,13 +1169,13 @@ static void uncore_change_context(struct intel_uncore_type **uncores, uncore_change_type_ctx(*uncores, old_cpu, new_cpu); } -static void uncore_event_exit_cpu(int cpu) +static int uncore_event_cpu_offline(unsigned int cpu) { int target; /* Check if exiting cpu is used for collecting uncore events */ if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) - return; + return 0; /* Find a new cpu to collect uncore events */ target = cpumask_any_but(topology_core_cpumask(cpu), cpu); @@ -1183,9 +1188,10 @@ static void uncore_event_exit_cpu(int cpu) uncore_change_context(uncore_msr_uncores, cpu, target); uncore_change_context(uncore_pci_uncores, cpu, target); + return 0; } -static void uncore_event_init_cpu(int cpu) +static int uncore_event_cpu_online(unsigned int cpu) { int target; @@ -1195,50 +1201,15 @@ static void uncore_event_init_cpu(int cpu) */ target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu)); if (target < nr_cpu_ids) - return; + return 0; cpumask_set_cpu(cpu, &uncore_cpu_mask); uncore_change_context(uncore_msr_uncores, -1, cpu); uncore_change_context(uncore_pci_uncores, -1, cpu); + return 0; } -static int uncore_cpu_notifier(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - return notifier_from_errno(uncore_cpu_prepare(cpu)); - - case CPU_STARTING: - uncore_cpu_starting(cpu, false); - case CPU_DOWN_FAILED: - uncore_event_init_cpu(cpu); - break; - - case CPU_UP_CANCELED: - case CPU_DYING: - uncore_cpu_dying(cpu); - break; - - case 
CPU_DOWN_PREPARE: - uncore_event_exit_cpu(cpu); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block uncore_cpu_nb = { - .notifier_call = uncore_cpu_notifier, - /* - * to migrate uncore events, our notifier should be executed - * before perf core's notifier. - */ - .priority = CPU_PRI_PERF + 1, -}; - static int __init type_pmu_register(struct intel_uncore_type *type) { int i, ret; @@ -1282,41 +1253,6 @@ err: return ret; } -static void __init uncore_cpu_setup(void *dummy) -{ - uncore_cpu_starting(smp_processor_id(), true); -} - -/* Lazy to avoid allocation of a few bytes for the normal case */ -static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC); - -static int __init uncore_cpumask_init(bool msr) -{ - unsigned int cpu; - - for_each_online_cpu(cpu) { - unsigned int pkg = topology_logical_package_id(cpu); - int ret; - - if (test_and_set_bit(pkg, packages)) - continue; - /* - * The first online cpu of each package allocates and takes - * the refcounts for all other online cpus in that package. - * If msrs are not enabled no allocation is required. - */ - if (msr) { - ret = uncore_cpu_prepare(cpu); - if (ret) - return ret; - } - uncore_event_init_cpu(cpu); - smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1); - } - __register_cpu_notifier(&uncore_cpu_nb); - return 0; -} - #define X86_UNCORE_MODEL_MATCH(model, init) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } @@ -1440,11 +1376,33 @@ static int __init intel_uncore_init(void) if (cret && pret) return -ENODEV; - cpu_notifier_register_begin(); - ret = uncore_cpumask_init(!cret); - if (ret) - goto err; - cpu_notifier_register_done(); + /* + * Install callbacks. Core will call them for each online cpu. + * + * The first online cpu of each package allocates and takes + * the refcounts for all other online cpus in that package. + * If msrs are not enabled no allocation is required and + * uncore_cpu_prepare() is not called for each online cpu. 
+ */ + if (!cret) { + ret = cpuhp_setup_state(CPUHP_PERF_X86_UNCORE_PREP, + "PERF_X86_UNCORE_PREP", + uncore_cpu_prepare, NULL); + if (ret) + goto err; + } else { + cpuhp_setup_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP, + "PERF_X86_UNCORE_PREP", + uncore_cpu_prepare, NULL); + } + first_init = 1; + cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING, + "AP_PERF_X86_UNCORE_STARTING", + uncore_cpu_starting, uncore_cpu_dying); + first_init = 0; + cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, + "AP_PERF_X86_UNCORE_ONLINE", + uncore_event_cpu_online, uncore_event_cpu_offline); return 0; err: @@ -1452,17 +1410,16 @@ err: on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1); uncore_types_exit(uncore_msr_uncores); uncore_pci_exit(); - cpu_notifier_register_done(); return ret; } module_init(intel_uncore_init); static void __exit intel_uncore_exit(void) { - cpu_notifier_register_begin(); - __unregister_cpu_notifier(&uncore_cpu_nb); + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_ONLINE); + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_STARTING); + cpuhp_remove_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP); uncore_types_exit(uncore_msr_uncores); uncore_pci_exit(); - cpu_notifier_register_done(); } module_exit(intel_uncore_exit); diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 94c18ebfd68c..5391b0ae7cc3 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -145,7 +145,6 @@ static inline void disable_acpi(void) { } #define ARCH_HAS_POWER_INIT 1 #ifdef CONFIG_ACPI_NUMA -extern int acpi_numa; extern int x86_acpi_numa_init(void); #endif /* CONFIG_ACPI_NUMA */ @@ -170,4 +169,6 @@ static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) } #endif +#define ACPI_TABLE_UPGRADE_MAX_PHYS (max_low_pfn_mapped << PAGE_SHIFT) + #endif /* _ASM_X86_ACPI_H */ diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 59d34c521d96..9b7fa6313f1a 100644 --- a/arch/x86/include/asm/cpu.h +++ 
b/arch/x86/include/asm/cpu.h @@ -16,6 +16,7 @@ extern void prefill_possible_map(void); static inline void prefill_possible_map(void) {} #define cpu_physical_id(cpu) boot_cpu_physical_apicid +#define cpu_acpi_id(cpu) 0 #define safe_smp_processor_id() 0 #endif /* CONFIG_SMP */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c64b1e9c5d1a..d683993248c8 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -225,7 +225,6 @@ #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ #define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ -#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */ #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index 574c23cf761a..b6d425999f99 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -81,7 +81,11 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { struct page *page; - page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); + gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO; + + if (mm == &init_mm) + gfp &= ~__GFP_ACCOUNT; + page = alloc_pages(gfp, 0); if (!page) return NULL; if (!pgtable_pmd_page_ctor(page)) { @@ -125,7 +129,11 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pud_t *)get_zeroed_page(GFP_KERNEL); + gfp_t gfp = GFP_KERNEL_ACCOUNT; + + if (mm == &init_mm) + gfp &= ~__GFP_ACCOUNT; + return (pud_t *)get_zeroed_page(gfp); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) 
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index fbc5e92e1ecc..643eba42d620 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -26,13 +26,11 @@ * @n: length of the copy in bytes * * Copy data to persistent memory media via non-temporal stores so that - * a subsequent arch_wmb_pmem() can flush cpu and memory controller - * write buffers to guarantee durability. + * a subsequent pmem driver flush operation will drain posted write queues. */ -static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, - size_t n) +static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) { - int unwritten; + int rem; /* * We are copying between two kernel buffers, if @@ -40,59 +38,36 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, * fault) we would have already reported a general protection fault * before the WARN+BUG. */ - unwritten = __copy_from_user_inatomic_nocache((void __force *) dst, - (void __user *) src, n); - if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n", - __func__, dst, src, unwritten)) + rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n); + if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n", + __func__, dst, src, rem)) BUG(); } -static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src, - size_t n) +static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n) { if (static_cpu_has(X86_FEATURE_MCE_RECOVERY)) - return memcpy_mcsafe(dst, (void __force *) src, n); - memcpy(dst, (void __force *) src, n); + return memcpy_mcsafe(dst, src, n); + memcpy(dst, src, n); return 0; } /** - * arch_wmb_pmem - synchronize writes to persistent memory - * - * After a series of arch_memcpy_to_pmem() operations this drains data - * from cpu write buffers and any platform (memory controller) buffers - * to ensure that written data is durable on persistent memory media. 
- */ -static inline void arch_wmb_pmem(void) -{ - /* - * wmb() to 'sfence' all previous writes such that they are - * architecturally visible to 'pcommit'. Note, that we've - * already arranged for pmem writes to avoid the cache via - * arch_memcpy_to_pmem(). - */ - wmb(); - pcommit_sfence(); -} - -/** * arch_wb_cache_pmem - write back a cache range with CLWB * @vaddr: virtual start address * @size: number of bytes to write back * * Write back a cache range using the CLWB (cache line write back) - * instruction. This function requires explicit ordering with an - * arch_wmb_pmem() call. + * instruction. */ -static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size) +static inline void arch_wb_cache_pmem(void *addr, size_t size) { u16 x86_clflush_size = boot_cpu_data.x86_clflush_size; unsigned long clflush_mask = x86_clflush_size - 1; - void *vaddr = (void __force *)addr; - void *vend = vaddr + size; + void *vend = addr + size; void *p; - for (p = (void *)((unsigned long)vaddr & ~clflush_mask); + for (p = (void *)((unsigned long)addr & ~clflush_mask); p < vend; p += x86_clflush_size) clwb(p); } @@ -113,16 +88,14 @@ static inline bool __iter_needs_pmem_wb(struct iov_iter *i) * @i: iterator with source data * * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. - * This function requires explicit ordering with an arch_wmb_pmem() call. 
*/ -static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, +static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, struct iov_iter *i) { - void *vaddr = (void __force *)addr; size_t len; /* TODO: skip the write-back by always using non-temporal stores */ - len = copy_from_iter_nocache(vaddr, bytes, i); + len = copy_from_iter_nocache(addr, bytes, i); if (__iter_needs_pmem_wb(i)) arch_wb_cache_pmem(addr, bytes); @@ -136,28 +109,16 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, * @size: number of bytes to zero * * Write zeros into the memory range starting at 'addr' for 'size' bytes. - * This function requires explicit ordering with an arch_wmb_pmem() call. */ -static inline void arch_clear_pmem(void __pmem *addr, size_t size) +static inline void arch_clear_pmem(void *addr, size_t size) { - void *vaddr = (void __force *)addr; - - memset(vaddr, 0, size); + memset(addr, 0, size); arch_wb_cache_pmem(addr, size); } -static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) +static inline void arch_invalidate_pmem(void *addr, size_t size) { - clflush_cache_range((void __force *) addr, size); -} - -static inline bool __arch_has_wmb_pmem(void) -{ - /* - * We require that wmb() be an 'sfence', that is only guaranteed on - * 64-bit builds - */ - return static_cpu_has(X86_FEATURE_PCOMMIT); + clflush_cache_range(addr, size); } #endif /* CONFIG_ARCH_HAS_PMEM_API */ #endif /* __ASM_X86_PMEM_H__ */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index c9734dc76257..ebd0c164cd4e 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -33,6 +33,7 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu) } DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); +DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) 
DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid); @@ -148,6 +149,7 @@ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); void smp_store_boot_cpu_info(void); void smp_store_cpu_info(int id); #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) +#define cpu_acpi_id(cpu) per_cpu(x86_cpu_to_acpiid, cpu) #else /* !CONFIG_SMP */ #define wbinvd_on_cpu(cpu) wbinvd() diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index d96d04377765..587d7914ea4b 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -253,52 +253,6 @@ static inline void clwb(volatile void *__p) : [pax] "a" (p)); } -/** - * pcommit_sfence() - persistent commit and fence - * - * The PCOMMIT instruction ensures that data that has been flushed from the - * processor's cache hierarchy with CLWB, CLFLUSHOPT or CLFLUSH is accepted to - * memory and is durable on the DIMM. The primary use case for this is - * persistent memory. - * - * This function shows how to properly use CLWB/CLFLUSHOPT/CLFLUSH and PCOMMIT - * with appropriate fencing. - * - * Example: - * void flush_and_commit_buffer(void *vaddr, unsigned int size) - * { - * unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1; - * void *vend = vaddr + size; - * void *p; - * - * for (p = (void *)((unsigned long)vaddr & ~clflush_mask); - * p < vend; p += boot_cpu_data.x86_clflush_size) - * clwb(p); - * - * // SFENCE to order CLWB/CLFLUSHOPT/CLFLUSH cache flushes - * // MFENCE via mb() also works - * wmb(); - * - * // PCOMMIT and the required SFENCE for ordering - * pcommit_sfence(); - * } - * - * After this function completes the data pointed to by 'vaddr' has been - * accepted to memory and will be durable if the 'vaddr' points to persistent - * memory. 
- * - * PCOMMIT must always be ordered by an MFENCE or SFENCE, so to help simplify - * things we include both the PCOMMIT and the required SFENCE in the - * alternatives generated by pcommit_sfence(). - */ -static inline void pcommit_sfence(void) -{ - alternative(ASM_NOP7, - ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */ - "sfence", - X86_FEATURE_PCOMMIT); -} - #define nop() asm volatile ("nop") diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 14c63c7e8337..a002b07a7099 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -72,7 +72,6 @@ #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 #define SECONDARY_EXEC_ENABLE_PML 0x00020000 #define SECONDARY_EXEC_XSAVES 0x00100000 -#define SECONDARY_EXEC_PCOMMIT 0x00200000 #define SECONDARY_EXEC_TSC_SCALING 0x02000000 #define PIN_BASED_EXT_INTR_MASK 0x00000001 diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h index 0d809e9fc975..3bdd10d71223 100644 --- a/arch/x86/include/asm/xen/cpuid.h +++ b/arch/x86/include/asm/xen/cpuid.h @@ -76,15 +76,18 @@ /* * Leaf 5 (0x40000x04) * HVM-specific features + * EAX: Features + * EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag) */ -/* EAX Features */ /* Virtualized APIC registers */ #define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized x2APIC accesses */ #define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1) /* Memory mapped from other domains has valid IOMMU entries */ #define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2) +/* vcpu id is present in EBX */ +#define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) #define XEN_CPUID_MAX_NUM_LEAVES 4 diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 5b15d94a33f8..37fee272618f 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -78,7 +78,6 @@ #define EXIT_REASON_PML_FULL 62 #define EXIT_REASON_XSAVES 63 #define EXIT_REASON_XRSTORS 64 -#define EXIT_REASON_PCOMMIT 65 #define VMX_EXIT_REASONS \ { 
EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -127,8 +126,7 @@ { EXIT_REASON_INVVPID, "INVVPID" }, \ { EXIT_REASON_INVPCID, "INVPCID" }, \ { EXIT_REASON_XSAVES, "XSAVES" }, \ - { EXIT_REASON_XRSTORS, "XRSTORS" }, \ - { EXIT_REASON_PCOMMIT, "PCOMMIT" } + { EXIT_REASON_XRSTORS, "XRSTORS" } #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 9414f84584e4..6738e5c82cca 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -161,13 +161,15 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) /** * acpi_register_lapic - register a local apic and generates a logic cpu number * @id: local apic id to register + * @acpiid: ACPI id to register * @enabled: this cpu is enabled or not * * Returns the logic cpu number which maps to the local apic */ -static int acpi_register_lapic(int id, u8 enabled) +static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) { unsigned int ver = 0; + int cpu; if (id >= MAX_LOCAL_APIC) { printk(KERN_INFO PREFIX "skipped apicid that is too big\n"); @@ -182,7 +184,11 @@ static int acpi_register_lapic(int id, u8 enabled) if (boot_cpu_physical_apicid != -1U) ver = apic_version[boot_cpu_physical_apicid]; - return generic_processor_info(id, ver); + cpu = generic_processor_info(id, ver); + if (cpu >= 0) + early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid; + + return cpu; } static int __init @@ -212,7 +218,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) if (!apic->apic_id_valid(apic_id) && enabled) printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); else - acpi_register_lapic(apic_id, enabled); + acpi_register_lapic(apic_id, processor->uid, enabled); #else printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); #endif @@ -240,6 +246,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) * when we use CPU hotplug. 
*/ acpi_register_lapic(processor->id, /* APIC ID */ + processor->processor_id, /* ACPI ID */ processor->lapic_flags & ACPI_MADT_ENABLED); return 0; @@ -258,6 +265,7 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) acpi_table_print_madt_entry(header); acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */ + processor->processor_id, /* ACPI ID */ processor->lapic_flags & ACPI_MADT_ENABLED); return 0; @@ -714,7 +722,7 @@ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu) { int cpu; - cpu = acpi_register_lapic(physid, ACPI_MADT_ENABLED); + cpu = acpi_register_lapic(physid, U32_MAX, ACPI_MADT_ENABLED); if (cpu < 0) { pr_info(PREFIX "Unable to map lapic to logical cpu number\n"); return cpu; diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index cefacbad1531..456316f6c868 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -215,26 +215,18 @@ void apbt_setup_secondary_clock(void) * cpu timers during the offline process due to the ordering of notification. * the extra interrupt is harmless. 
*/ -static int apbt_cpuhp_notify(struct notifier_block *n, - unsigned long action, void *hcpu) +static int apbt_cpu_dead(unsigned int cpu) { - unsigned long cpu = (unsigned long)hcpu; struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu); - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DEAD: - dw_apb_clockevent_pause(adev->timer); - if (system_state == SYSTEM_RUNNING) { - pr_debug("skipping APBT CPU %lu offline\n", cpu); - } else { - pr_debug("APBT clockevent for cpu %lu offline\n", cpu); - dw_apb_clockevent_stop(adev->timer); - } - break; - default: - pr_debug("APBT notified %lu, no action\n", action); + dw_apb_clockevent_pause(adev->timer); + if (system_state == SYSTEM_RUNNING) { + pr_debug("skipping APBT CPU %u offline\n", cpu); + } else { + pr_debug("APBT clockevent for cpu %u offline\n", cpu); + dw_apb_clockevent_stop(adev->timer); } - return NOTIFY_OK; + return 0; } static __init int apbt_late_init(void) @@ -242,9 +234,8 @@ static __init int apbt_late_init(void) if (intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT || !apb_timer_block_enabled) return 0; - /* This notifier should be called after workqueue is ready */ - hotcpu_notifier(apbt_cpuhp_notify, -20); - return 0; + return cpuhp_setup_state(CPUHP_X86_APB_DEAD, "X86_APB_DEAD", NULL, + apbt_cpu_dead); } fs_initcall(apbt_late_init); #else diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index f943d2f453a4..ac8d8ad8b009 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -92,8 +92,10 @@ static int apic_extnmi = APIC_EXTNMI_BSP; */ DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/apic/x2apic_cluster.c 
b/arch/x86/kernel/apic/x2apic_cluster.c index 24170d0809ba..6368fa69d2af 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -152,68 +152,48 @@ static void init_x2apic_ldr(void) } } - /* - * At CPU state changes, update the x2apic cluster sibling info. - */ -static int -update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu) +/* + * At CPU state changes, update the x2apic cluster sibling info. + */ +int x2apic_prepare_cpu(unsigned int cpu) { - unsigned int this_cpu = (unsigned long)hcpu; - unsigned int cpu; - int err = 0; - - switch (action) { - case CPU_UP_PREPARE: - if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu), - GFP_KERNEL)) { - err = -ENOMEM; - } else if (!zalloc_cpumask_var(&per_cpu(ipi_mask, this_cpu), - GFP_KERNEL)) { - free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); - err = -ENOMEM; - } - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DEAD: - for_each_online_cpu(cpu) { - if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) - continue; - cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu)); - cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu)); - } - free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); - free_cpumask_var(per_cpu(ipi_mask, this_cpu)); - break; + if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL)) + return -ENOMEM; + + if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) { + free_cpumask_var(per_cpu(cpus_in_cluster, cpu)); + return -ENOMEM; } - return notifier_from_errno(err); + return 0; } -static struct notifier_block x2apic_cpu_notifier = { - .notifier_call = update_clusterinfo, -}; - -static int x2apic_init_cpu_notifier(void) +int x2apic_dead_cpu(unsigned int this_cpu) { - int cpu = smp_processor_id(); - - zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL); - zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL); + int cpu; - BUG_ON(!per_cpu(cpus_in_cluster, cpu) || 
!per_cpu(ipi_mask, cpu)); - - cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu)); - register_hotcpu_notifier(&x2apic_cpu_notifier); - return 1; + for_each_online_cpu(cpu) { + if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) + continue; + cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu)); + cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu)); + } + free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); + free_cpumask_var(per_cpu(ipi_mask, this_cpu)); + return 0; } static int x2apic_cluster_probe(void) { - if (x2apic_mode) - return x2apic_init_cpu_notifier(); - else + int cpu = smp_processor_id(); + + if (!x2apic_mode) return 0; + + cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu)); + cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE", + x2apic_prepare_cpu, x2apic_dead_cpu); + return 1; } static const struct cpumask *x2apic_cluster_target_cpus(void) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index f112af7aa62e..3d747070fe67 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -710,31 +710,29 @@ static void hpet_work(struct work_struct *w) complete(&hpet_work->complete); } -static int hpet_cpuhp_notify(struct notifier_block *n, - unsigned long action, void *hcpu) +static int hpet_cpuhp_online(unsigned int cpu) { - unsigned long cpu = (unsigned long)hcpu; struct hpet_work_struct work; + + INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work); + init_completion(&work.complete); + /* FIXME: add schedule_work_on() */ + schedule_delayed_work_on(cpu, &work.work, 0); + wait_for_completion(&work.complete); + destroy_delayed_work_on_stack(&work.work); + return 0; +} + +static int hpet_cpuhp_dead(unsigned int cpu) +{ struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu); - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_ONLINE: - INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work); - init_completion(&work.complete); - /* FIXME: add schedule_work_on() */ - schedule_delayed_work_on(cpu, &work.work, 0); - 
wait_for_completion(&work.complete); - destroy_delayed_work_on_stack(&work.work); - break; - case CPU_DEAD: - if (hdev) { - free_irq(hdev->irq, hdev); - hdev->flags &= ~HPET_DEV_USED; - per_cpu(cpu_hpet_dev, cpu) = NULL; - } - break; - } - return NOTIFY_OK; + if (!hdev) + return 0; + free_irq(hdev->irq, hdev); + hdev->flags &= ~HPET_DEV_USED; + per_cpu(cpu_hpet_dev, cpu) = NULL; + return 0; } #else @@ -750,11 +748,8 @@ static void hpet_reserve_msi_timers(struct hpet_data *hd) } #endif -static int hpet_cpuhp_notify(struct notifier_block *n, - unsigned long action, void *hcpu) -{ - return NOTIFY_OK; -} +#define hpet_cpuhp_online NULL +#define hpet_cpuhp_dead NULL #endif @@ -931,7 +926,7 @@ out_nohpet: */ static __init int hpet_late_init(void) { - int cpu; + int ret; if (boot_hpet_disable) return -ENODEV; @@ -961,16 +956,20 @@ static __init int hpet_late_init(void) if (boot_cpu_has(X86_FEATURE_ARAT)) return 0; - cpu_notifier_register_begin(); - for_each_online_cpu(cpu) { - hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); - } - /* This notifier should be called after workqueue is ready */ - __hotcpu_notifier(hpet_cpuhp_notify, -20); - cpu_notifier_register_done(); - + ret = cpuhp_setup_state(CPUHP_AP_X86_HPET_ONLINE, "AP_X86_HPET_ONLINE", + hpet_cpuhp_online, NULL); + if (ret) + return ret; + ret = cpuhp_setup_state(CPUHP_X86_HPET_DEAD, "X86_HPET_DEAD", NULL, + hpet_cpuhp_dead); + if (ret) + goto err_cpuhp; return 0; + +err_cpuhp: + cpuhp_remove_state(CPUHP_AP_X86_HPET_ONLINE); + return ret; } fs_initcall(hpet_late_init); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a2616584b6e9..6cb2b02fcc87 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -400,10 +400,6 @@ static void __init reserve_initrd(void) memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); } -static void __init early_initrd_acpi_init(void) -{ - early_acpi_table_init((void *)initrd_start, initrd_end - initrd_start); -} #else static void __init 
early_reserve_initrd(void) { @@ -411,9 +407,6 @@ static void __init early_reserve_initrd(void) static void __init reserve_initrd(void) { } -static void __init early_initrd_acpi_init(void) -{ -} #endif /* CONFIG_BLK_DEV_INITRD */ static void __init parse_setup_data(void) @@ -1149,7 +1142,7 @@ void __init setup_arch(char **cmdline_p) reserve_initrd(); - early_initrd_acpi_init(); + acpi_table_upgrade(); vsmp_init(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index e4fcb87ba7a6..7a40e068302d 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -236,6 +236,8 @@ void __init setup_per_cpu_areas(void) early_per_cpu_map(x86_cpu_to_apicid, cpu); per_cpu(x86_bios_cpu_apicid, cpu) = early_per_cpu_map(x86_bios_cpu_apicid, cpu); + per_cpu(x86_cpu_to_acpiid, cpu) = + early_per_cpu_map(x86_cpu_to_acpiid, cpu); #endif #ifdef CONFIG_X86_32 per_cpu(x86_cpu_to_logical_apicid, cpu) = @@ -271,6 +273,7 @@ void __init setup_per_cpu_areas(void) #ifdef CONFIG_X86_LOCAL_APIC early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; + early_per_cpu_ptr(x86_cpu_to_acpiid) = NULL; #endif #ifdef CONFIG_X86_32 early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL; diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 9b0185fbe3eb..654f6c66fe45 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -323,25 +323,16 @@ static int tboot_wait_for_aps(int num_aps) return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps); } -static int tboot_cpu_callback(struct notifier_block *nfb, unsigned long action, - void *hcpu) +static int tboot_dying_cpu(unsigned int cpu) { - switch (action) { - case CPU_DYING: - atomic_inc(&ap_wfs_count); - if (num_online_cpus() == 1) - if (tboot_wait_for_aps(atomic_read(&ap_wfs_count))) - return NOTIFY_BAD; - break; + atomic_inc(&ap_wfs_count); + if (num_online_cpus() == 1) { + if (tboot_wait_for_aps(atomic_read(&ap_wfs_count))) + return -EBUSY; 
} - return NOTIFY_OK; + return 0; } -static struct notifier_block tboot_cpu_notifier = -{ - .notifier_call = tboot_cpu_callback, -}; - #ifdef CONFIG_DEBUG_FS #define TBOOT_LOG_UUID { 0x26, 0x25, 0x19, 0xc0, 0x30, 0x6b, 0xb4, 0x4d, \ @@ -417,8 +408,8 @@ static __init int tboot_late_init(void) tboot_create_trampoline(); atomic_set(&ap_wfs_count, 0); - register_hotcpu_notifier(&tboot_cpu_notifier); - + cpuhp_setup_state(CPUHP_AP_X86_TBOOT_DYING, "AP_X86_TBOOT_DYING", NULL, + tboot_dying_cpu); #ifdef CONFIG_DEBUG_FS debugfs_create_file("tboot_log", S_IRUSR, arch_debugfs_dir, NULL, &tboot_log_fops); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 7597b42a8a88..643565364497 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -366,7 +366,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) | - F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT); + F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB); /* cpuid 0xD.1.eax */ const u32 kvm_cpuid_D_1_eax_x86_features = diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index e17a74b1d852..35058c2c0eea 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -144,14 +144,6 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) return best && (best->ebx & bit(X86_FEATURE_RTM)); } -static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 7, 0); - return best && (best->ebx & bit(X86_FEATURE_PCOMMIT)); -} - static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7758680db20b..df07a0a4611f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2707,8 +2707,7 @@ static void nested_vmx_setup_ctls_msrs(struct 
vcpu_vmx *vmx) SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_WBINVD_EXITING | - SECONDARY_EXEC_XSAVES | - SECONDARY_EXEC_PCOMMIT; + SECONDARY_EXEC_XSAVES; if (enable_ept) { /* nested EPT: emulate EPT also to L1 */ @@ -3270,7 +3269,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_SHADOW_VMCS | SECONDARY_EXEC_XSAVES | SECONDARY_EXEC_ENABLE_PML | - SECONDARY_EXEC_PCOMMIT | SECONDARY_EXEC_TSC_SCALING; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, @@ -4858,9 +4856,6 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) if (!enable_pml) exec_control &= ~SECONDARY_EXEC_ENABLE_PML; - /* Currently, we allow L1 guest to directly run pcommit instruction. */ - exec_control &= ~SECONDARY_EXEC_PCOMMIT; - return exec_control; } @@ -4904,9 +4899,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); - if (cpu_has_secondary_exec_ctrls()) + if (cpu_has_secondary_exec_ctrls()) { vmcs_write32(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control(vmx)); + } if (kvm_vcpu_apicv_active(&vmx->vcpu)) { vmcs_write64(EOI_EXIT_BITMAP0, 0); @@ -7564,13 +7560,6 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) return 1; } -static int handle_pcommit(struct kvm_vcpu *vcpu) -{ - /* we never catch pcommit instruct for L1 guest. */ - WARN_ON(1); - return 1; -} - /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. 
Otherwise they set the kvm_run parameter to indicate what needs @@ -7621,7 +7610,6 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_XSAVES] = handle_xsaves, [EXIT_REASON_XRSTORS] = handle_xrstors, [EXIT_REASON_PML_FULL] = handle_pml_full, - [EXIT_REASON_PCOMMIT] = handle_pcommit, }; static const int kvm_vmx_max_exit_handlers = @@ -7930,8 +7918,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) * the XSS exit bitmap in vmcs12. */ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); - case EXIT_REASON_PCOMMIT: - return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT); default: return true; } @@ -9094,15 +9080,6 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) if (cpu_has_secondary_exec_ctrls()) vmcs_set_secondary_exec_control(secondary_exec_ctl); - - if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) { - if (guest_cpuid_has_pcommit(vcpu)) - vmx->nested.nested_vmx_secondary_ctls_high |= - SECONDARY_EXEC_PCOMMIT; - else - vmx->nested.nested_vmx_secondary_ctls_high &= - ~SECONDARY_EXEC_PCOMMIT; - } } static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) @@ -9715,8 +9692,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_PCOMMIT); + SECONDARY_EXEC_APIC_REGISTER_VIRT); if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) exec_control |= vmcs12->secondary_vm_exec_control; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b2766723c951..45608a7da9b3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5552,9 +5552,10 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) } EXPORT_SYMBOL_GPL(kvm_fast_pio_out); -static void tsc_bad(void *info) +static int kvmclock_cpu_down_prep(unsigned int cpu) { __this_cpu_write(cpu_tsc_khz, 0); + return 0; 
} static void tsc_khz_changed(void *data) @@ -5659,35 +5660,18 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = { .notifier_call = kvmclock_cpufreq_notifier }; -static int kvmclock_cpu_notifier(struct notifier_block *nfb, - unsigned long action, void *hcpu) +static int kvmclock_cpu_online(unsigned int cpu) { - unsigned int cpu = (unsigned long)hcpu; - - switch (action) { - case CPU_ONLINE: - case CPU_DOWN_FAILED: - smp_call_function_single(cpu, tsc_khz_changed, NULL, 1); - break; - case CPU_DOWN_PREPARE: - smp_call_function_single(cpu, tsc_bad, NULL, 1); - break; - } - return NOTIFY_OK; + tsc_khz_changed(NULL); + return 0; } -static struct notifier_block kvmclock_cpu_notifier_block = { - .notifier_call = kvmclock_cpu_notifier, - .priority = -INT_MAX -}; - static void kvm_timer_init(void) { int cpu; max_tsc_khz = tsc_khz; - cpu_notifier_register_begin(); if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { #ifdef CONFIG_CPU_FREQ struct cpufreq_policy policy; @@ -5702,12 +5686,9 @@ static void kvm_timer_init(void) CPUFREQ_TRANSITION_NOTIFIER); } pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz); - for_each_online_cpu(cpu) - smp_call_function_single(cpu, tsc_khz_changed, NULL, 1); - - __register_hotcpu_notifier(&kvmclock_cpu_notifier_block); - cpu_notifier_register_done(); + cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "AP_X86_KVM_CLK_ONLINE", + kvmclock_cpu_online, kvmclock_cpu_down_prep); } static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); @@ -5896,7 +5877,7 @@ void kvm_arch_exit(void) if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); - unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block); + cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE); #ifdef CONFIG_X86_64 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier); #endif diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index ec378cd7b71e..767be7c76034 100644 --- 
a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -1012,7 +1012,7 @@ GrpTable: Grp15 4: XSAVE 5: XRSTOR | lfence (11B) 6: XSAVEOPT | clwb (66) | mfence (11B) -7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B) +7: clflush | clflushopt (66) | sfence (11B) EndTable GrpTable: Grp16 diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index d22161ab941d..dc8023060456 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1353,7 +1353,7 @@ good_area: * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); major |= fault & VM_FAULT_MAJOR; /* diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 9c086c57105c..968ac028c34e 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -1,4 +1,5 @@ /* Common code for 32 and 64-bit NUMA */ +#include <linux/acpi.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/string.h> @@ -15,7 +16,6 @@ #include <asm/e820.h> #include <asm/proto.h> #include <asm/dma.h> -#include <asm/acpi.h> #include <asm/amd_nb.h> #include "numa_internal.h" diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index aa0ff4b02a96..3feec5af4e67 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -6,7 +6,7 @@ #include <asm/fixmap.h> #include <asm/mtrr.h> -#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO +#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | __GFP_ZERO) #ifdef CONFIG_HIGHPTE #define PGALLOC_USER_GFP __GFP_HIGHMEM @@ -18,7 +18,7 @@ gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP; pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return (pte_t *)__get_free_page(PGALLOC_GFP); + return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT); } pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) @@ -207,9 +207,13 @@ static int 
preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[]) { int i; bool failed = false; + gfp_t gfp = PGALLOC_GFP; + + if (mm == &init_mm) + gfp &= ~__GFP_ACCOUNT; for(i = 0; i < PREALLOCATED_PMDS; i++) { - pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP); + pmd_t *pmd = (pmd_t *)__get_free_page(gfp); if (!pmd) failed = true; if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) { diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index b5f821881465..b1ecff460a46 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -15,8 +15,6 @@ #include <linux/bitmap.h> #include <linux/module.h> #include <linux/topology.h> -#include <linux/bootmem.h> -#include <linux/memblock.h> #include <linux/mm.h> #include <asm/proto.h> #include <asm/numa.h> @@ -24,51 +22,6 @@ #include <asm/apic.h> #include <asm/uv/uv.h> -int acpi_numa __initdata; - -static __init int setup_node(int pxm) -{ - return acpi_map_pxm_to_node(pxm); -} - -static __init void bad_srat(void) -{ - printk(KERN_ERR "SRAT: SRAT not used.\n"); - acpi_numa = -1; -} - -static __init inline int srat_disabled(void) -{ - return acpi_numa < 0; -} - -/* - * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for - * I/O localities since SRAT does not list them. I/O localities are - * not supported at this point. 
- */ -void __init acpi_numa_slit_init(struct acpi_table_slit *slit) -{ - int i, j; - - for (i = 0; i < slit->locality_count; i++) { - const int from_node = pxm_to_node(i); - - if (from_node == NUMA_NO_NODE) - continue; - - for (j = 0; j < slit->locality_count; j++) { - const int to_node = pxm_to_node(j); - - if (to_node == NUMA_NO_NODE) - continue; - - numa_set_distance(from_node, to_node, - slit->entry[slit->locality_count * i + j]); - } - } -} - /* Callback for Proximity Domain -> x2APIC mapping */ void __init acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) @@ -91,7 +44,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) pxm, apic_id); return; } - node = setup_node(pxm); + node = acpi_map_pxm_to_node(pxm); if (node < 0) { printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); bad_srat(); @@ -104,7 +57,6 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) } set_apicid_to_node(apic_id, node); node_set(node, numa_nodes_parsed); - acpi_numa = 1; printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", pxm, apic_id, node); } @@ -127,7 +79,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) pxm = pa->proximity_domain_lo; if (acpi_srat_revision >= 2) pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8; - node = setup_node(pxm); + node = acpi_map_pxm_to_node(pxm); if (node < 0) { printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); bad_srat(); @@ -146,74 +98,10 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) set_apicid_to_node(apic_id, node); node_set(node, numa_nodes_parsed); - acpi_numa = 1; printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", pxm, apic_id, node); } -#ifdef CONFIG_MEMORY_HOTPLUG -static inline int save_add_info(void) {return 1;} -#else -static inline int save_add_info(void) {return 0;} -#endif - -/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ -int __init 
-acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) -{ - u64 start, end; - u32 hotpluggable; - int node, pxm; - - if (srat_disabled()) - goto out_err; - if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) - goto out_err_bad_srat; - if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) - goto out_err; - hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE; - if (hotpluggable && !save_add_info()) - goto out_err; - - start = ma->base_address; - end = start + ma->length; - pxm = ma->proximity_domain; - if (acpi_srat_revision <= 1) - pxm &= 0xff; - - node = setup_node(pxm); - if (node < 0) { - printk(KERN_ERR "SRAT: Too many proximity domains.\n"); - goto out_err_bad_srat; - } - - if (numa_add_memblk(node, start, end) < 0) - goto out_err_bad_srat; - - node_set(node, numa_nodes_parsed); - - pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n", - node, pxm, - (unsigned long long) start, (unsigned long long) end - 1, - hotpluggable ? " hotplug" : "", - ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : ""); - - /* Mark hotplug range in memblock. 
*/ - if (hotpluggable && memblock_mark_hotplug(start, ma->length)) - pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n", - (unsigned long long)start, (unsigned long long)end - 1); - - max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1)); - - return 0; -out_err_bad_srat: - bad_srat(); -out_err: - return -1; -} - -void __init acpi_numa_arch_fixup(void) {} - int __init x86_acpi_numa_init(void) { int ret; diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index be14cc3e48d5..3be012115853 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -20,10 +20,121 @@ #include <linux/init.h> #include <linux/string.h> +#include <xen/xen.h> #include <xen/xen-ops.h> +#include <xen/interface/platform.h> #include <asm/page.h> #include <asm/setup.h> +#include <asm/xen/hypercall.h> + +static efi_char16_t vendor[100] __initdata; + +static efi_system_table_t efi_systab_xen __initdata = { + .hdr = { + .signature = EFI_SYSTEM_TABLE_SIGNATURE, + .revision = 0, /* Initialized later. */ + .headersize = 0, /* Ignored by Linux Kernel. */ + .crc32 = 0, /* Ignored by Linux Kernel. */ + .reserved = 0 + }, + .fw_vendor = EFI_INVALID_TABLE_ADDR, /* Initialized later. */ + .fw_revision = 0, /* Initialized later. */ + .con_in_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .con_in = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .con_out_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .con_out = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .stderr_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .stderr = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .runtime = (efi_runtime_services_t *)EFI_INVALID_TABLE_ADDR, + /* Not used under Xen. */ + .boottime = (efi_boot_services_t *)EFI_INVALID_TABLE_ADDR, + /* Not used under Xen. */ + .nr_tables = 0, /* Initialized later. */ + .tables = EFI_INVALID_TABLE_ADDR /* Initialized later. 
*/ +}; + +static const struct efi efi_xen __initconst = { + .systab = NULL, /* Initialized later. */ + .runtime_version = 0, /* Initialized later. */ + .mps = EFI_INVALID_TABLE_ADDR, + .acpi = EFI_INVALID_TABLE_ADDR, + .acpi20 = EFI_INVALID_TABLE_ADDR, + .smbios = EFI_INVALID_TABLE_ADDR, + .smbios3 = EFI_INVALID_TABLE_ADDR, + .sal_systab = EFI_INVALID_TABLE_ADDR, + .boot_info = EFI_INVALID_TABLE_ADDR, + .hcdp = EFI_INVALID_TABLE_ADDR, + .uga = EFI_INVALID_TABLE_ADDR, + .uv_systab = EFI_INVALID_TABLE_ADDR, + .fw_vendor = EFI_INVALID_TABLE_ADDR, + .runtime = EFI_INVALID_TABLE_ADDR, + .config_table = EFI_INVALID_TABLE_ADDR, + .get_time = xen_efi_get_time, + .set_time = xen_efi_set_time, + .get_wakeup_time = xen_efi_get_wakeup_time, + .set_wakeup_time = xen_efi_set_wakeup_time, + .get_variable = xen_efi_get_variable, + .get_next_variable = xen_efi_get_next_variable, + .set_variable = xen_efi_set_variable, + .query_variable_info = xen_efi_query_variable_info, + .update_capsule = xen_efi_update_capsule, + .query_capsule_caps = xen_efi_query_capsule_caps, + .get_next_high_mono_count = xen_efi_get_next_high_mono_count, + .reset_system = NULL, /* Functionality provided by Xen. */ + .set_virtual_address_map = NULL, /* Not used under Xen. */ + .flags = 0 /* Initialized later. */ +}; + +static efi_system_table_t __init *xen_efi_probe(void) +{ + struct xen_platform_op op = { + .cmd = XENPF_firmware_info, + .u.firmware_info = { + .type = XEN_FW_EFI_INFO, + .index = XEN_FW_EFI_CONFIG_TABLE + } + }; + union xenpf_efi_info *info = &op.u.firmware_info.u.efi_info; + + if (!xen_initial_domain() || HYPERVISOR_platform_op(&op) < 0) + return NULL; + + /* Here we know that Xen runs on EFI platform. 
*/ + + efi = efi_xen; + + efi_systab_xen.tables = info->cfg.addr; + efi_systab_xen.nr_tables = info->cfg.nent; + + op.cmd = XENPF_firmware_info; + op.u.firmware_info.type = XEN_FW_EFI_INFO; + op.u.firmware_info.index = XEN_FW_EFI_VENDOR; + info->vendor.bufsz = sizeof(vendor); + set_xen_guest_handle(info->vendor.name, vendor); + + if (HYPERVISOR_platform_op(&op) == 0) { + efi_systab_xen.fw_vendor = __pa_symbol(vendor); + efi_systab_xen.fw_revision = info->vendor.revision; + } else + efi_systab_xen.fw_vendor = __pa_symbol(L"UNKNOWN"); + + op.cmd = XENPF_firmware_info; + op.u.firmware_info.type = XEN_FW_EFI_INFO; + op.u.firmware_info.index = XEN_FW_EFI_VERSION; + + if (HYPERVISOR_platform_op(&op) == 0) + efi_systab_xen.hdr.revision = info->version; + + op.cmd = XENPF_firmware_info; + op.u.firmware_info.type = XEN_FW_EFI_INFO; + op.u.firmware_info.index = XEN_FW_EFI_RT_VERSION; + + if (HYPERVISOR_platform_op(&op) == 0) + efi.runtime_version = info->version; + + return &efi_systab_xen; +} void __init xen_efi_init(void) { diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0f87db2cc6a8..69b4b6d29738 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -59,6 +59,7 @@ #include <asm/xen/pci.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> +#include <asm/xen/cpuid.h> #include <asm/fixmap.h> #include <asm/processor.h> #include <asm/proto.h> @@ -118,6 +119,10 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); */ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); +/* Linux <-> Xen vCPU id mapping */ +DEFINE_PER_CPU(int, xen_vcpu_id) = -1; +EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); + enum xen_domain_type xen_domain_type = XEN_NATIVE; EXPORT_SYMBOL_GPL(xen_domain_type); @@ -179,7 +184,7 @@ static void clamp_max_cpus(void) #endif } -static void xen_vcpu_setup(int cpu) +void xen_vcpu_setup(int cpu) { struct vcpu_register_vcpu_info info; int err; @@ -202,8 +207,9 @@ static void xen_vcpu_setup(int cpu) if (per_cpu(xen_vcpu, cpu) == 
&per_cpu(xen_vcpu_info, cpu)) return; } - if (cpu < MAX_VIRT_CPUS) - per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; + if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) + per_cpu(xen_vcpu, cpu) = + &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; if (!have_vcpu_info_placement) { if (cpu >= MAX_VIRT_CPUS) @@ -223,7 +229,8 @@ static void xen_vcpu_setup(int cpu) hypervisor has no unregister variant and this hypercall does not allow to over-write info.mfn and info.offset. */ - err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); + err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu), + &info); if (err) { printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err); @@ -247,10 +254,11 @@ void xen_vcpu_restore(void) for_each_possible_cpu(cpu) { bool other_cpu = (cpu != smp_processor_id()); - bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL); + bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), + NULL); if (other_cpu && is_up && - HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) + HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL)) BUG(); xen_setup_runstate_info(cpu); @@ -259,7 +267,7 @@ void xen_vcpu_restore(void) xen_vcpu_setup(cpu); if (other_cpu && is_up && - HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) + HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)) BUG(); } } @@ -588,7 +596,7 @@ static void xen_load_gdt(const struct desc_ptr *dtr) { unsigned long va = dtr->address; unsigned int size = dtr->size + 1; - unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE); unsigned long frames[pages]; int f; @@ -637,7 +645,7 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) { unsigned long va = dtr->address; unsigned int size = dtr->size + 1; - unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE); unsigned long frames[pages]; int f; @@ -1135,8 +1143,11 @@ void xen_setup_vcpu_info_placement(void) { int cpu; 
- for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { + /* Set up direct vCPU id mapping for PV guests. */ + per_cpu(xen_vcpu_id, cpu) = cpu; xen_vcpu_setup(cpu); + } /* xen_vcpu_setup managed to place the vcpu_info within the * percpu area for all cpus, so make use of it. Note that for @@ -1727,6 +1738,9 @@ asmlinkage __visible void __init xen_start_kernel(void) #endif xen_raw_console_write("about to get started...\n"); + /* Let's presume PV guests always boot on vCPU with id 0. */ + per_cpu(xen_vcpu_id, 0) = 0; + xen_setup_runstate_info(0); xen_efi_init(); @@ -1768,9 +1782,10 @@ void __ref xen_hvm_init_shared_info(void) * in that case multiple vcpus might be online. */ for_each_online_cpu(cpu) { /* Leave it to be NULL. */ - if (cpu >= MAX_VIRT_CPUS) + if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS) continue; - per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; + per_cpu(xen_vcpu, cpu) = + &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; } } @@ -1795,6 +1810,12 @@ static void __init init_hvm_pv_info(void) xen_setup_features(); + cpuid(base + 4, &eax, &ebx, &ecx, &edx); + if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT) + this_cpu_write(xen_vcpu_id, ebx); + else + this_cpu_write(xen_vcpu_id, smp_processor_id()); + pv_info.name = "Xen HVM"; xen_domain_type = XEN_HVM_DOMAIN; @@ -1806,6 +1827,10 @@ static int xen_hvm_cpu_notify(struct notifier_block *self, unsigned long action, int cpu = (long)hcpu; switch (action) { case CPU_UP_PREPARE: + if (cpu_acpi_id(cpu) != U32_MAX) + per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu); + else + per_cpu(xen_vcpu_id, cpu) = cpu; xen_vcpu_setup(cpu); if (xen_have_vector_callback) { if (xen_feature(XENFEAT_hvm_safe_pvclock)) diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index e079500b17f3..de4144c24f1c 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c @@ -111,63 +111,18 @@ int arch_gnttab_init(unsigned long nr_shared) } #ifdef CONFIG_XEN_PVH -#include <xen/balloon.h> #include 
<xen/events.h> -#include <linux/slab.h> -static int __init xlated_setup_gnttab_pages(void) -{ - struct page **pages; - xen_pfn_t *pfns; - void *vaddr; - int rc; - unsigned int i; - unsigned long nr_grant_frames = gnttab_max_grant_frames(); - - BUG_ON(nr_grant_frames == 0); - pages = kcalloc(nr_grant_frames, sizeof(pages[0]), GFP_KERNEL); - if (!pages) - return -ENOMEM; - - pfns = kcalloc(nr_grant_frames, sizeof(pfns[0]), GFP_KERNEL); - if (!pfns) { - kfree(pages); - return -ENOMEM; - } - rc = alloc_xenballooned_pages(nr_grant_frames, pages); - if (rc) { - pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__, - nr_grant_frames, rc); - kfree(pages); - kfree(pfns); - return rc; - } - for (i = 0; i < nr_grant_frames; i++) - pfns[i] = page_to_pfn(pages[i]); - - vaddr = vmap(pages, nr_grant_frames, 0, PAGE_KERNEL); - if (!vaddr) { - pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__, - nr_grant_frames, rc); - free_xenballooned_pages(nr_grant_frames, pages); - kfree(pages); - kfree(pfns); - return -ENOMEM; - } - kfree(pages); - - xen_auto_xlat_grant_frames.pfn = pfns; - xen_auto_xlat_grant_frames.count = nr_grant_frames; - xen_auto_xlat_grant_frames.vaddr = vaddr; - - return 0; -} - +#include <xen/xen-ops.h> static int __init xen_pvh_gnttab_setup(void) { if (!xen_pvh_domain()) return -ENODEV; - return xlated_setup_gnttab_pages(); + xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames(); + + return xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn, + &xen_auto_xlat_grant_frames.vaddr, + xen_auto_xlat_grant_frames.count); } /* Call it _before_ __gnttab_init as we need to initialize the * xen_auto_xlat_grant_frames first. 
*/ diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index a1207cb6472a..33e92955e09d 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -109,7 +109,8 @@ static void xen_safe_halt(void) static void xen_halt(void) { if (irqs_disabled()) - HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); + HYPERVISOR_vcpu_op(VCPUOP_down, + xen_vcpu_nr(smp_processor_id()), NULL); else xen_safe_halt(); } diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 9466354d3e49..32bdc2c90297 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -547,7 +547,7 @@ void xen_pmu_init(int cpu) return; fail: - pr_warn_once("Could not initialize VPMU for cpu %d, error %d\n", + pr_info_once("Could not initialize VPMU for cpu %d, error %d\n", cpu, err); free_pages((unsigned long)xenpmu_data, 0); } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 719cf291dcdf..0b4d04c8ab4d 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -322,6 +322,13 @@ static void __init xen_smp_prepare_boot_cpu(void) xen_filter_cpu_maps(); xen_setup_vcpu_info_placement(); } + + /* + * Setup vcpu_info for boot CPU. + */ + if (xen_hvm_domain()) + xen_vcpu_setup(0); + /* * The alternative logic (which patches the unlock/lock) runs before * the smp bootup up code is activated. 
Hence we need to set this up @@ -454,7 +461,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) #endif ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir)); - if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) + if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt)) BUG(); kfree(ctxt); @@ -492,7 +499,7 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) if (rc) return rc; - rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); + rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL); BUG_ON(rc); while (cpu_report_state(cpu) != CPU_ONLINE) @@ -520,7 +527,8 @@ static int xen_cpu_disable(void) static void xen_cpu_die(unsigned int cpu) { - while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) { + while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up, + xen_vcpu_nr(cpu), NULL)) { __set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(HZ/10); } @@ -536,7 +544,7 @@ static void xen_cpu_die(unsigned int cpu) static void xen_play_dead(void) /* used only with HOTPLUG_CPU */ { play_dead_common(); - HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); + HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL); cpu_bringup(); /* * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down) @@ -576,7 +584,7 @@ static void stop_self(void *v) set_cpu_online(cpu, false); - HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL); + HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL); BUG(); } diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 6deba5bc7e34..67356d29d74d 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -11,8 +11,6 @@ #include <linux/interrupt.h> #include <linux/clocksource.h> #include <linux/clockchips.h> -#include <linux/kernel_stat.h> -#include <linux/math64.h> #include <linux/gfp.h> #include <linux/slab.h> #include <linux/pvclock_gtod.h> @@ -31,44 +29,6 @@ /* Xen may fire a timer up 
to this many ns early */ #define TIMER_SLOP 100000 -#define NS_PER_TICK (1000000000LL / HZ) - -/* snapshots of runstate info */ -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot); - -/* unused ns of stolen time */ -static DEFINE_PER_CPU(u64, xen_residual_stolen); - -static void do_stolen_accounting(void) -{ - struct vcpu_runstate_info state; - struct vcpu_runstate_info *snap; - s64 runnable, offline, stolen; - cputime_t ticks; - - xen_get_runstate_snapshot(&state); - - WARN_ON(state.state != RUNSTATE_running); - - snap = this_cpu_ptr(&xen_runstate_snapshot); - - /* work out how much time the VCPU has not been runn*ing* */ - runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]; - offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline]; - - *snap = state; - - /* Add the appropriate number of ticks of stolen time, - including any left-overs from last time. */ - stolen = runnable + offline + __this_cpu_read(xen_residual_stolen); - - if (stolen < 0) - stolen = 0; - - ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); - __this_cpu_write(xen_residual_stolen, stolen); - account_steal_ticks(ticks); -} /* Get the TSC speed from Xen */ static unsigned long xen_tsc_khz(void) @@ -263,8 +223,10 @@ static int xen_vcpuop_shutdown(struct clock_event_device *evt) { int cpu = smp_processor_id(); - if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) || - HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) + if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(cpu), + NULL) || + HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu), + NULL)) BUG(); return 0; @@ -274,7 +236,8 @@ static int xen_vcpuop_set_oneshot(struct clock_event_device *evt) { int cpu = smp_processor_id(); - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu), + NULL)) BUG(); return 0; @@ -293,7 +256,8 @@ static int 
xen_vcpuop_set_next_event(unsigned long delta, /* Get an event anyway, even if the timeout is already expired */ single.flags = 0; - ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single); + ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(cpu), + &single); BUG_ON(ret != 0); return ret; @@ -335,8 +299,6 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id) ret = IRQ_HANDLED; } - do_stolen_accounting(); - return ret; } @@ -394,13 +356,15 @@ void xen_timer_resume(void) return; for_each_online_cpu(cpu) { - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, + xen_vcpu_nr(cpu), NULL)) BUG(); } } static const struct pv_time_ops xen_time_ops __initconst = { .sched_clock = xen_clocksource_read, + .steal_clock = xen_steal_clock, }; static void __init xen_time_init(void) @@ -414,7 +378,8 @@ static void __init xen_time_init(void) clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC); - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu), + NULL) == 0) { /* Successfully turned off 100Hz tick, so we have the vcpuop-based timer interface */ printk(KERN_DEBUG "Xen: using vcpuop timer interface\n"); @@ -431,6 +396,8 @@ static void __init xen_time_init(void) xen_setup_timer(cpu); xen_setup_cpu_clockevents(); + xen_time_setup_guest(); + if (xen_initial_domain()) pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 4140b070f2e9..3cbce3b085e7 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -76,6 +76,7 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id); bool xen_vcpu_stolen(int vcpu); +void xen_vcpu_setup(int cpu); void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c index ef90479e0397..0fecc8a2c0b5 100644 
--- a/arch/xtensa/kernel/perf_event.c +++ b/arch/xtensa/kernel/perf_event.c @@ -404,7 +404,7 @@ static struct pmu xtensa_pmu = { .read = xtensa_pmu_read, }; -static void xtensa_pmu_setup(void) +static int xtensa_pmu_setup(int cpu) { unsigned i; @@ -413,21 +413,7 @@ static void xtensa_pmu_setup(void) set_er(0, XTENSA_PMU_PMCTRL(i)); set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i)); } -} - -static int xtensa_pmu_notifier(struct notifier_block *self, - unsigned long action, void *data) -{ - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_STARTING: - xtensa_pmu_setup(); - break; - - default: - break; - } - - return NOTIFY_OK; + return 0; } static int __init xtensa_pmu_init(void) @@ -435,7 +421,13 @@ static int __init xtensa_pmu_init(void) int ret; int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT); - perf_cpu_notifier(xtensa_pmu_notifier); + ret = cpuhp_setup_state(CPUHP_AP_PERF_XTENSA_STARTING, + "AP_PERF_XTENSA_STARTING", xtensa_pmu_setup, + NULL); + if (ret) { + pr_err("xtensa_pmu: failed to register CPU-hotplug.\n"); + return ret; + } #if XTENSA_FAKE_NMI enable_irq(irq); #else diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 7f4a1fdb1502..2725e08ef353 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -110,7 +110,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) return; |