diff options
Diffstat (limited to 'arch/powerpc')
281 files changed, 3635 insertions, 3557 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 88eace4e28c3..a2a168e2dfe7 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -88,6 +88,7 @@ config PPC select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select BINFMT_ELF + select ARCH_BINFMT_ELF_RANDOMIZE_PIE select OF select OF_EARLY_FLATTREE select OF_RESERVED_MEM @@ -128,6 +129,7 @@ config PPC select HAVE_BPF_JIT if PPC64 select HAVE_ARCH_JUMP_LABEL select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_HAS_GCOV_PROFILE_ALL select GENERIC_SMP_IDLE_THREAD select GENERIC_CMOS_UPDATE select GENERIC_TIME_VSYSCALL_OLD @@ -148,6 +150,8 @@ config PPC select HAVE_ARCH_AUDITSYSCALL select ARCH_SUPPORTS_ATOMIC_RMW select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN + select NO_BOOTMEM + select HAVE_GENERIC_RCU_GUP config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN @@ -549,7 +553,7 @@ config PPC_4K_PAGES bool "4k page size" config PPC_16K_PAGES - bool "16k page size" if 44x + bool "16k page size" if 44x || PPC_8xx config PPC_64K_PAGES bool "64k page size" if 44x || PPC_STD_MMU_64 || PPC_BOOK3E_64 diff --git a/arch/powerpc/boot/dts/b4860emu.dts b/arch/powerpc/boot/dts/b4860emu.dts index 85646b4f96e1..2aa5cd318ce8 100644 --- a/arch/powerpc/boot/dts/b4860emu.dts +++ b/arch/powerpc/boot/dts/b4860emu.dts @@ -193,9 +193,9 @@ fsl,liodn-bits = <12>; }; - clockgen: global-utilities@e1000 { +/include/ "fsl/qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,b4-clockgen", "fsl,qoriq-clockgen-2.0"; - reg = <0xe1000 0x1000>; }; /include/ "fsl/qoriq-dma-0.dtsi" diff --git a/arch/powerpc/boot/dts/b4qds.dtsi b/arch/powerpc/boot/dts/b4qds.dtsi index 8b47edcfabf0..e5bde0b85135 100644 --- a/arch/powerpc/boot/dts/b4qds.dtsi +++ b/arch/powerpc/boot/dts/b4qds.dtsi @@ -152,6 +152,29 @@ reg = <0x68>; }; }; + + i2c@2 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x2>; + + ina220@40 { + compatible = "ti,ina220"; + reg = <0x40>; + shunt-resistor = <1000>; + }; + }; + + i2c@3 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x3>; + + adt7461@4c { + compatible = "adi,adt7461"; + reg = <0x4c>; + }; + }; }; }; diff --git a/arch/powerpc/boot/dts/bsc9131rdb.dtsi b/arch/powerpc/boot/dts/bsc9131rdb.dtsi index 9e6c01339ccc..45efcbadb23c 100644 --- a/arch/powerpc/boot/dts/bsc9131rdb.dtsi +++ b/arch/powerpc/boot/dts/bsc9131rdb.dtsi @@ -40,31 +40,6 @@ compatible = "fsl,ifc-nand"; reg = <0x0 0x0 0x4000>; - partition@0 { - /* This location must not be altered */ - /* 3MB for u-boot Bootloader Image */ - reg = <0x0 0x00300000>; - label = "NAND U-Boot Image"; - read-only; - }; - - partition@300000 { - /* 1MB for DTB Image */ - reg = <0x00300000 0x00100000>; - label = "NAND DTB Image"; - }; - - partition@400000 { - /* 8MB for Linux Kernel Image */ - reg = <0x00400000 0x00800000>; - label = "NAND Linux Kernel Image"; - }; - - partition@c00000 { - /* Rest space for Root file System Image */ - reg = <0x00c00000 0x07400000>; - label = "NAND RFS Image"; - }; }; }; @@ -82,31 +57,6 @@ reg = <0>; spi-max-frequency = <50000000>; - /* 512KB for u-boot Bootloader Image */ - partition@0 { - reg = <0x0 0x00080000>; - label = "SPI Flash U-Boot Image"; - read-only; - }; - - /* 512KB for DTB Image */ - partition@80000 { - reg = <0x00080000 0x00080000>; - label = "SPI Flash DTB Image"; - }; - - /* 4MB for Linux Kernel Image */ - partition@100000 { - reg = <0x00100000 0x00400000>; - label = "SPI Flash Kernel Image"; - }; - - /*11MB for RFS Image */ - partition@500000 { - reg = <0x00500000 0x00B00000>; - label = "SPI Flash RFS Image"; - }; - }; }; diff --git a/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi index d67894459ac8..86161ae6c966 100644 --- a/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi @@ -80,33 +80,9 @@ compatible = "fsl,b4420-device-config", "fsl,qoriq-device-config-2.0"; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,b4420-clockgen", "fsl,qoriq-clockgen-2.0"; - ranges = <0x0 0xe1000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-2.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2", "pll0-div4"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2", "pll1-div4"; - }; mux0: mux0@0 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi index 582381dba1d7..65100b9636b7 100644 --- a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi @@ -124,33 +124,9 @@ compatible = "fsl,b4860-device-config", "fsl,qoriq-device-config-2.0"; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,b4860-clockgen", "fsl,qoriq-clockgen-2.0"; - ranges = <0x0 0xe1000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-2.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2", "pll0-div4"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2", "pll1-div4"; - }; mux0: mux0@0 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi index 69ce1026c948..efd74db4f9b0 100644 --- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi @@ -305,53 +305,9 @@ #sleep-cells = <2>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen1.dtsi" + global-utilities@e1000 { compatible = "fsl,p2041-clockgen", "fsl,qoriq-clockgen-1.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - clock-frequency = <0>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-1.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2"; - }; - - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux0"; - }; - - mux1: mux1@20 { - #clock-cells = <0>; - reg = <0x20 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux1"; - }; mux2: mux2@40 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi index cd63cb1b1042..d7425ef1ae41 100644 --- a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi @@ -332,53 +332,9 @@ #sleep-cells = <2>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen1.dtsi" + global-utilities@e1000 { compatible = "fsl,p3041-clockgen", "fsl,qoriq-clockgen-1.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - clock-frequency = <0>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-1.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2"; - }; - - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux0"; - }; - - mux1: mux1@20 { - #clock-cells = <0>; - reg = <0x20 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux1"; - }; mux2: mux2@40 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi index 12947ccddf25..7005a4a4cef0 100644 --- a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi @@ -352,35 +352,9 @@ #sleep-cells = <2>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen1.dtsi" + global-utilities@e1000 { compatible = "fsl,p4080-clockgen", "fsl,qoriq-clockgen-1.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - clock-frequency = <0>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-1.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2"; - }; pll2: pll2@840 { #clock-cells = <1>; @@ -398,24 +372,6 @@ clock-output-names = "pll3", "pll3-div2"; }; - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux0"; - }; - - mux1: mux1@20 { - #clock-cells = <0>; - reg = <0x20 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux1"; - }; - mux2: mux2@40 { #clock-cells = <0>; reg = <0x40 0x4>; diff --git a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi index 4c4a2b0436b2..55834211bd28 100644 --- a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi @@ -337,53 +337,9 @@ #sleep-cells = <2>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen1.dtsi" + global-utilities@e1000 { compatible = "fsl,p5020-clockgen", "fsl,qoriq-clockgen-1.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - clock-frequency = <0>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-1.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2"; - }; - - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux0"; - }; - - mux1: mux1@20 { - #clock-cells = <0>; - reg = <0x20 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux1"; - }; }; rcpm: global-utilities@e2000 { diff --git a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi index 67296fdd9698..6e4cd6ce363c 100644 --- a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi @@ -297,53 +297,9 @@ #sleep-cells = <2>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen1.dtsi" + global-utilities@e1000 { compatible = "fsl,p5040-clockgen", "fsl,qoriq-clockgen-1.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - clock-frequency = <0>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-1.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-1.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2"; - }; - - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux0"; - }; - - mux1: mux1@20 { - #clock-cells = <0>; - reg = <0x20 0x4>; - compatible = "fsl,qoriq-core-mux-1.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; - clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; - clock-output-names = "cmux1"; - }; mux2: mux2@40 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi new file mode 100644 index 000000000000..4ece1edbff63 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi @@ -0,0 +1,85 @@ +/* + * QorIQ clock control device tree stub [ controller @ offset 0xe1000 ] + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +global-utilities@e1000 { + compatible = "fsl,qoriq-clockgen-1.0"; + ranges = <0x0 0xe1000 0x1000>; + reg = <0xe1000 0x1000>; + clock-frequency = <0>; + #address-cells = <1>; + #size-cells = <1>; + + sysclk: sysclk { + #clock-cells = <0>; + compatible = "fsl,qoriq-sysclk-1.0", "fixed-clock"; + clock-output-names = "sysclk"; + }; + pll0: pll0@800 { + #clock-cells = <1>; + reg = <0x800 0x4>; + compatible = "fsl,qoriq-core-pll-1.0"; + clocks = <&sysclk>; + clock-output-names = "pll0", "pll0-div2"; + }; + pll1: pll1@820 { + #clock-cells = <1>; + reg = <0x820 0x4>; + compatible = "fsl,qoriq-core-pll-1.0"; + clocks = <&sysclk>; + clock-output-names = "pll1", "pll1-div2"; + }; + mux0: mux0@0 { + #clock-cells = <0>; + reg = <0x0 0x4>; + compatible = "fsl,qoriq-core-mux-1.0"; + clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; + clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; + clock-output-names = "cmux0"; + }; + mux1: mux1@20 { + #clock-cells = <0>; + reg = <0x20 0x4>; + compatible = "fsl,qoriq-core-mux-1.0"; + clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>; + clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2"; + clock-output-names = "cmux1"; + }; + platform_pll: platform-pll@c00 { + #clock-cells = <1>; + reg = <0xc00 0x4>; + compatible = "fsl,qoriq-platform-pll-1.0"; + clocks = <&sysclk>; + clock-output-names = "platform-pll", "platform-pll-div2"; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi new file mode 100644 index 000000000000..48e0b6e4ce33 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi @@ -0,0 +1,68 @@ +/* + * QorIQ clock control device tree stub [ controller @ offset 0xe1000 ] + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +global-utilities@e1000 { + compatible = "fsl,qoriq-clockgen-2.0"; + ranges = <0x0 0xe1000 0x1000>; + reg = <0xe1000 0x1000>; + #address-cells = <1>; + #size-cells = <1>; + + sysclk: sysclk { + #clock-cells = <0>; + compatible = "fsl,qoriq-sysclk-2.0", "fixed-clock"; + clock-output-names = "sysclk"; + }; + pll0: pll0@800 { + #clock-cells = <1>; + reg = <0x800 0x4>; + compatible = "fsl,qoriq-core-pll-2.0"; + clocks = <&sysclk>; + clock-output-names = "pll0", "pll0-div2", "pll0-div4"; + }; + pll1: pll1@820 { + #clock-cells = <1>; + reg = <0x820 0x4>; + compatible = "fsl,qoriq-core-pll-2.0"; + clocks = <&sysclk>; + clock-output-names = "pll1", "pll1-div2", "pll1-div4"; + }; + platform_pll: platform-pll@c00 { + #clock-cells = <1>; + reg = <0xc00 0x4>; + compatible = "fsl,qoriq-platform-pll-2.0"; + clocks = <&sysclk>; + clock-output-names = "platform-pll", "platform-pll-div2"; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi index 12e597eea3c8..15ae462e758f 100644 --- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi @@ -281,35 +281,9 @@ fsl,liodn-bits = <12>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,t1040-clockgen", "fsl,qoriq-clockgen-2.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-2.0"; - clock-output-names = "sysclk", "fixed-clock"; - }; - - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2", "pll0-div4"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2", "pll1-div4"; - }; mux0: mux0@0 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi index aecee9690a88..1ce91e3485a9 100644 --- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi @@ -305,34 +305,9 @@ fsl,liodn-bits = <12>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,t2080-clockgen", "fsl,qoriq-clockgen-2.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-2.0"; - clock-output-names = "sysclk", "fixed-clock"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2", "pll0-div4"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2", "pll1-div4"; - }; mux0: mux0@0 { #clock-cells = <0>; diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi index 7e2fc7cdce48..0e96fcabe812 100644 --- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi @@ -368,34 +368,9 @@ fsl,liodn-bits = <12>; }; - clockgen: global-utilities@e1000 { +/include/ "qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,t4240-clockgen", "fsl,qoriq-clockgen-2.0"; - ranges = <0x0 0xe1000 0x1000>; - reg = <0xe1000 0x1000>; - #address-cells = <1>; - #size-cells = <1>; - - sysclk: sysclk { - #clock-cells = <0>; - compatible = "fsl,qoriq-sysclk-2.0"; - clock-output-names = "sysclk"; - }; - - pll0: pll0@800 { - #clock-cells = <1>; - reg = <0x800 0x4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll0", "pll0-div2", "pll0-div4"; - }; - - pll1: pll1@820 { - #clock-cells = <1>; - reg = <0x820 0x4>; - compatible = "fsl,qoriq-core-pll-2.0"; - clocks = <&sysclk>; - clock-output-names = "pll1", "pll1-div2", "pll1-div4"; - }; pll2: pll2@840 { #clock-cells = <1>; diff --git a/arch/powerpc/boot/dts/p3041ds.dts b/arch/powerpc/boot/dts/p3041ds.dts index 2fed3bc0b990..394ea9c943c9 100644 --- a/arch/powerpc/boot/dts/p3041ds.dts +++ b/arch/powerpc/boot/dts/p3041ds.dts @@ -98,6 +98,26 @@ reg = <0x68>; interrupts = <0x1 0x1 0 0>; }; + ina220@40 { + compatible = "ti,ina220"; + reg = <0x40>; + shunt-resistor = <1000>; + }; + ina220@41 { + compatible = "ti,ina220"; + reg = <0x41>; + shunt-resistor = <1000>; + }; + ina220@44 { + compatible = "ti,ina220"; + reg = <0x44>; + shunt-resistor = <1000>; + }; + ina220@45 { + compatible = "ti,ina220"; + reg = <0x45>; + shunt-resistor = <1000>; + }; adt7461@4c { compatible = "adi,adt7461"; reg = <0x4c>; diff --git a/arch/powerpc/boot/dts/p5020ds.dts b/arch/powerpc/boot/dts/p5020ds.dts index 2869fea717dd..b7f3057cd894 100644 --- a/arch/powerpc/boot/dts/p5020ds.dts +++ b/arch/powerpc/boot/dts/p5020ds.dts @@ -98,6 +98,26 @@ reg = <0x68>; interrupts = <0x1 0x1 0 0>; }; + ina220@40 { + compatible = "ti,ina220"; + reg = <0x40>; + shunt-resistor = <1000>; + }; + ina220@41 { + compatible = "ti,ina220"; + reg = <0x41>; + shunt-resistor = <1000>; + }; + ina220@44 { + compatible = "ti,ina220"; + reg = <0x44>; + shunt-resistor = <1000>; + }; + ina220@45 { + compatible = "ti,ina220"; + reg = <0x45>; + shunt-resistor = <1000>; + }; adt7461@4c { compatible = "adi,adt7461"; reg = <0x4c>; diff --git a/arch/powerpc/boot/dts/p5040ds.dts b/arch/powerpc/boot/dts/p5040ds.dts index 860b5ccf76c0..7e04bf487c04 100644 --- a/arch/powerpc/boot/dts/p5040ds.dts +++ b/arch/powerpc/boot/dts/p5040ds.dts @@ -95,6 +95,26 @@ reg = <0x68>; interrupts = <0x1 0x1 0 0>; }; + ina220@40 { + compatible = "ti,ina220"; + reg = <0x40>; + shunt-resistor = <1000>; + }; + ina220@41 { + compatible = "ti,ina220"; + reg = <0x41>; + shunt-resistor = <1000>; + }; + ina220@44 { + compatible = "ti,ina220"; + reg = <0x44>; + shunt-resistor = <1000>; + }; + ina220@45 { + compatible = "ti,ina220"; + reg = <0x45>; + shunt-resistor = <1000>; + }; adt7461@4c { compatible = "adi,adt7461"; reg = <0x4c>; diff --git a/arch/powerpc/boot/dts/t104xrdb.dtsi b/arch/powerpc/boot/dts/t104xrdb.dtsi index 1cf0f3c5f7e5..187add885cae 100644 --- a/arch/powerpc/boot/dts/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/t104xrdb.dtsi @@ -83,6 +83,13 @@ }; }; + i2c@118000 { + adt7461@4c { + compatible = "adi,adt7461"; + reg = <0x4c>; + }; + }; + i2c@118100 { pca9546@77 { compatible = "nxp,pca9546"; diff --git a/arch/powerpc/boot/dts/t208xqds.dtsi b/arch/powerpc/boot/dts/t208xqds.dtsi index 555dc6e03d89..59061834d54e 100644 --- a/arch/powerpc/boot/dts/t208xqds.dtsi +++ b/arch/powerpc/boot/dts/t208xqds.dtsi @@ -169,6 +169,17 @@ shunt-resistor = <1000>; }; }; + + i2c@3 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x3>; + + adt7461@4c { + compatible = "adi,adt7461"; + reg = <0x4c>; + }; + }; }; }; diff --git a/arch/powerpc/boot/dts/t4240emu.dts b/arch/powerpc/boot/dts/t4240emu.dts index bc12127a03fb..decaf357db9c 100644 --- a/arch/powerpc/boot/dts/t4240emu.dts +++ b/arch/powerpc/boot/dts/t4240emu.dts @@ -250,9 +250,9 @@ fsl,liodn-bits = <12>; }; - clockgen: global-utilities@e1000 { +/include/ "fsl/qoriq-clockgen2.dtsi" + global-utilities@e1000 { compatible = "fsl,t4240-clockgen", "fsl,qoriq-clockgen-2.0"; - reg = <0xe1000 0x1000>; }; /include/ "fsl/qoriq-dma-0.dtsi" diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c index d367a0aece2a..d80161b633f4 100644 --- a/arch/powerpc/boot/main.c +++ b/arch/powerpc/boot/main.c @@ -144,13 +144,24 @@ static char cmdline[BOOT_COMMAND_LINE_SIZE] static void prep_cmdline(void *chosen) { + unsigned int getline_timeout = 5000; + int v; + int n; + + /* Wait-for-input time */ + n = getprop(chosen, "linux,cmdline-timeout", &v, sizeof(v)); + if (n == sizeof(v)) + getline_timeout = v; + if (cmdline[0] == '\0') getprop(chosen, "bootargs", cmdline, BOOT_COMMAND_LINE_SIZE-1); printf("\n\rLinux/PowerPC load: %s", cmdline); + /* If possible, edit the command line */ - if (console_ops.edit_cmdline) - console_ops.edit_cmdline(cmdline, BOOT_COMMAND_LINE_SIZE); + if (console_ops.edit_cmdline && getline_timeout) + console_ops.edit_cmdline(cmdline, BOOT_COMMAND_LINE_SIZE, getline_timeout); + printf("\n\r"); /* Put the command line back into the devtree for the kernel */ diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h index 8aad3c55aeda..5e75e1c5518e 100644 --- a/arch/powerpc/boot/ops.h +++ b/arch/powerpc/boot/ops.h @@ -58,7 +58,7 @@ extern struct dt_ops dt_ops; struct console_ops { int (*open)(void); void (*write)(const char *buf, int len); - void (*edit_cmdline)(char *buf, int len); + void (*edit_cmdline)(char *buf, int len, unsigned int getline_timeout); void (*close)(void); void *data; }; diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c index f2156f07571f..167ee9433de6 100644 --- a/arch/powerpc/boot/serial.c +++ b/arch/powerpc/boot/serial.c @@ -33,7 +33,7 @@ static void serial_write(const char *buf, int len) scdp->putc(*buf++); } -static void serial_edit_cmdline(char *buf, int len) +static void serial_edit_cmdline(char *buf, int len, unsigned int timeout) { int timer = 0, count; char ch, *cp; @@ -44,7 +44,7 @@ static void serial_edit_cmdline(char *buf, int len) cp = &buf[count]; count++; - while (timer++ < 5*1000) { + do { if (scdp->tstc()) { while (((ch = scdp->getc()) != '\n') && (ch != '\r')) { /* Test for backspace/delete */ @@ -70,7 +70,7 @@ static void serial_edit_cmdline(char *buf, int len) break; /* Exit 'timer' loop */ } udelay(1000); /* 1 msec */ - } + } while (timer++ < timeout); *cp = 0; } diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig index dc939de9b5b0..b4c4b469e320 100644 --- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig +++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig @@ -100,7 +100,6 @@ CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m CONFIG_NETCONSOLE=y -CONFIG_NETPOLL_TRAP=y CONFIG_TUN=m # CONFIG_NET_VENDOR_3COM is not set CONFIG_FS_ENET=y diff --git a/arch/powerpc/configs/86xx/gef_ppc9a_defconfig b/arch/powerpc/configs/86xx/gef_ppc9a_defconfig index e5a648115ada..7cb9719abf3d 100644 --- a/arch/powerpc/configs/86xx/gef_ppc9a_defconfig +++ b/arch/powerpc/configs/86xx/gef_ppc9a_defconfig @@ -113,7 +113,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP_SMART=y CONFIG_SLIP_MODE_SLIP6=y CONFIG_NETCONSOLE=y -CONFIG_NETPOLL_TRAP=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set diff --git a/arch/powerpc/configs/86xx/gef_sbc310_defconfig b/arch/powerpc/configs/86xx/gef_sbc310_defconfig index 8317b6010ba6..ecabf625d249 100644 --- a/arch/powerpc/configs/86xx/gef_sbc310_defconfig +++ b/arch/powerpc/configs/86xx/gef_sbc310_defconfig @@ -114,7 +114,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP_SMART=y CONFIG_SLIP_MODE_SLIP6=y CONFIG_NETCONSOLE=y -CONFIG_NETPOLL_TRAP=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set diff --git a/arch/powerpc/configs/86xx/gef_sbc610_defconfig b/arch/powerpc/configs/86xx/gef_sbc610_defconfig index 124d66f0282c..4a4a86fb0d3d 100644 --- a/arch/powerpc/configs/86xx/gef_sbc610_defconfig +++ b/arch/powerpc/configs/86xx/gef_sbc610_defconfig @@ -165,7 +165,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP_SMART=y CONFIG_SLIP_MODE_SLIP6=y CONFIG_NETCONSOLE=y -CONFIG_NETPOLL_TRAP=y CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set diff --git a/arch/powerpc/configs/86xx/sbc8641d_defconfig b/arch/powerpc/configs/86xx/sbc8641d_defconfig index 1e151594c691..99ea8746bbaf 100644 --- a/arch/powerpc/configs/86xx/sbc8641d_defconfig +++ b/arch/powerpc/configs/86xx/sbc8641d_defconfig @@ -167,7 +167,6 @@ CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP_SMART=y CONFIG_SLIP_MODE_SLIP6=y CONFIG_NETCONSOLE=y -CONFIG_NETPOLL_TRAP=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig index 59734916986a..8a08d6dcb0b4 100644 --- a/arch/powerpc/configs/c2k_defconfig +++ b/arch/powerpc/configs/c2k_defconfig @@ -211,7 +211,6 @@ CONFIG_MV643XX_ETH=y # CONFIG_NETDEV_10000 is not set # CONFIG_ATM_DRIVERS is not set CONFIG_NETCONSOLE=m -CONFIG_NETPOLL_TRAP=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_KEYBOARD is not set diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig index 688e9e4d29a1..611efe99faeb 100644 --- a/arch/powerpc/configs/corenet32_smp_defconfig +++ b/arch/powerpc/configs/corenet32_smp_defconfig @@ -144,6 +144,7 @@ CONFIG_RTC_DRV_DS1374=y CONFIG_RTC_DRV_DS3232=y CONFIG_UIO=y CONFIG_STAGING=y +CONFIG_MEMORY=y CONFIG_VIRT_DRIVERS=y CONFIG_FSL_HV_MANAGER=y CONFIG_EXT2_FS=y diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig index 6db97e4414b2..be24a18c0d96 100644 --- a/arch/powerpc/configs/corenet64_smp_defconfig +++ b/arch/powerpc/configs/corenet64_smp_defconfig @@ -118,6 +118,7 @@ CONFIG_FSL_DMA=y CONFIG_VIRT_DRIVERS=y CONFIG_FSL_HV_MANAGER=y CONFIG_FSL_CORENET_CF=y +CONFIG_MEMORY=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_ISO9660_FS=m diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig index d2c415489f72..02395fab19bd 100644 --- a/arch/powerpc/configs/mpc85xx_defconfig +++ b/arch/powerpc/configs/mpc85xx_defconfig @@ -215,6 +215,7 @@ CONFIG_RTC_DRV_DS3232=y CONFIG_RTC_DRV_CMOS=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y +CONFIG_MEMORY=y # CONFIG_NET_DMA is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig index 87460083dbc7..b5d1b82a1b43 100644 --- a/arch/powerpc/configs/mpc85xx_smp_defconfig +++ b/arch/powerpc/configs/mpc85xx_smp_defconfig @@ -216,6 +216,7 @@ CONFIG_RTC_DRV_DS3232=y CONFIG_RTC_DRV_CMOS=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y +CONFIG_MEMORY=y # CONFIG_NET_DMA is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 20bc5e2d368d..5830d735c5c3 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -154,7 +154,6 @@ CONFIG_WINDFARM_PM121=y CONFIG_BONDING=m CONFIG_DUMMY=m CONFIG_NETCONSOLE=y -CONFIG_NETPOLL_TRAP=y CONFIG_TUN=m CONFIG_VIRTIO_NET=m CONFIG_VHOST_NET=m diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index c3a3269b0865..67885b2d70aa 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -103,7 +103,6 @@ CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m CONFIG_NETCONSOLE=y -CONFIG_NETPOLL_TRAP=y CONFIG_TUN=m CONFIG_VORTEX=y CONFIG_ACENIC=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index fec5870f1818..ad6d6b5af7d7 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -629,7 +629,6 @@ CONFIG_SLIP_SMART=y CONFIG_NET_FC=y CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y -CONFIG_NETPOLL_TRAP=y CONFIG_VIRTIO_NET=m # CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_JOYDEV=m diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 2e637c881d2b..879de5efb073 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -36,7 +36,7 @@ CONFIG_KEXEC=y CONFIG_SCHED_SMT=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="" -CONFIG_PM_RUNTIME=y +CONFIG_PM=y CONFIG_PM_DEBUG=y # CONFIG_SECCOMP is not set # CONFIG_PCI is not set diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index dd2a9cab4b50..1f97364017c7 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -133,7 +133,6 @@ CONFIG_DM_UEVENT=y CONFIG_BONDING=m CONFIG_DUMMY=m CONFIG_NETCONSOLE=y -CONFIG_NETPOLL_TRAP=y CONFIG_TUN=m CONFIG_VIRTIO_NET=m CONFIG_VHOST_NET=m diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig index d2008887eb8c..ac7ca5852827 100644 --- a/arch/powerpc/configs/pseries_le_defconfig +++ b/arch/powerpc/configs/pseries_le_defconfig @@ -134,7 +134,6 @@ CONFIG_DM_UEVENT=y CONFIG_BONDING=m CONFIG_DUMMY=m CONFIG_NETCONSOLE=y -CONFIG_NETPOLL_TRAP=y CONFIG_TUN=m CONFIG_VIRTIO_NET=m CONFIG_VHOST_NET=m diff --git a/arch/powerpc/crypto/sha1.c b/arch/powerpc/crypto/sha1.c index f9e8b9491efc..d3feba5a275f 100644 --- a/arch/powerpc/crypto/sha1.c +++ b/arch/powerpc/crypto/sha1.c @@ -66,7 +66,7 @@ static int sha1_update(struct shash_desc *desc, const u8 *data, src = data + done; } while (done + 63 < len); - memset(temp, 0, sizeof(temp)); + memzero_explicit(temp, sizeof(temp)); partial = 0; } memcpy(sctx->buffer + partial, src, len - done); @@ -154,4 +154,4 @@ module_exit(sha1_powerpc_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); -MODULE_ALIAS("sha1-powerpc"); +MODULE_ALIAS_CRYPTO("sha1-powerpc"); diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 31e8f59aff38..382b28e364dc 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += clkdev.h -generic-y += hash.h generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index bab79a110c7b..a3bf5be111ff 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -33,12 +33,9 @@ #define mb() __asm__ __volatile__ ("sync" : : : "memory") #define rmb() __asm__ __volatile__ ("sync" : : : "memory") #define wmb() __asm__ __volatile__ ("sync" : : : "memory") -#define read_barrier_depends() do { } while(0) #define set_mb(var, value) do { var = value; mb(); } while (0) -#ifdef CONFIG_SMP - #ifdef __SUBARCH_HAS_LWSYNC # define SMPWMB LWSYNC #else @@ -46,20 +43,26 @@ #endif #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") +#define dma_rmb() __lwsync() +#define dma_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") + +#ifdef CONFIG_SMP +#define smp_lwsync() __lwsync() #define smp_mb() mb() #define smp_rmb() __lwsync() #define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") -#define smp_read_barrier_depends() read_barrier_depends() #else -#define __lwsync() barrier() +#define smp_lwsync() barrier() #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while(0) #endif /* CONFIG_SMP */ +#define read_barrier_depends() do { } while (0) +#define smp_read_barrier_depends() do { } while (0) + /* * This is a barrier which prevents following instructions from being * started until the value of the argument x is known. For example, if @@ -72,7 +75,7 @@ #define smp_store_release(p, v) \ do { \ compiletime_assert_atomic_type(*p); \ - __lwsync(); \ + smp_lwsync(); \ ACCESS_ONCE(*p) = (v); \ } while (0) @@ -80,7 +83,7 @@ do { \ ({ \ typeof(*p) ___p1 = ACCESS_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ - __lwsync(); \ + smp_lwsync(); \ ___p1; \ }) diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index bd3bd573d0ae..59abc620f8e8 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -14,9 +14,9 @@ * * The bitop functions are defined to work on unsigned longs, so for a * ppc64 system the bits end up numbered: - * |63..............0|127............64|191...........128|255...........196| + * |63..............0|127............64|191...........128|255...........192| * and on ppc32: - * |31.....0|63....31|95....64|127...96|159..128|191..160|223..192|255..224| + * |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224| * * There are a few little-endian macros used mostly for filesystem * bitmaps, these work on similar bit arrays layouts, but @@ -213,7 +213,7 @@ static __inline__ unsigned long ffz(unsigned long x) return __ilog2(x & -x); } -static __inline__ int __ffs(unsigned long x) +static __inline__ unsigned long __ffs(unsigned long x) { return __ilog2(x & -x); } diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h new file mode 100644 index 000000000000..d2f99ca1e3a6 --- /dev/null +++ b/arch/powerpc/include/asm/cpuidle.h @@ -0,0 +1,20 @@ +#ifndef _ASM_POWERPC_CPUIDLE_H +#define _ASM_POWERPC_CPUIDLE_H + +#ifdef CONFIG_PPC_POWERNV +/* Used in powernv idle state management */ +#define PNV_THREAD_RUNNING 0 +#define PNV_THREAD_NAP 1 +#define PNV_THREAD_SLEEP 2 +#define PNV_THREAD_WINKLE 3 +#define PNV_CORE_IDLE_LOCK_BIT 0x100 +#define PNV_CORE_IDLE_THREAD_BITS 0x0FF + +#ifndef __ASSEMBLY__ +extern u32 pnv_fastsleep_workaround_at_entry[]; +extern u32 pnv_fastsleep_workaround_at_exit[]; +#endif + +#endif + +#endif diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index daa5af91163c..22d5a7da9e68 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -448,13 +448,9 @@ extern const char *powerpc_base_platform; CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_DABRX) #define CPU_FTRS_COMPATIBLE (CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2) -#define CPU_FTRS_A2 (CPU_FTR_USE_TB | CPU_FTR_SMT | CPU_FTR_DBELL | \ - CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN | \ - CPU_FTR_ICSWX | CPU_FTR_DABRX ) - #ifdef __powerpc64__ #ifdef CONFIG_PPC_BOOK3E -#define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500 | CPU_FTRS_A2) +#define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500) #else #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \ @@ -505,13 +501,13 @@ enum { #ifdef __powerpc64__ #ifdef CONFIG_PPC_BOOK3E -#define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500 & CPU_FTRS_A2) +#define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500) #else #define CPU_FTRS_ALWAYS \ (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \ CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \ CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \ - CPU_FTRS_POWER8_DD1 & CPU_FTRS_POSSIBLE) + CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE) #endif #else enum { diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index ca07f9c27335..0652ebe117af 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -39,6 +39,7 @@ struct device_node; #define EEH_PROBE_MODE_DEV 0x04 /* From PCI device */ #define EEH_PROBE_MODE_DEVTREE 0x08 /* From device tree */ #define EEH_ENABLE_IO_FOR_LOG 0x10 /* Enable IO for log */ +#define EEH_EARLY_DUMP_LOG 0x20 /* Dump log immediately */ /* * Delay for PE reset, all in ms @@ -72,6 +73,7 @@ struct device_node; #define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */ #define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ #define EEH_PE_CFG_BLOCKED (1 << 2) /* Block config access */ +#define EEH_PE_RESET (1 << 3) /* PE reset in progress */ #define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */ #define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */ diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 888d8f3f2524..57d289acb803 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -28,8 +28,7 @@ the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ -extern unsigned long randomize_et_dyn(unsigned long base); -#define ELF_ET_DYN_BASE (randomize_et_dyn(0x20000000)) +#define ELF_ET_DYN_BASE 0x20000000 #define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0) diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index a6774560afe3..493e72f64b35 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -70,39 +70,39 @@ #define CPU_UNKNOWN (~((u32)0)) /* Utility macros */ -#define SKIP_TO_NEXT_CPU(reg_entry) \ -({ \ - while (reg_entry->reg_id != REG_ID("CPUEND")) \ - reg_entry++; \ - reg_entry++; \ +#define SKIP_TO_NEXT_CPU(reg_entry) \ +({ \ + while (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUEND")) \ + reg_entry++; \ + reg_entry++; \ }) /* Kernel Dump section info */ struct fadump_section { - u32 request_flag; - u16 source_data_type; - u16 error_flags; - u64 source_address; - u64 source_len; - u64 bytes_dumped; - u64 destination_address; + __be32 request_flag; + __be16 source_data_type; + __be16 error_flags; + __be64 source_address; + __be64 source_len; + __be64 bytes_dumped; + __be64 destination_address; }; /* ibm,configure-kernel-dump header. */ struct fadump_section_header { - u32 dump_format_version; - u16 dump_num_sections; - u16 dump_status_flag; - u32 offset_first_dump_section; + __be32 dump_format_version; + __be16 dump_num_sections; + __be16 dump_status_flag; + __be32 offset_first_dump_section; /* Fields for disk dump option. */ - u32 dd_block_size; - u64 dd_block_offset; - u64 dd_num_blocks; - u32 dd_offset_disk_path; + __be32 dd_block_size; + __be64 dd_block_offset; + __be64 dd_num_blocks; + __be32 dd_offset_disk_path; /* Maximum time allowed to prevent an automatic dump-reboot. */ - u32 max_time_auto; + __be32 max_time_auto; }; /* @@ -174,15 +174,15 @@ static inline u64 str_to_u64(const char *str) /* Register save area header. */ struct fadump_reg_save_area_header { - u64 magic_number; - u32 version; - u32 num_cpu_offset; + __be64 magic_number; + __be32 version; + __be32 num_cpu_offset; }; /* Register entry. */ struct fadump_reg_entry { - u64 reg_id; - u64 reg_value; + __be64 reg_id; + __be64 reg_value; }; /* fadump crash info structure */ diff --git a/arch/powerpc/include/asm/fsl_guts.h b/arch/powerpc/include/asm/fsl_guts.h index 77ced0b3d81d..43b6bb1a4a9c 100644 --- a/arch/powerpc/include/asm/fsl_guts.h +++ b/arch/powerpc/include/asm/fsl_guts.h @@ -68,7 +68,10 @@ struct ccsr_guts { u8 res0b4[0xc0 - 0xb4]; __be32 iovselsr; /* 0x.00c0 - I/O voltage select status register Called 'elbcvselcr' on 86xx SOCs */ - u8 res0c4[0x224 - 0xc4]; + u8 res0c4[0x100 - 0xc4]; + __be32 rcwsr[16]; /* 0x.0100 - Reset Control Word Status registers + There are 16 registers */ + u8 res140[0x224 - 0x140]; __be32 iodelay1; /* 0x.0224 - IO delay control register 1 */ __be32 iodelay2; /* 0x.0228 - IO delay control register 2 */ u8 res22c[0x604 - 0x22c]; diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 1bbb3013d6aa..8add8b861e8d 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -21,7 +21,12 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); #define __ARCH_IRQ_STAT -#define local_softirq_pending() __get_cpu_var(irq_stat).__softirq_pending +#define local_softirq_pending() __this_cpu_read(irq_stat.__softirq_pending) + +#define __ARCH_SET_SOFTIRQ_PENDING + +#define set_softirq_pending(x) __this_cpu_write(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) __this_cpu_or(irq_stat.__softirq_pending, (x)) static inline void ack_bad_irq(unsigned int irq) { diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 766b77d527ac..1d53a65b4ec1 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -48,7 +48,7 @@ static inline unsigned int hugepd_shift(hugepd_t hpd) #endif /* CONFIG_PPC_BOOK3S_64 */ -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, unsigned pdshift) { /* @@ -58,9 +58,9 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, */ unsigned long idx = 0; - pte_t *dir = hugepd_page(*hpdp); + pte_t *dir = hugepd_page(hpd); #ifndef CONFIG_PPC_FSL_BOOK3E - idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp); + idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd); #endif return dir + idx; @@ -193,7 +193,7 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma, } #define hugepd_shift(x) 0 -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, unsigned pdshift) { return 0; diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 97d3869991ca..a8d2ef30d473 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -617,10 +617,14 @@ static inline void name at \ /* * We don't do relaxed operations yet, at least not with this semantic */ -#define readb_relaxed(addr) readb(addr) -#define readw_relaxed(addr) readw(addr) -#define readl_relaxed(addr) readl(addr) -#define readq_relaxed(addr) readq(addr) +#define readb_relaxed(addr) readb(addr) +#define readw_relaxed(addr) readw(addr) +#define readl_relaxed(addr) readl(addr) +#define readq_relaxed(addr) readq(addr) +#define writeb_relaxed(v, addr) writeb(v, addr) +#define writew_relaxed(v, addr) writew(v, addr) +#define writel_relaxed(v, addr) writel(v, addr) +#define writeq_relaxed(v, addr) writeq(v, addr) #ifdef CONFIG_PPC32 #define mmiowb() @@ -851,9 +855,6 @@ static inline void * bus_to_virt(unsigned long address) #define clrsetbits_8(addr, clear, set) clrsetbits(8, addr, clear, set) -void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset, - size_t size, unsigned long flags); - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_IO_H */ diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 42632c7a2a4e..9cfa3706a1b8 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -137,13 +137,16 @@ static inline void set_iommu_table_base_and_group(struct device *dev, iommu_add_device(dev); } -extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl, - struct scatterlist *sglist, int nelems, - unsigned long mask, enum dma_data_direction direction, - struct dma_attrs *attrs); -extern void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs); +extern int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, + struct scatterlist *sglist, int nelems, + unsigned long mask, + enum dma_data_direction direction, + struct dma_attrs *attrs); +extern void ppc_iommu_unmap_sg(struct iommu_table *tbl, + struct scatterlist *sglist, + int nelems, + enum dma_data_direction direction, + struct dma_attrs *attrs); extern void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, size_t size, dma_addr_t *dma_handle, diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 19c36cba37c4..a46f5f45570c 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -86,6 +86,11 @@ extern int overlaps_crashkernel(unsigned long start, unsigned long size); extern void reserve_crashkernel(void); extern void machine_kexec_mask_interrupts(void); +static inline bool kdump_in_progress(void) +{ + return crashing_cpu >= 0; +} + #else /* !CONFIG_KEXEC */ static inline void crash_kexec_secondary(struct pt_regs *regs) { } @@ -106,6 +111,11 @@ static inline int crash_shutdown_unregister(crash_shutdown_t handler) return 0; } +static inline bool kdump_in_progress(void) +{ + return false; +} + #endif /* CONFIG_KEXEC */ #endif /* ! __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 6acf0c2a0f99..942c7b1678e3 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -170,8 +170,6 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, unsigned long *nb_ret); extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr, unsigned long gpa, bool dirty); -extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, - long pte_index, unsigned long pteh, unsigned long ptel); extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, pgd_t *pgdir, bool realmode, unsigned long *idx_ret); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 0aa817933e6a..2d81e202bdcc 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -37,7 +37,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ -extern unsigned long kvm_rma_pages; #endif #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ @@ -148,7 +147,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, /* This covers 14..54 bits of va*/ rb = (v & ~0x7fUL) << 16; /* AVA field */ - rb |= v >> (62 - 8); /* B field */ + rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */ /* * AVA in v had cleared lower 23 bits. We need to derive * that from pteg index diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 047855619cc4..7efd666a3fa7 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -180,11 +180,6 @@ struct kvmppc_spapr_tce_table { struct page *pages[0]; }; -struct kvm_rma_info { - atomic_t use_count; - unsigned long base_pfn; -}; - /* XICS components, defined in book3s_xics.c */ struct kvmppc_xics; struct kvmppc_icp; @@ -214,16 +209,9 @@ struct revmap_entry { #define KVMPPC_RMAP_PRESENT 0x100000000ul #define KVMPPC_RMAP_INDEX 0xfffffffful -/* Low-order bits in memslot->arch.slot_phys[] */ -#define KVMPPC_PAGE_ORDER_MASK 0x1f -#define KVMPPC_PAGE_NO_CACHE HPTE_R_I /* 0x20 */ -#define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */ -#define KVMPPC_GOT_PAGE 0x80 - struct kvm_arch_memory_slot { #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE unsigned long *rmap; - unsigned long *slot_phys; #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ }; @@ -242,14 +230,12 @@ struct kvm_arch { struct kvm_rma_info *rma; unsigned long vrma_slb_v; int rma_setup_done; - int using_mmu_notifiers; u32 hpt_order; atomic_t vcpus_running; u32 online_vcores; unsigned long hpt_npte; unsigned long hpt_mask; atomic_t hpte_mod_interest; - spinlock_t slot_phys_lock; cpumask_t need_tlb_flush; int hpt_cma_alloc; #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ @@ -297,6 +283,7 @@ struct kvmppc_vcore { struct list_head runnable_threads; spinlock_t lock; wait_queue_head_t wq; + spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ u64 stolen_tb; u64 preempt_tb; struct kvm_vcpu *runner; @@ -308,6 +295,7 @@ struct kvmppc_vcore { ulong dpdes; /* doorbell state (POWER8) */ void *mpp_buffer; /* Micro Partition Prefetch buffer */ bool mpp_buffer_is_valid; + ulong conferring_threads; }; #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) @@ -664,6 +652,8 @@ struct kvm_vcpu_arch { spinlock_t tbacct_lock; u64 busy_stolen; u64 busy_preempt; + + u32 emul_inst; #endif }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a6dcdb6d13c1..46bf652c9169 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -170,8 +170,6 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce); extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba); -extern struct kvm_rma_info *kvm_alloc_rma(void); -extern void kvm_release_rma(struct kvm_rma_info *ri); extern struct page *kvm_alloc_hpt(unsigned long nr_pages); extern void kvm_release_hpt(struct page *page, unsigned long nr_pages); extern int kvmppc_core_init_vm(struct kvm *kvm); diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 307347f8ddbd..c8175a3fe560 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -42,7 +42,7 @@ struct machdep_calls { unsigned long newpp, unsigned long vpn, int bpsize, int apsize, - int ssize, int local); + int ssize, unsigned long flags); void (*hpte_updateboltedpp)(unsigned long newpp, unsigned long ea, int psize, int ssize); @@ -60,7 +60,7 @@ struct machdep_calls { void (*hugepage_invalidate)(unsigned long vsid, unsigned long addr, unsigned char *hpte_slot_array, - int psize, int ssize); + int psize, int ssize, int local); /* special for kexec, to be called in real mode, linear mapping is * destroyed as well */ void (*hpte_clear_all)(void); @@ -142,7 +142,6 @@ struct machdep_calls { #endif void (*restart)(char *cmd); - void (*power_off)(void); void (*halt)(void); void (*panic)(char *str); void (*cpu_die)(void); @@ -292,10 +291,6 @@ struct machdep_calls { #ifdef CONFIG_ARCH_RANDOM int (*get_random_long)(unsigned long *v); #endif - -#ifdef CONFIG_MEMORY_HOTREMOVE - int (*remove_memory)(u64, u64); -#endif }; extern void e500_idle(void); @@ -343,16 +338,6 @@ extern sys_ctrler_t sys_ctrler; #endif /* CONFIG_PPC_PMAC */ - -/* Functions to produce codes on the leds. - * The SRC code should be unique for the message category and should - * be limited to the lower 24 bits (the upper 8 are set by these funcs), - * and (for boot & dump) should be sorted numerically in the order - * the events occur. - */ -/* Print a boot progress message. */ -void ppc64_boot_msg(unsigned int src, const char *msg); - static inline void log_error(char *buf, unsigned int err_type, int fatal) { if (ppc_md.log_error) diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 3d11d3ce79ec..986b9e1e1044 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -56,6 +56,7 @@ * additional information from the MI_EPN, and MI_TWC registers. */ #define SPRN_MI_RPN 790 +#define MI_SPS16K 0x00000008 /* Small page size (0 = 4k, 1 = 16k) */ /* Define an RPN value for mapping kernel memory to large virtual * pages for boot initialization. This has real page number of 0, @@ -129,6 +130,7 @@ * additional information from the MD_EPN, and MD_TWC registers. */ #define SPRN_MD_RPN 798 +#define MD_SPS16K 0x00000008 /* Small page size (0 = 4k, 1 = 16k) */ /* This is a temporary storage register that could be used to save * a processor working register during a tablewalk. diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index aeebc94b2bce..4f13c3ed7acf 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -316,27 +316,33 @@ static inline unsigned long hpt_hash(unsigned long vpn, return hash & 0x7fffffffffUL; } +#define HPTE_LOCAL_UPDATE 0x1 +#define HPTE_NOHPTE_UPDATE 0x2 + extern int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, - unsigned int local, int ssize, int subpage_prot); + unsigned long flags, int ssize, int subpage_prot); extern int __hash_page_64K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, - unsigned int local, int ssize); + unsigned long flags, int ssize); struct mm_struct; unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap); -extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap); -extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); +extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, + unsigned long access, unsigned long trap, + unsigned long flags); +extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap, + unsigned long dsisr); int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, - pte_t *ptep, unsigned long trap, int local, int ssize, - unsigned int shift, unsigned int mmu_psize); + pte_t *ptep, unsigned long trap, unsigned long flags, + int ssize, unsigned int shift, unsigned int mmu_psize); #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, unsigned long trap, - int local, int ssize, unsigned int psize); + unsigned long flags, int ssize, unsigned int psize); #else static inline int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, - unsigned long trap, int local, + unsigned long trap, unsigned long flags, int ssize, unsigned int psize) { BUG(); diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 9124b0ede1fc..eb95b675109b 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -56,6 +56,14 @@ struct opal_sg_list { #define OPAL_HARDWARE_FROZEN -13 #define OPAL_WRONG_STATE -14 #define OPAL_ASYNC_COMPLETION -15 +#define OPAL_I2C_TIMEOUT -17 +#define OPAL_I2C_INVALID_CMD -18 +#define OPAL_I2C_LBUS_PARITY -19 +#define OPAL_I2C_BKEND_OVERRUN -20 +#define OPAL_I2C_BKEND_ACCESS -21 +#define OPAL_I2C_ARBT_LOST -22 +#define OPAL_I2C_NACK_RCVD -23 +#define OPAL_I2C_STOP_ERR -24 /* API Tokens (in r0) */ #define OPAL_INVALID_CALL -1 @@ -152,8 +160,25 @@ struct opal_sg_list { #define OPAL_PCI_ERR_INJECT 96 #define OPAL_PCI_EEH_FREEZE_SET 97 #define OPAL_HANDLE_HMI 98 +#define OPAL_CONFIG_CPU_IDLE_STATE 99 +#define OPAL_SLW_SET_REG 100 #define OPAL_REGISTER_DUMP_REGION 101 #define OPAL_UNREGISTER_DUMP_REGION 102 +#define OPAL_WRITE_TPO 103 +#define OPAL_READ_TPO 104 +#define OPAL_IPMI_SEND 107 +#define OPAL_IPMI_RECV 108 +#define OPAL_I2C_REQUEST 109 + +/* Device tree flags */ + +/* Flags set in power-mgmt nodes in device tree if + * respective idle states are supported in the platform. + */ +#define OPAL_PM_NAP_ENABLED 0x00010000 +#define OPAL_PM_SLEEP_ENABLED 0x00020000 +#define OPAL_PM_WINKLE_ENABLED 0x00040000 +#define OPAL_PM_SLEEP_ENABLED_ER1 0x00080000 #ifndef __ASSEMBLY__ @@ -284,62 +309,6 @@ enum OpalMessageType { OPAL_MSG_TYPE_MAX, }; -/* Machine check related definitions */ -enum OpalMCE_Version { - OpalMCE_V1 = 1, -}; - -enum OpalMCE_Severity { - OpalMCE_SEV_NO_ERROR = 0, - OpalMCE_SEV_WARNING = 1, - OpalMCE_SEV_ERROR_SYNC = 2, - OpalMCE_SEV_FATAL = 3, -}; - -enum OpalMCE_Disposition { - OpalMCE_DISPOSITION_RECOVERED = 0, - OpalMCE_DISPOSITION_NOT_RECOVERED = 1, -}; - -enum OpalMCE_Initiator { - OpalMCE_INITIATOR_UNKNOWN = 0, - OpalMCE_INITIATOR_CPU = 1, -}; - -enum OpalMCE_ErrorType { - OpalMCE_ERROR_TYPE_UNKNOWN = 0, - OpalMCE_ERROR_TYPE_UE = 1, - OpalMCE_ERROR_TYPE_SLB = 2, - OpalMCE_ERROR_TYPE_ERAT = 3, - OpalMCE_ERROR_TYPE_TLB = 4, -}; - -enum OpalMCE_UeErrorType { - OpalMCE_UE_ERROR_INDETERMINATE = 0, - OpalMCE_UE_ERROR_IFETCH = 1, - OpalMCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH = 2, - OpalMCE_UE_ERROR_LOAD_STORE = 3, - OpalMCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 4, -}; - -enum OpalMCE_SlbErrorType { - OpalMCE_SLB_ERROR_INDETERMINATE = 0, - OpalMCE_SLB_ERROR_PARITY = 1, - OpalMCE_SLB_ERROR_MULTIHIT = 2, -}; - -enum OpalMCE_EratErrorType { - OpalMCE_ERAT_ERROR_INDETERMINATE = 0, - OpalMCE_ERAT_ERROR_PARITY = 1, - OpalMCE_ERAT_ERROR_MULTIHIT = 2, -}; - -enum OpalMCE_TlbErrorType { - OpalMCE_TLB_ERROR_INDETERMINATE = 0, - OpalMCE_TLB_ERROR_PARITY = 1, - OpalMCE_TLB_ERROR_MULTIHIT = 2, -}; - enum OpalThreadStatus { OPAL_THREAD_INACTIVE = 0x0, OPAL_THREAD_STARTED = 0x1, @@ -452,52 +421,15 @@ struct opal_msg { __be64 params[8]; }; -struct opal_machine_check_event { - enum OpalMCE_Version version:8; /* 0x00 */ - uint8_t in_use; /* 0x01 */ - enum OpalMCE_Severity severity:8; /* 0x02 */ - enum OpalMCE_Initiator initiator:8; /* 0x03 */ - enum OpalMCE_ErrorType error_type:8; /* 0x04 */ - enum OpalMCE_Disposition disposition:8; /* 0x05 */ - uint8_t reserved_1[2]; /* 0x06 */ - uint64_t gpr3; /* 0x08 */ - uint64_t srr0; /* 0x10 */ - uint64_t srr1; /* 0x18 */ - union { /* 0x20 */ - struct { - enum OpalMCE_UeErrorType ue_error_type:8; - uint8_t effective_address_provided; - uint8_t physical_address_provided; - uint8_t reserved_1[5]; - uint64_t effective_address; - uint64_t physical_address; - uint8_t reserved_2[8]; - } ue_error; - - struct { - enum OpalMCE_SlbErrorType slb_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; - } slb_error; - - struct { - enum OpalMCE_EratErrorType erat_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; - } erat_error; +enum { + OPAL_IPMI_MSG_FORMAT_VERSION_1 = 1, +}; - struct { - enum OpalMCE_TlbErrorType tlb_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; - } tlb_error; - } u; +struct opal_ipmi_msg { + uint8_t version; + uint8_t netfn; + uint8_t cmd; + uint8_t data[]; }; /* FSP memory errors handling */ @@ -801,6 +733,24 @@ typedef struct oppanel_line { uint64_t line_len; } oppanel_line_t; +/* OPAL I2C request */ +struct opal_i2c_request { + uint8_t type; +#define OPAL_I2C_RAW_READ 0 +#define OPAL_I2C_RAW_WRITE 1 +#define OPAL_I2C_SM_READ 2 +#define OPAL_I2C_SM_WRITE 3 + uint8_t flags; +#define OPAL_I2C_ADDR_10 0x01 /* Not supported yet */ + uint8_t subaddr_sz; /* Max 4 */ + uint8_t reserved; + __be16 addr; /* 7 or 10 bit address */ + __be16 reserved2; + __be32 subaddr; /* Sub-address if any */ + __be32 size; /* Data size */ + __be64 buffer_ra; /* Buffer real address */ +}; + /* /sys/firmware/opal */ extern struct kobject *opal_kobj; @@ -819,6 +769,9 @@ int64_t opal_rtc_read(__be32 *year_month_day, __be64 *hour_minute_second_millisecond); int64_t opal_rtc_write(uint32_t year_month_day, uint64_t hour_minute_second_millisecond); +int64_t opal_tpo_read(uint64_t token, __be32 *year_mon_day, __be32 *hour_min); +int64_t opal_tpo_write(uint64_t token, uint32_t year_mon_day, + uint32_t hour_min); int64_t opal_cec_power_down(uint64_t request); int64_t opal_cec_reboot(void); int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset); @@ -962,7 +915,14 @@ int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data); int64_t opal_handle_hmi(void); int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end); int64_t opal_unregister_dump_region(uint32_t id); +int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val); int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number); +int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg, + uint64_t msg_len); +int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg, + uint64_t *msg_len); +int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id, + struct opal_i2c_request *oreq); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, @@ -992,8 +952,6 @@ extern int opal_async_wait_response(uint64_t token, struct opal_msg *msg); extern int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data); struct rtc_time; -extern int opal_set_rtc_time(struct rtc_time *tm); -extern void opal_get_rtc_time(struct rtc_time *tm); extern unsigned long opal_get_boot_time(void); extern void opal_nvram_init(void); extern void opal_flash_init(void); diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index a5139ea6910b..e5f22c6c4bf9 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -42,7 +42,6 @@ extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */ #define get_slb_shadow() (get_paca()->slb_shadow_ptr) struct task_struct; -struct opal_machine_check_event; /* * Defines the layout of the paca. @@ -154,11 +153,15 @@ struct paca_struct { #endif #ifdef CONFIG_PPC_POWERNV - /* Pointer to OPAL machine check event structure set by the - * early exception handler for use by high level C handler - */ - struct opal_machine_check_event *opal_mc_evt; + /* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */ + u32 *core_idle_state_ptr; + u8 thread_idle_state; /* PNV_THREAD_RUNNING/NAP/SLEEP */ + /* Mask to indicate thread id in core */ + u8 thread_mask; + /* Mask to denote subcore sibling threads */ + u8 subcore_sibling_mask; #endif + #ifdef CONFIG_PPC_BOOK3S_64 /* Exclusive emergency stack pointer for machine check exception. */ void *mc_emergency_sp; diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 26fe1ae15212..69c059887a2c 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -379,12 +379,14 @@ static inline int hugepd_ok(hugepd_t hpd) } #endif -#define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep)))) +#define is_hugepd(hpd) (hugepd_ok(hpd)) +#define pgd_huge pgd_huge int pgd_huge(pgd_t pgd); #else /* CONFIG_HUGETLB_PAGE */ #define is_hugepd(pdep) 0 #define pgd_huge(pgd) 0 #endif /* CONFIG_HUGETLB_PAGE */ +#define __hugepd(x) ((hugepd_t) { (x) }) struct page; extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg); diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 4ca90a39d6d0..725247beebec 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -159,8 +159,6 @@ struct pci_dn { int pci_ext_config_space; /* for pci devices */ - bool force_32bit_msi; - struct pci_dev *pcidev; /* back-pointer to the pci device */ #ifdef CONFIG_EEH struct eeh_dev *edev; /* eeh device */ diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h index e9a9f60e596d..fc3ee06eab87 100644 --- a/arch/powerpc/include/asm/pgalloc.h +++ b/arch/powerpc/include/asm/pgalloc.h @@ -3,7 +3,6 @@ #ifdef __KERNEL__ #include <linux/mm.h> -#include <asm-generic/tlb.h> #ifdef CONFIG_PPC_BOOK3E extern void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address); @@ -14,6 +13,8 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb, } #endif /* !CONFIG_PPC_BOOK3E */ +extern void tlb_remove_table(struct mmu_gather *tlb, void *table); + #ifdef CONFIG_PPC64 #include <asm/pgalloc-64.h> #else diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 945e47adf7db..234e07c47803 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -170,6 +170,25 @@ static inline unsigned long pte_update(pte_t *p, #ifdef PTE_ATOMIC_UPDATES unsigned long old, tmp; +#ifdef CONFIG_PPC_8xx + unsigned long tmp2; + + __asm__ __volatile__("\ +1: lwarx %0,0,%4\n\ + andc %1,%0,%5\n\ + or %1,%1,%6\n\ + /* 0x200 == Extended encoding, bit 22 */ \ + /* Bit 22 has to be 1 if neither _PAGE_USER nor _PAGE_RW are set */ \ + rlwimi %1,%1,32-2,0x200\n /* get _PAGE_USER */ \ + rlwinm %3,%1,32-1,0x200\n /* get _PAGE_RW */ \ + or %1,%3,%1\n\ + xori %1,%1,0x200\n" +" stwcx. %1,0,%4\n\ + bne- 1b" + : "=&r" (old), "=&r" (tmp), "=m" (*p), "=&r" (tmp2) + : "r" (p), "r" (clr), "r" (set), "m" (*p) + : "cc" ); +#else /* CONFIG_PPC_8xx */ __asm__ __volatile__("\ 1: lwarx %0,0,%3\n\ andc %1,%0,%4\n\ @@ -180,6 +199,7 @@ static inline unsigned long pte_update(pte_t *p, : "=&r" (old), "=&r" (tmp), "=m" (*p) : "r" (p), "r" (clr), "r" (set), "m" (*p) : "cc" ); +#endif /* CONFIG_PPC_8xx */ #else /* PTE_ATOMIC_UPDATES */ unsigned long old = pte_val(*p); *p = __pte((old & ~clr) | set); diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/pgtable-ppc64-4k.h index 7b935683f268..132ee1d482c2 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-4k.h @@ -57,7 +57,21 @@ #define pgd_present(pgd) (pgd_val(pgd) != 0) #define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0) #define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) -#define pgd_page(pgd) virt_to_page(pgd_page_vaddr(pgd)) + +#ifndef __ASSEMBLY__ + +static inline pte_t pgd_pte(pgd_t pgd) +{ + return __pte(pgd_val(pgd)); +} + +static inline pgd_t pte_pgd(pte_t pte) +{ + return __pgd(pte_val(pte)); +} +extern struct page *pgd_page(pgd_t pgd); + +#endif /* !__ASSEMBLY__ */ #define pud_offset(pgdp, addr) \ (((pud_t *) pgd_page_vaddr(*(pgdp))) + \ diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h index a56b82fb0609..1de35bbd02a6 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-64k.h @@ -38,4 +38,7 @@ /* Bits to mask out from a PGD/PUD to get to the PMD page */ #define PUD_MASKED_BITS 0x1ff +#define pgd_pte(pgd) (pud_pte(((pud_t){ pgd }))) +#define pte_pgd(pte) ((pgd_t)pte_pud(pte)) + #endif /* _ASM_POWERPC_PGTABLE_PPC64_64K_H */ diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index ae153c40ab7c..b9dcc936e2d1 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -152,7 +152,7 @@ #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \ || (pmd_val(pmd) & PMD_BAD_BITS)) -#define pmd_present(pmd) (pmd_val(pmd) != 0) +#define pmd_present(pmd) (!pmd_none(pmd)) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) #define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) extern struct page *pmd_page(pmd_t pmd); @@ -164,9 +164,21 @@ extern struct page *pmd_page(pmd_t pmd); #define pud_present(pud) (pud_val(pud) != 0) #define pud_clear(pudp) (pud_val(*(pudp)) = 0) #define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS) -#define pud_page(pud) virt_to_page(pud_page_vaddr(pud)) +extern struct page *pud_page(pud_t pud); + +static inline pte_t pud_pte(pud_t pud) +{ + return __pte(pud_val(pud)); +} + +static inline pud_t pte_pud(pte_t pte) +{ + return __pud(pte_val(pte)); +} +#define pud_write(pud) pte_write(pud_pte(pud)) #define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);}) +#define pgd_write(pgd) pte_write(pgd_pte(pgd)) /* * Find an entry in a page-table-directory. We combine the address region @@ -422,7 +434,22 @@ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd); - +/* + * + * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs + * page. The hugetlbfs page table walking and mangling paths are totally + * separated form the core VM paths and they're differentiated by + * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run. + * + * pmd_trans_huge() is defined as false at build time if + * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build + * time in such case. + * + * For ppc64 we need to differntiate from explicit hugepages from THP, because + * for THP we also track the subpage details at the pmd level. We don't do + * that for explicit huge pages. + * + */ static inline int pmd_trans_huge(pmd_t pmd) { /* @@ -431,16 +458,6 @@ static inline int pmd_trans_huge(pmd_t pmd) return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE); } -static inline int pmd_large(pmd_t pmd) -{ - /* - * leaf pte for huge page, bottom two bits != 00 - */ - if (pmd_trans_huge(pmd)) - return pmd_val(pmd) & _PAGE_PRESENT; - return 0; -} - static inline int pmd_trans_splitting(pmd_t pmd) { if (pmd_trans_huge(pmd)) @@ -451,6 +468,14 @@ static inline int pmd_trans_splitting(pmd_t pmd) extern int has_transparent_hugepage(void); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline int pmd_large(pmd_t pmd) +{ + /* + * leaf pte for huge page, bottom two bits != 00 + */ + return ((pmd_val(pmd) & 0x3) != 0x0); +} + static inline pte_t pmd_pte(pmd_t pmd) { return __pte(pmd_val(pmd)); @@ -467,6 +492,7 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd) } #define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) +#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) @@ -575,6 +601,5 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, */ return true; } - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 316f9a5da173..a8805fee0df9 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -274,11 +274,9 @@ extern void paging_init(void); */ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); -extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr); - extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr); + unsigned long end, int write, + struct page **pages, int *nr); #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_large(pmd) 0 #define has_transparent_hugepage() 0 diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 6f8536208049..03cd858a401c 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -194,6 +194,7 @@ #define PPC_INST_NAP 0x4c000364 #define PPC_INST_SLEEP 0x4c0003a4 +#define PPC_INST_WINKLE 0x4c0003e4 /* A2 specific instructions */ #define PPC_INST_ERATWE 0x7c0001a6 @@ -204,6 +205,7 @@ #define PPC_INST_ERATSX_DOT 0x7c000127 /* Misc instructions for BPF compiler */ +#define PPC_INST_LBZ 0x88000000 #define PPC_INST_LD 0xe8000000 #define PPC_INST_LHZ 0xa0000000 #define PPC_INST_LHBRX 0x7c00062c @@ -374,6 +376,7 @@ #define PPC_NAP stringify_in_c(.long PPC_INST_NAP) #define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP) +#define PPC_WINKLE stringify_in_c(.long PPC_INST_WINKLE) /* BHRB instructions */ #define PPC_CLRBHRB stringify_in_c(.long PPC_INST_CLRBHRB) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index dda7ac4c80bd..bf117d8fb45f 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -451,8 +451,9 @@ extern unsigned long cpuidle_disable; enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; extern int powersave_nap; /* set if nap mode can be used in idle loop */ -extern void power7_nap(int check_irq); -extern void power7_sleep(void); +extern unsigned long power7_nap(int check_irq); +extern unsigned long power7_sleep(void); +extern unsigned long power7_winkle(void); extern void flush_instruction_cache(void); extern void hard_reset_now(void); extern void poweroff_now(void); diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h index d44826e4ff97..daa4616e61c4 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/pte-8xx.h @@ -48,19 +48,22 @@ */ #define _PAGE_RW 0x0400 /* lsb PP bits, inverted in HW */ #define _PAGE_USER 0x0800 /* msb PP bits */ +/* set when neither _PAGE_USER nor _PAGE_RW are set */ +#define _PAGE_KNLRO 0x0200 #define _PMD_PRESENT 0x0001 #define _PMD_BAD 0x0ff0 #define _PMD_PAGE_MASK 0x000c #define _PMD_PAGE_8M 0x000c -#define _PTE_NONE_MASK _PAGE_ACCESSED +#define _PTE_NONE_MASK _PAGE_KNLRO /* Until my rework is finished, 8xx still needs atomic PTE updates */ #define PTE_ATOMIC_UPDATES 1 /* We need to add _PAGE_SHARED to kernel pages */ -#define _PAGE_KERNEL_RO (_PAGE_SHARED) +#define _PAGE_KERNEL_RO (_PAGE_SHARED | _PAGE_KNLRO) +#define _PAGE_KERNEL_ROX (_PAGE_EXEC | _PAGE_KNLRO) #define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE) #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c998279bd85b..1c874fb533bb 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -118,8 +118,10 @@ #define __MSR (MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV) #ifdef __BIG_ENDIAN__ #define MSR_ __MSR +#define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV) #else #define MSR_ (__MSR | MSR_LE) +#define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV | MSR_LE) #endif #define MSR_KERNEL (MSR_ | MSR_64BIT) #define MSR_USER32 (MSR_ | MSR_PR | MSR_EE) @@ -371,6 +373,7 @@ #define SPRN_DBAT7L 0x23F /* Data BAT 7 Lower Register */ #define SPRN_DBAT7U 0x23E /* Data BAT 7 Upper Register */ #define SPRN_PPR 0x380 /* SMT Thread status Register */ +#define SPRN_TSCR 0x399 /* Thread Switch Control Register */ #define SPRN_DEC 0x016 /* Decrement Register */ #define SPRN_DER 0x095 /* Debug Enable Regsiter */ @@ -728,6 +731,7 @@ #define SPRN_BESCR 806 /* Branch event status and control register */ #define BESCR_GE 0x8000000000000000ULL /* Global Enable */ #define SPRN_WORT 895 /* Workload optimization register - thread */ +#define SPRN_WORC 863 /* Workload optimization register - core */ #define SPRN_PMC1 787 #define SPRN_PMC2 788 diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 11ba86e17631..fbdf18cf954c 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -8,7 +8,6 @@ extern void ppc_printk_progress(char *s, unsigned short hex); extern unsigned int rtas_data; extern int mem_init_done; /* set on boot once kmalloc can be called */ -extern int init_bootmem_done; /* set once bootmem is available */ extern unsigned long long memory_limit; extern unsigned long klimit; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); @@ -24,7 +23,7 @@ extern void reloc_got2(unsigned long); #define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x))) void check_for_initrd(void); -void do_init_bootmem(void); +void initmem_init(void); void setup_panic(void); #define ARCH_PANIC_TIMEOUT 180 diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 6240698fee9a..ff21b7a2f0cc 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -90,6 +90,10 @@ static inline void syscall_set_arguments(struct task_struct *task, static inline int syscall_get_arch(void) { - return is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64; + int arch = is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64; +#ifdef __LITTLE_ENDIAN__ + arch |= __AUDIT_ARCH_LE; +#endif + return arch; } #endif /* _ASM_SYSCALL_H */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index ce9577d693be..91062eef582f 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -366,3 +366,4 @@ SYSCALL_SPU(seccomp) SYSCALL_SPU(getrandom) SYSCALL_SPU(memfd_create) SYSCALL_SPU(bpf) +COMPAT_SYS(execveat) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index b034ecdb7c74..ebc4f165690a 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -71,13 +71,12 @@ struct thread_info { #define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT) /* how to get the thread information struct from C */ +register unsigned long __current_r1 asm("r1"); static inline struct thread_info *current_thread_info(void) { - register unsigned long sp asm("r1"); - /* gcc4, at least, is smart enough to turn this into a single * rlwinm for ppc32 and clrrdi for ppc64 */ - return (struct thread_info *)(sp & ~(THREAD_SIZE-1)); + return (struct thread_info *)(__current_r1 & ~(THREAD_SIZE-1)); } #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index e2b428b0f7ba..20733fa518ae 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -27,6 +27,7 @@ #define tlb_start_vma(tlb, vma) do { } while (0) #define tlb_end_vma(tlb, vma) do { } while (0) +#define __tlb_remove_tlb_entry __tlb_remove_tlb_entry extern void tlb_flush(struct mmu_gather *tlb); diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 2def01ed0cb2..23d351ca0303 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -107,14 +107,14 @@ extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch); static inline void arch_enter_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } static inline void arch_leave_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); if (batch->index) __flush_tlb_pending(batch); @@ -125,9 +125,11 @@ static inline void arch_leave_lazy_mmu_mode(void) extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, - int ssize, int local); + int ssize, unsigned long flags); extern void flush_hash_range(unsigned long number, int local); - +extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, + pmd_t *pmdp, unsigned int psize, int ssize, + unsigned long flags); static inline void local_flush_tlb_mm(struct mm_struct *mm) { diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 9485b43a7c00..a0c071d24e0e 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -284,7 +284,7 @@ do { \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ - (x) = (__typeof__(*(ptr)))__gu_val; \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) #endif /* __powerpc64__ */ @@ -297,7 +297,7 @@ do { \ might_fault(); \ if (access_ok(VERIFY_READ, __gu_addr, (size))) \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ - (x) = (__typeof__(*(ptr)))__gu_val; \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) @@ -308,7 +308,7 @@ do { \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ - (x) = (__typeof__(*(ptr)))__gu_val; \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index e0da021caa00..36b79c31eedd 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include <uapi/asm/unistd.h> -#define __NR_syscalls 362 +#define __NR_syscalls 363 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/include/asm/vga.h b/arch/powerpc/include/asm/vga.h index a2eac409c1ec..e5f8dd366212 100644 --- a/arch/powerpc/include/asm/vga.h +++ b/arch/powerpc/include/asm/vga.h @@ -38,12 +38,10 @@ static inline u16 scr_readw(volatile const u16 *addr) #endif /* !CONFIG_VGA_CONSOLE && !CONFIG_MDA_CONSOLE */ -extern unsigned long vgacon_remap_base; - #ifdef __powerpc64__ #define VGA_MAP_MEM(x,s) ((unsigned long) ioremap((x), s)) #else -#define VGA_MAP_MEM(x,s) (x + vgacon_remap_base) +#define VGA_MAP_MEM(x,s) (x) #endif #define vga_readb(x) (*(x)) diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 0d050ea37a04..6997f4a271df 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -98,7 +98,7 @@ DECLARE_PER_CPU(struct xics_cppr, xics_cppr); static inline void xics_push_cppr(unsigned int vec) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1)) return; @@ -111,7 +111,7 @@ static inline void xics_push_cppr(unsigned int vec) static inline unsigned char xics_pop_cppr(void) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); if (WARN_ON(os_cppr->index < 1)) return LOWEST_PRIORITY; @@ -121,7 +121,7 @@ static inline unsigned char xics_pop_cppr(void) static inline void xics_set_base_cppr(unsigned char cppr) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); /* we only really want to set the priority when there's * just one cppr value on the stack @@ -133,7 +133,7 @@ static inline void xics_set_base_cppr(unsigned char cppr) static inline unsigned char xics_cppr_top(void) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); return os_cppr->stack[os_cppr->index]; } diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index a9c3e2e18c05..c046666038f8 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -87,4 +87,9 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_INCOMING_CPU 49 + +#define SO_ATTACH_BPF 50 +#define SO_DETACH_BPF SO_DETACH_FILTER + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index f55351f2e66e..ef5b5b1f3123 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -384,5 +384,6 @@ #define __NR_getrandom 359 #define __NR_memfd_create 360 #define __NR_bpf 361 +#define __NR_execveat 362 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 34f55524d456..86150fbb42c3 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -908,7 +908,7 @@ int fix_alignment(struct pt_regs *regs) flush_fp_to_thread(current); } - if ((nb == 16)) { + if (nb == 16) { if (flags & F) { /* Special case for 16-byte FP loads and stores */ PPC_WARN_ALIGNMENT(fp_pair, regs); diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 9d7dede2847c..e624f9646350 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -489,7 +489,6 @@ int main(void) DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); - DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock)); DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); @@ -499,6 +498,7 @@ int main(void) DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); + DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst)); #endif #ifdef CONFIG_PPC_BOOK3S DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); @@ -727,10 +727,14 @@ int main(void) #endif #ifdef CONFIG_PPC_POWERNV - DEFINE(OPAL_MC_GPR3, offsetof(struct opal_machine_check_event, gpr3)); - DEFINE(OPAL_MC_SRR0, offsetof(struct opal_machine_check_event, srr0)); - DEFINE(OPAL_MC_SRR1, offsetof(struct opal_machine_check_event, srr1)); - DEFINE(PACA_OPAL_MC_EVT, offsetof(struct paca_struct, opal_mc_evt)); + DEFINE(PACA_CORE_IDLE_STATE_PTR, + offsetof(struct paca_struct, core_idle_state_ptr)); + DEFINE(PACA_THREAD_IDLE_STATE, + offsetof(struct paca_struct, thread_idle_state)); + DEFINE(PACA_THREAD_MASK, + offsetof(struct paca_struct, thread_mask)); + DEFINE(PACA_SUBCORE_SIBLING_MASK, + offsetof(struct paca_struct, subcore_sibling_mask)); #endif return 0; diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index c78e6dac4d7d..cfa0f81a5bb0 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -12,7 +12,6 @@ #undef DEBUG #include <linux/crash_dump.h> -#include <linux/bootmem.h> #include <linux/io.h> #include <linux/memblock.h> #include <asm/code-patching.h> diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index d55c76c571f3..f4217819cc31 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -41,7 +41,7 @@ void doorbell_exception(struct pt_regs *regs) may_hard_irq_enable(); - __get_cpu_var(irq_stat).doorbell_irqs++; + __this_cpu_inc(irq_stat.doorbell_irqs); smp_ipi_demux(); diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 54d0116256f7..4c68bfe4108a 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -60,16 +60,16 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, struct dma_attrs *attrs) { - return iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems, - device_to_mask(dev), direction, attrs); + return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems, + device_to_mask(dev), direction, attrs); } static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction, struct dma_attrs *attrs) { - iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, direction, - attrs); + ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, + direction, attrs); } /* We support DMA to/from any memory page via the iommu */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 2248a1999c64..e1b6d8e17289 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -143,6 +143,8 @@ static int __init eeh_setup(char *str) { if (!strcmp(str, "off")) eeh_add_flag(EEH_FORCE_DISABLED); + else if (!strcmp(str, "early_log")) + eeh_add_flag(EEH_EARLY_DUMP_LOG); return 1; } @@ -758,30 +760,41 @@ static void eeh_reset_pe_once(struct eeh_pe *pe) int eeh_reset_pe(struct eeh_pe *pe) { int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); - int i, rc; + int i, state, ret; + + /* Mark as reset and block config space */ + eeh_pe_state_mark(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); /* Take three shots at resetting the bus */ - for (i=0; i<3; i++) { + for (i = 0; i < 3; i++) { eeh_reset_pe_once(pe); /* * EEH_PE_ISOLATED is expected to be removed after * BAR restore. */ - rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if ((rc & flags) == flags) - return 0; + state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); + if ((state & flags) == flags) { + ret = 0; + goto out; + } - if (rc < 0) { - pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x", + if (state < 0) { + pr_warn("%s: Unrecoverable slot failure on PHB#%d-PE#%x", __func__, pe->phb->global_number, pe->addr); - return -1; + ret = -ENOTRECOVERABLE; + goto out; } - pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n", - i+1, pe->phb->global_number, pe->addr, rc); + + /* We might run out of credits */ + ret = -EIO; + pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n", + __func__, state, pe->phb->global_number, pe->addr, (i + 1)); } - return -1; +out: + eeh_pe_state_clear(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); + return ret; } /** @@ -920,11 +933,8 @@ int eeh_init(void) pr_warn("%s: Platform EEH operation not found\n", __func__); return -EEXIST; - } else if ((ret = eeh_ops->init())) { - pr_warn("%s: Failed to call platform init function (%d)\n", - __func__, ret); + } else if ((ret = eeh_ops->init())) return ret; - } /* Initialize EEH event */ ret = eeh_event_init(); @@ -1209,6 +1219,7 @@ int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state) static struct pci_device_id eeh_reset_ids[] = { { PCI_DEVICE(0x19a2, 0x0710) }, /* Emulex, BE */ { PCI_DEVICE(0x10df, 0xe220) }, /* Emulex, Lancer */ + { PCI_DEVICE(0x14e4, 0x1657) }, /* Broadcom BCM5719 */ { 0 } }; diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 6535936bdf27..b17e793ba67e 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -528,13 +528,11 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) eeh_pe_dev_traverse(pe, eeh_report_error, &result); /* Issue reset */ - eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); ret = eeh_reset_pe(pe); if (ret) { - eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_CFG_BLOCKED); + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); return ret; } - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); /* Unfreeze the PE */ ret = eeh_clear_pe_frozen_state(pe, true); @@ -601,19 +599,15 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * config accesses. So we prefer to block them. However, controlled * PCI config accesses initiated from EEH itself are allowed. */ - eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); rc = eeh_reset_pe(pe); - if (rc) { - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); + if (rc) return rc; - } pci_lock_rescan_remove(); /* Restore PE */ eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); /* Clear frozen state */ rc = eeh_clear_pe_frozen_state(pe, false); diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index f19b1e5cb060..1ceecdda810b 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -65,7 +65,7 @@ static ssize_t eeh_pe_state_show(struct device *dev, return -ENODEV; state = eeh_ops->get_state(edev->pe, NULL); - return sprintf(buf, "%0x08x %0x08x\n", + return sprintf(buf, "0x%08x 0x%08x\n", state, edev->pe->state); } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 22b45a4955cd..10a093579191 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1424,12 +1424,18 @@ _GLOBAL(ftrace_graph_caller) lwz r4, 44(r1) subi r4, r4, MCOUNT_INSN_SIZE - /* get the parent address */ - addi r3, r1, 52 + /* Grab the LR out of the caller stack frame */ + lwz r3,52(r1) bl prepare_ftrace_return nop + /* + * prepare_ftrace_return gives us the address we divert to. + * Change the LR in the callers stack frame to this. + */ + stw r3,52(r1) + MCOUNT_RESTORE_FRAME /* old link register ends up in ctr reg */ bctr @@ -1457,4 +1463,4 @@ _GLOBAL(return_to_handler) blr #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -#endif /* CONFIG_MCOUNT */ +#endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 5bbd1bc8c3b0..194e46dcf08d 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -659,7 +659,13 @@ _GLOBAL(ret_from_except_lite) 3: #endif bl save_nvgprs + /* + * Use a non volatile GPR to save and restore our thread_info flags + * across the call to restore_interrupts. + */ + mr r30,r4 bl restore_interrupts + mr r4,r30 addi r3,r1,STACK_FRAME_OVERHEAD bl do_notify_resume b ret_from_except @@ -1221,13 +1227,20 @@ _GLOBAL(ftrace_graph_caller) ld r4, 128(r1) subi r4, r4, MCOUNT_INSN_SIZE - /* get the parent address */ + /* Grab the LR out of the caller stack frame */ ld r11, 112(r1) - addi r3, r11, 16 + ld r3, 16(r11) bl prepare_ftrace_return nop + /* + * prepare_ftrace_return gives us the address we divert to. + * Change the LR in the callers stack frame to this. + */ + ld r11, 112(r1) + std r3, 16(r11) + ld r0, 128(r1) mtlr r0 addi r1, r1, 112 @@ -1235,28 +1248,6 @@ _GLOBAL(ftrace_graph_caller) _GLOBAL(return_to_handler) /* need to save return values */ - std r4, -24(r1) - std r3, -16(r1) - std r31, -8(r1) - mr r31, r1 - stdu r1, -112(r1) - - bl ftrace_return_to_handler - nop - - /* return value has real return address */ - mtlr r3 - - ld r1, 0(r1) - ld r4, -24(r1) - ld r3, -16(r1) - ld r31, -8(r1) - - /* Jump back to real return address */ - blr - -_GLOBAL(mod_return_to_handler) - /* need to save return values */ std r4, -32(r1) std r3, -24(r1) /* save TOC */ @@ -1266,7 +1257,7 @@ _GLOBAL(mod_return_to_handler) stdu r1, -112(r1) /* - * We are in a module using the module's TOC. + * We might be called from a module. * Switch to our TOC to run inside the core kernel. */ ld r2, PACATOC(r13) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 72e783ea0681..c2df8150bd7a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -15,6 +15,7 @@ #include <asm/hw_irq.h> #include <asm/exception-64s.h> #include <asm/ptrace.h> +#include <asm/cpuidle.h> /* * We layout physical memory as follows: @@ -101,23 +102,34 @@ system_reset_pSeries: #ifdef CONFIG_PPC_P7_NAP BEGIN_FTR_SECTION /* Running native on arch 2.06 or later, check if we are - * waking up from nap. We only handle no state loss and - * supervisor state loss. We do -not- handle hypervisor - * state loss at this time. + * waking up from nap/sleep/winkle. */ mfspr r13,SPRN_SRR1 rlwinm. r13,r13,47-31,30,31 beq 9f - /* waking up from powersave (nap) state */ - cmpwi cr1,r13,2 - /* Total loss of HV state is fatal, we could try to use the - * PIR to locate a PACA, then use an emergency stack etc... - * OPAL v3 based powernv platforms have new idle states - * which fall in this catagory. + cmpwi cr3,r13,2 + + /* + * Check if last bit of HSPGR0 is set. This indicates whether we are + * waking up from winkle. */ - bgt cr1,8f GET_PACA(r13) + clrldi r5,r13,63 + clrrdi r13,r13,1 + cmpwi cr4,r5,1 + mtspr SPRN_HSPRG0,r13 + + lbz r0,PACA_THREAD_IDLE_STATE(r13) + cmpwi cr2,r0,PNV_THREAD_NAP + bgt cr2,8f /* Either sleep or Winkle */ + + /* Waking up from nap should not cause hypervisor state loss */ + bgt cr3,. + + /* Waking up from nap */ + li r0,PNV_THREAD_RUNNING + stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE li r0,KVM_HWTHREAD_IN_KERNEL @@ -131,7 +143,9 @@ BEGIN_FTR_SECTION 1: #endif - beq cr1,2f + /* Return SRR1 from power7_nap() */ + mfspr r3,SPRN_SRR1 + beq cr3,2f b power7_wakeup_noloss 2: b power7_wakeup_loss @@ -292,15 +306,26 @@ decrementer_pSeries: . = 0xc00 .globl system_call_pSeries system_call_pSeries: - HMT_MEDIUM + /* + * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems + * that support it) before changing to HMT_MEDIUM. That allows the KVM + * code to save that value into the guest state (it is the guest's PPR + * value). Otherwise just change to HMT_MEDIUM as userspace has + * already saved the PPR. + */ #ifdef CONFIG_KVM_BOOK3S_64_HANDLER SET_SCRATCH0(r13) GET_PACA(r13) std r9,PACA_EXGEN+EX_R9(r13) + OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); + HMT_MEDIUM; std r10,PACA_EXGEN+EX_R10(r13) + OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); mfcr r9 KVMTEST(0xc00) GET_SCRATCH0(r13) +#else + HMT_MEDIUM; #endif SYSCALL_PSERIES_1 SYSCALL_PSERIES_2_RFID @@ -1301,23 +1326,6 @@ hmi_exception_after_realmode: EXCEPTION_PROLOG_0(PACA_EXGEN) b hmi_exception_hv -#ifdef CONFIG_PPC_POWERNV -_GLOBAL(opal_mc_secondary_handler) - HMT_MEDIUM_PPR_DISCARD - SET_SCRATCH0(r13) - GET_PACA(r13) - clrldi r3,r3,2 - tovirt(r3,r3) - std r3,PACA_OPAL_MC_EVT(r13) - ld r13,OPAL_MC_SRR0(r3) - mtspr SPRN_SRR0,r13 - ld r13,OPAL_MC_SRR1(r3) - mtspr SPRN_SRR1,r13 - ld r3,OPAL_MC_GPR3(r3) - GET_SCRATCH0(r13) - b machine_check_pSeries -#endif /* CONFIG_PPC_POWERNV */ - #define MACHINE_CHECK_HANDLER_WINDUP \ /* Clear MSR_RI before setting SRR0 and SRR1. */\ @@ -1386,6 +1394,7 @@ machine_check_handle_early: MACHINE_CHECK_HANDLER_WINDUP GET_PACA(r13) ld r1,PACAR1(r13) + li r3,PNV_THREAD_NAP b power7_enter_nap_mode 4: #endif @@ -1571,9 +1580,11 @@ do_hash_page: * r3 contains the faulting address * r4 contains the required access permissions * r5 contains the trap number + * r6 contains dsisr * * at return r3 = 0 for success, 1 for page fault, negative for error */ + ld r6,_DSISR(r1) bl hash_page /* build HPTE if possible */ cmpdi r3,0 /* see if hash_page succeeded */ diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 742694c1d852..26d091a1a54c 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -58,7 +58,7 @@ int __init early_init_dt_scan_fw_dump(unsigned long node, const __be32 *sections; int i, num_sections; int size; - const int *token; + const __be32 *token; if (depth != 1 || strcmp(uname, "rtas") != 0) return 0; @@ -72,7 +72,7 @@ int __init early_init_dt_scan_fw_dump(unsigned long node, return 1; fw_dump.fadump_supported = 1; - fw_dump.ibm_configure_kernel_dump = *token; + fw_dump.ibm_configure_kernel_dump = be32_to_cpu(*token); /* * The 'ibm,kernel-dump' rtas node is present only if there is @@ -147,11 +147,11 @@ static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm, memset(fdm, 0, sizeof(struct fadump_mem_struct)); addr = addr & PAGE_MASK; - fdm->header.dump_format_version = 0x00000001; - fdm->header.dump_num_sections = 3; + fdm->header.dump_format_version = cpu_to_be32(0x00000001); + fdm->header.dump_num_sections = cpu_to_be16(3); fdm->header.dump_status_flag = 0; fdm->header.offset_first_dump_section = - (u32)offsetof(struct fadump_mem_struct, cpu_state_data); + cpu_to_be32((u32)offsetof(struct fadump_mem_struct, cpu_state_data)); /* * Fields for disk dump option. @@ -167,27 +167,27 @@ static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm, /* Kernel dump sections */ /* cpu state data section. */ - fdm->cpu_state_data.request_flag = FADUMP_REQUEST_FLAG; - fdm->cpu_state_data.source_data_type = FADUMP_CPU_STATE_DATA; + fdm->cpu_state_data.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG); + fdm->cpu_state_data.source_data_type = cpu_to_be16(FADUMP_CPU_STATE_DATA); fdm->cpu_state_data.source_address = 0; - fdm->cpu_state_data.source_len = fw_dump.cpu_state_data_size; - fdm->cpu_state_data.destination_address = addr; + fdm->cpu_state_data.source_len = cpu_to_be64(fw_dump.cpu_state_data_size); + fdm->cpu_state_data.destination_address = cpu_to_be64(addr); addr += fw_dump.cpu_state_data_size; /* hpte region section */ - fdm->hpte_region.request_flag = FADUMP_REQUEST_FLAG; - fdm->hpte_region.source_data_type = FADUMP_HPTE_REGION; + fdm->hpte_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG); + fdm->hpte_region.source_data_type = cpu_to_be16(FADUMP_HPTE_REGION); fdm->hpte_region.source_address = 0; - fdm->hpte_region.source_len = fw_dump.hpte_region_size; - fdm->hpte_region.destination_address = addr; + fdm->hpte_region.source_len = cpu_to_be64(fw_dump.hpte_region_size); + fdm->hpte_region.destination_address = cpu_to_be64(addr); addr += fw_dump.hpte_region_size; /* RMA region section */ - fdm->rmr_region.request_flag = FADUMP_REQUEST_FLAG; - fdm->rmr_region.source_data_type = FADUMP_REAL_MODE_REGION; - fdm->rmr_region.source_address = RMA_START; - fdm->rmr_region.source_len = fw_dump.boot_memory_size; - fdm->rmr_region.destination_address = addr; + fdm->rmr_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG); + fdm->rmr_region.source_data_type = cpu_to_be16(FADUMP_REAL_MODE_REGION); + fdm->rmr_region.source_address = cpu_to_be64(RMA_START); + fdm->rmr_region.source_len = cpu_to_be64(fw_dump.boot_memory_size); + fdm->rmr_region.destination_address = cpu_to_be64(addr); addr += fw_dump.boot_memory_size; return addr; @@ -272,7 +272,7 @@ int __init fadump_reserve_mem(void) * first kernel. */ if (fdm_active) - fw_dump.boot_memory_size = fdm_active->rmr_region.source_len; + fw_dump.boot_memory_size = be64_to_cpu(fdm_active->rmr_region.source_len); else fw_dump.boot_memory_size = fadump_calculate_reserve_size(); @@ -314,8 +314,8 @@ int __init fadump_reserve_mem(void) (unsigned long)(base >> 20)); fw_dump.fadumphdr_addr = - fdm_active->rmr_region.destination_address + - fdm_active->rmr_region.source_len; + be64_to_cpu(fdm_active->rmr_region.destination_address) + + be64_to_cpu(fdm_active->rmr_region.source_len); pr_debug("fadumphdr_addr = %p\n", (void *) fw_dump.fadumphdr_addr); } else { @@ -472,9 +472,9 @@ fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs) { memset(regs, 0, sizeof(struct pt_regs)); - while (reg_entry->reg_id != REG_ID("CPUEND")) { - fadump_set_regval(regs, reg_entry->reg_id, - reg_entry->reg_value); + while (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUEND")) { + fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id), + be64_to_cpu(reg_entry->reg_value)); reg_entry++; } reg_entry++; @@ -603,20 +603,20 @@ static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm) if (!fdm->cpu_state_data.bytes_dumped) return -EINVAL; - addr = fdm->cpu_state_data.destination_address; + addr = be64_to_cpu(fdm->cpu_state_data.destination_address); vaddr = __va(addr); reg_header = vaddr; - if (reg_header->magic_number != REGSAVE_AREA_MAGIC) { + if (be64_to_cpu(reg_header->magic_number) != REGSAVE_AREA_MAGIC) { printk(KERN_ERR "Unable to read register save area.\n"); return -ENOENT; } pr_debug("--------CPU State Data------------\n"); - pr_debug("Magic Number: %llx\n", reg_header->magic_number); - pr_debug("NumCpuOffset: %x\n", reg_header->num_cpu_offset); + pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number)); + pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset)); - vaddr += reg_header->num_cpu_offset; - num_cpus = *((u32 *)(vaddr)); + vaddr += be32_to_cpu(reg_header->num_cpu_offset); + num_cpus = be32_to_cpu(*((__be32 *)(vaddr))); pr_debug("NumCpus : %u\n", num_cpus); vaddr += sizeof(u32); reg_entry = (struct fadump_reg_entry *)vaddr; @@ -639,13 +639,13 @@ static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm) fdh = __va(fw_dump.fadumphdr_addr); for (i = 0; i < num_cpus; i++) { - if (reg_entry->reg_id != REG_ID("CPUSTRT")) { + if (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUSTRT")) { printk(KERN_ERR "Unable to read CPU state data\n"); rc = -ENOENT; goto error_out; } /* Lower 4 bytes of reg_value contains logical cpu id */ - cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK; + cpu = be64_to_cpu(reg_entry->reg_value) & FADUMP_CPU_ID_MASK; if (fdh && !cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) { SKIP_TO_NEXT_CPU(reg_entry); continue; @@ -692,7 +692,7 @@ static int __init process_fadump(const struct fadump_mem_struct *fdm_active) return -EINVAL; /* Check if the dump data is valid. */ - if ((fdm_active->header.dump_status_flag == FADUMP_ERROR_FLAG) || + if ((be16_to_cpu(fdm_active->header.dump_status_flag) == FADUMP_ERROR_FLAG) || (fdm_active->cpu_state_data.error_flags != 0) || (fdm_active->rmr_region.error_flags != 0)) { printk(KERN_ERR "Dump taken by platform is not valid\n"); @@ -828,7 +828,7 @@ static void fadump_setup_crash_memory_ranges(void) static inline unsigned long fadump_relocate(unsigned long paddr) { if (paddr > RMA_START && paddr < fw_dump.boot_memory_size) - return fdm.rmr_region.destination_address + paddr; + return be64_to_cpu(fdm.rmr_region.destination_address) + paddr; else return paddr; } @@ -902,7 +902,7 @@ static int fadump_create_elfcore_headers(char *bufp) * to the specified destination_address. Hence set * the correct offset. */ - phdr->p_offset = fdm.rmr_region.destination_address; + phdr->p_offset = be64_to_cpu(fdm.rmr_region.destination_address); } phdr->p_paddr = mbase; @@ -951,7 +951,7 @@ static void register_fadump(void) fadump_setup_crash_memory_ranges(); - addr = fdm.rmr_region.destination_address + fdm.rmr_region.source_len; + addr = be64_to_cpu(fdm.rmr_region.destination_address) + be64_to_cpu(fdm.rmr_region.source_len); /* Initialize fadump crash info header. */ addr = init_fadump_header(addr); vaddr = __va(addr); @@ -1023,7 +1023,7 @@ void fadump_cleanup(void) /* Invalidate the registration only if dump is active. */ if (fw_dump.dump_active) { init_fadump_mem_struct(&fdm, - fdm_active->cpu_state_data.destination_address); + be64_to_cpu(fdm_active->cpu_state_data.destination_address)); fadump_invalidate_dump(&fdm); } } @@ -1063,7 +1063,7 @@ static void fadump_invalidate_release_mem(void) return; } - destination_address = fdm_active->cpu_state_data.destination_address; + destination_address = be64_to_cpu(fdm_active->cpu_state_data.destination_address); fadump_cleanup(); mutex_unlock(&fadump_mutex); @@ -1183,31 +1183,31 @@ static int fadump_region_show(struct seq_file *m, void *private) seq_printf(m, "CPU : [%#016llx-%#016llx] %#llx bytes, " "Dumped: %#llx\n", - fdm_ptr->cpu_state_data.destination_address, - fdm_ptr->cpu_state_data.destination_address + - fdm_ptr->cpu_state_data.source_len - 1, - fdm_ptr->cpu_state_data.source_len, - fdm_ptr->cpu_state_data.bytes_dumped); + be64_to_cpu(fdm_ptr->cpu_state_data.destination_address), + be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) + + be64_to_cpu(fdm_ptr->cpu_state_data.source_len) - 1, + be64_to_cpu(fdm_ptr->cpu_state_data.source_len), + be64_to_cpu(fdm_ptr->cpu_state_data.bytes_dumped)); seq_printf(m, "HPTE: [%#016llx-%#016llx] %#llx bytes, " "Dumped: %#llx\n", - fdm_ptr->hpte_region.destination_address, - fdm_ptr->hpte_region.destination_address + - fdm_ptr->hpte_region.source_len - 1, - fdm_ptr->hpte_region.source_len, - fdm_ptr->hpte_region.bytes_dumped); + be64_to_cpu(fdm_ptr->hpte_region.destination_address), + be64_to_cpu(fdm_ptr->hpte_region.destination_address) + + be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1, + be64_to_cpu(fdm_ptr->hpte_region.source_len), + be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped)); seq_printf(m, "DUMP: [%#016llx-%#016llx] %#llx bytes, " "Dumped: %#llx\n", - fdm_ptr->rmr_region.destination_address, - fdm_ptr->rmr_region.destination_address + - fdm_ptr->rmr_region.source_len - 1, - fdm_ptr->rmr_region.source_len, - fdm_ptr->rmr_region.bytes_dumped); + be64_to_cpu(fdm_ptr->rmr_region.destination_address), + be64_to_cpu(fdm_ptr->rmr_region.destination_address) + + be64_to_cpu(fdm_ptr->rmr_region.source_len) - 1, + be64_to_cpu(fdm_ptr->rmr_region.source_len), + be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped)); if (!fdm_active || (fw_dump.reserve_dump_area_start == - fdm_ptr->cpu_state_data.destination_address)) + be64_to_cpu(fdm_ptr->cpu_state_data.destination_address))) goto out; /* Dump is active. Show reserved memory region. */ @@ -1215,10 +1215,10 @@ static int fadump_region_show(struct seq_file *m, void *private) " : [%#016llx-%#016llx] %#llx bytes, " "Dumped: %#llx\n", (unsigned long long)fw_dump.reserve_dump_area_start, - fdm_ptr->cpu_state_data.destination_address - 1, - fdm_ptr->cpu_state_data.destination_address - + be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - 1, + be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - fw_dump.reserve_dump_area_start, - fdm_ptr->cpu_state_data.destination_address - + be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - fw_dump.reserve_dump_area_start); out: if (fdm_active) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 390311c0f03d..44d4d8eb3c85 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -449,7 +449,7 @@ void ftrace_replace_code(int enable) rec = ftrace_rec_iter_record(iter); ret = __ftrace_replace_code(rec, enable); if (ret) { - ftrace_bug(ret, rec->ip); + ftrace_bug(ret, rec); return; } } @@ -510,79 +510,36 @@ int ftrace_disable_ftrace_graph_caller(void) } #endif /* CONFIG_DYNAMIC_FTRACE */ -#ifdef CONFIG_PPC64 -extern void mod_return_to_handler(void); -#endif - /* * Hook the return address and push it in the stack of return addrs - * in current thread info. + * in current thread info. Return the address we want to divert to. */ -void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) +unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) { - unsigned long old; - int faulted; struct ftrace_graph_ent trace; - unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long return_hooker; if (unlikely(ftrace_graph_is_dead())) - return; + goto out; if (unlikely(atomic_read(¤t->tracing_graph_pause))) - return; - -#ifdef CONFIG_PPC64 - /* non core kernel code needs to save and restore the TOC */ - if (REGION_ID(self_addr) != KERNEL_REGION_ID) - return_hooker = (unsigned long)&mod_return_to_handler; -#endif - - return_hooker = ppc_function_entry((void *)return_hooker); + goto out; - /* - * Protect against fault, even if it shouldn't - * happen. This tool is too much intrusive to - * ignore such a protection. - */ - asm volatile( - "1: " PPC_LL "%[old], 0(%[parent])\n" - "2: " PPC_STL "%[return_hooker], 0(%[parent])\n" - " li %[faulted], 0\n" - "3:\n" - - ".section .fixup, \"ax\"\n" - "4: li %[faulted], 1\n" - " b 3b\n" - ".previous\n" - - ".section __ex_table,\"a\"\n" - PPC_LONG_ALIGN "\n" - PPC_LONG "1b,4b\n" - PPC_LONG "2b,4b\n" - ".previous" - - : [old] "=&r" (old), [faulted] "=r" (faulted) - : [parent] "r" (parent), [return_hooker] "r" (return_hooker) - : "memory" - ); - - if (unlikely(faulted)) { - ftrace_graph_stop(); - WARN_ON(1); - return; - } + return_hooker = ppc_function_entry(return_to_handler); - trace.func = self_addr; + trace.func = ip; trace.depth = current->curr_ret_stack + 1; /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - *parent = old; - return; - } + if (!ftrace_graph_entry(&trace)) + goto out; + + if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) + goto out; - if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY) - *parent = old; + parent = return_hooker; +out: + return parent; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index fafff8dbd5d9..d99aac0d69f1 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -33,13 +33,31 @@ /* Macro to make the code more readable. */ #ifdef CONFIG_8xx_CPU6 -#define DO_8xx_CPU6(val, reg) \ - li reg, val; \ - stw reg, 12(r0); \ - lwz reg, 12(r0); +#define SPRN_MI_TWC_ADDR 0x2b80 +#define SPRN_MI_RPN_ADDR 0x2d80 +#define SPRN_MD_TWC_ADDR 0x3b80 +#define SPRN_MD_RPN_ADDR 0x3d80 + +#define MTSPR_CPU6(spr, reg, treg) \ + li treg, spr##_ADDR; \ + stw treg, 12(r0); \ + lwz treg, 12(r0); \ + mtspr spr, reg #else -#define DO_8xx_CPU6(val, reg) +#define MTSPR_CPU6(spr, reg, treg) \ + mtspr spr, reg #endif + +/* + * Value for the bits that have fixed value in RPN entries. + * Also used for tagging DAR for DTLBerror. + */ +#ifdef CONFIG_PPC_16K_PAGES +#define RPN_PATTERN (0x00f0 | MD_SPS16K) +#else +#define RPN_PATTERN 0x00f0 +#endif + __HEAD _ENTRY(_stext); _ENTRY(_start); @@ -65,13 +83,6 @@ _ENTRY(_start); * 8M 1:1. I also mapped an additional I/O space 1:1 so we can get to * the "internal" processor registers before MMU_init is called. * - * The TLB code currently contains a major hack. Since I use the condition - * code register, I have to save and restore it. I am out of registers, so - * I just store it in memory location 0 (the TLB handlers are not reentrant). - * To avoid making any decisions, I need to use the "segment" valid bit - * in the first level table, but that would require many changes to the - * Linux page directory/table functions that I don't want to do right now. - * * -- Dan */ .globl __start @@ -211,7 +222,7 @@ MachineCheck: EXCEPTION_PROLOG mfspr r4,SPRN_DAR stw r4,_DAR(r11) - li r5,0x00f0 + li r5,RPN_PATTERN mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */ mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) @@ -219,30 +230,16 @@ MachineCheck: EXC_XFER_STD(0x200, machine_check_exception) /* Data access exception. - * This is "never generated" by the MPC8xx. We jump to it for other - * translation errors. + * This is "never generated" by the MPC8xx. */ . = 0x300 DataAccess: - EXCEPTION_PROLOG - mfspr r10,SPRN_DSISR - stw r10,_DSISR(r11) - mr r5,r10 - mfspr r4,SPRN_DAR - li r10,0x00f0 - mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ - EXC_XFER_LITE(0x300, handle_page_fault) /* Instruction access exception. - * This is "never generated" by the MPC8xx. We jump to it for other - * translation errors. + * This is "never generated" by the MPC8xx. */ . = 0x400 InstructionAccess: - EXCEPTION_PROLOG - mr r4,r12 - mr r5,r9 - EXC_XFER_LITE(0x400, handle_page_fault) /* External interrupt */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) @@ -253,7 +250,7 @@ Alignment: EXCEPTION_PROLOG mfspr r4,SPRN_DAR stw r4,_DAR(r11) - li r5,0x00f0 + li r5,RPN_PATTERN mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */ mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) @@ -292,8 +289,8 @@ SystemCall: . = 0x1100 /* * For the MPC8xx, this is a software tablewalk to load the instruction - * TLB. It is modelled after the example in the Motorola manual. The task - * switch loads the M_TWB register with the pointer to the first level table. + * TLB. The task switch loads the M_TW register with the pointer to the first + * level table. * If we discover there is no second level table (value is zero) or if there * is an invalid pte, we load that into the TLB, which causes another fault * into the TLB Error interrupt where we can handle such problems. @@ -302,20 +299,17 @@ SystemCall: */ InstructionTLBMiss: #ifdef CONFIG_8xx_CPU6 - stw r3, 8(r0) + mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ #ifdef CONFIG_8xx_CPU15 - addi r11, r10, 0x1000 + addi r11, r10, PAGE_SIZE tlbie r11 - addi r11, r10, -0x1000 + addi r11, r10, -PAGE_SIZE tlbie r11 #endif - DO_8xx_CPU6(0x3780, r3) - mtspr SPRN_MD_EPN, r10 /* Have to use MD_EPN for walk, MI_EPN can't */ - mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -323,32 +317,37 @@ InstructionTLBMiss: #ifdef CONFIG_MODULES /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ - andi. r11, r10, 0x0800 /* Address >= 0x80000000 */ + andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ +#endif + mfspr r11, SPRN_M_TW /* Get level 1 table base address */ +#ifdef CONFIG_MODULES beq 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - rlwimi r10, r11, 0, 2, 19 + lis r11, (swapper_pg_dir-PAGE_OFFSET)@h + ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: #endif - lwz r11, 0(r10) /* Get the level 1 entry */ + /* Extract level 1 index */ + rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 + lwzx r11, r10, r11 /* Get the level 1 entry */ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ /* We have a pte table, so load the MI_TWC with the attributes * for this "segment." */ - ori r11,r11,1 /* Set valid bit */ - DO_8xx_CPU6(0x2b80, r3) - mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ - mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ - lwz r10, 0(r11) /* Get the pte */ + MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ + mfspr r11, SPRN_SRR0 /* Get effective address of fault */ + /* Extract level 2 index */ + rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 + lwzx r10, r10, r11 /* Get the pte */ #ifdef CONFIG_SWAP andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT cmpwi cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT + li r11, RPN_PATTERN bne- cr0, 2f +#else + li r11, RPN_PATTERN #endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 21 and 28 must be clear. @@ -356,62 +355,63 @@ InstructionTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ - li r11, 0x00f0 rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */ - DO_8xx_CPU6(0x2d80, r3) - mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ + MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) + mfspr r3, SPRN_DAR + mtspr SPRN_DAR, r11 /* Tag DAR */ #endif mfspr r10, SPRN_SPRG_SCRATCH2 EXCEPTION_EPILOG_0 rfi 2: - mfspr r11, SPRN_SRR1 + mfspr r10, SPRN_SRR1 /* clear all error bits as TLB Miss * sets a few unconditionally */ - rlwinm r11, r11, 0, 0xffff - mtspr SPRN_SRR1, r11 + rlwinm r10, r10, 0, 0xffff + mtspr SPRN_SRR1, r10 /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) + mfspr r3, SPRN_DAR + mtspr SPRN_DAR, r11 /* Tag DAR */ #endif mfspr r10, SPRN_SPRG_SCRATCH2 - EXCEPTION_EPILOG_0 - b InstructionAccess + b InstructionTLBError1 . = 0x1200 DataStoreTLBMiss: #ifdef CONFIG_8xx_CPU6 - stw r3, 8(r0) + mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 mtspr SPRN_SPRG_SCRATCH2, r10 - mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ + mfspr r10, SPRN_MD_EPN /* If we are faulting a kernel address, we have to use the * kernel page tables. */ - andi. r11, r10, 0x0800 + andis. r11, r10, 0x8000 + mfspr r11, SPRN_M_TW /* Get level 1 table base address */ beq 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - rlwimi r10, r11, 0, 2, 19 + lis r11, (swapper_pg_dir-PAGE_OFFSET)@h + ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: - lwz r11, 0(r10) /* Get the level 1 entry */ + /* Extract level 1 index */ + rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 + lwzx r11, r10, r11 /* Get the level 1 entry */ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ /* We have a pte table, so load fetch the pte from the table. */ - ori r11, r11, 1 /* Set valid bit in physical L2 page */ - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ - mfspr r10, SPRN_MD_TWC /* ....and get the pte address */ + mfspr r10, SPRN_MD_EPN /* Get address of fault */ + /* Extract level 2 index */ + rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 + rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ /* Insert the Guarded flag into the TWC from the Linux PTE. @@ -425,8 +425,7 @@ DataStoreTLBMiss: * It is bit 25 in the Linux PTE and bit 30 in the TWC */ rlwimi r11, r10, 32-5, 30, 30 - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11 + MTSPR_CPU6(SPRN_MD_TWC, r11, r3) /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. * We also need to know if the insn is a load/store, so: @@ -442,14 +441,8 @@ DataStoreTLBMiss: and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT #endif - /* Honour kernel RO, User NA */ - /* 0x200 == Extended encoding, bit 22 */ - rlwimi r10, r10, 32-2, 0x200 /* Copy USER to bit 22, 0x200 */ - /* r11 = (r10 & _PAGE_RW) >> 1 */ - rlwinm r11, r10, 32-1, 0x200 - or r10, r11, r10 - /* invert RW and 0x200 bits */ - xori r10, r10, _PAGE_RW | 0x200 + /* invert RW */ + xori r10, r10, _PAGE_RW /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 22 and 28 must be clear. @@ -457,14 +450,13 @@ DataStoreTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ -2: li r11, 0x00f0 +2: li r11, RPN_PATTERN rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ - DO_8xx_CPU6(0x3d80, r3) - mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ + MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) + mfspr r3, SPRN_DAR #endif mtspr SPRN_DAR, r11 /* Tag DAR */ mfspr r10, SPRN_SPRG_SCRATCH2 @@ -477,7 +469,17 @@ DataStoreTLBMiss: */ . = 0x1300 InstructionTLBError: - b InstructionAccess + EXCEPTION_PROLOG_0 +InstructionTLBError1: + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 + mr r4,r12 + mr r5,r9 + andis. r10,r5,0x4000 + beq+ 1f + tlbie r4 + /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ +1: EXC_XFER_LITE(0x400, handle_page_fault) /* This is the data TLB error on the MPC8xx. This could be due to * many reasons, including a dirty update to a pte. We bail out to @@ -488,11 +490,21 @@ DataTLBError: EXCEPTION_PROLOG_0 mfspr r11, SPRN_DAR - cmpwi cr0, r11, 0x00f0 + cmpwi cr0, r11, RPN_PATTERN beq- FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ - EXCEPTION_EPILOG_0 - b DataAccess + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 + mfspr r5,SPRN_DSISR + stw r5,_DSISR(r11) + mfspr r4,SPRN_DAR + andis. r10,r5,0x4000 + beq+ 1f + tlbie r4 +1: li r10,RPN_PATTERN + mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ + /* 0x300 is DataAccess exception, needed by bad_page_fault() */ + EXC_XFER_LITE(0x300, handle_page_fault) EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) @@ -521,29 +533,30 @@ DARFixed:/* Return from dcbx instruction bug workaround */ #define NO_SELF_MODIFYING_CODE FixupDAR:/* Entry point for dcbx workaround. */ #ifdef CONFIG_8xx_CPU6 - stw r3, 8(r0) + mtspr SPRN_DAR, r3 #endif mtspr SPRN_SPRG_SCRATCH2, r10 /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ - DO_8xx_CPU6(0x3780, r3) - mtspr SPRN_MD_EPN, r10 - mfspr r11, SPRN_M_TWB /* Get level 1 table entry address */ + mfspr r11, SPRN_M_TW /* Get level 1 table base address */ beq- 3f /* Branch if user space */ lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l - rlwimi r11, r10, 32-20, 0xffc /* r11 = r11&~0xffc|(r10>>20)&0xffc */ -3: lwz r11, 0(r11) /* Get the level 1 entry */ - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ - mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ - lwz r11, 0(r11) /* Get the pte */ + /* Extract level 1 index */ +3: rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 + lwzx r11, r10, r11 /* Get the level 1 entry */ + rlwinm r10, r11,0,0,19 /* Extract page descriptor page address */ + mfspr r11, SPRN_SRR0 /* Get effective address of fault */ + /* Extract level 2 index */ + rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 + lwzx r11, r10, r11 /* Get the pte */ #ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) /* restore r3 from memory */ + mfspr r3, SPRN_DAR #endif /* concat physical page address(r11) and page offset(r10) */ - rlwimi r11, r10, 0, 20, 31 + mfspr r10, SPRN_SRR0 + rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 lwz r11,0(r11) /* Check if it really is a dcbx instruction. */ /* dcbt and dcbtst does not generate DTLB Misses/Errors, @@ -698,11 +711,11 @@ start_here: #ifdef CONFIG_8xx_CPU6 lis r4, cpu6_errata_word@h ori r4, r4, cpu6_errata_word@l - li r3, 0x3980 + li r3, 0x3f80 stw r3, 12(r4) lwz r3, 12(r4) #endif - mtspr SPRN_M_TWB, r6 + mtspr SPRN_M_TW, r6 lis r4,2f@h ori r4,r4,2f@l tophys(r4,r4) @@ -876,10 +889,10 @@ _GLOBAL(set_context) lis r6, cpu6_errata_word@h ori r6, r6, cpu6_errata_word@l tophys (r4, r4) - li r7, 0x3980 + li r7, 0x3f80 stw r7, 12(r6) lwz r7, 12(r6) - mtspr SPRN_M_TWB, r4 /* Update MMU base address */ + mtspr SPRN_M_TW, r4 /* Update MMU base address */ li r7, 0x3380 stw r7, 12(r6) lwz r7, 12(r6) @@ -887,7 +900,7 @@ _GLOBAL(set_context) #else mtspr SPRN_M_CASID,r3 /* Update context */ tophys (r4, r4) - mtspr SPRN_M_TWB, r4 /* and pgd */ + mtspr SPRN_M_TW, r4 /* and pgd */ #endif SYNC blr @@ -919,12 +932,13 @@ set_dec_cpu6: .globl sdata sdata: .globl empty_zero_page + .align PAGE_SHIFT empty_zero_page: - .space 4096 + .space PAGE_SIZE .globl swapper_pg_dir swapper_pg_dir: - .space 4096 + .space PGD_TABLE_SIZE /* Room for two PTE table poiners, usually the kernel and current user * pointer to their respective root page table (pgdir). diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 1f7d84e2e8b2..05e804cdecaa 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -63,7 +63,7 @@ int hw_breakpoint_slots(int type) int arch_install_hw_breakpoint(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot = &__get_cpu_var(bp_per_reg); + struct perf_event **slot = this_cpu_ptr(&bp_per_reg); *slot = bp; @@ -88,7 +88,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) */ void arch_uninstall_hw_breakpoint(struct perf_event *bp) { - struct perf_event **slot = &__get_cpu_var(bp_per_reg); + struct perf_event **slot = this_cpu_ptr(&bp_per_reg); if (*slot != bp) { WARN_ONCE(1, "Can't find the breakpoint"); @@ -226,7 +226,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) */ rcu_read_lock(); - bp = __get_cpu_var(bp_per_reg); + bp = __this_cpu_read(bp_per_reg); if (!bp) goto out; info = counter_arch_bp(bp); diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index c0754bbf8118..05adc8bbdef8 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -18,9 +18,25 @@ #include <asm/hw_irq.h> #include <asm/kvm_book3s_asm.h> #include <asm/opal.h> +#include <asm/cpuidle.h> +#include <asm/mmu-hash64.h> #undef DEBUG +/* + * Use unused space in the interrupt stack to save and restore + * registers for winkle support. + */ +#define _SDR1 GPR3 +#define _RPR GPR4 +#define _SPURR GPR5 +#define _PURR GPR6 +#define _TSCR GPR7 +#define _DSCR GPR8 +#define _AMOR GPR9 +#define _WORT GPR10 +#define _WORC GPR11 + /* Idle state entry routines */ #define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ @@ -37,8 +53,7 @@ /* * Pass requested state in r3: - * 0 - nap - * 1 - sleep + * r3 - PNV_THREAD_NAP/SLEEP/WINKLE * * To check IRQ_HAPPENED in r4 * 0 - don't check @@ -101,18 +116,105 @@ _GLOBAL(power7_powersave_common) std r9,_MSR(r1) std r1,PACAR1(r13) -_GLOBAL(power7_enter_nap_mode) + /* + * Go to real mode to do the nap, as required by the architecture. + * Also, we need to be in real mode before setting hwthread_state, + * because as soon as we do that, another thread can switch + * the MMU context to the guest. + */ + LOAD_REG_IMMEDIATE(r5, MSR_IDLE) + li r6, MSR_RI + andc r6, r9, r6 + LOAD_REG_ADDR(r7, power7_enter_nap_mode) + mtmsrd r6, 1 /* clear RI before setting SRR0/1 */ + mtspr SPRN_SRR0, r7 + mtspr SPRN_SRR1, r5 + rfid + + .globl power7_enter_nap_mode +power7_enter_nap_mode: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* Tell KVM we're napping */ li r4,KVM_HWTHREAD_IN_NAP stb r4,HSTATE_HWTHREAD_STATE(r13) #endif - cmpwi cr0,r3,1 - beq 2f + stb r3,PACA_THREAD_IDLE_STATE(r13) + cmpwi cr3,r3,PNV_THREAD_SLEEP + bge cr3,2f IDLE_STATE_ENTER_SEQ(PPC_NAP) /* No return */ -2: IDLE_STATE_ENTER_SEQ(PPC_SLEEP) - /* No return */ +2: + /* Sleep or winkle */ + lbz r7,PACA_THREAD_MASK(r13) + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) +lwarx_loop1: + lwarx r15,0,r14 + andc r15,r15,r7 /* Clear thread bit */ + + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS + +/* + * If cr0 = 0, then current thread is the last thread of the core entering + * sleep. Last thread needs to execute the hardware bug workaround code if + * required by the platform. + * Make the workaround call unconditionally here. The below branch call is + * patched out when the idle states are discovered if the platform does not + * require it. + */ +.global pnv_fastsleep_workaround_at_entry +pnv_fastsleep_workaround_at_entry: + beq fastsleep_workaround_at_entry + + stwcx. r15,0,r14 + bne- lwarx_loop1 + isync + +common_enter: /* common code for all the threads entering sleep or winkle */ + bgt cr3,enter_winkle + IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + +fastsleep_workaround_at_entry: + ori r15,r15,PNV_CORE_IDLE_LOCK_BIT + stwcx. r15,0,r14 + bne- lwarx_loop1 + isync + + /* Fast sleep workaround */ + li r3,1 + li r4,1 + li r0,OPAL_CONFIG_CPU_IDLE_STATE + bl opal_call_realmode + + /* Clear Lock bit */ + li r0,0 + lwsync + stw r0,0(r14) + b common_enter + +enter_winkle: + /* + * Note all register i.e per-core, per-subcore or per-thread is saved + * here since any thread in the core might wake up first + */ + mfspr r3,SPRN_SDR1 + std r3,_SDR1(r1) + mfspr r3,SPRN_RPR + std r3,_RPR(r1) + mfspr r3,SPRN_SPURR + std r3,_SPURR(r1) + mfspr r3,SPRN_PURR + std r3,_PURR(r1) + mfspr r3,SPRN_TSCR + std r3,_TSCR(r1) + mfspr r3,SPRN_DSCR + std r3,_DSCR(r1) + mfspr r3,SPRN_AMOR + std r3,_AMOR(r1) + mfspr r3,SPRN_WORT + std r3,_WORT(r1) + mfspr r3,SPRN_WORC + std r3,_WORC(r1) + IDLE_STATE_ENTER_SEQ(PPC_WINKLE) _GLOBAL(power7_idle) /* Now check if user or arch enabled NAP mode */ @@ -125,48 +227,21 @@ _GLOBAL(power7_idle) _GLOBAL(power7_nap) mr r4,r3 - li r3,0 + li r3,PNV_THREAD_NAP b power7_powersave_common /* No return */ _GLOBAL(power7_sleep) - li r3,1 + li r3,PNV_THREAD_SLEEP li r4,1 b power7_powersave_common /* No return */ -/* - * Make opal call in realmode. This is a generic function to be called - * from realmode from reset vector. It handles endianess. - * - * r13 - paca pointer - * r1 - stack pointer - * r3 - opal token - */ -opal_call_realmode: - mflr r12 - std r12,_LINK(r1) - ld r2,PACATOC(r13) - /* Set opal return address */ - LOAD_REG_ADDR(r0,return_from_opal_call) - mtlr r0 - /* Handle endian-ness */ - li r0,MSR_LE - mfmsr r12 - andc r12,r12,r0 - mtspr SPRN_HSRR1,r12 - mr r0,r3 /* Move opal token to r0 */ - LOAD_REG_ADDR(r11,opal) - ld r12,8(r11) - ld r2,0(r11) - mtspr SPRN_HSRR0,r12 - hrfid - -return_from_opal_call: - FIXUP_ENDIAN - ld r0,_LINK(r1) - mtlr r0 - blr +_GLOBAL(power7_winkle) + li r3,3 + li r4,1 + b power7_powersave_common + /* No return */ #define CHECK_HMI_INTERRUPT \ mfspr r0,SPRN_SRR1; \ @@ -181,7 +256,7 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ ld r2,PACATOC(r13); \ ld r1,PACAR1(r13); \ std r3,ORIG_GPR3(r1); /* Save original r3 */ \ - li r3,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \ + li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \ bl opal_call_realmode; \ ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ 20: nop; @@ -190,16 +265,190 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ _GLOBAL(power7_wakeup_tb_loss) ld r2,PACATOC(r13); ld r1,PACAR1(r13) + /* + * Before entering any idle state, the NVGPRs are saved in the stack + * and they are restored before switching to the process context. Hence + * until they are restored, they are free to be used. + * + * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode + * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the + * wakeup reason if we branch to kvm_start_guest. + */ + mfspr r16,SPRN_SRR1 BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) + + lbz r7,PACA_THREAD_MASK(r13) + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) +lwarx_loop2: + lwarx r15,0,r14 + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + /* + * Lock bit is set in one of the 2 cases- + * a. In the sleep/winkle enter path, the last thread is executing + * fastsleep workaround code. + * b. In the wake up path, another thread is executing fastsleep + * workaround undo code or resyncing timebase or restoring context + * In either case loop until the lock bit is cleared. + */ + bne core_idle_lock_held + + cmpwi cr2,r15,0 + lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) + and r4,r4,r15 + cmpwi cr1,r4,0 /* Check if first in subcore */ + + /* + * At this stage + * cr1 - 0b0100 if first thread to wakeup in subcore + * cr2 - 0b0100 if first thread to wakeup in core + * cr3- 0b0010 if waking up from sleep or winkle + * cr4 - 0b0100 if waking up from winkle + */ + + or r15,r15,r7 /* Set thread bit */ + + beq cr1,first_thread_in_subcore + + /* Not first thread in subcore to wake up */ + stwcx. r15,0,r14 + bne- lwarx_loop2 + isync + b common_exit + +core_idle_lock_held: + HMT_LOW +core_idle_lock_loop: + lwz r15,0(14) + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bne core_idle_lock_loop + HMT_MEDIUM + b lwarx_loop2 + +first_thread_in_subcore: + /* First thread in subcore to wakeup */ + ori r15,r15,PNV_CORE_IDLE_LOCK_BIT + stwcx. r15,0,r14 + bne- lwarx_loop2 + isync + + /* + * If waking up from sleep, subcore state is not lost. Hence + * skip subcore state restore + */ + bne cr4,subcore_state_restored + + /* Restore per-subcore state */ + ld r4,_SDR1(r1) + mtspr SPRN_SDR1,r4 + ld r4,_RPR(r1) + mtspr SPRN_RPR,r4 + ld r4,_AMOR(r1) + mtspr SPRN_AMOR,r4 + +subcore_state_restored: + /* + * Check if the thread is also the first thread in the core. If not, + * skip to clear_lock. + */ + bne cr2,clear_lock + +first_thread_in_core: + + /* + * First thread in the core waking up from fastsleep. It needs to + * call the fastsleep workaround code if the platform requires it. + * Call it unconditionally here. The below branch instruction will + * be patched out when the idle states are discovered if platform + * does not require workaround. + */ +.global pnv_fastsleep_workaround_at_exit +pnv_fastsleep_workaround_at_exit: + b fastsleep_workaround_at_exit + +timebase_resync: + /* Do timebase resync if we are waking up from sleep. Use cr3 value + * set in exceptions-64s.S */ + ble cr3,clear_lock /* Time base re-sync */ - li r3,OPAL_RESYNC_TIMEBASE + li r0,OPAL_RESYNC_TIMEBASE bl opal_call_realmode; - /* TODO: Check r3 for failure */ + /* + * If waking up from sleep, per core state is not lost, skip to + * clear_lock. + */ + bne cr4,clear_lock + + /* Restore per core state */ + ld r4,_TSCR(r1) + mtspr SPRN_TSCR,r4 + ld r4,_WORC(r1) + mtspr SPRN_WORC,r4 + +clear_lock: + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS + lwsync + stw r15,0(r14) + +common_exit: + /* + * Common to all threads. + * + * If waking up from sleep, hypervisor state is not lost. Hence + * skip hypervisor state restore. + */ + bne cr4,hypervisor_state_restored + + /* Waking up from winkle */ + + /* Restore per thread state */ + bl __restore_cpu_power8 + + /* Restore SLB from PACA */ + ld r8,PACA_SLBSHADOWPTR(r13) + + .rept SLB_NUM_BOLTED + li r3, SLBSHADOW_SAVEAREA + LDX_BE r5, r8, r3 + addi r3, r3, 8 + LDX_BE r6, r8, r3 + andis. r7,r5,SLB_ESID_V@h + beq 1f + slbmte r6,r5 +1: addi r8,r8,16 + .endr + + ld r4,_SPURR(r1) + mtspr SPRN_SPURR,r4 + ld r4,_PURR(r1) + mtspr SPRN_PURR,r4 + ld r4,_DSCR(r1) + mtspr SPRN_DSCR,r4 + ld r4,_WORT(r1) + mtspr SPRN_WORT,r4 + +hypervisor_state_restored: + + li r5,PNV_THREAD_RUNNING + stb r5,PACA_THREAD_IDLE_STATE(r13) + + mtspr SPRN_SRR1,r16 +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + li r0,KVM_HWTHREAD_IN_KERNEL + stb r0,HSTATE_HWTHREAD_STATE(r13) + /* Order setting hwthread_state vs. testing hwthread_req */ + sync + lbz r0,HSTATE_HWTHREAD_REQ(r13) + cmpwi r0,0 + beq 6f + b kvm_start_guest +6: +#endif + REST_NVGPRS(r1) REST_GPR(2, r1) ld r3,_CCR(r1) @@ -212,6 +461,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) mtspr SPRN_SRR0,r5 rfid +fastsleep_workaround_at_exit: + li r3,1 + li r4,0 + li r0,OPAL_CONFIG_CPU_IDLE_STATE + bl opal_call_realmode + b timebase_resync + +/* + * R3 here contains the value that will be returned to the caller + * of power7_nap. + */ _GLOBAL(power7_wakeup_loss) ld r1,PACAR1(r13) BEGIN_FTR_SECTION @@ -219,15 +479,19 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) REST_NVGPRS(r1) REST_GPR(2, r1) - ld r3,_CCR(r1) + ld r6,_CCR(r1) ld r4,_MSR(r1) ld r5,_NIP(r1) addi r1,r1,INT_FRAME_SIZE - mtcr r3 + mtcr r6 mtspr SPRN_SRR1,r4 mtspr SPRN_SRR0,r5 rfid +/* + * R3 here contains the value that will be returned to the caller + * of power7_nap. + */ _GLOBAL(power7_wakeup_noloss) lbz r0,PACA_NAPSTATELOST(r13) cmpwi r0,0 diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index a10642a0d861..5d3968c4d799 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -208,7 +208,7 @@ static unsigned long iommu_range_alloc(struct device *dev, * We don't need to disable preemption here because any CPU can * safely use any IOMMU pool. */ - pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1); + pool_nr = __this_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1); if (largealloc) pool = &(tbl->large_pool); @@ -428,10 +428,10 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, ppc_md.tce_flush(tbl); } -int iommu_map_sg(struct device *dev, struct iommu_table *tbl, - struct scatterlist *sglist, int nelems, - unsigned long mask, enum dma_data_direction direction, - struct dma_attrs *attrs) +int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, + struct scatterlist *sglist, int nelems, + unsigned long mask, enum dma_data_direction direction, + struct dma_attrs *attrs) { dma_addr_t dma_next = 0, dma_addr; struct scatterlist *s, *outs, *segstart; @@ -539,7 +539,7 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, DBG("mapped %d elements:\n", outcount); - /* For the sake of iommu_unmap_sg, we clear out the length in the + /* For the sake of ppc_iommu_unmap_sg, we clear out the length in the * next entry of the sglist if we didn't fill the list completely */ if (outcount < incount) { @@ -572,9 +572,9 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, } -void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction, - struct dma_attrs *attrs) +void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction, + struct dma_attrs *attrs) { struct scatterlist *sg; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index c14383575fe8..45096033d37b 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -50,7 +50,6 @@ #include <linux/list.h> #include <linux/radix-tree.h> #include <linux/mutex.h> -#include <linux/bootmem.h> #include <linux/pci.h> #include <linux/debugfs.h> #include <linux/of.h> @@ -114,7 +113,7 @@ static inline notrace void set_soft_enabled(unsigned long enable) static inline notrace int decrementer_check_overflow(void) { u64 now = get_tb_or_rtc(); - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); return now >= *next_tb; } @@ -499,7 +498,7 @@ void __do_irq(struct pt_regs *regs) /* And finally process it */ if (unlikely(irq == NO_IRQ)) - __get_cpu_var(irq_stat).spurious_irqs++; + __this_cpu_inc(irq_stat.spurious_irqs); else generic_handle_irq(irq); diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 8504657379f1..e77c3ccf8dcf 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -155,7 +155,7 @@ static int kgdb_singlestep(struct pt_regs *regs) { struct thread_info *thread_info, *exception_thread_info; struct thread_info *backup_current_thread_info = - &__get_cpu_var(kgdb_thread_info); + this_cpu_ptr(&kgdb_thread_info); if (user_mode(regs)) return 0; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 2f72af82513c..7c053f281406 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -119,7 +119,7 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr; } @@ -127,7 +127,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_msr = regs->msr; } @@ -192,7 +192,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) ret = 1; goto no_kprobe; } - p = __get_cpu_var(current_kprobe); + p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { goto ss_probe; } diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 879b3aacac32..f96d1ec24189 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -330,7 +330,7 @@ void default_machine_kexec(struct kimage *image) * using debugger IPI. */ - if (crashing_cpu == -1) + if (!kdump_in_progress()) kexec_prepare_cpus(); pr_debug("kexec: Starting switchover sequence.\n"); diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index a7fd4cb78b78..15c99b649b04 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -73,8 +73,8 @@ void save_mce_event(struct pt_regs *regs, long handled, uint64_t nip, uint64_t addr) { uint64_t srr1; - int index = __get_cpu_var(mce_nest_count)++; - struct machine_check_event *mce = &__get_cpu_var(mce_event[index]); + int index = __this_cpu_inc_return(mce_nest_count); + struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); /* * Return if we don't have enough space to log mce event. @@ -143,7 +143,7 @@ void save_mce_event(struct pt_regs *regs, long handled, */ int get_mce_event(struct machine_check_event *mce, bool release) { - int index = __get_cpu_var(mce_nest_count) - 1; + int index = __this_cpu_read(mce_nest_count) - 1; struct machine_check_event *mc_evt; int ret = 0; @@ -153,7 +153,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) /* Check if we have MCE info to process. */ if (index < MAX_MC_EVT) { - mc_evt = &__get_cpu_var(mce_event[index]); + mc_evt = this_cpu_ptr(&mce_event[index]); /* Copy the event structure and release the original */ if (mce) *mce = *mc_evt; @@ -163,7 +163,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) } /* Decrement the count to free the slot. */ if (release) - __get_cpu_var(mce_nest_count)--; + __this_cpu_dec(mce_nest_count); return ret; } @@ -184,13 +184,13 @@ void machine_check_queue_event(void) if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; - index = __get_cpu_var(mce_queue_count)++; + index = __this_cpu_inc_return(mce_queue_count); /* If queue is full, just return for now. */ if (index >= MAX_MC_EVT) { - __get_cpu_var(mce_queue_count)--; + __this_cpu_dec(mce_queue_count); return; } - __get_cpu_var(mce_event_queue[index]) = evt; + memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt)); /* Queue irq work to process this event later. */ irq_work_queue(&mce_event_process_work); @@ -208,11 +208,11 @@ static void machine_check_process_queued_event(struct irq_work *work) * For now just print it to console. * TODO: log this error event to FSP or nvram. */ - while (__get_cpu_var(mce_queue_count) > 0) { - index = __get_cpu_var(mce_queue_count) - 1; + while (__this_cpu_read(mce_queue_count) > 0) { + index = __this_cpu_read(mce_queue_count) - 1; machine_check_print_event_info( - &__get_cpu_var(mce_event_queue[index])); - __get_cpu_var(mce_queue_count)--; + this_cpu_ptr(&mce_event_queue[index])); + __this_cpu_dec(mce_queue_count); } } diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index aa9aff3d6ad3..b6f123ab90ed 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -79,7 +79,7 @@ static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits) } if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) { if (cur_cpu_spec && cur_cpu_spec->flush_tlb) - cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE); + cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET); /* reset error bits */ dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB; } @@ -110,7 +110,7 @@ static long mce_handle_common_ierror(uint64_t srr1) break; case P7_SRR1_MC_IFETCH_TLB_MULTIHIT: if (cur_cpu_spec && cur_cpu_spec->flush_tlb) { - cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE); + cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET); handled = 1; } break; diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index f87bc1b4bdda..2f35a72642c6 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -110,7 +110,6 @@ static struct platform_driver of_pci_phb_driver = { .probe = of_pci_phb_probe, .driver = { .name = "of-pci", - .owner = THIS_MODULE, .of_match_table = of_pci_phb_ids, }, }; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index e5dad9a9edc0..37d512d35943 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -20,7 +20,6 @@ #include <linux/pci.h> #include <linux/string.h> #include <linux/init.h> -#include <linux/bootmem.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/of_address.h> @@ -1464,7 +1463,7 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose, res = &hose->io_resource; if (!res->flags) { - printk(KERN_WARNING "PCI: I/O resource not set for host" + pr_info("PCI: I/O resource not set for host" " bridge %s (domain %d)\n", hose->dn->full_name, hose->global_number); } else { diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 432459c817fa..1f7930037cb7 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -199,9 +199,7 @@ pci_create_OF_bus_map(void) struct property* of_prop; struct device_node *dn; - of_prop = (struct property*) alloc_bootmem(sizeof(struct property) + 256); - if (!of_prop) - return; + of_prop = memblock_virt_alloc(sizeof(struct property) + 256, 0); dn = of_find_node_by_path("/"); if (dn) { memset(of_prop, -1, sizeof(struct property) + 256); diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 155013da27e0..60bb187cb46a 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -17,7 +17,6 @@ #include <linux/pci.h> #include <linux/string.h> #include <linux/init.h> -#include <linux/bootmem.h> #include <linux/export.h> #include <linux/mm.h> #include <linux/list.h> @@ -266,13 +265,3 @@ int pcibus_to_node(struct pci_bus *bus) } EXPORT_SYMBOL(pcibus_to_node); #endif - -static void quirk_radeon_32bit_msi(struct pci_dev *dev) -{ - struct pci_dn *pdn = pci_get_pdn(dev); - - if (pdn) - pdn->force_32bit_msi = true; -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x68f2, quirk_radeon_32bit_msi); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0xaa68, quirk_radeon_32bit_msi); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 923cd2daba89..b4cc7bef6b16 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -37,9 +37,9 @@ #include <linux/personality.h> #include <linux/random.h> #include <linux/hw_breakpoint.h> +#include <linux/uaccess.h> #include <asm/pgtable.h> -#include <asm/uaccess.h> #include <asm/io.h> #include <asm/processor.h> #include <asm/mmu.h> @@ -499,7 +499,7 @@ static inline int set_dawr(struct arch_hw_breakpoint *brk) void __set_breakpoint(struct arch_hw_breakpoint *brk) { - __get_cpu_var(current_brk) = *brk; + memcpy(this_cpu_ptr(¤t_brk), brk, sizeof(*brk)); if (cpu_has_feature(CPU_FTR_DAWR)) set_dawr(brk); @@ -842,7 +842,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * schedule DABR */ #ifndef CONFIG_HAVE_HW_BREAKPOINT - if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk))) + if (unlikely(!hw_brk_match(this_cpu_ptr(¤t_brk), &new->thread.hw_brk))) __set_breakpoint(&new->thread.hw_brk); #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif @@ -856,7 +856,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * Collect processor utilization data per process */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); + struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); long unsigned start_tb, current_tb; start_tb = old_thread->start_tb; cu->current_tb = current_tb = mfspr(SPRN_PURR); @@ -866,7 +866,7 @@ struct task_struct *__switch_to(struct task_struct *prev, #endif /* CONFIG_PPC64 */ #ifdef CONFIG_PPC_BOOK3S_64 - batch = &__get_cpu_var(ppc64_tlb_batch); + batch = this_cpu_ptr(&ppc64_tlb_batch); if (batch->active) { current_thread_info()->local_flags |= _TLF_LAZY_MMU; if (batch->index) @@ -889,7 +889,7 @@ struct task_struct *__switch_to(struct task_struct *prev, #ifdef CONFIG_PPC_BOOK3S_64 if (current_thread_info()->local_flags & _TLF_LAZY_MMU) { current_thread_info()->local_flags &= ~_TLF_LAZY_MMU; - batch = &__get_cpu_var(ppc64_tlb_batch); + batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -921,12 +921,8 @@ static void show_instructions(struct pt_regs *regs) pc = (unsigned long)phys_to_virt(pc); #endif - /* We use __get_user here *only* to avoid an OOPS on a - * bad address because the pc *should* only be a - * kernel address. - */ if (!__kernel_text_address(pc) || - __get_user(instr, (unsigned int __user *)pc)) { + probe_kernel_address((unsigned int __user *)pc, instr)) { printk(KERN_CONT "XXXXXXXX "); } else { if (regs->nip == pc) @@ -1531,13 +1527,6 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) int curr_frame = current->curr_ret_stack; extern void return_to_handler(void); unsigned long rth = (unsigned long)return_to_handler; - unsigned long mrth = -1; -#ifdef CONFIG_PPC64 - extern void mod_return_to_handler(void); - rth = *(unsigned long *)rth; - mrth = (unsigned long)mod_return_to_handler; - mrth = *(unsigned long *)mrth; -#endif #endif sp = (unsigned long) stack; @@ -1562,7 +1551,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) if (!firstframe || ip != lr) { printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - if ((ip == rth || ip == mrth) && curr_frame >= 0) { + if ((ip == rth) && curr_frame >= 0) { printk(" (%pS)", (void *)current->ret_stack[curr_frame].ret); curr_frame--; @@ -1665,12 +1654,3 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) return ret; } -unsigned long randomize_et_dyn(unsigned long base) -{ - unsigned long ret = PAGE_ALIGN(base + brk_rnd()); - - if (ret < base) - return base; - - return ret; -} diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 099f27e6d1b0..6a799b3cc6b4 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -160,6 +160,12 @@ static struct ibm_pa_feature { {CPU_FTR_NODSISRALIGN, 0, 0, 1, 1, 1}, {0, MMU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0}, + /* + * If the kernel doesn't support TM (ie. CONFIG_PPC_TRANSACTIONAL_MEM=n), + * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP + * which is 0 if the kernel doesn't support TM. + */ + {CPU_FTR_TM_COMP, 0, 0, 22, 0, 0}, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, @@ -696,10 +702,7 @@ void __init early_init_devtree(void *params) reserve_crashkernel(); early_reserve_mem(); - /* - * Ensure that total memory size is page-aligned, because otherwise - * mark_bootmem() gets upset. - */ + /* Ensure that total memory size is page-aligned. */ limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); memblock_enforce_memory_limit(limit); diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 8777fb02349f..fb2fb3ea85e5 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -113,17 +113,6 @@ #define SENSOR_PREFIX "ibm,sensor-" #define cel_to_fahr(x) ((x*9/5)+32) - -/* Globals */ -static struct rtas_sensors sensors; -static struct device_node *rtas_node = NULL; -static unsigned long power_on_time = 0; /* Save the time the user set */ -static char progress_led[MAX_LINELENGTH]; - -static unsigned long rtas_tone_frequency = 1000; -static unsigned long rtas_tone_volume = 0; - -/* ****************STRUCTS******************************************* */ struct individual_sensor { unsigned int token; unsigned int quant; @@ -134,6 +123,15 @@ struct rtas_sensors { unsigned int quant; }; +/* Globals */ +static struct rtas_sensors sensors; +static struct device_node *rtas_node = NULL; +static unsigned long power_on_time = 0; /* Save the time the user set */ +static char progress_led[MAX_LINELENGTH]; + +static unsigned long rtas_tone_frequency = 1000; +static unsigned long rtas_tone_volume = 0; + /* ****************************************************************** */ /* Declarations */ static int ppc_rtas_sensors_show(struct seq_file *m, void *v); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 8b4c857c1421..4af905e81ab0 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1091,8 +1091,8 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) } /* - * Call early during boot, before mem init or bootmem, to retrieve the RTAS - * informations from the device-tree and allocate the RMO buffer for userland + * Call early during boot, before mem init, to retrieve the RTAS + * information from the device-tree and allocate the RMO buffer for userland * accesses. */ void __init rtas_initialize(void) diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 7c55b86206b3..ce230da2c015 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -26,7 +26,6 @@ #include <linux/pci.h> #include <linux/string.h> #include <linux/init.h> -#include <linux/bootmem.h> #include <asm/io.h> #include <asm/pgtable.h> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 1362cd62b3fa..44c8d03558ac 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -139,8 +139,8 @@ void machine_restart(char *cmd) void machine_power_off(void) { machine_shutdown(); - if (ppc_md.power_off) - ppc_md.power_off(); + if (pm_power_off) + pm_power_off(); #ifdef CONFIG_SMP smp_send_stop(); #endif @@ -151,7 +151,7 @@ void machine_power_off(void) /* Used by the G5 thermal driver */ EXPORT_SYMBOL_GPL(machine_power_off); -void (*pm_power_off)(void) = machine_power_off; +void (*pm_power_off)(void); EXPORT_SYMBOL_GPL(pm_power_off); void machine_halt(void) diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 07831ed0d9ef..bb02e9f6944e 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -11,7 +11,6 @@ #include <linux/delay.h> #include <linux/initrd.h> #include <linux/tty.h> -#include <linux/bootmem.h> #include <linux/seq_file.h> #include <linux/root_dev.h> #include <linux/cpu.h> @@ -53,11 +52,6 @@ unsigned long ISA_DMA_THRESHOLD; unsigned int DMA_MODE_READ; unsigned int DMA_MODE_WRITE; -#ifdef CONFIG_VGA_CONSOLE -unsigned long vgacon_remap_base; -EXPORT_SYMBOL(vgacon_remap_base); -#endif - /* * These are used in binfmt_elf.c to put aux entries on the stack * for each elf executable being started. @@ -311,9 +305,8 @@ void __init setup_arch(char **cmdline_p) irqstack_early_init(); - /* set up the bootmem stuff with available memory */ - do_init_bootmem(); - if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab); + initmem_init(); + if ( ppc_md.progress ) ppc_md.progress("setup_arch: initmem", 0x3eab); #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 4f3cfe1b6a33..49f553bbb360 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -660,13 +660,11 @@ static void __init emergency_stack_init(void) } /* - * Called into from start_kernel this initializes bootmem, which is used + * Called into from start_kernel this initializes memblock, which is used * to manage page allocation until mem_init is called. */ void __init setup_arch(char **cmdline_p) { - ppc64_boot_msg(0x12, "Setup Arch"); - *cmdline_p = boot_command_line; /* @@ -691,9 +689,7 @@ void __init setup_arch(char **cmdline_p) exc_lvl_early_init(); emergency_stack_init(); - /* set up the bootmem stuff with available memory */ - do_init_bootmem(); - sparse_init(); + initmem_init(); #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; @@ -711,33 +707,6 @@ void __init setup_arch(char **cmdline_p) if ((unsigned long)_stext & 0xffff) panic("Kernelbase not 64K-aligned (0x%lx)!\n", (unsigned long)_stext); - - ppc64_boot_msg(0x15, "Setup Done"); -} - - -/* ToDo: do something useful if ppc_md is not yet setup. */ -#define PPC64_LINUX_FUNCTION 0x0f000000 -#define PPC64_IPL_MESSAGE 0xc0000000 -#define PPC64_TERM_MESSAGE 0xb0000000 - -static void ppc64_do_msg(unsigned int src, const char *msg) -{ - if (ppc_md.progress) { - char buf[128]; - - sprintf(buf, "%08X\n", src); - ppc_md.progress(buf, 0); - snprintf(buf, 128, "%s", msg); - ppc_md.progress(buf, 0); - } -} - -/* Print a boot progress message. */ -void ppc64_boot_msg(unsigned int src, const char *msg) -{ - ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_IPL_MESSAGE|src, msg); - printk("[boot]%04x %s\n", src, msg); } #ifdef CONFIG_SMP diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 71e186d5f331..8b2d2dc8ef10 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -243,7 +243,7 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) irqreturn_t smp_ipi_demux(void) { - struct cpu_messages *info = &__get_cpu_var(ipi_message); + struct cpu_messages *info = this_cpu_ptr(&ipi_message); unsigned int all; mb(); /* order any irq clear */ @@ -442,9 +442,9 @@ void generic_mach_cpu_die(void) idle_task_exit(); cpu = smp_processor_id(); printk(KERN_DEBUG "CPU%d offline\n", cpu); - __get_cpu_var(cpu_state) = CPU_DEAD; + __this_cpu_write(cpu_state, CPU_DEAD); smp_wmb(); - while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) + while (__this_cpu_read(cpu_state) != CPU_UP_PREPARE) cpu_relax(); } diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 67fd2fd2620a..fa1fd8a0c867 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -394,10 +394,10 @@ void ppc_enable_pmcs(void) ppc_set_pmu_inuse(1); /* Only need to enable them once */ - if (__get_cpu_var(pmcs_enabled)) + if (__this_cpu_read(pmcs_enabled)) return; - __get_cpu_var(pmcs_enabled) = 1; + __this_cpu_write(pmcs_enabled, 1); if (ppc_md.enable_pmcs) ppc_md.enable_pmcs(); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 7505599c2593..fa7c4f12104f 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -458,9 +458,9 @@ static inline void clear_irq_work_pending(void) DEFINE_PER_CPU(u8, irq_work_pending); -#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 -#define test_irq_work_pending() __get_cpu_var(irq_work_pending) -#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 +#define set_irq_work_pending_flag() __this_cpu_write(irq_work_pending, 1) +#define test_irq_work_pending() __this_cpu_read(irq_work_pending) +#define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0) #endif /* 32 vs 64 bit */ @@ -482,8 +482,8 @@ void arch_irq_work_raise(void) static void __timer_interrupt(void) { struct pt_regs *regs = get_irq_regs(); - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); - struct clock_event_device *evt = &__get_cpu_var(decrementers); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + struct clock_event_device *evt = this_cpu_ptr(&decrementers); u64 now; trace_timer_interrupt_entry(regs); @@ -498,7 +498,7 @@ static void __timer_interrupt(void) *next_tb = ~(u64)0; if (evt->event_handler) evt->event_handler(evt); - __get_cpu_var(irq_stat).timer_irqs_event++; + __this_cpu_inc(irq_stat.timer_irqs_event); } else { now = *next_tb - now; if (now <= DECREMENTER_MAX) @@ -506,13 +506,13 @@ static void __timer_interrupt(void) /* We may have raced with new irq work */ if (test_irq_work_pending()) set_dec(1); - __get_cpu_var(irq_stat).timer_irqs_others++; + __this_cpu_inc(irq_stat.timer_irqs_others); } #ifdef CONFIG_PPC64 /* collect purr register values often, for accurate calculations */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); + struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); cu->current_tb = mfspr(SPRN_PURR); } #endif @@ -527,7 +527,7 @@ static void __timer_interrupt(void) void timer_interrupt(struct pt_regs * regs) { struct pt_regs *old_regs; - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. @@ -813,7 +813,7 @@ static void __init clocksource_init(void) static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { - __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt; + __this_cpu_write(decrementers_next_tb, get_tb_or_rtc() + evt); set_dec(evt); /* We may have raced with new irq work */ @@ -833,7 +833,7 @@ static void decrementer_set_mode(enum clock_event_mode mode, /* Interrupt handler for the timer broadcast IPI */ void tick_broadcast_ipi_handler(void) { - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); *next_tb = get_tb_or_rtc(); __timer_interrupt(); @@ -989,6 +989,7 @@ void GregorianDay(struct rtc_time * tm) tm->tm_wday = day % 7; } +EXPORT_SYMBOL_GPL(GregorianDay); void to_tm(int tim, struct rtc_time * tm) { diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 0dc43f9932cf..e6595b72269b 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -295,7 +295,7 @@ long machine_check_early(struct pt_regs *regs) { long handled = 0; - __get_cpu_var(irq_stat).mce_exceptions++; + __this_cpu_inc(irq_stat.mce_exceptions); if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); @@ -304,7 +304,7 @@ long machine_check_early(struct pt_regs *regs) long hmi_exception_realmode(struct pt_regs *regs) { - __get_cpu_var(irq_stat).hmi_exceptions++; + __this_cpu_inc(irq_stat.hmi_exceptions); if (ppc_md.hmi_exception_early) ppc_md.hmi_exception_early(regs); @@ -700,7 +700,7 @@ void machine_check_exception(struct pt_regs *regs) enum ctx_state prev_state = exception_enter(); int recover = 0; - __get_cpu_var(irq_stat).mce_exceptions++; + __this_cpu_inc(irq_stat.mce_exceptions); /* See if any machine dependent calls. In theory, we would want * to call the CPU first, and call the ppc_md. one if the CPU @@ -1519,7 +1519,7 @@ void vsx_unavailable_tm(struct pt_regs *regs) void performance_monitor_exception(struct pt_regs *regs) { - __get_cpu_var(irq_stat).pmu_irqs++; + __this_cpu_inc(irq_stat.pmu_irqs); perf_irq(regs); } diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 6e7c4923b5ea..411116c38da4 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -69,8 +69,12 @@ static void udbg_uart_putc(char c) static int udbg_uart_getc_poll(void) { - if (!udbg_uart_in || !(udbg_uart_in(UART_LSR) & LSR_DR)) + if (!udbg_uart_in) + return -1; + + if (!(udbg_uart_in(UART_LSR) & LSR_DR)) return udbg_uart_in(UART_RBR); + return -1; } diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index f174351842cf..305eb0d9b768 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -20,7 +20,6 @@ #include <linux/user.h> #include <linux/elf.h> #include <linux/security.h> -#include <linux/bootmem.h> #include <linux/memblock.h> #include <asm/pgtable.h> diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S index 23eb9a9441bd..c62be60c7274 100644 --- a/arch/powerpc/kernel/vdso32/getcpu.S +++ b/arch/powerpc/kernel/vdso32/getcpu.S @@ -30,8 +30,8 @@ V_FUNCTION_BEGIN(__kernel_getcpu) .cfi_startproc mfspr r5,SPRN_SPRG_VDSO_READ - cmpdi cr0,r3,0 - cmpdi cr1,r4,0 + cmpwi cr0,r3,0 + cmpwi cr1,r4,0 clrlwi r6,r5,16 rlwinm r7,r5,16,31-15,31-0 beq cr0,1f diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 602eb51d20bc..f5769f19ae25 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -172,6 +172,7 @@ config KVM_XICS depends on KVM_BOOK3S_64 && !KVM_MPIC select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD + default y ---help--- Include support for the XICS (eXternal Interrupt Controller Specification) interrupt controller architecture used on diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index b32db4b95361..888bf466d8c6 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -64,14 +64,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) -{ -} - -void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) -{ -} - void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) { if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index cd0b0730e29e..a2eb6d354a57 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -78,11 +78,6 @@ static inline bool sr_kp(u32 sr_raw) return (sr_raw & 0x20000000) ? true: false; } -static inline bool sr_nx(u32 sr_raw) -{ - return (sr_raw & 0x10000000) ? true: false; -} - static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data, bool iswrite); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index d40770248b6a..534acb3c6c3d 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -37,8 +37,7 @@ #include <asm/ppc-opcode.h> #include <asm/cputable.h> -/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ -#define MAX_LPID_970 63 +#include "trace_hv.h" /* Power architecture requires HPT is at least 256kB */ #define PPC_MIN_HPT_ORDER 18 @@ -229,14 +228,9 @@ int kvmppc_mmu_hv_init(void) if (!cpu_has_feature(CPU_FTR_HVMODE)) return -EINVAL; - /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */ - if (cpu_has_feature(CPU_FTR_ARCH_206)) { - host_lpid = mfspr(SPRN_LPID); /* POWER7 */ - rsvd_lpid = LPID_RSVD; - } else { - host_lpid = 0; /* PPC970 */ - rsvd_lpid = MAX_LPID_970; - } + /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */ + host_lpid = mfspr(SPRN_LPID); + rsvd_lpid = LPID_RSVD; kvmppc_init_lpid(rsvd_lpid + 1); @@ -259,130 +253,12 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) kvmppc_set_msr(vcpu, msr); } -/* - * This is called to get a reference to a guest page if there isn't - * one already in the memslot->arch.slot_phys[] array. - */ -static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, - struct kvm_memory_slot *memslot, - unsigned long psize) -{ - unsigned long start; - long np, err; - struct page *page, *hpage, *pages[1]; - unsigned long s, pgsize; - unsigned long *physp; - unsigned int is_io, got, pgorder; - struct vm_area_struct *vma; - unsigned long pfn, i, npages; - - physp = memslot->arch.slot_phys; - if (!physp) - return -EINVAL; - if (physp[gfn - memslot->base_gfn]) - return 0; - - is_io = 0; - got = 0; - page = NULL; - pgsize = psize; - err = -EINVAL; - start = gfn_to_hva_memslot(memslot, gfn); - - /* Instantiate and get the page we want access to */ - np = get_user_pages_fast(start, 1, 1, pages); - if (np != 1) { - /* Look up the vma for the page */ - down_read(¤t->mm->mmap_sem); - vma = find_vma(current->mm, start); - if (!vma || vma->vm_start > start || - start + psize > vma->vm_end || - !(vma->vm_flags & VM_PFNMAP)) - goto up_err; - is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot)); - pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); - /* check alignment of pfn vs. requested page size */ - if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1))) - goto up_err; - up_read(¤t->mm->mmap_sem); - - } else { - page = pages[0]; - got = KVMPPC_GOT_PAGE; - - /* See if this is a large page */ - s = PAGE_SIZE; - if (PageHuge(page)) { - hpage = compound_head(page); - s <<= compound_order(hpage); - /* Get the whole large page if slot alignment is ok */ - if (s > psize && slot_is_aligned(memslot, s) && - !(memslot->userspace_addr & (s - 1))) { - start &= ~(s - 1); - pgsize = s; - get_page(hpage); - put_page(page); - page = hpage; - } - } - if (s < psize) - goto out; - pfn = page_to_pfn(page); - } - - npages = pgsize >> PAGE_SHIFT; - pgorder = __ilog2(npages); - physp += (gfn - memslot->base_gfn) & ~(npages - 1); - spin_lock(&kvm->arch.slot_phys_lock); - for (i = 0; i < npages; ++i) { - if (!physp[i]) { - physp[i] = ((pfn + i) << PAGE_SHIFT) + - got + is_io + pgorder; - got = 0; - } - } - spin_unlock(&kvm->arch.slot_phys_lock); - err = 0; - - out: - if (got) - put_page(page); - return err; - - up_err: - up_read(¤t->mm->mmap_sem); - return err; -} - long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, unsigned long *pte_idx_ret) { - unsigned long psize, gpa, gfn; - struct kvm_memory_slot *memslot; long ret; - if (kvm->arch.using_mmu_notifiers) - goto do_insert; - - psize = hpte_page_size(pteh, ptel); - if (!psize) - return H_PARAMETER; - - pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); - - /* Find the memslot (if any) for this address */ - gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); - gfn = gpa >> PAGE_SHIFT; - memslot = gfn_to_memslot(kvm, gfn); - if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) { - if (!slot_is_aligned(memslot, psize)) - return H_PARAMETER; - if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0) - return H_PARAMETER; - } - - do_insert: /* Protect linux PTE lookup from page table destruction */ rcu_read_lock_sched(); /* this disables preemption too */ ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel, @@ -397,19 +273,6 @@ long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, } -/* - * We come here on a H_ENTER call from the guest when we are not - * using mmu notifiers and we don't have the requested page pinned - * already. - */ -long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, - long pte_index, unsigned long pteh, - unsigned long ptel) -{ - return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index, - pteh, ptel, &vcpu->arch.gpr[4]); -} - static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu, gva_t eaddr) { @@ -494,7 +357,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G)); /* Storage key permission check for POWER7 */ - if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) { + if (data && virtmode) { int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr); if (amrfield & 1) gpte->may_read = 0; @@ -622,14 +485,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, gfn = gpa >> PAGE_SHIFT; memslot = gfn_to_memslot(kvm, gfn); + trace_kvm_page_fault_enter(vcpu, hpte, memslot, ea, dsisr); + /* No memslot means it's an emulated MMIO region */ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, dsisr & DSISR_ISSTORE); - if (!kvm->arch.using_mmu_notifiers) - return -EFAULT; /* should never get here */ - /* * This should never happen, because of the slot_is_aligned() * check in kvmppc_do_h_enter(). @@ -641,6 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, mmu_seq = kvm->mmu_notifier_seq; smp_rmb(); + ret = -EFAULT; is_io = 0; pfn = 0; page = NULL; @@ -664,7 +527,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, } up_read(¤t->mm->mmap_sem); if (!pfn) - return -EFAULT; + goto out_put; } else { page = pages[0]; pfn = page_to_pfn(page); @@ -694,14 +557,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, } } - ret = -EFAULT; if (psize > pte_size) goto out_put; /* Check WIMG vs. the actual page we're accessing */ if (!hpte_cache_flags_ok(r, is_io)) { if (is_io) - return -EFAULT; + goto out_put; + /* * Allow guest to map emulated device memory as * uncacheable, but actually make it cacheable. @@ -765,6 +628,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, SetPageDirty(page); out_put: + trace_kvm_page_fault_exit(vcpu, hpte, ret); + if (page) { /* * We drop pages[0] here, not page because page might @@ -895,8 +760,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel); if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && hpte_rpn(ptel, psize) == gfn) { - if (kvm->arch.using_mmu_notifiers) - hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); /* Harvest R and C */ rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); @@ -914,15 +778,13 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva) { - if (kvm->arch.using_mmu_notifiers) - kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); + kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); return 0; } int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end) { - if (kvm->arch.using_mmu_notifiers) - kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); + kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); return 0; } @@ -1004,8 +866,6 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) { - if (!kvm->arch.using_mmu_notifiers) - return 0; return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp); } @@ -1042,15 +902,11 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva) { - if (!kvm->arch.using_mmu_notifiers) - return 0; return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); } void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte) { - if (!kvm->arch.using_mmu_notifiers) - return; kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); } @@ -1117,8 +973,11 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) } /* Now check and modify the HPTE */ - if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) + if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) { + /* unlock and continue */ + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); continue; + } /* need to make it temporarily absent so C is stable */ hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); @@ -1206,35 +1065,17 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, struct page *page, *pages[1]; int npages; unsigned long hva, offset; - unsigned long pa; - unsigned long *physp; int srcu_idx; srcu_idx = srcu_read_lock(&kvm->srcu); memslot = gfn_to_memslot(kvm, gfn); if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) goto err; - if (!kvm->arch.using_mmu_notifiers) { - physp = memslot->arch.slot_phys; - if (!physp) - goto err; - physp += gfn - memslot->base_gfn; - pa = *physp; - if (!pa) { - if (kvmppc_get_guest_page(kvm, gfn, memslot, - PAGE_SIZE) < 0) - goto err; - pa = *physp; - } - page = pfn_to_page(pa >> PAGE_SHIFT); - get_page(page); - } else { - hva = gfn_to_hva_memslot(memslot, gfn); - npages = get_user_pages_fast(hva, 1, 1, pages); - if (npages < 1) - goto err; - page = pages[0]; - } + hva = gfn_to_hva_memslot(memslot, gfn); + npages = get_user_pages_fast(hva, 1, 1, pages); + if (npages < 1) + goto err; + page = pages[0]; srcu_read_unlock(&kvm->srcu, srcu_idx); offset = gpa & (PAGE_SIZE - 1); @@ -1258,7 +1099,7 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa, put_page(page); - if (!dirty || !kvm->arch.using_mmu_notifiers) + if (!dirty) return; /* We need to mark this page dirty in the rmap chain */ @@ -1539,9 +1380,15 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); lbuf = (unsigned long __user *)buf; for (j = 0; j < hdr.n_valid; ++j) { + __be64 hpte_v; + __be64 hpte_r; + err = -EFAULT; - if (__get_user(v, lbuf) || __get_user(r, lbuf + 1)) + if (__get_user(hpte_v, lbuf) || + __get_user(hpte_r, lbuf + 1)) goto out; + v = be64_to_cpu(hpte_v); + r = be64_to_cpu(hpte_r); err = -EINVAL; if (!(v & HPTE_V_VALID)) goto out; @@ -1652,10 +1499,7 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) { struct kvmppc_mmu *mmu = &vcpu->arch.mmu; - if (cpu_has_feature(CPU_FTR_ARCH_206)) - vcpu->arch.slb_nr = 32; /* POWER7 */ - else - vcpu->arch.slb_nr = 64; + vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */ mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e63587d30b70..de4018a1bc4b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -58,6 +58,9 @@ #include "book3s.h" +#define CREATE_TRACE_POINTS +#include "trace_hv.h" + /* #define EXIT_DEBUG */ /* #define EXIT_DEBUG_SIMPLE */ /* #define EXIT_DEBUG_INT */ @@ -135,11 +138,10 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) * stolen. * * Updates to busy_stolen are protected by arch.tbacct_lock; - * updates to vc->stolen_tb are protected by the arch.tbacct_lock - * of the vcpu that has taken responsibility for running the vcore - * (i.e. vc->runner). The stolen times are measured in units of - * timebase ticks. (Note that the != TB_NIL checks below are - * purely defensive; they should never fail.) + * updates to vc->stolen_tb are protected by the vcore->stoltb_lock + * lock. The stolen times are measured in units of timebase ticks. + * (Note that the != TB_NIL checks below are purely defensive; + * they should never fail.) */ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) @@ -147,12 +149,21 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) struct kvmppc_vcore *vc = vcpu->arch.vcore; unsigned long flags; - spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); - if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE && - vc->preempt_tb != TB_NIL) { - vc->stolen_tb += mftb() - vc->preempt_tb; - vc->preempt_tb = TB_NIL; + /* + * We can test vc->runner without taking the vcore lock, + * because only this task ever sets vc->runner to this + * vcpu, and once it is set to this vcpu, only this task + * ever sets it to NULL. + */ + if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) { + spin_lock_irqsave(&vc->stoltb_lock, flags); + if (vc->preempt_tb != TB_NIL) { + vc->stolen_tb += mftb() - vc->preempt_tb; + vc->preempt_tb = TB_NIL; + } + spin_unlock_irqrestore(&vc->stoltb_lock, flags); } + spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST && vcpu->arch.busy_preempt != TB_NIL) { vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt; @@ -166,9 +177,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) struct kvmppc_vcore *vc = vcpu->arch.vcore; unsigned long flags; - spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); - if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) + if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) { + spin_lock_irqsave(&vc->stoltb_lock, flags); vc->preempt_tb = mftb(); + spin_unlock_irqrestore(&vc->stoltb_lock, flags); + } + spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST) vcpu->arch.busy_preempt = mftb(); spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); @@ -191,9 +205,6 @@ int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) struct kvmppc_vcore *vc = vcpu->arch.vcore; if (arch_compat) { - if (!cpu_has_feature(CPU_FTR_ARCH_206)) - return -EINVAL; /* 970 has no compat mode support */ - switch (arch_compat) { case PVR_ARCH_205: /* @@ -505,25 +516,14 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now) { u64 p; + unsigned long flags; - /* - * If we are the task running the vcore, then since we hold - * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb - * can't be updated, so we don't need the tbacct_lock. - * If the vcore is inactive, it can't become active (since we - * hold the vcore lock), so the vcpu load/put functions won't - * update stolen_tb/preempt_tb, and we don't need tbacct_lock. - */ + spin_lock_irqsave(&vc->stoltb_lock, flags); + p = vc->stolen_tb; if (vc->vcore_state != VCORE_INACTIVE && - vc->runner->arch.run_task != current) { - spin_lock_irq(&vc->runner->arch.tbacct_lock); - p = vc->stolen_tb; - if (vc->preempt_tb != TB_NIL) - p += now - vc->preempt_tb; - spin_unlock_irq(&vc->runner->arch.tbacct_lock); - } else { - p = vc->stolen_tb; - } + vc->preempt_tb != TB_NIL) + p += now - vc->preempt_tb; + spin_unlock_irqrestore(&vc->stoltb_lock, flags); return p; } @@ -607,10 +607,45 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, } } +static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target) +{ + struct kvmppc_vcore *vcore = target->arch.vcore; + + /* + * We expect to have been called by the real mode handler + * (kvmppc_rm_h_confer()) which would have directly returned + * H_SUCCESS if the source vcore wasn't idle (e.g. if it may + * have useful work to do and should not confer) so we don't + * recheck that here. + */ + + spin_lock(&vcore->lock); + if (target->arch.state == KVMPPC_VCPU_RUNNABLE && + vcore->vcore_state != VCORE_INACTIVE) + target = vcore->runner; + spin_unlock(&vcore->lock); + + return kvm_vcpu_yield_to(target); +} + +static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu) +{ + int yield_count = 0; + struct lppaca *lppaca; + + spin_lock(&vcpu->arch.vpa_update_lock); + lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr; + if (lppaca) + yield_count = lppaca->yield_count; + spin_unlock(&vcpu->arch.vpa_update_lock); + return yield_count; +} + int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) { unsigned long req = kvmppc_get_gpr(vcpu, 3); unsigned long target, ret = H_SUCCESS; + int yield_count; struct kvm_vcpu *tvcpu; int idx, rc; @@ -619,14 +654,6 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) return RESUME_HOST; switch (req) { - case H_ENTER: - idx = srcu_read_lock(&vcpu->kvm->srcu); - ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4), - kvmppc_get_gpr(vcpu, 5), - kvmppc_get_gpr(vcpu, 6), - kvmppc_get_gpr(vcpu, 7)); - srcu_read_unlock(&vcpu->kvm->srcu, idx); - break; case H_CEDE: break; case H_PROD: @@ -654,7 +681,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) ret = H_PARAMETER; break; } - kvm_vcpu_yield_to(tvcpu); + yield_count = kvmppc_get_gpr(vcpu, 5); + if (kvmppc_get_yield_count(tvcpu) != yield_count) + break; + kvm_arch_vcpu_yield_to(tvcpu); break; case H_REGISTER_VPA: ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4), @@ -769,6 +799,8 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; + /* HMI is hypervisor interrupt and host has handled it. Resume guest.*/ + case BOOK3S_INTERRUPT_HMI: case BOOK3S_INTERRUPT_PERFMON: r = RESUME_GUEST; break; @@ -837,6 +869,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, * Accordingly return to Guest or Host. */ case BOOK3S_INTERRUPT_H_EMUL_ASSIST: + if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED) + vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ? + swab32(vcpu->arch.emul_inst) : + vcpu->arch.emul_inst; if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { r = kvmppc_emulate_debug_inst(run, vcpu); } else { @@ -1357,6 +1393,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) INIT_LIST_HEAD(&vcore->runnable_threads); spin_lock_init(&vcore->lock); + spin_lock_init(&vcore->stoltb_lock); init_waitqueue_head(&vcore->wq); vcore->preempt_tb = TB_NIL; vcore->lpcr = kvm->arch.lpcr; @@ -1694,9 +1731,11 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) vc->n_woken = 0; vc->nap_count = 0; vc->entry_exit_count = 0; + vc->preempt_tb = TB_NIL; vc->vcore_state = VCORE_STARTING; vc->in_guest = 0; vc->napping_threads = 0; + vc->conferring_threads = 0; /* * Updating any of the vpas requires calling kvmppc_pin_guest_page, @@ -1726,6 +1765,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { kvmppc_start_thread(vcpu); kvmppc_create_dtl_entry(vcpu, vc); + trace_kvm_guest_enter(vcpu); } /* Set this explicitly in case thread 0 doesn't have a vcpu */ @@ -1734,6 +1774,9 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) vc->vcore_state = VCORE_RUNNING; preempt_disable(); + + trace_kvmppc_run_core(vc, 0); + spin_unlock(&vc->lock); kvm_guest_enter(); @@ -1779,6 +1822,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_core_pending_dec(vcpu)) kvmppc_core_dequeue_dec(vcpu); + trace_kvm_guest_exit(vcpu); + ret = RESUME_GUEST; if (vcpu->arch.trap) ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu, @@ -1804,6 +1849,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) wake_up(&vcpu->arch.cpu_run); } } + + trace_kvmppc_run_core(vc, 1); } /* @@ -1826,15 +1873,37 @@ static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state) */ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) { + struct kvm_vcpu *vcpu; + int do_sleep = 1; + DEFINE_WAIT(wait); prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE); + + /* + * Check one last time for pending exceptions and ceded state after + * we put ourselves on the wait queue + */ + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { + if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) { + do_sleep = 0; + break; + } + } + + if (!do_sleep) { + finish_wait(&vc->wq, &wait); + return; + } + vc->vcore_state = VCORE_SLEEPING; + trace_kvmppc_vcore_blocked(vc, 0); spin_unlock(&vc->lock); schedule(); finish_wait(&vc->wq, &wait); spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; + trace_kvmppc_vcore_blocked(vc, 1); } static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) @@ -1843,6 +1912,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) struct kvmppc_vcore *vc; struct kvm_vcpu *v, *vn; + trace_kvmppc_run_vcpu_enter(vcpu); + kvm_run->exit_reason = 0; vcpu->arch.ret = RESUME_GUEST; vcpu->arch.trap = 0; @@ -1872,6 +1943,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) VCORE_EXIT_COUNT(vc) == 0) { kvmppc_create_dtl_entry(vcpu, vc); kvmppc_start_thread(vcpu); + trace_kvm_guest_enter(vcpu); } else if (vc->vcore_state == VCORE_SLEEPING) { wake_up(&vc->wq); } @@ -1936,6 +2008,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) wake_up(&v->arch.cpu_run); } + trace_kvmppc_run_vcpu_exit(vcpu, kvm_run); spin_unlock(&vc->lock); return vcpu->arch.ret; } @@ -1962,7 +2035,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) /* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */ smp_mb(); - /* On the first time here, set up HTAB and VRMA or RMA */ + /* On the first time here, set up HTAB and VRMA */ if (!vcpu->kvm->arch.rma_setup_done) { r = kvmppc_hv_setup_htab_rma(vcpu); if (r) @@ -1981,7 +2054,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) if (run->exit_reason == KVM_EXIT_PAPR_HCALL && !(vcpu->arch.shregs.msr & MSR_PR)) { + trace_kvm_hcall_enter(vcpu); r = kvmppc_pseries_do_hcall(vcpu); + trace_kvm_hcall_exit(vcpu, r); kvmppc_core_prepare_to_enter(vcpu); } else if (r == RESUME_PAGE_FAULT) { srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); @@ -1997,98 +2072,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) return r; } - -/* Work out RMLS (real mode limit selector) field value for a given RMA size. - Assumes POWER7 or PPC970. */ -static inline int lpcr_rmls(unsigned long rma_size) -{ - switch (rma_size) { - case 32ul << 20: /* 32 MB */ - if (cpu_has_feature(CPU_FTR_ARCH_206)) - return 8; /* only supported on POWER7 */ - return -1; - case 64ul << 20: /* 64 MB */ - return 3; - case 128ul << 20: /* 128 MB */ - return 7; - case 256ul << 20: /* 256 MB */ - return 4; - case 1ul << 30: /* 1 GB */ - return 2; - case 16ul << 30: /* 16 GB */ - return 1; - case 256ul << 30: /* 256 GB */ - return 0; - default: - return -1; - } -} - -static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *page; - struct kvm_rma_info *ri = vma->vm_file->private_data; - - if (vmf->pgoff >= kvm_rma_pages) - return VM_FAULT_SIGBUS; - - page = pfn_to_page(ri->base_pfn + vmf->pgoff); - get_page(page); - vmf->page = page; - return 0; -} - -static const struct vm_operations_struct kvm_rma_vm_ops = { - .fault = kvm_rma_fault, -}; - -static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma) -{ - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; - vma->vm_ops = &kvm_rma_vm_ops; - return 0; -} - -static int kvm_rma_release(struct inode *inode, struct file *filp) -{ - struct kvm_rma_info *ri = filp->private_data; - - kvm_release_rma(ri); - return 0; -} - -static const struct file_operations kvm_rma_fops = { - .mmap = kvm_rma_mmap, - .release = kvm_rma_release, -}; - -static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, - struct kvm_allocate_rma *ret) -{ - long fd; - struct kvm_rma_info *ri; - /* - * Only do this on PPC970 in HV mode - */ - if (!cpu_has_feature(CPU_FTR_HVMODE) || - !cpu_has_feature(CPU_FTR_ARCH_201)) - return -EINVAL; - - if (!kvm_rma_pages) - return -EINVAL; - - ri = kvm_alloc_rma(); - if (!ri) - return -ENOMEM; - - fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC); - if (fd < 0) - kvm_release_rma(ri); - - ret->rma_size = kvm_rma_pages << PAGE_SHIFT; - return fd; -} - static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps, int linux_psize) { @@ -2167,26 +2150,6 @@ out: return r; } -static void unpin_slot(struct kvm_memory_slot *memslot) -{ - unsigned long *physp; - unsigned long j, npages, pfn; - struct page *page; - - physp = memslot->arch.slot_phys; - npages = memslot->npages; - if (!physp) - return; - for (j = 0; j < npages; j++) { - if (!(physp[j] & KVMPPC_GOT_PAGE)) - continue; - pfn = physp[j] >> PAGE_SHIFT; - page = pfn_to_page(pfn); - SetPageDirty(page); - put_page(page); - } -} - static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { @@ -2194,11 +2157,6 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free, vfree(free->arch.rmap); free->arch.rmap = NULL; } - if (!dont || free->arch.slot_phys != dont->arch.slot_phys) { - unpin_slot(free); - vfree(free->arch.slot_phys); - free->arch.slot_phys = NULL; - } } static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, @@ -2207,7 +2165,6 @@ static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); if (!slot->arch.rmap) return -ENOMEM; - slot->arch.slot_phys = NULL; return 0; } @@ -2216,17 +2173,6 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem) { - unsigned long *phys; - - /* Allocate a slot_phys array if needed */ - phys = memslot->arch.slot_phys; - if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) { - phys = vzalloc(memslot->npages * sizeof(unsigned long)); - if (!phys) - return -ENOMEM; - memslot->arch.slot_phys = phys; - } - return 0; } @@ -2284,17 +2230,11 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) { int err = 0; struct kvm *kvm = vcpu->kvm; - struct kvm_rma_info *ri = NULL; unsigned long hva; struct kvm_memory_slot *memslot; struct vm_area_struct *vma; unsigned long lpcr = 0, senc; - unsigned long lpcr_mask = 0; unsigned long psize, porder; - unsigned long rma_size; - unsigned long rmls; - unsigned long *physp; - unsigned long i, npages; int srcu_idx; mutex_lock(&kvm->lock); @@ -2329,88 +2269,25 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) psize = vma_kernel_pagesize(vma); porder = __ilog2(psize); - /* Is this one of our preallocated RMAs? */ - if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops && - hva == vma->vm_start) - ri = vma->vm_file->private_data; - up_read(¤t->mm->mmap_sem); - if (!ri) { - /* On POWER7, use VRMA; on PPC970, give up */ - err = -EPERM; - if (cpu_has_feature(CPU_FTR_ARCH_201)) { - pr_err("KVM: CPU requires an RMO\n"); - goto out_srcu; - } + /* We can handle 4k, 64k or 16M pages in the VRMA */ + err = -EINVAL; + if (!(psize == 0x1000 || psize == 0x10000 || + psize == 0x1000000)) + goto out_srcu; - /* We can handle 4k, 64k or 16M pages in the VRMA */ - err = -EINVAL; - if (!(psize == 0x1000 || psize == 0x10000 || - psize == 0x1000000)) - goto out_srcu; + /* Update VRMASD field in the LPCR */ + senc = slb_pgsize_encoding(psize); + kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | + (VRMA_VSID << SLB_VSID_SHIFT_1T); + /* the -4 is to account for senc values starting at 0x10 */ + lpcr = senc << (LPCR_VRMASD_SH - 4); - /* Update VRMASD field in the LPCR */ - senc = slb_pgsize_encoding(psize); - kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | - (VRMA_VSID << SLB_VSID_SHIFT_1T); - lpcr_mask = LPCR_VRMASD; - /* the -4 is to account for senc values starting at 0x10 */ - lpcr = senc << (LPCR_VRMASD_SH - 4); + /* Create HPTEs in the hash page table for the VRMA */ + kvmppc_map_vrma(vcpu, memslot, porder); - /* Create HPTEs in the hash page table for the VRMA */ - kvmppc_map_vrma(vcpu, memslot, porder); - - } else { - /* Set up to use an RMO region */ - rma_size = kvm_rma_pages; - if (rma_size > memslot->npages) - rma_size = memslot->npages; - rma_size <<= PAGE_SHIFT; - rmls = lpcr_rmls(rma_size); - err = -EINVAL; - if ((long)rmls < 0) { - pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); - goto out_srcu; - } - atomic_inc(&ri->use_count); - kvm->arch.rma = ri; - - /* Update LPCR and RMOR */ - if (cpu_has_feature(CPU_FTR_ARCH_201)) { - /* PPC970; insert RMLS value (split field) in HID4 */ - lpcr_mask = (1ul << HID4_RMLS0_SH) | - (3ul << HID4_RMLS2_SH) | HID4_RMOR; - lpcr = ((rmls >> 2) << HID4_RMLS0_SH) | - ((rmls & 3) << HID4_RMLS2_SH); - /* RMOR is also in HID4 */ - lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff) - << HID4_RMOR_SH; - } else { - /* POWER7 */ - lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS; - lpcr = rmls << LPCR_RMLS_SH; - kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT; - } - pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", - ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); - - /* Initialize phys addrs of pages in RMO */ - npages = kvm_rma_pages; - porder = __ilog2(npages); - physp = memslot->arch.slot_phys; - if (physp) { - if (npages > memslot->npages) - npages = memslot->npages; - spin_lock(&kvm->arch.slot_phys_lock); - for (i = 0; i < npages; ++i) - physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + - porder; - spin_unlock(&kvm->arch.slot_phys_lock); - } - } - - kvmppc_update_lpcr(kvm, lpcr, lpcr_mask); + kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */ smp_wmb(); @@ -2449,35 +2326,21 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls, sizeof(kvm->arch.enabled_hcalls)); - kvm->arch.rma = NULL; - kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); - if (cpu_has_feature(CPU_FTR_ARCH_201)) { - /* PPC970; HID4 is effectively the LPCR */ - kvm->arch.host_lpid = 0; - kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4); - lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH)); - lpcr |= ((lpid >> 4) << HID4_LPID1_SH) | - ((lpid & 0xf) << HID4_LPID5_SH); - } else { - /* POWER7; init LPCR for virtual RMA mode */ - kvm->arch.host_lpid = mfspr(SPRN_LPID); - kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); - lpcr &= LPCR_PECE | LPCR_LPES; - lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | - LPCR_VPM0 | LPCR_VPM1; - kvm->arch.vrma_slb_v = SLB_VSID_B_1T | - (VRMA_VSID << SLB_VSID_SHIFT_1T); - /* On POWER8 turn on online bit to enable PURR/SPURR */ - if (cpu_has_feature(CPU_FTR_ARCH_207S)) - lpcr |= LPCR_ONL; - } + /* Init LPCR for virtual RMA mode */ + kvm->arch.host_lpid = mfspr(SPRN_LPID); + kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); + lpcr &= LPCR_PECE | LPCR_LPES; + lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | + LPCR_VPM0 | LPCR_VPM1; + kvm->arch.vrma_slb_v = SLB_VSID_B_1T | + (VRMA_VSID << SLB_VSID_SHIFT_1T); + /* On POWER8 turn on online bit to enable PURR/SPURR */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + lpcr |= LPCR_ONL; kvm->arch.lpcr = lpcr; - kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206); - spin_lock_init(&kvm->arch.slot_phys_lock); - /* * Track that we now have a HV mode VM active. This blocks secondary * CPU threads from coming online. @@ -2507,10 +2370,6 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) kvm_hv_vm_deactivated(); kvmppc_free_vcores(kvm); - if (kvm->arch.rma) { - kvm_release_rma(kvm->arch.rma); - kvm->arch.rma = NULL; - } kvmppc_free_hpt(kvm); } @@ -2536,7 +2395,8 @@ static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn, static int kvmppc_core_check_processor_compat_hv(void) { - if (!cpu_has_feature(CPU_FTR_HVMODE)) + if (!cpu_has_feature(CPU_FTR_HVMODE) || + !cpu_has_feature(CPU_FTR_ARCH_206)) return -EIO; return 0; } @@ -2550,16 +2410,6 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, switch (ioctl) { - case KVM_ALLOCATE_RMA: { - struct kvm_allocate_rma rma; - struct kvm *kvm = filp->private_data; - - r = kvm_vm_ioctl_allocate_rma(kvm, &rma); - if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) - r = -EFAULT; - break; - } - case KVM_PPC_ALLOCATE_HTAB: { u32 htab_order; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 4fdc27c80f4c..1f083ff8a61a 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -12,11 +12,11 @@ #include <linux/export.h> #include <linux/sched.h> #include <linux/spinlock.h> -#include <linux/bootmem.h> #include <linux/init.h> #include <linux/memblock.h> #include <linux/sizes.h> #include <linux/cma.h> +#include <linux/bitops.h> #include <asm/cputable.h> #include <asm/kvm_ppc.h> @@ -33,95 +33,9 @@ * By default we reserve 5% of memory for hash pagetable allocation. */ static unsigned long kvm_cma_resv_ratio = 5; -/* - * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area. - * Each RMA has to be physically contiguous and of a size that the - * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB, - * and other larger sizes. Since we are unlikely to be allocate that - * much physically contiguous memory after the system is up and running, - * we preallocate a set of RMAs in early boot using CMA. - * should be power of 2. - */ -unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */ -EXPORT_SYMBOL_GPL(kvm_rma_pages); static struct cma *kvm_cma; -/* Work out RMLS (real mode limit selector) field value for a given RMA size. - Assumes POWER7 or PPC970. */ -static inline int lpcr_rmls(unsigned long rma_size) -{ - switch (rma_size) { - case 32ul << 20: /* 32 MB */ - if (cpu_has_feature(CPU_FTR_ARCH_206)) - return 8; /* only supported on POWER7 */ - return -1; - case 64ul << 20: /* 64 MB */ - return 3; - case 128ul << 20: /* 128 MB */ - return 7; - case 256ul << 20: /* 256 MB */ - return 4; - case 1ul << 30: /* 1 GB */ - return 2; - case 16ul << 30: /* 16 GB */ - return 1; - case 256ul << 30: /* 256 GB */ - return 0; - default: - return -1; - } -} - -static int __init early_parse_rma_size(char *p) -{ - unsigned long kvm_rma_size; - - pr_debug("%s(%s)\n", __func__, p); - if (!p) - return -EINVAL; - kvm_rma_size = memparse(p, &p); - /* - * Check that the requested size is one supported in hardware - */ - if (lpcr_rmls(kvm_rma_size) < 0) { - pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); - return -EINVAL; - } - kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT; - return 0; -} -early_param("kvm_rma_size", early_parse_rma_size); - -struct kvm_rma_info *kvm_alloc_rma() -{ - struct page *page; - struct kvm_rma_info *ri; - - ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL); - if (!ri) - return NULL; - page = cma_alloc(kvm_cma, kvm_rma_pages, order_base_2(kvm_rma_pages)); - if (!page) - goto err_out; - atomic_set(&ri->use_count, 1); - ri->base_pfn = page_to_pfn(page); - return ri; -err_out: - kfree(ri); - return NULL; -} -EXPORT_SYMBOL_GPL(kvm_alloc_rma); - -void kvm_release_rma(struct kvm_rma_info *ri) -{ - if (atomic_dec_and_test(&ri->use_count)) { - cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages); - kfree(ri); - } -} -EXPORT_SYMBOL_GPL(kvm_release_rma); - static int __init early_parse_kvm_cma_resv(char *p) { pr_debug("%s(%s)\n", __func__, p); @@ -133,14 +47,9 @@ early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv); struct page *kvm_alloc_hpt(unsigned long nr_pages) { - unsigned long align_pages = HPT_ALIGN_PAGES; - VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); - /* Old CPUs require HPT aligned on a multiple of its size */ - if (!cpu_has_feature(CPU_FTR_ARCH_206)) - align_pages = nr_pages; - return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages)); + return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES)); } EXPORT_SYMBOL_GPL(kvm_alloc_hpt); @@ -154,7 +63,7 @@ EXPORT_SYMBOL_GPL(kvm_release_hpt); * kvm_cma_reserve() - reserve area for kvm hash pagetable * * This function reserves memory from early allocator. It should be - * called by arch specific code once the early allocator (memblock or bootmem) + * called by arch specific code once the memblock allocator * has been activated and all other subsystems have already allocated/reserved * memory. */ @@ -181,22 +90,44 @@ void __init kvm_cma_reserve(void) if (selected_size) { pr_debug("%s: reserving %ld MiB for global area\n", __func__, (unsigned long)selected_size / SZ_1M); - /* - * Old CPUs require HPT aligned on a multiple of its size. So for them - * make the alignment as max size we could request. - */ - if (!cpu_has_feature(CPU_FTR_ARCH_206)) - align_size = __rounddown_pow_of_two(selected_size); - else - align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; - - align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size); + align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; cma_declare_contiguous(0, selected_size, 0, align_size, KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma); } } /* + * Real-mode H_CONFER implementation. + * We check if we are the only vcpu out of this virtual core + * still running in the guest and not ceded. If so, we pop up + * to the virtual-mode implementation; if not, just return to + * the guest. + */ +long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, + unsigned int yield_count) +{ + struct kvmppc_vcore *vc = vcpu->arch.vcore; + int threads_running; + int threads_ceded; + int threads_conferring; + u64 stop = get_tb() + 10 * tb_ticks_per_usec; + int rv = H_SUCCESS; /* => don't yield */ + + set_bit(vcpu->arch.ptid, &vc->conferring_threads); + while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) { + threads_running = VCORE_ENTRY_COUNT(vc); + threads_ceded = hweight32(vc->napping_threads); + threads_conferring = hweight32(vc->conferring_threads); + if (threads_ceded + threads_conferring >= threads_running) { + rv = H_TOO_HARD; /* => do yield */ + break; + } + } + clear_bit(vcpu->arch.ptid, &vc->conferring_threads); + return rv; +} + +/* * When running HV mode KVM we need to block certain operations while KVM VMs * exist in the system. We use a counter of VMs to track this. * diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 731be7478b27..36540a99d178 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -52,10 +52,8 @@ _GLOBAL(__kvmppc_vcore_entry) std r3, _CCR(r1) /* Save host DSCR */ -BEGIN_FTR_SECTION mfspr r3, SPRN_DSCR std r3, HSTATE_DSCR(r13) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) BEGIN_FTR_SECTION /* Save host DABR */ @@ -84,11 +82,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r7, SPRN_MMCR0 /* save MMCR0 */ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */ mfspr r6, SPRN_MMCRA -BEGIN_FTR_SECTION - /* On P7, clear MMCRA in order to disable SDAR updates */ + /* Clear MMCRA in order to disable SDAR updates */ li r5, 0 mtspr SPRN_MMCRA, r5 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) isync ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ lbz r5, LPPACA_PMCINUSE(r3) @@ -113,20 +109,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r7, SPRN_PMC4 mfspr r8, SPRN_PMC5 mfspr r9, SPRN_PMC6 -BEGIN_FTR_SECTION - mfspr r10, SPRN_PMC7 - mfspr r11, SPRN_PMC8 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) stw r3, HSTATE_PMC(r13) stw r5, HSTATE_PMC + 4(r13) stw r6, HSTATE_PMC + 8(r13) stw r7, HSTATE_PMC + 12(r13) stw r8, HSTATE_PMC + 16(r13) stw r9, HSTATE_PMC + 20(r13) -BEGIN_FTR_SECTION - stw r10, HSTATE_PMC + 24(r13) - stw r11, HSTATE_PMC + 28(r13) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 31: /* @@ -140,31 +128,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) add r8,r8,r7 std r8,HSTATE_DECEXP(r13) -#ifdef CONFIG_SMP - /* - * On PPC970, if the guest vcpu has an external interrupt pending, - * send ourselves an IPI so as to interrupt the guest once it - * enables interrupts. (It must have interrupts disabled, - * otherwise we would already have delivered the interrupt.) - * - * XXX If this is a UP build, smp_send_reschedule is not available, - * so the interrupt will be delayed until the next time the vcpu - * enters the guest with interrupts enabled. - */ -BEGIN_FTR_SECTION - ld r4, HSTATE_KVM_VCPU(r13) - ld r0, VCPU_PENDING_EXC(r4) - li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL) - oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h - and. r0, r0, r7 - beq 32f - lhz r3, PACAPACAINDEX(r13) - bl smp_send_reschedule - nop -32: -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) -#endif /* CONFIG_SMP */ - /* Jump to partition switch code */ bl kvmppc_hv_entry_trampoline nop diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index d562c8e2bc30..60081bd75847 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -138,8 +138,5 @@ out: long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) { - if (cpu_has_feature(CPU_FTR_ARCH_206)) - return kvmppc_realmode_mc_power7(vcpu); - - return 0; + return kvmppc_realmode_mc_power7(vcpu); } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 084ad54c73cd..510bdfbc4073 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -45,16 +45,12 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags) * as indicated by local_paca->kvm_hstate.kvm_vcpu being set, * we can use tlbiel as long as we mark all other physical * cores as potentially having stale TLB entries for this lpid. - * If we're not using MMU notifiers, we never take pages away - * from the guest, so we can use tlbiel if requested. * Otherwise, don't use tlbiel. */ if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu) global = 0; - else if (kvm->arch.using_mmu_notifiers) - global = 1; else - global = !(flags & H_LOCAL); + global = 1; if (!global) { /* any other core might now have stale TLB entries... */ @@ -170,7 +166,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, struct revmap_entry *rev; unsigned long g_ptel; struct kvm_memory_slot *memslot; - unsigned long *physp, pte_size; + unsigned long pte_size; unsigned long is_io; unsigned long *rmap; pte_t pte; @@ -198,9 +194,6 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, is_io = ~0ul; rmap = NULL; if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) { - /* PPC970 can't do emulated MMIO */ - if (!cpu_has_feature(CPU_FTR_ARCH_206)) - return H_PARAMETER; /* Emulated MMIO - mark this with key=31 */ pteh |= HPTE_V_ABSENT; ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO; @@ -213,37 +206,20 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, slot_fn = gfn - memslot->base_gfn; rmap = &memslot->arch.rmap[slot_fn]; - if (!kvm->arch.using_mmu_notifiers) { - physp = memslot->arch.slot_phys; - if (!physp) - return H_PARAMETER; - physp += slot_fn; - if (realmode) - physp = real_vmalloc_addr(physp); - pa = *physp; - if (!pa) - return H_TOO_HARD; - is_io = pa & (HPTE_R_I | HPTE_R_W); - pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK); - pa &= PAGE_MASK; + /* Translate to host virtual address */ + hva = __gfn_to_hva_memslot(memslot, gfn); + + /* Look up the Linux PTE for the backing page */ + pte_size = psize; + pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size); + if (pte_present(pte) && !pte_numa(pte)) { + if (writing && !pte_write(pte)) + /* make the actual HPTE be read-only */ + ptel = hpte_make_readonly(ptel); + is_io = hpte_cache_bits(pte_val(pte)); + pa = pte_pfn(pte) << PAGE_SHIFT; + pa |= hva & (pte_size - 1); pa |= gpa & ~PAGE_MASK; - } else { - /* Translate to host virtual address */ - hva = __gfn_to_hva_memslot(memslot, gfn); - - /* Look up the Linux PTE for the backing page */ - pte_size = psize; - pte = lookup_linux_pte_and_update(pgdir, hva, writing, - &pte_size); - if (pte_present(pte) && !pte_numa(pte)) { - if (writing && !pte_write(pte)) - /* make the actual HPTE be read-only */ - ptel = hpte_make_readonly(ptel); - is_io = hpte_cache_bits(pte_val(pte)); - pa = pte_pfn(pte) << PAGE_SHIFT; - pa |= hva & (pte_size - 1); - pa |= gpa & ~PAGE_MASK; - } } if (pte_size < psize) @@ -337,8 +313,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, rmap = real_vmalloc_addr(rmap); lock_rmap(rmap); /* Check for pending invalidations under the rmap chain lock */ - if (kvm->arch.using_mmu_notifiers && - mmu_notifier_retry(kvm, mmu_seq)) { + if (mmu_notifier_retry(kvm, mmu_seq)) { /* inval in progress, write a non-present HPTE */ pteh |= HPTE_V_ABSENT; pteh &= ~HPTE_V_VALID; @@ -395,61 +370,11 @@ static inline int try_lock_tlbie(unsigned int *lock) return old == 0; } -/* - * tlbie/tlbiel is a bit different on the PPC970 compared to later - * processors such as POWER7; the large page bit is in the instruction - * not RB, and the top 16 bits and the bottom 12 bits of the VA - * in RB must be 0. - */ -static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues, - long npages, int global, bool need_sync) -{ - long i; - - if (global) { - while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) - cpu_relax(); - if (need_sync) - asm volatile("ptesync" : : : "memory"); - for (i = 0; i < npages; ++i) { - unsigned long rb = rbvalues[i]; - - if (rb & 1) /* large page */ - asm volatile("tlbie %0,1" : : - "r" (rb & 0x0000fffffffff000ul)); - else - asm volatile("tlbie %0,0" : : - "r" (rb & 0x0000fffffffff000ul)); - } - asm volatile("eieio; tlbsync; ptesync" : : : "memory"); - kvm->arch.tlbie_lock = 0; - } else { - if (need_sync) - asm volatile("ptesync" : : : "memory"); - for (i = 0; i < npages; ++i) { - unsigned long rb = rbvalues[i]; - - if (rb & 1) /* large page */ - asm volatile("tlbiel %0,1" : : - "r" (rb & 0x0000fffffffff000ul)); - else - asm volatile("tlbiel %0,0" : : - "r" (rb & 0x0000fffffffff000ul)); - } - asm volatile("ptesync" : : : "memory"); - } -} - static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, long npages, int global, bool need_sync) { long i; - if (cpu_has_feature(CPU_FTR_ARCH_201)) { - /* PPC970 tlbie instruction is a bit different */ - do_tlbies_970(kvm, rbvalues, npages, global, need_sync); - return; - } if (global) { while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) cpu_relax(); @@ -667,40 +592,29 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, rev->guest_rpte = r; note_hpte_modification(kvm, rev); } - r = (be64_to_cpu(hpte[1]) & ~mask) | bits; /* Update HPTE */ if (v & HPTE_V_VALID) { - rb = compute_tlbie_rb(v, r, pte_index); - hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID); - do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* - * If the host has this page as readonly but the guest - * wants to make it read/write, reduce the permissions. - * Checking the host permissions involves finding the - * memslot and then the Linux PTE for the page. + * If the page is valid, don't let it transition from + * readonly to writable. If it should be writable, we'll + * take a trap and let the page fault code sort it out. */ - if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) { - unsigned long psize, gfn, hva; - struct kvm_memory_slot *memslot; - pgd_t *pgdir = vcpu->arch.pgdir; - pte_t pte; - - psize = hpte_page_size(v, r); - gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; - memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); - if (memslot) { - hva = __gfn_to_hva_memslot(memslot, gfn); - pte = lookup_linux_pte_and_update(pgdir, hva, - 1, &psize); - if (pte_present(pte) && !pte_write(pte)) - r = hpte_make_readonly(r); - } + pte = be64_to_cpu(hpte[1]); + r = (pte & ~mask) | bits; + if (hpte_is_writable(r) && !hpte_is_writable(pte)) + r = hpte_make_readonly(r); + /* If the PTE is changing, invalidate it first */ + if (r != pte) { + rb = compute_tlbie_rb(v, r, pte_index); + hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) | + HPTE_V_ABSENT); + do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), + true); + hpte[1] = cpu_to_be64(r); } } - hpte[1] = cpu_to_be64(r); - eieio(); - hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK); + unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); asm volatile("ptesync" : : : "memory"); return H_SUCCESS; } diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 3ee38e6e884f..7b066f6b02ad 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -183,8 +183,10 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp, * state update in HW (ie bus transactions) so we can handle them * separately here as well. */ - if (resend) + if (resend) { icp->rm_action |= XICS_RM_CHECK_RESEND; + icp->rm_resend_icp = icp; + } } @@ -254,10 +256,25 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, * nothing needs to be done as there can be no XISR to * reject. * + * ICP state: Check_IPI + * * If the CPPR is less favored, then we might be replacing - * an interrupt, and thus need to possibly reject it as in + * an interrupt, and thus need to possibly reject it. * - * ICP state: Check_IPI + * ICP State: IPI + * + * Besides rejecting any pending interrupts, we also + * update XISR and pending_pri to mark IPI as pending. + * + * PAPR does not describe this state, but if the MFRR is being + * made less favored than its earlier value, there might be + * a previously-rejected interrupt needing to be resent. + * Ideally, we would want to resend only if + * prio(pending_interrupt) < mfrr && + * prio(pending_interrupt) < cppr + * where pending interrupt is the one that was rejected. But + * we don't have that state, so we simply trigger a resend + * whenever the MFRR is made less favored. */ do { old_state = new_state = ACCESS_ONCE(icp->state); @@ -270,13 +287,14 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, resend = false; if (mfrr < new_state.cppr) { /* Reject a pending interrupt if not an IPI */ - if (mfrr <= new_state.pending_pri) + if (mfrr <= new_state.pending_pri) { reject = new_state.xisr; - new_state.pending_pri = mfrr; - new_state.xisr = XICS_IPI; + new_state.pending_pri = mfrr; + new_state.xisr = XICS_IPI; + } } - if (mfrr > old_state.mfrr && mfrr > new_state.cppr) { + if (mfrr > old_state.mfrr) { resend = new_state.need_resend; new_state.need_resend = 0; } @@ -289,8 +307,10 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, } /* Pass resends to virtual mode */ - if (resend) + if (resend) { this_icp->rm_action |= XICS_RM_CHECK_RESEND; + this_icp->rm_resend_icp = icp; + } return check_too_hard(xics, this_icp); } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index edb2ccdbb2ba..10554df13852 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -94,20 +94,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) lwz r6, HSTATE_PMC + 12(r13) lwz r8, HSTATE_PMC + 16(r13) lwz r9, HSTATE_PMC + 20(r13) -BEGIN_FTR_SECTION - lwz r10, HSTATE_PMC + 24(r13) - lwz r11, HSTATE_PMC + 28(r13) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mtspr SPRN_PMC1, r3 mtspr SPRN_PMC2, r4 mtspr SPRN_PMC3, r5 mtspr SPRN_PMC4, r6 mtspr SPRN_PMC5, r8 mtspr SPRN_PMC6, r9 -BEGIN_FTR_SECTION - mtspr SPRN_PMC7, r10 - mtspr SPRN_PMC8, r11 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) ld r3, HSTATE_MMCR(r13) ld r4, HSTATE_MMCR + 8(r13) ld r5, HSTATE_MMCR + 16(r13) @@ -153,11 +145,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL -BEGIN_FTR_SECTION beq 11f cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI beq cr2, 14f /* HMI check */ -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* RFI into the highmem handler, or branch to interrupt handler */ mfmsr r6 @@ -166,7 +156,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) mtmsrd r6, 1 /* Clear RI in MSR */ mtsrr0 r8 mtsrr1 r7 - beqa 0x500 /* external interrupt (PPC970) */ beq cr1, 13f /* machine check */ RFI @@ -201,8 +190,6 @@ kvmppc_primary_no_guest: bge kvm_novcpu_exit /* another thread already exiting */ li r3, NAPPING_NOVCPU stb r3, HSTATE_NAPPING(r13) - li r3, 1 - stb r3, HSTATE_HWTHREAD_REQ(r13) b kvm_do_nap @@ -293,6 +280,8 @@ kvm_start_guest: /* if we have no vcpu to run, go back to sleep */ beq kvm_no_guest +kvm_secondary_got_guest: + /* Set HSTATE_DSCR(r13) to something sensible */ ld r6, PACA_DSCR(r13) std r6, HSTATE_DSCR(r13) @@ -318,27 +307,46 @@ kvm_start_guest: stwcx. r3, 0, r4 bne 51b +/* + * At this point we have finished executing in the guest. + * We need to wait for hwthread_req to become zero, since + * we may not turn on the MMU while hwthread_req is non-zero. + * While waiting we also need to check if we get given a vcpu to run. + */ kvm_no_guest: - li r0, KVM_HWTHREAD_IN_NAP + lbz r3, HSTATE_HWTHREAD_REQ(r13) + cmpwi r3, 0 + bne 53f + HMT_MEDIUM + li r0, KVM_HWTHREAD_IN_KERNEL stb r0, HSTATE_HWTHREAD_STATE(r13) -kvm_do_nap: - /* Clear the runlatch bit before napping */ - mfspr r2, SPRN_CTRLF - clrrdi r2, r2, 1 - mtspr SPRN_CTRLT, r2 - + /* need to recheck hwthread_req after a barrier, to avoid race */ + sync + lbz r3, HSTATE_HWTHREAD_REQ(r13) + cmpwi r3, 0 + bne 54f +/* + * We jump to power7_wakeup_loss, which will return to the caller + * of power7_nap in the powernv cpu offline loop. The value we + * put in r3 becomes the return value for power7_nap. + */ li r3, LPCR_PECE0 mfspr r4, SPRN_LPCR rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 mtspr SPRN_LPCR, r4 - isync - std r0, HSTATE_SCRATCH0(r13) - ptesync - ld r0, HSTATE_SCRATCH0(r13) -1: cmpd r0, r0 - bne 1b - nap - b . + li r3, 0 + b power7_wakeup_loss + +53: HMT_LOW + ld r4, HSTATE_KVM_VCPU(r13) + cmpdi r4, 0 + beq kvm_no_guest + HMT_MEDIUM + b kvm_secondary_got_guest + +54: li r0, KVM_HWTHREAD_IN_KVM + stb r0, HSTATE_HWTHREAD_STATE(r13) + b kvm_no_guest /****************************************************************************** * * @@ -374,11 +382,8 @@ kvmppc_hv_entry: slbia ptesync -BEGIN_FTR_SECTION - b 30f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) /* - * POWER7 host -> guest partition switch code. + * POWER7/POWER8 host -> guest partition switch code. * We don't have to lock against concurrent tlbies, * but we do have to coordinate across hardware threads. */ @@ -486,97 +491,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) cmpwi r3,512 /* 1 microsecond */ li r12,BOOK3S_INTERRUPT_HV_DECREMENTER blt hdec_soon - b 31f - - /* - * PPC970 host -> guest partition switch code. - * We have to lock against concurrent tlbies, - * using native_tlbie_lock to lock against host tlbies - * and kvm->arch.tlbie_lock to lock against guest tlbies. - * We also have to invalidate the TLB since its - * entries aren't tagged with the LPID. - */ -30: ld r5,HSTATE_KVM_VCORE(r13) - ld r9,VCORE_KVM(r5) /* pointer to struct kvm */ - - /* first take native_tlbie_lock */ - .section ".toc","aw" -toc_tlbie_lock: - .tc native_tlbie_lock[TC],native_tlbie_lock - .previous - ld r3,toc_tlbie_lock@toc(r2) -#ifdef __BIG_ENDIAN__ - lwz r8,PACA_LOCK_TOKEN(r13) -#else - lwz r8,PACAPACAINDEX(r13) -#endif -24: lwarx r0,0,r3 - cmpwi r0,0 - bne 24b - stwcx. r8,0,r3 - bne 24b - isync - - ld r5,HSTATE_KVM_VCORE(r13) - ld r7,VCORE_LPCR(r5) /* use vcore->lpcr to store HID4 */ - li r0,0x18f - rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */ - or r0,r7,r0 - ptesync - sync - mtspr SPRN_HID4,r0 /* switch to reserved LPID */ - isync - li r0,0 - stw r0,0(r3) /* drop native_tlbie_lock */ - - /* invalidate the whole TLB */ - li r0,256 - mtctr r0 - li r6,0 -25: tlbiel r6 - addi r6,r6,0x1000 - bdnz 25b - ptesync - - /* Take the guest's tlbie_lock */ - addi r3,r9,KVM_TLBIE_LOCK -24: lwarx r0,0,r3 - cmpwi r0,0 - bne 24b - stwcx. r8,0,r3 - bne 24b - isync - ld r6,KVM_SDR1(r9) - mtspr SPRN_SDR1,r6 /* switch to partition page table */ - - /* Set up HID4 with the guest's LPID etc. */ - sync - mtspr SPRN_HID4,r7 - isync - - /* drop the guest's tlbie_lock */ - li r0,0 - stw r0,0(r3) - /* Check if HDEC expires soon */ - mfspr r3,SPRN_HDEC - cmpwi r3,10 - li r12,BOOK3S_INTERRUPT_HV_DECREMENTER - blt hdec_soon - - /* Enable HDEC interrupts */ - mfspr r0,SPRN_HID0 - li r3,1 - rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1 - sync - mtspr SPRN_HID0,r0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 -31: /* Do we have a guest vcpu to run? */ cmpdi r4, 0 beq kvmppc_primary_no_guest @@ -606,7 +521,6 @@ kvmppc_got_guest: stb r6, VCPU_VPA_DIRTY(r4) 25: -BEGIN_FTR_SECTION /* Save purr/spurr */ mfspr r5,SPRN_PURR mfspr r6,SPRN_SPURR @@ -616,7 +530,6 @@ BEGIN_FTR_SECTION ld r8,VCPU_SPURR(r4) mtspr SPRN_PURR,r7 mtspr SPRN_SPURR,r8 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) BEGIN_FTR_SECTION /* Set partition DABR */ @@ -625,9 +538,7 @@ BEGIN_FTR_SECTION ld r6,VCPU_DABR(r4) mtspr SPRN_DABRX,r5 mtspr SPRN_DABR,r6 - BEGIN_FTR_SECTION_NESTED(89) isync - END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89) END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -758,20 +669,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) lwz r7, VCPU_PMC + 12(r4) lwz r8, VCPU_PMC + 16(r4) lwz r9, VCPU_PMC + 20(r4) -BEGIN_FTR_SECTION - lwz r10, VCPU_PMC + 24(r4) - lwz r11, VCPU_PMC + 28(r4) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mtspr SPRN_PMC1, r3 mtspr SPRN_PMC2, r5 mtspr SPRN_PMC3, r6 mtspr SPRN_PMC4, r7 mtspr SPRN_PMC5, r8 mtspr SPRN_PMC6, r9 -BEGIN_FTR_SECTION - mtspr SPRN_PMC7, r10 - mtspr SPRN_PMC8, r11 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) ld r3, VCPU_MMCR(r4) ld r5, VCPU_MMCR + 8(r4) ld r6, VCPU_MMCR + 16(r4) @@ -818,14 +721,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ld r30, VCPU_GPR(R30)(r4) ld r31, VCPU_GPR(R31)(r4) -BEGIN_FTR_SECTION /* Switch DSCR to guest value */ ld r5, VCPU_DSCR(r4) mtspr SPRN_DSCR, r5 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) BEGIN_FTR_SECTION - /* Skip next section on POWER7 or PPC970 */ + /* Skip next section on POWER7 */ b 8f END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */ @@ -901,7 +802,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_DAR, r5 mtspr SPRN_DSISR, r6 -BEGIN_FTR_SECTION /* Restore AMR and UAMOR, set AMOR to all 1s */ ld r5,VCPU_AMR(r4) ld r6,VCPU_UAMOR(r4) @@ -909,7 +809,6 @@ BEGIN_FTR_SECTION mtspr SPRN_AMR,r5 mtspr SPRN_UAMOR,r6 mtspr SPRN_AMOR,r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Restore state of CTRL run bit; assume 1 on entry */ lwz r5,VCPU_CTRL(r4) @@ -944,13 +843,11 @@ deliver_guest_interrupt: rldicl r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63 cmpdi cr1, r0, 0 andi. r8, r11, MSR_EE -BEGIN_FTR_SECTION mfspr r8, SPRN_LPCR /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */ rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH mtspr SPRN_LPCR, r8 isync -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) beq 5f li r0, BOOK3S_INTERRUPT_EXTERNAL bne cr1, 12f @@ -1105,15 +1002,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) stw r12,VCPU_TRAP(r9) - /* Save HEIR (HV emulation assist reg) in last_inst + /* Save HEIR (HV emulation assist reg) in emul_inst if this is an HEI (HV emulation interrupt, e40) */ li r3,KVM_INST_FETCH_FAILED -BEGIN_FTR_SECTION cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST bne 11f mfspr r3,SPRN_HEIR -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) -11: stw r3,VCPU_LAST_INST(r9) +11: stw r3,VCPU_HEIR(r9) /* these are volatile across C function calls */ mfctr r3 @@ -1121,13 +1016,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) std r3, VCPU_CTR(r9) stw r4, VCPU_XER(r9) -BEGIN_FTR_SECTION /* If this is a page table miss then see if it's theirs or ours */ cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE beq kvmppc_hdsi cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE beq kvmppc_hisi -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* See if this is a leftover HDEC interrupt */ cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER @@ -1140,11 +1033,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) cmpwi r12,BOOK3S_INTERRUPT_SYSCALL beq hcall_try_real_mode - /* Only handle external interrupts here on arch 206 and later */ -BEGIN_FTR_SECTION - b ext_interrupt_to_host -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) - /* External interrupt ? */ cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL bne+ ext_interrupt_to_host @@ -1174,11 +1062,9 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ mfdsisr r7 std r6, VCPU_DAR(r9) stw r7, VCPU_DSISR(r9) -BEGIN_FTR_SECTION /* don't overwrite fault_dar/fault_dsisr if HDSI */ cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE beq 6f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) std r6, VCPU_FAULT_DAR(r9) stw r7, VCPU_FAULT_DSISR(r9) @@ -1217,7 +1103,6 @@ mc_cont: /* * Save the guest PURR/SPURR */ -BEGIN_FTR_SECTION mfspr r5,SPRN_PURR mfspr r6,SPRN_SPURR ld r7,VCPU_PURR(r9) @@ -1237,7 +1122,6 @@ BEGIN_FTR_SECTION add r4,r4,r6 mtspr SPRN_PURR,r3 mtspr SPRN_SPURR,r4 -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201) /* Save DEC */ mfspr r5,SPRN_DEC @@ -1287,22 +1171,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) 8: /* Save and reset AMR and UAMOR before turning on the MMU */ -BEGIN_FTR_SECTION mfspr r5,SPRN_AMR mfspr r6,SPRN_UAMOR std r5,VCPU_AMR(r9) std r6,VCPU_UAMOR(r9) li r6,0 mtspr SPRN_AMR,r6 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Switch DSCR back to host value */ -BEGIN_FTR_SECTION mfspr r8, SPRN_DSCR ld r7, HSTATE_DSCR(r13) std r8, VCPU_DSCR(r9) mtspr SPRN_DSCR, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Save non-volatile GPRs */ std r14, VCPU_GPR(R14)(r9) @@ -1484,11 +1364,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r4, SPRN_MMCR0 /* save MMCR0 */ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ mfspr r6, SPRN_MMCRA -BEGIN_FTR_SECTION - /* On P7, clear MMCRA in order to disable SDAR updates */ + /* Clear MMCRA in order to disable SDAR updates */ li r7, 0 mtspr SPRN_MMCRA, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) isync beq 21f /* if no VPA, save PMU stuff anyway */ lbz r7, LPPACA_PMCINUSE(r8) @@ -1513,10 +1391,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r6, SPRN_PMC4 mfspr r7, SPRN_PMC5 mfspr r8, SPRN_PMC6 -BEGIN_FTR_SECTION - mfspr r10, SPRN_PMC7 - mfspr r11, SPRN_PMC8 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) stw r3, VCPU_PMC(r9) stw r4, VCPU_PMC + 4(r9) stw r5, VCPU_PMC + 8(r9) @@ -1524,10 +1398,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) stw r7, VCPU_PMC + 16(r9) stw r8, VCPU_PMC + 20(r9) BEGIN_FTR_SECTION - stw r10, VCPU_PMC + 24(r9) - stw r11, VCPU_PMC + 28(r9) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) -BEGIN_FTR_SECTION mfspr r5, SPRN_SIER mfspr r6, SPRN_SPMC1 mfspr r7, SPRN_SPMC2 @@ -1547,11 +1417,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ptesync hdec_soon: /* r12 = trap, r13 = paca */ -BEGIN_FTR_SECTION - b 32f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) /* - * POWER7 guest -> host partition switch code. + * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do * have to coordinate the hardware threads. */ @@ -1679,87 +1546,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 16: ld r8,KVM_HOST_LPCR(r4) mtspr SPRN_LPCR,r8 isync - b 33f - - /* - * PPC970 guest -> host partition switch code. - * We have to lock against concurrent tlbies, and - * we have to flush the whole TLB. - */ -32: ld r5,HSTATE_KVM_VCORE(r13) - ld r4,VCORE_KVM(r5) /* pointer to struct kvm */ - - /* Take the guest's tlbie_lock */ -#ifdef __BIG_ENDIAN__ - lwz r8,PACA_LOCK_TOKEN(r13) -#else - lwz r8,PACAPACAINDEX(r13) -#endif - addi r3,r4,KVM_TLBIE_LOCK -24: lwarx r0,0,r3 - cmpwi r0,0 - bne 24b - stwcx. r8,0,r3 - bne 24b - isync - - ld r7,KVM_HOST_LPCR(r4) /* use kvm->arch.host_lpcr for HID4 */ - li r0,0x18f - rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */ - or r0,r7,r0 - ptesync - sync - mtspr SPRN_HID4,r0 /* switch to reserved LPID */ - isync - li r0,0 - stw r0,0(r3) /* drop guest tlbie_lock */ - - /* invalidate the whole TLB */ - li r0,256 - mtctr r0 - li r6,0 -25: tlbiel r6 - addi r6,r6,0x1000 - bdnz 25b - ptesync - - /* take native_tlbie_lock */ - ld r3,toc_tlbie_lock@toc(2) -24: lwarx r0,0,r3 - cmpwi r0,0 - bne 24b - stwcx. r8,0,r3 - bne 24b - isync - - ld r6,KVM_HOST_SDR1(r4) - mtspr SPRN_SDR1,r6 /* switch to host page table */ - - /* Set up host HID4 value */ - sync - mtspr SPRN_HID4,r7 - isync - li r0,0 - stw r0,0(r3) /* drop native_tlbie_lock */ - - lis r8,0x7fff /* MAX_INT@h */ - mtspr SPRN_HDEC,r8 - - /* Disable HDEC interrupts */ - mfspr r0,SPRN_HID0 - li r3,0 - rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1 - sync - mtspr SPRN_HID0,r0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 /* load host SLB entries */ -33: ld r8,PACA_SLBSHADOWPTR(r13) + ld r8,PACA_SLBSHADOWPTR(r13) .rept SLB_NUM_BOLTED li r3, SLBSHADOW_SAVEAREA @@ -2028,7 +1817,7 @@ hcall_real_table: .long 0 /* 0xd8 */ .long 0 /* 0xdc */ .long DOTSYM(kvmppc_h_cede) - hcall_real_table - .long 0 /* 0xe4 */ + .long DOTSYM(kvmppc_rm_h_confer) - hcall_real_table .long 0 /* 0xe8 */ .long 0 /* 0xec */ .long 0 /* 0xf0 */ @@ -2107,9 +1896,6 @@ _GLOBAL(kvmppc_h_cede) stw r0,VCPU_TRAP(r3) li r0,H_SUCCESS std r0,VCPU_GPR(R3)(r3) -BEGIN_FTR_SECTION - b kvm_cede_exit /* just send it up to host on 970 */ -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) /* * Set our bit in the bitmask of napping threads unless all the @@ -2172,6 +1958,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) * occurs, with PECE1, PECE0 and PECEDP set in LPCR. Also clear the * runlatch bit before napping. */ +kvm_do_nap: mfspr r2, SPRN_CTRLF clrrdi r2, r2, 1 mtspr SPRN_CTRLT, r2 @@ -2435,7 +2222,6 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif mtmsrd r8 - isync addi r3,r3,VCPU_FPRS bl store_fp_state #ifdef CONFIG_ALTIVEC @@ -2471,7 +2257,6 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif mtmsrd r8 - isync addi r3,r4,VCPU_FPRS bl load_fp_state #ifdef CONFIG_ALTIVEC diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index bfb8035314e3..bd6ab1672ae6 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -352,14 +352,6 @@ static inline u32 inst_get_field(u32 inst, int msb, int lsb) return kvmppc_get_field(inst, msb + 32, lsb + 32); } -/* - * Replaces inst bits with ordering according to spec. - */ -static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value) -{ - return kvmppc_set_field(inst, msb + 32, lsb + 32, value); -} - bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst) { if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index cf2eb16846d1..f57383941d03 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -644,11 +644,6 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, return r; } -static inline int get_fpr_index(int i) -{ - return i * TS_FPRWIDTH; -} - /* Give up external provider (FPU, Altivec, VSX) */ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) { diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index eaeb78047fb8..807351f76f84 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -613,10 +613,25 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, * there might be a previously-rejected interrupt needing * to be resent. * + * ICP state: Check_IPI + * * If the CPPR is less favored, then we might be replacing - * an interrupt, and thus need to possibly reject it as in + * an interrupt, and thus need to possibly reject it. * - * ICP state: Check_IPI + * ICP State: IPI + * + * Besides rejecting any pending interrupts, we also + * update XISR and pending_pri to mark IPI as pending. + * + * PAPR does not describe this state, but if the MFRR is being + * made less favored than its earlier value, there might be + * a previously-rejected interrupt needing to be resent. + * Ideally, we would want to resend only if + * prio(pending_interrupt) < mfrr && + * prio(pending_interrupt) < cppr + * where pending interrupt is the one that was rejected. But + * we don't have that state, so we simply trigger a resend + * whenever the MFRR is made less favored. */ do { old_state = new_state = ACCESS_ONCE(icp->state); @@ -629,13 +644,14 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, resend = false; if (mfrr < new_state.cppr) { /* Reject a pending interrupt if not an IPI */ - if (mfrr <= new_state.pending_pri) + if (mfrr <= new_state.pending_pri) { reject = new_state.xisr; - new_state.pending_pri = mfrr; - new_state.xisr = XICS_IPI; + new_state.pending_pri = mfrr; + new_state.xisr = XICS_IPI; + } } - if (mfrr > old_state.mfrr && mfrr > new_state.cppr) { + if (mfrr > old_state.mfrr) { resend = new_state.need_resend; new_state.need_resend = 0; } @@ -789,7 +805,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) if (icp->rm_action & XICS_RM_KICK_VCPU) kvmppc_fast_vcpu_kick(icp->rm_kick_target); if (icp->rm_action & XICS_RM_CHECK_RESEND) - icp_check_resend(xics, icp); + icp_check_resend(xics, icp->rm_resend_icp); if (icp->rm_action & XICS_RM_REJECT) icp_deliver_irq(xics, icp, icp->rm_reject); if (icp->rm_action & XICS_RM_NOTIFY_EOI) diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index e8aaa7a3f209..73f0f2723c07 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h @@ -74,6 +74,7 @@ struct kvmppc_icp { #define XICS_RM_NOTIFY_EOI 0x8 u32 rm_action; struct kvm_vcpu *rm_kick_target; + struct kvmppc_icp *rm_resend_icp; u32 rm_reject; u32 rm_eoied_irq; diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 2e02ed849f36..b29ce752c7d6 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -76,11 +76,11 @@ static inline int local_sid_setup_one(struct id *entry) unsigned long sid; int ret = -1; - sid = ++(__get_cpu_var(pcpu_last_used_sid)); + sid = __this_cpu_inc_return(pcpu_last_used_sid); if (sid < NUM_TIDS) { - __get_cpu_var(pcpu_sids).entry[sid] = entry; + __this_cpu_write(pcpu_sids.entry[sid], entry); entry->val = sid; - entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid]; + entry->pentry = this_cpu_ptr(&pcpu_sids.entry[sid]); ret = sid; } @@ -108,8 +108,8 @@ static inline int local_sid_setup_one(struct id *entry) static inline int local_sid_lookup(struct id *entry) { if (entry && entry->val != 0 && - __get_cpu_var(pcpu_sids).entry[entry->val] == entry && - entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val]) + __this_cpu_read(pcpu_sids.entry[entry->val]) == entry && + entry->pentry == this_cpu_ptr(&pcpu_sids.entry[entry->val])) return entry->val; return -1; } @@ -117,8 +117,8 @@ static inline int local_sid_lookup(struct id *entry) /* Invalidate all id mappings on local core -- call with preempt disabled */ static inline void local_sid_destroy_all(void) { - __get_cpu_var(pcpu_last_used_sid) = 0; - memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids))); + __this_cpu_write(pcpu_last_used_sid, 0); + memset(this_cpu_ptr(&pcpu_sids), 0, sizeof(pcpu_sids)); } static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500) @@ -299,14 +299,6 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) kvmppc_e500_recalc_shadow_pid(to_e500(vcpu)); } -void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) -{ -} - -void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) -{ -} - static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu) { kvmppc_booke_vcpu_load(vcpu, cpu); diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 769778f855b0..cc536d4a75ef 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -661,7 +661,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, if (unlikely((pr && !(mas3 & MAS3_UX)) || (!pr && !(mas3 & MAS3_SX)))) { pr_err_ratelimited( - "%s: Instuction emulation from guest addres %08lx without execute permission\n", + "%s: Instruction emulation from guest address %08lx without execute permission\n", __func__, geaddr); return EMULATE_AGAIN; } @@ -673,7 +673,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, if (has_feature(vcpu, VCPU_FTR_MMU_V2) && unlikely((mas2 & MAS2_I) || (mas2 & MAS2_W) || !(mas2 & MAS2_M))) { pr_err_ratelimited( - "%s: Instuction emulation from guest addres %08lx mismatches storage attributes\n", + "%s: Instruction emulation from guest address %08lx mismatches storage attributes\n", __func__, geaddr); return EMULATE_AGAIN; } @@ -686,7 +686,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, /* Guard against emulation from devices area */ if (unlikely(!page_is_ram(pfn))) { - pr_err_ratelimited("%s: Instruction emulation from non-RAM host addres %08llx is not supported\n", + pr_err_ratelimited("%s: Instruction emulation from non-RAM host address %08llx is not supported\n", __func__, addr); return EMULATE_AGAIN; } diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 2fdc8722e324..cda695de8aa7 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -144,9 +144,9 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_GESR, vcpu->arch.shared->esr); if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || - __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] != vcpu) { + __this_cpu_read(last_vcpu_of_lpid[get_lpid(vcpu)]) != vcpu) { kvmppc_e500_tlbil_all(vcpu_e500); - __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] = vcpu; + __this_cpu_write(last_vcpu_of_lpid[get_lpid(vcpu)], vcpu); } } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index c1f8f53cd312..c45eaab752b0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -527,18 +527,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 0; break; case KVM_CAP_PPC_RMA: - r = hv_enabled; - /* PPC970 requires an RMA */ - if (r && cpu_has_feature(CPU_FTR_ARCH_201)) - r = 2; + r = 0; break; #endif case KVM_CAP_SYNC_MMU: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - if (hv_enabled) - r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; - else - r = 0; + r = hv_enabled; #elif defined(KVM_ARCH_WANT_MMU_NOTIFIER) r = 1; #else diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h new file mode 100644 index 000000000000..f647ce0f428b --- /dev/null +++ b/arch/powerpc/kvm/trace_book3s.h @@ -0,0 +1,32 @@ +#if !defined(_TRACE_KVM_BOOK3S_H) +#define _TRACE_KVM_BOOK3S_H + +/* + * Common defines used by the trace macros in trace_pr.h and trace_hv.h + */ + +#define kvm_trace_symbol_exit \ + {0x100, "SYSTEM_RESET"}, \ + {0x200, "MACHINE_CHECK"}, \ + {0x300, "DATA_STORAGE"}, \ + {0x380, "DATA_SEGMENT"}, \ + {0x400, "INST_STORAGE"}, \ + {0x480, "INST_SEGMENT"}, \ + {0x500, "EXTERNAL"}, \ + {0x501, "EXTERNAL_LEVEL"}, \ + {0x502, "EXTERNAL_HV"}, \ + {0x600, "ALIGNMENT"}, \ + {0x700, "PROGRAM"}, \ + {0x800, "FP_UNAVAIL"}, \ + {0x900, "DECREMENTER"}, \ + {0x980, "HV_DECREMENTER"}, \ + {0xc00, "SYSCALL"}, \ + {0xd00, "TRACE"}, \ + {0xe00, "H_DATA_STORAGE"}, \ + {0xe20, "H_INST_STORAGE"}, \ + {0xe40, "H_EMUL_ASSIST"}, \ + {0xf00, "PERFMON"}, \ + {0xf20, "ALTIVEC"}, \ + {0xf40, "VSX"} + +#endif diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h index f7537cf26ce7..7ec534d1db9f 100644 --- a/arch/powerpc/kvm/trace_booke.h +++ b/arch/powerpc/kvm/trace_booke.h @@ -151,6 +151,47 @@ TRACE_EVENT(kvm_booke206_ref_release, __entry->pfn, __entry->flags) ); +#ifdef CONFIG_SPE_POSSIBLE +#define kvm_trace_symbol_irqprio_spe \ + {BOOKE_IRQPRIO_SPE_UNAVAIL, "SPE_UNAVAIL"}, \ + {BOOKE_IRQPRIO_SPE_FP_DATA, "SPE_FP_DATA"}, \ + {BOOKE_IRQPRIO_SPE_FP_ROUND, "SPE_FP_ROUND"}, +#else +#define kvm_trace_symbol_irqprio_spe +#endif + +#ifdef CONFIG_PPC_E500MC +#define kvm_trace_symbol_irqprio_e500mc \ + {BOOKE_IRQPRIO_ALTIVEC_UNAVAIL, "ALTIVEC_UNAVAIL"}, \ + {BOOKE_IRQPRIO_ALTIVEC_ASSIST, "ALTIVEC_ASSIST"}, +#else +#define kvm_trace_symbol_irqprio_e500mc +#endif + +#define kvm_trace_symbol_irqprio \ + kvm_trace_symbol_irqprio_spe \ + kvm_trace_symbol_irqprio_e500mc \ + {BOOKE_IRQPRIO_DATA_STORAGE, "DATA_STORAGE"}, \ + {BOOKE_IRQPRIO_INST_STORAGE, "INST_STORAGE"}, \ + {BOOKE_IRQPRIO_ALIGNMENT, "ALIGNMENT"}, \ + {BOOKE_IRQPRIO_PROGRAM, "PROGRAM"}, \ + {BOOKE_IRQPRIO_FP_UNAVAIL, "FP_UNAVAIL"}, \ + {BOOKE_IRQPRIO_SYSCALL, "SYSCALL"}, \ + {BOOKE_IRQPRIO_AP_UNAVAIL, "AP_UNAVAIL"}, \ + {BOOKE_IRQPRIO_DTLB_MISS, "DTLB_MISS"}, \ + {BOOKE_IRQPRIO_ITLB_MISS, "ITLB_MISS"}, \ + {BOOKE_IRQPRIO_MACHINE_CHECK, "MACHINE_CHECK"}, \ + {BOOKE_IRQPRIO_DEBUG, "DEBUG"}, \ + {BOOKE_IRQPRIO_CRITICAL, "CRITICAL"}, \ + {BOOKE_IRQPRIO_WATCHDOG, "WATCHDOG"}, \ + {BOOKE_IRQPRIO_EXTERNAL, "EXTERNAL"}, \ + {BOOKE_IRQPRIO_FIT, "FIT"}, \ + {BOOKE_IRQPRIO_DECREMENTER, "DECREMENTER"}, \ + {BOOKE_IRQPRIO_PERFORMANCE_MONITOR, "PERFORMANCE_MONITOR"}, \ + {BOOKE_IRQPRIO_EXTERNAL_LEVEL, "EXTERNAL_LEVEL"}, \ + {BOOKE_IRQPRIO_DBELL, "DBELL"}, \ + {BOOKE_IRQPRIO_DBELL_CRIT, "DBELL_CRIT"} \ + TRACE_EVENT(kvm_booke_queue_irqprio, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority), TP_ARGS(vcpu, priority), @@ -167,8 +208,10 @@ TRACE_EVENT(kvm_booke_queue_irqprio, __entry->pending = vcpu->arch.pending_exceptions; ), - TP_printk("vcpu=%x prio=%x pending=%lx", - __entry->cpu_nr, __entry->priority, __entry->pending) + TP_printk("vcpu=%x prio=%s pending=%lx", + __entry->cpu_nr, + __print_symbolic(__entry->priority, kvm_trace_symbol_irqprio), + __entry->pending) ); #endif diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h new file mode 100644 index 000000000000..33d9daff5783 --- /dev/null +++ b/arch/powerpc/kvm/trace_hv.h @@ -0,0 +1,477 @@ +#if !defined(_TRACE_KVM_HV_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_HV_H + +#include <linux/tracepoint.h> +#include "trace_book3s.h" +#include <asm/hvcall.h> +#include <asm/kvm_asm.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm_hv +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_hv + +#define kvm_trace_symbol_hcall \ + {H_REMOVE, "H_REMOVE"}, \ + {H_ENTER, "H_ENTER"}, \ + {H_READ, "H_READ"}, \ + {H_CLEAR_MOD, "H_CLEAR_MOD"}, \ + {H_CLEAR_REF, "H_CLEAR_REF"}, \ + {H_PROTECT, "H_PROTECT"}, \ + {H_GET_TCE, "H_GET_TCE"}, \ + {H_PUT_TCE, "H_PUT_TCE"}, \ + {H_SET_SPRG0, "H_SET_SPRG0"}, \ + {H_SET_DABR, "H_SET_DABR"}, \ + {H_PAGE_INIT, "H_PAGE_INIT"}, \ + {H_SET_ASR, "H_SET_ASR"}, \ + {H_ASR_ON, "H_ASR_ON"}, \ + {H_ASR_OFF, "H_ASR_OFF"}, \ + {H_LOGICAL_CI_LOAD, "H_LOGICAL_CI_LOAD"}, \ + {H_LOGICAL_CI_STORE, "H_LOGICAL_CI_STORE"}, \ + {H_LOGICAL_CACHE_LOAD, "H_LOGICAL_CACHE_LOAD"}, \ + {H_LOGICAL_CACHE_STORE, "H_LOGICAL_CACHE_STORE"}, \ + {H_LOGICAL_ICBI, "H_LOGICAL_ICBI"}, \ + {H_LOGICAL_DCBF, "H_LOGICAL_DCBF"}, \ + {H_GET_TERM_CHAR, "H_GET_TERM_CHAR"}, \ + {H_PUT_TERM_CHAR, "H_PUT_TERM_CHAR"}, \ + {H_REAL_TO_LOGICAL, "H_REAL_TO_LOGICAL"}, \ + {H_HYPERVISOR_DATA, "H_HYPERVISOR_DATA"}, \ + {H_EOI, "H_EOI"}, \ + {H_CPPR, "H_CPPR"}, \ + {H_IPI, "H_IPI"}, \ + {H_IPOLL, "H_IPOLL"}, \ + {H_XIRR, "H_XIRR"}, \ + {H_PERFMON, "H_PERFMON"}, \ + {H_MIGRATE_DMA, "H_MIGRATE_DMA"}, \ + {H_REGISTER_VPA, "H_REGISTER_VPA"}, \ + {H_CEDE, "H_CEDE"}, \ + {H_CONFER, "H_CONFER"}, \ + {H_PROD, "H_PROD"}, \ + {H_GET_PPP, "H_GET_PPP"}, \ + {H_SET_PPP, "H_SET_PPP"}, \ + {H_PURR, "H_PURR"}, \ + {H_PIC, "H_PIC"}, \ + {H_REG_CRQ, "H_REG_CRQ"}, \ + {H_FREE_CRQ, "H_FREE_CRQ"}, \ + {H_VIO_SIGNAL, "H_VIO_SIGNAL"}, \ + {H_SEND_CRQ, "H_SEND_CRQ"}, \ + {H_COPY_RDMA, "H_COPY_RDMA"}, \ + {H_REGISTER_LOGICAL_LAN, "H_REGISTER_LOGICAL_LAN"}, \ + {H_FREE_LOGICAL_LAN, "H_FREE_LOGICAL_LAN"}, \ + {H_ADD_LOGICAL_LAN_BUFFER, "H_ADD_LOGICAL_LAN_BUFFER"}, \ + {H_SEND_LOGICAL_LAN, "H_SEND_LOGICAL_LAN"}, \ + {H_BULK_REMOVE, "H_BULK_REMOVE"}, \ + {H_MULTICAST_CTRL, "H_MULTICAST_CTRL"}, \ + {H_SET_XDABR, "H_SET_XDABR"}, \ + {H_STUFF_TCE, "H_STUFF_TCE"}, \ + {H_PUT_TCE_INDIRECT, "H_PUT_TCE_INDIRECT"}, \ + {H_CHANGE_LOGICAL_LAN_MAC, "H_CHANGE_LOGICAL_LAN_MAC"}, \ + {H_VTERM_PARTNER_INFO, "H_VTERM_PARTNER_INFO"}, \ + {H_REGISTER_VTERM, "H_REGISTER_VTERM"}, \ + {H_FREE_VTERM, "H_FREE_VTERM"}, \ + {H_RESET_EVENTS, "H_RESET_EVENTS"}, \ + {H_ALLOC_RESOURCE, "H_ALLOC_RESOURCE"}, \ + {H_FREE_RESOURCE, "H_FREE_RESOURCE"}, \ + {H_MODIFY_QP, "H_MODIFY_QP"}, \ + {H_QUERY_QP, "H_QUERY_QP"}, \ + {H_REREGISTER_PMR, "H_REREGISTER_PMR"}, \ + {H_REGISTER_SMR, "H_REGISTER_SMR"}, \ + {H_QUERY_MR, "H_QUERY_MR"}, \ + {H_QUERY_MW, "H_QUERY_MW"}, \ + {H_QUERY_HCA, "H_QUERY_HCA"}, \ + {H_QUERY_PORT, "H_QUERY_PORT"}, \ + {H_MODIFY_PORT, "H_MODIFY_PORT"}, \ + {H_DEFINE_AQP1, "H_DEFINE_AQP1"}, \ + {H_GET_TRACE_BUFFER, "H_GET_TRACE_BUFFER"}, \ + {H_DEFINE_AQP0, "H_DEFINE_AQP0"}, \ + {H_RESIZE_MR, "H_RESIZE_MR"}, \ + {H_ATTACH_MCQP, "H_ATTACH_MCQP"}, \ + {H_DETACH_MCQP, "H_DETACH_MCQP"}, \ + {H_CREATE_RPT, "H_CREATE_RPT"}, \ + {H_REMOVE_RPT, "H_REMOVE_RPT"}, \ + {H_REGISTER_RPAGES, "H_REGISTER_RPAGES"}, \ + {H_DISABLE_AND_GETC, "H_DISABLE_AND_GETC"}, \ + {H_ERROR_DATA, "H_ERROR_DATA"}, \ + {H_GET_HCA_INFO, "H_GET_HCA_INFO"}, \ + {H_GET_PERF_COUNT, "H_GET_PERF_COUNT"}, \ + {H_MANAGE_TRACE, "H_MANAGE_TRACE"}, \ + {H_FREE_LOGICAL_LAN_BUFFER, "H_FREE_LOGICAL_LAN_BUFFER"}, \ + {H_QUERY_INT_STATE, "H_QUERY_INT_STATE"}, \ + {H_POLL_PENDING, "H_POLL_PENDING"}, \ + {H_ILLAN_ATTRIBUTES, "H_ILLAN_ATTRIBUTES"}, \ + {H_MODIFY_HEA_QP, "H_MODIFY_HEA_QP"}, \ + {H_QUERY_HEA_QP, "H_QUERY_HEA_QP"}, \ + {H_QUERY_HEA, "H_QUERY_HEA"}, \ + {H_QUERY_HEA_PORT, "H_QUERY_HEA_PORT"}, \ + {H_MODIFY_HEA_PORT, "H_MODIFY_HEA_PORT"}, \ + {H_REG_BCMC, "H_REG_BCMC"}, \ + {H_DEREG_BCMC, "H_DEREG_BCMC"}, \ + {H_REGISTER_HEA_RPAGES, "H_REGISTER_HEA_RPAGES"}, \ + {H_DISABLE_AND_GET_HEA, "H_DISABLE_AND_GET_HEA"}, \ + {H_GET_HEA_INFO, "H_GET_HEA_INFO"}, \ + {H_ALLOC_HEA_RESOURCE, "H_ALLOC_HEA_RESOURCE"}, \ + {H_ADD_CONN, "H_ADD_CONN"}, \ + {H_DEL_CONN, "H_DEL_CONN"}, \ + {H_JOIN, "H_JOIN"}, \ + {H_VASI_STATE, "H_VASI_STATE"}, \ + {H_ENABLE_CRQ, "H_ENABLE_CRQ"}, \ + {H_GET_EM_PARMS, "H_GET_EM_PARMS"}, \ + {H_SET_MPP, "H_SET_MPP"}, \ + {H_GET_MPP, "H_GET_MPP"}, \ + {H_HOME_NODE_ASSOCIATIVITY, "H_HOME_NODE_ASSOCIATIVITY"}, \ + {H_BEST_ENERGY, "H_BEST_ENERGY"}, \ + {H_XIRR_X, "H_XIRR_X"}, \ + {H_RANDOM, "H_RANDOM"}, \ + {H_COP, "H_COP"}, \ + {H_GET_MPP_X, "H_GET_MPP_X"}, \ + {H_SET_MODE, "H_SET_MODE"}, \ + {H_RTAS, "H_RTAS"} + +#define kvm_trace_symbol_kvmret \ + {RESUME_GUEST, "RESUME_GUEST"}, \ + {RESUME_GUEST_NV, "RESUME_GUEST_NV"}, \ + {RESUME_HOST, "RESUME_HOST"}, \ + {RESUME_HOST_NV, "RESUME_HOST_NV"} + +#define kvm_trace_symbol_hcall_rc \ + {H_SUCCESS, "H_SUCCESS"}, \ + {H_BUSY, "H_BUSY"}, \ + {H_CLOSED, "H_CLOSED"}, \ + {H_NOT_AVAILABLE, "H_NOT_AVAILABLE"}, \ + {H_CONSTRAINED, "H_CONSTRAINED"}, \ + {H_PARTIAL, "H_PARTIAL"}, \ + {H_IN_PROGRESS, "H_IN_PROGRESS"}, \ + {H_PAGE_REGISTERED, "H_PAGE_REGISTERED"}, \ + {H_PARTIAL_STORE, "H_PARTIAL_STORE"}, \ + {H_PENDING, "H_PENDING"}, \ + {H_CONTINUE, "H_CONTINUE"}, \ + {H_LONG_BUSY_START_RANGE, "H_LONG_BUSY_START_RANGE"}, \ + {H_LONG_BUSY_ORDER_1_MSEC, "H_LONG_BUSY_ORDER_1_MSEC"}, \ + {H_LONG_BUSY_ORDER_10_MSEC, "H_LONG_BUSY_ORDER_10_MSEC"}, \ + {H_LONG_BUSY_ORDER_100_MSEC, "H_LONG_BUSY_ORDER_100_MSEC"}, \ + {H_LONG_BUSY_ORDER_1_SEC, "H_LONG_BUSY_ORDER_1_SEC"}, \ + {H_LONG_BUSY_ORDER_10_SEC, "H_LONG_BUSY_ORDER_10_SEC"}, \ + {H_LONG_BUSY_ORDER_100_SEC, "H_LONG_BUSY_ORDER_100_SEC"}, \ + {H_LONG_BUSY_END_RANGE, "H_LONG_BUSY_END_RANGE"}, \ + {H_TOO_HARD, "H_TOO_HARD"}, \ + {H_HARDWARE, "H_HARDWARE"}, \ + {H_FUNCTION, "H_FUNCTION"}, \ + {H_PRIVILEGE, "H_PRIVILEGE"}, \ + {H_PARAMETER, "H_PARAMETER"}, \ + {H_BAD_MODE, "H_BAD_MODE"}, \ + {H_PTEG_FULL, "H_PTEG_FULL"}, \ + {H_NOT_FOUND, "H_NOT_FOUND"}, \ + {H_RESERVED_DABR, "H_RESERVED_DABR"}, \ + {H_NO_MEM, "H_NO_MEM"}, \ + {H_AUTHORITY, "H_AUTHORITY"}, \ + {H_PERMISSION, "H_PERMISSION"}, \ + {H_DROPPED, "H_DROPPED"}, \ + {H_SOURCE_PARM, "H_SOURCE_PARM"}, \ + {H_DEST_PARM, "H_DEST_PARM"}, \ + {H_REMOTE_PARM, "H_REMOTE_PARM"}, \ + {H_RESOURCE, "H_RESOURCE"}, \ + {H_ADAPTER_PARM, "H_ADAPTER_PARM"}, \ + {H_RH_PARM, "H_RH_PARM"}, \ + {H_RCQ_PARM, "H_RCQ_PARM"}, \ + {H_SCQ_PARM, "H_SCQ_PARM"}, \ + {H_EQ_PARM, "H_EQ_PARM"}, \ + {H_RT_PARM, "H_RT_PARM"}, \ + {H_ST_PARM, "H_ST_PARM"}, \ + {H_SIGT_PARM, "H_SIGT_PARM"}, \ + {H_TOKEN_PARM, "H_TOKEN_PARM"}, \ + {H_MLENGTH_PARM, "H_MLENGTH_PARM"}, \ + {H_MEM_PARM, "H_MEM_PARM"}, \ + {H_MEM_ACCESS_PARM, "H_MEM_ACCESS_PARM"}, \ + {H_ATTR_PARM, "H_ATTR_PARM"}, \ + {H_PORT_PARM, "H_PORT_PARM"}, \ + {H_MCG_PARM, "H_MCG_PARM"}, \ + {H_VL_PARM, "H_VL_PARM"}, \ + {H_TSIZE_PARM, "H_TSIZE_PARM"}, \ + {H_TRACE_PARM, "H_TRACE_PARM"}, \ + {H_MASK_PARM, "H_MASK_PARM"}, \ + {H_MCG_FULL, "H_MCG_FULL"}, \ + {H_ALIAS_EXIST, "H_ALIAS_EXIST"}, \ + {H_P_COUNTER, "H_P_COUNTER"}, \ + {H_TABLE_FULL, "H_TABLE_FULL"}, \ + {H_ALT_TABLE, "H_ALT_TABLE"}, \ + {H_MR_CONDITION, "H_MR_CONDITION"}, \ + {H_NOT_ENOUGH_RESOURCES, "H_NOT_ENOUGH_RESOURCES"}, \ + {H_R_STATE, "H_R_STATE"}, \ + {H_RESCINDED, "H_RESCINDED"}, \ + {H_P2, "H_P2"}, \ + {H_P3, "H_P3"}, \ + {H_P4, "H_P4"}, \ + {H_P5, "H_P5"}, \ + {H_P6, "H_P6"}, \ + {H_P7, "H_P7"}, \ + {H_P8, "H_P8"}, \ + {H_P9, "H_P9"}, \ + {H_TOO_BIG, "H_TOO_BIG"}, \ + {H_OVERLAP, "H_OVERLAP"}, \ + {H_INTERRUPT, "H_INTERRUPT"}, \ + {H_BAD_DATA, "H_BAD_DATA"}, \ + {H_NOT_ACTIVE, "H_NOT_ACTIVE"}, \ + {H_SG_LIST, "H_SG_LIST"}, \ + {H_OP_MODE, "H_OP_MODE"}, \ + {H_COP_HW, "H_COP_HW"}, \ + {H_UNSUPPORTED_FLAG_START, "H_UNSUPPORTED_FLAG_START"}, \ + {H_UNSUPPORTED_FLAG_END, "H_UNSUPPORTED_FLAG_END"}, \ + {H_MULTI_THREADS_ACTIVE, "H_MULTI_THREADS_ACTIVE"}, \ + {H_OUTSTANDING_COP_OPS, "H_OUTSTANDING_COP_OPS"} + +TRACE_EVENT(kvm_guest_enter, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, pc) + __field(unsigned long, pending_exceptions) + __field(u8, ceded) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->pc = kvmppc_get_pc(vcpu); + __entry->ceded = vcpu->arch.ceded; + __entry->pending_exceptions = vcpu->arch.pending_exceptions; + ), + + TP_printk("VCPU %d: pc=0x%lx pexcp=0x%lx ceded=%d", + __entry->vcpu_id, + __entry->pc, + __entry->pending_exceptions, __entry->ceded) +); + +TRACE_EVENT(kvm_guest_exit, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, trap) + __field(unsigned long, pc) + __field(unsigned long, msr) + __field(u8, ceded) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->trap = vcpu->arch.trap; + __entry->ceded = vcpu->arch.ceded; + __entry->pc = kvmppc_get_pc(vcpu); + __entry->msr = vcpu->arch.shregs.msr; + ), + + TP_printk("VCPU %d: trap=%s pc=0x%lx msr=0x%lx, ceded=%d", + __entry->vcpu_id, + __print_symbolic(__entry->trap, kvm_trace_symbol_exit), + __entry->pc, __entry->msr, __entry->ceded + ) +); + +TRACE_EVENT(kvm_page_fault_enter, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep, + struct kvm_memory_slot *memslot, unsigned long ea, + unsigned long dsisr), + + TP_ARGS(vcpu, hptep, memslot, ea, dsisr), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, hpte_v) + __field(unsigned long, hpte_r) + __field(unsigned long, gpte_r) + __field(unsigned long, ea) + __field(u64, base_gfn) + __field(u32, slot_flags) + __field(u32, dsisr) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->hpte_v = hptep[0]; + __entry->hpte_r = hptep[1]; + __entry->gpte_r = hptep[2]; + __entry->ea = ea; + __entry->dsisr = dsisr; + __entry->base_gfn = memslot ? memslot->base_gfn : -1UL; + __entry->slot_flags = memslot ? memslot->flags : 0; + ), + + TP_printk("VCPU %d: hpte=0x%lx:0x%lx guest=0x%lx ea=0x%lx,%x slot=0x%llx,0x%x", + __entry->vcpu_id, + __entry->hpte_v, __entry->hpte_r, __entry->gpte_r, + __entry->ea, __entry->dsisr, + __entry->base_gfn, __entry->slot_flags) +); + +TRACE_EVENT(kvm_page_fault_exit, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep, long ret), + + TP_ARGS(vcpu, hptep, ret), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, hpte_v) + __field(unsigned long, hpte_r) + __field(long, ret) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->hpte_v = hptep[0]; + __entry->hpte_r = hptep[1]; + __entry->ret = ret; + ), + + TP_printk("VCPU %d: hpte=0x%lx:0x%lx ret=0x%lx", + __entry->vcpu_id, + __entry->hpte_v, __entry->hpte_r, __entry->ret) +); + +TRACE_EVENT(kvm_hcall_enter, + TP_PROTO(struct kvm_vcpu *vcpu), + + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, req) + __field(unsigned long, gpr4) + __field(unsigned long, gpr5) + __field(unsigned long, gpr6) + __field(unsigned long, gpr7) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->req = kvmppc_get_gpr(vcpu, 3); + __entry->gpr4 = kvmppc_get_gpr(vcpu, 4); + __entry->gpr5 = kvmppc_get_gpr(vcpu, 5); + __entry->gpr6 = kvmppc_get_gpr(vcpu, 6); + __entry->gpr7 = kvmppc_get_gpr(vcpu, 7); + ), + + TP_printk("VCPU %d: hcall=%s GPR4-7=0x%lx,0x%lx,0x%lx,0x%lx", + __entry->vcpu_id, + __print_symbolic(__entry->req, kvm_trace_symbol_hcall), + __entry->gpr4, __entry->gpr5, __entry->gpr6, __entry->gpr7) +); + +TRACE_EVENT(kvm_hcall_exit, + TP_PROTO(struct kvm_vcpu *vcpu, int ret), + + TP_ARGS(vcpu, ret), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(unsigned long, ret) + __field(unsigned long, hcall_rc) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->ret = ret; + __entry->hcall_rc = kvmppc_get_gpr(vcpu, 3); + ), + + TP_printk("VCPU %d: ret=%s hcall_rc=%s", + __entry->vcpu_id, + __print_symbolic(__entry->ret, kvm_trace_symbol_kvmret), + __print_symbolic(__entry->ret & RESUME_FLAG_HOST ? + H_TOO_HARD : __entry->hcall_rc, + kvm_trace_symbol_hcall_rc)) +); + +TRACE_EVENT(kvmppc_run_core, + TP_PROTO(struct kvmppc_vcore *vc, int where), + + TP_ARGS(vc, where), + + TP_STRUCT__entry( + __field(int, n_runnable) + __field(int, runner_vcpu) + __field(int, where) + __field(pid_t, tgid) + ), + + TP_fast_assign( + __entry->runner_vcpu = vc->runner->vcpu_id; + __entry->n_runnable = vc->n_runnable; + __entry->where = where; + __entry->tgid = current->tgid; + ), + + TP_printk("%s runner_vcpu==%d runnable=%d tgid=%d", + __entry->where ? "Exit" : "Enter", + __entry->runner_vcpu, __entry->n_runnable, __entry->tgid) +); + +TRACE_EVENT(kvmppc_vcore_blocked, + TP_PROTO(struct kvmppc_vcore *vc, int where), + + TP_ARGS(vc, where), + + TP_STRUCT__entry( + __field(int, n_runnable) + __field(int, runner_vcpu) + __field(int, where) + __field(pid_t, tgid) + ), + + TP_fast_assign( + __entry->runner_vcpu = vc->runner->vcpu_id; + __entry->n_runnable = vc->n_runnable; + __entry->where = where; + __entry->tgid = current->tgid; + ), + + TP_printk("%s runner_vcpu=%d runnable=%d tgid=%d", + __entry->where ? "Exit" : "Enter", + __entry->runner_vcpu, __entry->n_runnable, __entry->tgid) +); + +TRACE_EVENT(kvmppc_run_vcpu_enter, + TP_PROTO(struct kvm_vcpu *vcpu), + + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(pid_t, tgid) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->tgid = current->tgid; + ), + + TP_printk("VCPU %d: tgid=%d", __entry->vcpu_id, __entry->tgid) +); + +TRACE_EVENT(kvmppc_run_vcpu_exit, + TP_PROTO(struct kvm_vcpu *vcpu, struct kvm_run *run), + + TP_ARGS(vcpu, run), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, exit) + __field(int, ret) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; + __entry->exit = run->exit_reason; + __entry->ret = vcpu->arch.ret; + ), + + TP_printk("VCPU %d: exit=%d, ret=%d", + __entry->vcpu_id, __entry->exit, __entry->ret) +); + +#endif /* _TRACE_KVM_HV_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h index e1357cd8dc1f..810507cb688a 100644 --- a/arch/powerpc/kvm/trace_pr.h +++ b/arch/powerpc/kvm/trace_pr.h @@ -3,36 +3,13 @@ #define _TRACE_KVM_PR_H #include <linux/tracepoint.h> +#include "trace_book3s.h" #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_pr #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE trace_pr -#define kvm_trace_symbol_exit \ - {0x100, "SYSTEM_RESET"}, \ - {0x200, "MACHINE_CHECK"}, \ - {0x300, "DATA_STORAGE"}, \ - {0x380, "DATA_SEGMENT"}, \ - {0x400, "INST_STORAGE"}, \ - {0x480, "INST_SEGMENT"}, \ - {0x500, "EXTERNAL"}, \ - {0x501, "EXTERNAL_LEVEL"}, \ - {0x502, "EXTERNAL_HV"}, \ - {0x600, "ALIGNMENT"}, \ - {0x700, "PROGRAM"}, \ - {0x800, "FP_UNAVAIL"}, \ - {0x900, "DECREMENTER"}, \ - {0x980, "HV_DECREMENTER"}, \ - {0xc00, "SYSCALL"}, \ - {0xd00, "TRACE"}, \ - {0xe00, "H_DATA_STORAGE"}, \ - {0xe20, "H_INST_STORAGE"}, \ - {0xe40, "H_EMUL_ASSIST"}, \ - {0xf00, "PERFMON"}, \ - {0xf20, "ALTIVEC"}, \ - {0xf40, "VSX"} - TRACE_EVENT(kvm_book3s_reenter, TP_PROTO(int r, struct kvm_vcpu *vcpu), TP_ARGS(r, vcpu), diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 9f342f134ae4..597562f69b2d 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -12,7 +12,6 @@ CFLAGS_REMOVE_feature-fixups.o = -pg obj-y := string.o alloc.o \ crtsavres.o ppc_ksyms.o obj-$(CONFIG_PPC32) += div64.o copy_32.o -obj-$(CONFIG_HAS_IOMEM) += devres.o obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \ usercopy_64.o mem_64.o string.o \ diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c index da22c84a8fed..4a6c2cf890d9 100644 --- a/arch/powerpc/lib/alloc.c +++ b/arch/powerpc/lib/alloc.c @@ -13,9 +13,7 @@ void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask) if (mem_init_done) p = kzalloc(size, mask); else { - p = alloc_bootmem(size); - if (p) - memset(p, 0, size); + p = memblock_virt_alloc(size, 0); } return p; } diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index c46c876ac96a..92ee840529bc 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -718,4 +718,4 @@ err3; stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE b exit_vmx_usercopy /* tail call optimise */ -#endif /* CONFiG_ALTIVEC */ +#endif /* CONFIG_ALTIVEC */ diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c deleted file mode 100644 index 8df55fc3aad6..000000000000 --- a/arch/powerpc/lib/devres.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) 2008 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/device.h> /* devres_*(), devm_ioremap_release() */ -#include <linux/gfp.h> -#include <linux/io.h> /* ioremap_prot() */ -#include <linux/export.h> /* EXPORT_SYMBOL() */ - -/** - * devm_ioremap_prot - Managed ioremap_prot() - * @dev: Generic device to remap IO address for - * @offset: BUS offset to map - * @size: Size of map - * @flags: Page flags - * - * Managed ioremap_prot(). Map is automatically unmapped on driver - * detach. - */ -void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset, - size_t size, unsigned long flags) -{ - void __iomem **ptr, *addr; - - ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); - if (!ptr) - return NULL; - - addr = ioremap_prot(offset, size, flags); - if (addr) { - *ptr = addr; - devres_add(dev, ptr); - } else - devres_free(ptr); - - return addr; -} -EXPORT_SYMBOL(devm_ioremap_prot); diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S index 2ff5c142f87b..0830587df16e 100644 --- a/arch/powerpc/lib/memcpy_power7.S +++ b/arch/powerpc/lib/memcpy_power7.S @@ -653,4 +653,4 @@ _GLOBAL(memcpy_power7) 15: addi r1,r1,STACKFRAMESIZE ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) b exit_vmx_copy /* tail call optimise */ -#endif /* CONFiG_ALTIVEC */ +#endif /* CONFIG_ALTIVEC */ diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 54651fc2d412..dc885b30f7a6 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1865,6 +1865,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) } goto ldst_done; +#ifdef CONFIG_PPC_FPU case LOAD_FP: if (regs->msr & MSR_LE) return 0; @@ -1873,7 +1874,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) else err = do_fp_load(op.reg, do_lfd, op.ea, size, regs); goto ldst_done; - +#endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: if (regs->msr & MSR_LE) @@ -1919,6 +1920,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) err = write_mem(op.val, op.ea, size, regs); goto ldst_done; +#ifdef CONFIG_PPC_FPU case STORE_FP: if (regs->msr & MSR_LE) return 0; @@ -1927,7 +1929,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) else err = do_fp_store(op.reg, do_stfd, op.ea, size, regs); goto ldst_done; - +#endif #ifdef CONFIG_ALTIVEC case STORE_VMX: if (regs->msr & MSR_LE) diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 325e861616a1..438dcd3fd0d1 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -6,7 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y := fault.o mem.o pgtable.o gup.o mmap.o \ +obj-y := fault.o mem.o pgtable.o mmap.o \ init_$(CONFIG_WORD_SIZE).o \ pgtable_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 08d659a9fcdb..eb79907f34fa 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -43,7 +43,6 @@ #include <asm/tlbflush.h> #include <asm/siginfo.h> #include <asm/debug.h> -#include <mm/mmu_decl.h> #include "icswx.h" @@ -380,12 +379,6 @@ good_area: goto bad_area; #endif /* CONFIG_6xx */ #if defined(CONFIG_8xx) - /* 8xx sometimes need to load a invalid/non-present TLBs. - * These must be invalidated separately as linux mm don't. - */ - if (error_code & 0x40000000) /* no translation? */ - _tlbil_va(address, 0, 0, 0); - /* The MPC8xx seems to always set 0x80000000, which is * "undefined". Of those that can be set, this is the only * one which seems bad. diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c deleted file mode 100644 index d8746684f606..000000000000 --- a/arch/powerpc/mm/gup.c +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Lockless get_user_pages_fast for powerpc - * - * Copyright (C) 2008 Nick Piggin - * Copyright (C) 2008 Novell Inc. - */ -#undef DEBUG - -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/hugetlb.h> -#include <linux/vmstat.h> -#include <linux/pagemap.h> -#include <linux/rwsem.h> -#include <asm/pgtable.h> - -#ifdef __HAVE_ARCH_PTE_SPECIAL - -/* - * The performance critical leaf functions are made noinline otherwise gcc - * inlines everything into a single function which results in too much - * register pressure. - */ -static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long mask, result; - pte_t *ptep; - - result = _PAGE_PRESENT|_PAGE_USER; - if (write) - result |= _PAGE_RW; - mask = result | _PAGE_SPECIAL; - - ptep = pte_offset_kernel(&pmd, addr); - do { - pte_t pte = ACCESS_ONCE(*ptep); - struct page *page; - /* - * Similar to the PMD case, NUMA hinting must take slow path - */ - if (pte_numa(pte)) - return 0; - - if ((pte_val(pte) & mask) != result) - return 0; - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); - if (!page_cache_get_speculative(page)) - return 0; - if (unlikely(pte_val(pte) != pte_val(*ptep))) { - put_page(page); - return 0; - } - pages[*nr] = page; - (*nr)++; - - } while (ptep++, addr += PAGE_SIZE, addr != end); - - return 1; -} - -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pmd_t *pmdp; - - pmdp = pmd_offset(&pud, addr); - do { - pmd_t pmd = ACCESS_ONCE(*pmdp); - - next = pmd_addr_end(addr, end); - /* - * If we find a splitting transparent hugepage we - * return zero. That will result in taking the slow - * path which will call wait_split_huge_page() - * if the pmd is still in splitting state - */ - if (pmd_none(pmd) || pmd_trans_splitting(pmd)) - return 0; - if (pmd_huge(pmd) || pmd_large(pmd)) { - /* - * NUMA hinting faults need to be handled in the GUP - * slowpath for accounting purposes and so that they - * can be serialised against THP migration. - */ - if (pmd_numa(pmd)) - return 0; - - if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, - write, pages, nr)) - return 0; - } else if (is_hugepd(pmdp)) { - if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT, - addr, next, write, pages, nr)) - return 0; - } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) - return 0; - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp; - - pudp = pud_offset(&pgd, addr); - do { - pud_t pud = ACCESS_ONCE(*pudp); - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (pud_huge(pud)) { - if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next, - write, pages, nr)) - return 0; - } else if (is_hugepd(pudp)) { - if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT, - addr, next, write, pages, nr)) - return 0; - } else if (!gup_pmd_range(pud, addr, next, write, pages, nr)) - return 0; - } while (pudp++, addr = next, addr != end); - - return 1; -} - -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - unsigned long flags; - pgd_t *pgdp; - int nr = 0; - - pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read"); - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - - if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, - start, len))) - return 0; - - pr_devel(" aligned: %lx .. %lx\n", start, end); - - /* - * XXX: batch / limit 'nr', to avoid large irq off latency - * needs some instrumenting to determine the common sizes used by - * important workloads (eg. DB2), and whether limiting the batch size - * will decrease performance. - * - * It seems like we're in the clear for the moment. Direct-IO is - * the main guy that batches up lots of get_user_pages, and even - * they are limited to 64-at-a-time which is not so many. - */ - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables from being freed on powerpc. - * - * So long as we atomically load page table pointers versus teardown, - * we can follow the address down to the the page and take a ref on it. - */ - local_irq_save(flags); - - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = ACCESS_ONCE(*pgdp); - - pr_devel(" %016lx: normal pgd %p\n", addr, - (void *)pgd_val(pgd)); - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (pgd_huge(pgd)) { - if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next, - write, pages, &nr)) - break; - } else if (is_hugepd(pgdp)) { - if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT, - addr, next, write, pages, &nr)) - break; - } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - - local_irq_restore(flags); - - return nr; -} - -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - int nr, ret; - - start &= PAGE_MASK; - nr = __get_user_pages_fast(start, nr_pages, write, pages); - ret = nr; - - if (nr < nr_pages) { - pr_devel(" slow path ! nr = %d\n", nr); - - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, - nr_pages - nr, write, 0, pages, NULL); - up_read(&mm->mmap_sem); - - /* Have to be a bit careful with return values */ - if (nr > 0) { - if (ret < 0) - ret = nr; - else - ret += nr; - } - } - - return ret; -} - -#endif /* __HAVE_ARCH_PTE_SPECIAL */ diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 057cbbb4c576..463174a4a647 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -46,7 +46,8 @@ /* * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, int local, int ssize) + * pte_t *ptep, unsigned long trap, unsigned long flags, + * int ssize) * * Adds a 4K page to the hash table in a segment of 4K pages only */ @@ -298,7 +299,7 @@ htab_modify_pte: li r6,MMU_PAGE_4K /* base page size */ li r7,MMU_PAGE_4K /* actual page size */ ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "local" param */ + ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ .globl htab_call_hpte_updatepp htab_call_hpte_updatepp: bl . /* Patched by htab_finish_init() */ @@ -338,8 +339,8 @@ htab_pte_insert_failure: *****************************************************************************/ /* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, int local, int ssize, - * int subpg_prot) + * pte_t *ptep, unsigned long trap, unsigned local flags, + * int ssize, int subpg_prot) */ /* @@ -514,7 +515,7 @@ htab_insert_pte: andis. r0,r31,_PAGE_4K_PFN@h srdi r5,r31,PTE_RPN_SHIFT bne- htab_special_pfn - sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT + sldi r5,r5,PAGE_FACTOR add r5,r5,r25 htab_special_pfn: sldi r5,r5,HW_PAGE_SHIFT @@ -544,7 +545,7 @@ htab_call_hpte_insert1: andis. r0,r31,_PAGE_4K_PFN@h srdi r5,r31,PTE_RPN_SHIFT bne- 3f - sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT + sldi r5,r5,PAGE_FACTOR add r5,r5,r25 3: sldi r5,r5,HW_PAGE_SHIFT @@ -594,7 +595,7 @@ htab_inval_old_hpte: li r5,0 /* PTE.hidx */ li r6,MMU_PAGE_64K /* psize */ ld r7,STK_PARAM(R9)(r1) /* ssize */ - ld r8,STK_PARAM(R8)(r1) /* local */ + ld r8,STK_PARAM(R8)(r1) /* flags */ bl flush_hash_page /* Clear out _PAGE_HPTE_SUB bits in the new linux PTE */ lis r0,_PAGE_HPTE_SUB@h @@ -666,7 +667,7 @@ htab_modify_pte: li r6,MMU_PAGE_4K /* base page size */ li r7,MMU_PAGE_4K /* actual page size */ ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "local" param */ + ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ .globl htab_call_hpte_updatepp htab_call_hpte_updatepp: bl . /* patched by htab_finish_init() */ @@ -962,7 +963,7 @@ ht64_modify_pte: li r6,MMU_PAGE_64K /* base page size */ li r7,MMU_PAGE_64K /* actual page size */ ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "local" param */ + ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ .globl ht64_call_hpte_updatepp ht64_call_hpte_updatepp: bl . /* patched by htab_finish_init() */ diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index ae4962a06476..9c4880ddecd6 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -283,19 +283,17 @@ static long native_hpte_remove(unsigned long hpte_group) static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int bpsize, - int apsize, int ssize, int local) + int apsize, int ssize, unsigned long flags) { struct hash_pte *hptep = htab_address + slot; unsigned long hpte_v, want_v; - int ret = 0; + int ret = 0, local = 0; want_v = hpte_encode_avpn(vpn, bpsize, ssize); DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)", vpn, want_v & HPTE_V_AVPN, slot, newpp); - native_lock_hpte(hptep); - hpte_v = be64_to_cpu(hptep->v); /* * We need to invalidate the TLB always because hpte_remove doesn't do @@ -308,15 +306,30 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" -> miss\n"); ret = -1; } else { - DBG_LOW(" -> hit\n"); - /* Update the HPTE */ - hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C))); + native_lock_hpte(hptep); + /* recheck with locks held */ + hpte_v = be64_to_cpu(hptep->v); + if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) || + !(hpte_v & HPTE_V_VALID))) { + ret = -1; + } else { + DBG_LOW(" -> hit\n"); + /* Update the HPTE */ + hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & + ~(HPTE_R_PP | HPTE_R_N)) | + (newpp & (HPTE_R_PP | HPTE_R_N | + HPTE_R_C))); + } + native_unlock_hpte(hptep); } - native_unlock_hpte(hptep); - /* Ensure it is out of the tlb too. */ - tlbie(vpn, bpsize, apsize, ssize, local); + if (flags & HPTE_LOCAL_UPDATE) + local = 1; + /* + * Ensure it is out of the tlb too if it is not a nohpte fault + */ + if (!(flags & HPTE_NOHPTE_UPDATE)) + tlbie(vpn, bpsize, apsize, ssize, local); return ret; } @@ -419,7 +432,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, static void native_hugepage_invalidate(unsigned long vsid, unsigned long addr, unsigned char *hpte_slot_array, - int psize, int ssize) + int psize, int ssize, int local) { int i; struct hash_pte *hptep; @@ -465,7 +478,7 @@ static void native_hugepage_invalidate(unsigned long vsid, * instruction compares entry_VA in tlb with the VA specified * here */ - tlbie(vpn, psize, actual_psize, ssize, 0); + tlbie(vpn, psize, actual_psize, ssize, local); } local_irq_restore(flags); } @@ -629,7 +642,7 @@ static void native_flush_hash_range(unsigned long number, int local) unsigned long want_v; unsigned long flags; real_pte_t pte; - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); unsigned long psize = batch->psize; int ssize = batch->ssize; int i; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index d5339a3b9945..2c2022d16059 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -989,7 +989,9 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, * -1 - critical hash insertion error * -2 - access not permitted by subpage protection mechanism */ -int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) +int hash_page_mm(struct mm_struct *mm, unsigned long ea, + unsigned long access, unsigned long trap, + unsigned long flags) { enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; @@ -997,7 +999,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u pte_t *ptep; unsigned hugeshift; const struct cpumask *tmp; - int rc, user_region = 0, local = 0; + int rc, user_region = 0; int psize, ssize; DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", @@ -1049,7 +1051,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u /* Check CPU locality */ tmp = cpumask_of(smp_processor_id()); if (user_region && cpumask_equal(mm_cpumask(mm), tmp)) - local = 1; + flags |= HPTE_LOCAL_UPDATE; #ifndef CONFIG_PPC_64K_PAGES /* If we use 4K pages and our psize is not 4K, then we might @@ -1086,11 +1088,11 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u if (hugeshift) { if (pmd_trans_huge(*(pmd_t *)ptep)) rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep, - trap, local, ssize, psize); + trap, flags, ssize, psize); #ifdef CONFIG_HUGETLB_PAGE else rc = __hash_page_huge(ea, access, vsid, ptep, trap, - local, ssize, hugeshift, psize); + flags, ssize, hugeshift, psize); #else else { /* @@ -1149,7 +1151,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u #ifdef CONFIG_PPC_HAS_HASH_64K if (psize == MMU_PAGE_64K) - rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); + rc = __hash_page_64K(ea, access, vsid, ptep, trap, + flags, ssize); else #endif /* CONFIG_PPC_HAS_HASH_64K */ { @@ -1158,7 +1161,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u rc = -2; else rc = __hash_page_4K(ea, access, vsid, ptep, trap, - local, ssize, spp); + flags, ssize, spp); } /* Dump some info in case of hash insertion failure, they should @@ -1181,14 +1184,19 @@ bail: } EXPORT_SYMBOL_GPL(hash_page_mm); -int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +int hash_page(unsigned long ea, unsigned long access, unsigned long trap, + unsigned long dsisr) { + unsigned long flags = 0; struct mm_struct *mm = current->mm; if (REGION_ID(ea) == VMALLOC_REGION_ID) mm = &init_mm; - return hash_page_mm(mm, ea, access, trap); + if (dsisr & DSISR_NOHPTE) + flags |= HPTE_NOHPTE_UPDATE; + + return hash_page_mm(mm, ea, access, trap, flags); } EXPORT_SYMBOL_GPL(hash_page); @@ -1200,7 +1208,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, pgd_t *pgdir; pte_t *ptep; unsigned long flags; - int rc, ssize, local = 0; + int rc, ssize, update_flags = 0; BUG_ON(REGION_ID(ea) != USER_REGION_ID); @@ -1251,16 +1259,17 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* Is that local to this CPU ? */ if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - local = 1; + update_flags |= HPTE_LOCAL_UPDATE; /* Hash it in */ #ifdef CONFIG_PPC_HAS_HASH_64K if (mm->context.user_psize == MMU_PAGE_64K) - rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); + rc = __hash_page_64K(ea, access, vsid, ptep, trap, + update_flags, ssize); else #endif /* CONFIG_PPC_HAS_HASH_64K */ - rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, - subpage_protection(mm, ea)); + rc = __hash_page_4K(ea, access, vsid, ptep, trap, update_flags, + ssize, subpage_protection(mm, ea)); /* Dump some info in case of hash insertion failure, they should * never happen so it is really useful to know if/when they do @@ -1278,9 +1287,10 @@ out_exit: * do not forget to update the assembly call site ! */ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, - int local) + unsigned long flags) { unsigned long hash, index, shift, hidx, slot; + int local = flags & HPTE_LOCAL_UPDATE; DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn); pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { @@ -1315,6 +1325,78 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, #endif } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void flush_hash_hugepage(unsigned long vsid, unsigned long addr, + pmd_t *pmdp, unsigned int psize, int ssize, + unsigned long flags) +{ + int i, max_hpte_count, valid; + unsigned long s_addr; + unsigned char *hpte_slot_array; + unsigned long hidx, shift, vpn, hash, slot; + int local = flags & HPTE_LOCAL_UPDATE; + + s_addr = addr & HPAGE_PMD_MASK; + hpte_slot_array = get_hpte_slot_array(pmdp); + /* + * IF we try to do a HUGE PTE update after a withdraw is done. + * we will find the below NULL. This happens when we do + * split_huge_page_pmd + */ + if (!hpte_slot_array) + return; + + if (ppc_md.hugepage_invalidate) { + ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, + psize, ssize, local); + goto tm_abort; + } + /* + * No bluk hpte removal support, invalidate each entry + */ + shift = mmu_psize_defs[psize].shift; + max_hpte_count = HPAGE_PMD_SIZE >> shift; + for (i = 0; i < max_hpte_count; i++) { + /* + * 8 bits per each hpte entries + * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] + */ + valid = hpte_valid(hpte_slot_array, i); + if (!valid) + continue; + hidx = hpte_hash_index(hpte_slot_array, i); + + /* get the vpn */ + addr = s_addr + (i * (1ul << shift)); + vpn = hpt_vpn(addr, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + ppc_md.hpte_invalidate(slot, vpn, psize, + MMU_PAGE_16M, ssize, local); + } +tm_abort: +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Transactions are not aborted by tlbiel, only tlbie. + * Without, syncing a page back to a block device w/ PIO could pick up + * transactional data (bad!) so we force an abort here. Before the + * sync the page will be made read-only, which will flush_hash_page. + * BIG ISSUE here: if the kernel uses a page from userspace without + * unmapping it first, it may see the speculated version. + */ + if (local && cpu_has_feature(CPU_FTR_TM) && + current->thread.regs && + MSR_TM_ACTIVE(current->thread.regs->msr)) { + tm_enable(); + tm_abort(TM_CAUSE_TLBI); + } +#endif +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + void flush_hash_range(unsigned long number, int local) { if (ppc_md.flush_hash_range) @@ -1322,7 +1404,7 @@ void flush_hash_range(unsigned long number, int local) else { int i; struct ppc64_tlb_batch *batch = - &__get_cpu_var(ppc64_tlb_batch); + this_cpu_ptr(&ppc64_tlb_batch); for (i = 0; i < number; i++) flush_hash_page(batch->vpn[i], batch->pte[i], @@ -1432,7 +1514,7 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) mmu_kernel_ssize, 0); } -void kernel_map_pages(struct page *page, int numpages, int enable) +void __kernel_map_pages(struct page *page, int numpages, int enable) { unsigned long flags, vaddr, lmi; int i; diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 5f5e6328c21c..86686514ae13 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -18,60 +18,9 @@ #include <linux/mm.h> #include <asm/machdep.h> -static void invalidate_old_hpte(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize) -{ - int i, max_hpte_count, valid; - unsigned long s_addr; - unsigned char *hpte_slot_array; - unsigned long hidx, shift, vpn, hash, slot; - - s_addr = addr & HPAGE_PMD_MASK; - hpte_slot_array = get_hpte_slot_array(pmdp); - /* - * IF we try to do a HUGE PTE update after a withdraw is done. - * we will find the below NULL. This happens when we do - * split_huge_page_pmd - */ - if (!hpte_slot_array) - return; - - if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, - psize, ssize); - /* - * No bluk hpte removal support, invalidate each entry - */ - shift = mmu_psize_defs[psize].shift; - max_hpte_count = HPAGE_PMD_SIZE >> shift; - for (i = 0; i < max_hpte_count; i++) { - /* - * 8 bits per each hpte entries - * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] - */ - valid = hpte_valid(hpte_slot_array, i); - if (!valid) - continue; - hidx = hpte_hash_index(hpte_slot_array, i); - - /* get the vpn */ - addr = s_addr + (i * (1ul << shift)); - vpn = hpt_vpn(addr, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); - if (hidx & _PTEIDX_SECONDARY) - hash = ~hash; - - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, psize, - MMU_PAGE_16M, ssize, 0); - } -} - - int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, - pmd_t *pmdp, unsigned long trap, int local, int ssize, - unsigned int psize) + pmd_t *pmdp, unsigned long trap, unsigned long flags, + int ssize, unsigned int psize) { unsigned int index, valid; unsigned char *hpte_slot_array; @@ -145,7 +94,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * hash page table entries. */ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) - invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize); + flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K, + ssize, flags); } valid = hpte_valid(hpte_slot_array, index); @@ -158,7 +108,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, slot += hidx & _PTEIDX_GROUP_IX; ret = ppc_md.hpte_updatepp(slot, rflags, vpn, - psize, lpsize, ssize, local); + psize, lpsize, ssize, flags); /* * We failed to update, try to insert a new entry. */ diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c index 5e4ee2573903..ba47aaf33a4b 100644 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -33,13 +33,13 @@ static inline int tlb1_next(void) ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; - index = __get_cpu_var(next_tlbcam_idx); + index = this_cpu_read(next_tlbcam_idx); /* Just round-robin the entries and wrap when we hit the end */ if (unlikely(index == ncams - 1)) - __get_cpu_var(next_tlbcam_idx) = tlbcam_index; + __this_cpu_write(next_tlbcam_idx, tlbcam_index); else - __get_cpu_var(next_tlbcam_idx)++; + __this_cpu_inc(next_tlbcam_idx); return index; } diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index a5bcf9301196..d94b1af53a93 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -19,8 +19,8 @@ extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long vflags, int psize, int ssize); int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, - pte_t *ptep, unsigned long trap, int local, int ssize, - unsigned int shift, unsigned int mmu_psize) + pte_t *ptep, unsigned long trap, unsigned long flags, + int ssize, unsigned int shift, unsigned int mmu_psize) { unsigned long vpn; unsigned long old_pte, new_pte; @@ -81,7 +81,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, slot += (old_pte & _PAGE_F_GIX) >> 12; if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, - mmu_psize, ssize, local) == -1) + mmu_psize, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 7e70ae968e5f..5ff4e07d920a 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -62,6 +62,9 @@ static unsigned nr_gpages; /* * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD; + * + * Defined in such a way that we can optimize away code block at build time + * if CONFIG_HUGETLB_PAGE=n. */ int pmd_huge(pmd_t pmd) { @@ -230,7 +233,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) return NULL; - return hugepte_offset(hpdp, addr, pdshift); + return hugepte_offset(*hpdp, addr, pdshift); } #else @@ -270,13 +273,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) return NULL; - return hugepte_offset(hpdp, addr, pdshift); + return hugepte_offset(*hpdp, addr, pdshift); } #endif #ifdef CONFIG_PPC_FSL_BOOK3E /* Build list of addresses of gigantic pages. This function is used in early - * boot before the buddy or bootmem allocator is setup. + * boot before the buddy allocator is setup. */ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { @@ -312,7 +315,7 @@ int alloc_bootmem_huge_page(struct hstate *hstate) * If gpages can be in highmem we can't use the trick of storing the * data structure in the page; allocate space for this */ - m = alloc_bootmem(sizeof(struct huge_bootmem_page)); + m = memblock_virt_alloc(sizeof(struct huge_bootmem_page), 0); m->phys = gpage_freearray[idx].gpage_list[--nr_gpages]; #else m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]); @@ -352,6 +355,13 @@ static int __init do_gpage_early_setup(char *param, char *val, if (size != 0) { if (sscanf(val, "%lu", &npages) <= 0) npages = 0; + if (npages > MAX_NUMBER_GPAGES) { + pr_warn("MMU: %lu pages requested for page " + "size %llu KB, limiting to " + __stringify(MAX_NUMBER_GPAGES) "\n", + npages, size / 1024); + npages = MAX_NUMBER_GPAGES; + } gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages; size = 0; } @@ -399,7 +409,7 @@ void __init reserve_hugetlb_gpages(void) #else /* !PPC_FSL_BOOK3E */ /* Build list of addresses of gigantic pages. This function is used in early - * boot before the buddy or bootmem allocator is setup. + * boot before the buddy allocator is setup. */ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { @@ -462,7 +472,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) { struct hugepd_freelist **batchp; - batchp = &get_cpu_var(hugepd_freelist_cur); + batchp = this_cpu_ptr(&hugepd_freelist_cur); if (atomic_read(&tlb->mm->mm_users) < 2 || cpumask_equal(mm_cpumask(tlb->mm), @@ -517,8 +527,6 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif for (i = 0; i < num_hugepd; i++, hpdp++) hpdp->pd = 0; - tlb->need_flush = 1; - #ifdef CONFIG_PPC_FSL_BOOK3E hugepd_free(tlb, hugepte); #else @@ -538,7 +546,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, do { pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); - if (!is_hugepd(pmd)) { + if (!is_hugepd(__hugepd(pmd_val(*pmd)))) { /* * if it is not hugepd pointer, we should already find * it cleared. @@ -587,7 +595,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, do { pud = pud_offset(pgd, addr); next = pud_addr_end(addr, end); - if (!is_hugepd(pud)) { + if (!is_hugepd(__hugepd(pud_val(*pud)))) { if (pud_none_or_clear_bad(pud)) continue; hugetlb_free_pmd_range(tlb, pud, addr, next, floor, @@ -653,7 +661,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, do { next = pgd_addr_end(addr, end); pgd = pgd_offset(tlb->mm, addr); - if (!is_hugepd(pgd)) { + if (!is_hugepd(__hugepd(pgd_val(*pgd)))) { if (pgd_none_or_clear_bad(pgd)) continue; hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); @@ -713,12 +721,11 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, return (__boundary - 1 < end - 1) ? __boundary : end; } -int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, - unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) +int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift, + unsigned long end, int write, struct page **pages, int *nr) { pte_t *ptep; - unsigned long sz = 1UL << hugepd_shift(*hugepd); + unsigned long sz = 1UL << hugepd_shift(hugepd); unsigned long next; ptep = hugepte_offset(hugepd, addr, pdshift); @@ -961,7 +968,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift else if (pgd_huge(pgd)) { ret_pte = (pte_t *) pgdp; goto out; - } else if (is_hugepd(&pgd)) + } else if (is_hugepd(__hugepd(pgd_val(pgd)))) hpdp = (hugepd_t *)&pgd; else { /* @@ -978,7 +985,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift else if (pud_huge(pud)) { ret_pte = (pte_t *) pudp; goto out; - } else if (is_hugepd(&pud)) + } else if (is_hugepd(__hugepd(pud_val(pud)))) hpdp = (hugepd_t *)&pud; else { pdshift = PMD_SHIFT; @@ -999,7 +1006,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift if (pmd_huge(pmd) || pmd_large(pmd)) { ret_pte = (pte_t *) pmdp; goto out; - } else if (is_hugepd(&pmd)) + } else if (is_hugepd(__hugepd(pmd_val(pmd)))) hpdp = (hugepd_t *)&pmd; else return pte_offset_kernel(&pmd, ea); @@ -1008,7 +1015,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift if (!hpdp) return NULL; - ret_pte = hugepte_offset(hpdp, ea, pdshift); + ret_pte = hugepte_offset(*hpdp, ea, pdshift); pdshift = hugepd_shift(*hpdp); out: if (shift) @@ -1038,14 +1045,6 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, if ((pte_val(pte) & mask) != mask) return 0; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - /* - * check for splitting here - */ - if (pmd_trans_splitting(pte_pmd(pte))) - return 0; -#endif - /* hugepages are never "special" */ VM_BUG_ON(!pfn_valid(pte_pfn(pte))); diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index cad68ff8eca5..a10be665b645 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -26,7 +26,6 @@ #include <linux/mm.h> #include <linux/stddef.h> #include <linux/init.h> -#include <linux/bootmem.h> #include <linux/highmem.h> #include <linux/initrd.h> #include <linux/pagemap.h> @@ -103,7 +102,7 @@ unsigned long __max_low_memory = MAX_LOW_MEM; /* * Check for command-line options that affect what MMU_init will do. */ -void MMU_setup(void) +void __init MMU_setup(void) { /* Check for nobats option (used in mapin_ram). */ if (strstr(boot_command_line, "nobats")) { @@ -195,15 +194,6 @@ void __init MMU_init(void) memblock_set_current_limit(lowmem_end_addr); } -/* This is only called until mem_init is done. */ -void __init *early_get_page(void) -{ - if (init_bootmem_done) - return alloc_bootmem_pages(PAGE_SIZE); - else - return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); -} - #ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... */ void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 3481556a1880..10471f9bb63f 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -34,7 +34,6 @@ #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/delay.h> -#include <linux/bootmem.h> #include <linux/highmem.h> #include <linux/idr.h> #include <linux/nodemask.h> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 8ebaac75c940..b7285a5870f8 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -35,6 +35,7 @@ #include <linux/memblock.h> #include <linux/hugetlb.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <asm/pgalloc.h> #include <asm/prom.h> @@ -60,7 +61,6 @@ #define CPU_FTR_NOEXECUTE 0 #endif -int init_bootmem_done; int mem_init_done; unsigned long long memory_limit; @@ -144,8 +144,17 @@ int arch_remove_memory(u64 start, u64 size) zone = page_zone(pfn_to_page(start_pfn)); ret = __remove_pages(zone, start_pfn, nr_pages); - if (!ret && (ppc_md.remove_memory)) - ret = ppc_md.remove_memory(start, size); + if (ret) + return ret; + + /* Remove htab bolted mappings for this section of memory */ + start = (unsigned long)__va(start); + ret = remove_section_mapping(start, start + size); + + /* Ensure all vmalloc mappings are flushed in case they also + * hit that section of memory + */ + vm_unmap_aliases(); return ret; } @@ -180,70 +189,23 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, } EXPORT_SYMBOL_GPL(walk_system_ram_range); -/* - * Initialize the bootmem system and give it all the memory we - * have available. If we are using highmem, we only put the - * lowmem into the bootmem system. - */ #ifndef CONFIG_NEED_MULTIPLE_NODES -void __init do_init_bootmem(void) +void __init initmem_init(void) { - unsigned long start, bootmap_pages; - unsigned long total_pages; - struct memblock_region *reg; - int boot_mapsize; - max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; - total_pages = (memblock_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT; + min_low_pfn = MEMORY_START >> PAGE_SHIFT; #ifdef CONFIG_HIGHMEM - total_pages = total_lowmem >> PAGE_SHIFT; max_low_pfn = lowmem_end_addr >> PAGE_SHIFT; #endif - /* - * Find an area to use for the bootmem bitmap. Calculate the size of - * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE. - * Add 1 additional page in case the address isn't page-aligned. - */ - bootmap_pages = bootmem_bootmap_pages(total_pages); - - start = memblock_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE); - - min_low_pfn = MEMORY_START >> PAGE_SHIFT; - boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn); - /* Place all memblock_regions in the same node and merge contiguous * memblock_regions */ memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); - /* Add all physical memory to the bootmem map, mark each area - * present. - */ -#ifdef CONFIG_HIGHMEM - free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT); - - /* reserve the sections we're already using */ - for_each_memblock(reserved, reg) { - unsigned long top = reg->base + reg->size - 1; - if (top < lowmem_end_addr) - reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); - else if (reg->base < lowmem_end_addr) { - unsigned long trunc_size = lowmem_end_addr - reg->base; - reserve_bootmem(reg->base, trunc_size, BOOTMEM_DEFAULT); - } - } -#else - free_bootmem_with_active_regions(0, max_pfn); - - /* reserve the sections we're already using */ - for_each_memblock(reserved, reg) - reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); -#endif /* XXX need to clip this if using highmem? */ sparse_memory_present_with_active_regions(0); - - init_bootmem_done = 1; + sparse_init(); } /* mark pages that don't exist as nosave */ @@ -359,14 +321,6 @@ void __init paging_init(void) mark_nonram_nosave(); } -static void __init register_page_bootmem_info(void) -{ - int i; - - for_each_online_node(i) - register_page_bootmem_info_node(NODE_DATA(i)); -} - void __init mem_init(void) { /* @@ -379,7 +333,6 @@ void __init mem_init(void) swiotlb_init(0); #endif - register_page_bootmem_info(); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); set_max_mapnr(max_pfn); free_all_bootmem(); diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 928ebe79668b..9cba6cba2e50 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -421,12 +421,12 @@ void __init mmu_context_init(void) /* * Allocate the maps used by context management */ - context_map = alloc_bootmem(CTX_MAP_SIZE); - context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1)); + context_map = memblock_virt_alloc(CTX_MAP_SIZE, 0); + context_mm = memblock_virt_alloc(sizeof(void *) * (last_context + 1), 0); #ifndef CONFIG_SMP - stale_map[0] = alloc_bootmem(CTX_MAP_SIZE); + stale_map[0] = memblock_virt_alloc(CTX_MAP_SIZE, 0); #else - stale_map[boot_cpuid] = alloc_bootmem(CTX_MAP_SIZE); + stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0); register_cpu_notifier(&mmu_context_cpu_nb); #endif diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 9615d82919b8..78c45f392f5b 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -67,7 +67,7 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid, { __tlbil_va(address, pid); } -#endif /* CONIFG_8xx */ +#endif /* CONFIG_8xx */ #if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_PPC_47x) extern void _tlbivax_bcast(unsigned long address, unsigned int pid, diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b9d1dfdbe5bb..0257a7d659ef 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -134,28 +134,6 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn, return 0; } -/* - * get_node_active_region - Return active region containing pfn - * Active range returned is empty if none found. - * @pfn: The page to return the region for - * @node_ar: Returned set to the active region containing @pfn - */ -static void __init get_node_active_region(unsigned long pfn, - struct node_active_region *node_ar) -{ - unsigned long start_pfn, end_pfn; - int i, nid; - - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { - if (pfn >= start_pfn && pfn < end_pfn) { - node_ar->nid = nid; - node_ar->start_pfn = start_pfn; - node_ar->end_pfn = end_pfn; - break; - } - } -} - static void reset_numa_cpu_lookup_table(void) { unsigned int cpu; @@ -928,134 +906,48 @@ static void __init dump_numa_memory_topology(void) } } -/* - * Allocate some memory, satisfying the memblock or bootmem allocator where - * required. nid is the preferred node and end is the physical address of - * the highest address in the node. - * - * Returns the virtual address of the memory. - */ -static void __init *careful_zallocation(int nid, unsigned long size, - unsigned long align, - unsigned long end_pfn) -{ - void *ret; - int new_nid; - unsigned long ret_paddr; - - ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT); - - /* retry over all memory */ - if (!ret_paddr) - ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM()); - - if (!ret_paddr) - panic("numa.c: cannot allocate %lu bytes for node %d", - size, nid); - - ret = __va(ret_paddr); - - /* - * We initialize the nodes in numeric order: 0, 1, 2... - * and hand over control from the MEMBLOCK allocator to the - * bootmem allocator. If this function is called for - * node 5, then we know that all nodes <5 are using the - * bootmem allocator instead of the MEMBLOCK allocator. - * - * So, check the nid from which this allocation came - * and double check to see if we need to use bootmem - * instead of the MEMBLOCK. We don't free the MEMBLOCK memory - * since it would be useless. - */ - new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT); - if (new_nid < nid) { - ret = __alloc_bootmem_node(NODE_DATA(new_nid), - size, align, 0); - - dbg("alloc_bootmem %p %lx\n", ret, size); - } - - memset(ret, 0, size); - return ret; -} - static struct notifier_block ppc64_numa_nb = { .notifier_call = cpu_numa_callback, .priority = 1 /* Must run before sched domains notifier. */ }; -static void __init mark_reserved_regions_for_nid(int nid) +/* Initialize NODE_DATA for a node on the local memory */ +static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) { - struct pglist_data *node = NODE_DATA(nid); - struct memblock_region *reg; - - for_each_memblock(reserved, reg) { - unsigned long physbase = reg->base; - unsigned long size = reg->size; - unsigned long start_pfn = physbase >> PAGE_SHIFT; - unsigned long end_pfn = PFN_UP(physbase + size); - struct node_active_region node_ar; - unsigned long node_end_pfn = pgdat_end_pfn(node); - - /* - * Check to make sure that this memblock.reserved area is - * within the bounds of the node that we care about. - * Checking the nid of the start and end points is not - * sufficient because the reserved area could span the - * entire node. - */ - if (end_pfn <= node->node_start_pfn || - start_pfn >= node_end_pfn) - continue; - - get_node_active_region(start_pfn, &node_ar); - while (start_pfn < end_pfn && - node_ar.start_pfn < node_ar.end_pfn) { - unsigned long reserve_size = size; - /* - * if reserved region extends past active region - * then trim size to active region - */ - if (end_pfn > node_ar.end_pfn) - reserve_size = (node_ar.end_pfn << PAGE_SHIFT) - - physbase; - /* - * Only worry about *this* node, others may not - * yet have valid NODE_DATA(). - */ - if (node_ar.nid == nid) { - dbg("reserve_bootmem %lx %lx nid=%d\n", - physbase, reserve_size, node_ar.nid); - reserve_bootmem_node(NODE_DATA(node_ar.nid), - physbase, reserve_size, - BOOTMEM_DEFAULT); - } - /* - * if reserved region is contained in the active region - * then done. - */ - if (end_pfn <= node_ar.end_pfn) - break; - - /* - * reserved region extends past the active region - * get next active region that contains this - * reserved region - */ - start_pfn = node_ar.end_pfn; - physbase = start_pfn << PAGE_SHIFT; - size = size - reserve_size; - get_node_active_region(start_pfn, &node_ar); - } - } + u64 spanned_pages = end_pfn - start_pfn; + const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES); + u64 nd_pa; + void *nd; + int tnid; + + if (spanned_pages) + pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", + nid, start_pfn << PAGE_SHIFT, + (end_pfn << PAGE_SHIFT) - 1); + else + pr_info("Initmem setup node %d\n", nid); + + nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); + nd = __va(nd_pa); + + /* report and initialize */ + pr_info(" NODE_DATA [mem %#010Lx-%#010Lx]\n", + nd_pa, nd_pa + nd_size - 1); + tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); + if (tnid != nid) + pr_info(" NODE_DATA(%d) on node %d\n", nid, tnid); + + node_data[nid] = nd; + memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); + NODE_DATA(nid)->node_id = nid; + NODE_DATA(nid)->node_start_pfn = start_pfn; + NODE_DATA(nid)->node_spanned_pages = spanned_pages; } - -void __init do_init_bootmem(void) +void __init initmem_init(void) { int nid, cpu; - min_low_pfn = 0; max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; max_pfn = max_low_pfn; @@ -1064,64 +956,18 @@ void __init do_init_bootmem(void) else dump_numa_memory_topology(); + memblock_dump_all(); + for_each_online_node(nid) { unsigned long start_pfn, end_pfn; - void *bootmem_vaddr; - unsigned long bootmap_pages; get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); - - /* - * Allocate the node structure node local if possible - * - * Be careful moving this around, as it relies on all - * previous nodes' bootmem to be initialized and have - * all reserved areas marked. - */ - NODE_DATA(nid) = careful_zallocation(nid, - sizeof(struct pglist_data), - SMP_CACHE_BYTES, end_pfn); - - dbg("node %d\n", nid); - dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); - - NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; - NODE_DATA(nid)->node_start_pfn = start_pfn; - NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; - - if (NODE_DATA(nid)->node_spanned_pages == 0) - continue; - - dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT); - dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT); - - bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); - bootmem_vaddr = careful_zallocation(nid, - bootmap_pages << PAGE_SHIFT, - PAGE_SIZE, end_pfn); - - dbg("bootmap_vaddr = %p\n", bootmem_vaddr); - - init_bootmem_node(NODE_DATA(nid), - __pa(bootmem_vaddr) >> PAGE_SHIFT, - start_pfn, end_pfn); - - free_bootmem_with_active_regions(nid, end_pfn); - /* - * Be very careful about moving this around. Future - * calls to careful_zallocation() depend on this getting - * done correctly. - */ - mark_reserved_regions_for_nid(nid); + setup_node_data(nid, start_pfn, end_pfn); sparse_memory_present_with_active_regions(nid); } - init_bootmem_done = 1; + sparse_init(); - /* - * Now bootmem is initialised we can create the node to cpumask - * lookup tables and setup the cpu callback to populate them. - */ setup_node_to_cpumask_map(); reset_numa_cpu_lookup_table(); @@ -1711,12 +1557,11 @@ static void stage_topology_update(int core_id) static int dt_update_callback(struct notifier_block *nb, unsigned long action, void *data) { - struct of_prop_reconfig *update; + struct of_reconfig_data *update = data; int rc = NOTIFY_DONE; switch (action) { case OF_RECONFIG_UPDATE_PROPERTY: - update = (struct of_prop_reconfig *)data; if (!of_prop_cmp(update->dn->type, "cpu") && !of_prop_cmp(update->prop->name, "ibm,associativity")) { u32 core_id; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index cf11342bf519..50fad3801f30 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -100,12 +100,11 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add { pte_t *pte; extern int mem_init_done; - extern void *early_get_page(void); if (mem_init_done) { pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); } else { - pte = (pte_t *)early_get_page(); + pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); if (pte) clear_page(pte); } @@ -430,7 +429,7 @@ static int change_page_attr(struct page *page, int numpages, pgprot_t prot) } -void kernel_map_pages(struct page *page, int numpages, int enable) +void __kernel_map_pages(struct page *page, int numpages, int enable) { if (PageHighMem(page)) return; diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index c8d709ab489d..4fe5f64cc179 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -33,9 +33,9 @@ #include <linux/swap.h> #include <linux/stddef.h> #include <linux/vmalloc.h> -#include <linux/bootmem.h> #include <linux/memblock.h> #include <linux/slab.h> +#include <linux/hugetlb.h> #include <asm/pgalloc.h> #include <asm/page.h> @@ -51,6 +51,7 @@ #include <asm/cputable.h> #include <asm/sections.h> #include <asm/firmware.h> +#include <asm/dma.h> #include "mmu_decl.h" @@ -75,11 +76,7 @@ static __ref void *early_alloc_pgtable(unsigned long size) { void *pt; - if (init_bootmem_done) - pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS)); - else - pt = __va(memblock_alloc_base(size, size, - __pa(MAX_DMA_ADDRESS))); + pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS))); memset(pt, 0, size); return pt; @@ -113,10 +110,6 @@ int map_kernel_page(unsigned long ea, unsigned long pa, int flags) __pgprot(flags))); } else { #ifdef CONFIG_PPC_MMU_NOHASH - /* Warning ! This will blow up if bootmem is not initialized - * which our ppc64 code is keen to do that, we'll need to - * fix it and/or be more careful - */ pgdp = pgd_offset_k(ea); #ifdef PUD_TABLE_SIZE if (pgd_none(*pgdp)) { @@ -352,16 +345,31 @@ EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(__iounmap); EXPORT_SYMBOL(__iounmap_at); +#ifndef __PAGETABLE_PUD_FOLDED +/* 4 level page table */ +struct page *pgd_page(pgd_t pgd) +{ + if (pgd_huge(pgd)) + return pte_page(pgd_pte(pgd)); + return virt_to_page(pgd_page_vaddr(pgd)); +} +#endif + +struct page *pud_page(pud_t pud) +{ + if (pud_huge(pud)) + return pte_page(pud_pte(pud)); + return virt_to_page(pud_page_vaddr(pud)); +} + /* * For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags * For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address. */ struct page *pmd_page(pmd_t pmd) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (pmd_trans_huge(pmd)) + if (pmd_trans_huge(pmd) || pmd_huge(pmd)) return pfn_to_page(pmd_pfn(pmd)); -#endif return virt_to_page(pmd_page_vaddr(pmd)); } @@ -731,29 +739,15 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, unsigned long old_pmd) { - int ssize, i; - unsigned long s_addr; - int max_hpte_count; - unsigned int psize, valid; - unsigned char *hpte_slot_array; - unsigned long hidx, vpn, vsid, hash, shift, slot; - - /* - * Flush all the hptes mapping this hugepage - */ - s_addr = addr & HPAGE_PMD_MASK; - hpte_slot_array = get_hpte_slot_array(pmdp); - /* - * IF we try to do a HUGE PTE update after a withdraw is done. - * we will find the below NULL. This happens when we do - * split_huge_page_pmd - */ - if (!hpte_slot_array) - return; + int ssize; + unsigned int psize; + unsigned long vsid; + unsigned long flags = 0; + const struct cpumask *tmp; /* get the base page size,vsid and segment size */ #ifdef CONFIG_DEBUG_VM - psize = get_slice_psize(mm, s_addr); + psize = get_slice_psize(mm, addr); BUG_ON(psize == MMU_PAGE_16M); #endif if (old_pmd & _PAGE_COMBO) @@ -761,46 +755,20 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, else psize = MMU_PAGE_64K; - if (!is_kernel_addr(s_addr)) { - ssize = user_segment_size(s_addr); - vsid = get_vsid(mm->context.id, s_addr, ssize); + if (!is_kernel_addr(addr)) { + ssize = user_segment_size(addr); + vsid = get_vsid(mm->context.id, addr, ssize); WARN_ON(vsid == 0); } else { - vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize); + vsid = get_kernel_vsid(addr, mmu_kernel_ssize); ssize = mmu_kernel_ssize; } - if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(vsid, s_addr, - hpte_slot_array, - psize, ssize); - /* - * No bluk hpte removal support, invalidate each entry - */ - shift = mmu_psize_defs[psize].shift; - max_hpte_count = HPAGE_PMD_SIZE >> shift; - for (i = 0; i < max_hpte_count; i++) { - /* - * 8 bits per each hpte entries - * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] - */ - valid = hpte_valid(hpte_slot_array, i); - if (!valid) - continue; - hidx = hpte_hash_index(hpte_slot_array, i); - - /* get the vpn */ - addr = s_addr + (i * (1ul << shift)); - vpn = hpt_vpn(addr, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); - if (hidx & _PTEIDX_SECONDARY) - hash = ~hash; - - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, psize, - MMU_PAGE_16M, ssize, 0); - } + tmp = cpumask_of(smp_processor_id()); + if (cpumask_equal(mm_cpumask(mm), tmp)) + flags |= HPTE_LOCAL_UPDATE; + + return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags); } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 9aee27c582dc..c406aa95b2bc 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -87,6 +87,9 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); #define PPC_STD(r, base, i) EMIT(PPC_INST_STD | ___PPC_RS(r) | \ ___PPC_RA(base) | ((i) & 0xfffc)) + +#define PPC_LBZ(r, base, i) EMIT(PPC_INST_LBZ | ___PPC_RT(r) | \ + ___PPC_RA(base) | IMM_L(i)) #define PPC_LD(r, base, i) EMIT(PPC_INST_LD | ___PPC_RT(r) | \ ___PPC_RA(base) | IMM_L(i)) #define PPC_LWZ(r, base, i) EMIT(PPC_INST_LWZ | ___PPC_RT(r) | \ @@ -96,6 +99,10 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); #define PPC_LHBRX(r, base, b) EMIT(PPC_INST_LHBRX | ___PPC_RT(r) | \ ___PPC_RA(base) | ___PPC_RB(b)) /* Convenience helpers for the above with 'far' offsets: */ +#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LBZ(r, base, i); \ + else { PPC_ADDIS(r, base, IMM_HA(i)); \ + PPC_LBZ(r, r, IMM_L(i)); } } while(0) + #define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) PPC_LD(r, base, i); \ else { PPC_ADDIS(r, base, IMM_HA(i)); \ PPC_LD(r, r, IMM_L(i)); } } while(0) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index cbae2dfd053c..1ca125b9c226 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -181,6 +181,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, } break; case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */ + case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */ ctx->seen |= SEEN_XREG; PPC_CMPWI(r_X, 0); if (ctx->pc_ret0 != -1) { @@ -190,9 +191,13 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_LI(r_ret, 0); PPC_JMP(exit_addr); } - PPC_DIVWU(r_scratch1, r_A, r_X); - PPC_MUL(r_scratch1, r_X, r_scratch1); - PPC_SUB(r_A, r_A, r_scratch1); + if (code == (BPF_ALU | BPF_MOD | BPF_X)) { + PPC_DIVWU(r_scratch1, r_A, r_X); + PPC_MUL(r_scratch1, r_X, r_scratch1); + PPC_SUB(r_A, r_A, r_scratch1); + } else { + PPC_DIVWU(r_A, r_A, r_X); + } break; case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */ PPC_LI32(r_scratch2, K); @@ -200,22 +205,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_MUL(r_scratch1, r_scratch2, r_scratch1); PPC_SUB(r_A, r_A, r_scratch1); break; - case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */ - ctx->seen |= SEEN_XREG; - PPC_CMPWI(r_X, 0); - if (ctx->pc_ret0 != -1) { - PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); - } else { - /* - * Exit, returning 0; first pass hits here - * (longer worst-case code size). - */ - PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12); - PPC_LI(r_ret, 0); - PPC_JMP(exit_addr); - } - PPC_DIVWU(r_A, r_A, r_X); - break; case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */ if (K == 1) break; @@ -361,6 +350,11 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, protocol)); break; case BPF_ANC | SKF_AD_IFINDEX: + case BPF_ANC | SKF_AD_HATYPE: + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, + ifindex) != 4); + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, + type) != 2); PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, dev)); PPC_CMPDI(r_scratch1, 0); @@ -368,14 +362,18 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); } else { /* Exit, returning 0; first pass hits here. */ - PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12); + PPC_BCC_SHORT(COND_NE, ctx->idx * 4 + 12); PPC_LI(r_ret, 0); PPC_JMP(exit_addr); } - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, - ifindex) != 4); - PPC_LWZ_OFFS(r_A, r_scratch1, + if (code == (BPF_ANC | SKF_AD_IFINDEX)) { + PPC_LWZ_OFFS(r_A, r_scratch1, offsetof(struct net_device, ifindex)); + } else { + PPC_LHZ_OFFS(r_A, r_scratch1, + offsetof(struct net_device, type)); + } + break; case BPF_ANC | SKF_AD_MARK: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); @@ -407,6 +405,11 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, queue_mapping)); break; + case BPF_ANC | SKF_AD_PKTTYPE: + PPC_LBZ_OFFS(r_A, r_skb, PKT_TYPE_OFFSET()); + PPC_ANDI(r_A, r_A, PKT_TYPE_MAX); + PPC_SRWI(r_A, r_A, 5); + break; case BPF_ANC | SKF_AD_CPU: #ifdef CONFIG_SMP /* diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index 6adf55fa5d88..ecc66d5f02c9 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -10,7 +10,7 @@ #include <linux/oprofile.h> #include <linux/sched.h> #include <asm/processor.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/compat.h> #include <asm/oprofile_impl.h> @@ -105,6 +105,7 @@ void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth) first_frame = 0; } } else { + pagefault_disable(); #ifdef CONFIG_PPC64 if (!is_32bit_task()) { while (depth--) { @@ -113,7 +114,7 @@ void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth) break; first_frame = 0; } - + pagefault_enable(); return; } #endif @@ -124,5 +125,6 @@ void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth) break; first_frame = 0; } + pagefault_enable(); } } diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c index 28f1af2db1f5..1c27831df1ac 100644 --- a/arch/powerpc/oprofile/cell/spu_task_sync.c +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c @@ -331,8 +331,7 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp, if (mm->exe_file) { app_cookie = fast_get_dcookie(&mm->exe_file->f_path); - pr_debug("got dcookie for %s\n", - mm->exe_file->f_dentry->d_name.name); + pr_debug("got dcookie for %pD\n", mm->exe_file); } for (vma = mm->mmap; vma; vma = vma->vm_next) { @@ -342,15 +341,14 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp, if (!vma->vm_file) goto fail_no_image_cookie; - pr_debug("Found spu ELF at %X(object-id:%lx) for file %s\n", - my_offset, spu_ref, - vma->vm_file->f_dentry->d_name.name); + pr_debug("Found spu ELF at %X(object-id:%lx) for file %pD\n", + my_offset, spu_ref, vma->vm_file); *offsetp = my_offset; break; } *spu_bin_dcookie = fast_get_dcookie(&vma->vm_file->f_path); - pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name); + pr_debug("got dcookie for %pD\n", vma->vm_file); up_read(&mm->mmap_sem); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index a6995d4e93d4..7c4f6690533a 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -339,7 +339,7 @@ static void power_pmu_bhrb_reset(void) static void power_pmu_bhrb_enable(struct perf_event *event) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); if (!ppmu->bhrb_nr) return; @@ -354,7 +354,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event) static void power_pmu_bhrb_disable(struct perf_event *event) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); if (!ppmu->bhrb_nr) return; @@ -1144,7 +1144,7 @@ static void power_pmu_disable(struct pmu *pmu) if (!ppmu) return; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) { /* @@ -1211,7 +1211,7 @@ static void power_pmu_enable(struct pmu *pmu) return; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) goto out; @@ -1403,7 +1403,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) * Add the event to the list (if there is room) * and check whether the total set is still feasible. */ - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); n0 = cpuhw->n_events; if (n0 >= ppmu->n_counter) goto out; @@ -1469,7 +1469,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags) power_pmu_read(event); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); for (i = 0; i < cpuhw->n_events; ++i) { if (event == cpuhw->event[i]) { while (++i < cpuhw->n_events) { @@ -1575,7 +1575,7 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags) */ static void power_pmu_start_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; @@ -1589,7 +1589,7 @@ static void power_pmu_start_txn(struct pmu *pmu) */ static void power_pmu_cancel_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); @@ -1607,7 +1607,7 @@ static int power_pmu_commit_txn(struct pmu *pmu) if (!ppmu) return -EAGAIN; - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); n = cpuhw->n_events; if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) return -EAGAIN; @@ -1964,7 +1964,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) { struct cpu_hw_events *cpuhw; - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); power_pmu_bhrb_read(cpuhw); data.br_stack = &cpuhw->bhrb_stack; } @@ -2037,7 +2037,7 @@ static bool pmc_overflow(unsigned long val) static void perf_event_interrupt(struct pt_regs *regs) { int i, j; - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; unsigned long val[8]; int found, active; diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index d35ae52c69dc..4acaea01fe03 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -210,7 +210,7 @@ static void fsl_emb_pmu_disable(struct pmu *pmu) unsigned long flags; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) { cpuhw->disabled = 1; @@ -249,7 +249,7 @@ static void fsl_emb_pmu_enable(struct pmu *pmu) unsigned long flags; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) goto out; @@ -653,7 +653,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, static void perf_event_interrupt(struct pt_regs *regs) { int i; - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; unsigned long val; int found = 0; diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index dba34088da28..f162d0b8eea3 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -177,7 +177,7 @@ static ssize_t _name##_show(struct device *dev, \ } \ ret = sprintf(buf, _fmt, _expr); \ e_free: \ - kfree(page); \ + kmem_cache_free(hv_page_cache, page); \ return ret; \ } \ static DEVICE_ATTR_RO(_name) @@ -217,11 +217,14 @@ static bool is_physical_domain(int domain) domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE; } +DEFINE_PER_CPU(char, hv_24x7_reqb[4096]) __aligned(4096); +DEFINE_PER_CPU(char, hv_24x7_resb[4096]) __aligned(4096); + static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, u16 lpar, u64 *res, bool success_expected) { - unsigned long ret = -ENOMEM; + unsigned long ret; /* * request_buffer and result_buffer are not required to be 4k aligned, @@ -243,13 +246,11 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, BUILD_BUG_ON(sizeof(*request_buffer) > 4096); BUILD_BUG_ON(sizeof(*result_buffer) > 4096); - request_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER); - if (!request_buffer) - goto out; + request_buffer = (void *)get_cpu_var(hv_24x7_reqb); + result_buffer = (void *)get_cpu_var(hv_24x7_resb); - result_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER); - if (!result_buffer) - goto out_free_request_buffer; + memset(request_buffer, 0, 4096); + memset(result_buffer, 0, 4096); *request_buffer = (struct reqb) { .buf = { @@ -278,15 +279,11 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, domain, offset, ix, lpar, ret, ret, result_buffer->buf.detailed_rc, result_buffer->buf.failing_request_ix); - goto out_free_result_buffer; + goto out; } *res = be64_to_cpu(result_buffer->result); -out_free_result_buffer: - kfree(result_buffer); -out_free_request_buffer: - kfree(request_buffer); out: return ret; } diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 82f2da28cd27..d2ac1c116454 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -220,7 +220,6 @@ config AKEBONO select USB_EHCI_HCD_PLATFORM if USB_EHCI_HCD select MMC_SDHCI select MMC_SDHCI_PLTFM - select MMC_SDHCI_OF_476GTR select ATA select SATA_AHCI_PLATFORM help diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c index 58db9d083969..c11ce6516c8f 100644 --- a/arch/powerpc/platforms/44x/ppc476.c +++ b/arch/powerpc/platforms/44x/ppc476.c @@ -94,7 +94,7 @@ static int avr_probe(struct i2c_client *client, { avr_i2c_client = client; ppc_md.restart = avr_reset_system; - ppc_md.power_off = avr_power_off_system; + pm_power_off = avr_power_off_system; return 0; } diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index e996e007bc44..711f3d352af7 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -18,7 +18,7 @@ #include <linux/irq.h> #include <linux/of_platform.h> #include <linux/fsl-diu-fb.h> -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <sysdev/fsl_soc.h> #include <asm/cacheflush.h> @@ -297,14 +297,13 @@ static void __init mpc512x_setup_diu(void) * and so negatively affect boot time. Instead we reserve the * already configured frame buffer area so that it won't be * destroyed. The starting address of the area to reserve and - * also it's length is passed to reserve_bootmem(). It will be + * also it's length is passed to memblock_reserve(). It will be * freed later on first open of fbdev, when splash image is not * needed any more. */ if (diu_shared_fb.in_use) { - ret = reserve_bootmem(diu_shared_fb.fb_phys, - diu_shared_fb.fb_len, - BOOTMEM_EXCLUSIVE); + ret = memblock_reserve(diu_shared_fb.fb_phys, + diu_shared_fb.fb_len); if (ret) { pr_err("%s: reserve bootmem failed\n", __func__); diu_shared_fb.in_use = false; diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 3feffde9128d..6af651e69129 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -212,6 +212,8 @@ static int __init efika_probe(void) DMA_MODE_READ = 0x44; DMA_MODE_WRITE = 0x48; + pm_power_off = rtas_power_off; + return 1; } @@ -225,7 +227,6 @@ define_machine(efika) .init_IRQ = mpc52xx_init_irq, .get_irq = mpc52xx_get_irq, .restart = rtas_restart, - .power_off = rtas_power_off, .halt = rtas_halt, .set_rtc_time = rtas_set_rtc_time, .get_rtc_time = rtas_get_rtc_time, diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 692998244d2c..c949ca055712 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -782,7 +782,6 @@ static const struct of_device_id mpc52xx_gpt_match[] = { static struct platform_driver mpc52xx_gpt_driver = { .driver = { .name = "mpc52xx-gpt", - .owner = THIS_MODULE, .of_match_table = mpc52xx_gpt_match, }, .probe = mpc52xx_gpt_probe, diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c index f8f0081759fb..251dcb90ef34 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c @@ -572,7 +572,6 @@ static const struct of_device_id mpc52xx_lpbfifo_match[] = { static struct platform_driver mpc52xx_lpbfifo_driver = { .driver = { .name = "mpc52xx-lpbfifo", - .owner = THIS_MODULE, .of_match_table = mpc52xx_lpbfifo_match, }, .probe = mpc52xx_lpbfifo_probe, diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c index 3d0c3a01143d..a0cb8bd41958 100644 --- a/arch/powerpc/platforms/82xx/ep8248e.c +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -169,7 +169,6 @@ static const struct of_device_id ep8248e_mdio_match[] = { static struct platform_driver ep8248e_mdio_driver = { .driver = { .name = "ep8248e-mdio-bitbang", - .owner = THIS_MODULE, .of_match_table = ep8248e_mdio_match, }, .probe = ep8248e_mdio_probe, diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index 463fa91ee5b6..15e8021ddef9 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -167,10 +167,10 @@ static int mcu_probe(struct i2c_client *client, const struct i2c_device_id *id) if (ret) goto err; - /* XXX: this is potentially racy, but there is no lock for ppc_md */ - if (!ppc_md.power_off) { + /* XXX: this is potentially racy, but there is no lock for pm_power_off */ + if (!pm_power_off) { glob_mcu = mcu; - ppc_md.power_off = mcu_power_off; + pm_power_off = mcu_power_off; dev_info(&client->dev, "will provide power-off service\n"); } @@ -197,7 +197,7 @@ static int mcu_remove(struct i2c_client *client) device_remove_file(&client->dev, &dev_attr_status); if (glob_mcu == mcu) { - ppc_md.power_off = NULL; + pm_power_off = NULL; glob_mcu = NULL; } diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index eeb80e25214d..c9adbfb65006 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -435,7 +435,6 @@ static const struct of_device_id pmc_match[] = { static struct platform_driver pmc_driver = { .driver = { .name = "mpc83xx-pmc", - .owner = THIS_MODULE, .of_match_table = pmc_match, }, .probe = pmc_probe, diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index e56b89a792ed..1f309ccb096e 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -170,7 +170,7 @@ static int __init corenet_generic_probe(void) ppc_md.get_irq = ehv_pic_get_irq; ppc_md.restart = fsl_hv_restart; - ppc_md.power_off = fsl_hv_halt; + pm_power_off = fsl_hv_halt; ppc_md.halt = fsl_hv_halt; #ifdef CONFIG_SMP /* diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c index 8162b0412117..79fd0dfd4b82 100644 --- a/arch/powerpc/platforms/85xx/sgy_cts1000.c +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -120,7 +120,7 @@ static int gpio_halt_probe(struct platform_device *pdev) /* Register our halt function */ ppc_md.halt = gpio_halt_cb; - ppc_md.power_off = gpio_halt_cb; + pm_power_off = gpio_halt_cb; printk(KERN_INFO "gpio-halt: registered GPIO %d (%d trigger, %d" " irq).\n", gpio, trigger, irq); @@ -137,7 +137,7 @@ static int gpio_halt_remove(struct platform_device *pdev) free_irq(irq, halt_node); ppc_md.halt = NULL; - ppc_md.power_off = NULL; + pm_power_off = NULL; gpio_free(gpio); @@ -161,7 +161,6 @@ MODULE_DEVICE_TABLE(of, gpio_halt_match); static struct platform_driver gpio_halt_driver = { .driver = { .name = "gpio-halt", - .owner = THIS_MODULE, .of_match_table = gpio_halt_match, }, .probe = gpio_halt_probe, diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index bd6f1a1cf922..157250426b56 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -1,6 +1,3 @@ -config FADS - bool - config CPM1 bool select CPM @@ -13,7 +10,6 @@ choice config MPC8XXFADS bool "FADS" - select FADS config MPC86XADS bool "MPC86XADS" diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 862b32702d29..623bd961465a 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -279,7 +279,7 @@ static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) irq_set_msi_desc(virq, entry); msg.data = virq; - write_msi_msg(virq, &msg); + pci_write_msi_msg(virq, &msg); } return 0; @@ -301,9 +301,9 @@ static void axon_msi_teardown_msi_irqs(struct pci_dev *dev) } static struct irq_chip msic_irq_chip = { - .irq_mask = mask_msi_irq, - .irq_unmask = unmask_msi_irq, - .irq_shutdown = mask_msi_irq, + .irq_mask = pci_msi_mask_irq, + .irq_unmask = pci_msi_unmask_irq, + .irq_shutdown = pci_msi_mask_irq, .name = "AXON-MSI", }; @@ -437,7 +437,6 @@ static struct platform_driver axon_msi_driver = { .shutdown = axon_msi_shutdown, .driver = { .name = "axon-msi", - .owner = THIS_MODULE, .of_match_table = axon_msi_device_id, }, }; diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c index d4d245c0d787..bee9232fe619 100644 --- a/arch/powerpc/platforms/cell/beat_htab.c +++ b/arch/powerpc/platforms/cell/beat_htab.c @@ -186,7 +186,7 @@ static long beat_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long flags) { unsigned long lpar_rc; u64 dummy0, dummy1; @@ -369,7 +369,7 @@ static long beat_lpar_hpte_updatepp_v3(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long flags) { unsigned long lpar_rc; unsigned long want_v; diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c index 2b98a36ef8fb..3ce70ded2d6a 100644 --- a/arch/powerpc/platforms/cell/celleb_pci.c +++ b/arch/powerpc/platforms/cell/celleb_pci.c @@ -29,7 +29,7 @@ #include <linux/pci.h> #include <linux/string.h> #include <linux/init.h> -#include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/pci_regs.h> #include <linux/of.h> #include <linux/of_device.h> @@ -401,11 +401,11 @@ error: } else { if (config && *config) { size = 256; - free_bootmem(__pa(*config), size); + memblock_free(__pa(*config), size); } if (res && *res) { size = sizeof(struct celleb_pci_resource); - free_bootmem(__pa(*res), size); + memblock_free(__pa(*res), size); } } diff --git a/arch/powerpc/platforms/cell/celleb_scc_epci.c b/arch/powerpc/platforms/cell/celleb_scc_epci.c index 844c0facb4f7..9438bbed402f 100644 --- a/arch/powerpc/platforms/cell/celleb_scc_epci.c +++ b/arch/powerpc/platforms/cell/celleb_scc_epci.c @@ -25,7 +25,6 @@ #include <linux/pci.h> #include <linux/init.h> #include <linux/pci_regs.h> -#include <linux/bootmem.h> #include <asm/io.h> #include <asm/irq.h> diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c index 4278acfa2ede..f22387598040 100644 --- a/arch/powerpc/platforms/cell/celleb_scc_pciex.c +++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c @@ -25,7 +25,6 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/init.h> -#include <linux/bootmem.h> #include <linux/delay.h> #include <linux/interrupt.h> diff --git a/arch/powerpc/platforms/cell/celleb_setup.c b/arch/powerpc/platforms/cell/celleb_setup.c index 34e8ce2976aa..90be8ec51686 100644 --- a/arch/powerpc/platforms/cell/celleb_setup.c +++ b/arch/powerpc/platforms/cell/celleb_setup.c @@ -142,6 +142,7 @@ static int __init celleb_probe_beat(void) powerpc_firmware_features |= FW_FEATURE_CELLEB_ALWAYS | FW_FEATURE_BEAT | FW_FEATURE_LPAR; hpte_init_beat_v3(); + pm_power_off = beat_power_off; return 1; } @@ -190,6 +191,7 @@ static int __init celleb_probe_native(void) powerpc_firmware_features |= FW_FEATURE_CELLEB_ALWAYS; hpte_init_native(); + pm_power_off = rtas_power_off; return 1; } @@ -204,7 +206,6 @@ define_machine(celleb_beat) { .setup_arch = celleb_setup_arch_beat, .show_cpuinfo = celleb_show_cpuinfo, .restart = beat_restart, - .power_off = beat_power_off, .halt = beat_halt, .get_rtc_time = beat_get_rtc_time, .set_rtc_time = beat_set_rtc_time, @@ -230,7 +231,6 @@ define_machine(celleb_native) { .setup_arch = celleb_setup_arch_native, .show_cpuinfo = celleb_show_cpuinfo, .restart = rtas_restart, - .power_off = rtas_power_off, .halt = rtas_halt, .get_boot_time = rtas_get_boot_time, .get_rtc_time = rtas_get_rtc_time, diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 8a106b4172e0..4c11421847be 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -82,7 +82,7 @@ static void iic_unmask(struct irq_data *d) static void iic_eoi(struct irq_data *d) { - struct iic *iic = &__get_cpu_var(cpu_iic); + struct iic *iic = this_cpu_ptr(&cpu_iic); out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]); BUG_ON(iic->eoi_ptr < 0); } @@ -148,7 +148,7 @@ static unsigned int iic_get_irq(void) struct iic *iic; unsigned int virq; - iic = &__get_cpu_var(cpu_iic); + iic = this_cpu_ptr(&cpu_iic); *(unsigned long *) &pending = in_be64((u64 __iomem *) &iic->regs->pending_destr); if (!(pending.flags & CBE_IIC_IRQ_VALID)) @@ -163,7 +163,7 @@ static unsigned int iic_get_irq(void) void iic_setup_cpu(void) { - out_be64(&__get_cpu_var(cpu_iic).regs->prio, 0xff); + out_be64(this_cpu_ptr(&cpu_iic.regs->prio), 0xff); } u8 iic_get_target_id(int cpu) diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 2b90ff8a93be..c7c8720aa39f 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -621,8 +621,9 @@ static int dma_fixed_map_sg(struct device *dev, struct scatterlist *sg, if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) return dma_direct_ops.map_sg(dev, sg, nents, direction, attrs); else - return iommu_map_sg(dev, cell_get_iommu_table(dev), sg, nents, - device_to_mask(dev), direction, attrs); + return ppc_iommu_map_sg(dev, cell_get_iommu_table(dev), sg, + nents, device_to_mask(dev), + direction, attrs); } static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg, @@ -632,8 +633,8 @@ static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg, if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) dma_direct_ops.unmap_sg(dev, sg, nents, direction, attrs); else - iommu_unmap_sg(cell_get_iommu_table(dev), sg, nents, direction, - attrs); + ppc_iommu_unmap_sg(cell_get_iommu_table(dev), sg, nents, + direction, attrs); } static int dma_fixed_dma_supported(struct device *dev, u64 mask) diff --git a/arch/powerpc/platforms/cell/qpace_setup.c b/arch/powerpc/platforms/cell/qpace_setup.c index 6e3409d590ac..d328140dc6f5 100644 --- a/arch/powerpc/platforms/cell/qpace_setup.c +++ b/arch/powerpc/platforms/cell/qpace_setup.c @@ -127,6 +127,7 @@ static int __init qpace_probe(void) return 0; hpte_init_native(); + pm_power_off = rtas_power_off; return 1; } @@ -137,7 +138,6 @@ define_machine(qpace) { .setup_arch = qpace_setup_arch, .show_cpuinfo = qpace_show_cpuinfo, .restart = rtas_restart, - .power_off = rtas_power_off, .halt = rtas_halt, .get_boot_time = rtas_get_boot_time, .get_rtc_time = rtas_get_rtc_time, diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index 6ae25fb62015..d62aa982d530 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -259,6 +259,7 @@ static int __init cell_probe(void) return 0; hpte_init_native(); + pm_power_off = rtas_power_off; return 1; } @@ -269,7 +270,6 @@ define_machine(cell) { .setup_arch = cell_setup_arch, .show_cpuinfo = cell_show_cpuinfo, .restart = rtas_restart, - .power_off = rtas_power_off, .halt = rtas_halt, .get_boot_time = rtas_get_boot_time, .get_rtc_time = rtas_get_rtc_time, diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index ffcbd242e669..f7af74f83693 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -181,7 +181,8 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) return 0; } -extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); //XXX +extern int hash_page(unsigned long ea, unsigned long access, + unsigned long trap, unsigned long dsisr); //XXX static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) { int ret; @@ -196,7 +197,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) (REGION_ID(ea) != USER_REGION_ID)) { spin_unlock(&spu->register_lock); - ret = hash_page(ea, _PAGE_PRESENT, 0x300); + ret = hash_page(ea, _PAGE_PRESENT, 0x300, dsisr); spin_lock(&spu->register_lock); if (!ret) { diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index e45894a08118..d98f845ac777 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -144,7 +144,7 @@ int spufs_handle_class1(struct spu_context *ctx) access = (_PAGE_PRESENT | _PAGE_USER); access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL; local_irq_save(flags); - ret = hash_page(ea, access, 0x300); + ret = hash_page(ea, access, 0x300, dsisr); local_irq_restore(flags); /* hashing failed, so try the actual fault handler */ diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 87ba7cf99cd7..1a3429e1ccb5 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -164,7 +164,7 @@ static void spufs_prune_dir(struct dentry *dir) struct dentry *dentry, *tmp; mutex_lock(&dir->d_inode->i_mutex); - list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) { + list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) { spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry)) && dentry->d_inode) { dget_dlock(dentry); @@ -301,7 +301,7 @@ static int spufs_context_open(struct path *path) int ret; struct file *filp; - ret = get_unused_fd(); + ret = get_unused_fd_flags(0); if (ret < 0) return ret; @@ -518,7 +518,7 @@ static int spufs_gang_open(struct path *path) int ret; struct file *filp; - ret = get_unused_fd(); + ret = get_unused_fd_flags(0); if (ret < 0) return ret; diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 5b77b1919fd2..860a59eb8ea2 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -585,6 +585,8 @@ static int __init chrp_probe(void) DMA_MODE_READ = 0x44; DMA_MODE_WRITE = 0x48; + pm_power_off = rtas_power_off; + return 1; } @@ -597,7 +599,6 @@ define_machine(chrp) { .show_cpuinfo = chrp_show_cpuinfo, .init_IRQ = chrp_init_IRQ, .restart = rtas_restart, - .power_off = rtas_power_off, .halt = rtas_halt, .time_init = chrp_time_init, .set_rtc_time = chrp_set_rtc_time, diff --git a/arch/powerpc/platforms/embedded6xx/gamecube.c b/arch/powerpc/platforms/embedded6xx/gamecube.c index bd4ba5d7d568..fe0ed6ee285e 100644 --- a/arch/powerpc/platforms/embedded6xx/gamecube.c +++ b/arch/powerpc/platforms/embedded6xx/gamecube.c @@ -67,6 +67,8 @@ static int __init gamecube_probe(void) if (!of_flat_dt_is_compatible(dt_root, "nintendo,gamecube")) return 0; + pm_power_off = gamecube_power_off; + return 1; } @@ -80,7 +82,6 @@ define_machine(gamecube) { .probe = gamecube_probe, .init_early = gamecube_init_early, .restart = gamecube_restart, - .power_off = gamecube_power_off, .halt = gamecube_halt, .init_IRQ = flipper_pic_probe, .get_irq = flipper_pic_get_irq, diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index 168e1d80b2e5..540eeb58d3f0 100644 --- a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -147,6 +147,9 @@ static int __init linkstation_probe(void) if (!of_flat_dt_is_compatible(root, "linkstation")) return 0; + + pm_power_off = linkstation_power_off; + return 1; } @@ -158,7 +161,6 @@ define_machine(linkstation){ .show_cpuinfo = linkstation_show_cpuinfo, .get_irq = mpic_get_irq, .restart = linkstation_restart, - .power_off = linkstation_power_off, .halt = linkstation_halt, .calibrate_decr = generic_calibrate_decr, }; diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c index 20a8ed91962e..7feb325b636b 100644 --- a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c +++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c @@ -247,7 +247,7 @@ void __init ug_udbg_init(void) np = of_find_compatible_node(NULL, NULL, "nintendo,flipper-exi"); if (!np) { udbg_printf("%s: EXI node not found\n", __func__); - goto done; + goto out; } exi_io_base = ug_udbg_setup_exi_io_base(np); @@ -267,8 +267,8 @@ void __init ug_udbg_init(void) } done: - if (np) - of_node_put(np); + of_node_put(np); +out: return; } diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 388e29bab8f6..352592d3e44e 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -211,6 +211,8 @@ static int __init wii_probe(void) if (!of_flat_dt_is_compatible(dt_root, "nintendo,wii")) return 0; + pm_power_off = wii_power_off; + return 1; } @@ -226,7 +228,6 @@ define_machine(wii) { .init_early = wii_init_early, .setup_arch = wii_setup_arch, .restart = wii_restart, - .power_off = wii_power_off, .halt = wii_halt, .init_IRQ = wii_pic_probe, .get_irq = flipper_pic_get_irq, diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index f7136aae8bbf..d3a13067ec42 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -15,7 +15,6 @@ #include <linux/delay.h> #include <linux/string.h> #include <linux/init.h> -#include <linux/bootmem.h> #include <linux/irq.h> #include <asm/sections.h> diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index cb1b0b35a0c6..56b85cd61aaf 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -169,7 +169,7 @@ static void __init maple_use_rtas_reboot_and_halt_if_present(void) if (rtas_service_present("system-reboot") && rtas_service_present("power-off")) { ppc_md.restart = rtas_restart; - ppc_md.power_off = rtas_power_off; + pm_power_off = rtas_power_off; ppc_md.halt = rtas_halt; } } @@ -312,6 +312,7 @@ static int __init maple_probe(void) alloc_dart_table(); hpte_init_native(); + pm_power_off = maple_power_off; return 1; } @@ -325,7 +326,6 @@ define_machine(maple) { .pci_irq_fixup = maple_pci_irq_fixup, .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, .restart = maple_restart, - .power_off = maple_power_off, .halt = maple_halt, .get_boot_time = maple_get_boot_time, .set_rtc_time = maple_set_rtc_time, diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c index ada33358950d..ae3f47b25b18 100644 --- a/arch/powerpc/platforms/pasemi/gpio_mdio.c +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -305,7 +305,6 @@ static struct platform_driver gpio_mdio_driver = .remove = gpio_mdio_remove, .driver = { .name = "gpio-mdio-bitbang", - .owner = THIS_MODULE, .of_match_table = gpio_mdio_match, }, }; diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index 014d06e6d46b..60b03a1703d1 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -513,11 +513,7 @@ static int __init core99_nvram_setup(struct device_node *dp, unsigned long addr) printk(KERN_ERR "nvram: no address\n"); return -EINVAL; } - nvram_image = alloc_bootmem(NVRAM_SIZE); - if (nvram_image == NULL) { - printk(KERN_ERR "nvram: can't allocate ram image\n"); - return -ENOMEM; - } + nvram_image = memblock_virt_alloc(NVRAM_SIZE, 0); nvram_data = ioremap(addr, NVRAM_SIZE*2); nvram_naddrs = 1; /* Make sure we get the correct case */ diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index 7e868ccf3b0d..04702db35d45 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -15,7 +15,6 @@ #include <linux/delay.h> #include <linux/string.h> #include <linux/init.h> -#include <linux/bootmem.h> #include <linux/irq.h> #include <linux/of_pci.h> diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index b127a29ac526..713d36d45d1d 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -632,6 +632,8 @@ static int __init pmac_probe(void) smu_cmdbuf_abs = memblock_alloc_base(4096, 4096, 0x80000000UL); #endif /* CONFIG_PMAC_SMU */ + pm_power_off = pmac_power_off; + return 1; } @@ -663,7 +665,6 @@ define_machine(powermac) { .get_irq = NULL, /* changed later */ .pci_irq_fixup = pmac_pci_irq_fixup, .restart = pmac_restart, - .power_off = pmac_power_off, .halt = pmac_halt, .time_init = pmac_time_init, .get_boot_time = pmac_get_boot_time, diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index eba9cb10619c..2809c9895288 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -11,7 +11,6 @@ * (at your option) any later version. */ -#include <linux/bootmem.h> #include <linux/debugfs.h> #include <linux/delay.h> #include <linux/io.h> @@ -354,6 +353,9 @@ static int ioda_eeh_get_phb_state(struct eeh_pe *pe) } else if (!(pe->state & EEH_PE_ISOLATED)) { eeh_pe_state_mark(pe, EEH_PE_ISOLATED); ioda_eeh_phb_diag(pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); } return result; @@ -373,7 +375,7 @@ static int ioda_eeh_get_pe_state(struct eeh_pe *pe) * moving forward, we have to return operational * state during PE reset. */ - if (pe->state & EEH_PE_CFG_BLOCKED) { + if (pe->state & EEH_PE_RESET) { result = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE | EEH_STATE_MMIO_ENABLED | @@ -452,6 +454,9 @@ static int ioda_eeh_get_pe_state(struct eeh_pe *pe) eeh_pe_state_mark(pe, EEH_PE_ISOLATED); ioda_eeh_phb_diag(pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); } return result; @@ -731,7 +736,8 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) static int ioda_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len) { - pnv_pci_dump_phb_diag_data(pe->phb, pe->data); + if (!eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); return 0; } @@ -1087,6 +1093,10 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) !((*pe)->state & EEH_PE_ISOLATED)) { eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); ioda_eeh_phb_diag(*pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data((*pe)->phb, + (*pe)->data); } /* diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c index e462ab947d16..693b6cdac691 100644 --- a/arch/powerpc/platforms/powernv/opal-async.c +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -71,6 +71,7 @@ int opal_async_get_token_interruptible(void) return token; } +EXPORT_SYMBOL_GPL(opal_async_get_token_interruptible); int __opal_async_release_token(int token) { @@ -102,6 +103,7 @@ int opal_async_release_token(int token) return 0; } +EXPORT_SYMBOL_GPL(opal_async_release_token); int opal_async_wait_response(uint64_t token, struct opal_msg *msg) { @@ -120,6 +122,7 @@ int opal_async_wait_response(uint64_t token, struct opal_msg *msg) return 0; } +EXPORT_SYMBOL_GPL(opal_async_wait_response); static int opal_async_comp_event(struct notifier_block *nb, unsigned long msg_type, void *msg) diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c index 5e1ed1575aab..b322bfb51343 100644 --- a/arch/powerpc/platforms/powernv/opal-hmi.c +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -57,7 +57,7 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) }; /* Print things out */ - if (hmi_evt->version != OpalHMIEvt_V1) { + if (hmi_evt->version < OpalHMIEvt_V1) { pr_err("HMI Interrupt, Unknown event version %d !\n", hmi_evt->version); return; diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index ad4b31df779a..e4169d68cb32 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -216,14 +216,54 @@ static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf, &data, len); if (rc) return -ENXIO; + + /* + * Now there is some trickery with the data returned by OPAL + * as it's the desired data right justified in a 32-bit BE + * word. + * + * This is a very bad interface and I'm to blame for it :-( + * + * So we can't just apply a 32-bit swap to what comes from OPAL, + * because user space expects the *bytes* to be in their proper + * respective positions (ie, LPC position). + * + * So what we really want to do here is to shift data right + * appropriately on a LE kernel. + * + * IE. If the LPC transaction has bytes B0, B1, B2 and B3 in that + * order, we have in memory written to by OPAL at the "data" + * pointer: + * + * Bytes: OPAL "data" LE "data" + * 32-bit: B0 B1 B2 B3 B0B1B2B3 B3B2B1B0 + * 16-bit: B0 B1 0000B0B1 B1B00000 + * 8-bit: B0 000000B0 B0000000 + * + * So a BE kernel will have the leftmost of the above in the MSB + * and rightmost in the LSB and can just then "cast" the u32 "data" + * down to the appropriate quantity and write it. + * + * However, an LE kernel can't. It doesn't need to swap because a + * load from data followed by a store to user are going to preserve + * the byte ordering which is the wire byte order which is what the + * user wants, but in order to "crop" to the right size, we need to + * shift right first. + */ switch(len) { case 4: rc = __put_user((u32)data, (u32 __user *)ubuf); break; case 2: +#ifdef __LITTLE_ENDIAN__ + data >>= 16; +#endif rc = __put_user((u16)data, (u16 __user *)ubuf); break; default: +#ifdef __LITTLE_ENDIAN__ + data >>= 24; +#endif rc = __put_user((u8)data, (u8 __user *)ubuf); break; } @@ -263,12 +303,31 @@ static ssize_t lpc_debug_write(struct file *filp, const char __user *ubuf, else if (todo > 1 && (pos & 1) == 0) len = 2; } + + /* + * Similarly to the read case, we have some trickery here but + * it's different to handle. We need to pass the value to OPAL in + * a register whose layout depends on the access size. We want + * to reproduce the memory layout of the user, however we aren't + * doing a load from user and a store to another memory location + * which would achieve that. Here we pass the value to OPAL via + * a register which is expected to contain the "BE" interpretation + * of the byte sequence. IE: for a 32-bit access, byte 0 should be + * in the MSB. So here we *do* need to byteswap on LE. + * + * User bytes: LE "data" OPAL "data" + * 32-bit: B0 B1 B2 B3 B3B2B1B0 B0B1B2B3 + * 16-bit: B0 B1 0000B1B0 0000B0B1 + * 8-bit: B0 000000B0 000000B0 + */ switch(len) { case 4: rc = __get_user(data, (u32 __user *)ubuf); + data = cpu_to_be32(data); break; case 2: rc = __get_user(data, (u16 __user *)ubuf); + data = cpu_to_be16(data); break; default: rc = __get_user(data, (u8 __user *)ubuf); diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c index 499707ddaa9c..37dbee15769f 100644 --- a/arch/powerpc/platforms/powernv/opal-rtc.c +++ b/arch/powerpc/platforms/powernv/opal-rtc.c @@ -15,6 +15,8 @@ #include <linux/bcd.h> #include <linux/rtc.h> #include <linux/delay.h> +#include <linux/platform_device.h> +#include <linux/of_platform.h> #include <asm/opal.h> #include <asm/firmware.h> @@ -43,7 +45,7 @@ unsigned long __init opal_get_boot_time(void) long rc = OPAL_BUSY; if (!opal_check_token(OPAL_RTC_READ)) - goto out; + return 0; while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); @@ -53,62 +55,33 @@ unsigned long __init opal_get_boot_time(void) mdelay(10); } if (rc != OPAL_SUCCESS) - goto out; + return 0; y_m_d = be32_to_cpu(__y_m_d); h_m_s_ms = be64_to_cpu(__h_m_s_ms); opal_to_tm(y_m_d, h_m_s_ms, &tm); return mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); -out: - ppc_md.get_rtc_time = NULL; - ppc_md.set_rtc_time = NULL; - return 0; } -void opal_get_rtc_time(struct rtc_time *tm) +static __init int opal_time_init(void) { - long rc = OPAL_BUSY; - u32 y_m_d; - u64 h_m_s_ms; - __be32 __y_m_d; - __be64 __h_m_s_ms; + struct platform_device *pdev; + struct device_node *rtc; - while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { - rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); - if (rc == OPAL_BUSY_EVENT) - opal_poll_events(NULL); + rtc = of_find_node_by_path("/ibm,opal/rtc"); + if (rtc) { + pdev = of_platform_device_create(rtc, "opal-rtc", NULL); + of_node_put(rtc); + } else { + if (opal_check_token(OPAL_RTC_READ) || + opal_check_token(OPAL_READ_TPO)) + pdev = platform_device_register_simple("opal-rtc", -1, + NULL, 0); else - mdelay(10); + return -ENODEV; } - if (rc != OPAL_SUCCESS) - return; - y_m_d = be32_to_cpu(__y_m_d); - h_m_s_ms = be64_to_cpu(__h_m_s_ms); - opal_to_tm(y_m_d, h_m_s_ms, tm); -} - -int opal_set_rtc_time(struct rtc_time *tm) -{ - long rc = OPAL_BUSY; - u32 y_m_d = 0; - u64 h_m_s_ms = 0; - - y_m_d |= ((u32)bin2bcd((tm->tm_year + 1900) / 100)) << 24; - y_m_d |= ((u32)bin2bcd((tm->tm_year + 1900) % 100)) << 16; - y_m_d |= ((u32)bin2bcd((tm->tm_mon + 1))) << 8; - y_m_d |= ((u32)bin2bcd(tm->tm_mday)); - - h_m_s_ms |= ((u64)bin2bcd(tm->tm_hour)) << 56; - h_m_s_ms |= ((u64)bin2bcd(tm->tm_min)) << 48; - h_m_s_ms |= ((u64)bin2bcd(tm->tm_sec)) << 40; - while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { - rc = opal_rtc_write(y_m_d, h_m_s_ms); - if (rc == OPAL_BUSY_EVENT) - opal_poll_events(NULL); - else - mdelay(10); - } - return rc == OPAL_SUCCESS ? 0 : -EIO; + return PTR_ERR_OR_ZERO(pdev); } +machine_subsys_initcall(powernv, opal_time_init); diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c index 10271ad1fac4..4ab67ef7abc9 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor.c +++ b/arch/powerpc/platforms/powernv/opal-sensor.c @@ -20,7 +20,9 @@ #include <linux/delay.h> #include <linux/mutex.h> +#include <linux/of_platform.h> #include <asm/opal.h> +#include <asm/machdep.h> static DEFINE_MUTEX(opal_sensor_mutex); @@ -64,3 +66,21 @@ out: return ret; } EXPORT_SYMBOL_GPL(opal_get_sensor_data); + +static __init int opal_sensor_init(void) +{ + struct platform_device *pdev; + struct device_node *sensor; + + sensor = of_find_node_by_path("/ibm,opal/sensors"); + if (!sensor) { + pr_err("Opal node 'sensors' not found\n"); + return -ENODEV; + } + + pdev = of_platform_device_create(sensor, "opal-sensor", NULL); + of_node_put(sensor); + + return PTR_ERR_OR_ZERO(pdev); +} +machine_subsys_initcall(powernv, opal_sensor_init); diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index ae14c40b4b1c..e11273b2386d 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -48,7 +48,7 @@ void __trace_opal_entry(unsigned long opcode, unsigned long *args) local_irq_save(flags); - depth = &__get_cpu_var(opal_trace_depth); + depth = this_cpu_ptr(&opal_trace_depth); if (*depth) goto out; @@ -69,7 +69,7 @@ void __trace_opal_exit(long opcode, unsigned long retval) local_irq_save(flags); - depth = &__get_cpu_var(opal_trace_depth); + depth = this_cpu_ptr(&opal_trace_depth); if (*depth) goto out; diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index feb549aa3eea..54eca8b3b288 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -18,7 +18,7 @@ .section ".text" #ifdef CONFIG_TRACEPOINTS -#ifdef CONFIG_JUMP_LABEL +#ifdef HAVE_JUMP_LABEL #define OPAL_BRANCH(LABEL) \ ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key) #else @@ -158,6 +158,43 @@ opal_tracepoint_return: blr #endif +/* + * Make opal call in realmode. This is a generic function to be called + * from realmode. It handles endianness. + * + * r13 - paca pointer + * r1 - stack pointer + * r0 - opal token + */ +_GLOBAL(opal_call_realmode) + mflr r12 + std r12,PPC_LR_STKOFF(r1) + ld r2,PACATOC(r13) + /* Set opal return address */ + LOAD_REG_ADDR(r12,return_from_opal_call) + mtlr r12 + + mfmsr r12 +#ifdef __LITTLE_ENDIAN__ + /* Handle endian-ness */ + li r11,MSR_LE + andc r12,r12,r11 +#endif + mtspr SPRN_HSRR1,r12 + LOAD_REG_ADDR(r11,opal) + ld r12,8(r11) + ld r2,0(r11) + mtspr SPRN_HSRR0,r12 + hrfid + +return_from_opal_call: +#ifdef __LITTLE_ENDIAN__ + FIXUP_ENDIAN +#endif + ld r12,PPC_LR_STKOFF(r1) + mtlr r12 + blr + OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL); OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE); OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ); @@ -247,6 +284,12 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ); OPAL_CALL(opal_get_param, OPAL_GET_PARAM); OPAL_CALL(opal_set_param, OPAL_SET_PARAM); OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); +OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CXL_MODE); +OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO); +OPAL_CALL(opal_tpo_read, OPAL_READ_TPO); +OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND); +OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV); +OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index d019b081df9d..f10b9ec8c1f5 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -9,8 +9,9 @@ * 2 of the License, or (at your option) any later version. */ -#undef DEBUG +#define pr_fmt(fmt) "opal: " fmt +#include <linux/printk.h> #include <linux/types.h> #include <linux/of.h> #include <linux/of_fdt.h> @@ -50,7 +51,6 @@ static int mc_recoverable_range_len; struct device_node *opal_node; static DEFINE_SPINLOCK(opal_write_lock); -extern u64 opal_mc_secondary_handler[]; static unsigned int *opal_irqs; static unsigned int opal_irq_count; static ATOMIC_NOTIFIER_HEAD(opal_notifier_head); @@ -626,6 +626,39 @@ static int opal_sysfs_init(void) return 0; } +static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + return memory_read_from_buffer(buf, count, &off, bin_attr->private, + bin_attr->size); +} + +static BIN_ATTR_RO(symbol_map, 0); + +static void opal_export_symmap(void) +{ + const __be64 *syms; + unsigned int size; + struct device_node *fw; + int rc; + + fw = of_find_node_by_path("/ibm,opal/firmware"); + if (!fw) + return; + syms = of_get_property(fw, "symbol-map", &size); + if (!syms || size != 2 * sizeof(__be64)) + return; + + /* Setup attributes */ + bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0])); + bin_attr_symbol_map.size = be64_to_cpu(syms[1]); + + rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map); + if (rc) + pr_warn("Error %d creating OPAL symbols file\n", rc); +} + static void __init opal_dump_region_init(void) { void *addr; @@ -644,6 +677,24 @@ static void __init opal_dump_region_init(void) pr_warn("DUMP: Failed to register kernel log buffer. " "rc = %d\n", rc); } + +static void opal_ipmi_init(struct device_node *opal_node) +{ + struct device_node *np; + + for_each_child_of_node(opal_node, np) + if (of_device_is_compatible(np, "ibm,opal-ipmi")) + of_platform_device_create(np, NULL, NULL); +} + +static void opal_i2c_create_devs(void) +{ + struct device_node *np; + + for_each_compatible_node(np, NULL, "ibm,opal-i2c") + of_platform_device_create(np, NULL, NULL); +} + static int __init opal_init(void) { struct device_node *np, *consoles; @@ -670,6 +721,9 @@ static int __init opal_init(void) of_node_put(consoles); } + /* Create i2c platform devices */ + opal_i2c_create_devs(); + /* Find all OPAL interrupts and request them */ irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); pr_debug("opal: Found %d interrupts reserved for OPAL\n", @@ -693,6 +747,8 @@ static int __init opal_init(void) /* Create "opal" kobject under /sys/firmware */ rc = opal_sysfs_init(); if (rc == 0) { + /* Export symbol map to userspace */ + opal_export_symmap(); /* Setup dump region interface */ opal_dump_region_init(); /* Setup error log interface */ @@ -707,6 +763,8 @@ static int __init opal_init(void) opal_msglog_init(); } + opal_ipmi_init(opal_node); + return 0; } machine_subsys_initcall(powernv, opal_init); @@ -742,6 +800,8 @@ void opal_shutdown(void) /* Export this so that test modules can use it */ EXPORT_SYMBOL_GPL(opal_invalid_call); +EXPORT_SYMBOL_GPL(opal_ipmi_send); +EXPORT_SYMBOL_GPL(opal_ipmi_recv); /* Convert a region of vmalloc memory to an opal sg list */ struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, @@ -805,3 +865,10 @@ void opal_free_sg_list(struct opal_sg_list *sg) sg = NULL; } } + +EXPORT_SYMBOL_GPL(opal_poll_events); +EXPORT_SYMBOL_GPL(opal_rtc_read); +EXPORT_SYMBOL_GPL(opal_rtc_write); +EXPORT_SYMBOL_GPL(opal_tpo_read); +EXPORT_SYMBOL_GPL(opal_tpo_write); +EXPORT_SYMBOL_GPL(opal_i2c_request); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 468a0f23c7f2..fac88ed8a915 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -91,6 +91,24 @@ static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); } +static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) +{ + if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe)) { + pr_warn("%s: Invalid PE %d on PHB#%x\n", + __func__, pe_no, phb->hose->global_number); + return; + } + + if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) { + pr_warn("%s: PE %d was assigned on PHB#%x\n", + __func__, pe_no, phb->hose->global_number); + return; + } + + phb->ioda.pe_array[pe_no].phb = phb; + phb->ioda.pe_array[pe_no].pe_number = pe_no; +} + static int pnv_ioda_alloc_pe(struct pnv_phb *phb) { unsigned long pe; @@ -172,7 +190,7 @@ fail: return -EIO; } -static void pnv_ioda2_alloc_m64_pe(struct pnv_phb *phb) +static void pnv_ioda2_reserve_m64_pe(struct pnv_phb *phb) { resource_size_t sgsz = phb->ioda.m64_segsize; struct pci_dev *pdev; @@ -185,16 +203,15 @@ static void pnv_ioda2_alloc_m64_pe(struct pnv_phb *phb) * instead of root bus. */ list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) { - for (i = PCI_BRIDGE_RESOURCES; - i <= PCI_BRIDGE_RESOURCE_END; i++) { - r = &pdev->resource[i]; + for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) { + r = &pdev->resource[PCI_BRIDGE_RESOURCES + i]; if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags)) continue; base = (r->start - phb->ioda.m64_base) / sgsz; for (step = 0; step < resource_size(r) / sgsz; step++) - set_bit(base + step, phb->ioda.pe_alloc); + pnv_ioda_reserve_pe(phb, base + step); } } } @@ -287,8 +304,6 @@ done: while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) < phb->ioda.total_pe) { pe = &phb->ioda.pe_array[i]; - pe->phb = phb; - pe->pe_number = i; if (!master_pe) { pe->flags |= PNV_IODA_PE_MASTER; @@ -313,6 +328,12 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) const u32 *r; u64 pci_addr; + /* FIXME: Support M64 for P7IOC */ + if (phb->type != PNV_PHB_IODA2) { + pr_info(" Not support M64 window\n"); + return; + } + if (!firmware_has_feature(FW_FEATURE_OPALv3)) { pr_info(" Firmware too old to support M64 window\n"); return; @@ -325,12 +346,6 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) return; } - /* FIXME: Support M64 for P7IOC */ - if (phb->type != PNV_PHB_IODA2) { - pr_info(" Not support M64 window\n"); - return; - } - res = &hose->mem_resources[1]; res->start = of_translate_address(dn, r + 2); res->end = res->start + of_read_number(r + 4, 2) - 1; @@ -345,7 +360,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) /* Use last M64 BAR to cover M64 window */ phb->ioda.m64_bar_idx = 15; phb->init_m64 = pnv_ioda2_init_m64; - phb->alloc_m64_pe = pnv_ioda2_alloc_m64_pe; + phb->reserve_m64_pe = pnv_ioda2_reserve_m64_pe; phb->pick_m64_pe = pnv_ioda2_pick_m64_pe; } @@ -358,7 +373,9 @@ static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no) /* Fetch master PE */ if (pe->flags & PNV_IODA_PE_SLAVE) { pe = pe->master; - WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); + if (WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER))) + return; + pe_no = pe->pe_number; } @@ -507,6 +524,106 @@ static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) } #endif /* CONFIG_PCI_MSI */ +static int pnv_ioda_set_one_peltv(struct pnv_phb *phb, + struct pnv_ioda_pe *parent, + struct pnv_ioda_pe *child, + bool is_add) +{ + const char *desc = is_add ? "adding" : "removing"; + uint8_t op = is_add ? OPAL_ADD_PE_TO_DOMAIN : + OPAL_REMOVE_PE_FROM_DOMAIN; + struct pnv_ioda_pe *slave; + long rc; + + /* Parent PE affects child PE */ + rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number, + child->pe_number, op); + if (rc != OPAL_SUCCESS) { + pe_warn(child, "OPAL error %ld %s to parent PELTV\n", + rc, desc); + return -ENXIO; + } + + if (!(child->flags & PNV_IODA_PE_MASTER)) + return 0; + + /* Compound case: parent PE affects slave PEs */ + list_for_each_entry(slave, &child->slaves, list) { + rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number, + slave->pe_number, op); + if (rc != OPAL_SUCCESS) { + pe_warn(slave, "OPAL error %ld %s to parent PELTV\n", + rc, desc); + return -ENXIO; + } + } + + return 0; +} + +static int pnv_ioda_set_peltv(struct pnv_phb *phb, + struct pnv_ioda_pe *pe, + bool is_add) +{ + struct pnv_ioda_pe *slave; + struct pci_dev *pdev; + int ret; + + /* + * Clear PE frozen state. If it's master PE, we need + * clear slave PE frozen state as well. + */ + if (is_add) { + opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + if (pe->flags & PNV_IODA_PE_MASTER) { + list_for_each_entry(slave, &pe->slaves, list) + opal_pci_eeh_freeze_clear(phb->opal_id, + slave->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + } + } + + /* + * Associate PE in PELT. We need add the PE into the + * corresponding PELT-V as well. Otherwise, the error + * originated from the PE might contribute to other + * PEs. + */ + ret = pnv_ioda_set_one_peltv(phb, pe, pe, is_add); + if (ret) + return ret; + + /* For compound PEs, any one affects all of them */ + if (pe->flags & PNV_IODA_PE_MASTER) { + list_for_each_entry(slave, &pe->slaves, list) { + ret = pnv_ioda_set_one_peltv(phb, slave, pe, is_add); + if (ret) + return ret; + } + } + + if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS)) + pdev = pe->pbus->self; + else + pdev = pe->pdev->bus->self; + while (pdev) { + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *parent; + + if (pdn && pdn->pe_number != IODA_INVALID_PE) { + parent = &phb->ioda.pe_array[pdn->pe_number]; + ret = pnv_ioda_set_one_peltv(phb, parent, pe, is_add); + if (ret) + return ret; + } + + pdev = pdev->bus->self; + } + + return 0; +} + static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct pci_dev *parent; @@ -561,48 +678,36 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) return -ENXIO; } - rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, - pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); - if (rc) - pe_warn(pe, "OPAL error %d adding self to PELTV\n", rc); - opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, - OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + /* Configure PELTV */ + pnv_ioda_set_peltv(phb, pe, true); - /* Add to all parents PELT-V */ - while (parent) { - struct pci_dn *pdn = pci_get_pdn(parent); - if (pdn && pdn->pe_number != IODA_INVALID_PE) { - rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, - pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); - /* XXX What to do in case of error ? */ - } - parent = parent->bus->self; - } /* Setup reverse map */ for (rid = pe->rid; rid < rid_end; rid++) phb->ioda.pe_rmap[rid] = pe->pe_number; /* Setup one MVTs on IODA1 */ - if (phb->type == PNV_PHB_IODA1) { - pe->mve_number = pe->pe_number; - rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, - pe->pe_number); + if (phb->type != PNV_PHB_IODA1) { + pe->mve_number = 0; + goto out; + } + + pe->mve_number = pe->pe_number; + rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, pe->pe_number); + if (rc != OPAL_SUCCESS) { + pe_err(pe, "OPAL error %ld setting up MVE %d\n", + rc, pe->mve_number); + pe->mve_number = -1; + } else { + rc = opal_pci_set_mve_enable(phb->opal_id, + pe->mve_number, OPAL_ENABLE_MVE); if (rc) { - pe_err(pe, "OPAL error %ld setting up MVE %d\n", + pe_err(pe, "OPAL error %ld enabling MVE %d\n", rc, pe->mve_number); pe->mve_number = -1; - } else { - rc = opal_pci_set_mve_enable(phb->opal_id, - pe->mve_number, OPAL_ENABLE_MVE); - if (rc) { - pe_err(pe, "OPAL error %ld enabling MVE %d\n", - rc, pe->mve_number); - pe->mve_number = -1; - } } - } else if (phb->type == PNV_PHB_IODA2) - pe->mve_number = 0; + } +out: return 0; } @@ -837,8 +942,8 @@ static void pnv_pci_ioda_setup_PEs(void) phb = hose->private_data; /* M64 layout might affect PE allocation */ - if (phb->alloc_m64_pe) - phb->alloc_m64_pe(phb); + if (phb->reserve_m64_pe) + phb->reserve_m64_pe(phb); pnv_ioda_setup_PEs(hose->bus); } @@ -1509,7 +1614,6 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, unsigned int is_64, struct msi_msg *msg) { struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); - struct pci_dn *pdn = pci_get_pdn(dev); unsigned int xive_num = hwirq - phb->msi_base; __be32 data; int rc; @@ -1523,7 +1627,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, return -ENXIO; /* Force 32-bit MSI on some broken devices */ - if (pdn && pdn->force_32bit_msi) + if (dev->no_64bit_msi) is_64 = 0; /* Assign XIVE to PE */ @@ -1835,19 +1939,14 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb_id = be64_to_cpup(prop64); pr_debug(" PHB-ID : 0x%016llx\n", phb_id); - phb = alloc_bootmem(sizeof(struct pnv_phb)); - if (!phb) { - pr_err(" Out of memory !\n"); - return; - } + phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); /* Allocate PCI controller */ - memset(phb, 0, sizeof(struct pnv_phb)); phb->hose = hose = pcibios_alloc_controller(np); if (!phb->hose) { pr_err(" Can't allocate PCI controller for %s\n", np->full_name); - free_bootmem((unsigned long)phb, sizeof(struct pnv_phb)); + memblock_free(__pa(phb), sizeof(struct pnv_phb)); return; } @@ -1914,8 +2013,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, } pemap_off = size; size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe); - aux = alloc_bootmem(size); - memset(aux, 0, size); + aux = memblock_virt_alloc(size, 0); phb->ioda.pe_alloc = aux; phb->ioda.m32_segmap = aux + m32map_off; if (phb->type == PNV_PHB_IODA1) @@ -1997,11 +2095,11 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, if (is_kdump_kernel()) { pr_info(" Issue PHB reset ...\n"); ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); - ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET); + ioda_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE); } - /* Configure M64 window */ - if (phb->init_m64 && phb->init_m64(phb)) + /* Remove M64 resource if we can't configure it successfully */ + if (!phb->init_m64 || phb->init_m64(phb)) hose->mem_resources[1].flags = 0; } diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 94ce3481490b..6ef6d4d8e7e2 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -122,12 +122,9 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, return; } - phb = alloc_bootmem(sizeof(struct pnv_phb)); - if (phb) { - memset(phb, 0, sizeof(struct pnv_phb)); - phb->hose = pcibios_alloc_controller(np); - } - if (!phb || !phb->hose) { + phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); + phb->hose = pcibios_alloc_controller(np); + if (!phb->hose) { pr_err(" Failed to allocate PCI controller\n"); return; } @@ -196,16 +193,27 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) hub_id = be64_to_cpup(prop64); pr_info(" HUB-ID : 0x%016llx\n", hub_id); + /* Count child PHBs and calculate TCE space per PHB */ + for_each_child_of_node(np, phbn) { + if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || + of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) + phb_count++; + } + + if (phb_count <= 0) { + pr_info(" No PHBs for Hub %s\n", np->full_name); + return; + } + + tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count); + pr_info(" Allocating %lld MB of TCE memory per PHB\n", + tce_per_phb >> 20); + /* Currently allocate 16M of TCE memory for every Hub * * XXX TODO: Make it chip local if possible */ - tce_mem = __alloc_bootmem(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY, - __pa(MAX_DMA_ADDRESS)); - if (!tce_mem) { - pr_err(" Failed to allocate TCE Memory !\n"); - return; - } + tce_mem = memblock_virt_alloc(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY); pr_debug(" TCE : 0x%016lx..0x%016lx\n", __pa(tce_mem), __pa(tce_mem) + P5IOC2_TCE_MEMORY - 1); rc = opal_pci_set_hub_tce_memory(hub_id, __pa(tce_mem), @@ -215,18 +223,6 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) return; } - /* Count child PHBs */ - for_each_child_of_node(np, phbn) { - if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || - of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) - phb_count++; - } - - /* Calculate how much TCE space we can give per PHB */ - tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count); - pr_info(" Allocating %lld MB of TCE memory per PHB\n", - tce_per_phb >> 20); - /* Initialize PHBs */ for_each_child_of_node(np, phbn) { if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index b2187d0068b8..4945e87f12dc 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -16,7 +16,6 @@ #include <linux/delay.h> #include <linux/string.h> #include <linux/init.h> -#include <linux/bootmem.h> #include <linux/irq.h> #include <linux/io.h> #include <linux/msi.h> @@ -50,7 +49,6 @@ static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; - struct pci_dn *pdn = pci_get_pdn(pdev); struct msi_desc *entry; struct msi_msg msg; int hwirq; @@ -60,7 +58,7 @@ static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) if (WARN_ON(!phb) || !phb->msi_bmp.bitmap) return -ENODEV; - if (pdn && pdn->force_32bit_msi && !phb->msi32_support) + if (pdev->no_64bit_msi && !phb->msi32_support) return -ENODEV; list_for_each_entry(entry, &pdev->msi_list, list) { @@ -91,7 +89,7 @@ static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return rc; } irq_set_msi_desc(virq, entry); - write_msi_msg(virq, &msg); + pci_write_msi_msg(virq, &msg); } return 0; } diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 34d29eb2a4de..6c02ff8dd69f 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -130,7 +130,7 @@ struct pnv_phb { u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); void (*shutdown)(struct pnv_phb *phb); int (*init_m64)(struct pnv_phb *phb); - void (*alloc_m64_pe)(struct pnv_phb *phb); + void (*reserve_m64_pe)(struct pnv_phb *phb); int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all); int (*get_pe_state)(struct pnv_phb *phb, int pe_no); void (*freeze_pe)(struct pnv_phb *phb, int pe_no); diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 6c8e2d188cd0..604c48e7879a 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -29,6 +29,8 @@ static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) } #endif +extern u32 pnv_get_supported_cpuidle_states(void); + extern void pnv_lpc_init(void); bool cpu_core_split_required(void); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 3f9546d8a51f..b700a329c31d 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -36,8 +36,12 @@ #include <asm/opal.h> #include <asm/kexec.h> #include <asm/smp.h> +#include <asm/cputhreads.h> +#include <asm/cpuidle.h> +#include <asm/code-patching.h> #include "powernv.h" +#include "subcore.h" static void __init pnv_setup_arch(void) { @@ -265,10 +269,8 @@ static unsigned long pnv_memory_block_size(void) static void __init pnv_setup_machdep_opal(void) { ppc_md.get_boot_time = opal_get_boot_time; - ppc_md.get_rtc_time = opal_get_rtc_time; - ppc_md.set_rtc_time = opal_set_rtc_time; ppc_md.restart = pnv_restart; - ppc_md.power_off = pnv_power_off; + pm_power_off = pnv_power_off; ppc_md.halt = pnv_halt; ppc_md.machine_check_exception = opal_machine_check; ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; @@ -285,11 +287,173 @@ static void __init pnv_setup_machdep_rtas(void) ppc_md.set_rtc_time = rtas_set_rtc_time; } ppc_md.restart = rtas_restart; - ppc_md.power_off = rtas_power_off; + pm_power_off = rtas_power_off; ppc_md.halt = rtas_halt; } #endif /* CONFIG_PPC_POWERNV_RTAS */ +static u32 supported_cpuidle_states; + +int pnv_save_sprs_for_winkle(void) +{ + int cpu; + int rc; + + /* + * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross + * all cpus at boot. Get these reg values of current cpu and use the + * same accross all cpus. + */ + uint64_t lpcr_val = mfspr(SPRN_LPCR); + uint64_t hid0_val = mfspr(SPRN_HID0); + uint64_t hid1_val = mfspr(SPRN_HID1); + uint64_t hid4_val = mfspr(SPRN_HID4); + uint64_t hid5_val = mfspr(SPRN_HID5); + uint64_t hmeer_val = mfspr(SPRN_HMEER); + + for_each_possible_cpu(cpu) { + uint64_t pir = get_hard_smp_processor_id(cpu); + uint64_t hsprg0_val = (uint64_t)&paca[cpu]; + + /* + * HSPRG0 is used to store the cpu's pointer to paca. Hence last + * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0 + * with 63rd bit set, so that when a thread wakes up at 0x100 we + * can use this bit to distinguish between fastsleep and + * deep winkle. + */ + hsprg0_val |= 1; + + rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); + if (rc != 0) + return rc; + + /* HIDs are per core registers */ + if (cpu_thread_in_core(cpu) == 0) { + + rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); + if (rc != 0) + return rc; + } + } + + return 0; +} + +static void pnv_alloc_idle_core_states(void) +{ + int i, j; + int nr_cores = cpu_nr_cores(); + u32 *core_idle_state; + + /* + * core_idle_state - First 8 bits track the idle state of each thread + * of the core. The 8th bit is the lock bit. Initially all thread bits + * are set. They are cleared when the thread enters deep idle state + * like sleep and winkle. Initially the lock bit is cleared. + * The lock bit has 2 purposes + * a. While the first thread is restoring core state, it prevents + * other threads in the core from switching to process context. + * b. While the last thread in the core is saving the core state, it + * prevents a different thread from waking up. + */ + for (i = 0; i < nr_cores; i++) { + int first_cpu = i * threads_per_core; + int node = cpu_to_node(first_cpu); + + core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node); + *core_idle_state = PNV_CORE_IDLE_THREAD_BITS; + + for (j = 0; j < threads_per_core; j++) { + int cpu = first_cpu + j; + + paca[cpu].core_idle_state_ptr = core_idle_state; + paca[cpu].thread_idle_state = PNV_THREAD_RUNNING; + paca[cpu].thread_mask = 1 << j; + } + } + + update_subcore_sibling_mask(); + + if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) + pnv_save_sprs_for_winkle(); +} + +u32 pnv_get_supported_cpuidle_states(void) +{ + return supported_cpuidle_states; +} +EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); + +static int __init pnv_init_idle_states(void) +{ + struct device_node *power_mgt; + int dt_idle_states; + const __be32 *idle_state_flags; + u32 len_flags, flags; + int i; + + supported_cpuidle_states = 0; + + if (cpuidle_disable != IDLE_NO_OVERRIDE) + return 0; + + if (!firmware_has_feature(FW_FEATURE_OPALv3)) + return 0; + + power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); + if (!power_mgt) { + pr_warn("opal: PowerMgmt Node not found\n"); + return 0; + } + + idle_state_flags = of_get_property(power_mgt, + "ibm,cpu-idle-state-flags", &len_flags); + if (!idle_state_flags) { + pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n"); + return 0; + } + + dt_idle_states = len_flags / sizeof(u32); + + for (i = 0; i < dt_idle_states; i++) { + flags = be32_to_cpu(idle_state_flags[i]); + supported_cpuidle_states |= flags; + } + if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { + patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_entry, + PPC_INST_NOP); + patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_exit, + PPC_INST_NOP); + } + pnv_alloc_idle_core_states(); + return 0; +} + +subsys_initcall(pnv_init_idle_states); + static int __init pnv_probe(void) { unsigned long root = of_get_flat_dt_root(); diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 4753958cd509..fc34025ef822 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -149,6 +149,8 @@ static int pnv_smp_cpu_disable(void) static void pnv_smp_cpu_kill_self(void) { unsigned int cpu; + unsigned long srr1; + u32 idle_states; /* Standard hot unplug procedure */ local_irq_disable(); @@ -159,19 +161,41 @@ static void pnv_smp_cpu_kill_self(void) generic_set_cpu_dead(cpu); smp_wmb(); + idle_states = pnv_get_supported_cpuidle_states(); /* We don't want to take decrementer interrupts while we are offline, * so clear LPCR:PECE1. We keep PECE2 enabled. */ mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1); while (!generic_check_cpu_restart(cpu)) { + ppc64_runlatch_off(); - power7_nap(1); + + if (idle_states & OPAL_PM_WINKLE_ENABLED) + srr1 = power7_winkle(); + else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || + (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) + srr1 = power7_sleep(); + else + srr1 = power7_nap(1); + ppc64_runlatch_on(); - /* Clear the IPI that woke us up */ - icp_native_flush_interrupt(); - local_paca->irq_happened &= PACA_IRQ_HARD_DIS; - mb(); + /* + * If the SRR1 value indicates that we woke up due to + * an external interrupt, then clear the interrupt. + * We clear the interrupt before checking for the + * reason, so as to avoid a race where we wake up for + * some other reason, find nothing and clear the interrupt + * just as some other cpu is sending us an interrupt. + * If we returned from power7_nap as a result of + * having finished executing in a KVM guest, then srr1 + * contains 0. + */ + if ((srr1 & SRR1_WAKEMASK) == SRR1_WAKEEE) { + icp_native_flush_interrupt(); + local_paca->irq_happened &= PACA_IRQ_HARD_DIS; + smp_mb(); + } if (cpu_core_split_required()) continue; @@ -185,13 +209,27 @@ static void pnv_smp_cpu_kill_self(void) #endif /* CONFIG_HOTPLUG_CPU */ +static int pnv_cpu_bootable(unsigned int nr) +{ + /* + * Starting with POWER8, the subcore logic relies on all threads of a + * core being booted so that they can participate in split mode + * switches. So on those machines we ignore the smt_enabled_at_boot + * setting (smt-enabled on the kernel command line). + */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return 1; + + return smp_generic_cpu_bootable(nr); +} + static struct smp_ops_t pnv_smp_ops = { .message_pass = smp_muxed_ipi_message_pass, .cause_ipi = NULL, /* Filled at runtime by xics_smp_probe() */ .probe = xics_smp_probe, .kick_cpu = pnv_smp_kick_cpu, .setup_cpu = pnv_smp_setup_cpu, - .cpu_bootable = smp_generic_cpu_bootable, + .cpu_bootable = pnv_cpu_bootable, #ifdef CONFIG_HOTPLUG_CPU .cpu_disable = pnv_smp_cpu_disable, .cpu_die = generic_cpu_die, diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index c87f96b79d1a..f60f80ada903 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -160,6 +160,18 @@ static void wait_for_sync_step(int step) mb(); } +static void update_hid_in_slw(u64 hid0) +{ + u64 idle_states = pnv_get_supported_cpuidle_states(); + + if (idle_states & OPAL_PM_WINKLE_ENABLED) { + /* OPAL call to patch slw with the new HID0 value */ + u64 cpu_pir = hard_smp_processor_id(); + + opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0); + } +} + static void unsplit_core(void) { u64 hid0, mask; @@ -179,6 +191,7 @@ static void unsplit_core(void) hid0 = mfspr(SPRN_HID0); hid0 &= ~HID0_POWER8_DYNLPARDIS; mtspr(SPRN_HID0, hid0); + update_hid_in_slw(hid0); while (mfspr(SPRN_HID0) & mask) cpu_relax(); @@ -215,6 +228,7 @@ static void split_core(int new_mode) hid0 = mfspr(SPRN_HID0); hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value; mtspr(SPRN_HID0, hid0); + update_hid_in_slw(hid0); /* Wait for it to happen */ while (!(mfspr(SPRN_HID0) & split_parms[i].mask)) @@ -251,6 +265,25 @@ bool cpu_core_split_required(void) return true; } +void update_subcore_sibling_mask(void) +{ + int cpu; + /* + * sibling mask for the first cpu. Left shift this by required bits + * to get sibling mask for the rest of the cpus. + */ + int sibling_mask_first_cpu = (1 << threads_per_subcore) - 1; + + for_each_possible_cpu(cpu) { + int tid = cpu_thread_in_core(cpu); + int offset = (tid / threads_per_subcore) * threads_per_subcore; + int mask = sibling_mask_first_cpu << offset; + + paca[cpu].subcore_sibling_mask = mask; + + } +} + static int cpu_update_split_mode(void *data) { int cpu, new_mode = *(int *)data; @@ -284,6 +317,7 @@ static int cpu_update_split_mode(void *data) /* Make the new mode public */ subcores_per_core = new_mode; threads_per_subcore = threads_per_core / subcores_per_core; + update_subcore_sibling_mask(); /* Make sure the new mode is written before we exit */ mb(); diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h index 148abc91debf..84e02ae52895 100644 --- a/arch/powerpc/platforms/powernv/subcore.h +++ b/arch/powerpc/platforms/powernv/subcore.h @@ -14,5 +14,12 @@ #define SYNC_STEP_FINISHED 3 /* Set by secondary when split/unsplit is done */ #ifndef __ASSEMBLY__ + +#ifdef CONFIG_SMP void split_core_secondary_loop(u8 *state); -#endif +extern void update_subcore_sibling_mask(void); +#else +static inline void update_subcore_sibling_mask(void) { }; +#endif /* CONFIG_SMP */ + +#endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index 3e270e3412ae..2f95d33cf34a 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -110,7 +110,7 @@ static long ps3_hpte_remove(unsigned long hpte_group) static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long inv_flags) { int result; u64 hpte_v, want_v, hpte_rs; diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index 5f3b23220b8e..a6c42f34303a 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -711,7 +711,7 @@ void __init ps3_register_ipi_irq(unsigned int cpu, unsigned int virq) static unsigned int ps3_get_irq(void) { - struct ps3_private *pd = &__get_cpu_var(ps3_private); + struct ps3_private *pd = this_cpu_ptr(&ps3_private); u64 x = (pd->bmp.status & pd->bmp.mask); unsigned int plug; diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 3f509f86432c..799c8580ab09 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -125,12 +125,7 @@ static void __init prealloc(struct ps3_prealloc *p) if (!p->size) return; - p->address = __alloc_bootmem(p->size, p->align, __pa(MAX_DMA_ADDRESS)); - if (!p->address) { - printk(KERN_ERR "%s: Cannot allocate %s\n", __func__, - p->name); - return; - } + p->address = memblock_virt_alloc(p->size, p->align); printk(KERN_INFO "%s: %lu bytes at %p\n", p->name, p->size, p->address); @@ -248,6 +243,7 @@ static int __init ps3_probe(void) ps3_mm_init(); ps3_mm_vas_create(&htab_size); ps3_hpte_init(htab_size); + pm_power_off = ps3_power_off; DBG(" <- %s:%d\n", __func__, __LINE__); return 1; @@ -278,7 +274,6 @@ define_machine(ps3) { .calibrate_decr = ps3_calibrate_decr, .progress = ps3_progress, .restart = ps3_restart, - .power_off = ps3_power_off, .halt = ps3_halt, #if defined(CONFIG_KEXEC) .kexec_cpu_down = ps3_kexec_cpu_down, diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 6ad83bd11fe2..c22bb1b4beb8 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -382,7 +382,7 @@ static int dlpar_online_cpu(struct device_node *dn) BUG_ON(get_cpu_current_state(cpu) != CPU_STATE_OFFLINE); cpu_maps_update_done(); - rc = cpu_up(cpu); + rc = device_online(get_cpu_device(cpu)); if (rc) goto out; cpu_maps_update_begin(); @@ -467,7 +467,7 @@ static int dlpar_offline_cpu(struct device_node *dn) if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) { set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); cpu_maps_update_done(); - rc = cpu_down(cpu); + rc = device_offline(get_cpu_device(cpu)); if (rc) goto out; cpu_maps_update_begin(); diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 1062f71f5a85..39049e4884fb 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -75,7 +75,7 @@ static atomic_t dtl_count; */ static void consume_dtle(struct dtl_entry *dtle, u64 index) { - struct dtl_ring *dtlr = &__get_cpu_var(dtl_rings); + struct dtl_ring *dtlr = this_cpu_ptr(&dtl_rings); struct dtl_entry *wp = dtlr->write_ptr; struct lppaca *vpa = local_paca->lppaca_ptr; diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 5c375f93c669..f30cf4d136a4 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -340,16 +340,17 @@ static void pseries_remove_processor(struct device_node *np) } static int pseries_smp_notifier(struct notifier_block *nb, - unsigned long action, void *node) + unsigned long action, void *data) { + struct of_reconfig_data *rd = data; int err = 0; switch (action) { case OF_RECONFIG_ATTACH_NODE: - err = pseries_add_processor(node); + err = pseries_add_processor(rd->dn); break; case OF_RECONFIG_DETACH_NODE: - pseries_remove_processor(node); + pseries_remove_processor(rd->dn); break; } return notifier_from_errno(err); diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 3c4c0dcd90d3..fa41f0da5b6f 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -12,7 +12,6 @@ #include <linux/of.h> #include <linux/of_address.h> #include <linux/memblock.h> -#include <linux/vmalloc.h> #include <linux/memory.h> #include <linux/memory_hotplug.h> @@ -66,22 +65,6 @@ unsigned long pseries_memory_block_size(void) } #ifdef CONFIG_MEMORY_HOTREMOVE -static int pseries_remove_memory(u64 start, u64 size) -{ - int ret; - - /* Remove htab bolted mappings for this section of memory */ - start = (unsigned long)__va(start); - ret = remove_section_mapping(start, start + size); - - /* Ensure all vmalloc mappings are flushed in case they also - * hit that section of memory - */ - vm_unmap_aliases(); - - return ret; -} - static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) { unsigned long block_sz, start_pfn; @@ -183,7 +166,7 @@ static int pseries_add_mem_node(struct device_node *np) return (ret < 0) ? -EINVAL : 0; } -static int pseries_update_drconf_memory(struct of_prop_reconfig *pr) +static int pseries_update_drconf_memory(struct of_reconfig_data *pr) { struct of_drconf_cell *new_drmem, *old_drmem; unsigned long memblock_size; @@ -232,22 +215,21 @@ static int pseries_update_drconf_memory(struct of_prop_reconfig *pr) } static int pseries_memory_notifier(struct notifier_block *nb, - unsigned long action, void *node) + unsigned long action, void *data) { - struct of_prop_reconfig *pr; + struct of_reconfig_data *rd = data; int err = 0; switch (action) { case OF_RECONFIG_ATTACH_NODE: - err = pseries_add_mem_node(node); + err = pseries_add_mem_node(rd->dn); break; case OF_RECONFIG_DETACH_NODE: - err = pseries_remove_mem_node(node); + err = pseries_remove_mem_node(rd->dn); break; case OF_RECONFIG_UPDATE_PROPERTY: - pr = (struct of_prop_reconfig *)node; - if (!strcmp(pr->prop->name, "ibm,dynamic-memory")) - err = pseries_update_drconf_memory(pr); + if (!strcmp(rd->prop->name, "ibm,dynamic-memory")) + err = pseries_update_drconf_memory(rd); break; } return notifier_from_errno(err); @@ -262,10 +244,6 @@ static int __init pseries_memory_hotplug_init(void) if (firmware_has_feature(FW_FEATURE_LPAR)) of_reconfig_notifier_register(&pseries_mem_nb); -#ifdef CONFIG_MEMORY_HOTREMOVE - ppc_md.remove_memory = pseries_remove_memory; -#endif - return 0; } machine_device_initcall(pseries, pseries_memory_hotplug_init); diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 3fda3f17b84e..ccd53f91e8aa 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -18,7 +18,7 @@ #ifdef CONFIG_TRACEPOINTS -#ifndef CONFIG_JUMP_LABEL +#ifndef HAVE_JUMP_LABEL .section ".toc","aw" .globl hcall_tracepoint_refcount @@ -78,7 +78,7 @@ hcall_tracepoint_refcount: mr r5,BUFREG; \ __HCALL_INST_POSTCALL -#ifdef CONFIG_JUMP_LABEL +#ifdef HAVE_JUMP_LABEL #define HCALL_BRANCH(LABEL) \ ARCH_STATIC_BRANCH(LABEL, hcall_tracepoint_key) #else diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 4575f0c9e521..f02ec3ab428c 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -110,7 +110,7 @@ static void probe_hcall_entry(void *ignored, unsigned long opcode, unsigned long if (opcode > MAX_HCALL_OPCODE) return; - h = &__get_cpu_var(hcall_stats)[opcode / 4]; + h = this_cpu_ptr(&hcall_stats[opcode / 4]); h->tb_start = mftb(); h->purr_start = mfspr(SPRN_PURR); } @@ -123,7 +123,7 @@ static void probe_hcall_exit(void *ignored, unsigned long opcode, unsigned long if (opcode > MAX_HCALL_OPCODE) return; - h = &__get_cpu_var(hcall_stats)[opcode / 4]; + h = this_cpu_ptr(&hcall_stats[opcode / 4]); h->num_calls++; h->tb_total += mftb() - h->tb_start; h->purr_total += mfspr(SPRN_PURR) - h->purr_start; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index e32e00976a94..1d3d52dc3ff3 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -199,7 +199,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, local_irq_save(flags); /* to protect tcep and the page behind it */ - tcep = __get_cpu_var(tce_page); + tcep = __this_cpu_read(tce_page); /* This is safe to do since interrupts are off when we're called * from iommu_alloc{,_sg}() @@ -212,7 +212,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction, attrs); } - __get_cpu_var(tce_page) = tcep; + __this_cpu_write(tce_page, tcep); } rpn = __pa(uaddr) >> TCE_SHIFT; @@ -398,7 +398,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, long l, limit; local_irq_disable(); /* to protect tcep and the page behind it */ - tcep = __get_cpu_var(tce_page); + tcep = __this_cpu_read(tce_page); if (!tcep) { tcep = (__be64 *)__get_free_page(GFP_ATOMIC); @@ -406,7 +406,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, local_irq_enable(); return -ENOMEM; } - __get_cpu_var(tce_page) = tcep; + __this_cpu_write(tce_page, tcep); } proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; @@ -574,8 +574,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) while (isa_dn && isa_dn != dn) isa_dn = isa_dn->parent; - if (isa_dn_orig) - of_node_put(isa_dn_orig); + of_node_put(isa_dn_orig); /* Count number of direct PCI children of the PHB. */ for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) @@ -1251,10 +1250,11 @@ static struct notifier_block iommu_mem_nb = { .notifier_call = iommu_mem_notifier, }; -static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node) +static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data) { int err = NOTIFY_OK; - struct device_node *np = node; + struct of_reconfig_data *rd = data; + struct device_node *np = rd->dn; struct pci_dn *pci = PCI_DN(np); struct direct_window *window; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 8c509d5397c6..b5682fd6c984 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -43,6 +43,8 @@ #include <asm/trace.h> #include <asm/firmware.h> #include <asm/plpar_wrappers.h> +#include <asm/kexec.h> +#include <asm/fadump.h> #include "pseries.h" @@ -247,8 +249,17 @@ static void pSeries_lpar_hptab_clear(void) } #ifdef __LITTLE_ENDIAN__ - /* Reset exceptions to big endian */ - if (firmware_has_feature(FW_FEATURE_SET_MODE)) { + /* + * Reset exceptions to big endian. + * + * FIXME this is a hack for kexec, we need to reset the exception + * endian before starting the new kernel and this is a convenient place + * to do it. + * + * This is also called on boot when a fadump happens. In that case we + * must not change the exception endian mode. + */ + if (firmware_has_feature(FW_FEATURE_SET_MODE) && !is_fadump_active()) { long rc; rc = pseries_big_endian_exceptions(); @@ -257,8 +268,13 @@ static void pSeries_lpar_hptab_clear(void) * out to the user, but at least this will stop us from * continuing on further and creating an even more * difficult to debug situation. + * + * There is a known problem when kdump'ing, if cpus are offline + * the above call will fail. Rather than panicking again, keep + * going and hope the kdump kernel is also little endian, which + * it usually is. */ - if (rc) + if (rc && !kdump_in_progress()) panic("Could not enable big endian exceptions"); } #endif @@ -274,7 +290,7 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long inv_flags) { unsigned long lpar_rc; unsigned long flags = (newpp & 7) | H_AVPN; @@ -432,7 +448,7 @@ static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, static void pSeries_lpar_hugepage_invalidate(unsigned long vsid, unsigned long addr, unsigned char *hpte_slot_array, - int psize, int ssize) + int psize, int ssize, int local) { int i, index = 0; unsigned long s_addr = addr; @@ -505,7 +521,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) unsigned long vpn; unsigned long i, pix, rc; unsigned long flags = 0; - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long param[9]; unsigned long hash, index, shift, hidx, slot; @@ -695,7 +711,7 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args) local_irq_save(flags); - depth = &__get_cpu_var(hcall_trace_depth); + depth = this_cpu_ptr(&hcall_trace_depth); if (*depth) goto out; @@ -720,7 +736,7 @@ void __trace_hcall_exit(long opcode, unsigned long retval, local_irq_save(flags); - depth = &__get_cpu_var(hcall_trace_depth); + depth = this_cpu_ptr(&hcall_trace_depth); if (*depth) goto out; diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 8ab5add4ac82..691a154c286d 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -420,7 +420,7 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) */ again: if (type == PCI_CAP_ID_MSI) { - if (pdn->force_32bit_msi) { + if (pdev->no_64bit_msi) { rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSI_FN, nvec); if (rc < 0) { /* @@ -476,7 +476,7 @@ again: irq_set_msi_desc(virq, entry); /* Read config space back so we can restore after reset */ - __read_msi_msg(entry, &msg); + __pci_read_msi_msg(entry, &msg); entry->msg = msg; } diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 11a3b617ef5d..054a0ed5c7ee 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -715,6 +715,8 @@ static int nvram_pstore_init(void) nvram_pstore_info.buf = oops_data; nvram_pstore_info.bufsize = oops_data_sz; + spin_lock_init(&nvram_pstore_info.buf_lock); + rc = pstore_register(&nvram_pstore_info); if (rc != 0) pr_err("nvram: pstore_register() failed, defaults to " diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 67e48594040c..fe16a50700de 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -134,7 +134,7 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge) of_node_put(pdn); if (rc) { - pr_err("no ibm,pcie-link-speed-stats property\n"); + pr_debug("no ibm,pcie-link-speed-stats property\n"); return 0; } diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 5a4d0fc03b03..c3b2a7e81ddb 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -302,8 +302,8 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) /* If it isn't an extended log we can use the per cpu 64bit buffer */ h = (struct rtas_error_log *)&savep[1]; if (!rtas_error_extended(h)) { - memcpy(&__get_cpu_var(mce_data_buf), h, sizeof(__u64)); - errhdr = (struct rtas_error_log *)&__get_cpu_var(mce_data_buf); + memcpy(this_cpu_ptr(&mce_data_buf), h, sizeof(__u64)); + errhdr = (struct rtas_error_log *)this_cpu_ptr(&mce_data_buf); } else { int len, error_log_length; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 125c589eeef5..e445b6701f50 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -251,9 +251,10 @@ static void __init pseries_discover_pic(void) " interrupt-controller\n"); } -static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node) +static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data) { - struct device_node *np = node; + struct of_reconfig_data *rd = data; + struct device_node *np = rd->dn; struct pci_dn *pci = NULL; int err = NOTIFY_OK; @@ -499,7 +500,11 @@ static void __init pSeries_setup_arch(void) if (firmware_has_feature(FW_FEATURE_SET_MODE)) { long rc; - if ((rc = pSeries_enable_reloc_on_exc()) != H_SUCCESS) { + + rc = pSeries_enable_reloc_on_exc(); + if (rc == H_P2) { + pr_info("Relocation on exceptions not supported\n"); + } else if (rc != H_SUCCESS) { pr_warn("Unable to enable relocation on exceptions: " "%ld\n", rc); } @@ -659,6 +664,34 @@ static void __init pSeries_init_early(void) pr_debug(" <- pSeries_init_early()\n"); } +/** + * pseries_power_off - tell firmware about how to power off the system. + * + * This function calls either the power-off rtas token in normal cases + * or the ibm,power-off-ups token (if present & requested) in case of + * a power failure. If power-off token is used, power on will only be + * possible with power button press. If ibm,power-off-ups token is used + * it will allow auto poweron after power is restored. + */ +static void pseries_power_off(void) +{ + int rc; + int rtas_poweroff_ups_token = rtas_token("ibm,power-off-ups"); + + if (rtas_flash_term_hook) + rtas_flash_term_hook(SYS_POWER_OFF); + + if (rtas_poweron_auto == 0 || + rtas_poweroff_ups_token == RTAS_UNKNOWN_SERVICE) { + rc = rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1); + printk(KERN_INFO "RTAS power-off returned %d\n", rc); + } else { + rc = rtas_call(rtas_poweroff_ups_token, 0, 1, NULL); + printk(KERN_INFO "RTAS ibm,power-off-ups returned %d\n", rc); + } + for (;;); +} + /* * Called very early, MMU is off, device-tree isn't unflattened */ @@ -741,6 +774,8 @@ static int __init pSeries_probe(void) else hpte_init_native(); + pm_power_off = pseries_power_off; + pr_debug("Machine is%s LPAR !\n", (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not"); @@ -754,34 +789,6 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus) return PCI_PROBE_NORMAL; } -/** - * pSeries_power_off - tell firmware about how to power off the system. - * - * This function calls either the power-off rtas token in normal cases - * or the ibm,power-off-ups token (if present & requested) in case of - * a power failure. If power-off token is used, power on will only be - * possible with power button press. If ibm,power-off-ups token is used - * it will allow auto poweron after power is restored. - */ -static void pSeries_power_off(void) -{ - int rc; - int rtas_poweroff_ups_token = rtas_token("ibm,power-off-ups"); - - if (rtas_flash_term_hook) - rtas_flash_term_hook(SYS_POWER_OFF); - - if (rtas_poweron_auto == 0 || - rtas_poweroff_ups_token == RTAS_UNKNOWN_SERVICE) { - rc = rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1); - printk(KERN_INFO "RTAS power-off returned %d\n", rc); - } else { - rc = rtas_call(rtas_poweroff_ups_token, 0, 1, NULL); - printk(KERN_INFO "RTAS ibm,power-off-ups returned %d\n", rc); - } - for (;;); -} - #ifndef CONFIG_PCI void pSeries_final_fixup(void) { } #endif @@ -796,7 +803,6 @@ define_machine(pseries) { .pcibios_fixup = pSeries_final_fixup, .pci_probe_mode = pSeries_pci_probe_mode, .restart = rtas_restart, - .power_off = pSeries_power_off, .halt = rtas_halt, .panic = rtas_os_term, .get_boot_time = rtas_get_boot_time, diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index ad56edc39919..f532c92bf99d 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -326,7 +326,6 @@ static struct platform_driver axon_ram_driver = { .remove = axon_ram_remove, .driver = { .name = AXON_RAM_MODULE_NAME, - .owner = THIS_MODULE, .of_match_table = axon_ram_device_id, }, }; diff --git a/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c index 90545ad1626e..861cebf9c292 100644 --- a/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c +++ b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c @@ -210,7 +210,6 @@ static const struct of_device_id mpc85xx_l2ctlr_of_match[] = { static struct platform_driver mpc85xx_l2ctlr_of_platform_driver = { .driver = { .name = "fsl-l2ctlr", - .owner = THIS_MODULE, .of_match_table = mpc85xx_l2ctlr_of_match, }, .probe = mpc85xx_l2ctlr_of_probe, diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index de40b48b460e..4bbb4b8dfd09 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -13,7 +13,6 @@ * */ #include <linux/irq.h> -#include <linux/bootmem.h> #include <linux/msi.h> #include <linux/pci.h> #include <linux/slab.h> @@ -82,8 +81,8 @@ static void fsl_msi_print_chip(struct irq_data *irqd, struct seq_file *p) static struct irq_chip fsl_msi_chip = { - .irq_mask = mask_msi_irq, - .irq_unmask = unmask_msi_irq, + .irq_mask = pci_msi_mask_irq, + .irq_unmask = pci_msi_unmask_irq, .irq_ack = fsl_msi_end_irq, .irq_print_chip = fsl_msi_print_chip, }; @@ -242,7 +241,7 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) irq_set_msi_desc(virq, entry); fsl_compose_msi_msg(pdev, hwirq, &msg, msi_data); - write_msi_msg(virq, &msg); + pci_write_msi_msg(virq, &msg); } return 0; @@ -361,7 +360,7 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, cascade_data->virq = virt_msir; msi->cascade_array[irq_index] = cascade_data; - ret = request_irq(virt_msir, fsl_msi_cascade, 0, + ret = request_irq(virt_msir, fsl_msi_cascade, IRQF_NO_THREAD, "fsl-msi-cascade", cascade_data); if (ret) { dev_err(&dev->dev, "failed to request_irq(%d), ret = %d\n", @@ -578,7 +577,6 @@ static const struct of_device_id fsl_of_msi_ids[] = { static struct platform_driver fsl_of_msi_driver = { .driver = { .name = "fsl-msi", - .owner = THIS_MODULE, .of_match_table = fsl_of_msi_ids, }, .probe = fsl_of_msi_probe, diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 65d2ed4549e6..6455c1eada1a 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -23,7 +23,6 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/interrupt.h> -#include <linux/bootmem.h> #include <linux/memblock.h> #include <linux/log2.h> #include <linux/slab.h> @@ -152,7 +151,7 @@ static int setup_one_atmu(struct ccsr_pci __iomem *pci, flags |= 0x10000000; /* enable relaxed ordering */ for (i = 0; size > 0; i++) { - unsigned int bits = min(ilog2(size), + unsigned int bits = min_t(u32, ilog2(size), __ffs(pci_addr | phys_addr)); if (index + i >= 5) diff --git a/arch/powerpc/sysdev/fsl_pmc.c b/arch/powerpc/sysdev/fsl_pmc.c index 8cf4aa0e3a25..1d6fd7c59fe9 100644 --- a/arch/powerpc/sysdev/fsl_pmc.c +++ b/arch/powerpc/sysdev/fsl_pmc.c @@ -80,7 +80,6 @@ static const struct of_device_id pmc_ids[] = { static struct platform_driver pmc_driver = { .driver = { .name = "fsl-pmc", - .owner = THIS_MODULE, .of_match_table = pmc_ids, }, .probe = pmc_probe, diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index c04b718307c8..c1cd3698f534 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -58,6 +58,19 @@ #define RIO_ISR_AACR 0x10120 #define RIO_ISR_AACR_AA 0x1 /* Accept All ID */ +#define RIWTAR_TRAD_VAL_SHIFT 12 +#define RIWTAR_TRAD_MASK 0x00FFFFFF +#define RIWBAR_BADD_VAL_SHIFT 12 +#define RIWBAR_BADD_MASK 0x003FFFFF +#define RIWAR_ENABLE 0x80000000 +#define RIWAR_TGINT_LOCAL 0x00F00000 +#define RIWAR_RDTYP_NO_SNOOP 0x00040000 +#define RIWAR_RDTYP_SNOOP 0x00050000 +#define RIWAR_WRTYP_NO_SNOOP 0x00004000 +#define RIWAR_WRTYP_SNOOP 0x00005000 +#define RIWAR_WRTYP_ALLOC 0x00006000 +#define RIWAR_SIZE_MASK 0x0000003F + #define __fsl_read_rio_config(x, addr, err, op) \ __asm__ __volatile__( \ "1: "op" %1,0(%2)\n" \ @@ -266,6 +279,89 @@ fsl_rio_config_write(struct rio_mport *mport, int index, u16 destid, return 0; } +static void fsl_rio_inbound_mem_init(struct rio_priv *priv) +{ + int i; + + /* close inbound windows */ + for (i = 0; i < RIO_INB_ATMU_COUNT; i++) + out_be32(&priv->inb_atmu_regs[i].riwar, 0); +} + +int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, + u64 rstart, u32 size, u32 flags) +{ + struct rio_priv *priv = mport->priv; + u32 base_size; + unsigned int base_size_log; + u64 win_start, win_end; + u32 riwar; + int i; + + if ((size & (size - 1)) != 0) + return -EINVAL; + + base_size_log = ilog2(size); + base_size = 1 << base_size_log; + + /* check if addresses are aligned with the window size */ + if (lstart & (base_size - 1)) + return -EINVAL; + if (rstart & (base_size - 1)) + return -EINVAL; + + /* check for conflicting ranges */ + for (i = 0; i < RIO_INB_ATMU_COUNT; i++) { + riwar = in_be32(&priv->inb_atmu_regs[i].riwar); + if ((riwar & RIWAR_ENABLE) == 0) + continue; + win_start = ((u64)(in_be32(&priv->inb_atmu_regs[i].riwbar) & RIWBAR_BADD_MASK)) + << RIWBAR_BADD_VAL_SHIFT; + win_end = win_start + ((1 << ((riwar & RIWAR_SIZE_MASK) + 1)) - 1); + if (rstart < win_end && (rstart + size) > win_start) + return -EINVAL; + } + + /* find unused atmu */ + for (i = 0; i < RIO_INB_ATMU_COUNT; i++) { + riwar = in_be32(&priv->inb_atmu_regs[i].riwar); + if ((riwar & RIWAR_ENABLE) == 0) + break; + } + if (i >= RIO_INB_ATMU_COUNT) + return -ENOMEM; + + out_be32(&priv->inb_atmu_regs[i].riwtar, lstart >> RIWTAR_TRAD_VAL_SHIFT); + out_be32(&priv->inb_atmu_regs[i].riwbar, rstart >> RIWBAR_BADD_VAL_SHIFT); + out_be32(&priv->inb_atmu_regs[i].riwar, RIWAR_ENABLE | RIWAR_TGINT_LOCAL | + RIWAR_RDTYP_SNOOP | RIWAR_WRTYP_SNOOP | (base_size_log - 1)); + + return 0; +} + +void fsl_unmap_inb_mem(struct rio_mport *mport, dma_addr_t lstart) +{ + u32 win_start_shift, base_start_shift; + struct rio_priv *priv = mport->priv; + u32 riwar, riwtar; + int i; + + /* skip default window */ + base_start_shift = lstart >> RIWTAR_TRAD_VAL_SHIFT; + for (i = 0; i < RIO_INB_ATMU_COUNT; i++) { + riwar = in_be32(&priv->inb_atmu_regs[i].riwar); + if ((riwar & RIWAR_ENABLE) == 0) + continue; + + riwtar = in_be32(&priv->inb_atmu_regs[i].riwtar); + win_start_shift = riwtar & RIWTAR_TRAD_MASK; + if (win_start_shift == base_start_shift) { + out_be32(&priv->inb_atmu_regs[i].riwar, riwar & ~RIWAR_ENABLE); + return; + } + } +} + void fsl_rio_port_error_handler(int offset) { /*XXX: Error recovery is not implemented, we just clear errors */ @@ -389,6 +485,8 @@ int fsl_rio_setup(struct platform_device *dev) ops->add_outb_message = fsl_add_outb_message; ops->add_inb_buffer = fsl_add_inb_buffer; ops->get_inb_message = fsl_get_inb_message; + ops->map_inb = fsl_map_inb_mem; + ops->unmap_inb = fsl_unmap_inb_mem; rmu_node = of_parse_phandle(dev->dev.of_node, "fsl,srio-rmu-handle", 0); if (!rmu_node) { @@ -602,6 +700,11 @@ int fsl_rio_setup(struct platform_device *dev) RIO_ATMU_REGS_PORT2_OFFSET)); priv->maint_atmu_regs = priv->atmu_regs + 1; + priv->inb_atmu_regs = (struct rio_inb_atmu_regs __iomem *) + (priv->regs_win + + ((i == 0) ? RIO_INB_ATMU_REGS_PORT1_OFFSET : + RIO_INB_ATMU_REGS_PORT2_OFFSET)); + /* Set to receive any dist ID for serial RapidIO controller. */ if (port->phy_type == RIO_PHY_SERIAL) @@ -620,6 +723,7 @@ int fsl_rio_setup(struct platform_device *dev) rio_law_start = range_start; fsl_rio_setup_rmu(port, rmu_np[i]); + fsl_rio_inbound_mem_init(priv); dbell->mport[i] = port; @@ -673,7 +777,6 @@ static const struct of_device_id fsl_of_rio_rpn_ids[] = { static struct platform_driver fsl_of_rio_rpn_driver = { .driver = { .name = "fsl-of-rio", - .owner = THIS_MODULE, .of_match_table = fsl_of_rio_rpn_ids, }, .probe = fsl_of_rio_rpn_probe, diff --git a/arch/powerpc/sysdev/fsl_rio.h b/arch/powerpc/sysdev/fsl_rio.h index ae8e27405a0d..d53407a34f32 100644 --- a/arch/powerpc/sysdev/fsl_rio.h +++ b/arch/powerpc/sysdev/fsl_rio.h @@ -50,9 +50,12 @@ #define RIO_S_DBELL_REGS_OFFSET 0x13400 #define RIO_S_PW_REGS_OFFSET 0x134e0 #define RIO_ATMU_REGS_DBELL_OFFSET 0x10C40 +#define RIO_INB_ATMU_REGS_PORT1_OFFSET 0x10d60 +#define RIO_INB_ATMU_REGS_PORT2_OFFSET 0x10f60 #define MAX_MSG_UNIT_NUM 2 #define MAX_PORT_NUM 4 +#define RIO_INB_ATMU_COUNT 4 struct rio_atmu_regs { u32 rowtar; @@ -63,6 +66,15 @@ struct rio_atmu_regs { u32 pad2[3]; }; +struct rio_inb_atmu_regs { + u32 riwtar; + u32 pad1; + u32 riwbar; + u32 pad2; + u32 riwar; + u32 pad3[3]; +}; + struct rio_dbell_ring { void *virt; dma_addr_t phys; @@ -99,6 +111,7 @@ struct rio_priv { void __iomem *regs_win; struct rio_atmu_regs __iomem *atmu_regs; struct rio_atmu_regs __iomem *maint_atmu_regs; + struct rio_inb_atmu_regs __iomem *inb_atmu_regs; void __iomem *maint_win; void *rmm_handle; /* RapidIO message manager(unit) Handle */ }; diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index ffd1169ebaab..99269c041615 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -197,8 +197,7 @@ static int __init setup_rstcr(void) if (!rstcr && ppc_md.restart == fsl_rstcr_restart) printk(KERN_ERR "No RSTCR register, warm reboot won't work\n"); - if (np) - of_node_put(np); + of_node_put(np); return 0; } @@ -238,7 +237,7 @@ void fsl_hv_restart(char *cmd) /* * Halt the current partition * - * This function should be assigned to the ppc_md.power_off and ppc_md.halt + * This function should be assigned to the pm_power_off and ppc_md.halt * function pointers, to shut down the partition when we're running under * the Freescale hypervisor. */ diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index b50f97811c25..b28733727ed3 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -20,7 +20,6 @@ #include <linux/signal.h> #include <linux/syscore_ops.h> #include <linux/device.h> -#include <linux/bootmem.h> #include <linux/spinlock.h> #include <linux/fsl_devices.h> #include <asm/irq.h> diff --git a/arch/powerpc/sysdev/mpc5xxx_clocks.c b/arch/powerpc/sysdev/mpc5xxx_clocks.c index 5492dc5f56f4..f4f0301b9a60 100644 --- a/arch/powerpc/sysdev/mpc5xxx_clocks.c +++ b/arch/powerpc/sysdev/mpc5xxx_clocks.c @@ -26,8 +26,7 @@ unsigned long mpc5xxx_get_bus_frequency(struct device_node *node) of_node_put(node); node = np; } - if (node) - of_node_put(node); + of_node_put(node); return p_bus_freq ? *p_bus_freq : 0; } diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 89cec0ed6a58..c4648ad5c1f3 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -24,7 +24,6 @@ #include <linux/irq.h> #include <linux/smp.h> #include <linux/interrupt.h> -#include <linux/bootmem.h> #include <linux/spinlock.h> #include <linux/pci.h> #include <linux/slab.h> diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c index 7bdf3cc741e4..3f165d972a0e 100644 --- a/arch/powerpc/sysdev/mpic_msgr.c +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -270,7 +270,6 @@ static const struct of_device_id mpic_msgr_ids[] = { static struct platform_driver mpic_msgr_driver = { .driver = { .name = "mpic-msgr", - .owner = THIS_MODULE, .of_match_table = mpic_msgr_ids, }, .probe = mpic_msgr_probe, diff --git a/arch/powerpc/sysdev/mpic_pasemi_msi.c b/arch/powerpc/sysdev/mpic_pasemi_msi.c index 15dccd35fa11..a3f660eed6de 100644 --- a/arch/powerpc/sysdev/mpic_pasemi_msi.c +++ b/arch/powerpc/sysdev/mpic_pasemi_msi.c @@ -16,7 +16,6 @@ #undef DEBUG #include <linux/irq.h> -#include <linux/bootmem.h> #include <linux/msi.h> #include <asm/mpic.h> #include <asm/prom.h> @@ -42,7 +41,7 @@ static struct mpic *msi_mpic; static void mpic_pasemi_msi_mask_irq(struct irq_data *data) { pr_debug("mpic_pasemi_msi_mask_irq %d\n", data->irq); - mask_msi_irq(data); + pci_msi_mask_irq(data); mpic_mask_irq(data); } @@ -50,7 +49,7 @@ static void mpic_pasemi_msi_unmask_irq(struct irq_data *data) { pr_debug("mpic_pasemi_msi_unmask_irq %d\n", data->irq); mpic_unmask_irq(data); - unmask_msi_irq(data); + pci_msi_unmask_irq(data); } static struct irq_chip mpic_pasemi_msi_chip = { @@ -136,7 +135,7 @@ static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) * register to generate MSI [512...1023] */ msg.data = hwirq-0x200; - write_msi_msg(virq, &msg); + pci_write_msi_msg(virq, &msg); } return 0; diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index 623d7fba15b4..b2cef1809389 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -10,7 +10,6 @@ */ #include <linux/irq.h> -#include <linux/bootmem.h> #include <linux/msi.h> #include <asm/mpic.h> #include <asm/prom.h> @@ -25,14 +24,14 @@ static struct mpic *msi_mpic; static void mpic_u3msi_mask_irq(struct irq_data *data) { - mask_msi_irq(data); + pci_msi_mask_irq(data); mpic_mask_irq(data); } static void mpic_u3msi_unmask_irq(struct irq_data *data) { mpic_unmask_irq(data); - unmask_msi_irq(data); + pci_msi_unmask_irq(data); } static struct irq_chip mpic_u3msi_chip = { @@ -171,7 +170,7 @@ static int u3msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) printk("u3msi: allocated virq 0x%x (hw 0x%x) addr 0x%lx\n", virq, hwirq, (unsigned long)addr); msg.data = hwirq; - write_msi_msg(virq, &msg); + pci_write_msi_msg(virq, &msg); hwirq++; } diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c index 13e67d93a7c1..8a0b77a3ec0c 100644 --- a/arch/powerpc/sysdev/pmi.c +++ b/arch/powerpc/sysdev/pmi.c @@ -210,7 +210,6 @@ static struct platform_driver pmi_of_platform_driver = { .remove = pmi_of_remove, .driver = { .name = "pmi", - .owner = THIS_MODULE, .of_match_table = pmi_match, }, }; diff --git a/arch/powerpc/sysdev/ppc4xx_cpm.c b/arch/powerpc/sysdev/ppc4xx_cpm.c index 82e2cfe35c62..ba95adf81d8d 100644 --- a/arch/powerpc/sysdev/ppc4xx_cpm.c +++ b/arch/powerpc/sysdev/ppc4xx_cpm.c @@ -281,7 +281,7 @@ static int __init cpm_init(void) printk(KERN_ERR "cpm: could not parse dcr property for %s\n", np->full_name); ret = -EINVAL; - goto out; + goto node_put; } cpm.dcr_host = dcr_map(np, dcr_base, dcr_len); @@ -290,7 +290,7 @@ static int __init cpm_init(void) printk(KERN_ERR "cpm: failed to map dcr property for %s\n", np->full_name); ret = -EINVAL; - goto out; + goto node_put; } /* All 4xx SoCs with a CPM controller have one of two @@ -330,9 +330,9 @@ static int __init cpm_init(void) if (cpm.standby || cpm.suspend) suspend_set_ops(&cpm_suspend_ops); +node_put: + of_node_put(np); out: - if (np) - of_node_put(np); return ret; } diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c index a6a4dbda9078..ed9970ff8d94 100644 --- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c @@ -85,7 +85,7 @@ static int hsta_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) msi_bitmap_free_hwirqs(&ppc4xx_hsta_msi.bmp, irq, 1); return -EINVAL; } - write_msi_msg(hwirq, &msg); + pci_write_msi_msg(hwirq, &msg); } return 0; @@ -197,7 +197,6 @@ static struct platform_driver hsta_msi_driver = { .probe = hsta_msi_probe, .driver = { .name = "hsta-msi", - .owner = THIS_MODULE, .of_match_table = hsta_msi_ids, }, }; diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c index 22b5200636e7..6e2e6aa378bb 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -22,7 +22,6 @@ */ #include <linux/irq.h> -#include <linux/bootmem.h> #include <linux/pci.h> #include <linux/msi.h> #include <linux/of_platform.h> @@ -116,7 +115,7 @@ static int ppc4xx_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) irq_set_msi_desc(virq, entry); msg.data = int_no; - write_msi_msg(virq, &msg); + pci_write_msi_msg(virq, &msg); } return 0; } @@ -270,7 +269,6 @@ static struct platform_driver ppc4xx_msi_driver = { .remove = ppc4xx_of_msi_remove, .driver = { .name = "ppc4xx-msi", - .owner = THIS_MODULE, .of_match_table = ppc4xx_msi_ids, }, diff --git a/arch/powerpc/sysdev/ppc4xx_pci.c b/arch/powerpc/sysdev/ppc4xx_pci.c index df6e2fc4ff92..086aca69ecae 100644 --- a/arch/powerpc/sysdev/ppc4xx_pci.c +++ b/arch/powerpc/sysdev/ppc4xx_pci.c @@ -22,7 +22,6 @@ #include <linux/pci.h> #include <linux/init.h> #include <linux/of.h> -#include <linux/bootmem.h> #include <linux/delay.h> #include <linux/slab.h> diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c index 238a07b97f2c..1f29cee8da7b 100644 --- a/arch/powerpc/sysdev/qe_lib/qe.c +++ b/arch/powerpc/sysdev/qe_lib/qe.c @@ -22,7 +22,6 @@ #include <linux/spinlock.h> #include <linux/mm.h> #include <linux/interrupt.h> -#include <linux/bootmem.h> #include <linux/module.h> #include <linux/delay.h> #include <linux/ioport.h> @@ -693,7 +692,6 @@ static const struct of_device_id qe_ids[] = { static struct platform_driver qe_driver = { .driver = { .name = "fsl-qe", - .owner = THIS_MODULE, .of_match_table = qe_ids, }, .probe = qe_probe, diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c index b2b87c30e266..543765e1ef14 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_ic.c +++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c @@ -23,7 +23,6 @@ #include <linux/sched.h> #include <linux/signal.h> #include <linux/device.h> -#include <linux/bootmem.h> #include <linux/spinlock.h> #include <asm/irq.h> #include <asm/io.h> diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c index 92033936a8f7..7c37157d4c24 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/sysdev/uic.c @@ -19,7 +19,6 @@ #include <linux/sched.h> #include <linux/signal.h> #include <linux/device.h> -#include <linux/bootmem.h> #include <linux/spinlock.h> #include <linux/irq.h> #include <linux/interrupt.h> diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index 3c6ee1b64e5d..4ba554ec8eaf 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -73,7 +73,7 @@ static unsigned int ics_opal_startup(struct irq_data *d) * at that level, so we do it here by hand. */ if (d->msi_desc) - unmask_msi_irq(d); + pci_msi_unmask_irq(d); #endif /* unmask it */ diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index 936575d99c5c..bc81335b2cbc 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -76,7 +76,7 @@ static unsigned int ics_rtas_startup(struct irq_data *d) * at that level, so we do it here by hand. */ if (d->msi_desc) - unmask_msi_irq(d); + pci_msi_unmask_irq(d); #endif /* unmask it */ ics_rtas_unmask_irq(d); diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index fe0cca477164..365249cd346b 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -155,7 +155,7 @@ int __init xics_smp_probe(void) void xics_teardown_cpu(void) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); /* * we have to reset the cppr index to 0 because we're diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index b988b5addf86..5b150f0c5df9 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -51,6 +51,12 @@ #include <asm/paca.h> #endif +#if defined(CONFIG_PPC_SPLPAR) +#include <asm/plpar_wrappers.h> +#else +static inline long plapr_set_ciabr(unsigned long ciabr) {return 0; }; +#endif + #include "nonstdio.h" #include "dis-asm.h" @@ -88,10 +94,9 @@ struct bpt { }; /* Bits in bpt.enabled */ -#define BP_IABR_TE 1 /* IABR translation enabled */ -#define BP_IABR 2 -#define BP_TRAP 8 -#define BP_DABR 0x10 +#define BP_CIABR 1 +#define BP_TRAP 2 +#define BP_DABR 4 #define NBPTS 256 static struct bpt bpts[NBPTS]; @@ -270,6 +275,45 @@ static inline void cinval(void *p) asm volatile ("dcbi 0,%0; icbi 0,%0" : : "r" (p)); } +/** + * write_ciabr() - write the CIABR SPR + * @ciabr: The value to write. + * + * This function writes a value to the CIARB register either directly + * through mtspr instruction if the kernel is in HV privilege mode or + * call a hypervisor function to achieve the same in case the kernel + * is in supervisor privilege mode. + */ +static void write_ciabr(unsigned long ciabr) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return; + + if (cpu_has_feature(CPU_FTR_HVMODE)) { + mtspr(SPRN_CIABR, ciabr); + return; + } + plapr_set_ciabr(ciabr); +} + +/** + * set_ciabr() - set the CIABR + * @addr: The value to set. + * + * This function sets the correct privilege value into the the HW + * breakpoint address before writing it up in the CIABR register. + */ +static void set_ciabr(unsigned long addr) +{ + addr &= ~CIABR_PRIV; + + if (cpu_has_feature(CPU_FTR_HVMODE)) + addr |= CIABR_PRIV_HYPER; + else + addr |= CIABR_PRIV_SUPER; + write_ciabr(addr); +} + /* * Disable surveillance (the service processor watchdog function) * while we are in xmon. @@ -293,10 +337,10 @@ static inline void disable_surveillance(void) args.token = rtas_token("set-indicator"); if (args.token == RTAS_UNKNOWN_SERVICE) return; - args.nargs = 3; - args.nret = 1; + args.nargs = cpu_to_be32(3); + args.nret = cpu_to_be32(1); args.rets = &args.args[3]; - args.args[0] = SURVEILLANCE_TOKEN; + args.args[0] = cpu_to_be32(SURVEILLANCE_TOKEN); args.args[1] = 0; args.args[2] = 0; enter_rtas(__pa(&args)); @@ -727,7 +771,7 @@ static void insert_bpts(void) bp = bpts; for (i = 0; i < NBPTS; ++i, ++bp) { - if ((bp->enabled & (BP_TRAP|BP_IABR)) == 0) + if ((bp->enabled & (BP_TRAP|BP_CIABR)) == 0) continue; if (mread(bp->address, &bp->instr[0], 4) != 4) { printf("Couldn't read instruction at %lx, " @@ -742,7 +786,7 @@ static void insert_bpts(void) continue; } store_inst(&bp->instr[0]); - if (bp->enabled & BP_IABR) + if (bp->enabled & BP_CIABR) continue; if (mwrite(bp->address, &bpinstr, 4) != 4) { printf("Couldn't write instruction at %lx, " @@ -764,9 +808,9 @@ static void insert_cpu_bpts(void) brk.len = 8; __set_breakpoint(&brk); } - if (iabr && cpu_has_feature(CPU_FTR_IABR)) - mtspr(SPRN_IABR, iabr->address - | (iabr->enabled & (BP_IABR|BP_IABR_TE))); + + if (iabr) + set_ciabr(iabr->address); } static void remove_bpts(void) @@ -777,7 +821,7 @@ static void remove_bpts(void) bp = bpts; for (i = 0; i < NBPTS; ++i, ++bp) { - if ((bp->enabled & (BP_TRAP|BP_IABR)) != BP_TRAP) + if ((bp->enabled & (BP_TRAP|BP_CIABR)) != BP_TRAP) continue; if (mread(bp->address, &instr, 4) == 4 && instr == bpinstr @@ -792,8 +836,7 @@ static void remove_bpts(void) static void remove_cpu_bpts(void) { hw_breakpoint_disable(); - if (cpu_has_feature(CPU_FTR_IABR)) - mtspr(SPRN_IABR, 0); + write_ciabr(0); } /* Command interpreting routine */ @@ -907,7 +950,7 @@ cmds(struct pt_regs *excp) case 'u': dump_segments(); break; -#elif defined(CONFIG_4xx) +#elif defined(CONFIG_44x) case 'u': dump_tlb_44x(); break; @@ -981,7 +1024,8 @@ static void bootcmds(void) else if (cmd == 'h') ppc_md.halt(); else if (cmd == 'p') - ppc_md.power_off(); + if (pm_power_off) + pm_power_off(); } static int cpu_cmd(void) @@ -1127,7 +1171,7 @@ static char *breakpoint_help_string = "b <addr> [cnt] set breakpoint at given instr addr\n" "bc clear all breakpoints\n" "bc <n/addr> clear breakpoint number n or at addr\n" - "bi <addr> [cnt] set hardware instr breakpoint (POWER3/RS64 only)\n" + "bi <addr> [cnt] set hardware instr breakpoint (POWER8 only)\n" "bd <addr> [cnt] set hardware data breakpoint\n" ""; @@ -1166,13 +1210,13 @@ bpt_cmds(void) break; case 'i': /* bi - hardware instr breakpoint */ - if (!cpu_has_feature(CPU_FTR_IABR)) { + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) { printf("Hardware instruction breakpoint " "not supported on this cpu\n"); break; } if (iabr) { - iabr->enabled &= ~(BP_IABR | BP_IABR_TE); + iabr->enabled &= ~BP_CIABR; iabr = NULL; } if (!scanhex(&a)) @@ -1181,7 +1225,7 @@ bpt_cmds(void) break; bp = new_breakpoint(a); if (bp != NULL) { - bp->enabled |= BP_IABR | BP_IABR_TE; + bp->enabled |= BP_CIABR; iabr = bp; } break; @@ -1238,7 +1282,7 @@ bpt_cmds(void) if (!bp->enabled) continue; printf("%2x %s ", BP_NUM(bp), - (bp->enabled & BP_IABR)? "inst": "trap"); + (bp->enabled & BP_CIABR) ? "inst": "trap"); xmon_print_symbol(bp->address, " ", "\n"); } break; |