353 files changed, 20388 insertions, 4364 deletions
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index e38b8df3d727..8e9359de1d28 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -191,7 +191,7 @@ o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that o A hardware or software issue shuts off the scheduler-clock interrupt on a CPU that is not in dyntick-idle mode. This problem really has happened, and seems to be most likely to - result in RCU CPU stall warnings for CONFIG_NO_HZ=n kernels. + result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels. o A bug in the RCU implementation. diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt index 66f9cc310686..219970ba54b7 100644 --- a/Documentation/cpu-freq/governors.txt +++ b/Documentation/cpu-freq/governors.txt @@ -131,8 +131,8 @@ sampling_rate_min: The sampling rate is limited by the HW transition latency: transition_latency * 100 Or by kernel restrictions: -If CONFIG_NO_HZ is set, the limit is 10ms fixed. -If CONFIG_NO_HZ is not set or nohz=off boot parameter is used, the +If CONFIG_NO_HZ_COMMON is set, the limit is 10ms fixed. +If CONFIG_NO_HZ_COMMON is not set or nohz=off boot parameter is used, the limits depend on the CONFIG_HZ option: HZ=1000: min=20000us (20ms) HZ=250: min=80000us (80ms) diff --git a/Documentation/devicetree/bindings/input/cros-ec-keyb.txt b/Documentation/devicetree/bindings/input/cros-ec-keyb.txt new file mode 100644 index 000000000000..0f6355ce39b5 --- /dev/null +++ b/Documentation/devicetree/bindings/input/cros-ec-keyb.txt @@ -0,0 +1,72 @@ +ChromeOS EC Keyboard + +Google's ChromeOS EC Keyboard is a simple matrix keyboard implemented on +a separate EC (Embedded Controller) device. It provides a message for reading +key scans from the EC. These are then converted into keycodes for processing +by the kernel. + +This binding is based on matrix-keymap.txt and extends/modifies it as follows: + +Required properties: +- compatible: "google,cros-ec-keyb" + +Optional properties: +- google,needs-ghost-filter: True to enable a ghost filter for the matrix +keyboard. This is recommended if the EC does not have its own logic or +hardware for this. + + +Example: + +cros-ec-keyb { + compatible = "google,cros-ec-keyb"; + keypad,num-rows = <8>; + keypad,num-columns = <13>; + google,needs-ghost-filter; + /* + * Keymap entries take the form of 0xRRCCKKKK where + * RR=Row CC=Column KKKK=Key Code + * The values below are for a US keyboard layout and + * are taken from the Linux driver. Note that the + * 102ND key is not used for US keyboards. + */ + linux,keymap = < + /* CAPSLCK F1 B F10 */ + 0x0001003a 0x0002003b 0x00030030 0x00040044 + /* N = R_ALT ESC */ + 0x00060031 0x0008000d 0x000a0064 0x01010001 + /* F4 G F7 H */ + 0x0102003e 0x01030022 0x01040041 0x01060023 + /* ' F9 BKSPACE L_CTRL */ + 0x01080028 0x01090043 0x010b000e 0x0200001d + /* TAB F3 T F6 */ + 0x0201000f 0x0202003d 0x02030014 0x02040040 + /* ] Y 102ND [ */ + 0x0205001b 0x02060015 0x02070056 0x0208001a + /* F8 GRAVE F2 5 */ + 0x02090042 0x03010029 0x0302003c 0x03030006 + /* F5 6 - \ */ + 0x0304003f 0x03060007 0x0308000c 0x030b002b + /* R_CTRL A D F */ + 0x04000061 0x0401001e 0x04020020 0x04030021 + /* S K J ; */ + 0x0404001f 0x04050025 0x04060024 0x04080027 + /* L ENTER Z C */ + 0x04090026 0x040b001c 0x0501002c 0x0502002e + /* V X , M */ + 0x0503002f 0x0504002d 0x05050033 0x05060032 + /* L_SHIFT / . 
SPACE */ + 0x0507002a 0x05080035 0x05090034 0x050B0039 + /* 1 3 4 2 */ + 0x06010002 0x06020004 0x06030005 0x06040003 + /* 8 7 0 9 */ + 0x06050009 0x06060008 0x0608000b 0x0609000a + /* L_ALT DOWN RIGHT Q */ + 0x060a0038 0x060b006c 0x060c006a 0x07010010 + /* E R W I */ + 0x07020012 0x07030013 0x07040011 0x07050017 + /* U R_SHIFT P O */ + 0x07060016 0x07070036 0x07080019 0x07090018 + /* UP LEFT */ + 0x070b0067 0x070c0069>; +}; diff --git a/Documentation/devicetree/bindings/mfd/as3711.txt b/Documentation/devicetree/bindings/mfd/as3711.txt new file mode 100644 index 000000000000..d98cf18c721c --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/as3711.txt @@ -0,0 +1,73 @@ +AS3711 is an I2C PMIC from Austria MicroSystems with multiple DCDC and LDO power +supplies, a battery charger and an RTC. So far only bindings for the two stepup +DCDC converters are defined. Other DCDC and LDO supplies are configured using +standard regulator properties; they must belong to a sub-node called +"regulators" and be named "sd1" to "sd4" and "ldo1" to "ldo8". Stepup converter +configuration should be placed in a subnode called "backlight". + +Compulsory properties: +- compatible : must be "ams,as3711" +- reg : specifies the I2C address + +To use the SU1 converter as a backlight source the following two properties must +be provided: +- su1-dev : framebuffer phandle +- su1-max-uA : maximum current + +To use the SU2 converter as a backlight source the following two properties must +be provided: +- su2-dev : framebuffer phandle +- su2-max-uA : maximum current + +Additionally, one of these properties must be provided to select the type of +feedback used: +- su2-feedback-voltage : voltage feedback is used +- su2-feedback-curr1 : CURR1 input used for current feedback +- su2-feedback-curr2 : CURR2 input used for current feedback +- su2-feedback-curr3 : CURR3 input used for current feedback +- su2-feedback-curr-auto: automatic current feedback selection + +and one of these to select the over-voltage protection pin: +- su2-fbprot-lx-sd4 : LX_SD4 is used for over-voltage protection +- su2-fbprot-gpio2 : GPIO2 is used for over-voltage protection +- su2-fbprot-gpio3 : GPIO3 is used for over-voltage protection +- su2-fbprot-gpio4 : GPIO4 is used for over-voltage protection + +If "su2-feedback-curr-auto" is selected, one or more of the following properties +have to be specified: +- su2-auto-curr1 : use CURR1 input for current feedback +- su2-auto-curr2 : use CURR2 input for current feedback +- su2-auto-curr3 : use CURR3 input for current feedback + +Example: + +as3711@40 { + compatible = "ams,as3711"; + reg = <0x40>; + + regulators { + sd4 { + regulator-name = "1.215V"; + regulator-min-microvolt = <1215000>; + regulator-max-microvolt = <1235000>; + }; + ldo2 { + regulator-name = "2.8V CPU"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + regulator-always-on; + regulator-boot-on; + }; + }; + + backlight { + compatible = "ams,as3711-bl"; + su2-dev = <&lcdc>; + su2-max-uA = <36000>; + su2-feedback-curr-auto; + su2-fbprot-gpio4; + su2-auto-curr1; + su2-auto-curr2; + su2-auto-curr3; + }; +}; diff --git a/Documentation/devicetree/bindings/mfd/cros-ec.txt b/Documentation/devicetree/bindings/mfd/cros-ec.txt new file mode 100644 index 000000000000..e0e59c58a1f9 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/cros-ec.txt @@ -0,0 +1,56 @@ +ChromeOS Embedded Controller + +Google's ChromeOS EC is a Cortex-M device which talks to the AP and +implements various functions such as keyboard and battery charging. + +The EC can be connected through various means (I2C, SPI, LPC) and the +compatible string used depends on the interface. Each connection method has +its own driver which connects to the top level interface-agnostic EC driver. +Other Linux drivers (such as cros-ec-keyb for the matrix keyboard) connect to +the top-level driver. + +Required properties (I2C): +- compatible: "google,cros-ec-i2c" +- reg: I2C slave address + +Required properties (SPI): +- compatible: "google,cros-ec-spi" +- reg: SPI chip select + +Required properties (LPC): +- compatible: "google,cros-ec-lpc" +- reg: List of (IO address, size) pairs defining the IO ranges the interface uses + + +Example for I2C: + +i2c@12CA0000 { + cros-ec@1e { + reg = <0x1e>; + compatible = "google,cros-ec-i2c"; + interrupts = <14 0>; + interrupt-parent = <&wakeup_eint>; + wakeup-source; + }; +}; + + +Example for SPI: + +spi@131b0000 { + ec@0 { + compatible = "google,cros-ec-spi"; + reg = <0x0>; + interrupts = <14 0>; + interrupt-parent = <&wakeup_eint>; + wakeup-source; + spi-max-frequency = <5000000>; + controller-data { + cs-gpio = <&gpf0 3 4 3 0>; + samsung,spi-cs; + samsung,spi-feedback-delay = <2>; + }; + }; +}; + + +An example for LPC is not supplied, as it is not yet implemented. diff --git a/Documentation/devicetree/bindings/mfd/omap-usb-host.txt b/Documentation/devicetree/bindings/mfd/omap-usb-host.txt new file mode 100644 index 000000000000..b381fa696bf9 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/omap-usb-host.txt @@ -0,0 +1,80 @@ +OMAP HS USB Host + +Required properties: + +- compatible: should be "ti,usbhs-host" +- reg: should contain one register range, i.e. start and length +- ti,hwmods: must contain "usb_host_hs" + +Optional properties: + +- num-ports: number of USB ports. Usually this is automatically detected + from the IP's revision register but can be overridden by specifying + this property. A maximum of 3 ports are supported at the moment. + +- portN-mode: String specifying the port mode for port N, where N can be + from 1 to 3. If the port mode is not specified, that port is treated + as unused. When specified, it must be one of the following: + "ehci-phy", + "ehci-tll", + "ehci-hsic", + "ohci-phy-6pin-datse0", + "ohci-phy-6pin-dpdm", + "ohci-phy-3pin-datse0", + "ohci-phy-4pin-dpdm", + "ohci-tll-6pin-datse0", + "ohci-tll-6pin-dpdm", + "ohci-tll-3pin-datse0", + "ohci-tll-4pin-dpdm", + "ohci-tll-2pin-datse0", + "ohci-tll-2pin-dpdm". + +- single-ulpi-bypass: Must be present if the controller contains a single + ULPI bypass control bit, e.g. OMAP3 silicon <= ES2.1 + +Required properties if a child node exists: + +- #address-cells: Must be 1 +- #size-cells: Must be 1 +- ranges: must be present + +Properties for children: + +The OMAP HS USB Host subsystem contains EHCI and OHCI controllers.
+See Documentation/devicetree/bindings/usb/omap-ehci.txt and +omap3-ohci.txt + +Example for OMAP4: + +usbhshost: usbhshost@4a064000 { + compatible = "ti,usbhs-host"; + reg = <0x4a064000 0x800>; + ti,hwmods = "usb_host_hs"; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + usbhsohci: ohci@4a064800 { + compatible = "ti,ohci-omap3", "usb-ohci"; + reg = <0x4a064800 0x400>; + interrupt-parent = <&gic>; + interrupts = <0 76 0x4>; + }; + + usbhsehci: ehci@4a064c00 { + compatible = "ti,ehci-omap", "usb-ehci"; + reg = <0x4a064c00 0x400>; + interrupt-parent = <&gic>; + interrupts = <0 77 0x4>; + }; +}; + +&usbhshost { + port1-mode = "ehci-phy"; + port2-mode = "ehci-tll"; + port3-mode = "ehci-phy"; +}; + +&usbhsehci { + phys = <&hsusb1_phy 0 &hsusb3_phy>; +}; diff --git a/Documentation/devicetree/bindings/mfd/omap-usb-tll.txt b/Documentation/devicetree/bindings/mfd/omap-usb-tll.txt new file mode 100644 index 000000000000..62fe69724e3b --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/omap-usb-tll.txt @@ -0,0 +1,17 @@ +OMAP HS USB Host TLL (Transceiver-Less Interface) + +Required properties: + +- compatible : should be "ti,usbhs-tll" +- reg : should contain one register range, i.e. start and length +- interrupts : should contain the TLL module's interrupt +- ti,hwmods : must contain "usb_tll_hs" + +Example: + + usbhstll: usbhstll@4a062000 { + compatible = "ti,usbhs-tll"; + reg = <0x4a062000 0x1000>; + interrupts = <78>; + ti,hwmods = "usb_tll_hs"; + }; diff --git a/Documentation/devicetree/bindings/sound/wm8994.txt b/Documentation/devicetree/bindings/sound/wm8994.txt index 7a7eb1e7bda6..f2f3e80934d2 100644 --- a/Documentation/devicetree/bindings/sound/wm8994.txt +++ b/Documentation/devicetree/bindings/sound/wm8994.txt @@ -5,14 +5,70 @@ on the board). Required properties: - - compatible : "wlf,wm1811", "wlf,wm8994", "wlf,wm8958" + - compatible : One of "wlf,wm1811", "wlf,wm8994" or "wlf,wm8958". - reg : the I2C address of the device for I2C, the chip select number for SPI. + - gpio-controller : Indicates this device is a GPIO controller. + - #gpio-cells : Must be 2. The first cell is the pin number and the + second cell is used to specify optional parameters (currently unused). + + - AVDD2-supply, DBVDD1-supply, DBVDD2-supply, DBVDD3-supply, CPVDD-supply, + SPKVDD1-supply, SPKVDD2-supply : power supplies for the device, as covered + in Documentation/devicetree/bindings/regulator/regulator.txt + +Optional properties: + + - interrupts : The interrupt line the IRQ signal for the device is + connected to. This is optional; if it is not connected then none + of the interrupt-related properties should be specified. + - interrupt-controller : These devices contain interrupt controllers + and may provide interrupt services to other devices if they have an + interrupt line connected. + - interrupt-parent : The parent interrupt controller. + - #interrupt-cells: the number of cells used to describe an IRQ; this should be 2. + The first cell is the IRQ number. + The second cell is the flags, encoded as the trigger masks from + Documentation/devicetree/bindings/interrupts.txt + + - wlf,gpio-cfg : A list of GPIO configuration register values. If absent, + no configuration of these registers is performed. If any value is + over 0xffff then the register will be left at its default. If present, 11 + values must be supplied. + + - wlf,micbias-cfg : Two MICBIAS register values for WM1811 or + WM8958. If absent, the register defaults will be used.
+ + - wlf,ldo1ena : GPIO specifier for control of the LDO1ENA input to the device. + - wlf,ldo2ena : GPIO specifier for control of the LDO2ENA input to the device. + + - wlf,lineout1-se : If present, LINEOUT1 is in single-ended mode. + - wlf,lineout2-se : If present, LINEOUT2 is in single-ended mode. + + - wlf,lineout1-feedback : If present, LINEOUT1 has common mode feedback + connected. + - wlf,lineout2-feedback : If present, LINEOUT2 has common mode feedback + connected. + + - wlf,ldoena-always-driven : If present, LDOENA is always driven. + Example: codec: wm8994@1a { compatible = "wlf,wm8994"; reg = <0x1a>; + + gpio-controller; + #gpio-cells = <2>; + + wlf,lineout1-se; + + AVDD2-supply = <&regulator>; + CPVDD-supply = <&regulator>; + DBVDD1-supply = <&regulator>; + DBVDD2-supply = <&regulator>; + DBVDD3-supply = <&regulator>; + SPKVDD1-supply = <&regulator>; + SPKVDD2-supply = <&regulator>; }; diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9653cf2f9727..8920f9f5fa9e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1964,6 +1964,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Valid arguments: on, off Default: on + nohz_full= [KNL,BOOT] + In kernels built with CONFIG_NO_HZ_FULL=y, set + the specified list of CPUs whose tick will be stopped + whenever possible. The boot CPU will be forced outside + the range to maintain timekeeping. + The CPUs in this range must also be included in the + rcu_nocbs= set. + noiotrap [SH] Disables trapped I/O port accesses. noirqdebug [X86-32] Disables the code which attempts to detect and diff --git a/Documentation/s390/CommonIO b/Documentation/s390/CommonIO index d378cba66456..6e0f63f343b4 100644 --- a/Documentation/s390/CommonIO +++ b/Documentation/s390/CommonIO @@ -8,9 +8,9 @@ Command line parameters Enable logging of debug information in case of ccw device timeouts. -* cio_ignore = {all} | - {<device> | <range of devices>} | - {!<device> | !<range of devices>} +* cio_ignore = device[,device[,..]] + + device := {all | [!]ipldev | [!]condev | [!]<devno> | [!]<devno>-<devno>} The given devices will be ignored by the common I/O-layer; no detection and device sensing will be done on any of those devices. The subchannel to @@ -24,8 +24,10 @@ Command line parameters device numbers (0xabcd or abcd, for 2.4 backward compatibility). If you give a device number 0xabcd, it will be interpreted as 0.0.abcd. - You can use the 'all' keyword to ignore all devices. - The '!' operator will cause the I/O-layer to _not_ ignore a device. + You can use the 'all' keyword to ignore all devices. The 'ipldev' and 'condev' + keywords can be used to refer to the CCW based boot device and CCW console + device respectively (these are probably useful only when combined with the '!' + operator). The '!' operator will cause the I/O-layer to _not_ ignore a device. The command line is parsed from left to right. For example, diff --git a/Documentation/timers/NO_HZ.txt b/Documentation/timers/NO_HZ.txt new file mode 100644 index 000000000000..5b5322024067 --- /dev/null +++ b/Documentation/timers/NO_HZ.txt @@ -0,0 +1,273 @@ + NO_HZ: Reducing Scheduling-Clock Ticks + + +This document describes Kconfig options and boot parameters that can +reduce the number of scheduling-clock interrupts, thereby improving energy +efficiency and reducing OS jitter. Reducing OS jitter is important for +some types of computationally intensive high-performance computing (HPC) +applications and for real-time applications.
+ +There are two main contexts in which the number of scheduling-clock +interrupts can be reduced compared to the old-school approach of sending +a scheduling-clock interrupt to all CPUs every jiffy whether they need +it or not (CONFIG_HZ_PERIODIC=y or CONFIG_NO_HZ=n for older kernels): + +1. Idle CPUs (CONFIG_NO_HZ_IDLE=y or CONFIG_NO_HZ=y for older kernels). + +2. CPUs having only one runnable task (CONFIG_NO_HZ_FULL=y). + +These two cases are described in the following two sections, followed +by a third section on RCU-specific considerations and a fourth and final +section listing known issues. + + +IDLE CPUs + +If a CPU is idle, there is little point in sending it a scheduling-clock +interrupt. After all, the primary purpose of a scheduling-clock interrupt +is to force a busy CPU to shift its attention among multiple duties, +and an idle CPU has no duties to shift its attention among. + +The CONFIG_NO_HZ_IDLE=y Kconfig option causes the kernel to avoid sending +scheduling-clock interrupts to idle CPUs, which is critically important +both to battery-powered devices and to highly virtualized mainframes. +A battery-powered device running a CONFIG_HZ_PERIODIC=y kernel would +drain its battery very quickly, easily 2-3 times as fast as would the +same device running a CONFIG_NO_HZ_IDLE=y kernel. A mainframe running +1,500 OS instances might find that half of its CPU time was consumed by +unnecessary scheduling-clock interrupts. In these situations, there +is strong motivation to avoid sending scheduling-clock interrupts to +idle CPUs. That said, dyntick-idle mode is not free: + +1. It increases the number of instructions executed on the path + to and from the idle loop. + +2. On many architectures, dyntick-idle mode also increases the + number of expensive clock-reprogramming operations. + +Therefore, systems with aggressive real-time response constraints often +run CONFIG_HZ_PERIODIC=y kernels (or CONFIG_NO_HZ=n for older kernels) +in order to avoid degrading from-idle transition latencies. + +An idle CPU that is not receiving scheduling-clock interrupts is said to +be "dyntick-idle", "in dyntick-idle mode", "in nohz mode", or "running +tickless". The remainder of this document will use "dyntick-idle mode". + +There is also a boot parameter "nohz=" that can be used to disable +dyntick-idle mode in CONFIG_NO_HZ_IDLE=y kernels by specifying "nohz=off". +By default, CONFIG_NO_HZ_IDLE=y kernels boot with "nohz=on", enabling +dyntick-idle mode. + + +CPUs WITH ONLY ONE RUNNABLE TASK + +If a CPU has only one runnable task, there is little point in sending it +a scheduling-clock interrupt because there is no other task to switch to. + +The CONFIG_NO_HZ_FULL=y Kconfig option causes the kernel to avoid +sending scheduling-clock interrupts to CPUs with a single runnable task, +and such CPUs are said to be "adaptive-ticks CPUs". This is important +for applications with aggressive real-time response constraints because +it allows them to improve their worst-case response times by the maximum +duration of a scheduling-clock interrupt. It is also important for +computationally intensive short-iteration workloads: If any CPU is +delayed during a given iteration, all the other CPUs will be forced to +wait idle while the delayed CPU finishes. Thus, the delay is multiplied +by one less than the number of CPUs. In these situations, there is +again strong motivation to avoid sending scheduling-clock interrupts. + +By default, no CPU will be an adaptive-ticks CPU. 
The "nohz_full=" +boot parameter specifies the adaptive-ticks CPUs. For example, +"nohz_full=1,6-8" says that CPUs 1, 6, 7, and 8 are to be adaptive-ticks +CPUs. Note that you are prohibited from marking all of the CPUs as +adaptive-tick CPUs: At least one non-adaptive-tick CPU must remain +online to handle timekeeping tasks in order to ensure that system calls +like gettimeofday() returns accurate values on adaptive-tick CPUs. +(This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no +running user processes to observe slight drifts in clock rate.) +Therefore, the boot CPU is prohibited from entering adaptive-ticks +mode. Specifying a "nohz_full=" mask that includes the boot CPU will +result in a boot-time error message, and the boot CPU will be removed +from the mask. + +Alternatively, the CONFIG_NO_HZ_FULL_ALL=y Kconfig parameter specifies +that all CPUs other than the boot CPU are adaptive-ticks CPUs. This +Kconfig parameter will be overridden by the "nohz_full=" boot parameter, +so that if both the CONFIG_NO_HZ_FULL_ALL=y Kconfig parameter and +the "nohz_full=1" boot parameter is specified, the boot parameter will +prevail so that only CPU 1 will be an adaptive-ticks CPU. + +Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded. +This is covered in the "RCU IMPLICATIONS" section below. + +Normally, a CPU remains in adaptive-ticks mode as long as possible. +In particular, transitioning to kernel mode does not automatically change +the mode. Instead, the CPU will exit adaptive-ticks mode only if needed, +for example, if that CPU enqueues an RCU callback. + +Just as with dyntick-idle mode, the benefits of adaptive-tick mode do +not come for free: + +1. CONFIG_NO_HZ_FULL selects CONFIG_NO_HZ_COMMON, so you cannot run + adaptive ticks without also running dyntick idle. This dependency + extends down into the implementation, so that all of the costs + of CONFIG_NO_HZ_IDLE are also incurred by CONFIG_NO_HZ_FULL. + +2. The user/kernel transitions are slightly more expensive due + to the need to inform kernel subsystems (such as RCU) about + the change in mode. + +3. POSIX CPU timers on adaptive-tick CPUs may miss their deadlines + (perhaps indefinitely) because they currently rely on + scheduling-tick interrupts. This will likely be fixed in + one of two ways: (1) Prevent CPUs with POSIX CPU timers from + entering adaptive-tick mode, or (2) Use hrtimers or other + adaptive-ticks-immune mechanism to cause the POSIX CPU timer to + fire properly. + +4. If there are more perf events pending than the hardware can + accommodate, they are normally round-robined so as to collect + all of them over time. Adaptive-tick mode may prevent this + round-robining from happening. This will likely be fixed by + preventing CPUs with large numbers of perf events pending from + entering adaptive-tick mode. + +5. Scheduler statistics for adaptive-tick CPUs may be computed + slightly differently than those for non-adaptive-tick CPUs. + This might in turn perturb load-balancing of real-time tasks. + +6. The LB_BIAS scheduler feature is disabled by adaptive ticks. + +Although improvements are expected over time, adaptive ticks is quite +useful for many types of real-time and compute-intensive applications. +However, the drawbacks listed above mean that adaptive ticks should not +(yet) be enabled by default. 
+ + +RCU IMPLICATIONS + +There are situations in which idle CPUs cannot be permitted to +enter either dyntick-idle mode or adaptive-tick mode, the most +common being when that CPU has RCU callbacks pending. + +The CONFIG_RCU_FAST_NO_HZ=y Kconfig option may be used to cause such CPUs +to enter dyntick-idle mode or adaptive-tick mode anyway. In this case, +a timer will awaken these CPUs every four jiffies in order to ensure +that the RCU callbacks are processed in a timely fashion. + +Another approach is to offload RCU callback processing to "rcuo" kthreads +using the CONFIG_RCU_NOCB_CPU=y Kconfig option. The specific CPUs to +offload may be selected via several methods: + +1. One of three mutually exclusive Kconfig options specifies a + build-time default for the CPUs to offload: + + a. The CONFIG_RCU_NOCB_CPU_NONE=y Kconfig option results in + no CPUs being offloaded. + + b. The CONFIG_RCU_NOCB_CPU_ZERO=y Kconfig option causes + CPU 0 to be offloaded. + + c. The CONFIG_RCU_NOCB_CPU_ALL=y Kconfig option causes all + CPUs to be offloaded. Note that the callbacks will be + offloaded to "rcuo" kthreads, and that those kthreads + will in fact run on some CPU. However, this approach + gives fine-grained control over exactly which CPUs the + callbacks run on, along with their scheduling priority + (including the default of SCHED_OTHER), and it further + allows this control to be varied dynamically at runtime. + +2. The "rcu_nocbs=" kernel boot parameter, which takes a comma-separated + list of CPUs and CPU ranges; for example, "1,3-5" selects CPUs 1, + 3, 4, and 5. The specified CPUs will be offloaded in addition to + any CPUs specified as offloaded by CONFIG_RCU_NOCB_CPU_ZERO=y or + CONFIG_RCU_NOCB_CPU_ALL=y. This means that the "rcu_nocbs=" boot + parameter has no effect for kernels built with CONFIG_RCU_NOCB_CPU_ALL=y. + +The offloaded CPUs will never queue RCU callbacks, and therefore RCU +never prevents offloaded CPUs from entering either dyntick-idle mode +or adaptive-tick mode. That said, note that it is up to userspace to +pin the "rcuo" kthreads to specific CPUs if desired. Otherwise, the +scheduler will decide where to run them, which might or might not be +where you want them to run. + + +KNOWN ISSUES + +o Dyntick-idle slows transitions to and from idle slightly. + In practice, this has not been a problem except for the most + aggressive real-time workloads, which have the option of disabling + dyntick-idle mode, an option that most of them take. However, + some workloads will no doubt want to use adaptive ticks to + eliminate scheduling-clock interrupt latencies. Here are some + options for these workloads: + + a. Use PM QOS from userspace to inform the kernel of your + latency requirements (preferred). + + b. On x86 systems, use the "idle=mwait" boot parameter. + + c. On x86 systems, use the "intel_idle.max_cstate=" boot parameter + to limit the maximum C-state depth. + + d. On x86 systems, use the "idle=poll" boot parameter. + However, please note that use of this parameter can cause + your CPU to overheat, which may cause thermal throttling + to degrade your latencies -- and that this degradation can + be even worse than that of dyntick-idle. Furthermore, + this parameter effectively disables Turbo Mode on Intel + CPUs, which can significantly reduce maximum performance. + +o Adaptive-ticks slows user/kernel transitions slightly. + This is not expected to be a problem for computationally intensive + workloads, which have few such transitions.
Careful benchmarking + will be required to determine whether or not other workloads + are significantly affected by this effect. + +o Adaptive-ticks does not do anything unless there is only one + runnable task for a given CPU, even though there are a number + of other situations where the scheduling-clock tick is not + needed. To give but one example, consider a CPU that has one + runnable high-priority SCHED_FIFO task and an arbitrary number + of low-priority SCHED_OTHER tasks. In this case, the CPU is + required to run the SCHED_FIFO task until it either blocks or + some other higher-priority task awakens on (or is assigned to) + this CPU, so there is no point in sending a scheduling-clock + interrupt to this CPU. However, the current implementation + nevertheless sends scheduling-clock interrupts to CPUs having a + single runnable SCHED_FIFO task and multiple runnable SCHED_OTHER + tasks, even though these interrupts are unnecessary. + + Better handling of these sorts of situations is future work. + +o A reboot is required to reconfigure both adaptive idle and RCU + callback offloading. Runtime reconfiguration could be provided + if needed; however, due to the complexity of reconfiguring RCU at + runtime, there would need to be an earthshakingly good reason, + especially given that you have the straightforward option of + simply offloading RCU callbacks from all CPUs and pinning them + where you want them whenever you want them pinned. + +o Additional configuration is required to deal with other sources + of OS jitter, including interrupts and system-utility tasks + and processes. This configuration normally involves binding + interrupts and tasks to particular CPUs. + +o Some sources of OS jitter can currently be eliminated only by + constraining the workload. For example, the only way to eliminate + OS jitter due to global TLB shootdowns is to avoid the unmapping + operations (such as kernel module unload operations) that + result in these shootdowns. For another example, page faults + and TLB misses can be reduced (and in some cases eliminated) by + using huge pages and by constraining the amount of memory used + by the application. Pre-faulting the working set can also be + helpful, especially when combined with the mlock() and mlockall() + system calls. + +o Unless all CPUs are idle, at least one CPU must keep the + scheduling-clock interrupt going in order to support accurate + timekeeping. + +o If there are adaptive-ticks CPUs, there will be at least one + CPU keeping the scheduling-clock interrupt going, even if all + CPUs are otherwise idle. diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 119358dfb742..5f91eda91647 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1486,15 +1486,23 @@ struct kvm_ioeventfd { __u8 pad[36]; }; +For the special case of virtio-ccw devices on s390, the ioevent is matched +to a subchannel/virtqueue tuple instead. + The following flags are defined: #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch) #define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio) #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) +#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \ + (1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify) If the datamatch flag is set, the event will be signaled only if the value written to the registered address is equal to datamatch in struct kvm_ioeventfd.
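As an illustration of the ioeventfd interface described above, here is a minimal userspace sketch (the MMIO address and datamatch value are hypothetical, and error handling is abbreviated):

	#include <string.h>
	#include <sys/eventfd.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Ask KVM to signal an eventfd, instead of exiting to userspace,
	 * when the guest writes the value 1 to a hypothetical 4-byte MMIO
	 * doorbell register. */
	static int register_doorbell(int vm_fd)
	{
		struct kvm_ioeventfd args;
		int efd = eventfd(0, EFD_CLOEXEC);

		if (efd < 0)
			return -1;

		memset(&args, 0, sizeof(args));
		args.addr      = 0xd0000000;	/* hypothetical MMIO address */
		args.len       = 4;
		args.fd        = efd;
		args.datamatch = 1;
		args.flags     = KVM_IOEVENTFD_FLAG_DATAMATCH;

		return ioctl(vm_fd, KVM_IOEVENTFD, &args);	/* vm ioctl */
	}

Without KVM_IOEVENTFD_FLAG_DATAMATCH, any guest write of the registered length to the registered address would signal the eventfd.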
+For virtio-ccw devices, addr contains the subchannel id and datamatch the +virtqueue index. + 4.60 KVM_DIRTY_TLB @@ -1780,27 +1788,48 @@ registers, find a list below: PPC | KVM_REG_PPC_VPA_DTL | 128 PPC | KVM_REG_PPC_EPCR | 32 PPC | KVM_REG_PPC_EPR | 32 + PPC | KVM_REG_PPC_TCR | 32 + PPC | KVM_REG_PPC_TSR | 32 + PPC | KVM_REG_PPC_OR_TSR | 32 + PPC | KVM_REG_PPC_CLEAR_TSR | 32 + PPC | KVM_REG_PPC_MAS0 | 32 + PPC | KVM_REG_PPC_MAS1 | 32 + PPC | KVM_REG_PPC_MAS2 | 64 + PPC | KVM_REG_PPC_MAS7_3 | 64 + PPC | KVM_REG_PPC_MAS4 | 32 + PPC | KVM_REG_PPC_MAS6 | 32 + PPC | KVM_REG_PPC_MMUCFG | 32 + PPC | KVM_REG_PPC_TLB0CFG | 32 + PPC | KVM_REG_PPC_TLB1CFG | 32 + PPC | KVM_REG_PPC_TLB2CFG | 32 + PPC | KVM_REG_PPC_TLB3CFG | 32 + PPC | KVM_REG_PPC_TLB0PS | 32 + PPC | KVM_REG_PPC_TLB1PS | 32 + PPC | KVM_REG_PPC_TLB2PS | 32 + PPC | KVM_REG_PPC_TLB3PS | 32 + PPC | KVM_REG_PPC_EPTCFG | 32 + PPC | KVM_REG_PPC_ICP_STATE | 64 ARM registers are mapped using the lower 32 bits. The upper 16 of that is the register group type, or coprocessor number: ARM core registers have the following id bit patterns: - 0x4002 0000 0010 <index into the kvm_regs struct:16> + 0x4020 0000 0010 <index into the kvm_regs struct:16> ARM 32-bit CP15 registers have the following id bit patterns: - 0x4002 0000 000F <zero:1> <crn:4> <crm:4> <opc1:4> <opc2:3> + 0x4020 0000 000F <zero:1> <crn:4> <crm:4> <opc1:4> <opc2:3> ARM 64-bit CP15 registers have the following id bit patterns: - 0x4003 0000 000F <zero:1> <zero:4> <crm:4> <opc1:4> <zero:3> + 0x4030 0000 000F <zero:1> <zero:4> <crm:4> <opc1:4> <zero:3> ARM CCSIDR registers are demultiplexed by CSSELR value: - 0x4002 0000 0011 00 <csselr:8> + 0x4020 0000 0011 00 <csselr:8> ARM 32-bit VFP control registers have the following id bit patterns: - 0x4002 0000 0012 1 <regno:12> + 0x4020 0000 0012 1 <regno:12> ARM 64-bit FP registers have the following id bit patterns: - 0x4002 0000 0012 0 <regno:12> + 0x4030 0000 0012 0 <regno:12> 4.69 KVM_GET_ONE_REG @@ -2161,6 +2190,76 @@ header; first `n_valid' valid entries with contents from the data written, then `n_invalid' invalid entries, invalidating any previously valid entries found. +4.79 KVM_CREATE_DEVICE + +Capability: KVM_CAP_DEVICE_CTRL +Type: vm ioctl +Parameters: struct kvm_create_device (in/out) +Returns: 0 on success, -1 on error +Errors: + ENODEV: The device type is unknown or unsupported + EEXIST: Device already created, and this type of device may not + be instantiated multiple times + + Other error conditions may be defined by individual device types or + have their standard meanings. + +Creates an emulated device in the kernel. The file descriptor returned +in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR. + +If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the +device type is supported (not necessarily whether it can be created +in the current vm). + +Individual devices should not define flags. Attributes should be used +for specifying any behavior that is not implied by the device type +number. + +struct kvm_create_device { + __u32 type; /* in: KVM_DEV_TYPE_xxx */ + __u32 fd; /* out: device handle */ + __u32 flags; /* in: KVM_CREATE_DEVICE_xxx */ +}; + +4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR + +Capability: KVM_CAP_DEVICE_CTRL +Type: device ioctl +Parameters: struct kvm_device_attr +Returns: 0 on success, -1 on error +Errors: + ENXIO: The group or attribute is unknown/unsupported for this device + EPERM: The attribute cannot (currently) be accessed this way + (e.g. 
read-only attribute, or attribute that only makes + sense when the device is in a different state) + + Other error conditions may be defined by individual device types. + +Gets/sets a specified piece of device configuration and/or state. The +semantics are device-specific. See individual device documentation in +the "devices" directory. As with ONE_REG, the size of the data +transferred is defined by the particular attribute. + +struct kvm_device_attr { + __u32 flags; /* no flags currently defined */ + __u32 group; /* device-defined */ + __u64 attr; /* group-defined */ + __u64 addr; /* userspace address of attr data */ +}; + +4.81 KVM_HAS_DEVICE_ATTR + +Capability: KVM_CAP_DEVICE_CTRL +Type: device ioctl +Parameters: struct kvm_device_attr +Returns: 0 on success, -1 on error +Errors: + ENXIO: The group or attribute is unknown/unsupported for this device + +Tests whether a device supports a particular attribute. A successful +return indicates the attribute is implemented. It does not necessarily +indicate that the attribute can be read or written in the device's +current state. "addr" is ignored. 4.77 KVM_ARM_VCPU_INIT @@ -2243,6 +2342,25 @@ and distributor interface, the ioctl must be called after calling KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the base addresses will return -EEXIST. +4.82 KVM_PPC_RTAS_DEFINE_TOKEN + +Capability: KVM_CAP_PPC_RTAS +Architectures: ppc +Type: vm ioctl +Parameters: struct kvm_rtas_token_args +Returns: 0 on success, -1 on error + +Defines a token value for a RTAS (Run Time Abstraction Services) +service in order to allow it to be handled in the kernel. The +argument struct gives the name of the service, which must be the name +of a service that has a kernel-side implementation. If the token +value is non-zero, it will be associated with that service, and +subsequent RTAS calls by the guest specifying that token will be +handled by the kernel. If the token value is 0, then any token +associated with the service will be forgotten, and subsequent RTAS +calls by the guest for that service will be passed to userspace to be +handled. + 5. The kvm_run structure ------------------------ @@ -2646,3 +2764,19 @@ to receive the topmost interrupt vector. When disabled (args[0] == 0), behavior is as if this facility is unsupported. When this capability is enabled, KVM_EXIT_EPR can occur. + +6.6 KVM_CAP_IRQ_MPIC + +Architectures: ppc +Parameters: args[0] is the MPIC device fd + args[1] is the MPIC CPU number for this vcpu + +This capability connects the vcpu to an in-kernel MPIC device. + +6.7 KVM_CAP_IRQ_XICS + +Architectures: ppc +Parameters: args[0] is the XICS device fd + args[1] is the XICS CPU number (server ID) for this vcpu + +This capability connects the vcpu to an in-kernel XICS device. diff --git a/Documentation/virtual/kvm/devices/README b/Documentation/virtual/kvm/devices/README new file mode 100644 index 000000000000..34a69834124a --- /dev/null +++ b/Documentation/virtual/kvm/devices/README @@ -0,0 +1 @@ +This directory contains specific device bindings for KVM_CAP_DEVICE_CTRL. 
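To make the device-control flow above concrete, here is a minimal userspace sketch combining KVM_CREATE_DEVICE and KVM_SET_DEVICE_ATTR; it uses the MPIC device type and attribute names documented in devices/mpic.txt below, and the base address is a made-up example:

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Create an in-kernel Freescale MPIC v2.0 and program its base
	 * address through the KVM_DEV_MPIC_GRP_MISC attribute group. */
	static int create_mpic(int vm_fd)
	{
		struct kvm_create_device cd = {
			.type = KVM_DEV_TYPE_FSL_MPIC_20,
		};
		__u64 base = 0xe0000000;	/* hypothetical, naturally aligned */
		struct kvm_device_attr attr = {
			.group = KVM_DEV_MPIC_GRP_MISC,
			.attr  = KVM_DEV_MPIC_BASE_ADDR,
			.addr  = (__u64)(unsigned long)&base,	/* userspace pointer */
		};

		if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
			return -1;

		/* cd.fd now refers to the device; attribute ioctls go there. */
		return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
	}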
diff --git a/Documentation/virtual/kvm/devices/mpic.txt b/Documentation/virtual/kvm/devices/mpic.txt new file mode 100644 index 000000000000..8257397adc3c --- /dev/null +++ b/Documentation/virtual/kvm/devices/mpic.txt @@ -0,0 +1,53 @@ +MPIC interrupt controller +========================= + +Device types supported: + KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0 + KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2 + +Only one MPIC instance, of any type, may be instantiated. The created +MPIC will act as the system interrupt controller, connecting to each +vcpu's interrupt inputs. + +Groups: + KVM_DEV_MPIC_GRP_MISC + Attributes: + KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit) + Base address of the 256 KiB MPIC register space. Must be + naturally aligned. A value of zero disables the mapping. + Reset value is zero. + + KVM_DEV_MPIC_GRP_REGISTER (rw, 32-bit) + Access an MPIC register, as if the access were made from the guest. + "attr" is the byte offset into the MPIC register space. Accesses + must be 4-byte aligned. + + MSIs may be signaled by using this attribute group to write + to the relevant MSIIR. + + KVM_DEV_MPIC_GRP_IRQ_ACTIVE (rw, 32-bit) + IRQ input line for each standard openpic source. 0 is inactive and 1 + is active, regardless of interrupt sense. + + For edge-triggered interrupts: Writing 1 is considered an activating + edge, and writing 0 is ignored. Reading returns 1 if a previously + signaled edge has not been acknowledged, and 0 otherwise. + + "attr" is the IRQ number. IRQ numbers for standard sources are the + byte offset of the relevant IVPR from EIVPR0, divided by 32. + +IRQ Routing: + + The MPIC emulation supports IRQ routing. Only a single MPIC device can + be instantiated. Once that device has been created, it's available as + irqchip id 0. + + This irqchip 0 has 256 interrupt pins, which expose the interrupts in + the main array of interrupt sources (a.k.a. "SRC" interrupts). + + The numbering is the same as the MPIC device tree binding -- based on + the register offset from the beginning of the sources array, without + regard to any subdivisions in chip documentation such as "internal" + or "external" interrupts. + + Access to non-SRC interrupts is not implemented through IRQ routing mechanisms. diff --git a/Documentation/virtual/kvm/devices/xics.txt b/Documentation/virtual/kvm/devices/xics.txt new file mode 100644 index 000000000000..42864935ac5d --- /dev/null +++ b/Documentation/virtual/kvm/devices/xics.txt @@ -0,0 +1,66 @@ +XICS interrupt controller + +Device type supported: KVM_DEV_TYPE_XICS + +Groups: + KVM_DEV_XICS_SOURCES + Attributes: One per interrupt source, indexed by the source number. + +This device emulates the XICS (eXternal Interrupt Controller +Specification) defined in PAPR. The XICS has a set of interrupt +sources, each identified by a 20-bit source number, and a set of +Interrupt Control Presentation (ICP) entities, also called "servers", +each associated with a virtual CPU. + +The ICP entities are created by enabling the KVM_CAP_IRQ_ARCH +capability for each vcpu, specifying KVM_CAP_IRQ_XICS in args[0] and +the interrupt server number (i.e. the vcpu number from the XICS's +point of view) in args[1] of the kvm_enable_cap struct. Each ICP has +64 bits of state which can be read and written using the +KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls on the vcpu. 
The 64 bit +state word has the following bitfields, starting at the +least-significant end of the word: + +* Unused, 16 bits + +* Pending interrupt priority, 8 bits + Zero is the highest priority, 255 means no interrupt is pending. + +* Pending IPI (inter-processor interrupt) priority, 8 bits + Zero is the highest priority, 255 means no IPI is pending. + +* Pending interrupt source number, 24 bits + Zero means no interrupt pending, 2 means an IPI is pending + +* Current processor priority, 8 bits + Zero is the highest priority, meaning no interrupts can be + delivered, and 255 is the lowest priority. + +Each source has 64 bits of state that can be read and written using +the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the +KVM_DEV_XICS_SOURCES attribute group, with the attribute number being +the interrupt source number. The 64 bit state word has the following +bitfields, starting from the least-significant end of the word: + +* Destination (server number), 32 bits + This specifies where the interrupt should be sent, and is the + interrupt server number specified for the destination vcpu. + +* Priority, 8 bits + This is the priority specified for this interrupt source, where 0 is + the highest priority and 255 is the lowest. An interrupt with a + priority of 255 will never be delivered. + +* Level sensitive flag, 1 bit + This bit is 1 for a level-sensitive interrupt source, or 0 for + edge-sensitive (or MSI). + +* Masked flag, 1 bit + This bit is set to 1 if the interrupt is masked (cannot be delivered + regardless of its priority), for example by the ibm,int-off RTAS + call, or 0 if it is not masked. + +* Pending flag, 1 bit + This bit is 1 if the source has a pending interrupt, otherwise 0. + +Only one XICS instance may be created per VM. @@ -1399,7 +1399,7 @@ quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN $(wildcard $(rm-files)) # Run depmod only if we have System.map and depmod is executable quiet_cmd_depmod = DEPMOD $(KERNELRELEASE) cmd_depmod = $(CONFIG_SHELL) $(srctree)/scripts/depmod.sh $(DEPMOD) \ - $(KERNELRELEASE) "$(patsubst "%",%,$(CONFIG_SYMBOL_PREFIX))" + $(KERNELRELEASE) "$(patsubst y,_,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX))" # Create temporary dir for module support files # clean it up only when building all modules diff --git a/arch/Kconfig b/arch/Kconfig index 99f0e17df429..dd0e8eb8042f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -381,6 +381,12 @@ config MODULES_USE_ELF_REL Modules only use ELF REL relocations. Modules with ELF RELA relocations will give an error. +config HAVE_UNDERSCORE_SYMBOL_PREFIX + bool + help + Some architectures generate an _ in front of C symbols; things like + module loading and assembly files need to know about this. 
+ # # ABI hall of shame # diff --git a/arch/arm/include/asm/idmap.h b/arch/arm/include/asm/idmap.h index 1a66f907e5cc..bf863edb517d 100644 --- a/arch/arm/include/asm/idmap.h +++ b/arch/arm/include/asm/idmap.h @@ -8,7 +8,6 @@ #define __idmap __section(.idmap.text) noinline notrace extern pgd_t *idmap_pgd; -extern pgd_t *hyp_pgd; void setup_mm_for_reboot(void); diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 0c4e643d939e..57cb786a6203 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -87,7 +87,7 @@ struct kvm_vcpu_fault_info { u32 hyp_pc; /* PC when exception was taken from Hyp mode */ }; -typedef struct vfp_hard_struct kvm_kernel_vfp_t; +typedef struct vfp_hard_struct kvm_cpu_context_t; struct kvm_vcpu_arch { struct kvm_regs regs; @@ -105,8 +105,10 @@ struct kvm_vcpu_arch { struct kvm_vcpu_fault_info fault; /* Floating point registers (VFP and Advanced SIMD/NEON) */ - kvm_kernel_vfp_t vfp_guest; - kvm_kernel_vfp_t *vfp_host; + struct vfp_hard_struct vfp_guest; + + /* Host FP context */ + kvm_cpu_context_t *host_cpu_context; /* VGIC state */ struct vgic_cpu vgic_cpu; @@ -188,23 +190,38 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); -static inline void __cpu_init_hyp_mode(unsigned long long pgd_ptr, +static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr, + unsigned long long pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) { - unsigned long pgd_low, pgd_high; - - pgd_low = (pgd_ptr & ((1ULL << 32) - 1)); - pgd_high = (pgd_ptr >> 32ULL); - /* - * Call initialization code, and switch to the full blown - * HYP code. The init code doesn't need to preserve these registers as - * r1-r3 and r12 are already callee save according to the AAPCS. - * Note that we slightly misuse the prototype by casing the pgd_low to - * a void *. + * Call initialization code, and switch to the full blown HYP + * code. The init code doesn't need to preserve these + * registers as r0-r3 are already callee saved according to + * the AAPCS. + * Note that we slightly misuse the prototype by casing the + * stack pointer to a void *. + * + * We don't have enough registers to perform the full init in + * one go. Install the boot PGD first, and then install the + * runtime PGD, stack pointer and vectors. The PGDs are always + * passed as the third argument, in order to be passed into + * r2-r3 to the init code (yes, this is compliant with the + * PCS!). */ - kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr); + + kvm_call_hyp(NULL, 0, boot_pgd_ptr); + + kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr); } +static inline int kvm_arch_dev_ioctl_check_extension(long ext) +{ + return 0; +} + +int kvm_perf_init(void); +int kvm_perf_teardown(void); + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 970f3b5fa109..472ac7091003 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -19,21 +19,33 @@ #ifndef __ARM_KVM_MMU_H__ #define __ARM_KVM_MMU_H__ -#include <asm/cacheflush.h> -#include <asm/pgalloc.h> -#include <asm/idmap.h> +#include <asm/memory.h> +#include <asm/page.h> /* * We directly use the kernel VA for the HYP, as we can directly share * the mapping (HTTBR "covers" TTBR1). 
*/ -#define HYP_PAGE_OFFSET_MASK (~0UL) +#define HYP_PAGE_OFFSET_MASK UL(~0) #define HYP_PAGE_OFFSET PAGE_OFFSET #define KERN_TO_HYP(kva) (kva) +/* + * Our virtual mapping for the boot-time MMU-enable code. Must be + * shared across all the page-tables. Conveniently, we use the vectors + * page, where no kernel data will ever be shared with HYP. + */ +#define TRAMPOLINE_VA UL(CONFIG_VECTORS_BASE) + +#ifndef __ASSEMBLY__ + +#include <asm/cacheflush.h> +#include <asm/pgalloc.h> + int create_hyp_mappings(void *from, void *to); int create_hyp_io_mappings(void *from, void *to, phys_addr_t); -void free_hyp_pmds(void); +void free_boot_hyp_pgd(void); +void free_hyp_pgds(void); int kvm_alloc_stage2_pgd(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm *kvm); @@ -45,6 +57,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); +phys_addr_t kvm_mmu_get_boot_httbr(void); +phys_addr_t kvm_get_idmap_vector(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); @@ -114,4 +128,8 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) } } +#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) + +#endif /* !__ASSEMBLY__ */ + #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index a53efa993690..ee68cce6b48e 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -158,7 +158,7 @@ int main(void) DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.midr)); DEFINE(VCPU_CP15, offsetof(struct kvm_vcpu, arch.cp15)); DEFINE(VCPU_VFP_GUEST, offsetof(struct kvm_vcpu, arch.vfp_guest)); - DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.vfp_host)); + DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.host_cpu_context)); DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs)); DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs)); DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs)); diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index 087fc321e9e5..b1b89882b113 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -99,7 +99,7 @@ static const struct file_operations proc_status_fops = { .open = proc_status_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; #endif diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index b571484e9f03..a871b8e00fca 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -20,7 +20,7 @@ VMLINUX_SYMBOL(__idmap_text_start) = .; \ *(.idmap.text) \ VMLINUX_SYMBOL(__idmap_text_end) = .; \ - ALIGN_FUNCTION(); \ + . = ALIGN(32); \ VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ *(.hyp.idmap.text) \ VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; @@ -315,3 +315,8 @@ SECTIONS */ ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support") ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined") +/* + * The HYP init code can't be more than a page long. + * The above comment applies as well. + */ +ASSERT(((__hyp_idmap_text_end - __hyp_idmap_text_start) <= PAGE_SIZE), "HYP init code too big") diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 49dd64e579c2..370e1a8af6ac 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -41,9 +41,9 @@ config KVM_ARM_HOST Provides host support for ARM processors. 
config KVM_ARM_MAX_VCPUS - int "Number maximum supported virtual CPUs per VM" - depends on KVM_ARM_HOST - default 4 + int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST + default 4 if KVM_ARM_HOST + default 0 help Static number of max supported virtual CPUs per VM. diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 8dc5e76cb789..53c5ed83d16f 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -18,6 +18,6 @@ kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o -obj-y += coproc.o coproc_a15.o mmio.o psci.o +obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o diff --git a/arch/arm/kvm/arch_timer.c b/arch/arm/kvm/arch_timer.c index 6ac938d46297..c55b6089e923 100644 --- a/arch/arm/kvm/arch_timer.c +++ b/arch/arm/kvm/arch_timer.c @@ -22,6 +22,7 @@ #include <linux/kvm_host.h> #include <linux/interrupt.h> +#include <clocksource/arm_arch_timer.h> #include <asm/arch_timer.h> #include <asm/kvm_vgic.h> @@ -64,7 +65,7 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - timer->cntv_ctl |= 1 << 1; /* Mask the interrupt in the guest */ + timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, vcpu->arch.timer_cpu.irq->irq, vcpu->arch.timer_cpu.irq->level); @@ -133,8 +134,8 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) cycle_t cval, now; u64 ns; - /* Check if the timer is enabled and unmasked first */ - if ((timer->cntv_ctl & 3) != 1) + if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || + !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) return; cval = timer->cntv_cval; diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index a0dfc2a53f91..37d216d814cd 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -16,6 +16,7 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include <linux/cpu.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/kvm_host.h> @@ -48,7 +49,7 @@ __asm__(".arch_extension virt"); #endif static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); -static kvm_kernel_vfp_t __percpu *kvm_host_vfp_state; +static kvm_cpu_context_t __percpu *kvm_host_cpu_state; static unsigned long hyp_default_vectors; /* Per-CPU variable containing the currently running vcpu. 
*/ @@ -206,7 +207,7 @@ int kvm_dev_ioctl_check_extension(long ext) r = KVM_MAX_VCPUS; break; default: - r = 0; + r = kvm_arch_dev_ioctl_check_extension(ext); break; } return r; @@ -218,27 +219,18 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_arch_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - int user_alloc) -{ - return 0; -} - int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - bool user_alloc) + enum kvm_mr_change change) { return 0; } void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { } @@ -326,7 +318,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; - vcpu->arch.vfp_host = this_cpu_ptr(kvm_host_vfp_state); + vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); /* * Check whether this vcpu requires the cache to be flushed on @@ -639,7 +631,8 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) return 0; } -int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level) +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, + bool line_status) { u32 irq = irq_level->irq; unsigned int irq_type, vcpu_idx, irq_num; @@ -794,30 +787,48 @@ long kvm_arch_vm_ioctl(struct file *filp, } } -static void cpu_init_hyp_mode(void *vector) +static void cpu_init_hyp_mode(void *dummy) { + unsigned long long boot_pgd_ptr; unsigned long long pgd_ptr; unsigned long hyp_stack_ptr; unsigned long stack_page; unsigned long vector_ptr; /* Switch from the HYP stub to our own HYP init vector */ - __hyp_set_vectors((unsigned long)vector); + __hyp_set_vectors(kvm_get_idmap_vector()); + boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr(); pgd_ptr = (unsigned long long)kvm_mmu_get_httbr(); stack_page = __get_cpu_var(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; vector_ptr = (unsigned long)__kvm_hyp_vector; - __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); + __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); +} + +static int hyp_init_cpu_notify(struct notifier_block *self, + unsigned long action, void *cpu) +{ + switch (action) { + case CPU_STARTING: + case CPU_STARTING_FROZEN: + cpu_init_hyp_mode(NULL); + break; + } + + return NOTIFY_OK; } +static struct notifier_block hyp_init_cpu_nb = { + .notifier_call = hyp_init_cpu_notify, +}; + /** * Inits Hyp-mode on all online CPUs */ static int init_hyp_mode(void) { - phys_addr_t init_phys_addr; int cpu; int err = 0; @@ -850,24 +861,6 @@ static int init_hyp_mode(void) } /* - * Execute the init code on each CPU. - * - * Note: The stack is not mapped yet, so don't do anything else than - * initializing the hypervisor mode on each CPU using a local stack - * space for temporary storage. 
- */ - init_phys_addr = virt_to_phys(__kvm_hyp_init); - for_each_online_cpu(cpu) { - smp_call_function_single(cpu, cpu_init_hyp_mode, - (void *)(long)init_phys_addr, 1); - } - - /* - * Unmap the identity mapping - */ - kvm_clear_hyp_idmap(); - - /* * Map the Hyp-code called directly from the host */ err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end); @@ -890,33 +883,38 @@ static int init_hyp_mode(void) } /* - * Map the host VFP structures + * Map the host CPU structures */ - kvm_host_vfp_state = alloc_percpu(kvm_kernel_vfp_t); - if (!kvm_host_vfp_state) { + kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t); + if (!kvm_host_cpu_state) { err = -ENOMEM; - kvm_err("Cannot allocate host VFP state\n"); + kvm_err("Cannot allocate host CPU state\n"); goto out_free_mappings; } for_each_possible_cpu(cpu) { - kvm_kernel_vfp_t *vfp; + kvm_cpu_context_t *cpu_ctxt; - vfp = per_cpu_ptr(kvm_host_vfp_state, cpu); - err = create_hyp_mappings(vfp, vfp + 1); + cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu); + err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1); if (err) { - kvm_err("Cannot map host VFP state: %d\n", err); - goto out_free_vfp; + kvm_err("Cannot map host CPU state: %d\n", err); + goto out_free_context; } } /* + * Execute the init code on each CPU. + */ + on_each_cpu(cpu_init_hyp_mode, NULL, 1); + + /* * Init HYP view of VGIC */ err = kvm_vgic_hyp_init(); if (err) - goto out_free_vfp; + goto out_free_context; #ifdef CONFIG_KVM_ARM_VGIC vgic_present = true; @@ -929,12 +927,19 @@ static int init_hyp_mode(void) if (err) goto out_free_mappings; +#ifndef CONFIG_HOTPLUG_CPU + free_boot_hyp_pgd(); +#endif + + kvm_perf_init(); + kvm_info("Hyp mode initialized successfully\n"); + return 0; -out_free_vfp: - free_percpu(kvm_host_vfp_state); +out_free_context: + free_percpu(kvm_host_cpu_state); out_free_mappings: - free_hyp_pmds(); + free_hyp_pgds(); out_free_stack_pages: for_each_possible_cpu(cpu) free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); @@ -943,27 +948,42 @@ out_err: return err; } +static void check_kvm_target_cpu(void *ret) +{ + *(int *)ret = kvm_target_cpu(); +} + /** * Initialize Hyp-mode and memory mappings on all CPUs. 
*/ int kvm_arch_init(void *opaque) { int err; + int ret, cpu; if (!is_hyp_mode_available()) { kvm_err("HYP mode not available\n"); return -ENODEV; } - if (kvm_target_cpu() < 0) { - kvm_err("Target CPU not supported!\n"); - return -ENODEV; + for_each_online_cpu(cpu) { + smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1); + if (ret < 0) { + kvm_err("Error, CPU %d not supported!\n", cpu); + return -ENODEV; + } } err = init_hyp_mode(); if (err) goto out_err; + err = register_cpu_notifier(&hyp_init_cpu_nb); + if (err) { + kvm_err("Cannot register HYP init CPU notifier (%d)\n", err); + goto out_err; + } + kvm_coproc_table_init(); return 0; out_err: @@ -973,6 +993,7 @@ out_err: /* NOP: Compiling as a module not supported */ void kvm_arch_exit(void) { + kvm_perf_teardown(); } static int arm_init(void) diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index 9f37a79b880b..f048338135f7 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S @@ -21,13 +21,33 @@ #include <asm/asm-offsets.h> #include <asm/kvm_asm.h> #include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> /******************************************************************** * Hypervisor initialization * - should be called with: - * r0,r1 = Hypervisor pgd pointer - * r2 = top of Hyp stack (kernel VA) - * r3 = pointer to hyp vectors + * r0 = top of Hyp stack (kernel VA) + * r1 = pointer to hyp vectors + * r2,r3 = Hypervisor pgd pointer + * + * The init scenario is: + * - We jump in HYP with four parameters: boot HYP pgd, runtime HYP pgd, + * runtime stack, runtime vectors + * - Enable the MMU with the boot pgd + * - Jump to a target in the trampoline page (remember, this is the same + * physical page!) + * - Now switch to the runtime pgd (same VA, and still the same physical + * page!) + * - Invalidate TLBs + * - Set stack and vectors + * - Profit! (or eret, if you only care about the code). + * + * As we only have four registers available to pass parameters (and we + * need six), we split the init into two phases: + * - Phase 1: r0 = 0, r1 = 0, r2,r3 contain the boot PGD. + * Provides the basic HYP init, and enables the MMU. + * - Phase 2: r0 = ToS, r1 = vectors, r2,r3 contain the runtime PGD. + * Switches to the runtime PGD, and sets the stack and vectors. */ .text @@ -47,22 +67,25 @@ __kvm_hyp_init: W(b) . __do_hyp_init: + cmp r0, #0 @ We have a SP? + bne phase2 @ Yes, second stage init + @ Set the HTTBR to point to the hypervisor PGD pointer passed - mcrr p15, 4, r0, r1, c2 + mcrr p15, 4, r2, r3, c2 @ Set the HTCR and VTCR to the same shareability and cacheability @ settings as the non-secure TTBCR and with T0SZ == 0.
mrc p15, 4, r0, c2, c0, 2 @ HTCR - ldr r12, =HTCR_MASK - bic r0, r0, r12 + ldr r2, =HTCR_MASK + bic r0, r0, r2 mrc p15, 0, r1, c2, c0, 2 @ TTBCR and r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ) orr r0, r0, r1 mcr p15, 4, r0, c2, c0, 2 @ HTCR mrc p15, 4, r1, c2, c1, 2 @ VTCR - ldr r12, =VTCR_MASK - bic r1, r1, r12 + ldr r2, =VTCR_MASK + bic r1, r1, r2 bic r0, r0, #(~VTCR_HTCR_SH) @ clear non-reusable HTCR bits orr r1, r0, r1 orr r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S) @@ -85,24 +108,41 @@ __do_hyp_init: @ - Memory alignment checks: enabled @ - MMU: enabled (this code must be run from an identity mapping) mrc p15, 4, r0, c1, c0, 0 @ HSCR - ldr r12, =HSCTLR_MASK - bic r0, r0, r12 + ldr r2, =HSCTLR_MASK + bic r0, r0, r2 mrc p15, 0, r1, c1, c0, 0 @ SCTLR - ldr r12, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C) - and r1, r1, r12 - ARM( ldr r12, =(HSCTLR_M | HSCTLR_A) ) - THUMB( ldr r12, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) ) - orr r1, r1, r12 + ldr r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C) + and r1, r1, r2 + ARM( ldr r2, =(HSCTLR_M | HSCTLR_A) ) + THUMB( ldr r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) ) + orr r1, r1, r2 orr r0, r0, r1 isb mcr p15, 4, r0, c1, c0, 0 @ HSCR - isb - @ Set stack pointer and return to the kernel - mov sp, r2 + @ End of init phase-1 + eret + +phase2: + @ Set stack pointer + mov sp, r0 @ Set HVBAR to point to the HYP vectors - mcr p15, 4, r3, c12, c0, 0 @ HVBAR + mcr p15, 4, r1, c12, c0, 0 @ HVBAR + + @ Jump to the trampoline page + ldr r0, =TRAMPOLINE_VA + adr r1, target + bfi r0, r1, #0, #PAGE_SHIFT + mov pc, r0 + +target: @ We're now in the trampoline code, switch page tables + mcrr p15, 4, r2, r3, c2 + isb + + @ Invalidate the old TLBs + mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH + dsb eret diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 2f12e4056408..965706578f13 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -32,8 +32,15 @@ extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; +static pgd_t *boot_hyp_pgd; +static pgd_t *hyp_pgd; static DEFINE_MUTEX(kvm_hyp_pgd_mutex); +static void *init_bounce_page; +static unsigned long hyp_idmap_start; +static unsigned long hyp_idmap_end; +static phys_addr_t hyp_idmap_vector; + static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); @@ -71,172 +78,224 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) return p; } -static void free_ptes(pmd_t *pmd, unsigned long addr) +static void clear_pud_entry(pud_t *pud) { - pte_t *pte; - unsigned int i; + pmd_t *pmd_table = pmd_offset(pud, 0); + pud_clear(pud); + pmd_free(NULL, pmd_table); + put_page(virt_to_page(pud)); +} - for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) { - if (!pmd_none(*pmd) && pmd_table(*pmd)) { - pte = pte_offset_kernel(pmd, addr); - pte_free_kernel(NULL, pte); - } - pmd++; +static void clear_pmd_entry(pmd_t *pmd) +{ + pte_t *pte_table = pte_offset_kernel(pmd, 0); + pmd_clear(pmd); + pte_free_kernel(NULL, pte_table); + put_page(virt_to_page(pmd)); +} + +static bool pmd_empty(pmd_t *pmd) +{ + struct page *pmd_page = virt_to_page(pmd); + return page_count(pmd_page) == 1; +} + +static void clear_pte_entry(pte_t *pte) +{ + if (pte_present(*pte)) { + kvm_set_pte(pte, __pte(0)); + put_page(virt_to_page(pte)); } } -static void free_hyp_pgd_entry(unsigned long addr) +static bool pte_empty(pte_t *pte) +{ + struct page *pte_page = virt_to_page(pte); + return page_count(pte_page) == 1; +} + +static void unmap_range(pgd_t *pgdp, unsigned long long start, u64 
size) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; - unsigned long hyp_addr = KERN_TO_HYP(addr); + pte_t *pte; + unsigned long long addr = start, end = start + size; + u64 range; + + while (addr < end) { + pgd = pgdp + pgd_index(addr); + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) { + addr += PUD_SIZE; + continue; + } - pgd = hyp_pgd + pgd_index(hyp_addr); - pud = pud_offset(pgd, hyp_addr); + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + addr += PMD_SIZE; + continue; + } - if (pud_none(*pud)) - return; - BUG_ON(pud_bad(*pud)); + pte = pte_offset_kernel(pmd, addr); + clear_pte_entry(pte); + range = PAGE_SIZE; - pmd = pmd_offset(pud, hyp_addr); - free_ptes(pmd, addr); - pmd_free(NULL, pmd); - pud_clear(pud); + /* If we emptied the pte, walk back up the ladder */ + if (pte_empty(pte)) { + clear_pmd_entry(pmd); + range = PMD_SIZE; + if (pmd_empty(pmd)) { + clear_pud_entry(pud); + range = PUD_SIZE; + } + } + + addr += range; + } } /** - * free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables + * free_boot_hyp_pgd - free HYP boot page tables * - * Assumes this is a page table used strictly in Hyp-mode and therefore contains - * either mappings in the kernel memory area (above PAGE_OFFSET), or - * device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END). + * Free the HYP boot page tables. The bounce page is also freed. */ -void free_hyp_pmds(void) +void free_boot_hyp_pgd(void) { - unsigned long addr; - mutex_lock(&kvm_hyp_pgd_mutex); - for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) - free_hyp_pgd_entry(addr); - for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) - free_hyp_pgd_entry(addr); + + if (boot_hyp_pgd) { + unmap_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); + unmap_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); + kfree(boot_hyp_pgd); + boot_hyp_pgd = NULL; + } + + if (hyp_pgd) + unmap_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); + + kfree(init_bounce_page); + init_bounce_page = NULL; + mutex_unlock(&kvm_hyp_pgd_mutex); } -static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, - unsigned long end) +/** + * free_hyp_pgds - free Hyp-mode page tables + * + * Assumes hyp_pgd is a page table used strictly in Hyp-mode and + * therefore contains either mappings in the kernel memory area (above + * PAGE_OFFSET), or device mappings in the vmalloc range (from + * VMALLOC_START to VMALLOC_END). + * + * boot_hyp_pgd should only map two pages for the init code. 
+ */ +void free_hyp_pgds(void) { - pte_t *pte; unsigned long addr; - struct page *page; - for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { - unsigned long hyp_addr = KERN_TO_HYP(addr); + free_boot_hyp_pgd(); + + mutex_lock(&kvm_hyp_pgd_mutex); - pte = pte_offset_kernel(pmd, hyp_addr); - BUG_ON(!virt_addr_valid(addr)); - page = virt_to_page(addr); - kvm_set_pte(pte, mk_pte(page, PAGE_HYP)); + if (hyp_pgd) { + for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) + unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); + for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) + unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); + kfree(hyp_pgd); + hyp_pgd = NULL; } + + mutex_unlock(&kvm_hyp_pgd_mutex); } -static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start, - unsigned long end, - unsigned long *pfn_base) +static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, + unsigned long end, unsigned long pfn, + pgprot_t prot) { pte_t *pte; unsigned long addr; - for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { - unsigned long hyp_addr = KERN_TO_HYP(addr); - - pte = pte_offset_kernel(pmd, hyp_addr); - BUG_ON(pfn_valid(*pfn_base)); - kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE)); - (*pfn_base)++; - } + addr = start; + do { + pte = pte_offset_kernel(pmd, addr); + kvm_set_pte(pte, pfn_pte(pfn, prot)); + get_page(virt_to_page(pte)); + kvm_flush_dcache_to_poc(pte, sizeof(*pte)); + pfn++; + } while (addr += PAGE_SIZE, addr != end); } static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, - unsigned long end, unsigned long *pfn_base) + unsigned long end, unsigned long pfn, + pgprot_t prot) { pmd_t *pmd; pte_t *pte; unsigned long addr, next; - for (addr = start; addr < end; addr = next) { - unsigned long hyp_addr = KERN_TO_HYP(addr); - pmd = pmd_offset(pud, hyp_addr); + addr = start; + do { + pmd = pmd_offset(pud, addr); BUG_ON(pmd_sect(*pmd)); if (pmd_none(*pmd)) { - pte = pte_alloc_one_kernel(NULL, hyp_addr); + pte = pte_alloc_one_kernel(NULL, addr); if (!pte) { kvm_err("Cannot allocate Hyp pte\n"); return -ENOMEM; } pmd_populate_kernel(NULL, pmd, pte); + get_page(virt_to_page(pmd)); + kvm_flush_dcache_to_poc(pmd, sizeof(*pmd)); } next = pmd_addr_end(addr, end); - /* - * If pfn_base is NULL, we map kernel pages into HYP with the - * virtual address. Otherwise, this is considered an I/O - * mapping and we map the physical region starting at - * *pfn_base to [start, end[. 
- */ - if (!pfn_base) - create_hyp_pte_mappings(pmd, addr, next); - else - create_hyp_io_pte_mappings(pmd, addr, next, pfn_base); - } + create_hyp_pte_mappings(pmd, addr, next, pfn, prot); + pfn += (next - addr) >> PAGE_SHIFT; + } while (addr = next, addr != end); return 0; } -static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base) +static int __create_hyp_mappings(pgd_t *pgdp, + unsigned long start, unsigned long end, + unsigned long pfn, pgprot_t prot) { - unsigned long start = (unsigned long)from; - unsigned long end = (unsigned long)to; pgd_t *pgd; pud_t *pud; pmd_t *pmd; unsigned long addr, next; int err = 0; - if (start >= end) - return -EINVAL; - /* Check for a valid kernel memory mapping */ - if (!pfn_base && (!virt_addr_valid(from) || !virt_addr_valid(to - 1))) - return -EINVAL; - /* Check for a valid kernel IO mapping */ - if (pfn_base && (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))) - return -EINVAL; - mutex_lock(&kvm_hyp_pgd_mutex); - for (addr = start; addr < end; addr = next) { - unsigned long hyp_addr = KERN_TO_HYP(addr); - pgd = hyp_pgd + pgd_index(hyp_addr); - pud = pud_offset(pgd, hyp_addr); + addr = start & PAGE_MASK; + end = PAGE_ALIGN(end); + do { + pgd = pgdp + pgd_index(addr); + pud = pud_offset(pgd, addr); if (pud_none_or_clear_bad(pud)) { - pmd = pmd_alloc_one(NULL, hyp_addr); + pmd = pmd_alloc_one(NULL, addr); if (!pmd) { kvm_err("Cannot allocate Hyp pmd\n"); err = -ENOMEM; goto out; } pud_populate(NULL, pud, pmd); + get_page(virt_to_page(pud)); + kvm_flush_dcache_to_poc(pud, sizeof(*pud)); } next = pgd_addr_end(addr, end); - err = create_hyp_pmd_mappings(pud, addr, next, pfn_base); + err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot); if (err) goto out; - } + pfn += (next - addr) >> PAGE_SHIFT; + } while (addr = next, addr != end); out: mutex_unlock(&kvm_hyp_pgd_mutex); return err; @@ -250,27 +309,41 @@ out: * The same virtual address as the kernel virtual address is also used * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying * physical pages. - * - * Note: Wrapping around zero in the "to" address is not supported. */ int create_hyp_mappings(void *from, void *to) { - return __create_hyp_mappings(from, to, NULL); + unsigned long phys_addr = virt_to_phys(from); + unsigned long start = KERN_TO_HYP((unsigned long)from); + unsigned long end = KERN_TO_HYP((unsigned long)to); + + /* Check for a valid kernel memory mapping */ + if (!virt_addr_valid(from) || !virt_addr_valid(to - 1)) + return -EINVAL; + + return __create_hyp_mappings(hyp_pgd, start, end, + __phys_to_pfn(phys_addr), PAGE_HYP); } /** * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode * @from: The kernel start VA of the range * @to: The kernel end VA of the range (exclusive) - * @addr: The physical start address which gets mapped + * @phys_addr: The physical start address which gets mapped * * The resulting HYP VA is the same as the kernel VA, modulo * HYP_PAGE_OFFSET. 
*/ -int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr) +int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) { - unsigned long pfn = __phys_to_pfn(addr); - return __create_hyp_mappings(from, to, &pfn); + unsigned long start = KERN_TO_HYP((unsigned long)from); + unsigned long end = KERN_TO_HYP((unsigned long)to); + + /* Check for a valid kernel IO mapping */ + if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)) + return -EINVAL; + + return __create_hyp_mappings(hyp_pgd, start, end, + __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); } /** @@ -307,42 +380,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) return 0; } -static void clear_pud_entry(pud_t *pud) -{ - pmd_t *pmd_table = pmd_offset(pud, 0); - pud_clear(pud); - pmd_free(NULL, pmd_table); - put_page(virt_to_page(pud)); -} - -static void clear_pmd_entry(pmd_t *pmd) -{ - pte_t *pte_table = pte_offset_kernel(pmd, 0); - pmd_clear(pmd); - pte_free_kernel(NULL, pte_table); - put_page(virt_to_page(pmd)); -} - -static bool pmd_empty(pmd_t *pmd) -{ - struct page *pmd_page = virt_to_page(pmd); - return page_count(pmd_page) == 1; -} - -static void clear_pte_entry(pte_t *pte) -{ - if (pte_present(*pte)) { - kvm_set_pte(pte, __pte(0)); - put_page(virt_to_page(pte)); - } -} - -static bool pte_empty(pte_t *pte) -{ - struct page *pte_page = virt_to_page(pte); - return page_count(pte_page) == 1; -} - /** * unmap_stage2_range -- Clear stage2 page table entries to unmap a range * @kvm: The VM pointer @@ -356,43 +393,7 @@ static bool pte_empty(pte_t *pte) */ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - phys_addr_t addr = start, end = start + size; - u64 range; - - while (addr < end) { - pgd = kvm->arch.pgd + pgd_index(addr); - pud = pud_offset(pgd, addr); - if (pud_none(*pud)) { - addr += PUD_SIZE; - continue; - } - - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - addr += PMD_SIZE; - continue; - } - - pte = pte_offset_kernel(pmd, addr); - clear_pte_entry(pte); - range = PAGE_SIZE; - - /* If we emptied the pte, walk back up the ladder */ - if (pte_empty(pte)) { - clear_pmd_entry(pmd); - range = PMD_SIZE; - if (pmd_empty(pmd)) { - clear_pud_entry(pud); - range = PUD_SIZE; - } - } - - addr += range; - } + unmap_range(kvm->arch.pgd, start, size); } /** @@ -728,47 +729,105 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) phys_addr_t kvm_mmu_get_httbr(void) { - VM_BUG_ON(!virt_addr_valid(hyp_pgd)); return virt_to_phys(hyp_pgd); } +phys_addr_t kvm_mmu_get_boot_httbr(void) +{ + return virt_to_phys(boot_hyp_pgd); +} + +phys_addr_t kvm_get_idmap_vector(void) +{ + return hyp_idmap_vector; +} + int kvm_mmu_init(void) { - if (!hyp_pgd) { + int err; + + hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start); + hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end); + hyp_idmap_vector = virt_to_phys(__kvm_hyp_init); + + if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) { + /* + * Our init code is crossing a page boundary. Allocate + * a bounce page, copy the code over and use that. + */ + size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start; + phys_addr_t phys_base; + + init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!init_bounce_page) { + kvm_err("Couldn't allocate HYP init bounce page\n"); + err = -ENOMEM; + goto out; + } + + memcpy(init_bounce_page, __hyp_idmap_text_start, len); + /* + * Warning: the code we just copied to the bounce page + * must be flushed to the point of coherency. 
+ * Otherwise, the data may be sitting in L2, and HYP + * mode won't be able to observe it as it runs with + * caches off at that point. + */ + kvm_flush_dcache_to_poc(init_bounce_page, len); + + phys_base = virt_to_phys(init_bounce_page); + hyp_idmap_vector += phys_base - hyp_idmap_start; + hyp_idmap_start = phys_base; + hyp_idmap_end = phys_base + len; + + kvm_info("Using HYP init bounce page @%lx\n", + (unsigned long)phys_base); + } + + hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + if (!hyp_pgd || !boot_hyp_pgd) { kvm_err("Hyp mode PGD not allocated\n"); - return -ENOMEM; + err = -ENOMEM; + goto out; } - return 0; -} + /* Create the idmap in the boot page tables */ + err = __create_hyp_mappings(boot_hyp_pgd, + hyp_idmap_start, hyp_idmap_end, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP); -/** - * kvm_clear_idmap - remove all idmaps from the hyp pgd - * - * Free the underlying pmds for all pgds in range and clear the pgds (but - * don't free them) afterwards. - */ -void kvm_clear_hyp_idmap(void) -{ - unsigned long addr, end; - unsigned long next; - pgd_t *pgd = hyp_pgd; - pud_t *pud; - pmd_t *pmd; + if (err) { + kvm_err("Failed to idmap %lx-%lx\n", + hyp_idmap_start, hyp_idmap_end); + goto out; + } - addr = virt_to_phys(__hyp_idmap_text_start); - end = virt_to_phys(__hyp_idmap_text_end); + /* Map the very same page at the trampoline VA */ + err = __create_hyp_mappings(boot_hyp_pgd, + TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP); + if (err) { + kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n", + TRAMPOLINE_VA); + goto out; + } - pgd += pgd_index(addr); - do { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - continue; - pud = pud_offset(pgd, addr); - pmd = pmd_offset(pud, addr); + /* Map the same page again into the runtime page tables */ + err = __create_hyp_mappings(hyp_pgd, + TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP); + if (err) { + kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n", + TRAMPOLINE_VA); + goto out; + } - pud_clear(pud); - kvm_clean_pmd_entry(pmd); - pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK)); - } while (pgd++, addr = next, addr < end); + return 0; +out: + free_hyp_pgds(); + return err; } diff --git a/arch/arm/kvm/perf.c b/arch/arm/kvm/perf.c new file mode 100644 index 000000000000..1a3849da0b4b --- /dev/null +++ b/arch/arm/kvm/perf.c @@ -0,0 +1,68 @@ +/* + * Based on the x86 implementation. + * + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/perf_event.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_emulate.h> + +static int kvm_is_in_guest(void) +{ + return kvm_arm_get_running_vcpu() != NULL; +} + +static int kvm_is_user_mode(void) +{ + struct kvm_vcpu *vcpu; + + vcpu = kvm_arm_get_running_vcpu(); + + if (vcpu) + return !vcpu_mode_priv(vcpu); + + return 0; +} + +static unsigned long kvm_get_guest_ip(void) +{ + struct kvm_vcpu *vcpu; + + vcpu = kvm_arm_get_running_vcpu(); + + if (vcpu) + return *vcpu_pc(vcpu); + + return 0; +} + +static struct perf_guest_info_callbacks kvm_guest_cbs = { + .is_in_guest = kvm_is_in_guest, + .is_user_mode = kvm_is_user_mode, + .get_guest_ip = kvm_get_guest_ip, +}; + +int kvm_perf_init(void) +{ + return perf_register_guest_info_callbacks(&kvm_guest_cbs); +} + +int kvm_perf_teardown(void) +{ + return perf_unregister_guest_info_callbacks(&kvm_guest_cbs); +} diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index d58ad4ff8d34..2ebc97e16b91 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -466,8 +466,6 @@ config MACH_MX31ADS_WM1133_EV1 depends on MACH_MX31ADS depends on MFD_WM8350_I2C depends on REGULATOR_WM8350 = y - select MFD_WM8350_CONFIG_MODE_0 - select MFD_WM8352_CONFIG_MODE_0 help Include support for the Wolfson Microelectronics 1133-EV1 PMU and audio module for the MX31ADS platform. diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c index dd712f109738..358b82cb9f78 100644 --- a/arch/arm/mach-omap1/pm.c +++ b/arch/arm/mach-omap1/pm.c @@ -557,7 +557,7 @@ static const struct file_operations omap_pm_debug_fops = { .open = omap_pm_debug_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; static void omap_pm_init_debugfs(void) diff --git a/arch/arm/mach-s3c64xx/Kconfig b/arch/arm/mach-s3c64xx/Kconfig index 283cb77d4721..20578536aec7 100644 --- a/arch/arm/mach-s3c64xx/Kconfig +++ b/arch/arm/mach-s3c64xx/Kconfig @@ -200,10 +200,7 @@ endchoice config SMDK6410_WM1190_EV1 bool "Support Wolfson Microelectronics 1190-EV1 PMIC card" depends on MACH_SMDK6410 - select MFD_WM8350_CONFIG_MODE_0 - select MFD_WM8350_CONFIG_MODE_3 select MFD_WM8350_I2C - select MFD_WM8352_CONFIG_MODE_0 select REGULATOR select REGULATOR_WM8350 select SAMSUNG_GPIO_EXTRA64 diff --git a/arch/arm/mach-s3c64xx/mach-crag6410-module.c b/arch/arm/mach-s3c64xx/mach-crag6410-module.c index a946b759fabd..7ccfef227c77 100644 --- a/arch/arm/mach-s3c64xx/mach-crag6410-module.c +++ b/arch/arm/mach-s3c64xx/mach-crag6410-module.c @@ -208,7 +208,7 @@ static const struct i2c_board_info wm1277_devs[] = { static struct arizona_pdata wm5102_reva_pdata = { .ldoena = S3C64XX_GPN(7), .gpio_base = CODEC_GPIO_BASE, - .irq_active_high = true, + .irq_flags = IRQF_TRIGGER_HIGH, .micd_pol_gpio = CODEC_GPIO_BASE + 4, .micd_rate = 6, .gpio_defaults = { @@ -238,7 +238,7 @@ static struct spi_board_info wm5102_reva_spi_devs[] = { static struct arizona_pdata wm5102_pdata = { .ldoena = S3C64XX_GPN(7), .gpio_base = CODEC_GPIO_BASE, - .irq_active_high = true, + .irq_flags = IRQF_TRIGGER_HIGH, .micd_pol_gpio = CODEC_GPIO_BASE + 2, .gpio_defaults = { [2] = 0x10000, /* AIF3TXLRCLK */ diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 5ee505c937d1..83cb3ac27095 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -8,7 +8,6 @@ #include <asm/pgtable.h> #include <asm/sections.h> #include <asm/system_info.h> -#include <asm/virt.h> pgd_t *idmap_pgd; @@ -83,37 +82,10 @@ static void identity_mapping_add(pgd_t *pgd, const char 
*text_start, } while (pgd++, addr = next, addr != end); } -#if defined(CONFIG_ARM_VIRT_EXT) && defined(CONFIG_ARM_LPAE) -pgd_t *hyp_pgd; - -extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; - -static int __init init_static_idmap_hyp(void) -{ - hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); - if (!hyp_pgd) - return -ENOMEM; - - pr_info("Setting up static HYP identity map for 0x%p - 0x%p\n", - __hyp_idmap_text_start, __hyp_idmap_text_end); - identity_mapping_add(hyp_pgd, __hyp_idmap_text_start, - __hyp_idmap_text_end, PMD_SECT_AP1); - - return 0; -} -#else -static int __init init_static_idmap_hyp(void) -{ - return 0; -} -#endif - extern char __idmap_text_start[], __idmap_text_end[]; static int __init init_static_idmap(void) { - int ret; - idmap_pgd = pgd_alloc(&init_mm); if (!idmap_pgd) return -ENOMEM; @@ -123,12 +95,10 @@ static int __init init_static_idmap(void) identity_mapping_add(idmap_pgd, __idmap_text_start, __idmap_text_end, 0); - ret = init_static_idmap_hyp(); - /* Flush L1 for the hardware to see this page table content */ flush_cache_louis(); - return ret; + return 0; } early_initcall(init_static_idmap); diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index c3f2e0bc644a..453ebe46b065 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -1,7 +1,3 @@ -config SYMBOL_PREFIX - string - default "_" - config MMU def_bool n @@ -33,6 +29,7 @@ config BLACKFIN select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_UID16 + select HAVE_UNDERSCORE_SYMBOL_PREFIX select VIRT_TO_BUS select ARCH_WANT_IPC_PARSE_VERSION select HAVE_GENERIC_HARDIRQS diff --git a/arch/cris/arch-v10/kernel/fasttimer.c b/arch/cris/arch-v10/kernel/fasttimer.c index ce6f512968a4..48a59afbeeb1 100644 --- a/arch/cris/arch-v10/kernel/fasttimer.c +++ b/arch/cris/arch-v10/kernel/fasttimer.c @@ -644,7 +644,7 @@ static const struct file_operations proc_fasttimer_fops = { .open = proc_fasttimer_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; #endif /* PROC_FS */ diff --git a/arch/cris/arch-v32/kernel/fasttimer.c b/arch/cris/arch-v32/kernel/fasttimer.c index e43dd70acd96..f6644535b17e 100644 --- a/arch/cris/arch-v32/kernel/fasttimer.c +++ b/arch/cris/arch-v32/kernel/fasttimer.c @@ -616,7 +616,7 @@ static const struct file_operations proc_fasttimer_fops = { .open = proc_fasttimer_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; #endif /* PROC_FS */ diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 79250de1b12a..303e4f9a79d1 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -12,10 +12,7 @@ config H8300 select MODULES_USE_ELF_RELA select OLD_SIGSUSPEND3 select OLD_SIGACTION - -config SYMBOL_PREFIX - string - default "_" + select HAVE_UNDERSCORE_SYMBOL_PREFIX config MMU bool diff --git a/arch/h8300/kernel/gpio.c b/arch/h8300/kernel/gpio.c index b02c752cd326..084bfd0c107e 100644 --- a/arch/h8300/kernel/gpio.c +++ b/arch/h8300/kernel/gpio.c @@ -161,7 +161,7 @@ static const struct file_operations gpio_proc_fops = { .open = gpio_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; static __init int register_proc(void) diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index cfa74983c675..989dd3fe8de1 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -26,6 +26,7 @@ #define KVM_USER_MEM_SLOTS 32 #define 
KVM_COALESCED_MMIO_PAGE_OFFSET 1 +#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS /* define exit reasons from vmm to kvm*/ #define EXIT_REASON_VM_PANIC 0 diff --git a/arch/ia64/include/uapi/asm/kvm.h b/arch/ia64/include/uapi/asm/kvm.h index ec6c6b301238..99503c284400 100644 --- a/arch/ia64/include/uapi/asm/kvm.h +++ b/arch/ia64/include/uapi/asm/kvm.h @@ -27,7 +27,6 @@ /* Select x86 specific features in <linux/kvm.h> */ #define __KVM_HAVE_IOAPIC #define __KVM_HAVE_IRQ_LINE -#define __KVM_HAVE_DEVICE_ASSIGNMENT /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index b17129e3b7c8..2b3c2d79256f 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -929,7 +929,7 @@ static const struct file_operations proc_palinfo_fops = { .open = proc_palinfo_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; static void __cpuinit diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index 5035245cb258..4bc580af67b3 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -694,7 +694,7 @@ static const struct file_operations proc_salinfo_fops = { .open = proc_salinfo_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; module_init(salinfo_init); diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index 2cd225f8c68d..990b86420cc6 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig @@ -21,12 +21,11 @@ config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on BROKEN depends on HAVE_KVM && MODULES - # for device assignment: - depends on PCI depends on BROKEN select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING select KVM_APIC_ARCHITECTURE select KVM_MMIO ---help--- @@ -50,6 +49,17 @@ config KVM_INTEL Provides support for KVM on Itanium 2 processors equipped with the VT extensions. +config KVM_DEVICE_ASSIGNMENT + bool "KVM legacy PCI device assignment support" + depends on KVM && PCI && IOMMU_API + default y + ---help--- + Provide support for legacy PCI device assignment through KVM. The + kernel now also supports a full featured userspace device driver + framework through VFIO, which supersedes much of this support. + + If unsure, say Y. 
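A note on the seq_release -> single_release conversions that recur throughout this series: single_open() allocates a struct seq_operations behind the scenes, and only single_release() frees it (before delegating to seq_release()), so pairing single_open() with plain seq_release leaks that allocation on every close. A minimal sketch of the corrected pattern (demo_show, demo_open and demo_fops are hypothetical names, not part of this patch):

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	static int demo_show(struct seq_file *m, void *v)
	{
		seq_puts(m, "hello\n");
		return 0;
	}

	static int demo_open(struct inode *inode, struct file *file)
	{
		return single_open(file, demo_show, NULL);
	}

	static const struct file_operations demo_fops = {
		.open		= demo_open,
		.read		= seq_read,
		.llseek		= seq_lseek,
		.release	= single_release,	/* frees what single_open() allocated */
	};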
+ source drivers/vhost/Kconfig endif # VIRTUALIZATION diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index db3d7c5d1071..1a4053789d01 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -49,10 +49,10 @@ ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ - coalesced_mmio.o irq_comm.o assigned-dev.o) + coalesced_mmio.o irq_comm.o) -ifeq ($(CONFIG_IOMMU_API),y) -common-objs += $(addprefix ../../../virt/kvm/, iommu.o) +ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y) +common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o) endif kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index ad3126a58644..5b2dc0d10c8f 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -204,9 +204,11 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; +#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_IOMMU: r = iommu_present(&pci_bus_type); break; +#endif default: r = 0; } @@ -924,13 +926,15 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } -int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event) +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, + bool line_status) { if (!irqchip_in_kernel(kvm)) return -ENXIO; irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, - irq_event->irq, irq_event->level); + irq_event->irq, irq_event->level, + line_status); return 0; } @@ -942,24 +946,6 @@ long kvm_arch_vm_ioctl(struct file *filp, int r = -ENOTTY; switch (ioctl) { - case KVM_SET_MEMORY_REGION: { - struct kvm_memory_region kvm_mem; - struct kvm_userspace_memory_region kvm_userspace_mem; - - r = -EFAULT; - if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) - goto out; - kvm_userspace_mem.slot = kvm_mem.slot; - kvm_userspace_mem.flags = kvm_mem.flags; - kvm_userspace_mem.guest_phys_addr = - kvm_mem.guest_phys_addr; - kvm_userspace_mem.memory_size = kvm_mem.memory_size; - r = kvm_vm_ioctl_set_memory_region(kvm, - &kvm_userspace_mem, false); - if (r) - goto out; - break; - } case KVM_CREATE_IRQCHIP: r = -EFAULT; r = kvm_ioapic_init(kvm); @@ -1384,9 +1370,7 @@ void kvm_arch_sync_events(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_iommu_unmap_guest(kvm); -#ifdef KVM_CAP_DEVICE_ASSIGNMENT kvm_free_all_assigned_devices(kvm); -#endif kfree(kvm->arch.vioapic); kvm_release_vm_pages(kvm); } @@ -1578,9 +1562,8 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - bool user_alloc) + enum kvm_mr_change change) { unsigned long i; unsigned long pfn; @@ -1610,8 +1593,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { return; } diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h index c3e2935b6db4..c5f92a926a9a 100644 --- a/arch/ia64/kvm/lapic.h +++ b/arch/ia64/kvm/lapic.h @@ -27,10 +27,4 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); #define kvm_apic_present(x) (true) #define kvm_lapic_enabled(x) (true) -static 
inline bool kvm_apic_vid_enabled(void) -{ - /* IA64 has no apicv supporting, do nothing here */ - return false; -} - #endif diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c index daa8d6badb16..ec4de2b09653 100644 --- a/arch/ia64/sn/kernel/sn2/prominfo_proc.c +++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c @@ -149,7 +149,7 @@ static const struct file_operations proc_fit_fops = { .open = proc_fit_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; static int proc_version_show(struct seq_file *m, void *v) @@ -183,7 +183,7 @@ static const struct file_operations proc_version_fops = { .open = proc_version_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; /* module entry points */ diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig index b06b41861aac..6f16c1469327 100644 --- a/arch/metag/Kconfig +++ b/arch/metag/Kconfig @@ -1,7 +1,3 @@ -config SYMBOL_PREFIX - string - default "_" - config METAG def_bool y select EMBEDDED @@ -28,6 +24,7 @@ config METAG select HAVE_OPROFILE select HAVE_PERF_EVENTS select HAVE_SYSCALL_TRACEPOINTS + select HAVE_UNDERSCORE_SYMBOL_PREFIX select IRQ_DOMAIN select MODULES_USE_ELF_RELA select OF diff --git a/arch/mips/kernel/smtc-proc.c b/arch/mips/kernel/smtc-proc.c index 9fb714450e95..c10aa84c9fa9 100644 --- a/arch/mips/kernel/smtc-proc.c +++ b/arch/mips/kernel/smtc-proc.c @@ -61,7 +61,7 @@ static const struct file_operations smtc_proc_fops = { .open = smtc_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; void init_smtc_stats(void) diff --git a/arch/mips/pci/ops-pmcmsp.c b/arch/mips/pci/ops-pmcmsp.c index 4eaab6327369..3d27800edba2 100644 --- a/arch/mips/pci/ops-pmcmsp.c +++ b/arch/mips/pci/ops-pmcmsp.c @@ -92,7 +92,7 @@ static const struct file_operations msp_pci_rd_cnt_fops = { .open = msp_pci_rd_cnt_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; /***************************************************************************** @@ -169,7 +169,7 @@ static const struct file_operations gen_pci_cfg_wr_fops = { .open = gen_pci_cfg_wr_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; /***************************************************************************** diff --git a/arch/mips/sibyte/sb1250/bus_watcher.c b/arch/mips/sibyte/sb1250/bus_watcher.c index cb1e3cb37d70..8871e3345bff 100644 --- a/arch/mips/sibyte/sb1250/bus_watcher.c +++ b/arch/mips/sibyte/sb1250/bus_watcher.c @@ -145,7 +145,7 @@ static const struct file_operations bw_proc_fops = { .open = bw_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; static void create_proc_decoder(struct bw_stats_struct *stats) diff --git a/arch/parisc/kernel/pdc_chassis.c b/arch/parisc/kernel/pdc_chassis.c index 8fa314fbfb18..3e04242de5a7 100644 --- a/arch/parisc/kernel/pdc_chassis.c +++ b/arch/parisc/kernel/pdc_chassis.c @@ -275,7 +275,7 @@ static const struct file_operations pdc_chassis_warn_fops = { .open = pdc_chassis_warn_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; static int __init pdc_chassis_create_procfs(void) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 4bc2c3dad6ad..cf4df8e2139a 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ 
b/arch/powerpc/include/asm/hvcall.h @@ -270,6 +270,9 @@ #define H_SET_MODE 0x31C #define MAX_HCALL_OPCODE H_SET_MODE +/* Platform specific hcalls, used by KVM */ +#define H_RTAS 0xf000 + #ifndef __ASSEMBLY__ /** diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 5a56e1c5f851..349ed85c7d61 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -142,6 +142,8 @@ extern int kvmppc_mmu_hv_init(void); extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); +extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, + unsigned int vec); extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags); extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); @@ -156,7 +158,8 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, unsigned long pte_index); extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, unsigned long *nb_ret); -extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr); +extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr, + unsigned long gpa, bool dirty); extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel); extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, @@ -458,6 +461,8 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) #define OSI_SC_MAGIC_R4 0x77810F9B #define INS_DCBZ 0x7c0007ec +/* TO = 31 for unconditional trap */ +#define INS_TW 0x7fe00008 /* LPIDs we support with this build -- runtime limit may be lower */ #define KVMPPC_NR_LPIDS (LPID_RSVD + 1) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 38bec1dc9928..9c1ff330c805 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -268,4 +268,17 @@ static inline int is_vrma_hpte(unsigned long hpte_v) (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16))); } +#ifdef CONFIG_KVM_BOOK3S_64_HV +/* + * Note modification of an HPTE; set the HPTE modified bit + * if anyone is interested. 
+ */ +static inline void note_hpte_modification(struct kvm *kvm, + struct revmap_entry *rev) +{ + if (atomic_read(&kvm->arch.hpte_mod_interest)) + rev->guest_rpte |= HPTE_GR_MODIFIED; +} +#endif /* CONFIG_KVM_BOOK3S_64_HV */ + #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index cdc3d2717cc6..9039d3c97eec 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -20,6 +20,11 @@ #ifndef __ASM_KVM_BOOK3S_ASM_H__ #define __ASM_KVM_BOOK3S_ASM_H__ +/* XICS ICP register offsets */ +#define XICS_XIRR 4 +#define XICS_MFRR 0xc +#define XICS_IPI 2 /* interrupt source # for IPIs */ + #ifdef __ASSEMBLY__ #ifdef CONFIG_KVM_BOOK3S_HANDLER @@ -81,10 +86,11 @@ struct kvmppc_host_state { #ifdef CONFIG_KVM_BOOK3S_64_HV u8 hwthread_req; u8 hwthread_state; - + u8 host_ipi; struct kvm_vcpu *kvm_vcpu; struct kvmppc_vcore *kvm_vcore; unsigned long xics_phys; + u32 saved_xirr; u64 dabr; u64 host_mmcr[3]; u32 host_pmc[8]; diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index b7cd3356a532..d3c1eb34c986 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -26,6 +26,8 @@ /* LPIDs we support with this build -- runtime limit may be lower */ #define KVMPPC_NR_LPIDS 64 +#define KVMPPC_INST_EHPRIV 0x7c00021c + static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) { vcpu->arch.gpr[num] = val; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d1bb86074721..af326cde7cb6 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -44,6 +44,10 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif +/* These values are internal and can be increased later */ +#define KVM_NR_IRQCHIPS 1 +#define KVM_IRQCHIP_NUM_PINS 256 + #if !defined(CONFIG_KVM_440) #include <linux/mmu_notifier.h> @@ -188,6 +192,10 @@ struct kvmppc_linear_info { int type; }; +/* XICS components, defined in book3s_xics.c */ +struct kvmppc_xics; +struct kvmppc_icp; + /* * The reverse mapping array has one entry for each HPTE, * which stores the guest's view of the second word of the HPTE @@ -255,6 +263,13 @@ struct kvm_arch { #endif /* CONFIG_KVM_BOOK3S_64_HV */ #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; + struct list_head rtas_tokens; +#endif +#ifdef CONFIG_KVM_MPIC + struct openpic *mpic; +#endif +#ifdef CONFIG_KVM_XICS + struct kvmppc_xics *xics; #endif }; @@ -301,11 +316,13 @@ struct kvmppc_vcore { * that a guest can register. 
*/ struct kvmppc_vpa { + unsigned long gpa; /* Current guest phys addr */ void *pinned_addr; /* Address in kernel linear mapping */ void *pinned_end; /* End of region */ unsigned long next_gpa; /* Guest phys addr for update */ unsigned long len; /* Number of bytes required */ u8 update_pending; /* 1 => update pinned_addr from next_gpa */ + bool dirty; /* true => area has been modified by kernel */ }; struct kvmppc_pte { @@ -359,6 +376,11 @@ struct kvmppc_slb { #define KVMPPC_BOOKE_MAX_IAC 4 #define KVMPPC_BOOKE_MAX_DAC 2 +/* KVMPPC_EPR_USER takes precedence over KVMPPC_EPR_KERNEL */ +#define KVMPPC_EPR_NONE 0 /* EPR not supported */ +#define KVMPPC_EPR_USER 1 /* exit to userspace to fill EPR */ +#define KVMPPC_EPR_KERNEL 2 /* in-kernel irqchip */ + struct kvmppc_booke_debug_reg { u32 dbcr0; u32 dbcr1; @@ -370,6 +392,12 @@ struct kvmppc_booke_debug_reg { u64 dac[KVMPPC_BOOKE_MAX_DAC]; }; +#define KVMPPC_IRQ_DEFAULT 0 +#define KVMPPC_IRQ_MPIC 1 +#define KVMPPC_IRQ_XICS 2 + +struct openpic; + struct kvm_vcpu_arch { ulong host_stack; u32 host_pid; @@ -502,8 +530,11 @@ struct kvm_vcpu_arch { spinlock_t wdt_lock; struct timer_list wdt_timer; u32 tlbcfg[4]; + u32 tlbps[4]; u32 mmucfg; + u32 eptcfg; u32 epr; + u32 crit_save; struct kvmppc_booke_debug_reg dbg_reg; #endif gpa_t paddr_accessed; @@ -521,7 +552,7 @@ struct kvm_vcpu_arch { u8 sane; u8 cpu_type; u8 hcall_needed; - u8 epr_enabled; + u8 epr_flags; /* KVMPPC_EPR_xxx */ u8 epr_needed; u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ @@ -548,6 +579,13 @@ struct kvm_vcpu_arch { unsigned long magic_page_pa; /* phys addr to map the magic page to */ unsigned long magic_page_ea; /* effect. addr to map the magic page to */ + int irq_type; /* one of KVM_IRQ_* */ + int irq_cpu_id; + struct openpic *mpic; /* KVM_IRQ_MPIC */ +#ifdef CONFIG_KVM_XICS + struct kvmppc_icp *icp; /* XICS presentation controller */ +#endif + #ifdef CONFIG_KVM_BOOK3S_64_HV struct kvm_vcpu_arch_shared shregs; @@ -588,5 +626,6 @@ struct kvm_vcpu_arch { #define KVM_MMIO_REG_FQPR 0x0060 #define __KVM_HAVE_ARCH_WQP +#define __KVM_HAVE_CREATE_DEVICE #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 44a657adf416..a5287fe03d77 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -44,7 +44,7 @@ enum emulation_result { EMULATE_DO_DCR, /* kvm_run filled with DCR request */ EMULATE_FAIL, /* can't emulate this instruction */ EMULATE_AGAIN, /* something went wrong. 
go again */ - EMULATE_DO_PAPR, /* kvm_run filled with PAPR request */ + EMULATE_EXIT_USER, /* emulation requires exit to user-space */ }; extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); @@ -104,8 +104,7 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); -extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq); +extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu); extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -131,6 +130,7 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm, extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, unsigned long porder); extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); + extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, @@ -152,7 +152,7 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); extern void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old); + const struct kvm_memory_slot *old); extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info); extern void kvmppc_core_flush_memslot(struct kvm *kvm, @@ -165,6 +165,18 @@ extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); +int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); + +extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp); +extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu); +extern void kvmppc_rtas_tokens_free(struct kvm *kvm); +extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, + u32 priority); +extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, + u32 *priority); +extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); +extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. 
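The comment above uses IBM (big-endian) bit numbering: bit 0 is the leftmost, most significant bit of the word, and both ends of the msb..lsb range are included in the extracted field. A rough illustration of that convention for a 32-bit instruction word (get_field is a hypothetical helper, not the kernel's implementation, and it assumes the field is narrower than the full word):

	static inline u32 get_field(u32 inst, int msb, int lsb)
	{
		int len = lsb - msb + 1;	/* both ends included */
		u32 mask = (1u << len) - 1;	/* requires len < 32 */

		/* In IBM numbering, bit lsb lies (31 - lsb) places
		 * above the machine's least significant bit. */
		return (inst >> (31 - lsb)) & mask;
	}

For example, a major opcode held in bits 0-5 comes out as get_field(inst, 0, 5), which reduces to inst >> 26.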
@@ -246,12 +258,29 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *); void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); +struct openpic; + #ifdef CONFIG_KVM_BOOK3S_64_HV static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) { paca[cpu].kvm_hstate.xics_phys = addr; } +static inline u32 kvmppc_get_xics_latch(void) +{ + u32 xirr = get_paca()->kvm_hstate.saved_xirr; + + get_paca()->kvm_hstate.saved_xirr = 0; + + return xirr; +} + +static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) +{ + paca[cpu].kvm_hstate.host_ipi = host_ipi; +} + +extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu); extern void kvm_linear_init(void); #else @@ -260,6 +289,46 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) static inline void kvm_linear_init(void) {} + +static inline u32 kvmppc_get_xics_latch(void) +{ + return 0; +} + +static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) +{} + +static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) +{ + kvm_vcpu_kick(vcpu); +} +#endif + +#ifdef CONFIG_KVM_XICS +static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.irq_type == KVMPPC_IRQ_XICS; +} +extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); +extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server); +extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args); +extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd); +extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu); +extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); +extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev, + struct kvm_vcpu *vcpu, u32 cpu); +#else +static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) + { return 0; } +static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { } +static inline int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, + unsigned long server) + { return -EINVAL; } +static inline int kvm_vm_ioctl_xics_irq(struct kvm *kvm, + struct kvm_irq_level *args) + { return -ENOTTY; } +static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) + { return 0; } #endif static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) @@ -271,6 +340,32 @@ static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) #endif } +#ifdef CONFIG_KVM_MPIC + +void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu); +int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, + u32 cpu); +void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu); + +#else + +static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu) +{ +} + +static inline int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, + struct kvm_vcpu *vcpu, u32 cpu) +{ + return -EINVAL; +} + +static inline void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, + struct kvm_vcpu *vcpu) +{ +} + +#endif /* CONFIG_KVM_MPIC */ + int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, struct kvm_config_tlb *cfg); int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, @@ -283,8 +378,15 @@ void kvmppc_init_lpid(unsigned long nr_lpids); static inline void kvmppc_mmu_flush_icache(pfn_t pfn) { - /* Clear i-cache for new pages */ struct page *page; + /* + * We can only access pages that the kernel maps + * as memory. Bail out for unmapped ones. 
+ */ + if (!pfn_valid(pfn)) + return; + + /* Clear i-cache for new pages */ page = pfn_to_page(pfn); if (!test_bit(PG_arch_1, &page->flags)) { flush_dcache_icache_page(page); @@ -324,4 +426,6 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb) return ea; } +extern void xics_wake_cpu(int cpu); + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 3d17427e4fd7..a6136515c7f2 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -300,6 +300,7 @@ #define LPCR_PECE1 0x00002000 /* decrementer can cause exit */ #define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */ #define LPCR_MER 0x00000800 /* Mediated External Exception */ +#define LPCR_MER_SH 11 #define LPCR_LPES 0x0000000c #define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */ #define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 16064d00adb9..0fb1a6e9ff90 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -25,6 +25,8 @@ /* Select powerpc specific features in <linux/kvm.h> */ #define __KVM_HAVE_SPAPR_TCE #define __KVM_HAVE_PPC_SMT +#define __KVM_HAVE_IRQCHIP +#define __KVM_HAVE_IRQ_LINE struct kvm_regs { __u64 pc; @@ -272,8 +274,31 @@ struct kvm_debug_exit_arch { /* for KVM_SET_GUEST_DEBUG */ struct kvm_guest_debug_arch { + struct { + /* H/W breakpoint/watchpoint address */ + __u64 addr; + /* + * Type denotes h/w breakpoint, read watchpoint, write + * watchpoint or watchpoint (both read and write). + */ +#define KVMPPC_DEBUG_NONE 0x0 +#define KVMPPC_DEBUG_BREAKPOINT (1UL << 1) +#define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2) +#define KVMPPC_DEBUG_WATCH_READ (1UL << 3) + __u32 type; + __u32 reserved; + } bp[16]; }; +/* Debug related defines */ +/* + * kvm_guest_debug->control is a 32-bit field. The lower 16 bits are generic + * and the upper 16 bits are architecture specific. The architecture-specific + * bits select whether the ioctl sets a hardware breakpoint or a software breakpoint.
+ */ +#define KVM_GUESTDBG_USE_SW_BP 0x00010000 +#define KVM_GUESTDBG_USE_HW_BP 0x00020000 + /* definition of registers in kvm_run */ struct kvm_sync_regs { }; @@ -299,6 +324,12 @@ struct kvm_allocate_rma { __u64 rma_size; }; +/* for KVM_CAP_PPC_RTAS */ +struct kvm_rtas_token_args { + char name[120]; + __u64 token; /* Use a token of 0 to undefine a mapping */ +}; + struct kvm_book3e_206_tlb_entry { __u32 mas8; __u32 mas1; @@ -359,6 +390,26 @@ struct kvm_get_htab_header { __u16 n_invalid; }; +/* Per-vcpu XICS interrupt controller state */ +#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) + +#define KVM_REG_PPC_ICP_CPPR_SHIFT 56 /* current proc priority */ +#define KVM_REG_PPC_ICP_CPPR_MASK 0xff +#define KVM_REG_PPC_ICP_XISR_SHIFT 32 /* interrupt status field */ +#define KVM_REG_PPC_ICP_XISR_MASK 0xffffff +#define KVM_REG_PPC_ICP_MFRR_SHIFT 24 /* pending IPI priority */ +#define KVM_REG_PPC_ICP_MFRR_MASK 0xff +#define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */ +#define KVM_REG_PPC_ICP_PPRI_MASK 0xff + +/* Device control API: PPC-specific devices */ +#define KVM_DEV_MPIC_GRP_MISC 1 +#define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */ + +#define KVM_DEV_MPIC_GRP_REGISTER 2 /* 32-bit */ +#define KVM_DEV_MPIC_GRP_IRQ_ACTIVE 3 /* 32-bit */ + +/* One-Reg API: PPC-specific registers */ #define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) #define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2) #define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3) @@ -417,4 +468,47 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85) #define KVM_REG_PPC_EPR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86) +/* Timer Status Register OR/CLEAR interface */ +#define KVM_REG_PPC_OR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x87) +#define KVM_REG_PPC_CLEAR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x88) +#define KVM_REG_PPC_TCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x89) +#define KVM_REG_PPC_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8a) + +/* Debugging: Special instruction for software breakpoint */ +#define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b) + +/* MMU registers */ +#define KVM_REG_PPC_MAS0 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8c) +#define KVM_REG_PPC_MAS1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8d) +#define KVM_REG_PPC_MAS2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8e) +#define KVM_REG_PPC_MAS7_3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8f) +#define KVM_REG_PPC_MAS4 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x90) +#define KVM_REG_PPC_MAS6 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x91) +#define KVM_REG_PPC_MMUCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x92) +/* + * TLBnCFG fields TLBnCFG_N_ENTRY and TLBnCFG_ASSOC can be changed only using + * KVM_CAP_SW_TLB ioctl + */ +#define KVM_REG_PPC_TLB0CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x93) +#define KVM_REG_PPC_TLB1CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94) +#define KVM_REG_PPC_TLB2CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95) +#define KVM_REG_PPC_TLB3CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96) +#define KVM_REG_PPC_TLB0PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x97) +#define KVM_REG_PPC_TLB1PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98) +#define KVM_REG_PPC_TLB2PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99) +#define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a) +#define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b) + +/* PPC64 eXternal Interrupt Controller Specification */ +#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ + +/* Layout of 64-bit source attribute values */ +#define 
KVM_XICS_DESTINATION_SHIFT 0 +#define KVM_XICS_DESTINATION_MASK 0xffffffffULL +#define KVM_XICS_PRIORITY_SHIFT 32 +#define KVM_XICS_PRIORITY_MASK 0xff +#define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40) +#define KVM_XICS_MASKED (1ULL << 41) +#define KVM_XICS_PENDING (1ULL << 42) + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 172233eab799..b51a97cfedf8 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -480,6 +480,7 @@ int main(void) DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); + DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); #endif #ifdef CONFIG_PPC_BOOK3S DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); @@ -576,6 +577,8 @@ int main(void) HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); + HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); + HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_MMCR, host_mmcr); HSTATE_FIELD(HSTATE_PMC, host_pmc); HSTATE_FIELD(HSTATE_PURR, host_purr); @@ -599,6 +602,7 @@ int main(void) DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); + DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save)); #endif /* CONFIG_PPC_BOOK3S */ #endif /* CONFIG_KVM */ diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 3d7fd21c65f9..2f5c6b6d6877 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -124,6 +124,18 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) return kvmppc_set_sregs_ivor(vcpu, sregs); } +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) +{ + return -EINVAL; +} + +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) +{ + return -EINVAL; +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_44x *vcpu_44x; diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 63c67ec72e43..eb643f862579 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -136,21 +136,41 @@ config KVM_E500V2 If unsure, say N. config KVM_E500MC - bool "KVM support for PowerPC E500MC/E5500 processors" + bool "KVM support for PowerPC E500MC/E5500/E6500 processors" depends on PPC_E500MC select KVM select KVM_MMIO select KVM_BOOKE_HV select MMU_NOTIFIER ---help--- - Support running unmodified E500MC/E5500 (32-bit) guest kernels in - virtual machines on E500MC/E5500 host processors. + Support running unmodified E500MC/E5500/E6500 guest kernels in + virtual machines on E500MC/E5500/E6500 host processors. This module provides access to the hardware capabilities through a character device node named /dev/kvm. If unsure, say N. +config KVM_MPIC + bool "KVM in-kernel MPIC emulation" + depends on KVM && E500 + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING + select HAVE_KVM_MSI + help + Enable support for emulating MPIC devices inside the + host kernel, rather than relying on userspace to emulate. + Currently, support is limited to certain versions of + Freescale's MPIC implementation. 
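For reference, the 64-bit value transported by KVM_REG_PPC_ICP_STATE packs the per-vcpu presentation controller fields using the KVM_REG_PPC_ICP_* shift and mask constants defined earlier in this patch. A sketch of how a consumer might unpack it (struct icp_state and decode_icp are illustrative only, not part of the kernel API):

	struct icp_state {
		u8	cppr;	/* current processor priority */
		u32	xisr;	/* source of the interrupt being serviced */
		u8	mfrr;	/* pending IPI priority */
		u8	ppri;	/* priority of the pending interrupt */
	};

	static struct icp_state decode_icp(u64 v)
	{
		struct icp_state s = {
			.cppr = (v >> KVM_REG_PPC_ICP_CPPR_SHIFT) & KVM_REG_PPC_ICP_CPPR_MASK,
			.xisr = (v >> KVM_REG_PPC_ICP_XISR_SHIFT) & KVM_REG_PPC_ICP_XISR_MASK,
			.mfrr = (v >> KVM_REG_PPC_ICP_MFRR_SHIFT) & KVM_REG_PPC_ICP_MFRR_MASK,
			.ppri = (v >> KVM_REG_PPC_ICP_PPRI_SHIFT) & KVM_REG_PPC_ICP_PPRI_MASK,
		};
		return s;
	}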
+ +config KVM_XICS + bool "KVM in-kernel XICS emulation" + depends on KVM_BOOK3S_64 && !KVM_MPIC + ---help--- + Include support for the XICS (eXternal Interrupt Controller + Specification) interrupt controller architecture used on + IBM POWER (pSeries) servers. + source drivers/vhost/Kconfig endif # VIRTUALIZATION diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index b772eded8c26..422de3f4d46c 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -72,12 +72,18 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ book3s_hv.o \ book3s_hv_interrupts.o \ book3s_64_mmu_hv.o +kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ + book3s_hv_rm_xics.o kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ book3s_hv_rmhandlers.o \ book3s_hv_rm_mmu.o \ book3s_64_vio_hv.o \ book3s_hv_ras.o \ - book3s_hv_builtin.o + book3s_hv_builtin.o \ + $(kvm-book3s_64-builtin-xics-objs-y) + +kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ + book3s_xics.o kvm-book3s_64-module-objs := \ ../../../virt/kvm/kvm_main.o \ @@ -86,6 +92,7 @@ kvm-book3s_64-module-objs := \ emulate.o \ book3s.o \ book3s_64_vio.o \ + book3s_rtas.o \ $(kvm-book3s_64-objs-y) kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) @@ -103,6 +110,9 @@ kvm-book3s_32-objs := \ book3s_32_mmu.o kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) +kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o +kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o) + kvm-objs := $(kvm-objs-m) $(kvm-objs-y) obj-$(CONFIG_KVM_440) += kvm.o diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a4b645285240..700df6f1d32c 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -104,7 +104,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec) return prio; } -static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, +void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) { unsigned long old_pending = vcpu->arch.pending_exceptions; @@ -160,8 +160,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, kvmppc_book3s_queue_irqprio(vcpu, vec); } -void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq) +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) { kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); @@ -530,6 +529,21 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); break; #endif /* CONFIG_ALTIVEC */ + case KVM_REG_PPC_DEBUG_INST: { + u32 opcode = INS_TW; + r = copy_to_user((u32 __user *)(long)reg->addr, + &opcode, sizeof(u32)); + break; + } +#ifdef CONFIG_KVM_XICS + case KVM_REG_PPC_ICP_STATE: + if (!vcpu->arch.icp) { + r = -ENXIO; + break; + } + val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu)); + break; +#endif /* CONFIG_KVM_XICS */ default: r = -EINVAL; break; @@ -592,6 +606,16 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); break; #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_KVM_XICS + case KVM_REG_PPC_ICP_STATE: + if (!vcpu->arch.icp) { + r = -ENXIO; + break; + } + r = kvmppc_xics_set_icp(vcpu, + set_reg_val(reg->id, val)); + break; +#endif /* CONFIG_KVM_XICS */ default: r = -EINVAL; break; @@ -607,6 +631,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, return 0; } +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu 
*vcpu, + struct kvm_guest_debug *dbg) +{ + return -EINVAL; +} + void kvmppc_decrementer_func(unsigned long data) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index da98e26f6e45..5880dfb31074 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -893,7 +893,10 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, /* Harvest R and C */ rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; - rev[i].guest_rpte = ptel | rcbits; + if (rcbits & ~rev[i].guest_rpte) { + rev[i].guest_rpte = ptel | rcbits; + note_hpte_modification(kvm, &rev[i]); + } } unlock_rmap(rmapp); hptep[0] &= ~HPTE_V_HVLOCK; @@ -976,7 +979,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, /* Now check and modify the HPTE */ if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { kvmppc_clear_ref_hpte(kvm, hptep, i); - rev[i].guest_rpte |= HPTE_R_R; + if (!(rev[i].guest_rpte & HPTE_R_R)) { + rev[i].guest_rpte |= HPTE_R_R; + note_hpte_modification(kvm, &rev[i]); + } ret = 1; } hptep[0] &= ~HPTE_V_HVLOCK; @@ -1080,7 +1086,10 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) hptep[1] &= ~HPTE_R_C; eieio(); hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; - rev[i].guest_rpte |= HPTE_R_C; + if (!(rev[i].guest_rpte & HPTE_R_C)) { + rev[i].guest_rpte |= HPTE_R_C; + note_hpte_modification(kvm, &rev[i]); + } ret = 1; } hptep[0] &= ~HPTE_V_HVLOCK; @@ -1090,11 +1099,30 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) return ret; } +static void harvest_vpa_dirty(struct kvmppc_vpa *vpa, + struct kvm_memory_slot *memslot, + unsigned long *map) +{ + unsigned long gfn; + + if (!vpa->dirty || !vpa->pinned_addr) + return; + gfn = vpa->gpa >> PAGE_SHIFT; + if (gfn < memslot->base_gfn || + gfn >= memslot->base_gfn + memslot->npages) + return; + + vpa->dirty = false; + if (map) + __set_bit_le(gfn - memslot->base_gfn, map); +} + long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long *map) { unsigned long i; unsigned long *rmapp; + struct kvm_vcpu *vcpu; preempt_disable(); rmapp = memslot->arch.rmap; @@ -1103,6 +1131,15 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, __set_bit_le(i, map); ++rmapp; } + + /* Harvest dirty bits from VPA and DTL updates */ + /* Note: we never modify the SLB shadow buffer areas */ + kvm_for_each_vcpu(i, vcpu, kvm) { + spin_lock(&vcpu->arch.vpa_update_lock); + harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map); + harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map); + spin_unlock(&vcpu->arch.vpa_update_lock); + } preempt_enable(); return 0; } @@ -1114,7 +1151,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, unsigned long gfn = gpa >> PAGE_SHIFT; struct page *page, *pages[1]; int npages; - unsigned long hva, psize, offset; + unsigned long hva, offset; unsigned long pa; unsigned long *physp; int srcu_idx; @@ -1146,14 +1183,9 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, } srcu_read_unlock(&kvm->srcu, srcu_idx); - psize = PAGE_SIZE; - if (PageHuge(page)) { - page = compound_head(page); - psize <<= compound_order(page); - } - offset = gpa & (psize - 1); + offset = gpa & (PAGE_SIZE - 1); if (nb_ret) - *nb_ret = psize - offset; + *nb_ret = PAGE_SIZE - offset; return page_address(page) + offset; err: @@ -1161,11 +1193,31 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, 
unsigned long gpa, return NULL; } -void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) +void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa, + bool dirty) { struct page *page = virt_to_page(va); + struct kvm_memory_slot *memslot; + unsigned long gfn; + unsigned long *rmap; + int srcu_idx; put_page(page); + + if (!dirty || !kvm->arch.using_mmu_notifiers) + return; + + /* We need to mark this page dirty in the rmap chain */ + gfn = gpa >> PAGE_SHIFT; + srcu_idx = srcu_read_lock(&kvm->srcu); + memslot = gfn_to_memslot(kvm, gfn); + if (memslot) { + rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; + lock_rmap(rmap); + *rmap |= KVMPPC_RMAP_CHANGED; + unlock_rmap(rmap); + } + srcu_read_unlock(&kvm->srcu, srcu_idx); } /* @@ -1193,16 +1245,36 @@ struct kvm_htab_ctx { #define HPTE_SIZE (2 * sizeof(unsigned long)) +/* + * Returns 1 if this HPT entry has been modified or has pending + * R/C bit changes. + */ +static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp) +{ + unsigned long rcbits_unset; + + if (revp->guest_rpte & HPTE_GR_MODIFIED) + return 1; + + /* Also need to consider changes in reference and changed bits */ + rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); + if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset)) + return 1; + + return 0; +} + static long record_hpte(unsigned long flags, unsigned long *hptp, unsigned long *hpte, struct revmap_entry *revp, int want_valid, int first_pass) { unsigned long v, r; + unsigned long rcbits_unset; int ok = 1; int valid, dirty; /* Unmodified entries are uninteresting except on the first pass */ - dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); + dirty = hpte_dirty(revp, hptp); if (!first_pass && !dirty) return 0; @@ -1223,16 +1295,28 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) cpu_relax(); v = hptp[0]; + + /* re-evaluate valid and dirty from synchronized HPTE value */ + valid = !!(v & HPTE_V_VALID); + dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); + + /* Harvest R and C into guest view if necessary */ + rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); + if (valid && (rcbits_unset & hptp[1])) { + revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) | + HPTE_GR_MODIFIED; + dirty = 1; + } + if (v & HPTE_V_ABSENT) { v &= ~HPTE_V_ABSENT; v |= HPTE_V_VALID; + valid = 1; } - /* re-evaluate valid and dirty from synchronized HPTE value */ - valid = !!(v & HPTE_V_VALID); if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED)) valid = 0; - r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C)); - dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); + + r = revp->guest_rpte; /* only clear modified if this is the right sort of entry */ if (valid == want_valid && dirty) { r &= ~HPTE_GR_MODIFIED; @@ -1288,7 +1372,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, /* Skip uninteresting entries, i.e. 
clean on not-first pass */ if (!first_pass) { while (i < kvm->arch.hpt_npte && - !(revp->guest_rpte & HPTE_GR_MODIFIED)) { + !hpte_dirty(revp, hptp)) { ++i; hptp += 2; ++revp; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 836c56975e21..1f6344c4408d 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -194,7 +194,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, run->papr_hcall.args[i] = gpr; } - emulated = EMULATE_DO_PAPR; + run->exit_reason = KVM_EXIT_PAPR_HCALL; + vcpu->arch.hcall_needed = 1; + emulated = EMULATE_EXIT_USER; break; } #endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index f5416934932b..9de24f8e03c7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -66,6 +66,31 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); +void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) +{ + int me; + int cpu = vcpu->cpu; + wait_queue_head_t *wqp; + + wqp = kvm_arch_vcpu_wq(vcpu); + if (waitqueue_active(wqp)) { + wake_up_interruptible(wqp); + ++vcpu->stat.halt_wakeup; + } + + me = get_cpu(); + + /* CPU points to the first thread of the core */ + if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) { + int real_cpu = cpu + vcpu->arch.ptid; + if (paca[real_cpu].kvm_hstate.xics_phys) + xics_wake_cpu(real_cpu); + else if (cpu_online(cpu)) + smp_send_reschedule(cpu); + } + put_cpu(); +} + /* * We use the vcpu_load/put functions to measure stolen time. * Stolen time is counted as time when either the vcpu is able to @@ -259,7 +284,7 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, len = ((struct reg_vpa *)va)->length.hword; else len = ((struct reg_vpa *)va)->length.word; - kvmppc_unpin_guest_page(kvm, va); + kvmppc_unpin_guest_page(kvm, va, vpa, false); /* Check length */ if (len > nb || len < sizeof(struct reg_vpa)) @@ -359,13 +384,13 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) va = NULL; nb = 0; if (gpa) - va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb); + va = kvmppc_pin_guest_page(kvm, gpa, &nb); spin_lock(&vcpu->arch.vpa_update_lock); if (gpa == vpap->next_gpa) break; /* sigh... unpin that one and try again */ if (va) - kvmppc_unpin_guest_page(kvm, va); + kvmppc_unpin_guest_page(kvm, va, gpa, false); } vpap->update_pending = 0; @@ -375,12 +400,15 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) * has changed the mappings underlying guest memory, * so unregister the region. */ - kvmppc_unpin_guest_page(kvm, va); + kvmppc_unpin_guest_page(kvm, va, gpa, false); va = NULL; } if (vpap->pinned_addr) - kvmppc_unpin_guest_page(kvm, vpap->pinned_addr); + kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa, + vpap->dirty); + vpap->gpa = gpa; vpap->pinned_addr = va; + vpap->dirty = false; if (va) vpap->pinned_end = va + vpap->len; } @@ -472,6 +500,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, /* order writing *dt vs. 
writing vpa->dtl_idx */ smp_wmb(); vpa->dtl_idx = ++vcpu->arch.dtl_index; + vcpu->arch.dtl.dirty = true; } int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) @@ -479,7 +508,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) unsigned long req = kvmppc_get_gpr(vcpu, 3); unsigned long target, ret = H_SUCCESS; struct kvm_vcpu *tvcpu; - int idx; + int idx, rc; switch (req) { case H_ENTER: @@ -515,6 +544,28 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6)); break; + case H_RTAS: + if (list_empty(&vcpu->kvm->arch.rtas_tokens)) + return RESUME_HOST; + + rc = kvmppc_rtas_hcall(vcpu); + + if (rc == -ENOENT) + return RESUME_HOST; + else if (rc == 0) + break; + + /* Send the error out to userspace via KVM_RUN */ + return rc; + + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + if (kvmppc_xics_enabled(vcpu)) { + ret = kvmppc_xics_hcall(vcpu, req); + break; + } /* fallthrough */ default: return RESUME_HOST; } @@ -913,15 +964,19 @@ out: return ERR_PTR(err); } +static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa) +{ + if (vpa->pinned_addr) + kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa, + vpa->dirty); +} + void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { spin_lock(&vcpu->arch.vpa_update_lock); - if (vcpu->arch.dtl.pinned_addr) - kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr); - if (vcpu->arch.slb_shadow.pinned_addr) - kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr); - if (vcpu->arch.vpa.pinned_addr) - kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr); + unpin_vpa(vcpu->kvm, &vcpu->arch.dtl); + unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow); + unpin_vpa(vcpu->kvm, &vcpu->arch.vpa); spin_unlock(&vcpu->arch.vpa_update_lock); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); @@ -955,7 +1010,6 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu) } extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); -extern void xics_wake_cpu(int cpu); static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, struct kvm_vcpu *vcpu) @@ -1330,9 +1384,12 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) break; vc->runner = vcpu; n_ceded = 0; - list_for_each_entry(v, &vc->runnable_threads, arch.run_list) + list_for_each_entry(v, &vc->runnable_threads, arch.run_list) { if (!v->arch.pending_exceptions) n_ceded += v->arch.ceded; + else + v->arch.ceded = 0; + } if (n_ceded == vc->n_runnable) kvmppc_vcore_blocked(vc); else @@ -1645,12 +1702,12 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old) { unsigned long npages = mem->memory_size >> PAGE_SHIFT; struct kvm_memory_slot *memslot; - if (npages && old.npages) { + if (npages && old->npages) { /* * If modifying a memslot, reset all the rmap dirty bits. 
* If this is a new memslot, we don't need to do anything @@ -1827,6 +1884,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) cpumask_setall(&kvm->arch.need_tlb_flush); INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); + INIT_LIST_HEAD(&kvm->arch.rtas_tokens); kvm->arch.rma = NULL; @@ -1872,6 +1930,8 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) kvm->arch.rma = NULL; } + kvmppc_rtas_tokens_free(kvm); + kvmppc_free_hpt(kvm); WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 19c93bae1aea..6dcbb49105a4 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -97,17 +97,6 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, } EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); -/* - * Note modification of an HPTE; set the HPTE modified bit - * if anyone is interested. - */ -static inline void note_hpte_modification(struct kvm *kvm, - struct revmap_entry *rev) -{ - if (atomic_read(&kvm->arch.hpte_mod_interest)) - rev->guest_rpte |= HPTE_GR_MODIFIED; -} - /* Remove this HPTE from the chain for a real page */ static void remove_revmap_chain(struct kvm *kvm, long pte_index, struct revmap_entry *rev, diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c new file mode 100644 index 000000000000..b4b0082f761c --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -0,0 +1,406 @@ +/* + * Copyright 2012 Michael Ellerman, IBM Corporation. + * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/kvm_host.h> +#include <linux/err.h> + +#include <asm/kvm_book3s.h> +#include <asm/kvm_ppc.h> +#include <asm/hvcall.h> +#include <asm/xics.h> +#include <asm/debug.h> +#include <asm/synch.h> +#include <asm/ppc-opcode.h> + +#include "book3s_xics.h" + +#define DEBUG_PASSUP + +static inline void rm_writeb(unsigned long paddr, u8 val) +{ + __asm__ __volatile__("sync; stbcix %0,0,%1" + : : "r" (val), "r" (paddr) : "memory"); +} + +static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, + struct kvm_vcpu *this_vcpu) +{ + struct kvmppc_icp *this_icp = this_vcpu->arch.icp; + unsigned long xics_phys; + int cpu; + + /* Mark the target VCPU as having an interrupt pending */ + vcpu->stat.queue_intr++; + set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); + + /* Kick self ? Just set MER and return */ + if (vcpu == this_vcpu) { + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER); + return; + } + + /* Check if the core is loaded, if not, too hard */ + cpu = vcpu->cpu; + if (cpu < 0 || cpu >= nr_cpu_ids) { + this_icp->rm_action |= XICS_RM_KICK_VCPU; + this_icp->rm_kick_target = vcpu; + return; + } + /* In SMT cpu will always point to thread 0, we adjust it */ + cpu += vcpu->arch.ptid; + + /* Not too hard, then poke the target */ + xics_phys = paca[cpu].kvm_hstate.xics_phys; + rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); +} + +static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu) +{ + /* Note: Only called on self ! 
*/ + clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, + &vcpu->arch.pending_exceptions); + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER); +} + +static inline bool icp_rm_try_update(struct kvmppc_icp *icp, + union kvmppc_icp_state old, + union kvmppc_icp_state new) +{ + struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu; + bool success; + + /* Calculate new output value */ + new.out_ee = (new.xisr && (new.pending_pri < new.cppr)); + + /* Attempt atomic update */ + success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw; + if (!success) + goto bail; + + /* + * Check for output state update + * + * Note that this is racy since another processor could be updating + * the state already. This is why we never clear the interrupt output + * here, we only ever set it. The clear only happens prior to doing + * an update and only by the processor itself. Currently we do it + * in Accept (H_XIRR) and Up_Cppr (H_XPPR). + * + * We also do not try to figure out whether the EE state has changed, + * we unconditionally set it if the new state calls for it. The reason + * for that is that we opportunistically remove the pending interrupt + * flag when raising CPPR, so we need to set it back here if an + * interrupt is still pending. + */ + if (new.out_ee) + icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu); + + /* Expose the state change for debug purposes */ + this_vcpu->arch.icp->rm_dbgstate = new; + this_vcpu->arch.icp->rm_dbgtgt = icp->vcpu; + + bail: + return success; +} + +static inline int check_too_hard(struct kvmppc_xics *xics, + struct kvmppc_icp *icp) +{ + return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS; +} + +static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp, + u8 new_cppr) +{ + union kvmppc_icp_state old_state, new_state; + bool resend; + + /* + * This handles several related states in one operation: + * + * ICP State: Down_CPPR + * + * Load CPPR with new value and if the XISR is 0 + * then check for resends: + * + * ICP State: Resend + * + * If MFRR is more favored than CPPR, check for IPIs + * and notify ICS of a potential resend. This is done + * asynchronously (when used in real mode, we will have + * to exit here). + * + * We do not handle the complete Check_IPI as documented + * here. In the PAPR, this state will be used for both + * Set_MFRR and Down_CPPR. However, we know that we aren't + * changing the MFRR state here so we don't need to handle + * the case of an MFRR causing a reject of a pending irq, + * this will have been handled when the MFRR was set in the + * first place. + * + * Thus we don't have to handle rejects, only resends. + * + * When implementing real mode for HV KVM, resend will lead to + * a H_TOO_HARD return and the whole transaction will be handled + * in virtual mode. 
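+ *
+ * (That handoff works via the rm_action flags: icp_rm_down_cppr sets
+ * XICS_RM_CHECK_RESEND below, and check_too_hard() then turns any
+ * pending rm_action into an H_TOO_HARD return for the caller.)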
+ */ + do { + old_state = new_state = ACCESS_ONCE(icp->state); + + /* Down_CPPR */ + new_state.cppr = new_cppr; + + /* + * Cut down Resend / Check_IPI / IPI + * + * The logic is that we cannot have a pending interrupt + * trumped by an IPI at this point (see above), so we + * know that either the pending interrupt is already an + * IPI (in which case we don't care to override it) or + * it's either more favored than us or non existent + */ + if (new_state.mfrr < new_cppr && + new_state.mfrr <= new_state.pending_pri) { + new_state.pending_pri = new_state.mfrr; + new_state.xisr = XICS_IPI; + } + + /* Latch/clear resend bit */ + resend = new_state.need_resend; + new_state.need_resend = 0; + + } while (!icp_rm_try_update(icp, old_state, new_state)); + + /* + * Now handle resend checks. Those are asynchronous to the ICP + * state update in HW (ie bus transactions) so we can handle them + * separately here as well. + */ + if (resend) + icp->rm_action |= XICS_RM_CHECK_RESEND; +} + + +unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu) +{ + union kvmppc_icp_state old_state, new_state; + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp = vcpu->arch.icp; + u32 xirr; + + if (!xics || !xics->real_mode) + return H_TOO_HARD; + + /* First clear the interrupt */ + icp_rm_clr_vcpu_irq(icp->vcpu); + + /* + * ICP State: Accept_Interrupt + * + * Return the pending interrupt (if any) along with the + * current CPPR, then clear the XISR & set CPPR to the + * pending priority + */ + do { + old_state = new_state = ACCESS_ONCE(icp->state); + + xirr = old_state.xisr | (((u32)old_state.cppr) << 24); + if (!old_state.xisr) + break; + new_state.cppr = new_state.pending_pri; + new_state.pending_pri = 0xff; + new_state.xisr = 0; + + } while (!icp_rm_try_update(icp, old_state, new_state)); + + /* Return the result in GPR4 */ + vcpu->arch.gpr[4] = xirr; + + return check_too_hard(xics, icp); +} + +int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, + unsigned long mfrr) +{ + union kvmppc_icp_state old_state, new_state; + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp, *this_icp = vcpu->arch.icp; + u32 reject; + bool resend; + bool local; + + if (!xics || !xics->real_mode) + return H_TOO_HARD; + + local = this_icp->server_num == server; + if (local) + icp = this_icp; + else + icp = kvmppc_xics_find_server(vcpu->kvm, server); + if (!icp) + return H_PARAMETER; + + /* + * ICP state: Set_MFRR + * + * If the CPPR is more favored than the new MFRR, then + * nothing needs to be done as there can be no XISR to + * reject. 
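+ *
+ * For example: with CPPR 0xff and XISR 0x1000 pending at priority
+ * 0x06, an H_IPI that sets MFRR to 0x04 replaces the pending
+ * interrupt, so 0x1000 is rejected (to be re-delivered later) and
+ * the XISR becomes XICS_IPI at pending priority 0x04.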
+ *
+ * If the CPPR is less favored, then we might be replacing
+ * an interrupt, and thus need to possibly reject it as in
+ *
+ * ICP state: Check_IPI
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ /* Set_MFRR */
+ new_state.mfrr = mfrr;
+
+ /* Check_IPI */
+ reject = 0;
+ resend = false;
+ if (mfrr < new_state.cppr) {
+ /* Reject a pending interrupt if not an IPI */
+ if (mfrr <= new_state.pending_pri)
+ reject = new_state.xisr;
+ new_state.pending_pri = mfrr;
+ new_state.xisr = XICS_IPI;
+ }
+
+ if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+ resend = new_state.need_resend;
+ new_state.need_resend = 0;
+ }
+ } while (!icp_rm_try_update(icp, old_state, new_state));
+
+ /* Pass rejects to virtual mode */
+ if (reject && reject != XICS_IPI) {
+ this_icp->rm_action |= XICS_RM_REJECT;
+ this_icp->rm_reject = reject;
+ }
+
+ /* Pass resends to virtual mode */
+ if (resend)
+ this_icp->rm_action |= XICS_RM_CHECK_RESEND;
+
+ return check_too_hard(xics, this_icp);
+}
+
+int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u32 reject;
+
+ if (!xics || !xics->real_mode)
+ return H_TOO_HARD;
+
+ /*
+ * ICP State: Set_CPPR
+ *
+ * We can safely compare the new value with the current
+ * value outside of the transaction as the CPPR is only
+ * ever changed by the processor on itself
+ */
+ if (cppr > icp->state.cppr) {
+ icp_rm_down_cppr(xics, icp, cppr);
+ goto bail;
+ } else if (cppr == icp->state.cppr)
+ return H_SUCCESS;
+
+ /*
+ * ICP State: Up_CPPR
+ *
+ * The processor is raising its priority, this can result
+ * in a rejection of a pending interrupt:
+ *
+ * ICP State: Reject_Current
+ *
+ * We can remove EE from the current processor, the update
+ * transaction will set it again if needed
+ */
+ icp_rm_clr_vcpu_irq(icp->vcpu);
+
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ reject = 0;
+ new_state.cppr = cppr;
+
+ if (cppr <= new_state.pending_pri) {
+ reject = new_state.xisr;
+ new_state.xisr = 0;
+ new_state.pending_pri = 0xff;
+ }
+
+ } while (!icp_rm_try_update(icp, old_state, new_state));
+
+ /* Pass rejects to virtual mode */
+ if (reject && reject != XICS_IPI) {
+ icp->rm_action |= XICS_RM_REJECT;
+ icp->rm_reject = reject;
+ }
+ bail:
+ return check_too_hard(xics, icp);
+}
+
+int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u32 irq = xirr & 0x00ffffff;
+ u16 src;
+
+ if (!xics || !xics->real_mode)
+ return H_TOO_HARD;
+
+ /*
+ * ICP State: EOI
+ *
+ * Note: If EOI is incorrectly used by SW to lower the CPPR
+ * value (i.e. more favored), we do not check for rejection of
+ * a pending interrupt, this is a SW error and PAPR specifies
+ * that we don't have to deal with it.
+ *
+ * The sending of an EOI to the ICS is handled after the
+ * CPPR update
+ *
+ * ICP State: Down_CPPR which we handle
+ * in a separate function as it's shared with H_CPPR.
+ */
+ icp_rm_down_cppr(xics, icp, xirr >> 24);
+
+ /* IPIs have no EOI */
+ if (irq == XICS_IPI)
+ goto bail;
+ /*
+ * EOI handling: If the interrupt is still asserted, we need to
+ * resend it. We can take a lockless "peek" at the ICS state here.
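+ * If the source is still asserted we flag XICS_RM_REJECT below, so
+ * that virtual mode re-runs the delivery state machine for it.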
+ * + * "Message" interrupts will never have "asserted" set + */ + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) + goto bail; + state = &ics->irq_state[src]; + + /* Still asserted, resend it, we make it look like a reject */ + if (state->asserted) { + icp->rm_action |= XICS_RM_REJECT; + icp->rm_reject = irq; + } + bail: + return check_too_hard(xics, icp); +} diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index e33d11f1b977..b02f91e4c70d 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -79,10 +79,6 @@ _GLOBAL(kvmppc_hv_entry_trampoline) * * *****************************************************************************/ -#define XICS_XIRR 4 -#define XICS_QIRR 0xc -#define XICS_IPI 2 /* interrupt source # for IPIs */ - /* * We come in here when wakened from nap mode on a secondary hw thread. * Relocation is off and most register values are lost. @@ -101,50 +97,51 @@ kvm_start_guest: li r0,1 stb r0,PACA_NAPSTATELOST(r13) - /* get vcpu pointer, NULL if we have no vcpu to run */ - ld r4,HSTATE_KVM_VCPU(r13) - cmpdi cr1,r4,0 + /* were we napping due to cede? */ + lbz r0,HSTATE_NAPPING(r13) + cmpwi r0,0 + bne kvm_end_cede + + /* + * We weren't napping due to cede, so this must be a secondary + * thread being woken up to run a guest, or being woken up due + * to a stray IPI. (Or due to some machine check or hypervisor + * maintenance interrupt while the core is in KVM.) + */ /* Check the wake reason in SRR1 to see why we got here */ mfspr r3,SPRN_SRR1 rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ cmpwi r3,4 /* was it an external interrupt? */ - bne 27f - - /* - * External interrupt - for now assume it is an IPI, since we - * should never get any other interrupts sent to offline threads. - * Only do this for secondary threads. - */ - beq cr1,25f - lwz r3,VCPU_PTID(r4) - cmpwi r3,0 - beq 27f -25: ld r5,HSTATE_XICS_PHYS(r13) - li r0,0xff - li r6,XICS_QIRR - li r7,XICS_XIRR + bne 27f /* if not */ + ld r5,HSTATE_XICS_PHYS(r13) + li r7,XICS_XIRR /* if it was an external interrupt, */ lwzcix r8,r5,r7 /* get and ack the interrupt */ sync clrldi. r9,r8,40 /* get interrupt source ID. */ - beq 27f /* none there? */ - cmpwi r9,XICS_IPI - bne 26f + beq 28f /* none there? */ + cmpwi r9,XICS_IPI /* was it an IPI? */ + bne 29f + li r0,0xff + li r6,XICS_MFRR stbcix r0,r5,r6 /* clear IPI */ -26: stwcix r8,r5,r7 /* EOI the interrupt */ - -27: /* XXX should handle hypervisor maintenance interrupts etc. here */ + stwcix r8,r5,r7 /* EOI the interrupt */ + sync /* order loading of vcpu after that */ - /* reload vcpu pointer after clearing the IPI */ + /* get vcpu pointer, NULL if we have no vcpu to run */ ld r4,HSTATE_KVM_VCPU(r13) cmpdi r4,0 /* if we have no vcpu to run, go back to sleep */ beq kvm_no_guest + b kvmppc_hv_entry - /* were we napping due to cede? */ - lbz r0,HSTATE_NAPPING(r13) - cmpwi r0,0 - bne kvm_end_cede +27: /* XXX should handle hypervisor maintenance interrupts etc. here */ + b kvm_no_guest +28: /* SRR1 said external but ICP said nope?? */ + b kvm_no_guest +29: /* External non-IPI interrupt to offline secondary thread? help?? 
*/ + stw r8,HSTATE_SAVED_XIRR(r13) + b kvm_no_guest .global kvmppc_hv_entry kvmppc_hv_entry: @@ -260,6 +257,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) lwz r5, LPPACA_YIELDCOUNT(r3) addi r5, r5, 1 stw r5, LPPACA_YIELDCOUNT(r3) + li r6, 1 + stb r6, VCPU_VPA_DIRTY(r4) 25: /* Load up DAR and DSISR */ ld r5, VCPU_DAR(r4) @@ -485,20 +484,20 @@ toc_tlbie_lock: mtctr r6 mtxer r7 + ld r10, VCPU_PC(r4) + ld r11, VCPU_MSR(r4) kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ ld r6, VCPU_SRR0(r4) ld r7, VCPU_SRR1(r4) - ld r10, VCPU_PC(r4) - ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */ + /* r11 = vcpu->arch.msr & ~MSR_HV */ rldicl r11, r11, 63 - MSR_HV_LG, 1 rotldi r11, r11, 1 + MSR_HV_LG ori r11, r11, MSR_ME /* Check if we can deliver an external or decrementer interrupt now */ ld r0,VCPU_PENDING_EXC(r4) - li r8,(1 << BOOK3S_IRQPRIO_EXTERNAL) - oris r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h + lis r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h and r0,r0,r8 cmpdi cr1,r0,0 andi. r0,r11,MSR_EE @@ -526,10 +525,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Move SRR0 and SRR1 into the respective regs */ 5: mtspr SPRN_SRR0, r6 mtspr SPRN_SRR1, r7 - li r0,0 - stb r0,VCPU_CEDED(r4) /* cancel cede */ fast_guest_return: + li r0,0 + stb r0,VCPU_CEDED(r4) /* cancel cede */ mtspr SPRN_HSRR0,r10 mtspr SPRN_HSRR1,r11 @@ -676,17 +675,99 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) cmpwi r12,BOOK3S_INTERRUPT_SYSCALL beq hcall_try_real_mode - /* Check for mediated interrupts (could be done earlier really ...) */ + /* Only handle external interrupts here on arch 206 and later */ BEGIN_FTR_SECTION - cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL - bne+ 1f - andi. r0,r11,MSR_EE - beq 1f - mfspr r5,SPRN_LPCR - andi. r0,r5,LPCR_MER - bne bounce_ext_interrupt -1: -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + b ext_interrupt_to_host +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) + + /* External interrupt ? */ + cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL + bne+ ext_interrupt_to_host + + /* External interrupt, first check for host_ipi. If this is + * set, we know the host wants us out so let's do it now + */ +do_ext_interrupt: + lbz r0, HSTATE_HOST_IPI(r13) + cmpwi r0, 0 + bne ext_interrupt_to_host + + /* Now read the interrupt from the ICP */ + ld r5, HSTATE_XICS_PHYS(r13) + li r7, XICS_XIRR + cmpdi r5, 0 + beq- ext_interrupt_to_host + lwzcix r3, r5, r7 + rlwinm. r0, r3, 0, 0xffffff + sync + beq 3f /* if nothing pending in the ICP */ + + /* We found something in the ICP... + * + * If it's not an IPI, stash it in the PACA and return to + * the host, we don't (yet) handle directing real external + * interrupts directly to the guest + */ + cmpwi r0, XICS_IPI + bne ext_stash_for_host + + /* It's an IPI, clear the MFRR and EOI it */ + li r0, 0xff + li r6, XICS_MFRR + stbcix r0, r5, r6 /* clear the IPI */ + stwcix r3, r5, r7 /* EOI it */ + sync + + /* We need to re-check host IPI now in case it got set in the + * meantime. If it's clear, we bounce the interrupt to the + * guest + */ + lbz r0, HSTATE_HOST_IPI(r13) + cmpwi r0, 0 + bne- 1f + + /* Allright, looks like an IPI for the guest, we need to set MER */ +3: + /* Check if any CPU is heading out to the host, if so head out too */ + ld r5, HSTATE_KVM_VCORE(r13) + lwz r0, VCORE_ENTRY_EXIT(r5) + cmpwi r0, 0x100 + bge ext_interrupt_to_host + + /* See if there is a pending interrupt for the guest */ + mfspr r8, SPRN_LPCR + ld r0, VCPU_PENDING_EXC(r9) + /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */ + rldicl. 
r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63 + rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH + beq 2f + + /* And if the guest EE is set, we can deliver immediately, else + * we return to the guest with MER set + */ + andi. r0, r11, MSR_EE + beq 2f + mtspr SPRN_SRR0, r10 + mtspr SPRN_SRR1, r11 + li r10, BOOK3S_INTERRUPT_EXTERNAL + li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ + rotldi r11, r11, 63 +2: mr r4, r9 + mtspr SPRN_LPCR, r8 + b fast_guest_return + + /* We raced with the host, we need to resend that IPI, bummer */ +1: li r0, IPI_PRIORITY + stbcix r0, r5, r6 /* set the IPI */ + sync + b ext_interrupt_to_host + +ext_stash_for_host: + /* It's not an IPI and it's for the host, stash it in the PACA + * before exit, it will be picked up by the host ICP driver + */ + stw r3, HSTATE_SAVED_XIRR(r13) +ext_interrupt_to_host: guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ /* Save DEC */ @@ -829,7 +910,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) beq 44f ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */ li r0,IPI_PRIORITY - li r7,XICS_QIRR + li r7,XICS_MFRR stbcix r0,r7,r8 /* trigger the IPI */ 44: srdi. r3,r3,1 addi r6,r6,PACA_SIZE @@ -1018,6 +1099,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) lwz r3, LPPACA_YIELDCOUNT(r8) addi r3, r3, 1 stw r3, LPPACA_YIELDCOUNT(r8) + li r3, 1 + stb r3, VCPU_VPA_DIRTY(r9) 25: /* Save PMU registers if requested */ /* r8 and cr0.eq are live here */ @@ -1350,11 +1433,19 @@ hcall_real_table: .long 0 /* 0x58 */ .long 0 /* 0x5c */ .long 0 /* 0x60 */ - .long 0 /* 0x64 */ - .long 0 /* 0x68 */ - .long 0 /* 0x6c */ - .long 0 /* 0x70 */ - .long 0 /* 0x74 */ +#ifdef CONFIG_KVM_XICS + .long .kvmppc_rm_h_eoi - hcall_real_table + .long .kvmppc_rm_h_cppr - hcall_real_table + .long .kvmppc_rm_h_ipi - hcall_real_table + .long 0 /* 0x70 - H_IPOLL */ + .long .kvmppc_rm_h_xirr - hcall_real_table +#else + .long 0 /* 0x64 - H_EOI */ + .long 0 /* 0x68 - H_CPPR */ + .long 0 /* 0x6c - H_IPI */ + .long 0 /* 0x70 - H_IPOLL */ + .long 0 /* 0x74 - H_XIRR */ +#endif .long 0 /* 0x78 */ .long 0 /* 0x7c */ .long 0 /* 0x80 */ @@ -1405,15 +1496,6 @@ ignore_hdec: mr r4,r9 b fast_guest_return -bounce_ext_interrupt: - mr r4,r9 - mtspr SPRN_SRR0,r10 - mtspr SPRN_SRR1,r11 - li r10,BOOK3S_INTERRUPT_EXTERNAL - li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ - rotldi r11,r11,63 - b fast_guest_return - _GLOBAL(kvmppc_h_set_dabr) std r4,VCPU_DABR(r3) /* Work around P7 bug where DABR can get corrupted on mtspr */ @@ -1519,6 +1601,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) b . kvm_end_cede: + /* get vcpu pointer */ + ld r4, HSTATE_KVM_VCPU(r13) + /* Woken by external or decrementer interrupt */ ld r1, HSTATE_HOST_R1(r13) @@ -1558,6 +1643,16 @@ kvm_end_cede: li r0,0 stb r0,HSTATE_NAPPING(r13) + /* Check the wake reason in SRR1 to see why we got here */ + mfspr r3, SPRN_SRR1 + rlwinm r3, r3, 44-31, 0x7 /* extract wake reason field */ + cmpwi r3, 4 /* was it an external interrupt? 
*/ + li r12, BOOK3S_INTERRUPT_EXTERNAL + mr r9, r4 + ld r10, VCPU_PC(r9) + ld r11, VCPU_MSR(r9) + beq do_ext_interrupt /* if so */ + /* see if any other thread is already exiting */ lwz r0,VCORE_ENTRY_EXIT(r5) cmpwi r0,0x100 @@ -1577,8 +1672,7 @@ kvm_cede_prodded: /* we've ceded but we want to give control to the host */ kvm_cede_exit: - li r3,H_TOO_HARD - blr + b hcall_real_fallback /* Try to handle a machine check in real mode */ machine_check_realmode: @@ -1626,7 +1720,7 @@ secondary_nap: beq 37f sync li r0, 0xff - li r6, XICS_QIRR + li r6, XICS_MFRR stbcix r0, r5, r6 /* clear the IPI */ stwcix r3, r5, r7 /* EOI it */ 37: sync diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index dbdc15aa8127..bdc40b8e77d9 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -762,9 +762,7 @@ program_interrupt: run->exit_reason = KVM_EXIT_MMIO; r = RESUME_HOST_NV; break; - case EMULATE_DO_PAPR: - run->exit_reason = KVM_EXIT_PAPR_HCALL; - vcpu->arch.hcall_needed = 1; + case EMULATE_EXIT_USER: r = RESUME_HOST_NV; break; default: @@ -1283,7 +1281,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old) { } @@ -1298,6 +1296,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) { #ifdef CONFIG_PPC64 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); + INIT_LIST_HEAD(&kvm->arch.rtas_tokens); #endif if (firmware_has_feature(FW_FEATURE_SET_MODE)) { diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index ee02b30878ed..b24309c6c2d5 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -227,6 +227,13 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) +{ + long rc = kvmppc_xics_hcall(vcpu, cmd); + kvmppc_set_gpr(vcpu, 3, rc); + return EMULATE_DONE; +} + int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) { switch (cmd) { @@ -246,6 +253,20 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) clear_bit(KVM_REQ_UNHALT, &vcpu->requests); vcpu->stat.halt_wakeup++; return EMULATE_DONE; + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + if (kvmppc_xics_enabled(vcpu)) + return kvmppc_h_pr_xics_hcall(vcpu, cmd); + break; + case H_RTAS: + if (list_empty(&vcpu->kvm->arch.rtas_tokens)) + return RESUME_HOST; + if (kvmppc_rtas_hcall(vcpu)) + break; + kvmppc_set_gpr(vcpu, 3, 0); + return EMULATE_DONE; } return EMULATE_FAIL; diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c new file mode 100644 index 000000000000..3219ba895246 --- /dev/null +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -0,0 +1,274 @@ +/* + * Copyright 2012 Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. 
+ */ + +#include <linux/kernel.h> +#include <linux/kvm_host.h> +#include <linux/kvm.h> +#include <linux/err.h> + +#include <asm/uaccess.h> +#include <asm/kvm_book3s.h> +#include <asm/kvm_ppc.h> +#include <asm/hvcall.h> +#include <asm/rtas.h> + +#ifdef CONFIG_KVM_XICS +static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq, server, priority; + int rc; + + if (args->nargs != 3 || args->nret != 1) { + rc = -3; + goto out; + } + + irq = args->args[0]; + server = args->args[1]; + priority = args->args[2]; + + rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); + if (rc) + rc = -3; +out: + args->rets[0] = rc; +} + +static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq, server, priority; + int rc; + + if (args->nargs != 1 || args->nret != 3) { + rc = -3; + goto out; + } + + irq = args->args[0]; + + server = priority = 0; + rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); + if (rc) { + rc = -3; + goto out; + } + + args->rets[1] = server; + args->rets[2] = priority; +out: + args->rets[0] = rc; +} + +static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq; + int rc; + + if (args->nargs != 1 || args->nret != 1) { + rc = -3; + goto out; + } + + irq = args->args[0]; + + rc = kvmppc_xics_int_off(vcpu->kvm, irq); + if (rc) + rc = -3; +out: + args->rets[0] = rc; +} + +static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq; + int rc; + + if (args->nargs != 1 || args->nret != 1) { + rc = -3; + goto out; + } + + irq = args->args[0]; + + rc = kvmppc_xics_int_on(vcpu->kvm, irq); + if (rc) + rc = -3; +out: + args->rets[0] = rc; +} +#endif /* CONFIG_KVM_XICS */ + +struct rtas_handler { + void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args); + char *name; +}; + +static struct rtas_handler rtas_handlers[] = { +#ifdef CONFIG_KVM_XICS + { .name = "ibm,set-xive", .handler = kvm_rtas_set_xive }, + { .name = "ibm,get-xive", .handler = kvm_rtas_get_xive }, + { .name = "ibm,int-off", .handler = kvm_rtas_int_off }, + { .name = "ibm,int-on", .handler = kvm_rtas_int_on }, +#endif +}; + +struct rtas_token_definition { + struct list_head list; + struct rtas_handler *handler; + u64 token; +}; + +static int rtas_name_matches(char *s1, char *s2) +{ + struct kvm_rtas_token_args args; + return !strncmp(s1, s2, sizeof(args.name)); +} + +static int rtas_token_undefine(struct kvm *kvm, char *name) +{ + struct rtas_token_definition *d, *tmp; + + lockdep_assert_held(&kvm->lock); + + list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) { + if (rtas_name_matches(d->handler->name, name)) { + list_del(&d->list); + kfree(d); + return 0; + } + } + + /* It's not an error to undefine an undefined token */ + return 0; +} + +static int rtas_token_define(struct kvm *kvm, char *name, u64 token) +{ + struct rtas_token_definition *d; + struct rtas_handler *h = NULL; + bool found; + int i; + + lockdep_assert_held(&kvm->lock); + + list_for_each_entry(d, &kvm->arch.rtas_tokens, list) { + if (d->token == token) + return -EEXIST; + } + + found = false; + for (i = 0; i < ARRAY_SIZE(rtas_handlers); i++) { + h = &rtas_handlers[i]; + if (rtas_name_matches(h->name, name)) { + found = true; + break; + } + } + + if (!found) + return -ENOENT; + + d = kzalloc(sizeof(*d), GFP_KERNEL); + if (!d) + return -ENOMEM; + + d->handler = h; + d->token = token; + + list_add_tail(&d->list, &kvm->arch.rtas_tokens); + + return 0; +} + +int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user 
*argp)
+{
+ struct kvm_rtas_token_args args;
+ int rc;
+
+ if (copy_from_user(&args, argp, sizeof(args)))
+ return -EFAULT;
+
+ mutex_lock(&kvm->lock);
+
+ if (args.token)
+ rc = rtas_token_define(kvm, args.name, args.token);
+ else
+ rc = rtas_token_undefine(kvm, args.name);
+
+ mutex_unlock(&kvm->lock);
+
+ return rc;
+}
+
+int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
+{
+ struct rtas_token_definition *d;
+ struct rtas_args args;
+ rtas_arg_t *orig_rets;
+ gpa_t args_phys;
+ int rc;
+
+ /* r4 contains the guest physical address of the RTAS args */
+ args_phys = kvmppc_get_gpr(vcpu, 4);
+
+ rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+ if (rc)
+ goto fail;
+
+ /*
+ * args->rets is a pointer into args->args. Now that we've
+ * copied args we need to fix it up to point into our copy,
+ * not the guest args. We also need to save the original
+ * value so we can restore it on the way out.
+ */
+ orig_rets = args.rets;
+ args.rets = &args.args[args.nargs];
+
+ mutex_lock(&vcpu->kvm->lock);
+
+ rc = -ENOENT;
+ list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) {
+ if (d->token == args.token) {
+ d->handler->handler(vcpu, &args);
+ rc = 0;
+ break;
+ }
+ }
+
+ mutex_unlock(&vcpu->kvm->lock);
+
+ if (rc == 0) {
+ args.rets = orig_rets;
+ rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+ if (rc)
+ goto fail;
+ }
+
+ return rc;
+
+fail:
+ /*
+ * We only get here if the guest has called RTAS with a bogus
+ * args pointer. That means we can't get to the args, and so we
+ * can't fail the RTAS call. So fail right out to userspace,
+ * which should kill the guest.
+ */
+ return rc;
+}
+
+void kvmppc_rtas_tokens_free(struct kvm *kvm)
+{
+ struct rtas_token_definition *d, *tmp;
+
+ lockdep_assert_held(&kvm->lock);
+
+ list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
+ list_del(&d->list);
+ kfree(d);
+ }
+}
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
new file mode 100644
index 000000000000..f7a103756618
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -0,0 +1,1270 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/anon_inodes.h>
+
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/debug.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "book3s_xics.h"
+
+#if 1
+#define XICS_DBG(fmt...) do { } while (0)
+#else
+#define XICS_DBG(fmt...) trace_printk(fmt)
+#endif
+
+#define ENABLE_REALMODE true
+#define DEBUG_REALMODE false
+
+/*
+ * LOCKING
+ * =======
+ *
+ * Each ICS has a mutex protecting the information about the IRQ
+ * sources and avoiding simultaneous deliveries of the same interrupt.
+ *
+ * ICP operations are done via a single compare & swap transaction
+ * (most ICP state fits in the union kvmppc_icp_state)
+ */
+
+/*
+ * TODO
+ * ====
+ *
+ * - To speed up resends, keep a bitmap of "resend" set bits in the
+ * ICS
+ *
+ * - Speed up server# -> ICP lookup (array ? hash table ?)
+ * + * - Make ICS lockless as well, or at least a per-interrupt lock or hashed + * locks array to improve scalability + */ + +/* -- ICS routines -- */ + +static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, + u32 new_irq); + +static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level, + bool report_status) +{ + struct ics_irq_state *state; + struct kvmppc_ics *ics; + u16 src; + + XICS_DBG("ics deliver %#x (level: %d)\n", irq, level); + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) { + XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq); + return -EINVAL; + } + state = &ics->irq_state[src]; + if (!state->exists) + return -EINVAL; + + if (report_status) + return state->asserted; + + /* + * We set state->asserted locklessly. This should be fine as + * we are the only setter, thus concurrent access is undefined + * to begin with. + */ + if (level == KVM_INTERRUPT_SET_LEVEL) + state->asserted = 1; + else if (level == KVM_INTERRUPT_UNSET) { + state->asserted = 0; + return 0; + } + + /* Attempt delivery */ + icp_deliver_irq(xics, NULL, irq); + + return state->asserted; +} + +static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, + struct kvmppc_icp *icp) +{ + int i; + + mutex_lock(&ics->lock); + + for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { + struct ics_irq_state *state = &ics->irq_state[i]; + + if (!state->resend) + continue; + + XICS_DBG("resend %#x prio %#x\n", state->number, + state->priority); + + mutex_unlock(&ics->lock); + icp_deliver_irq(xics, icp, state->number); + mutex_lock(&ics->lock); + } + + mutex_unlock(&ics->lock); +} + +static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics, + struct ics_irq_state *state, + u32 server, u32 priority, u32 saved_priority) +{ + bool deliver; + + mutex_lock(&ics->lock); + + state->server = server; + state->priority = priority; + state->saved_priority = saved_priority; + deliver = false; + if ((state->masked_pending || state->resend) && priority != MASKED) { + state->masked_pending = 0; + deliver = true; + } + + mutex_unlock(&ics->lock); + + return deliver; +} + +int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_icp *icp; + struct kvmppc_ics *ics; + struct ics_irq_state *state; + u16 src; + + if (!xics) + return -ENODEV; + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) + return -EINVAL; + state = &ics->irq_state[src]; + + icp = kvmppc_xics_find_server(kvm, server); + if (!icp) + return -EINVAL; + + XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n", + irq, server, priority, + state->masked_pending, state->resend); + + if (write_xive(xics, ics, state, server, priority, priority)) + icp_deliver_irq(xics, icp, irq); + + return 0; +} + +int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_ics *ics; + struct ics_irq_state *state; + u16 src; + + if (!xics) + return -ENODEV; + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) + return -EINVAL; + state = &ics->irq_state[src]; + + mutex_lock(&ics->lock); + *server = state->server; + *priority = state->priority; + mutex_unlock(&ics->lock); + + return 0; +} + +int kvmppc_xics_int_on(struct kvm *kvm, u32 irq) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_icp *icp; + struct kvmppc_ics *ics; + struct ics_irq_state *state; + u16 src; + + if (!xics) + return -ENODEV; + + ics = 
kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) + return -EINVAL; + state = &ics->irq_state[src]; + + icp = kvmppc_xics_find_server(kvm, state->server); + if (!icp) + return -EINVAL; + + if (write_xive(xics, ics, state, state->server, state->saved_priority, + state->saved_priority)) + icp_deliver_irq(xics, icp, irq); + + return 0; +} + +int kvmppc_xics_int_off(struct kvm *kvm, u32 irq) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_ics *ics; + struct ics_irq_state *state; + u16 src; + + if (!xics) + return -ENODEV; + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) + return -EINVAL; + state = &ics->irq_state[src]; + + write_xive(xics, ics, state, state->server, MASKED, state->priority); + + return 0; +} + +/* -- ICP routines, including hcalls -- */ + +static inline bool icp_try_update(struct kvmppc_icp *icp, + union kvmppc_icp_state old, + union kvmppc_icp_state new, + bool change_self) +{ + bool success; + + /* Calculate new output value */ + new.out_ee = (new.xisr && (new.pending_pri < new.cppr)); + + /* Attempt atomic update */ + success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw; + if (!success) + goto bail; + + XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n", + icp->server_num, + old.cppr, old.mfrr, old.pending_pri, old.xisr, + old.need_resend, old.out_ee); + XICS_DBG("UPD - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n", + new.cppr, new.mfrr, new.pending_pri, new.xisr, + new.need_resend, new.out_ee); + /* + * Check for output state update + * + * Note that this is racy since another processor could be updating + * the state already. This is why we never clear the interrupt output + * here, we only ever set it. The clear only happens prior to doing + * an update and only by the processor itself. Currently we do it + * in Accept (H_XIRR) and Up_Cppr (H_XPPR). + * + * We also do not try to figure out whether the EE state has changed, + * we unconditionally set it if the new state calls for it. The reason + * for that is that we opportunistically remove the pending interrupt + * flag when raising CPPR, so we need to set it back here if an + * interrupt is still pending. 
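+ *
+ * As a worked example: with CPPR 0x05 and an interrupt queued at
+ * pending_pri 0x03, out_ee is set (0x03 < 0x05); the subsequent
+ * H_XIRR returns xisr | (0x05 << 24), loads the CPPR with 0x03 and
+ * clears the XISR, after which out_ee is computed as false again.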
+ */
+ if (new.out_ee) {
+ kvmppc_book3s_queue_irqprio(icp->vcpu,
+ BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+ if (!change_self)
+ kvmppc_fast_vcpu_kick(icp->vcpu);
+ }
+ bail:
+ return success;
+}
+
+static void icp_check_resend(struct kvmppc_xics *xics,
+ struct kvmppc_icp *icp)
+{
+ u32 icsid;
+
+ /* Order this load with the test for need_resend in the caller */
+ smp_rmb();
+ for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
+ struct kvmppc_ics *ics = xics->ics[icsid];
+
+ if (!test_and_clear_bit(icsid, icp->resend_map))
+ continue;
+ if (!ics)
+ continue;
+ ics_check_resend(xics, ics, icp);
+ }
+}
+
+static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
+ u32 *reject)
+{
+ union kvmppc_icp_state old_state, new_state;
+ bool success;
+
+ XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority,
+ icp->server_num);
+
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ *reject = 0;
+
+ /* See if we can deliver */
+ success = new_state.cppr > priority &&
+ new_state.mfrr > priority &&
+ new_state.pending_pri > priority;
+
+ /*
+ * If we can, check for a rejection and perform the
+ * delivery
+ */
+ if (success) {
+ *reject = new_state.xisr;
+ new_state.xisr = irq;
+ new_state.pending_pri = priority;
+ } else {
+ /*
+ * If we failed to deliver we set need_resend
+ * so a subsequent CPPR state change causes us
+ * to try a new delivery.
+ */
+ new_state.need_resend = true;
+ }
+
+ } while (!icp_try_update(icp, old_state, new_state, false));
+
+ return success;
+}
+
+static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+ u32 new_irq)
+{
+ struct ics_irq_state *state;
+ struct kvmppc_ics *ics;
+ u32 reject;
+ u16 src;
+
+ /*
+ * This is used both for initial delivery of an interrupt and
+ * for subsequent rejection.
+ *
+ * Rejection can be racy vs. resends. We have evaluated the
+ * rejection in an atomic ICP transaction which is now complete,
+ * so potentially the ICP can already accept the interrupt again.
+ *
+ * So we need to retry the delivery. Essentially the reject path
+ * boils down to a failed delivery. Always.
+ *
+ * Now the interrupt could also have moved to a different target,
+ * thus we may need to re-do the ICP lookup as well
+ */
+
+ again:
+ /* Get the ICS state and lock it */
+ ics = kvmppc_xics_find_ics(xics, new_irq, &src);
+ if (!ics) {
+ XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq);
+ return;
+ }
+ state = &ics->irq_state[src];
+
+ /* Get a lock on the ICS */
+ mutex_lock(&ics->lock);
+
+ /* Get our server */
+ if (!icp || state->server != icp->server_num) {
+ icp = kvmppc_xics_find_server(xics->kvm, state->server);
+ if (!icp) {
+ pr_warn("icp_deliver_irq: IRQ 0x%06x server 0x%x not found !\n",
+ new_irq, state->server);
+ goto out;
+ }
+ }
+
+ /* Clear the resend bit of that interrupt */
+ state->resend = 0;
+
+ /*
+ * If masked, bail out
+ *
+ * Note: PAPR doesn't mention anything about masked pending
+ * when doing a resend, only when doing a delivery.
+ *
+ * However that would have the effect of losing a masked
+ * interrupt that was rejected and isn't consistent with
+ * the whole masked_pending business which is about not
+ * losing interrupts that occur while masked.
+ *
+ * I don't differentiate normal deliveries and resends, this
+ * implementation will differ from PAPR and not lose such
+ * interrupts.
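+ *
+ * Concretely: a delivery attempted while the priority is MASKED just
+ * latches masked_pending below; a later ibm,set-xive to an unmasked
+ * priority sees that flag in write_xive() and re-runs the delivery.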
+ */
+ if (state->priority == MASKED) {
+ XICS_DBG("irq %#x masked pending\n", new_irq);
+ state->masked_pending = 1;
+ goto out;
+ }
+
+ /*
+ * Try the delivery, this will set the need_resend flag
+ * in the ICP as part of the atomic transaction if the
+ * delivery is not possible.
+ *
+ * Note that if successful, the new delivery might have itself
+ * rejected an interrupt that was "delivered" before we took the
+ * icp mutex.
+ *
+ * In this case we do the whole sequence all over again for the
+ * new guy. We cannot assume that the rejected interrupt is less
+ * favored than the new one, and thus doesn't need to be delivered,
+ * because by the time we exit icp_try_to_deliver() the target
+ * processor may well have already consumed & completed it, and thus
+ * the rejected interrupt might actually be already acceptable.
+ */
+ if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) {
+ /*
+ * Delivery was successful, did we reject somebody else?
+ */
+ if (reject && reject != XICS_IPI) {
+ mutex_unlock(&ics->lock);
+ new_irq = reject;
+ goto again;
+ }
+ } else {
+ /*
+ * We failed to deliver the interrupt we need to set the
+ * resend map bit and mark the ICS state as needing a resend
+ */
+ set_bit(ics->icsid, icp->resend_map);
+ state->resend = 1;
+
+ /*
+ * If the need_resend flag got cleared in the ICP some time
+ * between icp_try_to_deliver() atomic update and now, then
+ * we know it might have missed the resend_map bit. So we
+ * retry
+ */
+ smp_mb();
+ if (!icp->state.need_resend) {
+ mutex_unlock(&ics->lock);
+ goto again;
+ }
+ }
+ out:
+ mutex_unlock(&ics->lock);
+}
+
+static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+ u8 new_cppr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ bool resend;
+
+ /*
+ * This handles several related states in one operation:
+ *
+ * ICP State: Down_CPPR
+ *
+ * Load CPPR with new value and if the XISR is 0
+ * then check for resends:
+ *
+ * ICP State: Resend
+ *
+ * If MFRR is more favored than CPPR, check for IPIs
+ * and notify ICS of a potential resend. This is done
+ * asynchronously (when used in real mode, we will have
+ * to exit here).
+ *
+ * We do not handle the complete Check_IPI as documented
+ * here. In the PAPR, this state will be used for both
+ * Set_MFRR and Down_CPPR. However, we know that we aren't
+ * changing the MFRR state here so we don't need to handle
+ * the case of an MFRR causing a reject of a pending irq,
+ * this will have been handled when the MFRR was set in the
+ * first place.
+ *
+ * Thus we don't have to handle rejects, only resends.
+ *
+ * When implementing real mode for HV KVM, resend will lead to
+ * an H_TOO_HARD return and the whole transaction will be handled
+ * in virtual mode.
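+ *
+ * Example: with MFRR = 4 and no interrupt pending, moving CPPR
+ * from 0 to 5 makes the IPI deliverable, so the transaction below
+ * loads XISR with XICS_IPI and pending_pri with the MFRR value.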
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ /* Down_CPPR */
+ new_state.cppr = new_cppr;
+
+ /*
+ * Cut down Resend / Check_IPI / IPI
+ *
+ * The logic is that we cannot have a pending interrupt
+ * trumped by an IPI at this point (see above), so we
+ * know that either the pending interrupt is already an
+ * IPI (in which case we don't care to override it) or
+ * it's either more favored than us or non-existent
+ */
+ if (new_state.mfrr < new_cppr &&
+ new_state.mfrr <= new_state.pending_pri) {
+ WARN_ON(new_state.xisr != XICS_IPI &&
+ new_state.xisr != 0);
+ new_state.pending_pri = new_state.mfrr;
+ new_state.xisr = XICS_IPI;
+ }
+
+ /* Latch/clear resend bit */
+ resend = new_state.need_resend;
+ new_state.need_resend = 0;
+
+ } while (!icp_try_update(icp, old_state, new_state, true));
+
+ /*
+ * Now handle resend checks. Those are asynchronous to the ICP
+ * state update in HW (i.e. bus transactions) so we can handle them
+ * separately here too
+ */
+ if (resend)
+ icp_check_resend(xics, icp);
+}
+
+static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u32 xirr;
+
+ /* First, remove EE from the processor */
+ kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+ BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+ /*
+ * ICP State: Accept_Interrupt
+ *
+ * Return the pending interrupt (if any) along with the
+ * current CPPR, then clear the XISR & set CPPR to the
+ * pending priority
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
+ if (!old_state.xisr)
+ break;
+ new_state.cppr = new_state.pending_pri;
+ new_state.pending_pri = 0xff;
+ new_state.xisr = 0;
+
+ } while (!icp_try_update(icp, old_state, new_state, true));
+
+ XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr);
+
+ return xirr;
+}
+
+static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+ unsigned long mfrr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp;
+ u32 reject;
+ bool resend;
+ bool local;
+
+ XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n",
+ vcpu->vcpu_id, server, mfrr);
+
+ icp = vcpu->arch.icp;
+ local = icp->server_num == server;
+ if (!local) {
+ icp = kvmppc_xics_find_server(vcpu->kvm, server);
+ if (!icp)
+ return H_PARAMETER;
+ }
+
+ /*
+ * ICP state: Set_MFRR
+ *
+ * If the CPPR is more favored than the new MFRR, then
+ * nothing needs to be rejected as there can be no XISR to
+ * reject. If the MFRR is being made less favored then
+ * there might be a previously-rejected interrupt needing
+ * to be resent.
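+ * (That resend case is the mfrr > old_state.mfrr branch in the
+ * loop below, which latches need_resend and clears it in the new
+ * state.)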
+ *
+ * If the CPPR is less favored, then we might be replacing
+ * an interrupt, and thus need to possibly reject it as in
+ *
+ * ICP state: Check_IPI
+ */
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ /* Set_MFRR */
+ new_state.mfrr = mfrr;
+
+ /* Check_IPI */
+ reject = 0;
+ resend = false;
+ if (mfrr < new_state.cppr) {
+ /* Reject a pending interrupt if not an IPI */
+ if (mfrr <= new_state.pending_pri)
+ reject = new_state.xisr;
+ new_state.pending_pri = mfrr;
+ new_state.xisr = XICS_IPI;
+ }
+
+ if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+ resend = new_state.need_resend;
+ new_state.need_resend = 0;
+ }
+ } while (!icp_try_update(icp, old_state, new_state, local));
+
+ /* Handle reject */
+ if (reject && reject != XICS_IPI)
+ icp_deliver_irq(xics, icp, reject);
+
+ /* Handle resend */
+ if (resend)
+ icp_check_resend(xics, icp);
+
+ return H_SUCCESS;
+}
+
+static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+ union kvmppc_icp_state old_state, new_state;
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u32 reject;
+
+ XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr);
+
+ /*
+ * ICP State: Set_CPPR
+ *
+ * We can safely compare the new value with the current
+ * value outside of the transaction as the CPPR is only
+ * ever changed by the processor on itself
+ */
+ if (cppr > icp->state.cppr)
+ icp_down_cppr(xics, icp, cppr);
+ else if (cppr == icp->state.cppr)
+ return;
+
+ /*
+ * ICP State: Up_CPPR
+ *
+ * The processor is raising its priority, this can result
+ * in a rejection of a pending interrupt:
+ *
+ * ICP State: Reject_Current
+ *
+ * We can remove EE from the current processor, the update
+ * transaction will set it again if needed
+ */
+ kvmppc_book3s_dequeue_irqprio(icp->vcpu,
+ BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+
+ do {
+ old_state = new_state = ACCESS_ONCE(icp->state);
+
+ reject = 0;
+ new_state.cppr = cppr;
+
+ if (cppr <= new_state.pending_pri) {
+ reject = new_state.xisr;
+ new_state.xisr = 0;
+ new_state.pending_pri = 0xff;
+ }
+
+ } while (!icp_try_update(icp, old_state, new_state, true));
+
+ /*
+ * Check for rejects. They are handled by doing a new delivery
+ * attempt (see comments in icp_deliver_irq).
+ */
+ if (reject && reject != XICS_IPI)
+ icp_deliver_irq(xics, icp, reject);
+}
+
+static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u32 irq = xirr & 0x00ffffff;
+ u16 src;
+
+ XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
+
+ /*
+ * ICP State: EOI
+ *
+ * Note: If EOI is incorrectly used by SW to lower the CPPR
+ * value (i.e. more favored), we do not check for rejection of
+ * a pending interrupt, this is a SW error and PAPR specifies
+ * that we don't have to deal with it.
+ *
+ * The sending of an EOI to the ICS is handled after the
+ * CPPR update
+ *
+ * ICP State: Down_CPPR which we handle
+ * in a separate function as it's shared with H_CPPR.
+ */
+ icp_down_cppr(xics, icp, xirr >> 24);
+
+ /* IPIs have no EOI */
+ if (irq == XICS_IPI)
+ return H_SUCCESS;
+ /*
+ * EOI handling: If the interrupt is still asserted, we need to
+ * resend it. We can take a lockless "peek" at the ICS state here.
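+ * A racy read is benign: a concurrent assertion triggers its own
+ * delivery through kvm_set_irq(), so at worst we make one redundant
+ * delivery attempt.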
+ * + * "Message" interrupts will never have "asserted" set + */ + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) { + XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq); + return H_PARAMETER; + } + state = &ics->irq_state[src]; + + /* Still asserted, resend it */ + if (state->asserted) + icp_deliver_irq(xics, icp, irq); + + return H_SUCCESS; +} + +static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) +{ + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp = vcpu->arch.icp; + + XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n", + hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt); + + if (icp->rm_action & XICS_RM_KICK_VCPU) + kvmppc_fast_vcpu_kick(icp->rm_kick_target); + if (icp->rm_action & XICS_RM_CHECK_RESEND) + icp_check_resend(xics, icp); + if (icp->rm_action & XICS_RM_REJECT) + icp_deliver_irq(xics, icp, icp->rm_reject); + + icp->rm_action = 0; + + return H_SUCCESS; +} + +int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) +{ + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + unsigned long res; + int rc = H_SUCCESS; + + /* Check if we have an ICP */ + if (!xics || !vcpu->arch.icp) + return H_HARDWARE; + + /* Check for real mode returning too hard */ + if (xics->real_mode) + return kvmppc_xics_rm_complete(vcpu, req); + + switch (req) { + case H_XIRR: + res = kvmppc_h_xirr(vcpu); + kvmppc_set_gpr(vcpu, 4, res); + break; + case H_CPPR: + kvmppc_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4)); + break; + case H_EOI: + rc = kvmppc_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4)); + break; + case H_IPI: + rc = kvmppc_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5)); + break; + } + + return rc; +} + + +/* -- Initialisation code etc. -- */ + +static int xics_debug_show(struct seq_file *m, void *private) +{ + struct kvmppc_xics *xics = m->private; + struct kvm *kvm = xics->kvm; + struct kvm_vcpu *vcpu; + int icsid, i; + + if (!kvm) + return 0; + + seq_printf(m, "=========\nICP state\n=========\n"); + + kvm_for_each_vcpu(i, vcpu, kvm) { + struct kvmppc_icp *icp = vcpu->arch.icp; + union kvmppc_icp_state state; + + if (!icp) + continue; + + state.raw = ACCESS_ONCE(icp->state.raw); + seq_printf(m, "cpu server %#lx XIRR:%#x PPRI:%#x CPPR:%#x MFRR:%#x OUT:%d NR:%d\n", + icp->server_num, state.xisr, + state.pending_pri, state.cppr, state.mfrr, + state.out_ee, state.need_resend); + } + + for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) { + struct kvmppc_ics *ics = xics->ics[icsid]; + + if (!ics) + continue; + + seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n", + icsid); + + mutex_lock(&ics->lock); + + for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { + struct ics_irq_state *irq = &ics->irq_state[i]; + + seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n", + irq->number, irq->server, irq->priority, + irq->saved_priority, irq->asserted, + irq->resend, irq->masked_pending); + + } + mutex_unlock(&ics->lock); + } + return 0; +} + +static int xics_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, xics_debug_show, inode->i_private); +} + +static const struct file_operations xics_debug_fops = { + .open = xics_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void xics_debugfs_init(struct kvmppc_xics *xics) +{ + char *name; + + name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics); + if (!name) { + pr_err("%s: no memory for name\n", __func__); + return; + } + + xics->dentry = 
debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root, + xics, &xics_debug_fops); + + pr_debug("%s: created %s\n", __func__, name); + kfree(name); +} + +static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm, + struct kvmppc_xics *xics, int irq) +{ + struct kvmppc_ics *ics; + int i, icsid; + + icsid = irq >> KVMPPC_XICS_ICS_SHIFT; + + mutex_lock(&kvm->lock); + + /* ICS already exists - somebody else got here first */ + if (xics->ics[icsid]) + goto out; + + /* Create the ICS */ + ics = kzalloc(sizeof(struct kvmppc_ics), GFP_KERNEL); + if (!ics) + goto out; + + mutex_init(&ics->lock); + ics->icsid = icsid; + + for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { + ics->irq_state[i].number = (icsid << KVMPPC_XICS_ICS_SHIFT) | i; + ics->irq_state[i].priority = MASKED; + ics->irq_state[i].saved_priority = MASKED; + } + smp_wmb(); + xics->ics[icsid] = ics; + + if (icsid > xics->max_icsid) + xics->max_icsid = icsid; + + out: + mutex_unlock(&kvm->lock); + return xics->ics[icsid]; +} + +int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num) +{ + struct kvmppc_icp *icp; + + if (!vcpu->kvm->arch.xics) + return -ENODEV; + + if (kvmppc_xics_find_server(vcpu->kvm, server_num)) + return -EEXIST; + + icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL); + if (!icp) + return -ENOMEM; + + icp->vcpu = vcpu; + icp->server_num = server_num; + icp->state.mfrr = MASKED; + icp->state.pending_pri = MASKED; + vcpu->arch.icp = icp; + + XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id); + + return 0; +} + +u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu) +{ + struct kvmppc_icp *icp = vcpu->arch.icp; + union kvmppc_icp_state state; + + if (!icp) + return 0; + state = icp->state; + return ((u64)state.cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) | + ((u64)state.xisr << KVM_REG_PPC_ICP_XISR_SHIFT) | + ((u64)state.mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) | + ((u64)state.pending_pri << KVM_REG_PPC_ICP_PPRI_SHIFT); +} + +int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval) +{ + struct kvmppc_icp *icp = vcpu->arch.icp; + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + union kvmppc_icp_state old_state, new_state; + struct kvmppc_ics *ics; + u8 cppr, mfrr, pending_pri; + u32 xisr; + u16 src; + bool resend; + + if (!icp || !xics) + return -ENOENT; + + cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT; + xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) & + KVM_REG_PPC_ICP_XISR_MASK; + mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT; + pending_pri = icpval >> KVM_REG_PPC_ICP_PPRI_SHIFT; + + /* Require the new state to be internally consistent */ + if (xisr == 0) { + if (pending_pri != 0xff) + return -EINVAL; + } else if (xisr == XICS_IPI) { + if (pending_pri != mfrr || pending_pri >= cppr) + return -EINVAL; + } else { + if (pending_pri >= mfrr || pending_pri >= cppr) + return -EINVAL; + ics = kvmppc_xics_find_ics(xics, xisr, &src); + if (!ics) + return -EINVAL; + } + + new_state.raw = 0; + new_state.cppr = cppr; + new_state.xisr = xisr; + new_state.mfrr = mfrr; + new_state.pending_pri = pending_pri; + + /* + * Deassert the CPU interrupt request. + * icp_try_update will reassert it if necessary. + */ + kvmppc_book3s_dequeue_irqprio(icp->vcpu, + BOOK3S_INTERRUPT_EXTERNAL_LEVEL); + + /* + * Note that if we displace an interrupt from old_state.xisr, + * we don't mark it as rejected. We expect userspace to set + * the state of the interrupt sources to be consistent with + * the ICP states (either before or afterwards, which doesn't + * matter). 
We do handle resends due to CPPR becoming less + * favoured because that is necessary to end up with a + * consistent state in the situation where userspace restores + * the ICS states before the ICP states. + */ + do { + old_state = ACCESS_ONCE(icp->state); + + if (new_state.mfrr <= old_state.mfrr) { + resend = false; + new_state.need_resend = old_state.need_resend; + } else { + resend = old_state.need_resend; + new_state.need_resend = 0; + } + } while (!icp_try_update(icp, old_state, new_state, false)); + + if (resend) + icp_check_resend(xics, icp); + + return 0; +} + +static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr) +{ + int ret; + struct kvmppc_ics *ics; + struct ics_irq_state *irqp; + u64 __user *ubufp = (u64 __user *) addr; + u16 idx; + u64 val, prio; + + ics = kvmppc_xics_find_ics(xics, irq, &idx); + if (!ics) + return -ENOENT; + + irqp = &ics->irq_state[idx]; + mutex_lock(&ics->lock); + ret = -ENOENT; + if (irqp->exists) { + val = irqp->server; + prio = irqp->priority; + if (prio == MASKED) { + val |= KVM_XICS_MASKED; + prio = irqp->saved_priority; + } + val |= prio << KVM_XICS_PRIORITY_SHIFT; + if (irqp->asserted) + val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING; + else if (irqp->masked_pending || irqp->resend) + val |= KVM_XICS_PENDING; + ret = 0; + } + mutex_unlock(&ics->lock); + + if (!ret && put_user(val, ubufp)) + ret = -EFAULT; + + return ret; +} + +static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr) +{ + struct kvmppc_ics *ics; + struct ics_irq_state *irqp; + u64 __user *ubufp = (u64 __user *) addr; + u16 idx; + u64 val; + u8 prio; + u32 server; + + if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS) + return -ENOENT; + + ics = kvmppc_xics_find_ics(xics, irq, &idx); + if (!ics) { + ics = kvmppc_xics_create_ics(xics->kvm, xics, irq); + if (!ics) + return -ENOMEM; + } + irqp = &ics->irq_state[idx]; + if (get_user(val, ubufp)) + return -EFAULT; + + server = val & KVM_XICS_DESTINATION_MASK; + prio = val >> KVM_XICS_PRIORITY_SHIFT; + if (prio != MASKED && + kvmppc_xics_find_server(xics->kvm, server) == NULL) + return -EINVAL; + + mutex_lock(&ics->lock); + irqp->server = server; + irqp->saved_priority = prio; + if (val & KVM_XICS_MASKED) + prio = MASKED; + irqp->priority = prio; + irqp->resend = 0; + irqp->masked_pending = 0; + irqp->asserted = 0; + if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE)) + irqp->asserted = 1; + irqp->exists = 1; + mutex_unlock(&ics->lock); + + if (val & KVM_XICS_PENDING) + icp_deliver_irq(xics, NULL, irqp->number); + + return 0; +} + +int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, + bool line_status) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + + return ics_deliver_irq(xics, irq, level, line_status); +} + +static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + struct kvmppc_xics *xics = dev->private; + + switch (attr->group) { + case KVM_DEV_XICS_GRP_SOURCES: + return xics_set_source(xics, attr->attr, attr->addr); + } + return -ENXIO; +} + +static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + struct kvmppc_xics *xics = dev->private; + + switch (attr->group) { + case KVM_DEV_XICS_GRP_SOURCES: + return xics_get_source(xics, attr->attr, attr->addr); + } + return -ENXIO; +} + +static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_XICS_GRP_SOURCES: + if (attr->attr >= KVMPPC_XICS_FIRST_IRQ && + attr->attr < KVMPPC_XICS_NR_IRQS) 
+ return 0; + break; + } + return -ENXIO; +} + +static void kvmppc_xics_free(struct kvm_device *dev) +{ + struct kvmppc_xics *xics = dev->private; + int i; + struct kvm *kvm = xics->kvm; + + debugfs_remove(xics->dentry); + + if (kvm) + kvm->arch.xics = NULL; + + for (i = 0; i <= xics->max_icsid; i++) + kfree(xics->ics[i]); + kfree(xics); + kfree(dev); +} + +static int kvmppc_xics_create(struct kvm_device *dev, u32 type) +{ + struct kvmppc_xics *xics; + struct kvm *kvm = dev->kvm; + int ret = 0; + + xics = kzalloc(sizeof(*xics), GFP_KERNEL); + if (!xics) + return -ENOMEM; + + dev->private = xics; + xics->dev = dev; + xics->kvm = kvm; + + /* Already there ? */ + mutex_lock(&kvm->lock); + if (kvm->arch.xics) + ret = -EEXIST; + else + kvm->arch.xics = xics; + mutex_unlock(&kvm->lock); + + if (ret) + return ret; + + xics_debugfs_init(xics); + +#ifdef CONFIG_KVM_BOOK3S_64_HV + if (cpu_has_feature(CPU_FTR_ARCH_206)) { + /* Enable real mode support */ + xics->real_mode = ENABLE_REALMODE; + xics->real_mode_dbg = DEBUG_REALMODE; + } +#endif /* CONFIG_KVM_BOOK3S_64_HV */ + + return 0; +} + +struct kvm_device_ops kvm_xics_ops = { + .name = "kvm-xics", + .create = kvmppc_xics_create, + .destroy = kvmppc_xics_free, + .set_attr = xics_set_attr, + .get_attr = xics_get_attr, + .has_attr = xics_has_attr, +}; + +int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, + u32 xcpu) +{ + struct kvmppc_xics *xics = dev->private; + int r = -EBUSY; + + if (dev->ops != &kvm_xics_ops) + return -EPERM; + if (xics->kvm != vcpu->kvm) + return -EPERM; + if (vcpu->arch.irq_type) + return -EBUSY; + + r = kvmppc_xics_create_icp(vcpu, xcpu); + if (!r) + vcpu->arch.irq_type = KVMPPC_IRQ_XICS; + + return r; +} + +void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) +{ + if (!vcpu->arch.icp) + return; + kfree(vcpu->arch.icp); + vcpu->arch.icp = NULL; + vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; +} diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h new file mode 100644 index 000000000000..dd9326c5c19b --- /dev/null +++ b/arch/powerpc/kvm/book3s_xics.h @@ -0,0 +1,130 @@ +/* + * Copyright 2012 Michael Ellerman, IBM Corporation. + * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#ifndef _KVM_PPC_BOOK3S_XICS_H +#define _KVM_PPC_BOOK3S_XICS_H + +/* + * We use a two-level tree to store interrupt source information. + * There are up to 1024 ICS nodes, each of which can represent + * 1024 sources. + */ +#define KVMPPC_XICS_MAX_ICS_ID 1023 +#define KVMPPC_XICS_ICS_SHIFT 10 +#define KVMPPC_XICS_IRQ_PER_ICS (1 << KVMPPC_XICS_ICS_SHIFT) +#define KVMPPC_XICS_SRC_MASK (KVMPPC_XICS_IRQ_PER_ICS - 1) + +/* + * Interrupt source numbers below this are reserved, for example + * 0 is "no interrupt", and 2 is used for IPIs. 
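+ * Only source numbers at or above KVMPPC_XICS_FIRST_IRQ are exposed
+ * to the guest and to userspace (see xics_set_source()).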
+ */ +#define KVMPPC_XICS_FIRST_IRQ 16 +#define KVMPPC_XICS_NR_IRQS ((KVMPPC_XICS_MAX_ICS_ID + 1) * \ + KVMPPC_XICS_IRQ_PER_ICS) + +/* Priority value to use for disabling an interrupt */ +#define MASKED 0xff + +/* State for one irq source */ +struct ics_irq_state { + u32 number; + u32 server; + u8 priority; + u8 saved_priority; + u8 resend; + u8 masked_pending; + u8 asserted; /* Only for LSI */ + u8 exists; +}; + +/* Atomic ICP state, updated with a single compare & swap */ +union kvmppc_icp_state { + unsigned long raw; + struct { + u8 out_ee:1; + u8 need_resend:1; + u8 cppr; + u8 mfrr; + u8 pending_pri; + u32 xisr; + }; +}; + +/* One bit per ICS */ +#define ICP_RESEND_MAP_SIZE (KVMPPC_XICS_MAX_ICS_ID / BITS_PER_LONG + 1) + +struct kvmppc_icp { + struct kvm_vcpu *vcpu; + unsigned long server_num; + union kvmppc_icp_state state; + unsigned long resend_map[ICP_RESEND_MAP_SIZE]; + + /* Real mode might find something too hard, here's the action + * it might request from virtual mode + */ +#define XICS_RM_KICK_VCPU 0x1 +#define XICS_RM_CHECK_RESEND 0x2 +#define XICS_RM_REJECT 0x4 + u32 rm_action; + struct kvm_vcpu *rm_kick_target; + u32 rm_reject; + + /* Debug stuff for real mode */ + union kvmppc_icp_state rm_dbgstate; + struct kvm_vcpu *rm_dbgtgt; +}; + +struct kvmppc_ics { + struct mutex lock; + u16 icsid; + struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS]; +}; + +struct kvmppc_xics { + struct kvm *kvm; + struct kvm_device *dev; + struct dentry *dentry; + u32 max_icsid; + bool real_mode; + bool real_mode_dbg; + struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1]; +}; + +static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm, + u32 nr) +{ + struct kvm_vcpu *vcpu = NULL; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num) + return vcpu->arch.icp; + } + return NULL; +} + +static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics, + u32 irq, u16 *source) +{ + u32 icsid = irq >> KVMPPC_XICS_ICS_SHIFT; + u16 src = irq & KVMPPC_XICS_SRC_MASK; + struct kvmppc_ics *ics; + + if (source) + *source = src; + if (icsid > KVMPPC_XICS_MAX_ICS_ID) + return NULL; + ics = xics->ics[icsid]; + if (!ics) + return NULL; + return ics; +} + + +#endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 020923e43134..1020119226db 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -222,8 +222,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, kvmppc_booke_queue_irqprio(vcpu, prio); } -void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq) +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) { clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); @@ -347,7 +346,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, keep_irq = true; } - if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled) + if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_flags) update_epr = true; switch (priority) { @@ -428,8 +427,14 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, set_guest_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) set_guest_dear(vcpu, vcpu->arch.queued_dear); - if (update_epr == true) - kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); + if (update_epr == true) { + if (vcpu->arch.epr_flags & KVMPPC_EPR_USER) + kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); + else if 
(vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) { + BUG_ON(vcpu->arch.irq_type != KVMPPC_IRQ_MPIC); + kvmppc_mpic_set_epr(vcpu); + } + } new_msr &= msr_mask; #if defined(CONFIG_64BIT) @@ -746,6 +751,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_core_queue_program(vcpu, ESR_PIL); return RESUME_HOST; + case EMULATE_EXIT_USER: + return RESUME_HOST; + default: BUG(); } @@ -1148,6 +1156,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, return r; } +static void kvmppc_set_tsr(struct kvm_vcpu *vcpu, u32 new_tsr) +{ + u32 old_tsr = vcpu->arch.tsr; + + vcpu->arch.tsr = new_tsr; + + if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) + arm_next_watchdog(vcpu); + + update_timer_ints(vcpu); +} + /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { @@ -1287,16 +1307,8 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, kvmppc_emulate_dec(vcpu); } - if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { - u32 old_tsr = vcpu->arch.tsr; - - vcpu->arch.tsr = sregs->u.e.tsr; - - if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) - arm_next_watchdog(vcpu); - - update_timer_ints(vcpu); - } + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) + kvmppc_set_tsr(vcpu, sregs->u.e.tsr); return 0; } @@ -1409,84 +1421,134 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) { - int r = -EINVAL; + int r = 0; + union kvmppc_one_reg val; + int size; + long int i; + + size = one_reg_size(reg->id); + if (size > sizeof(val)) + return -EINVAL; switch (reg->id) { case KVM_REG_PPC_IAC1: case KVM_REG_PPC_IAC2: case KVM_REG_PPC_IAC3: - case KVM_REG_PPC_IAC4: { - int iac = reg->id - KVM_REG_PPC_IAC1; - r = copy_to_user((u64 __user *)(long)reg->addr, - &vcpu->arch.dbg_reg.iac[iac], sizeof(u64)); + case KVM_REG_PPC_IAC4: + i = reg->id - KVM_REG_PPC_IAC1; + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]); break; - } case KVM_REG_PPC_DAC1: - case KVM_REG_PPC_DAC2: { - int dac = reg->id - KVM_REG_PPC_DAC1; - r = copy_to_user((u64 __user *)(long)reg->addr, - &vcpu->arch.dbg_reg.dac[dac], sizeof(u64)); + case KVM_REG_PPC_DAC2: + i = reg->id - KVM_REG_PPC_DAC1; + val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]); break; - } case KVM_REG_PPC_EPR: { u32 epr = get_guest_epr(vcpu); - r = put_user(epr, (u32 __user *)(long)reg->addr); + val = get_reg_val(reg->id, epr); break; } #if defined(CONFIG_64BIT) case KVM_REG_PPC_EPCR: - r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr); + val = get_reg_val(reg->id, vcpu->arch.epcr); break; #endif + case KVM_REG_PPC_TCR: + val = get_reg_val(reg->id, vcpu->arch.tcr); + break; + case KVM_REG_PPC_TSR: + val = get_reg_val(reg->id, vcpu->arch.tsr); + break; + case KVM_REG_PPC_DEBUG_INST: + val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV); + break; default: + r = kvmppc_get_one_reg(vcpu, reg->id, &val); break; } + + if (r) + return r; + + if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) + r = -EFAULT; + return r; } int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) { - int r = -EINVAL; + int r = 0; + union kvmppc_one_reg val; + int size; + long int i; + + size = one_reg_size(reg->id); + if (size > sizeof(val)) + return -EINVAL; + + if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) + return -EFAULT; switch (reg->id) { case KVM_REG_PPC_IAC1: case KVM_REG_PPC_IAC2: case KVM_REG_PPC_IAC3: - case 
KVM_REG_PPC_IAC4: { - int iac = reg->id - KVM_REG_PPC_IAC1; - r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac], - (u64 __user *)(long)reg->addr, sizeof(u64)); + case KVM_REG_PPC_IAC4: + i = reg->id - KVM_REG_PPC_IAC1; + vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val); break; - } case KVM_REG_PPC_DAC1: - case KVM_REG_PPC_DAC2: { - int dac = reg->id - KVM_REG_PPC_DAC1; - r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac], - (u64 __user *)(long)reg->addr, sizeof(u64)); + case KVM_REG_PPC_DAC2: + i = reg->id - KVM_REG_PPC_DAC1; + vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val); break; - } case KVM_REG_PPC_EPR: { - u32 new_epr; - r = get_user(new_epr, (u32 __user *)(long)reg->addr); - if (!r) - kvmppc_set_epr(vcpu, new_epr); + u32 new_epr = set_reg_val(reg->id, val); + kvmppc_set_epr(vcpu, new_epr); break; } #if defined(CONFIG_64BIT) case KVM_REG_PPC_EPCR: { - u32 new_epcr; - r = get_user(new_epcr, (u32 __user *)(long)reg->addr); - if (r == 0) - kvmppc_set_epcr(vcpu, new_epcr); + u32 new_epcr = set_reg_val(reg->id, val); + kvmppc_set_epcr(vcpu, new_epcr); break; } #endif + case KVM_REG_PPC_OR_TSR: { + u32 tsr_bits = set_reg_val(reg->id, val); + kvmppc_set_tsr_bits(vcpu, tsr_bits); + break; + } + case KVM_REG_PPC_CLEAR_TSR: { + u32 tsr_bits = set_reg_val(reg->id, val); + kvmppc_clr_tsr_bits(vcpu, tsr_bits); + break; + } + case KVM_REG_PPC_TSR: { + u32 tsr = set_reg_val(reg->id, val); + kvmppc_set_tsr(vcpu, tsr); + break; + } + case KVM_REG_PPC_TCR: { + u32 tcr = set_reg_val(reg->id, val); + kvmppc_set_tcr(vcpu, tcr); + break; + } default: + r = kvmppc_set_one_reg(vcpu, reg->id, &val); break; } + return r; } +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + return -EINVAL; +} + int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -ENOTSUPP; @@ -1531,7 +1593,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old) + const struct kvm_memory_slot *old) { } diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index f4bb55c96517..2c6deb5ef2fe 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -54,8 +54,7 @@ (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ (1<<BOOKE_INTERRUPT_ALIGNMENT)) -.macro KVM_HANDLER ivor_nr scratch srr0 -_GLOBAL(kvmppc_handler_\ivor_nr) +.macro __KVM_HANDLER ivor_nr scratch srr0 /* Get pointer to vcpu and record exit number. */ mtspr \scratch , r4 mfspr r4, SPRN_SPRG_THREAD @@ -76,6 +75,43 @@ _GLOBAL(kvmppc_handler_\ivor_nr) bctr .endm +.macro KVM_HANDLER ivor_nr scratch srr0 +_GLOBAL(kvmppc_handler_\ivor_nr) + __KVM_HANDLER \ivor_nr \scratch \srr0 +.endm + +.macro KVM_DBG_HANDLER ivor_nr scratch srr0 +_GLOBAL(kvmppc_handler_\ivor_nr) + mtspr \scratch, r4 + mfspr r4, SPRN_SPRG_THREAD + lwz r4, THREAD_KVM_VCPU(r4) + stw r3, VCPU_CRIT_SAVE(r4) + mfcr r3 + mfspr r4, SPRN_CSRR1 + andi. 
r4, r4, MSR_PR
+ bne 1f
+ /* debug interrupt happened in enter/exit path */
+ mfspr r4, SPRN_CSRR1
+ rlwinm r4, r4, 0, ~MSR_DE
+ mtspr SPRN_CSRR1, r4
+ lis r4, 0xffff
+ ori r4, r4, 0xffff
+ mtspr SPRN_DBSR, r4
+ mfspr r4, SPRN_SPRG_THREAD
+ lwz r4, THREAD_KVM_VCPU(r4)
+ mtcr r3
+ lwz r3, VCPU_CRIT_SAVE(r4)
+ mfspr r4, \scratch
+ rfci
+1: /* debug interrupt happened in guest */
+ mtcr r3
+ mfspr r4, SPRN_SPRG_THREAD
+ lwz r4, THREAD_KVM_VCPU(r4)
+ lwz r3, VCPU_CRIT_SAVE(r4)
+ mfspr r4, \scratch
+ __KVM_HANDLER \ivor_nr \scratch \srr0
+.endm
+
 .macro KVM_HANDLER_ADDR ivor_nr
 .long kvmppc_handler_\ivor_nr
 .endm
@@ -100,7 +136,7 @@ KVM_HANDLER BOOKE_INTERRUPT_FIT SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
 KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
-KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
+KVM_DBG_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
 KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0
 KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 6dd4de7802bf..ce6b73c29612 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -425,6 +425,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 return kvmppc_set_sregs_ivor(vcpu, sregs);
 }
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val);
+ return r;
+}
+
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
+{
+ int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val);
+ return r;
+}
+
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 struct kvmppc_vcpu_e500 *vcpu_e500;
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index 33db48a8ce24..c2e5e98453a6 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -23,6 +23,10 @@
 #include <asm/mmu-book3e.h>
 #include <asm/tlb.h>
+enum vcpu_ftr {
+ VCPU_FTR_MMU_V2
+};
+
 #define E500_PID_NUM 3
 #define E500_TLB_NUM 2
@@ -131,6 +135,10 @@ void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
 void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
+int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val);
+int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val);
 #ifdef CONFIG_KVM_E500V2
 unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -295,4 +303,18 @@ static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu)
 #define get_tlb_sts(gtlbe) (MAS1_TS)
 #endif /* !BOOKE_HV */
+static inline bool has_feature(const struct kvm_vcpu *vcpu,
+ enum vcpu_ftr ftr)
+{
+ bool has_ftr;
+ switch (ftr) {
+ case VCPU_FTR_MMU_V2:
+ has_ftr = ((vcpu->arch.mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2);
+ break;
+ default:
+ return false;
+ }
+ return has_ftr;
+}
+
 #endif /* KVM_E500_H */
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index e78f353a836a..b10a01243abd 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -284,6 +284,16 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int
sprn, ulong *spr_val) case SPRN_TLB1CFG: *spr_val = vcpu->arch.tlbcfg[1]; break; + case SPRN_TLB0PS: + if (!has_feature(vcpu, VCPU_FTR_MMU_V2)) + return EMULATE_FAIL; + *spr_val = vcpu->arch.tlbps[0]; + break; + case SPRN_TLB1PS: + if (!has_feature(vcpu, VCPU_FTR_MMU_V2)) + return EMULATE_FAIL; + *spr_val = vcpu->arch.tlbps[1]; + break; case SPRN_L1CSR0: *spr_val = vcpu_e500->l1csr0; break; @@ -307,6 +317,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_MMUCFG: *spr_val = vcpu->arch.mmucfg; break; + case SPRN_EPTCFG: + if (!has_feature(vcpu, VCPU_FTR_MMU_V2)) + return EMULATE_FAIL; + /* + * Legacy Linux guests access EPTCFG register even if the E.PT + * category is disabled in the VM. Give them a chance to live. + */ + *spr_val = vcpu->arch.eptcfg; + break; /* extra exceptions */ case SPRN_IVOR32: diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 5c4475983f78..c41a5a96b558 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -596,6 +596,140 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) return 0; } +int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) +{ + int r = 0; + long int i; + + switch (id) { + case KVM_REG_PPC_MAS0: + *val = get_reg_val(id, vcpu->arch.shared->mas0); + break; + case KVM_REG_PPC_MAS1: + *val = get_reg_val(id, vcpu->arch.shared->mas1); + break; + case KVM_REG_PPC_MAS2: + *val = get_reg_val(id, vcpu->arch.shared->mas2); + break; + case KVM_REG_PPC_MAS7_3: + *val = get_reg_val(id, vcpu->arch.shared->mas7_3); + break; + case KVM_REG_PPC_MAS4: + *val = get_reg_val(id, vcpu->arch.shared->mas4); + break; + case KVM_REG_PPC_MAS6: + *val = get_reg_val(id, vcpu->arch.shared->mas6); + break; + case KVM_REG_PPC_MMUCFG: + *val = get_reg_val(id, vcpu->arch.mmucfg); + break; + case KVM_REG_PPC_EPTCFG: + *val = get_reg_val(id, vcpu->arch.eptcfg); + break; + case KVM_REG_PPC_TLB0CFG: + case KVM_REG_PPC_TLB1CFG: + case KVM_REG_PPC_TLB2CFG: + case KVM_REG_PPC_TLB3CFG: + i = id - KVM_REG_PPC_TLB0CFG; + *val = get_reg_val(id, vcpu->arch.tlbcfg[i]); + break; + case KVM_REG_PPC_TLB0PS: + case KVM_REG_PPC_TLB1PS: + case KVM_REG_PPC_TLB2PS: + case KVM_REG_PPC_TLB3PS: + i = id - KVM_REG_PPC_TLB0PS; + *val = get_reg_val(id, vcpu->arch.tlbps[i]); + break; + default: + r = -EINVAL; + break; + } + + return r; +} + +int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) +{ + int r = 0; + long int i; + + switch (id) { + case KVM_REG_PPC_MAS0: + vcpu->arch.shared->mas0 = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MAS1: + vcpu->arch.shared->mas1 = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MAS2: + vcpu->arch.shared->mas2 = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MAS7_3: + vcpu->arch.shared->mas7_3 = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MAS4: + vcpu->arch.shared->mas4 = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MAS6: + vcpu->arch.shared->mas6 = set_reg_val(id, *val); + break; + /* Only allow MMU registers to be set to the config supported by KVM */ + case KVM_REG_PPC_MMUCFG: { + u32 reg = set_reg_val(id, *val); + if (reg != vcpu->arch.mmucfg) + r = -EINVAL; + break; + } + case KVM_REG_PPC_EPTCFG: { + u32 reg = set_reg_val(id, *val); + if (reg != vcpu->arch.eptcfg) + r = -EINVAL; + break; + } + case KVM_REG_PPC_TLB0CFG: + case KVM_REG_PPC_TLB1CFG: + case KVM_REG_PPC_TLB2CFG: + case KVM_REG_PPC_TLB3CFG: { + /* MMU geometry (N_ENTRY/ASSOC) can be set 
only using SW_TLB */ + u32 reg = set_reg_val(id, *val); + i = id - KVM_REG_PPC_TLB0CFG; + if (reg != vcpu->arch.tlbcfg[i]) + r = -EINVAL; + break; + } + case KVM_REG_PPC_TLB0PS: + case KVM_REG_PPC_TLB1PS: + case KVM_REG_PPC_TLB2PS: + case KVM_REG_PPC_TLB3PS: { + u32 reg = set_reg_val(id, *val); + i = id - KVM_REG_PPC_TLB0PS; + if (reg != vcpu->arch.tlbps[i]) + r = -EINVAL; + break; + } + default: + r = -EINVAL; + break; + } + + return r; +} + +static int vcpu_mmu_geometry_update(struct kvm_vcpu *vcpu, + struct kvm_book3e_206_tlb_params *params) +{ + vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + if (params->tlb_sizes[0] <= 2048) + vcpu->arch.tlbcfg[0] |= params->tlb_sizes[0]; + vcpu->arch.tlbcfg[0] |= params->tlb_ways[0] << TLBnCFG_ASSOC_SHIFT; + + vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + vcpu->arch.tlbcfg[1] |= params->tlb_sizes[1]; + vcpu->arch.tlbcfg[1] |= params->tlb_ways[1] << TLBnCFG_ASSOC_SHIFT; + return 0; +} + int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, struct kvm_config_tlb *cfg) { @@ -692,16 +826,8 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, vcpu_e500->gtlb_offset[0] = 0; vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0]; - vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE; - - vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - if (params.tlb_sizes[0] <= 2048) - vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0]; - vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT; - - vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1]; - vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT; + /* Update vcpu's MMU geometry based on SW_TLB input */ + vcpu_mmu_geometry_update(vcpu, ¶ms); vcpu_e500->shared_tlb_pages = pages; vcpu_e500->num_shared_tlb_pages = num_pages; @@ -737,6 +863,39 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, return 0; } +/* Vcpu's MMU default configuration */ +static int vcpu_mmu_init(struct kvm_vcpu *vcpu, + struct kvmppc_e500_tlb_params *params) +{ + /* Initialize RASIZE, PIDSIZE, NTLBS and MAVN fields with host values*/ + vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE; + + /* Initialize TLBnCFG fields with host values and SW_TLB geometry*/ + vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) & + ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + vcpu->arch.tlbcfg[0] |= params[0].entries; + vcpu->arch.tlbcfg[0] |= params[0].ways << TLBnCFG_ASSOC_SHIFT; + + vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) & + ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + vcpu->arch.tlbcfg[1] |= params[1].entries; + vcpu->arch.tlbcfg[1] |= params[1].ways << TLBnCFG_ASSOC_SHIFT; + + if (has_feature(vcpu, VCPU_FTR_MMU_V2)) { + vcpu->arch.tlbps[0] = mfspr(SPRN_TLB0PS); + vcpu->arch.tlbps[1] = mfspr(SPRN_TLB1PS); + + vcpu->arch.mmucfg &= ~MMUCFG_LRAT; + + /* Guest mmu emulation currently doesn't handle E.PT */ + vcpu->arch.eptcfg = 0; + vcpu->arch.tlbcfg[0] &= ~TLBnCFG_PT; + vcpu->arch.tlbcfg[1] &= ~TLBnCFG_IND; + } + + return 0; +} + int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) { struct kvm_vcpu *vcpu = &vcpu_e500->vcpu; @@ -781,18 +940,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) if (!vcpu_e500->g2h_tlb1_map) goto err; - /* Init TLB configuration register */ - vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) & - ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries; - vcpu->arch.tlbcfg[0] |= - vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT; - - vcpu->arch.tlbcfg[1] = 
mfspr(SPRN_TLB1CFG) & - ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries; - vcpu->arch.tlbcfg[1] |= - vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT; + vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params); kvmppc_recalc_tlb1map_range(vcpu_e500); return 0; diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 2f4baa074b2e..753cc99eff2b 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -177,6 +177,8 @@ int kvmppc_core_check_processor_compat(void) r = 0; else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) r = 0; + else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0) + r = 0; else r = -ENOTSUPP; @@ -260,6 +262,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) return kvmppc_set_sregs_ivor(vcpu, sregs); } +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) +{ + int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); + return r; +} + +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) +{ + int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); + return r; +} + struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvmppc_vcpu_e500 *vcpu_e500; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 7a73b6f72a8b..631a2650e4e4 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -38,6 +38,7 @@ #define OP_31_XOP_TRAP 4 #define OP_31_XOP_LWZX 23 +#define OP_31_XOP_DCBST 54 #define OP_31_XOP_TRAP_64 68 #define OP_31_XOP_DCBF 86 #define OP_31_XOP_LBZX 87 @@ -370,6 +371,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); break; + case OP_31_XOP_DCBST: case OP_31_XOP_DCBF: case OP_31_XOP_DCBI: /* Do nothing. The guest is performing dcbi because diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h new file mode 100644 index 000000000000..5a9a10b90762 --- /dev/null +++ b/arch/powerpc/kvm/irq.h @@ -0,0 +1,20 @@ +#ifndef __IRQ_H +#define __IRQ_H + +#include <linux/kvm_host.h> + +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + int ret = 0; + +#ifdef CONFIG_KVM_MPIC + ret = ret || (kvm->arch.mpic != NULL); +#endif +#ifdef CONFIG_KVM_XICS + ret = ret || (kvm->arch.xics != NULL); +#endif + smp_rmb(); + return ret; +} + +#endif diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c new file mode 100644 index 000000000000..2861ae9eaae6 --- /dev/null +++ b/arch/powerpc/kvm/mpic.c @@ -0,0 +1,1853 @@ +/* + * OpenPIC emulation + * + * Copyright (c) 2004 Jocelyn Mayer + * 2011 Alexander Graf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <linux/slab.h> +#include <linux/mutex.h> +#include <linux/kvm_host.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/anon_inodes.h> +#include <asm/uaccess.h> +#include <asm/mpic.h> +#include <asm/kvm_para.h> +#include <asm/kvm_host.h> +#include <asm/kvm_ppc.h> +#include "iodev.h" + +#define MAX_CPU 32 +#define MAX_SRC 256 +#define MAX_TMR 4 +#define MAX_IPI 4 +#define MAX_MSI 8 +#define MAX_IRQ (MAX_SRC + MAX_IPI + MAX_TMR) +#define VID 0x03 /* MPIC version ID */ + +/* OpenPIC capability flags */ +#define OPENPIC_FLAG_IDR_CRIT (1 << 0) +#define OPENPIC_FLAG_ILR (2 << 0) + +/* OpenPIC address map */ +#define OPENPIC_REG_SIZE 0x40000 +#define OPENPIC_GLB_REG_START 0x0 +#define OPENPIC_GLB_REG_SIZE 0x10F0 +#define OPENPIC_TMR_REG_START 0x10F0 +#define OPENPIC_TMR_REG_SIZE 0x220 +#define OPENPIC_MSI_REG_START 0x1600 +#define OPENPIC_MSI_REG_SIZE 0x200 +#define OPENPIC_SUMMARY_REG_START 0x3800 +#define OPENPIC_SUMMARY_REG_SIZE 0x800 +#define OPENPIC_SRC_REG_START 0x10000 +#define OPENPIC_SRC_REG_SIZE (MAX_SRC * 0x20) +#define OPENPIC_CPU_REG_START 0x20000 +#define OPENPIC_CPU_REG_SIZE (0x100 + ((MAX_CPU - 1) * 0x1000)) + +struct fsl_mpic_info { + int max_ext; +}; + +static struct fsl_mpic_info fsl_mpic_20 = { + .max_ext = 12, +}; + +static struct fsl_mpic_info fsl_mpic_42 = { + .max_ext = 12, +}; + +#define FRR_NIRQ_SHIFT 16 +#define FRR_NCPU_SHIFT 8 +#define FRR_VID_SHIFT 0 + +#define VID_REVISION_1_2 2 +#define VID_REVISION_1_3 3 + +#define VIR_GENERIC 0x00000000 /* Generic Vendor ID */ + +#define GCR_RESET 0x80000000 +#define GCR_MODE_PASS 0x00000000 +#define GCR_MODE_MIXED 0x20000000 +#define GCR_MODE_PROXY 0x60000000 + +#define TBCR_CI 0x80000000 /* count inhibit */ +#define TCCR_TOG 0x80000000 /* toggles when decrement to zero */ + +#define IDR_EP_SHIFT 31 +#define IDR_EP_MASK (1 << IDR_EP_SHIFT) +#define IDR_CI0_SHIFT 30 +#define IDR_CI1_SHIFT 29 +#define IDR_P1_SHIFT 1 +#define IDR_P0_SHIFT 0 + +#define ILR_INTTGT_MASK 0x000000ff +#define ILR_INTTGT_INT 0x00 +#define ILR_INTTGT_CINT 0x01 /* critical */ +#define ILR_INTTGT_MCP 0x02 /* machine check */ +#define NUM_OUTPUTS 3 + +#define MSIIR_OFFSET 0x140 +#define MSIIR_SRS_SHIFT 29 +#define MSIIR_SRS_MASK (0x7 << MSIIR_SRS_SHIFT) +#define MSIIR_IBS_SHIFT 24 +#define MSIIR_IBS_MASK (0x1f << MSIIR_IBS_SHIFT) + +static int get_current_cpu(void) +{ +#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) + struct kvm_vcpu *vcpu = current->thread.kvm_vcpu; + return vcpu ? vcpu->arch.irq_cpu_id : -1; +#else + /* XXX */ + return -1; +#endif +} + +static int openpic_cpu_write_internal(void *opaque, gpa_t addr, + u32 val, int idx); +static int openpic_cpu_read_internal(void *opaque, gpa_t addr, + u32 *ptr, int idx); + +enum irq_type { + IRQ_TYPE_NORMAL = 0, + IRQ_TYPE_FSLINT, /* FSL internal interrupt -- level only */ + IRQ_TYPE_FSLSPECIAL, /* FSL timer/IPI interrupt, edge, no polarity */ +}; + +struct irq_queue { + /* Round up to the nearest 64 IRQs so that the queue length + * won't change when moving between 32 and 64 bit hosts. 
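+ *
+ * With MAX_IRQ = 256 + 4 + 4 = 264, this pads the bitmap below to
+ * 320 bits (40 bytes) whether BITS_PER_LONG is 32 or 64.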
+ */ + unsigned long queue[BITS_TO_LONGS((MAX_IRQ + 63) & ~63)]; + int next; + int priority; +}; + +struct irq_source { + uint32_t ivpr; /* IRQ vector/priority register */ + uint32_t idr; /* IRQ destination register */ + uint32_t destmask; /* bitmap of CPU destinations */ + int last_cpu; + int output; /* IRQ level, e.g. ILR_INTTGT_INT */ + int pending; /* TRUE if IRQ is pending */ + enum irq_type type; + bool level:1; /* level-triggered */ + bool nomask:1; /* critical interrupts ignore mask on some FSL MPICs */ +}; + +#define IVPR_MASK_SHIFT 31 +#define IVPR_MASK_MASK (1 << IVPR_MASK_SHIFT) +#define IVPR_ACTIVITY_SHIFT 30 +#define IVPR_ACTIVITY_MASK (1 << IVPR_ACTIVITY_SHIFT) +#define IVPR_MODE_SHIFT 29 +#define IVPR_MODE_MASK (1 << IVPR_MODE_SHIFT) +#define IVPR_POLARITY_SHIFT 23 +#define IVPR_POLARITY_MASK (1 << IVPR_POLARITY_SHIFT) +#define IVPR_SENSE_SHIFT 22 +#define IVPR_SENSE_MASK (1 << IVPR_SENSE_SHIFT) + +#define IVPR_PRIORITY_MASK (0xF << 16) +#define IVPR_PRIORITY(_ivprr_) ((int)(((_ivprr_) & IVPR_PRIORITY_MASK) >> 16)) +#define IVPR_VECTOR(opp, _ivprr_) ((_ivprr_) & (opp)->vector_mask) + +/* IDR[EP/CI] are only for FSL MPIC prior to v4.0 */ +#define IDR_EP 0x80000000 /* external pin */ +#define IDR_CI 0x40000000 /* critical interrupt */ + +struct irq_dest { + struct kvm_vcpu *vcpu; + + int32_t ctpr; /* CPU current task priority */ + struct irq_queue raised; + struct irq_queue servicing; + + /* Count of IRQ sources asserting on non-INT outputs */ + uint32_t outputs_active[NUM_OUTPUTS]; +}; + +#define MAX_MMIO_REGIONS 10 + +struct openpic { + struct kvm *kvm; + struct kvm_device *dev; + struct kvm_io_device mmio; + const struct mem_reg *mmio_regions[MAX_MMIO_REGIONS]; + int num_mmio_regions; + + gpa_t reg_base; + spinlock_t lock; + + /* Behavior control */ + struct fsl_mpic_info *fsl; + uint32_t model; + uint32_t flags; + uint32_t nb_irqs; + uint32_t vid; + uint32_t vir; /* Vendor identification register */ + uint32_t vector_mask; + uint32_t tfrr_reset; + uint32_t ivpr_reset; + uint32_t idr_reset; + uint32_t brr1; + uint32_t mpic_mode_mask; + + /* Global registers */ + uint32_t frr; /* Feature reporting register */ + uint32_t gcr; /* Global configuration register */ + uint32_t pir; /* Processor initialization register */ + uint32_t spve; /* Spurious vector register */ + uint32_t tfrr; /* Timer frequency reporting register */ + /* Source registers */ + struct irq_source src[MAX_IRQ]; + /* Local registers per output pin */ + struct irq_dest dst[MAX_CPU]; + uint32_t nb_cpus; + /* Timer registers */ + struct { + uint32_t tccr; /* Global timer current count register */ + uint32_t tbcr; /* Global timer base count register */ + } timers[MAX_TMR]; + /* Shared MSI registers */ + struct { + uint32_t msir; /* Shared Message Signaled Interrupt Register */ + } msi[MAX_MSI]; + uint32_t max_irq; + uint32_t irq_ipi0; + uint32_t irq_tim0; + uint32_t irq_msi; +}; + + +static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst, + int output) +{ + struct kvm_interrupt irq = { + .irq = KVM_INTERRUPT_SET_LEVEL, + }; + + if (!dst->vcpu) { + pr_debug("%s: destination cpu %d does not exist\n", + __func__, (int)(dst - &opp->dst[0])); + return; + } + + pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id, + output); + + if (output != ILR_INTTGT_INT) /* TODO */ + return; + + kvm_vcpu_ioctl_interrupt(dst->vcpu, &irq); +} + +static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst, + int output) +{ + if (!dst->vcpu) { + pr_debug("%s: destination cpu %d does not 
exist\n", + __func__, (int)(dst - &opp->dst[0])); + return; + } + + pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id, + output); + + if (output != ILR_INTTGT_INT) /* TODO */ + return; + + kvmppc_core_dequeue_external(dst->vcpu); +} + +static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ) +{ + set_bit(n_IRQ, q->queue); +} + +static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ) +{ + clear_bit(n_IRQ, q->queue); +} + +static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ) +{ + return test_bit(n_IRQ, q->queue); +} + +static void IRQ_check(struct openpic *opp, struct irq_queue *q) +{ + int irq = -1; + int next = -1; + int priority = -1; + + for (;;) { + irq = find_next_bit(q->queue, opp->max_irq, irq + 1); + if (irq == opp->max_irq) + break; + + pr_debug("IRQ_check: irq %d set ivpr_pr=%d pr=%d\n", + irq, IVPR_PRIORITY(opp->src[irq].ivpr), priority); + + if (IVPR_PRIORITY(opp->src[irq].ivpr) > priority) { + next = irq; + priority = IVPR_PRIORITY(opp->src[irq].ivpr); + } + } + + q->next = next; + q->priority = priority; +} + +static int IRQ_get_next(struct openpic *opp, struct irq_queue *q) +{ + /* XXX: optimize */ + IRQ_check(opp, q); + + return q->next; +} + +static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ, + bool active, bool was_active) +{ + struct irq_dest *dst; + struct irq_source *src; + int priority; + + dst = &opp->dst[n_CPU]; + src = &opp->src[n_IRQ]; + + pr_debug("%s: IRQ %d active %d was %d\n", + __func__, n_IRQ, active, was_active); + + if (src->output != ILR_INTTGT_INT) { + pr_debug("%s: output %d irq %d active %d was %d count %d\n", + __func__, src->output, n_IRQ, active, was_active, + dst->outputs_active[src->output]); + + /* On Freescale MPIC, critical interrupts ignore priority, + * IACK, EOI, etc. Before MPIC v4.1 they also ignore + * masking. + */ + if (active) { + if (!was_active && + dst->outputs_active[src->output]++ == 0) { + pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n", + __func__, src->output, n_CPU, n_IRQ); + mpic_irq_raise(opp, dst, src->output); + } + } else { + if (was_active && + --dst->outputs_active[src->output] == 0) { + pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n", + __func__, src->output, n_CPU, n_IRQ); + mpic_irq_lower(opp, dst, src->output); + } + } + + return; + } + + priority = IVPR_PRIORITY(src->ivpr); + + /* Even if the interrupt doesn't have enough priority, + * it is still raised, in case ctpr is lowered later. 
+ */ + if (active) + IRQ_setbit(&dst->raised, n_IRQ); + else + IRQ_resetbit(&dst->raised, n_IRQ); + + IRQ_check(opp, &dst->raised); + + if (active && priority <= dst->ctpr) { + pr_debug("%s: IRQ %d priority %d too low for ctpr %d on CPU %d\n", + __func__, n_IRQ, priority, dst->ctpr, n_CPU); + active = 0; + } + + if (active) { + if (IRQ_get_next(opp, &dst->servicing) >= 0 && + priority <= dst->servicing.priority) { + pr_debug("%s: IRQ %d is hidden by servicing IRQ %d on CPU %d\n", + __func__, n_IRQ, dst->servicing.next, n_CPU); + } else { + pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n", + __func__, n_CPU, n_IRQ, dst->raised.next); + mpic_irq_raise(opp, dst, ILR_INTTGT_INT); + } + } else { + IRQ_get_next(opp, &dst->servicing); + if (dst->raised.priority > dst->ctpr && + dst->raised.priority > dst->servicing.priority) { + pr_debug("%s: IRQ %d inactive, IRQ %d prio %d above %d/%d, CPU %d\n", + __func__, n_IRQ, dst->raised.next, + dst->raised.priority, dst->ctpr, + dst->servicing.priority, n_CPU); + /* IRQ line stays asserted */ + } else { + pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n", + __func__, n_IRQ, dst->ctpr, + dst->servicing.priority, n_CPU); + mpic_irq_lower(opp, dst, ILR_INTTGT_INT); + } + } +} + +/* update pic state because registers for n_IRQ have changed value */ +static void openpic_update_irq(struct openpic *opp, int n_IRQ) +{ + struct irq_source *src; + bool active, was_active; + int i; + + src = &opp->src[n_IRQ]; + active = src->pending; + + if ((src->ivpr & IVPR_MASK_MASK) && !src->nomask) { + /* Interrupt source is disabled */ + pr_debug("%s: IRQ %d is disabled\n", __func__, n_IRQ); + active = false; + } + + was_active = !!(src->ivpr & IVPR_ACTIVITY_MASK); + + /* + * We don't have a similar check for already-active because + * ctpr may have changed and we need to withdraw the interrupt. 
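+ * (Only the inactive-to-inactive case may return early below; an
+ * update that leaves the source active must still be processed.)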
+ */ + if (!active && !was_active) { + pr_debug("%s: IRQ %d is already inactive\n", __func__, n_IRQ); + return; + } + + if (active) + src->ivpr |= IVPR_ACTIVITY_MASK; + else + src->ivpr &= ~IVPR_ACTIVITY_MASK; + + if (src->destmask == 0) { + /* No target */ + pr_debug("%s: IRQ %d has no target\n", __func__, n_IRQ); + return; + } + + if (src->destmask == (1 << src->last_cpu)) { + /* Only one CPU is allowed to receive this IRQ */ + IRQ_local_pipe(opp, src->last_cpu, n_IRQ, active, was_active); + } else if (!(src->ivpr & IVPR_MODE_MASK)) { + /* Directed delivery mode */ + for (i = 0; i < opp->nb_cpus; i++) { + if (src->destmask & (1 << i)) { + IRQ_local_pipe(opp, i, n_IRQ, active, + was_active); + } + } + } else { + /* Distributed delivery mode */ + for (i = src->last_cpu + 1; i != src->last_cpu; i++) { + if (i == opp->nb_cpus) + i = 0; + + if (src->destmask & (1 << i)) { + IRQ_local_pipe(opp, i, n_IRQ, active, + was_active); + src->last_cpu = i; + break; + } + } + } +} + +static void openpic_set_irq(void *opaque, int n_IRQ, int level) +{ + struct openpic *opp = opaque; + struct irq_source *src; + + if (n_IRQ >= MAX_IRQ) { + WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ); + return; + } + + src = &opp->src[n_IRQ]; + pr_debug("openpic: set irq %d = %d ivpr=0x%08x\n", + n_IRQ, level, src->ivpr); + if (src->level) { + /* level-sensitive irq */ + src->pending = level; + openpic_update_irq(opp, n_IRQ); + } else { + /* edge-sensitive irq */ + if (level) { + src->pending = 1; + openpic_update_irq(opp, n_IRQ); + } + + if (src->output != ILR_INTTGT_INT) { + /* Edge-triggered interrupts shouldn't be used + * with non-INT delivery, but just in case, + * try to make it do something sane rather than + * cause an interrupt storm. This is close to + * what you'd probably see happen in real hardware. 
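+			 * In effect the edge becomes a pulse: a rising
+			 * edge set pending above, and it is cleared
+			 * again right below, so a non-INT output sees
+			 * one raise/lower pair instead of staying
+			 * asserted forever.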
+ */ + src->pending = 0; + openpic_update_irq(opp, n_IRQ); + } + } +} + +static void openpic_reset(struct openpic *opp) +{ + int i; + + opp->gcr = GCR_RESET; + /* Initialise controller registers */ + opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) | + (opp->vid << FRR_VID_SHIFT); + + opp->pir = 0; + opp->spve = -1 & opp->vector_mask; + opp->tfrr = opp->tfrr_reset; + /* Initialise IRQ sources */ + for (i = 0; i < opp->max_irq; i++) { + opp->src[i].ivpr = opp->ivpr_reset; + opp->src[i].idr = opp->idr_reset; + + switch (opp->src[i].type) { + case IRQ_TYPE_NORMAL: + opp->src[i].level = + !!(opp->ivpr_reset & IVPR_SENSE_MASK); + break; + + case IRQ_TYPE_FSLINT: + opp->src[i].ivpr |= IVPR_POLARITY_MASK; + break; + + case IRQ_TYPE_FSLSPECIAL: + break; + } + } + /* Initialise IRQ destinations */ + for (i = 0; i < MAX_CPU; i++) { + opp->dst[i].ctpr = 15; + memset(&opp->dst[i].raised, 0, sizeof(struct irq_queue)); + opp->dst[i].raised.next = -1; + memset(&opp->dst[i].servicing, 0, sizeof(struct irq_queue)); + opp->dst[i].servicing.next = -1; + } + /* Initialise timers */ + for (i = 0; i < MAX_TMR; i++) { + opp->timers[i].tccr = 0; + opp->timers[i].tbcr = TBCR_CI; + } + /* Go out of RESET state */ + opp->gcr = 0; +} + +static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ) +{ + return opp->src[n_IRQ].idr; +} + +static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ) +{ + if (opp->flags & OPENPIC_FLAG_ILR) + return opp->src[n_IRQ].output; + + return 0xffffffff; +} + +static inline uint32_t read_IRQreg_ivpr(struct openpic *opp, int n_IRQ) +{ + return opp->src[n_IRQ].ivpr; +} + +static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ, + uint32_t val) +{ + struct irq_source *src = &opp->src[n_IRQ]; + uint32_t normal_mask = (1UL << opp->nb_cpus) - 1; + uint32_t crit_mask = 0; + uint32_t mask = normal_mask; + int crit_shift = IDR_EP_SHIFT - opp->nb_cpus; + int i; + + if (opp->flags & OPENPIC_FLAG_IDR_CRIT) { + crit_mask = mask << crit_shift; + mask |= crit_mask | IDR_EP; + } + + src->idr = val & mask; + pr_debug("Set IDR %d to 0x%08x\n", n_IRQ, src->idr); + + if (opp->flags & OPENPIC_FLAG_IDR_CRIT) { + if (src->idr & crit_mask) { + if (src->idr & normal_mask) { + pr_debug("%s: IRQ configured for multiple output types, using critical\n", + __func__); + } + + src->output = ILR_INTTGT_CINT; + src->nomask = true; + src->destmask = 0; + + for (i = 0; i < opp->nb_cpus; i++) { + int n_ci = IDR_CI0_SHIFT - i; + + if (src->idr & (1UL << n_ci)) + src->destmask |= 1UL << i; + } + } else { + src->output = ILR_INTTGT_INT; + src->nomask = false; + src->destmask = src->idr & normal_mask; + } + } else { + src->destmask = src->idr; + } +} + +static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ, + uint32_t val) +{ + if (opp->flags & OPENPIC_FLAG_ILR) { + struct irq_source *src = &opp->src[n_IRQ]; + + src->output = val & ILR_INTTGT_MASK; + pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, src->idr, + src->output); + + /* TODO: on MPIC v4.0 only, set nomask for non-INT */ + } +} + +static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ, + uint32_t val) +{ + uint32_t mask; + + /* NOTE when implementing newer FSL MPIC models: starting with v4.0, + * the polarity bit is read-only on internal interrupts. 
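+	 * A v4.0+ model would presumably just drop the polarity bit
+	 * from the writable mask for such sources, e.g.:
+	 *
+	 *	if (opp->src[n_IRQ].type == IRQ_TYPE_FSLINT)
+	 *		mask &= ~IVPR_POLARITY_MASK;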
+ */
+	mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK |
+	       IVPR_POLARITY_MASK | opp->vector_mask;
+
+	/* ACTIVITY bit is read-only */
+	opp->src[n_IRQ].ivpr =
+	    (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask);
+
+	/* For FSL internal interrupts, the sense bit is reserved and zero,
+	 * and the interrupt is always level-triggered. Timers and IPIs
+	 * have no sense or polarity bits, and are edge-triggered.
+	 */
+	switch (opp->src[n_IRQ].type) {
+	case IRQ_TYPE_NORMAL:
+		opp->src[n_IRQ].level =
+		    !!(opp->src[n_IRQ].ivpr & IVPR_SENSE_MASK);
+		break;
+
+	case IRQ_TYPE_FSLINT:
+		opp->src[n_IRQ].ivpr &= ~IVPR_SENSE_MASK;
+		break;
+
+	case IRQ_TYPE_FSLSPECIAL:
+		opp->src[n_IRQ].ivpr &= ~(IVPR_POLARITY_MASK | IVPR_SENSE_MASK);
+		break;
+	}
+
+	openpic_update_irq(opp, n_IRQ);
+	pr_debug("Set IVPR %d to 0x%08x -> 0x%08x\n", n_IRQ, val,
+		 opp->src[n_IRQ].ivpr);
+}
+
+static void openpic_gcr_write(struct openpic *opp, uint64_t val)
+{
+	if (val & GCR_RESET) {
+		openpic_reset(opp);
+		return;
+	}
+
+	opp->gcr &= ~opp->mpic_mode_mask;
+	opp->gcr |= val & opp->mpic_mode_mask;
+}
+
+static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+	int err = 0;
+
+	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
+	if (addr & 0xF)
+		return 0;
+
+	switch (addr) {
+	case 0x00:	/* Block Revision Register1 (BRR1) is read-only */
+		break;
+	case 0x40:
+	case 0x50:
+	case 0x60:
+	case 0x70:
+	case 0x80:
+	case 0x90:
+	case 0xA0:
+	case 0xB0:
+		err = openpic_cpu_write_internal(opp, addr, val,
+						 get_current_cpu());
+		break;
+	case 0x1000:	/* FRR */
+		break;
+	case 0x1020:	/* GCR */
+		openpic_gcr_write(opp, val);
+		break;
+	case 0x1080:	/* VIR */
+		break;
+	case 0x1090:	/* PIR */
+		/*
+		 * This register is used to reset a CPU core --
+		 * let userspace handle it.
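+		 * (The -ENXIO below fails the in-kernel MMIO access,
+		 * so the write is pushed out to the VMM, which can
+		 * emulate the core reset itself.)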
+ */ + err = -ENXIO; + break; + case 0x10A0: /* IPI_IVPR */ + case 0x10B0: + case 0x10C0: + case 0x10D0: { + int idx; + idx = (addr - 0x10A0) >> 4; + write_IRQreg_ivpr(opp, opp->irq_ipi0 + idx, val); + break; + } + case 0x10E0: /* SPVE */ + opp->spve = val & opp->vector_mask; + break; + default: + break; + } + + return err; +} + +static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr) +{ + struct openpic *opp = opaque; + u32 retval; + int err = 0; + + pr_debug("%s: addr %#llx\n", __func__, addr); + retval = 0xFFFFFFFF; + if (addr & 0xF) + goto out; + + switch (addr) { + case 0x1000: /* FRR */ + retval = opp->frr; + retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT; + break; + case 0x1020: /* GCR */ + retval = opp->gcr; + break; + case 0x1080: /* VIR */ + retval = opp->vir; + break; + case 0x1090: /* PIR */ + retval = 0x00000000; + break; + case 0x00: /* Block Revision Register1 (BRR1) */ + retval = opp->brr1; + break; + case 0x40: + case 0x50: + case 0x60: + case 0x70: + case 0x80: + case 0x90: + case 0xA0: + case 0xB0: + err = openpic_cpu_read_internal(opp, addr, + &retval, get_current_cpu()); + break; + case 0x10A0: /* IPI_IVPR */ + case 0x10B0: + case 0x10C0: + case 0x10D0: + { + int idx; + idx = (addr - 0x10A0) >> 4; + retval = read_IRQreg_ivpr(opp, opp->irq_ipi0 + idx); + } + break; + case 0x10E0: /* SPVE */ + retval = opp->spve; + break; + default: + break; + } + +out: + pr_debug("%s: => 0x%08x\n", __func__, retval); + *ptr = retval; + return err; +} + +static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val) +{ + struct openpic *opp = opaque; + int idx; + + addr += 0x10f0; + + pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val); + if (addr & 0xF) + return 0; + + if (addr == 0x10f0) { + /* TFRR */ + opp->tfrr = val; + return 0; + } + + idx = (addr >> 6) & 0x3; + addr = addr & 0x30; + + switch (addr & 0x30) { + case 0x00: /* TCCR */ + break; + case 0x10: /* TBCR */ + if ((opp->timers[idx].tccr & TCCR_TOG) != 0 && + (val & TBCR_CI) == 0 && + (opp->timers[idx].tbcr & TBCR_CI) != 0) + opp->timers[idx].tccr &= ~TCCR_TOG; + + opp->timers[idx].tbcr = val; + break; + case 0x20: /* TVPR */ + write_IRQreg_ivpr(opp, opp->irq_tim0 + idx, val); + break; + case 0x30: /* TDR */ + write_IRQreg_idr(opp, opp->irq_tim0 + idx, val); + break; + } + + return 0; +} + +static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr) +{ + struct openpic *opp = opaque; + uint32_t retval = -1; + int idx; + + pr_debug("%s: addr %#llx\n", __func__, addr); + if (addr & 0xF) + goto out; + + idx = (addr >> 6) & 0x3; + if (addr == 0x0) { + /* TFRR */ + retval = opp->tfrr; + goto out; + } + + switch (addr & 0x30) { + case 0x00: /* TCCR */ + retval = opp->timers[idx].tccr; + break; + case 0x10: /* TBCR */ + retval = opp->timers[idx].tbcr; + break; + case 0x20: /* TIPV */ + retval = read_IRQreg_ivpr(opp, opp->irq_tim0 + idx); + break; + case 0x30: /* TIDE (TIDR) */ + retval = read_IRQreg_idr(opp, opp->irq_tim0 + idx); + break; + } + +out: + pr_debug("%s: => 0x%08x\n", __func__, retval); + *ptr = retval; + return 0; +} + +static int openpic_src_write(void *opaque, gpa_t addr, u32 val) +{ + struct openpic *opp = opaque; + int idx; + + pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val); + + addr = addr & 0xffff; + idx = addr >> 5; + + switch (addr & 0x1f) { + case 0x00: + write_IRQreg_ivpr(opp, idx, val); + break; + case 0x10: + write_IRQreg_idr(opp, idx, val); + break; + case 0x18: + write_IRQreg_ilr(opp, idx, val); + break; + } + + return 0; +} + +static int openpic_src_read(void *opaque, gpa_t 
addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+	uint32_t retval;
+	int idx;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+	retval = 0xFFFFFFFF;
+
+	addr = addr & 0xffff;
+	idx = addr >> 5;
+
+	switch (addr & 0x1f) {
+	case 0x00:
+		retval = read_IRQreg_ivpr(opp, idx);
+		break;
+	case 0x10:
+		retval = read_IRQreg_idr(opp, idx);
+		break;
+	case 0x18:
+		retval = read_IRQreg_ilr(opp, idx);
+		break;
+	}
+
+	pr_debug("%s: => 0x%08x\n", __func__, retval);
+	*ptr = retval;
+	return 0;
+}
+
+static int openpic_msi_write(void *opaque, gpa_t addr, u32 val)
+{
+	struct openpic *opp = opaque;
+	int idx = opp->irq_msi;
+	int srs, ibs;
+
+	pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
+	if (addr & 0xF)
+		return 0;
+
+	switch (addr) {
+	case MSIIR_OFFSET:
+		srs = val >> MSIIR_SRS_SHIFT;
+		idx += srs;
+		ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT;
+		opp->msi[srs].msir |= 1 << ibs;
+		openpic_set_irq(opp, idx, 1);
+		break;
+	default:
+		/* most registers are read-only, thus ignored */
+		break;
+	}
+
+	return 0;
+}
+
+static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	struct openpic *opp = opaque;
+	uint32_t r = 0;
+	int i, srs;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+	if (addr & 0xF)
+		return -ENXIO;
+
+	srs = addr >> 4;
+
+	switch (addr) {
+	case 0x00:
+	case 0x10:
+	case 0x20:
+	case 0x30:
+	case 0x40:
+	case 0x50:
+	case 0x60:
+	case 0x70:	/* MSIRs */
+		r = opp->msi[srs].msir;
+		/* Clear on read */
+		opp->msi[srs].msir = 0;
+		openpic_set_irq(opp, opp->irq_msi + srs, 0);
+		break;
+	case 0x120:	/* MSISR */
+		for (i = 0; i < MAX_MSI; i++)
+			r |= (opp->msi[i].msir ? 1 : 0) << i;
+		break;
+	}
+
+	pr_debug("%s: => 0x%08x\n", __func__, r);
+	*ptr = r;
+	return 0;
+}
+
+static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr)
+{
+	uint32_t r = 0;
+
+	pr_debug("%s: addr %#llx\n", __func__, addr);
+
+	/* TODO: EISR/EIMR */
+
+	*ptr = r;
+	return 0;
+}
+
+static int openpic_summary_write(void *opaque, gpa_t addr, u32 val)
+{
+	pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
+
+	/* TODO: EISR/EIMR */
+	return 0;
+}
+
+static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
+				      u32 val, int idx)
+{
+	struct openpic *opp = opaque;
+	struct irq_source *src;
+	struct irq_dest *dst;
+	int s_IRQ, n_IRQ;
+
+	pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx,
+		 addr, val);
+
+	if (idx < 0)
+		return 0;
+
+	if (addr & 0xF)
+		return 0;
+
+	dst = &opp->dst[idx];
+	addr &= 0xFF0;
+	switch (addr) {
+	case 0x40:	/* IPIDR */
+	case 0x50:
+	case 0x60:
+	case 0x70:
+		idx = (addr - 0x40) >> 4;
+		/* we use IDE as a mask of which CPUs we still have to
+		 * deliver the IPI to.
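+		 * (openpic_iack() clears each CPU's bit out of
+		 * destmask as that CPU acknowledges the IPI.)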
*/ + opp->src[opp->irq_ipi0 + idx].destmask |= val; + openpic_set_irq(opp, opp->irq_ipi0 + idx, 1); + openpic_set_irq(opp, opp->irq_ipi0 + idx, 0); + break; + case 0x80: /* CTPR */ + dst->ctpr = val & 0x0000000F; + + pr_debug("%s: set CPU %d ctpr to %d, raised %d servicing %d\n", + __func__, idx, dst->ctpr, dst->raised.priority, + dst->servicing.priority); + + if (dst->raised.priority <= dst->ctpr) { + pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n", + __func__, idx); + mpic_irq_lower(opp, dst, ILR_INTTGT_INT); + } else if (dst->raised.priority > dst->servicing.priority) { + pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n", + __func__, idx, dst->raised.next); + mpic_irq_raise(opp, dst, ILR_INTTGT_INT); + } + + break; + case 0x90: /* WHOAMI */ + /* Read-only register */ + break; + case 0xA0: /* IACK */ + /* Read-only register */ + break; + case 0xB0: { /* EOI */ + int notify_eoi; + + pr_debug("EOI\n"); + s_IRQ = IRQ_get_next(opp, &dst->servicing); + + if (s_IRQ < 0) { + pr_debug("%s: EOI with no interrupt in service\n", + __func__); + break; + } + + IRQ_resetbit(&dst->servicing, s_IRQ); + /* Notify listeners that the IRQ is over */ + notify_eoi = s_IRQ; + /* Set up next servicing IRQ */ + s_IRQ = IRQ_get_next(opp, &dst->servicing); + /* Check queued interrupts. */ + n_IRQ = IRQ_get_next(opp, &dst->raised); + src = &opp->src[n_IRQ]; + if (n_IRQ != -1 && + (s_IRQ == -1 || + IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) { + pr_debug("Raise OpenPIC INT output cpu %d irq %d\n", + idx, n_IRQ); + mpic_irq_raise(opp, dst, ILR_INTTGT_INT); + } + + spin_unlock(&opp->lock); + kvm_notify_acked_irq(opp->kvm, 0, notify_eoi); + spin_lock(&opp->lock); + + break; + } + default: + break; + } + + return 0; +} + +static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val) +{ + struct openpic *opp = opaque; + + return openpic_cpu_write_internal(opp, addr, val, + (addr & 0x1f000) >> 12); +} + +static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst, + int cpu) +{ + struct irq_source *src; + int retval, irq; + + pr_debug("Lower OpenPIC INT output\n"); + mpic_irq_lower(opp, dst, ILR_INTTGT_INT); + + irq = IRQ_get_next(opp, &dst->raised); + pr_debug("IACK: irq=%d\n", irq); + + if (irq == -1) + /* No more interrupt pending */ + return opp->spve; + + src = &opp->src[irq]; + if (!(src->ivpr & IVPR_ACTIVITY_MASK) || + !(IVPR_PRIORITY(src->ivpr) > dst->ctpr)) { + pr_err("%s: bad raised IRQ %d ctpr %d ivpr 0x%08x\n", + __func__, irq, dst->ctpr, src->ivpr); + openpic_update_irq(opp, irq); + retval = opp->spve; + } else { + /* IRQ enter servicing state */ + IRQ_setbit(&dst->servicing, irq); + retval = IVPR_VECTOR(opp, src->ivpr); + } + + if (!src->level) { + /* edge-sensitive IRQ */ + src->ivpr &= ~IVPR_ACTIVITY_MASK; + src->pending = 0; + IRQ_resetbit(&dst->raised, irq); + } + + if ((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + MAX_IPI))) { + src->destmask &= ~(1 << cpu); + if (src->destmask && !src->level) { + /* trigger on CPUs that didn't know about it yet */ + openpic_set_irq(opp, irq, 1); + openpic_set_irq(opp, irq, 0); + /* if all CPUs knew about it, set active bit again */ + src->ivpr |= IVPR_ACTIVITY_MASK; + } + } + + return retval; +} + +void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu) +{ + struct openpic *opp = vcpu->arch.mpic; + int cpu = vcpu->arch.irq_cpu_id; + unsigned long flags; + + spin_lock_irqsave(&opp->lock, flags); + + if ((opp->gcr & opp->mpic_mode_mask) == GCR_MODE_PROXY) + kvmppc_set_epr(vcpu, openpic_iack(opp, &opp->dst[cpu], cpu)); + + 
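+	/*
+	 * In mixed mode the EPR is left untouched: the guest is then
+	 * expected to fetch the vector through the memory-mapped IACK
+	 * register rather than the external proxy facility.
+	 */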
spin_unlock_irqrestore(&opp->lock, flags); +} + +static int openpic_cpu_read_internal(void *opaque, gpa_t addr, + u32 *ptr, int idx) +{ + struct openpic *opp = opaque; + struct irq_dest *dst; + uint32_t retval; + + pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr); + retval = 0xFFFFFFFF; + + if (idx < 0) + goto out; + + if (addr & 0xF) + goto out; + + dst = &opp->dst[idx]; + addr &= 0xFF0; + switch (addr) { + case 0x80: /* CTPR */ + retval = dst->ctpr; + break; + case 0x90: /* WHOAMI */ + retval = idx; + break; + case 0xA0: /* IACK */ + retval = openpic_iack(opp, dst, idx); + break; + case 0xB0: /* EOI */ + retval = 0; + break; + default: + break; + } + pr_debug("%s: => 0x%08x\n", __func__, retval); + +out: + *ptr = retval; + return 0; +} + +static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr) +{ + struct openpic *opp = opaque; + + return openpic_cpu_read_internal(opp, addr, ptr, + (addr & 0x1f000) >> 12); +} + +struct mem_reg { + int (*read)(void *opaque, gpa_t addr, u32 *ptr); + int (*write)(void *opaque, gpa_t addr, u32 val); + gpa_t start_addr; + int size; +}; + +static const struct mem_reg openpic_gbl_mmio = { + .write = openpic_gbl_write, + .read = openpic_gbl_read, + .start_addr = OPENPIC_GLB_REG_START, + .size = OPENPIC_GLB_REG_SIZE, +}; + +static const struct mem_reg openpic_tmr_mmio = { + .write = openpic_tmr_write, + .read = openpic_tmr_read, + .start_addr = OPENPIC_TMR_REG_START, + .size = OPENPIC_TMR_REG_SIZE, +}; + +static const struct mem_reg openpic_cpu_mmio = { + .write = openpic_cpu_write, + .read = openpic_cpu_read, + .start_addr = OPENPIC_CPU_REG_START, + .size = OPENPIC_CPU_REG_SIZE, +}; + +static const struct mem_reg openpic_src_mmio = { + .write = openpic_src_write, + .read = openpic_src_read, + .start_addr = OPENPIC_SRC_REG_START, + .size = OPENPIC_SRC_REG_SIZE, +}; + +static const struct mem_reg openpic_msi_mmio = { + .read = openpic_msi_read, + .write = openpic_msi_write, + .start_addr = OPENPIC_MSI_REG_START, + .size = OPENPIC_MSI_REG_SIZE, +}; + +static const struct mem_reg openpic_summary_mmio = { + .read = openpic_summary_read, + .write = openpic_summary_write, + .start_addr = OPENPIC_SUMMARY_REG_START, + .size = OPENPIC_SUMMARY_REG_SIZE, +}; + +static void add_mmio_region(struct openpic *opp, const struct mem_reg *mr) +{ + if (opp->num_mmio_regions >= MAX_MMIO_REGIONS) { + WARN(1, "kvm mpic: too many mmio regions\n"); + return; + } + + opp->mmio_regions[opp->num_mmio_regions++] = mr; +} + +static void fsl_common_init(struct openpic *opp) +{ + int i; + int virq = MAX_SRC; + + add_mmio_region(opp, &openpic_msi_mmio); + add_mmio_region(opp, &openpic_summary_mmio); + + opp->vid = VID_REVISION_1_2; + opp->vir = VIR_GENERIC; + opp->vector_mask = 0xFFFF; + opp->tfrr_reset = 0; + opp->ivpr_reset = IVPR_MASK_MASK; + opp->idr_reset = 1 << 0; + opp->max_irq = MAX_IRQ; + + opp->irq_ipi0 = virq; + virq += MAX_IPI; + opp->irq_tim0 = virq; + virq += MAX_TMR; + + BUG_ON(virq > MAX_IRQ); + + opp->irq_msi = 224; + + for (i = 0; i < opp->fsl->max_ext; i++) + opp->src[i].level = false; + + /* Internal interrupts, including message and MSI */ + for (i = 16; i < MAX_SRC; i++) { + opp->src[i].type = IRQ_TYPE_FSLINT; + opp->src[i].level = true; + } + + /* timers and IPIs */ + for (i = MAX_SRC; i < virq; i++) { + opp->src[i].type = IRQ_TYPE_FSLSPECIAL; + opp->src[i].level = false; + } +} + +static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr) +{ + int i; + + for (i = 0; i < opp->num_mmio_regions; i++) { + const struct mem_reg *mr = 
opp->mmio_regions[i]; + + if (mr->start_addr > addr || addr >= mr->start_addr + mr->size) + continue; + + return mr->read(opp, addr - mr->start_addr, ptr); + } + + return -ENXIO; +} + +static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val) +{ + int i; + + for (i = 0; i < opp->num_mmio_regions; i++) { + const struct mem_reg *mr = opp->mmio_regions[i]; + + if (mr->start_addr > addr || addr >= mr->start_addr + mr->size) + continue; + + return mr->write(opp, addr - mr->start_addr, val); + } + + return -ENXIO; +} + +static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr, + int len, void *ptr) +{ + struct openpic *opp = container_of(this, struct openpic, mmio); + int ret; + union { + u32 val; + u8 bytes[4]; + } u; + + if (addr & (len - 1)) { + pr_debug("%s: bad alignment %llx/%d\n", + __func__, addr, len); + return -EINVAL; + } + + spin_lock_irq(&opp->lock); + ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val); + spin_unlock_irq(&opp->lock); + + /* + * Technically only 32-bit accesses are allowed, but be nice to + * people dumping registers a byte at a time -- it works in real + * hardware (reads only, not writes). + */ + if (len == 4) { + *(u32 *)ptr = u.val; + pr_debug("%s: addr %llx ret %d len 4 val %x\n", + __func__, addr, ret, u.val); + } else if (len == 1) { + *(u8 *)ptr = u.bytes[addr & 3]; + pr_debug("%s: addr %llx ret %d len 1 val %x\n", + __func__, addr, ret, u.bytes[addr & 3]); + } else { + pr_debug("%s: bad length %d\n", __func__, len); + return -EINVAL; + } + + return ret; +} + +static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr, + int len, const void *ptr) +{ + struct openpic *opp = container_of(this, struct openpic, mmio); + int ret; + + if (len != 4) { + pr_debug("%s: bad length %d\n", __func__, len); + return -EOPNOTSUPP; + } + if (addr & 3) { + pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len); + return -EOPNOTSUPP; + } + + spin_lock_irq(&opp->lock); + ret = kvm_mpic_write_internal(opp, addr - opp->reg_base, + *(const u32 *)ptr); + spin_unlock_irq(&opp->lock); + + pr_debug("%s: addr %llx ret %d val %x\n", + __func__, addr, ret, *(const u32 *)ptr); + + return ret; +} + +static const struct kvm_io_device_ops mpic_mmio_ops = { + .read = kvm_mpic_read, + .write = kvm_mpic_write, +}; + +static void map_mmio(struct openpic *opp) +{ + kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops); + + kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS, + opp->reg_base, OPENPIC_REG_SIZE, + &opp->mmio); +} + +static void unmap_mmio(struct openpic *opp) +{ + kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio); +} + +static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr) +{ + u64 base; + + if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64))) + return -EFAULT; + + if (base & 0x3ffff) { + pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n", + __func__, base); + return -EINVAL; + } + + if (base == opp->reg_base) + return 0; + + mutex_lock(&opp->kvm->slots_lock); + + unmap_mmio(opp); + opp->reg_base = base; + + pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n", + __func__, base); + + if (base == 0) + goto out; + + map_mmio(opp); + +out: + mutex_unlock(&opp->kvm->slots_lock); + return 0; +} + +#define ATTR_SET 0 +#define ATTR_GET 1 + +static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type) +{ + int ret; + + if (addr & 3) + return -ENXIO; + + spin_lock_irq(&opp->lock); + + if (type == ATTR_SET) + ret = kvm_mpic_write_internal(opp, addr, *val); + else + ret = 
kvm_mpic_read_internal(opp, addr, val); + + spin_unlock_irq(&opp->lock); + + pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val); + + return ret; +} + +static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + struct openpic *opp = dev->private; + u32 attr32; + + switch (attr->group) { + case KVM_DEV_MPIC_GRP_MISC: + switch (attr->attr) { + case KVM_DEV_MPIC_BASE_ADDR: + return set_base_addr(opp, attr); + } + + break; + + case KVM_DEV_MPIC_GRP_REGISTER: + if (get_user(attr32, (u32 __user *)(long)attr->addr)) + return -EFAULT; + + return access_reg(opp, attr->attr, &attr32, ATTR_SET); + + case KVM_DEV_MPIC_GRP_IRQ_ACTIVE: + if (attr->attr > MAX_SRC) + return -EINVAL; + + if (get_user(attr32, (u32 __user *)(long)attr->addr)) + return -EFAULT; + + if (attr32 != 0 && attr32 != 1) + return -EINVAL; + + spin_lock_irq(&opp->lock); + openpic_set_irq(opp, attr->attr, attr32); + spin_unlock_irq(&opp->lock); + return 0; + } + + return -ENXIO; +} + +static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + struct openpic *opp = dev->private; + u64 attr64; + u32 attr32; + int ret; + + switch (attr->group) { + case KVM_DEV_MPIC_GRP_MISC: + switch (attr->attr) { + case KVM_DEV_MPIC_BASE_ADDR: + mutex_lock(&opp->kvm->slots_lock); + attr64 = opp->reg_base; + mutex_unlock(&opp->kvm->slots_lock); + + if (copy_to_user((u64 __user *)(long)attr->addr, + &attr64, sizeof(u64))) + return -EFAULT; + + return 0; + } + + break; + + case KVM_DEV_MPIC_GRP_REGISTER: + ret = access_reg(opp, attr->attr, &attr32, ATTR_GET); + if (ret) + return ret; + + if (put_user(attr32, (u32 __user *)(long)attr->addr)) + return -EFAULT; + + return 0; + + case KVM_DEV_MPIC_GRP_IRQ_ACTIVE: + if (attr->attr > MAX_SRC) + return -EINVAL; + + spin_lock_irq(&opp->lock); + attr32 = opp->src[attr->attr].pending; + spin_unlock_irq(&opp->lock); + + if (put_user(attr32, (u32 __user *)(long)attr->addr)) + return -EFAULT; + + return 0; + } + + return -ENXIO; +} + +static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_MPIC_GRP_MISC: + switch (attr->attr) { + case KVM_DEV_MPIC_BASE_ADDR: + return 0; + } + + break; + + case KVM_DEV_MPIC_GRP_REGISTER: + return 0; + + case KVM_DEV_MPIC_GRP_IRQ_ACTIVE: + if (attr->attr > MAX_SRC) + break; + + return 0; + } + + return -ENXIO; +} + +static void mpic_destroy(struct kvm_device *dev) +{ + struct openpic *opp = dev->private; + + dev->kvm->arch.mpic = NULL; + kfree(opp); +} + +static int mpic_set_default_irq_routing(struct openpic *opp) +{ + struct kvm_irq_routing_entry *routing; + + /* Create a nop default map, so that dereferencing it still works */ + routing = kzalloc((sizeof(*routing)), GFP_KERNEL); + if (!routing) + return -ENOMEM; + + kvm_set_irq_routing(opp->kvm, routing, 0, 0); + + kfree(routing); + return 0; +} + +static int mpic_create(struct kvm_device *dev, u32 type) +{ + struct openpic *opp; + int ret; + + /* We only support one MPIC at a time for now */ + if (dev->kvm->arch.mpic) + return -EINVAL; + + opp = kzalloc(sizeof(struct openpic), GFP_KERNEL); + if (!opp) + return -ENOMEM; + + dev->private = opp; + opp->kvm = dev->kvm; + opp->dev = dev; + opp->model = type; + spin_lock_init(&opp->lock); + + add_mmio_region(opp, &openpic_gbl_mmio); + add_mmio_region(opp, &openpic_tmr_mmio); + add_mmio_region(opp, &openpic_src_mmio); + add_mmio_region(opp, &openpic_cpu_mmio); + + switch (opp->model) { + case KVM_DEV_TYPE_FSL_MPIC_20: + opp->fsl = &fsl_mpic_20; + opp->brr1 = 
0x00400200; + opp->flags |= OPENPIC_FLAG_IDR_CRIT; + opp->nb_irqs = 80; + opp->mpic_mode_mask = GCR_MODE_MIXED; + + fsl_common_init(opp); + + break; + + case KVM_DEV_TYPE_FSL_MPIC_42: + opp->fsl = &fsl_mpic_42; + opp->brr1 = 0x00400402; + opp->flags |= OPENPIC_FLAG_ILR; + opp->nb_irqs = 196; + opp->mpic_mode_mask = GCR_MODE_PROXY; + + fsl_common_init(opp); + + break; + + default: + ret = -ENODEV; + goto err; + } + + ret = mpic_set_default_irq_routing(opp); + if (ret) + goto err; + + openpic_reset(opp); + + smp_wmb(); + dev->kvm->arch.mpic = opp; + + return 0; + +err: + kfree(opp); + return ret; +} + +struct kvm_device_ops kvm_mpic_ops = { + .name = "kvm-mpic", + .create = mpic_create, + .destroy = mpic_destroy, + .set_attr = mpic_set_attr, + .get_attr = mpic_get_attr, + .has_attr = mpic_has_attr, +}; + +int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, + u32 cpu) +{ + struct openpic *opp = dev->private; + int ret = 0; + + if (dev->ops != &kvm_mpic_ops) + return -EPERM; + if (opp->kvm != vcpu->kvm) + return -EPERM; + if (cpu < 0 || cpu >= MAX_CPU) + return -EPERM; + + spin_lock_irq(&opp->lock); + + if (opp->dst[cpu].vcpu) { + ret = -EEXIST; + goto out; + } + if (vcpu->arch.irq_type) { + ret = -EBUSY; + goto out; + } + + opp->dst[cpu].vcpu = vcpu; + opp->nb_cpus = max(opp->nb_cpus, cpu + 1); + + vcpu->arch.mpic = opp; + vcpu->arch.irq_cpu_id = cpu; + vcpu->arch.irq_type = KVMPPC_IRQ_MPIC; + + /* This might need to be changed if GCR gets extended */ + if (opp->mpic_mode_mask == GCR_MODE_PROXY) + vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL; + +out: + spin_unlock_irq(&opp->lock); + return ret; +} + +/* + * This should only happen immediately before the mpic is destroyed, + * so we shouldn't need to worry about anything still trying to + * access the vcpu pointer. + */ +void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu) +{ + BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu); + + opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL; +} + +/* + * Return value: + * < 0 Interrupt was ignored (masked or not delivered for other reasons) + * = 0 Interrupt was coalesced (previous irq is still pending) + * > 0 Number of CPUs interrupt was delivered to + */ +static int mpic_set_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + u32 irq = e->irqchip.pin; + struct openpic *opp = kvm->arch.mpic; + unsigned long flags; + + spin_lock_irqsave(&opp->lock, flags); + openpic_set_irq(opp, irq, level); + spin_unlock_irqrestore(&opp->lock, flags); + + /* All code paths we care about don't check for the return value */ + return 0; +} + +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, bool line_status) +{ + struct openpic *opp = kvm->arch.mpic; + unsigned long flags; + + spin_lock_irqsave(&opp->lock, flags); + + /* + * XXX We ignore the target address for now, as we only support + * a single MSI bank. 
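+	 * (Supporting several banks would presumably mean decoding the
+	 * bank from e->msi.address_lo/address_hi and forwarding the
+	 * write to that bank's MSIIR instead.)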
+ */ + openpic_msi_write(kvm->arch.mpic, MSIIR_OFFSET, e->msi.data); + spin_unlock_irqrestore(&opp->lock, flags); + + /* All code paths we care about don't check for the return value */ + return 0; +} + +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + e->set = mpic_set_irq; + e->irqchip.irqchip = ue->u.irqchip.irqchip; + e->irqchip.pin = ue->u.irqchip.pin; + if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) + goto out; + rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; + break; + case KVM_IRQ_ROUTING_MSI: + e->set = kvm_set_msi; + e->msi.address_lo = ue->u.msi.address_lo; + e->msi.address_hi = ue->u.msi.address_hi; + e->msi.data = ue->u.msi.data; + break; + default: + goto out; + } + + r = 0; +out: + return r; +} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 934413cd3a1b..6316ee336e88 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -25,6 +25,7 @@ #include <linux/hrtimer.h> #include <linux/fs.h> #include <linux/slab.h> +#include <linux/file.h> #include <asm/cputable.h> #include <asm/uaccess.h> #include <asm/kvm_ppc.h> @@ -32,6 +33,7 @@ #include <asm/cputhreads.h> #include <asm/irqflags.h> #include "timing.h" +#include "irq.h" #include "../mm/mmu_decl.h" #define CREATE_TRACE_POINTS @@ -317,6 +319,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_ENABLE_CAP: case KVM_CAP_ONE_REG: case KVM_CAP_IOEVENTFD: + case KVM_CAP_DEVICE_CTRL: r = 1; break; #ifndef CONFIG_KVM_BOOK3S_64_HV @@ -326,6 +329,9 @@ int kvm_dev_ioctl_check_extension(long ext) #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) case KVM_CAP_SW_TLB: #endif +#ifdef CONFIG_KVM_MPIC + case KVM_CAP_IRQ_MPIC: +#endif r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -335,6 +341,10 @@ int kvm_dev_ioctl_check_extension(long ext) #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: case KVM_CAP_PPC_ALLOC_HTAB: + case KVM_CAP_PPC_RTAS: +#ifdef CONFIG_KVM_XICS + case KVM_CAP_IRQ_XICS: +#endif r = 1; break; #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -411,18 +421,17 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) } int kvm_arch_prepare_memory_region(struct kvm *kvm, - struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_memory_slot *memslot, + struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) { return kvmppc_core_prepare_memory_region(kvm, memslot, mem); } void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { kvmppc_core_commit_memory_region(kvm, mem, old); } @@ -460,6 +469,16 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) tasklet_kill(&vcpu->arch.tasklet); kvmppc_remove_vcpu_debugfs(vcpu); + + switch (vcpu->arch.irq_type) { + case KVMPPC_IRQ_MPIC: + kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); + break; + case KVMPPC_IRQ_XICS: + kvmppc_xics_free_icp(vcpu); + break; + } + kvmppc_core_vcpu_free(vcpu); } @@ -532,12 +551,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) #endif } -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *dbg) -{ - return -EINVAL; -} - static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, 
struct kvm_run *run) { @@ -612,6 +625,8 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_bigendian) { + int idx, ret; + if (bytes > sizeof(run->mmio.data)) { printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, run->mmio.len); @@ -627,8 +642,14 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->mmio_is_write = 0; vcpu->arch.mmio_sign_extend = 0; - if (!kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, - bytes, &run->mmio.data)) { + idx = srcu_read_lock(&vcpu->kvm->srcu); + + ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, + bytes, &run->mmio.data); + + srcu_read_unlock(&vcpu->kvm->srcu, idx); + + if (!ret) { kvmppc_complete_mmio_load(vcpu, run); vcpu->mmio_needed = 0; return EMULATE_DONE; @@ -653,6 +674,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, u64 val, unsigned int bytes, int is_bigendian) { void *data = run->mmio.data; + int idx, ret; if (bytes > sizeof(run->mmio.data)) { printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, @@ -682,9 +704,14 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, } } - if (!kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, - bytes, &run->mmio.data)) { - kvmppc_complete_mmio_load(vcpu, run); + idx = srcu_read_lock(&vcpu->kvm->srcu); + + ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, + bytes, &run->mmio.data); + + srcu_read_unlock(&vcpu->kvm->srcu, idx); + + if (!ret) { vcpu->mmio_needed = 0; return EMULATE_DONE; } @@ -740,7 +767,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { if (irq->irq == KVM_INTERRUPT_UNSET) { - kvmppc_core_dequeue_external(vcpu, irq); + kvmppc_core_dequeue_external(vcpu); return 0; } @@ -770,7 +797,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, break; case KVM_CAP_PPC_EPR: r = 0; - vcpu->arch.epr_enabled = cap->args[0]; + if (cap->args[0]) + vcpu->arch.epr_flags |= KVMPPC_EPR_USER; + else + vcpu->arch.epr_flags &= ~KVMPPC_EPR_USER; break; #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_WATCHDOG: @@ -791,6 +821,44 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, break; } #endif +#ifdef CONFIG_KVM_MPIC + case KVM_CAP_IRQ_MPIC: { + struct file *filp; + struct kvm_device *dev; + + r = -EBADF; + filp = fget(cap->args[0]); + if (!filp) + break; + + r = -EPERM; + dev = kvm_device_from_filp(filp); + if (dev) + r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]); + + fput(filp); + break; + } +#endif +#ifdef CONFIG_KVM_XICS + case KVM_CAP_IRQ_XICS: { + struct file *filp; + struct kvm_device *dev; + + r = -EBADF; + filp = fget(cap->args[0]); + if (!filp) + break; + + r = -EPERM; + dev = kvm_device_from_filp(filp); + if (dev) + r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]); + + fput(filp); + break; + } +#endif /* CONFIG_KVM_XICS */ default: r = -EINVAL; break; @@ -913,9 +981,22 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) return 0; } +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, + bool line_status) +{ + if (!irqchip_in_kernel(kvm)) + return -ENXIO; + + irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event->irq, irq_event->level, + line_status); + return 0; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { + struct kvm *kvm 
__maybe_unused = filp->private_data; void __user *argp = (void __user *)arg; long r; @@ -934,7 +1015,6 @@ long kvm_arch_vm_ioctl(struct file *filp, #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CREATE_SPAPR_TCE: { struct kvm_create_spapr_tce create_tce; - struct kvm *kvm = filp->private_data; r = -EFAULT; if (copy_from_user(&create_tce, argp, sizeof(create_tce))) @@ -946,8 +1026,8 @@ long kvm_arch_vm_ioctl(struct file *filp, #ifdef CONFIG_KVM_BOOK3S_64_HV case KVM_ALLOCATE_RMA: { - struct kvm *kvm = filp->private_data; struct kvm_allocate_rma rma; + struct kvm *kvm = filp->private_data; r = kvm_vm_ioctl_allocate_rma(kvm, &rma); if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) @@ -956,7 +1036,6 @@ long kvm_arch_vm_ioctl(struct file *filp, } case KVM_PPC_ALLOCATE_HTAB: { - struct kvm *kvm = filp->private_data; u32 htab_order; r = -EFAULT; @@ -973,7 +1052,6 @@ long kvm_arch_vm_ioctl(struct file *filp, } case KVM_PPC_GET_HTAB_FD: { - struct kvm *kvm = filp->private_data; struct kvm_get_htab_fd ghf; r = -EFAULT; @@ -986,7 +1064,6 @@ long kvm_arch_vm_ioctl(struct file *filp, #ifdef CONFIG_PPC_BOOK3S_64 case KVM_PPC_GET_SMMU_INFO: { - struct kvm *kvm = filp->private_data; struct kvm_ppc_smmu_info info; memset(&info, 0, sizeof(info)); @@ -995,6 +1072,12 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; break; } + case KVM_PPC_RTAS_DEFINE_TOKEN: { + struct kvm *kvm = filp->private_data; + + r = kvm_vm_ioctl_rtas_define_token(kvm, argp); + break; + } #endif /* CONFIG_PPC_BOOK3S_64 */ default: r = -ENOTTY; diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index 89db29d17c25..7cd728b3b5e4 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -51,6 +51,12 @@ static struct icp_ipl __iomem *icp_native_regs[NR_CPUS]; static inline unsigned int icp_native_get_xirr(void) { int cpu = smp_processor_id(); + unsigned int xirr; + + /* Handled an interrupt latched by KVM */ + xirr = kvmppc_get_xics_latch(); + if (xirr) + return xirr; return in_be32(&icp_native_regs[cpu]->xirr.word); } @@ -138,6 +144,7 @@ static unsigned int icp_native_get_irq(void) static void icp_native_cause_ipi(int cpu, unsigned long data) { + kvmppc_set_host_ipi(cpu, 1); icp_native_set_qirr(cpu, IPI_PRIORITY); } @@ -151,6 +158,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id) { int cpu = smp_processor_id(); + kvmppc_set_host_ipi(cpu, 0); icp_native_set_qirr(cpu, 0xff); return smp_ipi_demux(); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b4622915bd15..4105b8221fdd 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -306,6 +306,7 @@ extern unsigned long MODULES_END; #define RCP_HC_BIT 0x00200000UL #define RCP_GR_BIT 0x00040000UL #define RCP_GC_BIT 0x00020000UL +#define RCP_IN_BIT 0x00008000UL /* IPTE notify bit */ /* User dirty / referenced bit for KVM's migration feature */ #define KVM_UR_BIT 0x00008000UL @@ -373,6 +374,7 @@ extern unsigned long MODULES_END; #define RCP_HC_BIT 0x0020000000000000UL #define RCP_GR_BIT 0x0004000000000000UL #define RCP_GC_BIT 0x0002000000000000UL +#define RCP_IN_BIT 0x0000800000000000UL /* IPTE notify bit */ /* User dirty / referenced bit for KVM's migration feature */ #define KVM_UR_BIT 0x0000800000000000UL @@ -746,30 +748,42 @@ struct gmap { /** * struct gmap_rmap - reverse mapping for segment table entries - * @next: pointer to the next gmap_rmap structure in the list + * @gmap: pointer to the gmap_struct * @entry: pointer to a 
segment table entry + * @vmaddr: virtual address in the guest address space */ struct gmap_rmap { struct list_head list; + struct gmap *gmap; unsigned long *entry; + unsigned long vmaddr; }; /** * struct gmap_pgtable - gmap information attached to a page table * @vmaddr: address of the 1MB segment in the process virtual memory - * @mapper: list of segment table entries maping a page table + * @mapper: list of segment table entries mapping a page table */ struct gmap_pgtable { unsigned long vmaddr; struct list_head mapper; }; +/** + * struct gmap_notifier - notify function block for page invalidation + * @notifier_call: address of callback function + */ +struct gmap_notifier { + struct list_head list; + void (*notifier_call)(struct gmap *gmap, unsigned long address); +}; + struct gmap *gmap_alloc(struct mm_struct *mm); void gmap_free(struct gmap *gmap); void gmap_enable(struct gmap *gmap); void gmap_disable(struct gmap *gmap); int gmap_map_segment(struct gmap *gmap, unsigned long from, - unsigned long to, unsigned long length); + unsigned long to, unsigned long len); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); unsigned long __gmap_translate(unsigned long address, struct gmap *); unsigned long gmap_translate(unsigned long address, struct gmap *); @@ -777,6 +791,24 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *); unsigned long gmap_fault(unsigned long address, struct gmap *); void gmap_discard(unsigned long from, unsigned long to, struct gmap *); +void gmap_register_ipte_notifier(struct gmap_notifier *); +void gmap_unregister_ipte_notifier(struct gmap_notifier *); +int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); +void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *); + +static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + if (pgste_val(pgste) & RCP_IN_BIT) { + pgste_val(pgste) &= ~RCP_IN_BIT; + gmap_do_ipte_notify(mm, addr, ptep); + } +#endif + return pgste; +} + /* * Certain architectures need to do special things when PTEs * within a page table are directly modified. 
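 * (The pgste_ipte_notify() hook wired up above is one such case: it
 * lets gmap users hear about an entry being invalidated before the
 * IPTE is issued.)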
Thus, the following @@ -1032,8 +1064,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, pte_t pte; mm->context.flush_mm = 1; - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); + } pte = *ptep; if (!mm_exclusive(mm)) @@ -1052,11 +1086,14 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long address, pte_t *ptep) { + pgste_t pgste; pte_t pte; mm->context.flush_mm = 1; - if (mm_has_pgste(mm)) - pgste_get_lock(ptep); + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste_ipte_notify(mm, address, ptep, pgste); + } pte = *ptep; if (!mm_exclusive(mm)) @@ -1082,8 +1119,10 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, pgste_t pgste; pte_t pte; - if (mm_has_pgste(vma->vm_mm)) + if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); + } pte = *ptep; __ptep_ipte(address, ptep); @@ -1111,8 +1150,11 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, pgste_t pgste; pte_t pte; - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); + if (!full) + pgste = pgste_ipte_notify(mm, address, ptep, pgste); + } pte = *ptep; if (!full) @@ -1135,8 +1177,10 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, if (pte_write(pte)) { mm->context.flush_mm = 1; - if (mm_has_pgste(mm)) + if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); + } if (!mm_exclusive(mm)) __ptep_ipte(address, ptep); @@ -1160,8 +1204,10 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, if (pte_same(*ptep, entry)) return 0; - if (mm_has_pgste(vma->vm_mm)) + if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); + } __ptep_ipte(address, ptep); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index ff67d730c00c..59880dbaf360 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -33,8 +33,6 @@ #define CHUNK_READ_WRITE 0 #define CHUNK_READ_ONLY 1 -#define CHUNK_OLDMEM 4 -#define CHUNK_CRASHK 5 struct mem_chunk { unsigned long addr; @@ -43,13 +41,12 @@ struct mem_chunk { }; extern struct mem_chunk memory_chunk[]; -extern unsigned long real_memory_size; extern int memory_end_set; extern unsigned long memory_end; -void detect_memory_layout(struct mem_chunk chunk[]); -void create_mem_hole(struct mem_chunk memory_chunk[], unsigned long addr, - unsigned long size, int type); +void detect_memory_layout(struct mem_chunk chunk[], unsigned long maxsize); +void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, + unsigned long size); #define PRIMARY_SPACE_MODE 0 #define ACCESS_REGISTER_MODE 1 diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 7bf68fff7c5d..9ccd1905bdad 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -44,5 +44,6 @@ header-y += termios.h header-y += types.h header-y += ucontext.h header-y += unistd.h +header-y += virtio-ccw.h header-y += vtoc.h header-y += zcrypt.h diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h new file mode 100644 index 000000000000..a9a4ebf79fa7 --- /dev/null +++ b/arch/s390/include/uapi/asm/virtio-ccw.h @@ -0,0 +1,21 @@ +/* + * Definitions for virtio-ccw devices. + * + * Copyright IBM Corp. 
2013 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Cornelia Huck <[email protected]> + */ +#ifndef __KVM_VIRTIO_CCW_H +#define __KVM_VIRTIO_CCW_H + +/* Alignment of vring buffers. */ +#define KVM_VIRTIO_CCW_RING_ALIGN 4096 + +/* Subcode for diagnose 500 (virtio hypercall). */ +#define KVM_S390_VIRTIO_CCW_NOTIFY 3 + +#endif diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 1386fcaf4ef6..4bb2a4656163 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -30,7 +30,7 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w obj-y := bitmap.o traps.o time.o process.o base.o early.o setup.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -obj-y += debug.o irq.o ipl.o dis.o diag.o mem_detect.o sclp.o vdso.o +obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += dumpstack.o diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index fb8d8781a011..f703d91bf720 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -88,8 +88,8 @@ static struct mem_chunk *get_memory_layout(void) struct mem_chunk *chunk_array; chunk_array = kzalloc_panic(MEMORY_CHUNKS * sizeof(struct mem_chunk)); - detect_memory_layout(chunk_array); - create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE, CHUNK_CRASHK); + detect_memory_layout(chunk_array, 0); + create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE); return chunk_array; } @@ -344,7 +344,7 @@ static int loads_init(Elf64_Phdr *phdr, u64 loads_offset) for (i = 0; i < MEMORY_CHUNKS; i++) { mem_chunk = &chunk_array[i]; if (mem_chunk->size == 0) - break; + continue; if (chunk_array[i].type != CHUNK_READ_WRITE && chunk_array[i].type != CHUNK_READ_ONLY) continue; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index bda011e2f8ae..dc8770d7173c 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -482,7 +482,6 @@ void __init startup_init(void) detect_machine_facilities(); setup_topology(); sclp_facilities_detect(); - detect_memory_layout(memory_chunk); #ifdef CONFIG_DYNAMIC_FTRACE S390_lowcore.ftrace_func = (unsigned long)ftrace_caller; #endif diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c deleted file mode 100644 index 22d502e885ed..000000000000 --- a/arch/s390/kernel/mem_detect.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright IBM Corp. 2008, 2009 - * - * Author(s): Heiko Carstens <[email protected]> - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <asm/ipl.h> -#include <asm/sclp.h> -#include <asm/setup.h> - -#define ADDR2G (1ULL << 31) - -static void find_memory_chunks(struct mem_chunk chunk[]) -{ - unsigned long long memsize, rnmax, rzm; - unsigned long addr = 0, size; - int i = 0, type; - - rzm = sclp_get_rzm(); - rnmax = sclp_get_rnmax(); - memsize = rzm * rnmax; - if (!rzm) - rzm = 1ULL << 17; - if (sizeof(long) == 4) { - rzm = min(ADDR2G, rzm); - memsize = memsize ? 
min(ADDR2G, memsize) : ADDR2G; - } - do { - size = 0; - type = tprot(addr); - do { - size += rzm; - if (memsize && addr + size >= memsize) - break; - } while (type == tprot(addr + size)); - if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) { - chunk[i].addr = addr; - chunk[i].size = size; - chunk[i].type = type; - i++; - } - addr += size; - } while (addr < memsize && i < MEMORY_CHUNKS); -} - -void detect_memory_layout(struct mem_chunk chunk[]) -{ - unsigned long flags, cr0; - - memset(chunk, 0, MEMORY_CHUNKS * sizeof(struct mem_chunk)); - /* Disable IRQs, DAT and low address protection so tprot does the - * right thing and we don't get scheduled away with low address - * protection disabled. - */ - flags = __arch_local_irq_stnsm(0xf8); - __ctl_store(cr0, 0, 0); - __ctl_clear_bit(0, 28); - find_memory_chunks(chunk); - __ctl_load(cr0, 0, 0); - arch_local_irq_restore(flags); -} -EXPORT_SYMBOL(detect_memory_layout); - -/* - * Move memory chunks array from index "from" to index "to" - */ -static void mem_chunk_move(struct mem_chunk chunk[], int to, int from) -{ - int cnt = MEMORY_CHUNKS - to; - - memmove(&chunk[to], &chunk[from], cnt * sizeof(struct mem_chunk)); -} - -/* - * Initialize memory chunk - */ -static void mem_chunk_init(struct mem_chunk *chunk, unsigned long addr, - unsigned long size, int type) -{ - chunk->type = type; - chunk->addr = addr; - chunk->size = size; -} - -/* - * Create memory hole with given address, size, and type - */ -void create_mem_hole(struct mem_chunk chunk[], unsigned long addr, - unsigned long size, int type) -{ - unsigned long lh_start, lh_end, lh_size, ch_start, ch_end, ch_size; - int i, ch_type; - - for (i = 0; i < MEMORY_CHUNKS; i++) { - if (chunk[i].size == 0) - continue; - - /* Define chunk properties */ - ch_start = chunk[i].addr; - ch_size = chunk[i].size; - ch_end = ch_start + ch_size - 1; - ch_type = chunk[i].type; - - /* Is memory chunk hit by memory hole? 
*/ - if (addr + size <= ch_start) - continue; /* No: memory hole in front of chunk */ - if (addr > ch_end) - continue; /* No: memory hole after chunk */ - - /* Yes: Define local hole properties */ - lh_start = max(addr, chunk[i].addr); - lh_end = min(addr + size - 1, ch_end); - lh_size = lh_end - lh_start + 1; - - if (lh_start == ch_start && lh_end == ch_end) { - /* Hole covers complete memory chunk */ - mem_chunk_init(&chunk[i], lh_start, lh_size, type); - } else if (lh_end == ch_end) { - /* Hole starts in memory chunk and convers chunk end */ - mem_chunk_move(chunk, i + 1, i); - mem_chunk_init(&chunk[i], ch_start, ch_size - lh_size, - ch_type); - mem_chunk_init(&chunk[i + 1], lh_start, lh_size, type); - i += 1; - } else if (lh_start == ch_start) { - /* Hole ends in memory chunk */ - mem_chunk_move(chunk, i + 1, i); - mem_chunk_init(&chunk[i], lh_start, lh_size, type); - mem_chunk_init(&chunk[i + 1], lh_end + 1, - ch_size - lh_size, ch_type); - break; - } else { - /* Hole splits memory chunk */ - mem_chunk_move(chunk, i + 2, i); - mem_chunk_init(&chunk[i], ch_start, - lh_start - ch_start, ch_type); - mem_chunk_init(&chunk[i + 1], lh_start, lh_size, type); - mem_chunk_init(&chunk[i + 2], lh_end + 1, - ch_end - lh_end, ch_type); - break; - } - } -} diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 0f419c5765c8..0a49095104c9 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -226,25 +226,17 @@ static void __init conmode_default(void) } #ifdef CONFIG_ZFCPDUMP -static void __init setup_zfcpdump(unsigned int console_devno) +static void __init setup_zfcpdump(void) { - static char str[41]; - if (ipl_info.type != IPL_TYPE_FCP_DUMP) return; if (OLDMEM_BASE) return; - if (console_devno != -1) - sprintf(str, " cio_ignore=all,!0.0.%04x,!0.0.%04x", - ipl_info.data.fcp.dev_id.devno, console_devno); - else - sprintf(str, " cio_ignore=all,!0.0.%04x", - ipl_info.data.fcp.dev_id.devno); - strcat(boot_command_line, str); + strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev"); console_loglevel = 2; } #else -static inline void setup_zfcpdump(unsigned int console_devno) {} +static inline void setup_zfcpdump(void) {} #endif /* CONFIG_ZFCPDUMP */ /* @@ -471,14 +463,10 @@ static void __init setup_resources(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; - if (memory_chunk[i].type == CHUNK_OLDMEM || - memory_chunk[i].type == CHUNK_CRASHK) - continue; res = alloc_bootmem_low(sizeof(*res)); res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; switch (memory_chunk[i].type) { case CHUNK_READ_WRITE: - case CHUNK_CRASHK: res->name = "System RAM"; break; case CHUNK_READ_ONLY: @@ -510,12 +498,10 @@ static void __init setup_resources(void) } } -unsigned long real_memory_size; -EXPORT_SYMBOL_GPL(real_memory_size); - static void __init setup_memory_end(void) { unsigned long vmax, vmalloc_size, tmp; + unsigned long real_memory_size = 0; int i; @@ -525,7 +511,6 @@ static void __init setup_memory_end(void) memory_end_set = 1; } #endif - real_memory_size = 0; memory_end &= PAGE_MASK; /* @@ -538,6 +523,8 @@ static void __init setup_memory_end(void) unsigned long align; chunk = &memory_chunk[i]; + if (!chunk->size) + continue; align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1); start = (chunk->addr + align - 1) & ~(align - 1); end = (chunk->addr + chunk->size) & ~(align - 1); @@ -588,6 +575,8 @@ static void __init setup_memory_end(void) for (i = 0; i < MEMORY_CHUNKS; i++) { struct mem_chunk *chunk = &memory_chunk[i]; + if (!chunk->size) + continue; if 
(chunk->addr >= memory_end) { memset(chunk, 0, sizeof(*chunk)); continue; @@ -688,15 +677,6 @@ static int __init verify_crash_base(unsigned long crash_base, } /* - * Reserve kdump memory by creating a memory hole in the mem_chunk array - */ -static void __init reserve_kdump_bootmem(unsigned long addr, unsigned long size, - int type) -{ - create_mem_hole(memory_chunk, addr, size, type); -} - -/* * When kdump is enabled, we have to ensure that no memory from * the area [0 - crashkernel memory size] and * [crashk_res.start - crashk_res.end] is set offline. @@ -727,16 +707,22 @@ static struct notifier_block kdump_mem_nb = { static void reserve_oldmem(void) { #ifdef CONFIG_CRASH_DUMP + unsigned long real_size = 0; + int i; + if (!OLDMEM_BASE) return; + for (i = 0; i < MEMORY_CHUNKS; i++) { + struct mem_chunk *chunk = &memory_chunk[i]; - reserve_kdump_bootmem(OLDMEM_BASE, OLDMEM_SIZE, CHUNK_OLDMEM); - reserve_kdump_bootmem(OLDMEM_SIZE, memory_end - OLDMEM_SIZE, - CHUNK_OLDMEM); - if (OLDMEM_BASE + OLDMEM_SIZE == real_memory_size) + real_size = max(real_size, chunk->addr + chunk->size); + } + create_mem_hole(memory_chunk, OLDMEM_BASE, OLDMEM_SIZE); + create_mem_hole(memory_chunk, OLDMEM_SIZE, real_size - OLDMEM_SIZE); + if (OLDMEM_BASE + OLDMEM_SIZE == real_size) saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1; else - saved_max_pfn = PFN_DOWN(real_memory_size) - 1; + saved_max_pfn = PFN_DOWN(real_size) - 1; #endif } @@ -775,7 +761,7 @@ static void __init reserve_crashkernel(void) crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; insert_resource(&iomem_resource, &crashk_res); - reserve_kdump_bootmem(crash_base, crash_size, CHUNK_CRASHK); + create_mem_hole(memory_chunk, crash_base, crash_size); pr_info("Reserving %lluMB of memory at %lluMB " "for crashkernel (System RAM: %luMB)\n", crash_size >> 20, crash_base >> 20, memory_end >> 20); @@ -847,11 +833,10 @@ static void __init setup_memory(void) * Register RAM areas with the bootmem allocator. */ - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + for (i = 0; i < MEMORY_CHUNKS; i++) { unsigned long start_chunk, end_chunk, pfn; - if (memory_chunk[i].type != CHUNK_READ_WRITE && - memory_chunk[i].type != CHUNK_CRASHK) + if (!memory_chunk[i].size) continue; start_chunk = PFN_DOWN(memory_chunk[i].addr); end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size); @@ -1067,12 +1052,12 @@ void __init setup_arch(char **cmdline_p) memcpy(&uaccess, &uaccess_std, sizeof(uaccess)); parse_early_param(); - + detect_memory_layout(memory_chunk, memory_end); os_info_init(); setup_ipl(); + reserve_oldmem(); setup_memory_end(); setup_addressing_mode(); - reserve_oldmem(); reserve_crashkernel(); setup_memory(); setup_resources(); @@ -1097,5 +1082,5 @@ void __init setup_arch(char **cmdline_p) set_preferred_console(); /* Setup zfcpdump support */ - setup_zfcpdump(console_devno); + setup_zfcpdump(); } diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 60f9f8ae0fc8..70b46eacf8e1 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -22,6 +22,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT + select HAVE_KVM_EVENTFD ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. 
This should work on any 64bit machine. diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 3975722bb19d..8fe9d65a4585 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License (version 2 only) # as published by the Free Software Foundation. -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o) ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index a390687feb13..1c01a9912989 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -13,6 +13,7 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> +#include <asm/virtio-ccw.h> #include "kvm-s390.h" #include "trace.h" #include "trace-s390.h" @@ -104,6 +105,29 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) return -EREMOTE; } +static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) +{ + int ret, idx; + + /* No virtio-ccw notification? Get out quickly. */ + if (!vcpu->kvm->arch.css_support || + (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) + return -EOPNOTSUPP; + + idx = srcu_read_lock(&vcpu->kvm->srcu); + /* + * The layout is as follows: + * - gpr 2 contains the subchannel id (passed as addr) + * - gpr 3 contains the virtqueue index (passed as datamatch) + */ + ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, + vcpu->run->s.regs.gprs[2], + 8, &vcpu->run->s.regs.gprs[3]); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + /* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */ + return ret < 0 ? ret : 0; +} + int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) { int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; @@ -118,6 +142,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) return __diag_time_slice_end_directed(vcpu); case 0x308: return __diag_ipl_functions(vcpu); + case 0x500: + return __diag_virtio_hypercall(vcpu); default: return -EOPNOTSUPP; } diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 4703f129e95e..302e0e52b009 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -18,369 +18,86 @@ #include <asm/uaccess.h> #include "kvm-s390.h" -static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu, - unsigned long guestaddr) +static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu, + void __user *gptr, + int prefixing) { unsigned long prefix = vcpu->arch.sie_block->prefix; - - if (guestaddr < 2 * PAGE_SIZE) - guestaddr += prefix; - else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE)) - guestaddr -= prefix; - - return (void __user *) gmap_fault(guestaddr, vcpu->arch.gmap); -} - -static inline int get_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u64 *result) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 7); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return get_user(*result, (unsigned long __user *) uptr); -} - -static inline int get_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u32 *result) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 3); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return get_user(*result, (u32 __user *) uptr); -} - -static inline int get_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u16 *result) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 1); - - if (IS_ERR(uptr)) - return
PTR_ERR(uptr); - - return get_user(*result, (u16 __user *) uptr); -} - -static inline int get_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u8 *result) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return get_user(*result, (u8 __user *) uptr); -} - -static inline int put_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u64 value) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 7); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return put_user(value, (u64 __user *) uptr); -} - -static inline int put_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u32 value) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 3); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return put_user(value, (u32 __user *) uptr); -} - -static inline int put_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u16 value) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - BUG_ON(guestaddr & 1); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return put_user(value, (u16 __user *) uptr); -} - -static inline int put_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr, - u8 value) -{ - void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - return put_user(value, (u8 __user *) uptr); -} - - -static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, - unsigned long guestdest, - void *from, unsigned long n) -{ - int rc; - unsigned long i; - u8 *data = from; - - for (i = 0; i < n; i++) { - rc = put_guest_u8(vcpu, guestdest++, *(data++)); - if (rc < 0) - return rc; + unsigned long gaddr = (unsigned long) gptr; + unsigned long uaddr; + + if (prefixing) { + if (gaddr < 2 * PAGE_SIZE) + gaddr += prefix; + else if ((gaddr >= prefix) && (gaddr < prefix + 2 * PAGE_SIZE)) + gaddr -= prefix; } - return 0; -} - -static inline int __copy_to_guest_fast(struct kvm_vcpu *vcpu, - unsigned long guestdest, - void *from, unsigned long n) -{ - int r; + uaddr = gmap_fault(gaddr, vcpu->arch.gmap); + if (IS_ERR_VALUE(uaddr)) + uaddr = -EFAULT; + return (void __user *)uaddr; +} + +#define get_guest(vcpu, x, gptr) \ +({ \ + __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ + int __mask = sizeof(__typeof__(*(gptr))) - 1; \ + int __ret = PTR_RET((void __force *)__uptr); \ + \ + if (!__ret) { \ + BUG_ON((unsigned long)__uptr & __mask); \ + __ret = get_user(x, __uptr); \ + } \ + __ret; \ +}) + +#define put_guest(vcpu, x, gptr) \ +({ \ + __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ + int __mask = sizeof(__typeof__(*(gptr))) - 1; \ + int __ret = PTR_RET((void __force *)__uptr); \ + \ + if (!__ret) { \ + BUG_ON((unsigned long)__uptr & __mask); \ + __ret = put_user(x, __uptr); \ + } \ + __ret; \ +}) + +static inline int __copy_guest(struct kvm_vcpu *vcpu, unsigned long to, + unsigned long from, unsigned long len, + int to_guest, int prefixing) +{ + unsigned long _len, rc; void __user *uptr; - unsigned long size; - - if (guestdest + n < guestdest) - return -EFAULT; - - /* simple case: all within one segment table entry? 
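The get_guest()/put_guest() macros above replace the per-width get_guest_u8/u16/u32/u64 helpers: the type of the passed __user pointer now determines both the access size and the alignment that the BUG_ON verifies. A minimal usage sketch (hypothetical helper following the error-handling pattern of the interceptors further below, assuming a 4-byte-aligned guest address gaddr):

static int mirror_guest_word(struct kvm_vcpu *vcpu, unsigned long gaddr)
{
	u32 val;

	/* u32 pointer type => 4-byte access and 4-byte alignment check */
	if (get_guest(vcpu, val, (u32 __user *) gaddr))
		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
	return put_guest(vcpu, val, (u32 __user *) (gaddr + 4));
}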
*/ - if ((guestdest & PMD_MASK) == ((guestdest+n) & PMD_MASK)) { - uptr = (void __user *) gmap_fault(guestdest, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_to_user(uptr, from, n); - - if (r) - r = -EFAULT; - - goto out; - } - - /* copy first segment */ - uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - size = PMD_SIZE - (guestdest & ~PMD_MASK); - - r = copy_to_user(uptr, from, size); - - if (r) { - r = -EFAULT; - goto out; - } - from += size; - n -= size; - guestdest += size; - - /* copy full segments */ - while (n >= PMD_SIZE) { - uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_to_user(uptr, from, PMD_SIZE); - - if (r) { - r = -EFAULT; - goto out; - } - from += PMD_SIZE; - n -= PMD_SIZE; - guestdest += PMD_SIZE; - } - - /* copy the tail segment */ - if (n) { - uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_to_user(uptr, from, n); - - if (r) - r = -EFAULT; - } -out: - return r; -} - -static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, - unsigned long guestdest, - void *from, unsigned long n) -{ - return __copy_to_guest_fast(vcpu, guestdest, from, n); -} - -static inline int copy_to_guest(struct kvm_vcpu *vcpu, unsigned long guestdest, - void *from, unsigned long n) -{ - unsigned long prefix = vcpu->arch.sie_block->prefix; - - if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE)) - goto slowpath; - - if ((guestdest < prefix) && (guestdest + n > prefix)) - goto slowpath; - - if ((guestdest < prefix + 2 * PAGE_SIZE) - && (guestdest + n > prefix + 2 * PAGE_SIZE)) - goto slowpath; - - if (guestdest < 2 * PAGE_SIZE) - guestdest += prefix; - else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE)) - guestdest -= prefix; - - return __copy_to_guest_fast(vcpu, guestdest, from, n); -slowpath: - return __copy_to_guest_slow(vcpu, guestdest, from, n); -} - -static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, - unsigned long n) -{ - int rc; - unsigned long i; - u8 *data = to; - - for (i = 0; i < n; i++) { - rc = get_guest_u8(vcpu, guestsrc++, data++); - if (rc < 0) - return rc; + while (len) { + uptr = to_guest ? (void __user *)to : (void __user *)from; + uptr = __gptr_to_uptr(vcpu, uptr, prefixing); + if (IS_ERR((void __force *)uptr)) + return -EFAULT; + _len = PAGE_SIZE - ((unsigned long)uptr & (PAGE_SIZE - 1)); + _len = min(_len, len); + if (to_guest) + rc = copy_to_user((void __user *) uptr, (void *)from, _len); + else + rc = copy_from_user((void *)to, (void __user *)uptr, _len); + if (rc) + return -EFAULT; + len -= _len; + from += _len; + to += _len; } return 0; } -static inline int __copy_from_guest_fast(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, - unsigned long n) -{ - int r; - void __user *uptr; - unsigned long size; - - if (guestsrc + n < guestsrc) - return -EFAULT; - - /* simple case: all within one segment table entry? 
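The __copy_guest() loop above drops the old PMD-sized fast and slow paths: every pass is clamped to the end of the current 4 KB page and the guest address is re-translated through __gptr_to_uptr(), so page, segment and prefix boundaries are all handled by the same code. A trace of the clamping arithmetic with assumed numbers (PAGE_SIZE == 4096, a 6000-byte copy starting 100 bytes before a page boundary):

static void clamp_trace_example(void)
{
	unsigned long uptr = 0x12f9c;	/* 0x13000 - 100 */
	unsigned long len = 6000, _len;

	while (len) {
		_len = PAGE_SIZE - (uptr & (PAGE_SIZE - 1));
		_len = min(_len, len);	/* passes copy 100, 4096, 1804 bytes */
		uptr += _len;		/* the real loop re-translates instead */
		len -= _len;
	}
}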
*/ - if ((guestsrc & PMD_MASK) == ((guestsrc+n) & PMD_MASK)) { - uptr = (void __user *) gmap_fault(guestsrc, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_from_user(to, uptr, n); - - if (r) - r = -EFAULT; - - goto out; - } - - /* copy first segment */ - uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - size = PMD_SIZE - (guestsrc & ~PMD_MASK); - - r = copy_from_user(to, uptr, size); - - if (r) { - r = -EFAULT; - goto out; - } - to += size; - n -= size; - guestsrc += size; - - /* copy full segments */ - while (n >= PMD_SIZE) { - uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_from_user(to, uptr, PMD_SIZE); - - if (r) { - r = -EFAULT; - goto out; - } - to += PMD_SIZE; - n -= PMD_SIZE; - guestsrc += PMD_SIZE; - } - - /* copy the tail segment */ - if (n) { - uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); - - if (IS_ERR((void __force *) uptr)) - return PTR_ERR((void __force *) uptr); - - r = copy_from_user(to, uptr, n); - - if (r) - r = -EFAULT; - } -out: - return r; -} - -static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, - unsigned long n) -{ - return __copy_from_guest_fast(vcpu, to, guestsrc, n); -} - -static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to, - unsigned long guestsrc, unsigned long n) -{ - unsigned long prefix = vcpu->arch.sie_block->prefix; - - if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE)) - goto slowpath; +#define copy_to_guest(vcpu, to, from, size) \ + __copy_guest(vcpu, to, (unsigned long)from, size, 1, 1) +#define copy_from_guest(vcpu, to, from, size) \ + __copy_guest(vcpu, (unsigned long)to, from, size, 0, 1) +#define copy_to_guest_absolute(vcpu, to, from, size) \ + __copy_guest(vcpu, to, (unsigned long)from, size, 1, 0) +#define copy_from_guest_absolute(vcpu, to, from, size) \ + __copy_guest(vcpu, (unsigned long)to, from, size, 0, 0) - if ((guestsrc < prefix) && (guestsrc + n > prefix)) - goto slowpath; - - if ((guestsrc < prefix + 2 * PAGE_SIZE) - && (guestsrc + n > prefix + 2 * PAGE_SIZE)) - goto slowpath; - - if (guestsrc < 2 * PAGE_SIZE) - guestsrc += prefix; - else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE)) - guestsrc -= prefix; - - return __copy_from_guest_fast(vcpu, to, guestsrc, n); -slowpath: - return __copy_from_guest_slow(vcpu, to, guestsrc, n); -} -#endif +#endif /* __KVM_S390_GACCESS_H */ diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index f26ff1e31bdb..b7d1b2edeeb3 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -43,12 +43,10 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); do { - rc = get_guest_u64(vcpu, useraddr, - &vcpu->arch.sie_block->gcr[reg]); - if (rc == -EFAULT) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - break; - } + rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], + (u64 __user *) useraddr); + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); useraddr += 8; if (reg == reg3) break; @@ -78,11 +76,9 @@ static int handle_lctl(struct kvm_vcpu *vcpu) reg = reg1; do { - rc = get_guest_u32(vcpu, useraddr, &val); - if (rc == -EFAULT) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - break; - } + rc = get_guest(vcpu, val, (u32 
__user *) useraddr); + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; vcpu->arch.sie_block->gcr[reg] |= val; useraddr += 4; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 37116a77cb4b..5c948177529e 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -180,7 +180,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { const unsigned short table[] = { 2, 4, 4, 6 }; - int rc, exception = 0; + int rc = 0; switch (inti->type) { case KVM_S390_INT_EMERGENCY: @@ -188,74 +188,41 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_emergency_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->emerg.code, 0); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->emerg.code); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = put_guest(vcpu, 0x1201, (u16 __user *)__LC_EXT_INT_CODE); + rc |= put_guest(vcpu, inti->emerg.code, + (u16 __user *)__LC_EXT_CPU_ADDR); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); break; - case KVM_S390_INT_EXTERNAL_CALL: VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); vcpu->stat.deliver_external_call++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->extcall.code, 0); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->extcall.code); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = put_guest(vcpu, 0x1202, (u16 __user *)__LC_EXT_INT_CODE); + rc |= put_guest(vcpu, inti->extcall.code, + (u16 __user *)__LC_EXT_CPU_ADDR); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); break; - case KVM_S390_INT_SERVICE: VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", inti->ext.ext_params); vcpu->stat.deliver_service_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->ext.ext_params, 0); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); - if (rc == -EFAULT) - exception = 1; + rc = put_guest(vcpu, 0x2401, (u16 __user *)__LC_EXT_INT_CODE); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + 
__LC_EXT_NEW_PSW, sizeof(psw_t)); + rc |= put_guest(vcpu, inti->ext.ext_params, + (u32 __user *)__LC_EXT_PARAMS); break; - case KVM_S390_INT_VIRTIO: VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", inti->ext.ext_params, inti->ext.ext_params2); @@ -263,34 +230,17 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->ext.ext_params, inti->ext.ext_params2); - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, 0x0d00); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u64(vcpu, __LC_EXT_PARAMS2, - inti->ext.ext_params2); - if (rc == -EFAULT) - exception = 1; + rc = put_guest(vcpu, 0x2603, (u16 __user *)__LC_EXT_INT_CODE); + rc |= put_guest(vcpu, 0x0d00, (u16 __user *)__LC_EXT_CPU_ADDR); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + rc |= put_guest(vcpu, inti->ext.ext_params, + (u32 __user *)__LC_EXT_PARAMS); + rc |= put_guest(vcpu, inti->ext.ext_params2, + (u64 __user *)__LC_EXT_PARAMS2); break; - case KVM_S390_SIGP_STOP: VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); vcpu->stat.deliver_stop_signal++; @@ -313,18 +263,14 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_restart_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, 0); - rc = copy_to_guest(vcpu, offsetof(struct _lowcore, - restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - offsetof(struct _lowcore, restart_psw), sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = copy_to_guest(vcpu, + offsetof(struct _lowcore, restart_old_psw), + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + offsetof(struct _lowcore, restart_psw), + sizeof(psw_t)); atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); break; - case KVM_S390_PROGRAM_INT: VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", inti->pgm.code, @@ -332,24 +278,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_program_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->pgm.code, 0); - rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_PGM_ILC, - table[vcpu->arch.sie_block->ipa >> 14]); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_PGM_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = put_guest(vcpu, inti->pgm.code, (u16 __user *)__LC_PGM_INT_CODE); + rc |= put_guest(vcpu, table[vcpu->arch.sie_block->ipa >> 14], + (u16 __user *)__LC_PGM_ILC); + rc |= copy_to_guest(vcpu, __LC_PGM_OLD_PSW, + 
&vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_PGM_NEW_PSW, sizeof(psw_t)); break; case KVM_S390_MCHK: @@ -358,24 +293,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->mchk.cr14, inti->mchk.mcic); - rc = kvm_s390_vcpu_store_status(vcpu, - KVM_S390_STORE_STATUS_PREFIXED); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u64(vcpu, __LC_MCCK_CODE, inti->mchk.mcic); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_MCK_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_MCK_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = kvm_s390_vcpu_store_status(vcpu, + KVM_S390_STORE_STATUS_PREFIXED); + rc |= put_guest(vcpu, inti->mchk.mcic, (u64 __user *) __LC_MCCK_CODE); + rc |= copy_to_guest(vcpu, __LC_MCK_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_MCK_NEW_PSW, sizeof(psw_t)); break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: @@ -388,67 +312,44 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, vcpu->stat.deliver_io_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, param0, param1); - rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_ID, - inti->io.subchannel_id); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_NR, - inti->io.subchannel_nr); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u32(vcpu, __LC_IO_INT_PARM, - inti->io.io_int_parm); - if (rc == -EFAULT) - exception = 1; - - rc = put_guest_u32(vcpu, __LC_IO_INT_WORD, - inti->io.io_int_word); - if (rc == -EFAULT) - exception = 1; - - rc = copy_to_guest(vcpu, __LC_IO_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_IO_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; + rc = put_guest(vcpu, inti->io.subchannel_id, + (u16 __user *) __LC_SUBCHANNEL_ID); + rc |= put_guest(vcpu, inti->io.subchannel_nr, + (u16 __user *) __LC_SUBCHANNEL_NR); + rc |= put_guest(vcpu, inti->io.io_int_parm, + (u32 __user *) __LC_IO_INT_PARM); + rc |= put_guest(vcpu, inti->io.io_int_word, + (u32 __user *) __LC_IO_INT_WORD); + rc |= copy_to_guest(vcpu, __LC_IO_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_IO_NEW_PSW, sizeof(psw_t)); break; } default: BUG(); } - if (exception) { + if (rc) { printk("kvm: The guest lowcore is not mapped during interrupt " - "delivery, killing userspace\n"); + "delivery, killing userspace\n"); do_exit(SIGKILL); } } static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) { - int rc, exception = 0; + int rc; if (psw_extint_disabled(vcpu)) return 0; if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) return 0; - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004); - if (rc == -EFAULT) - exception = 1; - rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, - __LC_EXT_NEW_PSW, sizeof(psw_t)); - if (rc == -EFAULT) - exception = 1; - if (exception) { + rc = put_guest(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE); + rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + 
&vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc) { printk("kvm: The guest lowcore is not mapped during interrupt " "delivery, killing userspace\n"); do_exit(SIGKILL); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4cf35a0a79e7..c1c7c683fa26 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -142,12 +142,16 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: case KVM_CAP_S390_CSS_SUPPORT: + case KVM_CAP_IOEVENTFD: r = 1; break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; + case KVM_CAP_NR_MEMSLOTS: + r = KVM_USER_MEM_SLOTS; + break; case KVM_CAP_S390_COW: r = MACHINE_HAS_ESOP; break; @@ -632,8 +636,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) } else { VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); trace_kvm_s390_sie_fault(vcpu); - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - rc = 0; + rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } } VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", @@ -974,22 +977,13 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - bool user_alloc) + enum kvm_mr_change change) { - /* A few sanity checks. We can have exactly one memory slot which has - to start at guest virtual zero and which has to be located at a - page boundary in userland and which has to end at a page boundary. - The memory in userland is ok to be fragmented into various different - vmas. It is okay to mmap() and munmap() stuff in this slot after - doing this call at any time */ - - if (mem->slot) - return -EINVAL; - - if (mem->guest_phys_addr) - return -EINVAL; + /* A few sanity checks. We can have memory slots which have to be + located/ended at a segment boundary (1MB). The memory in userland is + ok to be fragmented into various different vmas. It is okay to mmap() + and munmap() stuff in this slot after doing this call at any time */ if (mem->userspace_addr & 0xffffful) return -EINVAL; @@ -997,19 +991,26 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (mem->memory_size & 0xffffful) return -EINVAL; - if (!user_alloc) - return -EINVAL; - return 0; } void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { int rc; + /* If the basics of the memslot do not change, we do not want + * to update the gmap. Every update causes several unnecessary + * segment translation exceptions. This is usually handled just + * fine by the normal fault handler + gmap, but it will also + * cause faults on the prefix page of running guest CPUs. 
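The guard that follows skips the gmap update whenever the slot's backing address, guest address and size are all unchanged; note that base_gfn and npages are kept in page-frame units, hence the PAGE_SIZE scaling before comparing against the byte-based region fields. The same test factored into a hypothetical helper for clarity:

static bool memslot_basics_unchanged(const struct kvm_memory_slot *old,
				     const struct kvm_userspace_memory_region *mem)
{
	return old->userspace_addr == mem->userspace_addr &&
	       old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	       old->npages * PAGE_SIZE == mem->memory_size;
}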
+ */ + if (old->userspace_addr == mem->userspace_addr && + old->base_gfn * PAGE_SIZE == mem->guest_phys_addr && + old->npages * PAGE_SIZE == mem->memory_size) + return; rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, mem->guest_phys_addr, mem->memory_size); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 4d89d64a8161..efc14f687265 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -110,12 +110,12 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); void kvm_s390_tasklet(unsigned long parm); void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); -int kvm_s390_inject_vm(struct kvm *kvm, - struct kvm_s390_interrupt *s390int); -int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt *s390int); -int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); -int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); +int __must_check kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int); +int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt *s390int); +int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); +int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 cr6, u64 schid); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 0ef9894606e5..6bbd7b5a0bbe 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -14,6 +14,8 @@ #include <linux/kvm.h> #include <linux/gfp.h> #include <linux/errno.h> +#include <linux/compat.h> +#include <asm/asm-offsets.h> #include <asm/current.h> #include <asm/debug.h> #include <asm/ebcdic.h> @@ -35,31 +37,24 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ - if (operand2 & 3) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (operand2 & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); /* get the value */ - if (get_guest_u32(vcpu, operand2, &address)) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (get_guest(vcpu, address, (u32 __user *) operand2)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); address = address & 0x7fffe000u; /* make sure that the new value is valid memory */ if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || - (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); kvm_s390_set_prefix(vcpu, address); VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); trace_kvm_s390_handle_prefix(vcpu, 1, address); -out: return 0; } @@ -73,49 +68,37 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) operand2 = kvm_s390_get_base_disp_s(vcpu); /* must be word boundary */ - if (operand2 & 3) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (operand2 & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); address = vcpu->arch.sie_block->prefix; address = address & 0x7fffe000u; /* get the value */ - if (put_guest_u32(vcpu, operand2, address)) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (put_guest(vcpu, address, (u32 __user *)operand2)) + return 
kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); trace_kvm_s390_handle_prefix(vcpu, 0, address); -out: return 0; } static int handle_store_cpu_address(struct kvm_vcpu *vcpu) { u64 useraddr; - int rc; vcpu->stat.instruction_stap++; useraddr = kvm_s390_get_base_disp_s(vcpu); - if (useraddr & 1) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (useraddr & 1) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id); - if (rc == -EFAULT) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr); trace_kvm_s390_handle_stap(vcpu, useraddr); -out: return 0; } @@ -129,36 +112,38 @@ static int handle_skey(struct kvm_vcpu *vcpu) static int handle_tpi(struct kvm_vcpu *vcpu) { - u64 addr; struct kvm_s390_interrupt_info *inti; + u64 addr; int cc; addr = kvm_s390_get_base_disp_s(vcpu); - + if (addr & 3) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + cc = 0; inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0); - if (inti) { - if (addr) { - /* - * Store the two-word I/O interruption code into the - * provided area. - */ - put_guest_u16(vcpu, addr, inti->io.subchannel_id); - put_guest_u16(vcpu, addr + 2, inti->io.subchannel_nr); - put_guest_u32(vcpu, addr + 4, inti->io.io_int_parm); - } else { - /* - * Store the three-word I/O interruption code into - * the appropriate lowcore area. - */ - put_guest_u16(vcpu, 184, inti->io.subchannel_id); - put_guest_u16(vcpu, 186, inti->io.subchannel_nr); - put_guest_u32(vcpu, 188, inti->io.io_int_parm); - put_guest_u32(vcpu, 192, inti->io.io_int_word); - } - cc = 1; - } else - cc = 0; + if (!inti) + goto no_interrupt; + cc = 1; + if (addr) { + /* + * Store the two-word I/O interruption code into the + * provided area. + */ + put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) addr); + put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) (addr + 2)); + put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) (addr + 4)); + } else { + /* + * Store the three-word I/O interruption code into + * the appropriate lowcore area. + */ + put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) __LC_SUBCHANNEL_ID); + put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) __LC_SUBCHANNEL_NR); + put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) __LC_IO_INT_PARM); + put_guest(vcpu, inti->io.io_int_word, (u32 __user *) __LC_IO_INT_WORD); + } kfree(inti); +no_interrupt: /* Set condition code and we're done. 
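handle_tpi() reports its outcome solely through the guest condition code: the two cc bits occupy PSW bits 18-19, which is bits 44-45 counting from the least significant end of the 64-bit gpsw.mask, hence the << 44 shifts in the code that follows. A hypothetical helper spelling out the manipulation the patch open-codes:

static void set_guest_cc(struct kvm_vcpu *vcpu, unsigned long cc)
{
	/* clear the two-bit cc field, then insert the new value */
	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
	vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;
}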
*/ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; @@ -230,13 +215,10 @@ static int handle_stfl(struct kvm_vcpu *vcpu) rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), &facility_list, sizeof(facility_list)); - if (rc == -EFAULT) - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - else { - VCPU_EVENT(vcpu, 5, "store facility list value %x", - facility_list); - trace_kvm_s390_handle_stfl(vcpu, facility_list); - } + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list); + trace_kvm_s390_handle_stfl(vcpu, facility_list); return 0; } @@ -249,112 +231,80 @@ static void handle_new_psw(struct kvm_vcpu *vcpu) #define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) #define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL -#define PSW_ADDR_24 0x00000000000fffffUL +#define PSW_ADDR_24 0x0000000000ffffffUL #define PSW_ADDR_31 0x000000007fffffffUL +static int is_valid_psw(psw_t *psw) { + if (psw->mask & PSW_MASK_UNASSIGNED) + return 0; + if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) { + if (psw->addr & ~PSW_ADDR_31) + return 0; + } + if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24)) + return 0; + if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_EA) + return 0; + return 1; +} + int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) { - u64 addr; + psw_t *gpsw = &vcpu->arch.sie_block->gpsw; psw_compat_t new_psw; + u64 addr; - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + if (gpsw->mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OPERATION); - addr = kvm_s390_get_base_disp_s(vcpu); - - if (addr & 7) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - - if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } - - if (!(new_psw.mask & PSW32_MASK_BASE)) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - - vcpu->arch.sie_block->gpsw.mask = - (new_psw.mask & ~PSW32_MASK_BASE) << 32; - vcpu->arch.sie_block->gpsw.addr = new_psw.addr; - - if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || - (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && - (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || - ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == - PSW_MASK_EA)) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - + if (addr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (!(new_psw.mask & PSW32_MASK_BASE)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32; + gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE; + gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE; + if (!is_valid_psw(gpsw)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); handle_new_psw(vcpu); -out: return 0; } static int handle_lpswe(struct kvm_vcpu *vcpu) { - u64 addr; psw_t new_psw; + u64 addr; addr = kvm_s390_get_base_disp_s(vcpu); - - if (addr & 7) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - - if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } - - vcpu->arch.sie_block->gpsw.mask = new_psw.mask; - vcpu->arch.sie_block->gpsw.addr = 
new_psw.addr; - - if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || - (((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == - PSW_MASK_BA) && - (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_31)) || - (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && - (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || - ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == - PSW_MASK_EA)) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } - + if (addr & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + vcpu->arch.sie_block->gpsw = new_psw; + if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); handle_new_psw(vcpu); -out: return 0; } static int handle_stidp(struct kvm_vcpu *vcpu) { u64 operand2; - int rc; vcpu->stat.instruction_stidp++; operand2 = kvm_s390_get_base_disp_s(vcpu); - if (operand2 & 7) { - kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - goto out; - } + if (operand2 & 7) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data); - if (rc == -EFAULT) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out; - } + if (put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); -out: return 0; } @@ -394,8 +344,9 @@ static int handle_stsi(struct kvm_vcpu *vcpu) int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; int sel1 = vcpu->run->s.regs.gprs[0] & 0xff; int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff; + unsigned long mem = 0; u64 operand2; - unsigned long mem; + int rc = 0; vcpu->stat.instruction_stsi++; VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); @@ -414,37 +365,37 @@ static int handle_stsi(struct kvm_vcpu *vcpu) case 2: mem = get_zeroed_page(GFP_KERNEL); if (!mem) - goto out_fail; + goto out_no_data; if (stsi((void *) mem, fc, sel1, sel2)) - goto out_mem; + goto out_no_data; break; case 3: if (sel1 != 2 || sel2 != 2) - goto out_fail; + goto out_no_data; mem = get_zeroed_page(GFP_KERNEL); if (!mem) - goto out_fail; + goto out_no_data; handle_stsi_3_2_2(vcpu, (void *) mem); break; default: - goto out_fail; + goto out_no_data; } if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - goto out_mem; + rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out_exception; } trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); free_page(mem); vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); vcpu->run->s.regs.gprs[0] = 0; return 0; -out_mem: - free_page(mem); -out_fail: +out_no_data: /* condition code 3 */ vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; - return 0; +out_exception: + free_page(mem); + return rc; } static const intercept_handler_t b2_handlers[256] = { @@ -575,20 +526,13 @@ static int handle_tprot(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) return -EOPNOTSUPP; - - /* we must resolve the address without holding the mmap semaphore. - * This is ok since the userspace hypervisor is not supposed to change - * the mapping while the guest queries the memory. Otherwise the guest - * might crash or get wrong info anyway. 
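Two details of the PSW rework above are worth spelling out. First, the corrected PSW_ADDR_24 constant: a 24-bit address space needs the mask (1 << 24) - 1 = 0xffffff, while the old value 0xfffff covered only 20 bits and so rejected valid 24-bit-mode addresses between 1 MB and 16 MB. Second, is_valid_psw() now centralizes the checks that lpsw and lpswe previously duplicated; an equivalent sketch with the four addressing modes made explicit:

static int psw_addr_ok(const psw_t *psw)
{
	unsigned long mode = psw->mask & PSW_MASK_ADDR_MODE;

	if (psw->mask & PSW_MASK_UNASSIGNED)
		return 0;
	if (mode == (PSW_MASK_EA | PSW_MASK_BA))	/* 64-bit mode */
		return 1;
	if (mode == PSW_MASK_BA)			/* 31-bit mode */
		return !(psw->addr & ~PSW_ADDR_31);
	if (mode == 0)					/* 24-bit mode */
		return !(psw->addr & ~PSW_ADDR_24);
	return 0;					/* EA without BA */
}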
*/ - user_address = (unsigned long) __guestaddr_to_user(vcpu, address1); - down_read(&current->mm->mmap_sem); + user_address = __gmap_translate(address1, vcpu->arch.gmap); + if (IS_ERR_VALUE(user_address)) + goto out_inject; vma = find_vma(current->mm, user_address); - if (!vma) { - up_read(&current->mm->mmap_sem); - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - } - + if (!vma) + goto out_inject; vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); if (!(vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_READ)) vcpu->arch.sie_block->gpsw.mask |= (1ul << 44); @@ -597,6 +541,10 @@ static int handle_tprot(struct kvm_vcpu *vcpu) up_read(&current->mm->mmap_sem); return 0; + +out_inject: + up_read(&current->mm->mmap_sem); + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 466fb3383960..50ea137a2d3c 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -89,16 +89,19 @@ static unsigned long follow_table(struct mm_struct *mm, if (unlikely(*table & _REGION_ENTRY_INV)) return -0x39UL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ case _ASCE_TYPE_REGION2: table = table + ((address >> 42) & 0x7ff); if (unlikely(*table & _REGION_ENTRY_INV)) return -0x3aUL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ case _ASCE_TYPE_REGION3: table = table + ((address >> 31) & 0x7ff); if (unlikely(*table & _REGION_ENTRY_INV)) return -0x3bUL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + /* fallthrough */ case _ASCE_TYPE_SEGMENT: table = table + ((address >> 20) & 0x7ff); if (unlikely(*table & _SEGMENT_ENTRY_INV)) diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 640bea12303c..839592ca265c 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -3,7 +3,7 @@ # obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o -obj-y += page-states.o gup.o extable.o pageattr.o +obj-y += page-states.o gup.o extable.o pageattr.o mem_detect.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 0b09b2342302..89ebae4008f2 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -21,6 +21,7 @@ #include <linux/init.h> #include <linux/pagemap.h> #include <linux/bootmem.h> +#include <linux/memory.h> #include <linux/pfn.h> #include <linux/poison.h> #include <linux/initrd.h> @@ -36,6 +37,7 @@ #include <asm/tlbflush.h> #include <asm/sections.h> #include <asm/ctl_reg.h> +#include <asm/sclp.h> pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); @@ -214,6 +216,15 @@ int arch_add_memory(int nid, u64 start, u64 size) return rc; } +unsigned long memory_block_size_bytes(void) +{ + /* + * Make sure the memory block size is always greater + * than or equal to the memory increment size. + */ + return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp_get_rzm()); +} + #ifdef CONFIG_MEMORY_HOTREMOVE int arch_remove_memory(u64 start, u64 size) { diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c new file mode 100644 index 000000000000..3cbd3b8bf311 --- /dev/null +++ b/arch/s390/mm/mem_detect.c @@ -0,0 +1,134 @@ +/* + * Copyright IBM Corp.
2008, 2009 + * + * Author(s): Heiko Carstens <[email protected]> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <asm/ipl.h> +#include <asm/sclp.h> +#include <asm/setup.h> + +#define ADDR2G (1ULL << 31) + +static void find_memory_chunks(struct mem_chunk chunk[], unsigned long maxsize) +{ + unsigned long long memsize, rnmax, rzm; + unsigned long addr = 0, size; + int i = 0, type; + + rzm = sclp_get_rzm(); + rnmax = sclp_get_rnmax(); + memsize = rzm * rnmax; + if (!rzm) + rzm = 1ULL << 17; + if (sizeof(long) == 4) { + rzm = min(ADDR2G, rzm); + memsize = memsize ? min(ADDR2G, memsize) : ADDR2G; + } + if (maxsize) + memsize = memsize ? min((unsigned long)memsize, maxsize) : maxsize; + do { + size = 0; + type = tprot(addr); + do { + size += rzm; + if (memsize && addr + size >= memsize) + break; + } while (type == tprot(addr + size)); + if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) { + if (memsize && (addr + size > memsize)) + size = memsize - addr; + chunk[i].addr = addr; + chunk[i].size = size; + chunk[i].type = type; + i++; + } + addr += size; + } while (addr < memsize && i < MEMORY_CHUNKS); } + +/** + * detect_memory_layout - fill mem_chunk array with memory layout data + * @chunk: mem_chunk array to be filled + * @maxsize: maximum address where memory detection should stop + * + * Fills the passed in memory chunk array with the memory layout of the + * machine. The array must have a size of at least MEMORY_CHUNKS and will + * be fully initialized afterwards. + * If the maxsize parameter has a value > 0 memory detection will stop at + * that address. It is guaranteed that all chunks have an ending address + * that is smaller than maxsize. + * If maxsize is 0 all memory will be detected. + */ +void detect_memory_layout(struct mem_chunk chunk[], unsigned long maxsize) +{ + unsigned long flags, flags_dat, cr0; + + memset(chunk, 0, MEMORY_CHUNKS * sizeof(struct mem_chunk)); + /* + * Disable IRQs, DAT and low address protection so tprot does the + * right thing and we don't get scheduled away with low address + * protection disabled. + */ + local_irq_save(flags); + flags_dat = __arch_local_irq_stnsm(0xfb); + /* + * In case DAT was enabled, make sure chunk doesn't reside in vmalloc + * space. We have disabled DAT and any access to vmalloc area will + * cause an exception. + * If DAT was disabled we are called from early ipl code. + */ + if (test_bit(5, &flags_dat)) { + if (WARN_ON_ONCE(is_vmalloc_or_module_addr(chunk))) + goto out; + } + __ctl_store(cr0, 0, 0); + __ctl_clear_bit(0, 28); + find_memory_chunks(chunk, maxsize); + __ctl_load(cr0, 0, 0); +out: + __arch_local_irq_ssm(flags_dat); + local_irq_restore(flags); } +EXPORT_SYMBOL(detect_memory_layout); + +/* + * Create memory hole with given address and size.
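create_mem_hole(), defined just below, punches the range [addr, addr + size) out of the chunk array; an interior hole splits one chunk into two, which is why the array needs spare slots. A worked example under the assumption of a single 1 GB read/write chunk:

struct mem_chunk chunks[MEMORY_CHUNKS] = {
	{ .addr = 0, .size = 1UL << 30, .type = CHUNK_READ_WRITE },
};

/* Punch a 128 MB hole at the 256 MB mark: chunks[0] becomes
 * [0, 256 MB) and chunks[1] becomes [384 MB, 1 GB). */
create_mem_hole(chunks, 256UL << 20, 128UL << 20);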
+ */ +void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, + unsigned long size) +{ + int i; + + for (i = 0; i < MEMORY_CHUNKS; i++) { + struct mem_chunk *chunk = &mem_chunk[i]; + + if (chunk->size == 0) + continue; + if (addr > chunk->addr + chunk->size) + continue; + if (addr + size <= chunk->addr) + continue; + /* Split */ + if ((addr > chunk->addr) && + (addr + size < chunk->addr + chunk->size)) { + struct mem_chunk *new = chunk + 1; + + memmove(new, chunk, (MEMORY_CHUNKS-i-1) * sizeof(*new)); + new->addr = addr + size; + new->size = chunk->addr + chunk->size - new->addr; + chunk->size = addr - chunk->addr; + continue; + } else if ((addr <= chunk->addr) && + (addr + size >= chunk->addr + chunk->size)) { + memset(chunk, 0, sizeof(*chunk)); + } else if (addr + size < chunk->addr + chunk->size) { + chunk->size = chunk->addr + chunk->size - addr - size; + chunk->addr = addr + size; + } else if (addr > chunk->addr) { + chunk->size = addr - chunk->addr; + } + } } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index bd954e96f51c..7805ddca833d 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -454,9 +454,8 @@ unsigned long gmap_translate(unsigned long address, struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_translate); -static int gmap_connect_pgtable(unsigned long segment, - unsigned long *segment_ptr, - struct gmap *gmap) +static int gmap_connect_pgtable(unsigned long address, unsigned long segment, + unsigned long *segment_ptr, struct gmap *gmap) { unsigned long vmaddr; struct vm_area_struct *vma; @@ -491,7 +490,9 @@ static int gmap_connect_pgtable(unsigned long segment, /* Link gmap segment table entry location to page table. */ page = pmd_page(*pmd); mp = (struct gmap_pgtable *) page->index; + rmap->gmap = gmap; rmap->entry = segment_ptr; + rmap->vmaddr = address; spin_lock(&mm->page_table_lock); if (*segment_ptr == segment) { list_add(&rmap->list, &mp->mapper); @@ -553,7 +554,7 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) if (!(segment & _SEGMENT_ENTRY_RO)) /* Nothing mapped in the gmap address space. */ break; - rc = gmap_connect_pgtable(segment, segment_ptr, gmap); + rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap); if (rc) return rc; } @@ -619,6 +620,118 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_discard); +static LIST_HEAD(gmap_notifier_list); +static DEFINE_SPINLOCK(gmap_notifier_lock); + +/** + * gmap_register_ipte_notifier - register a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_register_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_add(&nb->list, &gmap_notifier_list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); + +/** + * gmap_unregister_ipte_notifier - remove a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_del_init(&nb->list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); + +/** + * gmap_ipte_notify - mark a range of ptes for invalidation notification + * @gmap: pointer to guest mapping meta data structure + * @start: virtual address in the guest address space + * @len: size of area + * + * Returns 0 if for each page in the given range a gmap mapping exists and + * the invalidation notification could be set.
If the gmap mapping is missing + * for one or more pages -EFAULT is returned. If no memory could be allocated + * -ENOMEM is returned. This function establishes missing page table entries. + */ +int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) +{ + unsigned long addr; + spinlock_t *ptl; + pte_t *ptep, entry; + pgste_t pgste; + int rc = 0; + + if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK)) + return -EINVAL; + down_read(&gmap->mm->mmap_sem); + while (len) { + /* Convert gmap address and connect the page tables */ + addr = __gmap_fault(start, gmap); + if (IS_ERR_VALUE(addr)) { + rc = addr; + break; + } + /* Get the page mapped */ + if (get_user_pages(current, gmap->mm, addr, 1, 1, 0, + NULL, NULL) != 1) { + rc = -EFAULT; + break; + } + /* Walk the process page table, lock and get pte pointer */ + ptep = get_locked_pte(gmap->mm, addr, &ptl); + if (unlikely(!ptep)) + continue; + /* Set notification bit in the pgste of the pte */ + entry = *ptep; + if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) { + pgste = pgste_get_lock(ptep); + pgste_val(pgste) |= RCP_IN_BIT; + pgste_set_unlock(ptep, pgste); + start += PAGE_SIZE; + len -= PAGE_SIZE; + } + spin_unlock(ptl); + } + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_ipte_notify); + +/** + * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. + * @mm: pointer to the process mm_struct + * @addr: virtual address in the process address space + * @pte: pointer to the page table entry + * + * This function is assumed to be called with the page table lock held + * for the pte to notify. + */ +void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte) +{ + unsigned long segment_offset; + struct gmap_notifier *nb; + struct gmap_pgtable *mp; + struct gmap_rmap *rmap; + struct page *page; + + segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); + segment_offset = segment_offset * (4096 / sizeof(pte_t)); + page = pfn_to_page(__pa(pte) >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + spin_lock(&gmap_notifier_lock); + list_for_each_entry(rmap, &mp->mapper, list) { + list_for_each_entry(nb, &gmap_notifier_list, list) + nb->notifier_call(rmap->gmap, + rmap->vmaddr + segment_offset); + } + spin_unlock(&gmap_notifier_lock); +} + static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, unsigned long vmaddr) { diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 35837054f734..8b268fcc4612 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -375,9 +375,8 @@ void __init vmem_map_init(void) ro_start = PFN_ALIGN((unsigned long)&_stext); ro_end = (unsigned long)&_eshared & PAGE_MASK; - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { - if (memory_chunk[i].type == CHUNK_CRASHK || - memory_chunk[i].type == CHUNK_OLDMEM) + for (i = 0; i < MEMORY_CHUNKS; i++) { + if (!memory_chunk[i].size) continue; start = memory_chunk[i].addr; end = memory_chunk[i].addr + memory_chunk[i].size; @@ -412,9 +411,6 @@ static int __init vmem_convert_memory_chunk(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; - if (memory_chunk[i].type == CHUNK_CRASHK || - memory_chunk[i].type == CHUNK_OLDMEM) - continue; seg = kzalloc(sizeof(*seg), GFP_KERNEL); if (!seg) panic("Out of memory...\n"); diff --git a/arch/sh/drivers/dma/dma-api.c b/arch/sh/drivers/dma/dma-api.c index 851e5106e580..c0eec08d8f95 100644 --- a/arch/sh/drivers/dma/dma-api.c +++ b/arch/sh/drivers/dma/dma-api.c @@ -348,7 +348,7 @@ 
static const struct file_operations dma_proc_fops = { .open = dma_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; int register_dmac(struct dma_info *info) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index f5041d741dea..a639c0d07b8b 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -99,6 +99,9 @@ config HAVE_LATENCYTOP_SUPPORT bool default y if SPARC64 +config ARCH_HIBERNATION_POSSIBLE + def_bool y if SPARC64 + config AUDIT_ARCH bool default y @@ -303,6 +306,10 @@ config ARCH_SPARSEMEM_DEFAULT source "mm/Kconfig" +if SPARC64 +source "kernel/power/Kconfig" +endif + config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" depends on SPARC64 && SMP @@ -472,7 +479,18 @@ config LEON_PCI depends on PCI && SPARC_LEON default y -config GRPCI2 +config SPARC_GRPCI1 + bool "GRPCI Host Bridge Support" + depends on LEON_PCI + default y + help + Say Y here to include the GRPCI Host Bridge Driver. The GRPCI + PCI host controller is typically found in GRLIB SPARC32/LEON + systems. The driver has one property (all_pci_errors) controlled + from the bootloader that makes the GRPCI generate interrupts + on detected PCI Parity and System errors. + +config SPARC_GRPCI2 bool "GRPCI2 Host Bridge Support" depends on LEON_PCI default y diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 541b8b075c7d..9ff423678cbc 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -57,6 +57,7 @@ core-y += arch/sparc/ libs-y += arch/sparc/prom/ libs-y += arch/sparc/lib/ +drivers-$(CONFIG_PM) += arch/sparc/power/ drivers-$(CONFIG_OPROFILE) += arch/sparc/oprofile/ boot := arch/sparc/boot diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h index b30eb37294c5..4adefe8e2885 100644 --- a/arch/sparc/include/asm/cmpxchg_64.h +++ b/arch/sparc/include/asm/cmpxchg_64.h @@ -141,5 +141,6 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ cmpxchg_local((ptr), (o), (n)); \ }) +#define cmpxchg64(ptr, o, n) cmpxchg64_local((ptr), (o), (n)) #endif /* __ARCH_SPARC64_CMPXCHG__ */ diff --git a/arch/sparc/include/asm/head_32.h b/arch/sparc/include/asm/head_32.h index a76874838f61..5f1dbe315bc8 100644 --- a/arch/sparc/include/asm/head_32.h +++ b/arch/sparc/include/asm/head_32.h @@ -55,15 +55,15 @@ /* The Get Condition Codes software trap for userland. */ #define GETCC_TRAP \ - b getcc_trap_handler; mov %psr, %l0; nop; nop; + b getcc_trap_handler; rd %psr, %l0; nop; nop; /* The Set Condition Codes software trap for userland. */ #define SETCC_TRAP \ - b setcc_trap_handler; mov %psr, %l0; nop; nop; + b setcc_trap_handler; rd %psr, %l0; nop; nop; /* The Get PSR software trap for userland. */ #define GETPSR_TRAP \ - mov %psr, %i0; jmp %l2; rett %l2 + 4; nop; + rd %psr, %i0; jmp %l2; rett %l2 + 4; nop; /* This is for hard interrupts from level 1-14, 15 is non-maskable (nmi) and * gets handled with another macro. diff --git a/arch/sparc/include/asm/hibernate.h b/arch/sparc/include/asm/hibernate.h new file mode 100644 index 000000000000..2ec34f842249 --- /dev/null +++ b/arch/sparc/include/asm/hibernate.h @@ -0,0 +1,23 @@ +/* + * hibernate.h: Hibernation support specific to sparc64.
+ * + * Copyright (C) 2013 Kirill V Tkhai ([email protected]) + */ + +#ifndef ___SPARC_HIBERNATE_H +#define ___SPARC_HIBERNATE_H + +struct saved_context { + unsigned long fp; + unsigned long cwp; + unsigned long wstate; + + unsigned long tick; + unsigned long pstate; + + unsigned long g4; + unsigned long g5; + unsigned long g6; +}; + +#endif diff --git a/arch/sparc/include/asm/leon_pci.h b/arch/sparc/include/asm/leon_pci.h index f48527ebdd8f..bfd3ab3092b5 100644 --- a/arch/sparc/include/asm/leon_pci.h +++ b/arch/sparc/include/asm/leon_pci.h @@ -12,6 +12,7 @@ struct leon_pci_info { struct pci_ops *ops; struct resource io_space; struct resource mem_space; + struct resource busn; int (*map_irq)(const struct pci_dev *dev, u8 slot, u8 pin); }; diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index 9191ca62ed9c..3d528f06e4b0 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -68,7 +68,7 @@ extern void smp_tsb_sync(struct mm_struct *mm); extern void __flush_tlb_mm(unsigned long, unsigned long); -/* Switch the current MM context. Interrupts are disabled. */ +/* Switch the current MM context. */ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk) { unsigned long ctx_valid, flags; diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index cce72ce4c334..4c3f7f01c709 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -18,9 +18,6 @@ #include <asm/ptrace.h> #include <asm/page.h> -/* Don't hold the runqueue lock over context switch */ -#define __ARCH_WANT_UNLOCKED_CTXSW - /* The sparc has no problems with write protection */ #define wp_works_ok 1 #define wp_works_ok__is_a_macro /* for versions in ksyms.c */ diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 5276fd4e9d03..d432fb20358e 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -74,7 +74,8 @@ obj-y += dma.o obj-$(CONFIG_PCIC_PCI) += pcic.o obj-$(CONFIG_LEON_PCI) += leon_pci.o -obj-$(CONFIG_GRPCI2) += leon_pci_grpci2.o +obj-$(CONFIG_SPARC_GRPCI2)+= leon_pci_grpci2.o +obj-$(CONFIG_SPARC_GRPCI1)+= leon_pci_grpci1.o obj-$(CONFIG_SMP) += trampoline_$(BITS).o smp_$(BITS).o obj-$(CONFIG_SPARC32_SMP) += sun4m_smp.o sun4d_smp.o leon_smp.o diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c index 68f7e1118e9b..961b87f99e69 100644 --- a/arch/sparc/kernel/asm-offsets.c +++ b/arch/sparc/kernel/asm-offsets.c @@ -14,6 +14,8 @@ // #include <linux/mm.h> #include <linux/kbuild.h> +#include <asm/hibernate.h> + #ifdef CONFIG_SPARC32 int sparc32_foo(void) { @@ -24,6 +26,19 @@ int sparc32_foo(void) #else int sparc64_foo(void) { +#ifdef CONFIG_HIBERNATION + BLANK(); + OFFSET(SC_REG_FP, saved_context, fp); + OFFSET(SC_REG_CWP, saved_context, cwp); + OFFSET(SC_REG_WSTATE, saved_context, wstate); + + OFFSET(SC_REG_TICK, saved_context, tick); + OFFSET(SC_REG_PSTATE, saved_context, pstate); + + OFFSET(SC_REG_G4, saved_context, g4); + OFFSET(SC_REG_G5, saved_context, g5); + OFFSET(SC_REG_G6, saved_context, g6); +#endif return 0; } #endif diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c index 87f60ee65433..7c0231dabe44 100644 --- a/arch/sparc/kernel/leon_kernel.c +++ b/arch/sparc/kernel/leon_kernel.c @@ -213,6 +213,7 @@ unsigned int leon_build_device_irq(unsigned int real_irq, { unsigned int irq; unsigned long mask; + struct irq_desc *desc; irq = 
0; mask = leon_get_irqmask(real_irq); @@ -226,9 +227,12 @@ unsigned int leon_build_device_irq(unsigned int real_irq, if (do_ack) mask |= LEON_DO_ACK_HW; - irq_set_chip_and_handler_name(irq, &leon_irq, - flow_handler, name); - irq_set_chip_data(irq, (void *)mask); + desc = irq_to_desc(irq); + if (!desc || !desc->handle_irq || desc->handle_irq == handle_bad_irq) { + irq_set_chip_and_handler_name(irq, &leon_irq, + flow_handler, name); + irq_set_chip_data(irq, (void *)mask); + } out: return irq; diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c index 852dc8430528..88aaaa57bb64 100644 --- a/arch/sparc/kernel/leon_pci.c +++ b/arch/sparc/kernel/leon_pci.c @@ -29,6 +29,8 @@ void leon_pci_init(struct platform_device *ofdev, struct leon_pci_info *info) pci_add_resource_offset(&resources, &info->io_space, info->io_space.start - 0x1000); pci_add_resource(&resources, &info->mem_space); + info->busn.flags = IORESOURCE_BUS; + pci_add_resource(&resources, &info->busn); root_bus = pci_scan_root_bus(&ofdev->dev, 0, info->ops, info, &resources); diff --git a/arch/sparc/kernel/leon_pci_grpci1.c b/arch/sparc/kernel/leon_pci_grpci1.c new file mode 100644 index 000000000000..7739a54315e2 --- /dev/null +++ b/arch/sparc/kernel/leon_pci_grpci1.c @@ -0,0 +1,724 @@ +/* + * leon_pci_grpci1.c: GRPCI1 Host PCI driver + * + * Copyright (C) 2013 Aeroflex Gaisler AB + * + * This GRPCI1 driver does not support PCI interrupts taken from + * GPIO pins. Interrupt generation at PCI parity and system error + * detection is by default turned off since some GRPCI1 cores do + * not support detection. It can be turned on from the bootloader + * using the all_pci_errors property. + * + * Contributors: Daniel Hellstrom <[email protected]> + */ + +#include <linux/of_device.h> +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/of_irq.h> +#include <linux/delay.h> +#include <linux/pci.h> + +#include <asm/leon_pci.h> +#include <asm/sections.h> +#include <asm/vaddrs.h> +#include <asm/leon.h> +#include <asm/io.h> + +#include "irq.h" + +/* Enable/Disable Debugging Configuration Space Access */ +#undef GRPCI1_DEBUG_CFGACCESS + +/* + * GRPCI1 APB Register MAP + */ +struct grpci1_regs { + unsigned int cfg_stat; /* 0x00 Configuration / Status */ + unsigned int bar0; /* 0x04 BAR0 (RO) */ + unsigned int page0; /* 0x08 PAGE0 (RO) */ + unsigned int bar1; /* 0x0C BAR1 (RO) */ + unsigned int page1; /* 0x10 PAGE1 */ + unsigned int iomap; /* 0x14 IO Map */ + unsigned int stat_cmd; /* 0x18 PCI Status & Command (RO) */ + unsigned int irq; /* 0x1C Interrupt register */ +}; + +#define REGLOAD(a) (be32_to_cpu(__raw_readl(&(a)))) +#define REGSTORE(a, v) (__raw_writel(cpu_to_be32(v), &(a))) + +#define PAGE0_BTEN_BIT 0 +#define PAGE0_BTEN (1 << PAGE0_BTEN_BIT) + +#define CFGSTAT_HOST_BIT 13 +#define CFGSTAT_CTO_BIT 8 +#define CFGSTAT_HOST (1 << CFGSTAT_HOST_BIT) +#define CFGSTAT_CTO (1 << CFGSTAT_CTO_BIT) + +#define IRQ_DPE (1 << 9) +#define IRQ_SSE (1 << 8) +#define IRQ_RMA (1 << 7) +#define IRQ_RTA (1 << 6) +#define IRQ_STA (1 << 5) +#define IRQ_DPED (1 << 4) +#define IRQ_INTD (1 << 3) +#define IRQ_INTC (1 << 2) +#define IRQ_INTB (1 << 1) +#define IRQ_INTA (1 << 0) +#define IRQ_DEF_ERRORS (IRQ_RMA | IRQ_RTA | IRQ_STA) +#define IRQ_ALL_ERRORS (IRQ_DPED | IRQ_DEF_ERRORS | IRQ_SSE | IRQ_DPE) +#define IRQ_INTX (IRQ_INTA | IRQ_INTB | IRQ_INTC | IRQ_INTD) +#define IRQ_MASK_BIT 16 + +#define DEF_PCI_ERRORS (PCI_STATUS_SIG_TARGET_ABORT | \ + PCI_STATUS_REC_TARGET_ABORT | \ + PCI_STATUS_REC_MASTER_ABORT) +#define
ALL_PCI_ERRORS (PCI_STATUS_PARITY | PCI_STATUS_DETECTED_PARITY | \ + PCI_STATUS_SIG_SYSTEM_ERROR | DEF_PCI_ERRORS) + +#define TGT 256 + +struct grpci1_priv { + struct leon_pci_info info; /* must be on top of this structure */ + struct grpci1_regs *regs; /* GRPCI register map */ + struct device *dev; + int pci_err_mask; /* STATUS register error mask */ + int irq; /* LEON irqctrl GRPCI IRQ */ + unsigned char irq_map[4]; /* GRPCI nexus PCI INTX# IRQs */ + unsigned int irq_err; /* GRPCI nexus Virt Error IRQ */ + + /* AHB PCI Windows */ + unsigned long pci_area; /* MEMORY */ + unsigned long pci_area_end; + unsigned long pci_io; /* I/O */ + unsigned long pci_conf; /* CONFIGURATION */ + unsigned long pci_conf_end; + unsigned long pci_io_va; +}; + +static struct grpci1_priv *grpci1priv; + +static int grpci1_cfg_w32(struct grpci1_priv *priv, unsigned int bus, + unsigned int devfn, int where, u32 val); + +int grpci1_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + struct grpci1_priv *priv = dev->bus->sysdata; + int irq_group; + + /* Use default IRQ decoding on PCI BUS0 according to slot numbering */ + irq_group = slot & 0x3; + pin = ((pin - 1) + irq_group) & 0x3; + + return priv->irq_map[pin]; +} + +static int grpci1_cfg_r32(struct grpci1_priv *priv, unsigned int bus, + unsigned int devfn, int where, u32 *val) +{ + u32 *pci_conf, tmp, cfg; + + if (where & 0x3) + return -EINVAL; + + if (bus == 0) { + devfn += (0x8 * 6); /* start at AD16=Device0 */ + } else if (bus == TGT) { + bus = 0; + devfn = 0; /* special case: bridge controller itself */ + } + + /* Select bus */ + cfg = REGLOAD(priv->regs->cfg_stat); + REGSTORE(priv->regs->cfg_stat, (cfg & ~(0xf << 23)) | (bus << 23)); + + /* do read access */ + pci_conf = (u32 *) (priv->pci_conf | (devfn << 8) | (where & 0xfc)); + tmp = LEON3_BYPASS_LOAD_PA(pci_conf); + + /* check if master abort was received */ + if (REGLOAD(priv->regs->cfg_stat) & CFGSTAT_CTO) { + *val = 0xffffffff; + /* Clear Master abort bit in PCI cfg space (is set) */ + tmp = REGLOAD(priv->regs->stat_cmd); + grpci1_cfg_w32(priv, TGT, 0, PCI_COMMAND, tmp); + } else { + /* Bus always little endian (unaffected by byte-swapping) */ + *val = flip_dword(tmp); + } + + return 0; +} + +static int grpci1_cfg_r16(struct grpci1_priv *priv, unsigned int bus, + unsigned int devfn, int where, u32 *val) +{ + u32 v; + int ret; + + if (where & 0x1) + return -EINVAL; + ret = grpci1_cfg_r32(priv, bus, devfn, where & ~0x3, &v); + *val = 0xffff & (v >> (8 * (where & 0x3))); + return ret; +} + +static int grpci1_cfg_r8(struct grpci1_priv *priv, unsigned int bus, + unsigned int devfn, int where, u32 *val) +{ + u32 v; + int ret; + + ret = grpci1_cfg_r32(priv, bus, devfn, where & ~0x3, &v); + *val = 0xff & (v >> (8 * (where & 3))); + + return ret; +} + +static int grpci1_cfg_w32(struct grpci1_priv *priv, unsigned int bus, + unsigned int devfn, int where, u32 val) +{ + unsigned int *pci_conf; + u32 cfg; + + if (where & 0x3) + return -EINVAL; + + if (bus == 0) { + devfn += (0x8 * 6); /* start at AD16=Device0 */ + } else if (bus == TGT) { + bus = 0; + devfn = 0; /* special case: bridge controller itself */ + } + + /* Select bus */ + cfg = REGLOAD(priv->regs->cfg_stat); + REGSTORE(priv->regs->cfg_stat, (cfg & ~(0xf << 23)) | (bus << 23)); + + pci_conf = (unsigned int *) (priv->pci_conf | + (devfn << 8) | (where & 0xfc)); + LEON3_BYPASS_STORE_PA(pci_conf, flip_dword(val)); + + return 0; +} + +static int grpci1_cfg_w16(struct grpci1_priv *priv, unsigned int bus, + unsigned int devfn, int where, u32 val) +{ + int
ret; + u32 v; + + if (where & 0x1) + return -EINVAL; + ret = grpci1_cfg_r32(priv, bus, devfn, where&~3, &v); + if (ret) + return ret; + v = (v & ~(0xffff << (8 * (where & 0x3)))) | + ((0xffff & val) << (8 * (where & 0x3))); + return grpci1_cfg_w32(priv, bus, devfn, where & ~0x3, v); +} + +static int grpci1_cfg_w8(struct grpci1_priv *priv, unsigned int bus, + unsigned int devfn, int where, u32 val) +{ + int ret; + u32 v; + + ret = grpci1_cfg_r32(priv, bus, devfn, where & ~0x3, &v); + if (ret != 0) + return ret; + v = (v & ~(0xff << (8 * (where & 0x3)))) | + ((0xff & val) << (8 * (where & 0x3))); + return grpci1_cfg_w32(priv, bus, devfn, where & ~0x3, v); +} + +/* Read from Configuration Space. When entering here the PCI layer has taken + * the pci_lock spinlock and IRQ is off. + */ +static int grpci1_read_config(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + struct grpci1_priv *priv = grpci1priv; + unsigned int busno = bus->number; + int ret; + + if (PCI_SLOT(devfn) > 15 || busno > 15) { + *val = ~0; + return 0; + } + + switch (size) { + case 1: + ret = grpci1_cfg_r8(priv, busno, devfn, where, val); + break; + case 2: + ret = grpci1_cfg_r16(priv, busno, devfn, where, val); + break; + case 4: + ret = grpci1_cfg_r32(priv, busno, devfn, where, val); + break; + default: + ret = -EINVAL; + break; + } + +#ifdef GRPCI1_DEBUG_CFGACCESS + printk(KERN_INFO + "grpci1_read_config: [%02x:%02x:%x] ofs=%d val=%x size=%d\n", + busno, PCI_SLOT(devfn), PCI_FUNC(devfn), where, *val, size); +#endif + + return ret; +} + +/* Write to Configuration Space. When entering here the PCI layer has taken + * the pci_lock spinlock and IRQ is off. + */ +static int grpci1_write_config(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + struct grpci1_priv *priv = grpci1priv; + unsigned int busno = bus->number; + + if (PCI_SLOT(devfn) > 15 || busno > 15) + return 0; + +#ifdef GRPCI1_DEBUG_CFGACCESS + printk(KERN_INFO + "grpci1_write_config: [%02x:%02x:%x] ofs=%d size=%d val=%x\n", + busno, PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val); +#endif + + switch (size) { + default: + return -EINVAL; + case 1: + return grpci1_cfg_w8(priv, busno, devfn, where, val); + case 2: + return grpci1_cfg_w16(priv, busno, devfn, where, val); + case 4: + return grpci1_cfg_w32(priv, busno, devfn, where, val); + } +} + +static struct pci_ops grpci1_ops = { + .read = grpci1_read_config, + .write = grpci1_write_config, +}; + +/* GENIRQ IRQ chip implementation for grpci1 irqmode=0..2. In configuration + * 3, where all PCI interrupts have a separate IRQ on the system IRQ + * controller, this is not needed and the standard IRQ controller can be used.
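+ * + * Each INTA#..INTD# status bit (0..3) is paired with a mask bit IRQ_MASK_BIT (16) positions above it; masking INTC# (index 2), for example, means clearing bit 18. A minimal sketch of the arithmetic the mask/unmask callbacks below perform, assuming a pin index 0..3: + * + * irqidx = pin + IRQ_MASK_BIT; + * REGSTORE(regs->irq, REGLOAD(regs->irq) & ~(1 << irqidx));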
+ */ + +static void grpci1_mask_irq(struct irq_data *data) +{ + u32 irqidx; + struct grpci1_priv *priv = grpci1priv; + + irqidx = (u32)data->chip_data - 1; + if (irqidx > 3) /* only mask PCI interrupts here */ + return; + irqidx += IRQ_MASK_BIT; + + REGSTORE(priv->regs->irq, REGLOAD(priv->regs->irq) & ~(1 << irqidx)); +} + +static void grpci1_unmask_irq(struct irq_data *data) +{ + u32 irqidx; + struct grpci1_priv *priv = grpci1priv; + + irqidx = (u32)data->chip_data - 1; + if (irqidx > 3) /* only unmask PCI interrupts here */ + return; + irqidx += IRQ_MASK_BIT; + + REGSTORE(priv->regs->irq, REGLOAD(priv->regs->irq) | (1 << irqidx)); +} + +static unsigned int grpci1_startup_irq(struct irq_data *data) +{ + grpci1_unmask_irq(data); + return 0; +} + +static void grpci1_shutdown_irq(struct irq_data *data) +{ + grpci1_mask_irq(data); +} + +static struct irq_chip grpci1_irq = { + .name = "grpci1", + .irq_startup = grpci1_startup_irq, + .irq_shutdown = grpci1_shutdown_irq, + .irq_mask = grpci1_mask_irq, + .irq_unmask = grpci1_unmask_irq, +}; + +/* Handle one or multiple IRQs from the PCI core */ +static void grpci1_pci_flow_irq(unsigned int irq, struct irq_desc *desc) +{ + struct grpci1_priv *priv = grpci1priv; + int i, ack = 0; + unsigned int irqreg; + + irqreg = REGLOAD(priv->regs->irq); + irqreg = (irqreg >> IRQ_MASK_BIT) & irqreg; + + /* Error Interrupt? */ + if (irqreg & IRQ_ALL_ERRORS) { + generic_handle_irq(priv->irq_err); + ack = 1; + } + + /* PCI Interrupt? */ + if (irqreg & IRQ_INTX) { + /* Call respective PCI Interrupt handler */ + for (i = 0; i < 4; i++) { + if (irqreg & (1 << i)) + generic_handle_irq(priv->irq_map[i]); + } + ack = 1; + } + + /* + * Call "first level" IRQ chip end-of-irq handler. It will ACK LEON IRQ + * Controller, this must be done after IRQ sources have been handled to + * avoid double IRQ generation + */ + if (ack) + desc->irq_data.chip->irq_eoi(&desc->irq_data); +} + +/* Create a virtual IRQ */ +static unsigned int grpci1_build_device_irq(unsigned int irq) +{ + unsigned int virq = 0, pil; + + pil = 1 << 8; + virq = irq_alloc(irq, pil); + if (virq == 0) + goto out; + + irq_set_chip_and_handler_name(virq, &grpci1_irq, handle_simple_irq, + "pcilvl"); + irq_set_chip_data(virq, (void *)irq); + +out: + return virq; +} + +/* + * Initialize mappings AMBA<->PCI, clear IRQ state, setup PCI interface + * + * Target BARs: + * BAR0: unused in this implementation + * BAR1: peripheral DMA to host's memory (size at least 256MByte) + * BAR2..BAR5: not implemented in hardware + */ +void grpci1_hw_init(struct grpci1_priv *priv) +{ + u32 ahbadr, bar_sz, data, pciadr; + struct grpci1_regs *regs = priv->regs; + + /* set 1:1 mapping between AHB -> PCI memory space */ + REGSTORE(regs->cfg_stat, priv->pci_area & 0xf0000000); + + /* map PCI accesses to target BAR1 to Linux kernel memory 1:1 */ + ahbadr = 0xf0000000 & (u32)__pa(PAGE_ALIGN((unsigned long) &_end)); + REGSTORE(regs->page1, ahbadr); + + /* translate I/O accesses to 0, I/O Space always @ PCI low 64Kbytes */ + REGSTORE(regs->iomap, REGLOAD(regs->iomap) & 0x0000ffff); + + /* disable and clear pending interrupts */ + REGSTORE(regs->irq, 0); + + /* Setup BAR0 outside access range so that it does not conflict with + * peripheral DMA. There is no need to set up the PAGE0 register. 
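+ * + * The writes below use the standard PCI BAR sizing handshake: write all-ones to the BAR, read back the mask, and take its two's complement (~bar_sz + 1) as the region size; a read-back of 0xf0000000, for example, denotes a 256 MByte BAR. The BAR is then pointed just below pci_area so it never overlaps the window used for DMA.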
+ */ + grpci1_cfg_w32(priv, TGT, 0, PCI_BASE_ADDRESS_0, 0xffffffff); + grpci1_cfg_r32(priv, TGT, 0, PCI_BASE_ADDRESS_0, &bar_sz); + bar_sz = ~bar_sz + 1; + pciadr = priv->pci_area - bar_sz; + grpci1_cfg_w32(priv, TGT, 0, PCI_BASE_ADDRESS_0, pciadr); + + /* + * Setup the Host's PCI Target BAR1 for other peripherals to access, + * and do DMA to the host's memory. + */ + grpci1_cfg_w32(priv, TGT, 0, PCI_BASE_ADDRESS_1, ahbadr); + + /* + * Setup Latency Timer and cache line size. Default cache line + * size will result in poor performance (256 word fetches), 0xff + * will set it according to the max size of the PCI FIFO. + */ + grpci1_cfg_w8(priv, TGT, 0, PCI_CACHE_LINE_SIZE, 0xff); + grpci1_cfg_w8(priv, TGT, 0, PCI_LATENCY_TIMER, 0x40); + + /* set as bus master, enable pci memory responses, clear status bits */ + grpci1_cfg_r32(priv, TGT, 0, PCI_COMMAND, &data); + data |= (PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + grpci1_cfg_w32(priv, TGT, 0, PCI_COMMAND, data); +} + +static irqreturn_t grpci1_jump_interrupt(int irq, void *arg) +{ + struct grpci1_priv *priv = arg; + dev_err(priv->dev, "Jump IRQ happened\n"); + return IRQ_NONE; +} + +/* Handle GRPCI1 Error Interrupt */ +static irqreturn_t grpci1_err_interrupt(int irq, void *arg) +{ + struct grpci1_priv *priv = arg; + u32 status; + + grpci1_cfg_r16(priv, TGT, 0, PCI_STATUS, &status); + status &= priv->pci_err_mask; + + if (status == 0) + return IRQ_NONE; + + if (status & PCI_STATUS_PARITY) + dev_err(priv->dev, "Data Parity Error\n"); + + if (status & PCI_STATUS_SIG_TARGET_ABORT) + dev_err(priv->dev, "Signalled Target Abort\n"); + + if (status & PCI_STATUS_REC_TARGET_ABORT) + dev_err(priv->dev, "Received Target Abort\n"); + + if (status & PCI_STATUS_REC_MASTER_ABORT) + dev_err(priv->dev, "Received Master Abort\n"); + + if (status & PCI_STATUS_SIG_SYSTEM_ERROR) + dev_err(priv->dev, "Signalled System Error\n"); + + if (status & PCI_STATUS_DETECTED_PARITY) + dev_err(priv->dev, "Parity Error\n"); + + /* Clear handled INT TYPE IRQs */ + grpci1_cfg_w16(priv, TGT, 0, PCI_STATUS, status); + + return IRQ_HANDLED; +} + +static int grpci1_of_probe(struct platform_device *ofdev) +{ + struct grpci1_regs *regs; + struct grpci1_priv *priv; + int err, len; + const int *tmp; + u32 cfg, size, err_mask; + struct resource *res; + + if (grpci1priv) { + dev_err(&ofdev->dev, "only one GRPCI1 supported\n"); + return -ENODEV; + } + + if (ofdev->num_resources < 3) { + dev_err(&ofdev->dev, "not enough APB/AHB resources\n"); + return -EIO; + } + + priv = devm_kzalloc(&ofdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) { + dev_err(&ofdev->dev, "memory allocation failed\n"); + return -ENOMEM; + } + platform_set_drvdata(ofdev, priv); + priv->dev = &ofdev->dev; + + /* find device register base address */ + res = platform_get_resource(ofdev, IORESOURCE_MEM, 0); + regs = devm_request_and_ioremap(&ofdev->dev, res); + if (!regs) { + dev_err(&ofdev->dev, "io-regs mapping failed\n"); + return -EADDRNOTAVAIL; + } + + /* + * check that we're in Host Slot and that we can act as a Host Bridge + * and not only as target/peripheral. 
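+ * + * CFGSTAT_HOST (bit 13 of cfg_stat) reflects the host-slot strapping: without it the core cannot initiate configuration cycles, so the probe below bails out with -EIO instead of attempting to scan the bus.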
+ */ + cfg = REGLOAD(regs->cfg_stat); + if ((cfg & CFGSTAT_HOST) == 0) { + dev_err(&ofdev->dev, "not in host system slot\n"); + return -EIO; + } + + /* check that BAR1 supports 256 MByte so that we can map kernel space */ + REGSTORE(regs->page1, 0xffffffff); + size = ~REGLOAD(regs->page1) + 1; + if (size < 0x10000000) { + dev_err(&ofdev->dev, "BAR1 must be at least 256MByte\n"); + return -EIO; + } + + /* hardware must support little-endian PCI (byte-twisting) */ + if ((REGLOAD(regs->page0) & PAGE0_BTEN) == 0) { + dev_err(&ofdev->dev, "byte-twisting is required\n"); + return -EIO; + } + + priv->regs = regs; + priv->irq = irq_of_parse_and_map(ofdev->dev.of_node, 0); + dev_info(&ofdev->dev, "host found at 0x%p, irq%d\n", regs, priv->irq); + + /* Find PCI Memory, I/O and Configuration Space Windows */ + priv->pci_area = ofdev->resource[1].start; + priv->pci_area_end = ofdev->resource[1].end+1; + priv->pci_io = ofdev->resource[2].start; + priv->pci_conf = ofdev->resource[2].start + 0x10000; + priv->pci_conf_end = priv->pci_conf + 0x10000; + priv->pci_io_va = (unsigned long)ioremap(priv->pci_io, 0x10000); + if (!priv->pci_io_va) { + dev_err(&ofdev->dev, "unable to map PCI I/O area\n"); + return -EIO; + } + + printk(KERN_INFO + "GRPCI1: MEMORY SPACE [0x%08lx - 0x%08lx]\n" + " I/O SPACE [0x%08lx - 0x%08lx]\n" + " CONFIG SPACE [0x%08lx - 0x%08lx]\n", + priv->pci_area, priv->pci_area_end-1, + priv->pci_io, priv->pci_conf-1, + priv->pci_conf, priv->pci_conf_end-1); + + /* + * I/O Space resources in I/O Window mapped into Virtual Adr Space. + * We never use low 4KB because some devices seem to have problems using + * address 0. + */ + priv->info.io_space.name = "GRPCI1 PCI I/O Space"; + priv->info.io_space.start = priv->pci_io_va + 0x1000; + priv->info.io_space.end = priv->pci_io_va + 0x10000 - 1; + priv->info.io_space.flags = IORESOURCE_IO; + + /* + * grpci1 has no prefetchable memory, map everything as + * non-prefetchable memory + */ + priv->info.mem_space.name = "GRPCI1 PCI MEM Space"; + priv->info.mem_space.start = priv->pci_area; + priv->info.mem_space.end = priv->pci_area_end - 1; + priv->info.mem_space.flags = IORESOURCE_MEM; + + if (request_resource(&iomem_resource, &priv->info.mem_space) < 0) { + dev_err(&ofdev->dev, "unable to request PCI memory area\n"); + err = -ENOMEM; + goto err1; + } + + if (request_resource(&ioport_resource, &priv->info.io_space) < 0) { + dev_err(&ofdev->dev, "unable to request PCI I/O area\n"); + err = -ENOMEM; + goto err2; + } + + /* setup maximum supported PCI buses */ + priv->info.busn.name = "GRPCI1 busn"; + priv->info.busn.start = 0; + priv->info.busn.end = 15; + + grpci1priv = priv; + + /* Initialize hardware */ + grpci1_hw_init(priv); + + /* + * Get PCI Interrupt to System IRQ mapping and setup IRQ handling + * Error IRQ. All PCI and PCI-Error interrupts are shared using the + * same system IRQ.
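+ * + * The single LEON interrupt is rewired to the chained flow handler grpci1_pci_flow_irq(), which demultiplexes it into five virtual IRQs: one per PCI INTA#..INTD# line plus one for the error sources.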
+ */ + leon_update_virq_handling(priv->irq, grpci1_pci_flow_irq, "pcilvl", 0); + + priv->irq_map[0] = grpci1_build_device_irq(1); + priv->irq_map[1] = grpci1_build_device_irq(2); + priv->irq_map[2] = grpci1_build_device_irq(3); + priv->irq_map[3] = grpci1_build_device_irq(4); + priv->irq_err = grpci1_build_device_irq(5); + + printk(KERN_INFO " PCI INTA..D#: IRQ%d, IRQ%d, IRQ%d, IRQ%d\n", + priv->irq_map[0], priv->irq_map[1], priv->irq_map[2], + priv->irq_map[3]); + + /* Enable IRQs on LEON IRQ controller */ + err = devm_request_irq(&ofdev->dev, priv->irq, grpci1_jump_interrupt, 0, + "GRPCI1_JUMP", priv); + if (err) { + dev_err(&ofdev->dev, "ERR IRQ request failed: %d\n", err); + goto err3; + } + + /* Setup IRQ handler for access errors */ + err = devm_request_irq(&ofdev->dev, priv->irq_err, + grpci1_err_interrupt, IRQF_SHARED, "GRPCI1_ERR", + priv); + if (err) { + dev_err(&ofdev->dev, "ERR VIRQ request failed: %d\n", err); + goto err3; + } + + tmp = of_get_property(ofdev->dev.of_node, "all_pci_errors", &len); + if (tmp && (len == 4)) { + priv->pci_err_mask = ALL_PCI_ERRORS; + err_mask = IRQ_ALL_ERRORS << IRQ_MASK_BIT; + } else { + priv->pci_err_mask = DEF_PCI_ERRORS; + err_mask = IRQ_DEF_ERRORS << IRQ_MASK_BIT; + } + + /* + * Enable Error Interrupts. PCI interrupts are unmasked once request_irq + * is called by the PCI Device drivers + */ + REGSTORE(regs->irq, err_mask); + + /* Init common layer and scan buses */ + priv->info.ops = &grpci1_ops; + priv->info.map_irq = grpci1_map_irq; + leon_pci_init(ofdev, &priv->info); + + return 0; + +err3: + release_resource(&priv->info.io_space); +err2: + release_resource(&priv->info.mem_space); +err1: + iounmap((void *)priv->pci_io_va); + grpci1priv = NULL; + return err; +} + +static struct of_device_id grpci1_of_match[] = { + { + .name = "GAISLER_PCIFBRG", + }, + { + .name = "01_014", + }, + {}, +}; + +static struct platform_driver grpci1_of_driver = { + .driver = { + .name = "grpci1", + .owner = THIS_MODULE, + .of_match_table = grpci1_of_match, + }, + .probe = grpci1_of_probe, +}; + +static int __init grpci1_init(void) +{ + return platform_driver_register(&grpci1_of_driver); +} + +subsys_initcall(grpci1_init); diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c index 4d1487138d26..5f0402aab7fb 100644 --- a/arch/sparc/kernel/leon_pci_grpci2.c +++ b/arch/sparc/kernel/leon_pci_grpci2.c @@ -799,6 +799,11 @@ static int grpci2_of_probe(struct platform_device *ofdev) if (request_resource(&ioport_resource, &priv->info.io_space) < 0) goto err4; + /* setup maximum supported PCI buses */ + priv->info.busn.name = "GRPCI2 busn"; + priv->info.busn.start = 0; + priv->info.busn.end = 255; + grpci2_hw_init(priv); /* diff --git a/arch/sparc/kernel/leon_pmc.c b/arch/sparc/kernel/leon_pmc.c index 708bca435219..bdf53d9a8d46 100644 --- a/arch/sparc/kernel/leon_pmc.c +++ b/arch/sparc/kernel/leon_pmc.c @@ -48,7 +48,7 @@ void pmc_leon_idle_fixup(void) */ register unsigned int address = (unsigned int)leon3_irqctrl_regs; __asm__ __volatile__ ( - "mov %%g0, %%asr19\n" + "wr %%g0, %%asr19\n" "lda [%0] %1, %%g0\n" : : "r"(address), "i"(ASI_LEON_BYPASS)); @@ -61,7 +61,7 @@ void pmc_leon_idle_fixup(void) void pmc_leon_idle(void) { /* For systems without power-down, this will be no-op */ - __asm__ __volatile__ ("mov %g0, %asr19\n\t"); + __asm__ __volatile__ ("wr %g0, %asr19\n\t"); } /* Install LEON Power Down function */ diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c index 3e244f31e56b..8647fcc5ca6c 100644 --- 
a/arch/sparc/kernel/vio.c +++ b/arch/sparc/kernel/vio.c @@ -342,6 +342,7 @@ static void vio_remove(struct mdesc_handle *hp, u64 node) printk(KERN_INFO "VIO: Removing device %s\n", dev_name(dev)); device_unregister(dev); + put_device(dev); } } diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 6ac99d64a13c..cf72a8a5b3aa 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -681,10 +681,9 @@ void get_new_mmu_context(struct mm_struct *mm) { unsigned long ctx, new_ctx; unsigned long orig_pgsz_bits; - unsigned long flags; int new_version; - spin_lock_irqsave(&ctx_alloc_lock, flags); + spin_lock(&ctx_alloc_lock); orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK); ctx = (tlb_context_cache + 1) & CTX_NR_MASK; new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx); @@ -720,7 +719,7 @@ void get_new_mmu_context(struct mm_struct *mm) out: tlb_context_cache = new_ctx; mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits; - spin_unlock_irqrestore(&ctx_alloc_lock, flags); + spin_unlock(&ctx_alloc_lock); if (unlikely(new_version)) smp_new_mmu_context_version(); @@ -2125,7 +2124,6 @@ void free_initmem(void) ClearPageReserved(p); init_page_count(p); __free_page(p); - num_physpages++; totalram_pages++; } } @@ -2142,7 +2140,6 @@ void free_initrd_mem(unsigned long start, unsigned long end) ClearPageReserved(p); init_page_count(p); __free_page(p); - num_physpages++; totalram_pages++; } } diff --git a/arch/sparc/power/Makefile b/arch/sparc/power/Makefile new file mode 100644 index 000000000000..3201ace0ddbd --- /dev/null +++ b/arch/sparc/power/Makefile @@ -0,0 +1,3 @@ +# Makefile for Sparc-specific hibernate files. + +obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate_asm.o diff --git a/arch/sparc/power/hibernate.c b/arch/sparc/power/hibernate.c new file mode 100644 index 000000000000..42b0b8ce699a --- /dev/null +++ b/arch/sparc/power/hibernate.c @@ -0,0 +1,42 @@ +/* + * hibernate.c: Hibernation support specific to sparc64. + * + * Copyright (C) 2013 Kirill V Tkhai ([email protected]) + */ + +#include <linux/mm.h> + +#include <asm/hibernate.h> +#include <asm/visasm.h> +#include <asm/page.h> +#include <asm/tlb.h> + +/* References to section boundaries */ +extern const void __nosave_begin, __nosave_end; + +struct saved_context saved_context; + +/* + * pfn_is_nosave - check if given pfn is in the 'nosave' section + */ + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = PFN_DOWN((unsigned long)&__nosave_begin); + unsigned long nosave_end_pfn = PFN_DOWN((unsigned long)&__nosave_end); + + return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); +} + +void save_processor_state(void) +{ + save_and_clear_fpu(); +} + +void restore_processor_state(void) +{ + struct mm_struct *mm = current->active_mm; + + load_secondary_context(mm); + tsb_context_switch(mm); +} diff --git a/arch/sparc/power/hibernate_asm.S b/arch/sparc/power/hibernate_asm.S new file mode 100644 index 000000000000..79942166df84 --- /dev/null +++ b/arch/sparc/power/hibernate_asm.S @@ -0,0 +1,131 @@ +/* + * hibernate_asm.S: Hibernation support specific to sparc64.
+ * + * Copyright (C) 2013 Kirill V Tkhai ([email protected]) + */ + +#include <linux/linkage.h> + +#include <asm/asm-offsets.h> +#include <asm/cpudata.h> +#include <asm/page.h> + +ENTRY(swsusp_arch_suspend) + save %sp, -128, %sp + save %sp, -128, %sp + flushw + + setuw saved_context, %g3 + + /* Save window regs */ + rdpr %cwp, %g2 + stx %g2, [%g3 + SC_REG_CWP] + rdpr %wstate, %g2 + stx %g2, [%g3 + SC_REG_WSTATE] + stx %fp, [%g3 + SC_REG_FP] + + /* Save state regs */ + rdpr %tick, %g2 + stx %g2, [%g3 + SC_REG_TICK] + rdpr %pstate, %g2 + stx %g2, [%g3 + SC_REG_PSTATE] + + /* Save global regs */ + stx %g4, [%g3 + SC_REG_G4] + stx %g5, [%g3 + SC_REG_G5] + stx %g6, [%g3 + SC_REG_G6] + + call swsusp_save + nop + + mov %o0, %i0 + restore + + mov %o0, %i0 + ret + restore + +ENTRY(swsusp_arch_resume) + /* Write restore_pblist to %l0 */ + sethi %hi(restore_pblist), %l0 + ldx [%l0 + %lo(restore_pblist)], %l0 + + call __flush_tlb_all + nop + + /* Write PAGE_OFFSET to %g7 */ + sethi %uhi(PAGE_OFFSET), %g7 + sllx %g7, 32, %g7 + + setuw (PAGE_SIZE-8), %g3 + + /* Use MMU Bypass */ + rd %asi, %g1 + wr %g0, ASI_PHYS_USE_EC, %asi + + ba fill_itlb + nop + +pbe_loop: + cmp %l0, %g0 + be restore_ctx + sub %l0, %g7, %l0 + + ldxa [%l0 ] %asi, %l1 /* address */ + ldxa [%l0 + 8] %asi, %l2 /* orig_address */ + + /* phys addr */ + sub %l1, %g7, %l1 + sub %l2, %g7, %l2 + + mov %g3, %l3 /* PAGE_SIZE-8 */ +copy_loop: + ldxa [%l1 + %l3] ASI_PHYS_USE_EC, %g2 + stxa %g2, [%l2 + %l3] ASI_PHYS_USE_EC + cmp %l3, %g0 + bne copy_loop + sub %l3, 8, %l3 + + /* next pbe */ + ba pbe_loop + ldxa [%l0 + 16] %asi, %l0 + +restore_ctx: + setuw saved_context, %g3 + + /* Restore window regs */ + wrpr %g0, 0, %canrestore + wrpr %g0, 0, %otherwin + wrpr %g0, 6, %cansave + wrpr %g0, 0, %cleanwin + + ldxa [%g3 + SC_REG_CWP] %asi, %g2 + wrpr %g2, %cwp + ldxa [%g3 + SC_REG_WSTATE] %asi, %g2 + wrpr %g2, %wstate + ldxa [%g3 + SC_REG_FP] %asi, %fp + + /* Restore state regs */ + ldxa [%g3 + SC_REG_PSTATE] %asi, %g2 + wrpr %g2, %pstate + ldxa [%g3 + SC_REG_TICK] %asi, %g2 + wrpr %g2, %tick + + /* Restore global regs */ + ldxa [%g3 + SC_REG_G4] %asi, %g4 + ldxa [%g3 + SC_REG_G5] %asi, %g5 + ldxa [%g3 + SC_REG_G6] %asi, %g6 + + wr %g1, %g0, %asi + + restore + restore + + wrpr %g0, 14, %pil + + retl + mov %g0, %o0 + +fill_itlb: + ba pbe_loop + wrpr %g0, 15, %pil diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 2df313b6a586..c92306809029 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -30,8 +30,8 @@ DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); #ifdef CONFIG_PRINTK DEFINE(UML_CONFIG_PRINTK, CONFIG_PRINTK); #endif -#ifdef CONFIG_NO_HZ -DEFINE(UML_CONFIG_NO_HZ, CONFIG_NO_HZ); +#ifdef CONFIG_NO_HZ_COMMON +DEFINE(UML_CONFIG_NO_HZ_COMMON, CONFIG_NO_HZ_COMMON); #endif #ifdef CONFIG_UML_X86 DEFINE(UML_CONFIG_UML_X86, CONFIG_UML_X86); diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index fac388cb464f..e9824d5dd7d5 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -79,7 +79,7 @@ long long os_nsecs(void) return timeval_to_ns(&tv); } -#ifdef UML_CONFIG_NO_HZ +#ifdef UML_CONFIG_NO_HZ_COMMON static int after_sleep_interval(struct timespec *ts) { return 0; diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 40afa0005c69..9bd4ecac72be 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -19,6 +19,10 @@ BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) 
BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) +#ifdef CONFIG_HAVE_KVM +BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR) +#endif + /* * every pentium local APIC has two 'local interrupts', with a * soft-definable vector attached to both interrupts, one of diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 81f04cee5f74..ab0ae1aa6d0a 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -12,6 +12,9 @@ typedef struct { unsigned int irq_spurious_count; unsigned int icr_read_retry_count; #endif +#ifdef CONFIG_HAVE_KVM + unsigned int kvm_posted_intr_ipis; +#endif unsigned int x86_platform_ipis; /* arch dependent */ unsigned int apic_perf_irqs; unsigned int apic_irq_work_irqs; diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 10a78c3d3d5a..1da97efad08a 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -28,6 +28,7 @@ /* Interrupt handlers registered during init_IRQ */ extern void apic_timer_interrupt(void); extern void x86_platform_ipi(void); +extern void kvm_posted_intr_ipi(void); extern void error_interrupt(void); extern void irq_work_interrupt(void); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index aac5fa62a86c..5702d7e3111d 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -102,6 +102,11 @@ */ #define X86_PLATFORM_IPI_VECTOR 0xf7 +/* Vector for KVM to deliver posted interrupt IPI */ +#ifdef CONFIG_HAVE_KVM +#define POSTED_INTR_VECTOR 0xf2 +#endif + /* * IRQ work vector: */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4979778cc7fb..3741c653767c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -31,7 +31,7 @@ #include <asm/msr-index.h> #include <asm/asm.h> -#define KVM_MAX_VCPUS 254 +#define KVM_MAX_VCPUS 255 #define KVM_SOFT_MAX_VCPUS 160 #define KVM_USER_MEM_SLOTS 125 /* memory slots that are not exposed to userspace */ @@ -43,6 +43,8 @@ #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS + #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ @@ -94,9 +96,6 @@ #define ASYNC_PF_PER_VCPU 64 -extern raw_spinlock_t kvm_lock; -extern struct list_head vm_list; - struct kvm_vcpu; struct kvm; struct kvm_async_pf; @@ -230,6 +229,7 @@ struct kvm_mmu_page { #endif int write_flooding_count; + bool mmio_cached; }; struct kvm_pio_request { @@ -345,7 +345,6 @@ struct kvm_vcpu_arch { unsigned long apic_attention; int32_t apic_arb_prio; int mp_state; - int sipi_vector; u64 ia32_misc_enable_msr; bool tpr_access_reporting; @@ -643,7 +642,7 @@ struct kvm_x86_ops { /* Create, but do not attach this VCPU */ struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); void (*vcpu_free)(struct kvm_vcpu *vcpu); - int (*vcpu_reset)(struct kvm_vcpu *vcpu); + void (*vcpu_reset)(struct kvm_vcpu *vcpu); void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); @@ -696,14 +695,16 @@ struct kvm_x86_ops { int (*nmi_allowed)(struct kvm_vcpu *vcpu); bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); - void (*enable_nmi_window)(struct kvm_vcpu *vcpu); - void (*enable_irq_window)(struct kvm_vcpu *vcpu); + int (*enable_nmi_window)(struct kvm_vcpu *vcpu); + int 
(*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); int (*vm_has_apicv)(struct kvm *kvm); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm *kvm, int isr); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); + void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); + void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); @@ -730,6 +731,7 @@ struct kvm_x86_ops { int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage); + void (*handle_external_intr)(struct kvm_vcpu *vcpu); }; struct kvm_arch_async_pf { @@ -767,6 +769,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask); void kvm_mmu_zap_all(struct kvm *kvm); +void kvm_mmu_zap_mmio_sptes(struct kvm *kvm); unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); @@ -797,6 +800,7 @@ enum emulation_result { #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) #define EMULTYPE_RETRY (1 << 3) +#define EMULTYPE_NO_REEXECUTE (1 << 4) int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, int emulation_type, void *insn, int insn_len); @@ -807,6 +811,7 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu, } void kvm_enable_efer_bits(u64); +bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); @@ -819,6 +824,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); +void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector); int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); @@ -973,7 +979,6 @@ enum { * Trap the fault and ignore the instruction if that happens. 
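 * The ____kvm_handle_fault_on_reboot() wrapper below implements this: it tags the instruction with a local label and an exception-table fixup entry, so a fault taken during reboot (when VMX/SVM may already be disabled) is redirected to kvm_spurious_fault() instead of oopsing.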
*/ asmlinkage void kvm_spurious_fault(void); -extern bool kvm_rebooting; #define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ "666: " insn "\n\t" \ @@ -1002,6 +1007,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); +void kvm_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_define_shared_msr(unsigned index, u32 msr); void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); @@ -1027,7 +1033,7 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu); void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu); bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); -int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data); +int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); void kvm_deliver_pmi(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index b6fbf860e398..f3e01a2cbaa1 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -65,11 +65,16 @@ #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 #define PIN_BASED_EXT_INTR_MASK 0x00000001 #define PIN_BASED_NMI_EXITING 0x00000008 #define PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 +#define PIN_BASED_POSTED_INTR 0x00000080 + +#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 #define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 @@ -81,6 +86,8 @@ #define VM_EXIT_LOAD_IA32_EFER 0x00200000 #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 +#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff + #define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000002 #define VM_ENTRY_IA32E_MODE 0x00000200 #define VM_ENTRY_SMM 0x00000400 @@ -89,9 +96,15 @@ #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 #define VM_ENTRY_LOAD_IA32_EFER 0x00008000 +#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff + +#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f +#define VMX_MISC_SAVE_EFER_LMA 0x00000020 + /* VMCS Encodings */ enum vmcs_field { VIRTUAL_PROCESSOR_ID = 0x00000000, + POSTED_INTR_NV = 0x00000002, GUEST_ES_SELECTOR = 0x00000800, GUEST_CS_SELECTOR = 0x00000802, GUEST_SS_SELECTOR = 0x00000804, @@ -126,6 +139,8 @@ enum vmcs_field { VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, APIC_ACCESS_ADDR = 0x00002014, APIC_ACCESS_ADDR_HIGH = 0x00002015, + POSTED_INTR_DESC_ADDR = 0x00002016, + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, EPT_POINTER = 0x0000201a, EPT_POINTER_HIGH = 0x0000201b, EOI_EXIT_BITMAP0 = 0x0000201c, @@ -136,6 +151,8 @@ enum vmcs_field { EOI_EXIT_BITMAP2_HIGH = 0x00002021, EOI_EXIT_BITMAP3 = 0x00002022, EOI_EXIT_BITMAP3_HIGH = 0x00002023, + VMREAD_BITMAP = 0x00002026, + VMWRITE_BITMAP = 0x00002028, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, @@ -209,6 +226,7 @@ enum vmcs_field { GUEST_INTERRUPTIBILITY_INFO = 0x00004824, GUEST_ACTIVITY_STATE = 0X00004826, GUEST_SYSENTER_CS = 0x0000482A, + VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, HOST_IA32_SYSENTER_CS = 0x00004c00, CR0_GUEST_HOST_MASK = 0x00006000, CR4_GUEST_HOST_MASK = 0x00006002, diff --git 
a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index a65ec29e6ffb..5d9a3033b3d7 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -29,7 +29,6 @@ #define __KVM_HAVE_PIT #define __KVM_HAVE_IOAPIC #define __KVM_HAVE_IRQ_LINE -#define __KVM_HAVE_DEVICE_ASSIGNMENT #define __KVM_HAVE_MSI #define __KVM_HAVE_USER_NMI #define __KVM_HAVE_GUEST_DEBUG diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index b5757885d7a4..b3a4866661c5 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -528,6 +528,8 @@ #define VMX_BASIC_MEM_TYPE_WB 6LLU #define VMX_BASIC_INOUT 0x0040000000000000LLU +/* MSR_IA32_VMX_MISC bits */ +#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) /* AMD-V MSRs */ #define MSR_VM_CR 0xc0010114 diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 2871fccfee68..d651082c7cf7 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -65,6 +65,7 @@ #define EXIT_REASON_EOI_INDUCED 45 #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_PREEMPTION_TIMER 52 #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 #define EXIT_REASON_APIC_WRITE 56 @@ -110,7 +111,7 @@ { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ { EXIT_REASON_INVD, "INVD" }, \ - { EXIT_REASON_INVPCID, "INVPCID" } - + { EXIT_REASON_INVPCID, "INVPCID" }, \ + { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" } #endif /* _UAPIVMX_H */ diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ffd6050a1de4..f60d41ff9a97 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -128,10 +128,15 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly = INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ - INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + /* + * Errata BV98 -- MEM_*_RETIRED events can leak between counters of SMT + * siblings; disable these events because they can corrupt unrelated + * counters. 
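+ * + * An empty counter mask (0x0) leaves these events no counter they may be scheduled on, so the constraints below effectively reject the events at creation time rather than returning corrupted counts.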
+ */ + INTEL_EVENT_CONSTRAINT(0xd0, 0x0), /* MEM_UOPS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0x0), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0x0), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd3, 0x0), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ EVENT_CONSTRAINT_END }; diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index da02e9cc3754..d978353c939b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -310,7 +310,7 @@ void intel_pmu_lbr_read(void) * - in case there is no HW filter * - in case the HW filter has errata or limitations */ -static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) +static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) { u64 br_type = event->attr.branch_sample_type; int mask = 0; @@ -318,8 +318,11 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) if (br_type & PERF_SAMPLE_BRANCH_USER) mask |= X86_BR_USER; - if (br_type & PERF_SAMPLE_BRANCH_KERNEL) + if (br_type & PERF_SAMPLE_BRANCH_KERNEL) { + if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) + return -EACCES; mask |= X86_BR_KERNEL; + } /* we ignore BRANCH_HV here */ @@ -339,6 +342,8 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) * be used by fixup code for some CPU */ event->hw.branch_reg.reg = mask; + + return 0; } /* @@ -386,7 +391,9 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event) /* * setup SW LBR filter */ - intel_pmu_setup_sw_lbr_filter(event); + ret = intel_pmu_setup_sw_lbr_filter(event); + if (ret) + return ret; /* * setup HW LBR filter, if any @@ -442,8 +449,18 @@ static int branch_type(unsigned long from, unsigned long to) return X86_BR_NONE; addr = buf; - } else - addr = (void *)from; + } else { + /* + * The LBR logs any address in the IP, even if the IP just + * faulted. This means userspace can control the from address. + * Ensure we don't blindly read any address by validating it is + * a known text address.
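+ * kernel_text_address() accepts core kernel and module text; for anything else branch_type() returns X86_BR_NONE below, so the sample is dropped rather than the decoder dereferencing an attacker-chosen pointer.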
+ */ + if (kernel_text_address(from)) + addr = (void *)from; + else + return X86_BR_NONE; + } /* * decoder needs to know the ABI especially diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index d0f9e5aa2151..52441a2af538 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -3093,7 +3093,7 @@ static void __init uncore_types_exit(struct intel_uncore_type **types) static int __init uncore_type_init(struct intel_uncore_type *type) { struct intel_uncore_pmu *pmus; - struct attribute_group *events_group; + struct attribute_group *attr_group; struct attribute **attrs; int i, j; @@ -3120,19 +3120,19 @@ static int __init uncore_type_init(struct intel_uncore_type *type) while (type->event_descs[i].attr.attr.name) i++; - events_group = kzalloc(sizeof(struct attribute *) * (i + 1) + - sizeof(*events_group), GFP_KERNEL); - if (!events_group) + attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) + + sizeof(*attr_group), GFP_KERNEL); + if (!attr_group) goto fail; - attrs = (struct attribute **)(events_group + 1); - events_group->name = "events"; - events_group->attrs = attrs; + attrs = (struct attribute **)(attr_group + 1); + attr_group->name = "events"; + attr_group->attrs = attrs; for (j = 0; j < i; j++) attrs[j] = &type->event_descs[j].attr.attr; - type->events_group = events_group; + type->events_group = attr_group; } type->pmu_group = &uncore_pmu_attr_group; @@ -3545,11 +3545,12 @@ static int __init uncore_cpu_init(void) msr_uncores = nhm_msr_uncores; break; case 42: /* Sandy Bridge */ + case 58: /* Ivy Bridge */ if (snb_uncore_cbox.num_boxes > max_cores) snb_uncore_cbox.num_boxes = max_cores; msr_uncores = snb_msr_uncores; break; - case 45: /* Sandy Birdge-EP */ + case 45: /* Sandy Bridge-EP */ if (snbep_uncore_cbox.num_boxes > max_cores) snbep_uncore_cbox.num_boxes = max_cores; msr_uncores = snbep_msr_uncores; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c1d01e6ca790..727208941030 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1166,6 +1166,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \ apicinterrupt X86_PLATFORM_IPI_VECTOR \ x86_platform_ipi smp_x86_platform_ipi +#ifdef CONFIG_HAVE_KVM +apicinterrupt POSTED_INTR_VECTOR \ + kvm_posted_intr_ipi smp_kvm_posted_intr_ipi +#endif + apicinterrupt THRESHOLD_APIC_VECTOR \ threshold_interrupt smp_threshold_interrupt apicinterrupt THERMAL_APIC_VECTOR \ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index e4595f105910..ac0631d8996f 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -165,10 +165,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu) u64 arch_irq_stat(void) { u64 sum = atomic_read(&irq_err_count); - -#ifdef CONFIG_X86_IO_APIC - sum += atomic_read(&irq_mis_count); -#endif return sum; } @@ -228,6 +224,28 @@ void smp_x86_platform_ipi(struct pt_regs *regs) set_irq_regs(old_regs); } +#ifdef CONFIG_HAVE_KVM +/* + * Handler for POSTED_INTERRUPT_VECTOR. 
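+ * + * The handler body below is deliberately minimal: it only ACKs the APIC and bumps a per-cpu statistic. The vector's value lies in its side effects: a CPU in guest mode has the posted interrupt delivered directly by hardware, while a CPU in host mode merely needs the nudge so KVM syncs pending PIR bits into the IRR on the next VM entry.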
+ */ +void smp_kvm_posted_intr_ipi(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + ack_APIC_irq(); + + irq_enter(); + + exit_idle(); + + inc_irq_stat(kvm_posted_intr_ipis); + + irq_exit(); + + set_irq_regs(old_regs); +} +#endif + EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 7dc4e459c2b3..a2a1fbc594ff 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -172,6 +172,10 @@ static void __init apic_intr_init(void) /* IPI for X86 platform specific use */ alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); +#ifdef CONFIG_HAVE_KVM + /* IPI for KVM to deliver posted interrupt */ + alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); +#endif /* IPI vectors for APIC spurious and error interrupts */ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 0732f0089a3d..d2c381280e3c 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -160,8 +160,12 @@ int kvm_register_clock(char *txt) { int cpu = smp_processor_id(); int low, high, ret; - struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti; + struct pvclock_vcpu_time_info *src; + + if (!hv_clock) + return 0; + src = &hv_clock[cpu].pvti; low = (int)slow_virt_to_phys(src) | 1; high = ((u64)slow_virt_to_phys(src) >> 32); ret = native_write_msr_safe(msr_kvm_system_time, low, high); @@ -276,6 +280,9 @@ int __init kvm_setup_vsyscall_timeinfo(void) struct pvclock_vcpu_time_info *vcpu_time; unsigned int size; + if (!hv_clock) + return 0; + size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); preempt_disable(); diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 586f00059805..a47a3e54b964 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -21,14 +21,13 @@ config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM depends on HIGH_RES_TIMERS - # for device assignment: - depends on PCI # for TASKSTATS/TASK_DELAY_ACCT: depends on NET select PREEMPT_NOTIFIERS select MMU_NOTIFIER select ANON_INODES select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_EVENTFD select KVM_APIC_ARCHITECTURE select KVM_ASYNC_PF @@ -82,6 +81,17 @@ config KVM_MMU_AUDIT This option adds a R/W kVM module parameter 'mmu_audit', which allows audit KVM MMU at runtime. +config KVM_DEVICE_ASSIGNMENT + bool "KVM legacy PCI device assignment support" + depends on KVM && PCI && IOMMU_API + default y + ---help--- + Provide support for legacy PCI device assignment through KVM. The + kernel now also supports a full featured userspace device driver + framework through VFIO, which supersedes much of this support. + + If unsure, say Y. + # OK, it's a little counter-intuitive to do this, but it puts it neatly under # the virtualization menu. source drivers/vhost/Kconfig diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 04d30401c5cb..d609e1d84048 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -7,8 +7,9 @@ CFLAGS_vmx.o := -I. 
kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ coalesced_mmio.o irq_comm.o eventfd.o \ - assigned-dev.o) -kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) + irqchip.o) +kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(addprefix ../../../virt/kvm/, \ + assigned-dev.o iommu.o) kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a335cc6cde72..8e517bba6a7c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -132,8 +132,9 @@ #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ #define No64 (1<<28) #define PageTable (1 << 29) /* instruction used to write page table */ +#define NotImpl (1 << 30) /* instruction is not implemented */ /* Source 2 operand type */ -#define Src2Shift (30) +#define Src2Shift (31) #define Src2None (OpNone << Src2Shift) #define Src2CL (OpCL << Src2Shift) #define Src2ImmByte (OpImmByte << Src2Shift) @@ -1578,12 +1579,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, memset(&seg_desc, 0, sizeof seg_desc); - if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) - || ctxt->mode == X86EMUL_MODE_REAL) { - /* set real mode segment descriptor */ + if (ctxt->mode == X86EMUL_MODE_REAL) { + /* set real mode segment descriptor (keep limit etc. for + * unreal mode) */ ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg); set_desc_base(&seg_desc, selector << 4); goto load; + } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) { + /* VM86 needs a clean new segment descriptor */ + set_desc_base(&seg_desc, selector << 4); + set_desc_limit(&seg_desc, 0xffff); + seg_desc.type = 3; + seg_desc.p = 1; + seg_desc.s = 1; + seg_desc.dpl = 3; + goto load; } rpl = selector & 3; @@ -3615,7 +3625,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ .check_perm = (_p) } -#define N D(0) +#define N D(NotImpl) #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } @@ -3713,7 +3723,7 @@ static const struct opcode group5[] = { I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), I(SrcMem | Stack, em_grp45), I(SrcMemFAddr | ImplicitOps, em_grp45), - I(SrcMem | Stack, em_grp45), N, + I(SrcMem | Stack, em_grp45), D(Undefined), }; static const struct opcode group6[] = { @@ -4162,6 +4172,10 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, break; case OpMem8: ctxt->memop.bytes = 1; + if (ctxt->memop.type == OP_REG) { + ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1); + fetch_register_operand(&ctxt->memop); + } goto mem_common; case OpMem16: ctxt->memop.bytes = 2; @@ -4373,7 +4387,7 @@ done_prefixes: ctxt->intercept = opcode.intercept; /* Unrecognised? 
*/ - if (ctxt->d == 0 || (ctxt->d & Undefined)) + if (ctxt->d == 0 || (ctxt->d & NotImpl)) return EMULATION_FAILED; if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn) @@ -4511,7 +4525,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) ctxt->mem_read.pos = 0; - if (ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) { + if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) || + (ctxt->d & Undefined)) { rc = emulate_ud(ctxt); goto done; } diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index c1d30b2fc9bb..412a5aa0ef94 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -290,8 +290,8 @@ static void pit_do_work(struct kthread_work *work) } spin_unlock(&ps->inject_lock); if (inject) { - kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); - kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false); + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false); /* * Provides NMI watchdog support via Virtual Wire mode. diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index f77df1c5de6e..e1adbb4aca75 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap) return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); } +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + return apic_test_vector(vector, apic->regs + APIC_ISR) || + apic_test_vector(vector, apic->regs + APIC_IRR); +} + static inline void apic_set_vector(int vec, void *bitmap) { set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); @@ -145,53 +153,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic) return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; } -void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, - struct kvm_lapic_irq *irq, - u64 *eoi_exit_bitmap) -{ - struct kvm_lapic **dst; - struct kvm_apic_map *map; - unsigned long bitmap = 1; - int i; - - rcu_read_lock(); - map = rcu_dereference(vcpu->kvm->arch.apic_map); - - if (unlikely(!map)) { - __set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap); - goto out; - } - - if (irq->dest_mode == 0) { /* physical mode */ - if (irq->delivery_mode == APIC_DM_LOWEST || - irq->dest_id == 0xff) { - __set_bit(irq->vector, - (unsigned long *)eoi_exit_bitmap); - goto out; - } - dst = &map->phys_map[irq->dest_id & 0xff]; - } else { - u32 mda = irq->dest_id << (32 - map->ldr_bits); - - dst = map->logical_map[apic_cluster_id(map, mda)]; - - bitmap = apic_logical_id(map, mda); - } - - for_each_set_bit(i, &bitmap, 16) { - if (!dst[i]) - continue; - if (dst[i]->vcpu == vcpu) { - __set_bit(irq->vector, - (unsigned long *)eoi_exit_bitmap); - break; - } - } - -out: - rcu_read_unlock(); -} - static void recalculate_apic_map(struct kvm *kvm) { struct kvm_apic_map *new, *old = NULL; @@ -256,7 +217,7 @@ out: if (old) kfree_rcu(old, rcu); - kvm_ioapic_make_eoibitmap_request(kvm); + kvm_vcpu_request_scan_ioapic(kvm); } static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) @@ -357,6 +318,19 @@ static u8 count_vectors(void *bitmap) return count; } +void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) +{ + u32 i, pir_val; + struct kvm_lapic *apic = vcpu->arch.apic; + + for (i = 0; i <= 7; i++) { + pir_val = xchg(&pir[i], 0); + if (pir_val) + *((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val; + } +} +EXPORT_SYMBOL_GPL(kvm_apic_update_irr); + static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) { 
apic->irr_pending = true; @@ -379,6 +353,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) if (!apic->irr_pending) return -1; + kvm_x86_ops->sync_pir_to_irr(apic->vcpu); result = apic_search_irr(apic); ASSERT(result == -1 || result >= 16); @@ -431,14 +406,16 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) } static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, - int vector, int level, int trig_mode); + int vector, int level, int trig_mode, + unsigned long *dest_map); -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, + unsigned long *dest_map) { struct kvm_lapic *apic = vcpu->arch.apic; return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, - irq->level, irq->trig_mode); + irq->level, irq->trig_mode, dest_map); } static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) @@ -505,6 +482,15 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic) return result; } +void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + int i; + + for (i = 0; i < 8; i++) + apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]); +} + static void apic_update_ppr(struct kvm_lapic *apic) { u32 tpr, isrv, ppr, old_ppr; @@ -611,7 +597,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, } bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, int *r) + struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map) { struct kvm_apic_map *map; unsigned long bitmap = 1; @@ -622,7 +608,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, *r = -1; if (irq->shorthand == APIC_DEST_SELF) { - *r = kvm_apic_set_irq(src->vcpu, irq); + *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); return true; } @@ -667,7 +653,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, continue; if (*r < 0) *r = 0; - *r += kvm_apic_set_irq(dst[i]->vcpu, irq); + *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); } ret = true; @@ -681,7 +667,8 @@ out: * Return 1 if successfully added and 0 if discarded. 
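The dest_map argument threaded through kvm_apic_set_irq() and kvm_irq_delivery_to_apic_fast() here is an optional out-parameter: when non-NULL, __apic_accept_irq() (next hunk) sets one bit per vCPU that actually accepted the interrupt, so a caller can learn the delivery fan-out. A sketch of the intended caller pattern, assuming the kernel bitmap helpers (the RTC EOI tracking referenced later in this series is the expected user):

    /* Illustrative fragment, not a drop-in: */
    DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);

    bitmap_zero(dest_map, KVM_MAX_VCPUS);
    kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, dest_map);
    /* dest_map now has one bit set per vCPU whose IRR/PIR took the
     * vector; an RTC-style caller can later match EOIs against it. */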
*/ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, - int vector, int level, int trig_mode) + int vector, int level, int trig_mode, + unsigned long *dest_map) { int result = 0; struct kvm_vcpu *vcpu = apic->vcpu; @@ -694,24 +681,28 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, if (unlikely(!apic_enabled(apic))) break; - if (trig_mode) { - apic_debug("level trig mode for vector %d", vector); - apic_set_vector(vector, apic->regs + APIC_TMR); - } else - apic_clear_vector(vector, apic->regs + APIC_TMR); + if (dest_map) + __set_bit(vcpu->vcpu_id, dest_map); - result = !apic_test_and_set_irr(vector, apic); - trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, - trig_mode, vector, !result); - if (!result) { - if (trig_mode) - apic_debug("level trig mode repeatedly for " - "vector %d", vector); - break; - } + if (kvm_x86_ops->deliver_posted_interrupt) { + result = 1; + kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); + } else { + result = !apic_test_and_set_irr(vector, apic); - kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_vcpu_kick(vcpu); + if (!result) { + if (trig_mode) + apic_debug("level trig mode repeatedly " + "for vector %d", vector); + goto out; + } + + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); + } +out: + trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, + trig_mode, vector, !result); break; case APIC_DM_REMRD: @@ -731,7 +722,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_INIT: if (!trig_mode || level) { result = 1; - vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + /* assumes that there are only KVM_APIC_INIT/SIPI */ + apic->pending_events = (1UL << KVM_APIC_INIT); + /* make sure pending_events is visible before sending + * the request */ + smp_wmb(); kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_vcpu_kick(vcpu); } else { @@ -743,13 +738,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_STARTUP: apic_debug("SIPI to vcpu %d vector 0x%02x\n", vcpu->vcpu_id, vector); - if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { - result = 1; - vcpu->arch.sipi_vector = vector; - vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; - kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_vcpu_kick(vcpu); - } + result = 1; + apic->sipi_vector = vector; + /* make sure sipi_vector is visible for the receiver */ + smp_wmb(); + set_bit(KVM_APIC_SIPI, &apic->pending_events); + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); break; case APIC_DM_EXTINT: @@ -782,7 +777,7 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) trigger_mode = IOAPIC_LEVEL_TRIG; else trigger_mode = IOAPIC_EDGE_TRIG; - kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); + kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); } } @@ -848,7 +843,7 @@ static void apic_send_ipi(struct kvm_lapic *apic) irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, irq.vector); - kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); + kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); } static u32 apic_get_tmcct(struct kvm_lapic *apic) @@ -1484,7 +1479,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) vector = reg & APIC_VECTOR_MASK; mode = reg & APIC_MODE_MASK; trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; - return __apic_accept_irq(apic, mode, vector, 1, trig_mode); + return __apic_accept_irq(apic, mode, vector, 1, trig_mode, + NULL); } return 0; } @@ -1654,6 +1650,7 @@ void kvm_apic_post_state_restore(struct 
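Note what changed in the fixed-interrupt branch above: when the backend supplies a deliver_posted_interrupt hook, the vector goes into the posted-interrupt descriptor instead of the software IRR, and result is unconditionally 1, because once hardware merges PIR bits into the virtual IRR there is no cheap way to detect coalescing here. Condensed to its decision structure (a sketch with illustrative helper names, not kernel code):

    if (backend_has_posted_interrupts)          /* e.g. VMX + APICv   */
        post_and_notify(vcpu, vector);          /* PIR bit + ON bit   */
    else if (!test_and_set_irr(vector, apic))   /* classic path:      */
        request_event_and_kick(vcpu);           /* kick only if newly set */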
kvm_vcpu *vcpu, apic->highest_isr_cache = -1; kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_rtc_eoi_tracking_restore_one(vcpu); } void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) @@ -1860,6 +1857,34 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) addr, sizeof(u8)); } +void kvm_apic_accept_events(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + unsigned int sipi_vector; + + if (!kvm_vcpu_has_lapic(vcpu)) + return; + + if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) { + kvm_lapic_reset(vcpu); + kvm_vcpu_reset(vcpu); + if (kvm_vcpu_is_bsp(apic->vcpu)) + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + else + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + } + if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events) && + vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + /* evaluate pending_events before reading the vector */ + smp_rmb(); + sipi_vector = apic->sipi_vector; + pr_debug("vcpu %d received sipi with vector # %x\n", + vcpu->vcpu_id, sipi_vector); + kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + } +} + void kvm_lapic_init(void) { /* do not patch jump label more than once per second */ diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 1676d34ddb4e..c730ac9fe801 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -5,6 +5,9 @@ #include <linux/kvm_host.h> +#define KVM_APIC_INIT 0 +#define KVM_APIC_SIPI 1 + struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ @@ -32,6 +35,8 @@ struct kvm_lapic { void *regs; gpa_t vapic_addr; struct page *vapic_page; + unsigned long pending_events; + unsigned int sipi_vector; }; int kvm_create_lapic(struct kvm_vcpu *vcpu); void kvm_free_lapic(struct kvm_vcpu *vcpu); @@ -39,6 +44,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu); int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); +void kvm_apic_accept_events(struct kvm_vcpu *vcpu); void kvm_lapic_reset(struct kvm_vcpu *vcpu); u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); @@ -47,13 +53,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); void kvm_apic_set_version(struct kvm_vcpu *vcpu); +void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); +void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, + unsigned long *dest_map); int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq, int *r); + struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map); u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); @@ -154,8 +163,11 @@ static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) return ldr & map->lid_mask; } -void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, - struct kvm_lapic_irq *irq, - u64 *eoi_bitmap); +static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) +{ + return 
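kvm_apic_accept_events() is the consumer of the pending_events protocol introduced in the previous hunk: the sender stores sipi_vector, issues smp_wmb(), then sets KVM_APIC_SIPI; the receiver test_and_clear()s the bit and issues smp_rmb() before reading the vector, so a visible bit implies a visible vector. The same handshake modelled with C11 release/acquire atomics (a standalone sketch, folding the explicit barriers into the RMW ordering rather than using the kernel primitives):

    #include <stdatomic.h>

    unsigned int sipi_vector;
    atomic_ulong pending_events;

    void sender(unsigned int vector)        /* __apic_accept_irq() side */
    {
        sipi_vector = vector;
        atomic_fetch_or_explicit(&pending_events,
                                 1UL << 1 /* KVM_APIC_SIPI */,
                                 memory_order_release);
    }

    int receiver(unsigned int *vector)      /* kvm_apic_accept_events() side */
    {
        if (!(atomic_fetch_and_explicit(&pending_events, ~(1UL << 1),
                                        memory_order_acquire) & (1UL << 1)))
            return 0;
        *vector = sipi_vector;              /* guaranteed to see the store */
        return 1;
    }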
vcpu->arch.apic->pending_events; +} + +bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 956ca358108a..004cc87b781c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -199,8 +199,11 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access) { + struct kvm_mmu_page *sp = page_header(__pa(sptep)); + access &= ACC_WRITE_MASK | ACC_USER_MASK; + sp->mmio_cached = true; trace_mark_mmio_spte(sptep, gfn, access); mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT); } @@ -1502,6 +1505,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte, int direct) { struct kvm_mmu_page *sp; + sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); if (!direct) @@ -1644,16 +1648,14 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list); -#define for_each_gfn_sp(kvm, sp, gfn) \ - hlist_for_each_entry(sp, \ - &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ - if ((sp)->gfn != (gfn)) {} else +#define for_each_gfn_sp(_kvm, _sp, _gfn) \ + hlist_for_each_entry(_sp, \ + &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ + if ((_sp)->gfn != (_gfn)) {} else -#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn) \ - hlist_for_each_entry(sp, \ - &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ - if ((sp)->gfn != (gfn) || (sp)->role.direct || \ - (sp)->role.invalid) {} else +#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ + for_each_gfn_sp(_kvm, _sp, _gfn) \ + if ((_sp)->role.direct || (_sp)->role.invalid) {} else /* @sp->gfn should be write-protected at the call site */ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, @@ -2089,7 +2091,7 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list) { - struct kvm_mmu_page *sp; + struct kvm_mmu_page *sp, *nsp; if (list_empty(invalid_list)) return; @@ -2106,11 +2108,25 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, */ kvm_flush_remote_tlbs(kvm); - do { - sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); + list_for_each_entry_safe(sp, nsp, invalid_list, link) { WARN_ON(!sp->role.invalid || sp->root_count); kvm_mmu_free_page(sp); - } while (!list_empty(invalid_list)); + } +} + +static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, + struct list_head *invalid_list) +{ + struct kvm_mmu_page *sp; + + if (list_empty(&kvm->arch.active_mmu_pages)) + return false; + + sp = list_entry(kvm->arch.active_mmu_pages.prev, + struct kvm_mmu_page, link); + kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); + + return true; } /* @@ -2120,23 +2136,15 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) { LIST_HEAD(invalid_list); - /* - * If we set the number of mmu pages to be smaller be than the - * number of actived pages , we must to free some mmu pages before we - * change the value - */ spin_lock(&kvm->mmu_lock); if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { - while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages && - !list_empty(&kvm->arch.active_mmu_pages)) { - struct kvm_mmu_page *page; + /* Need to free some mmu 
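prepare_zap_oldest_mmu_page() factors out an eviction pattern that used to be open-coded in several places: active_mmu_pages is kept in rough LRU order with the oldest page at the tail, so the victim is always list.prev, and victims are only queued on invalid_list so that one commit (and thus one remote TLB flush) can cover a whole batch. The shared caller shape now looks like this (a sketch; still_over_budget() is a hypothetical stand-in for each caller's own condition):

    LIST_HEAD(invalid_list);

    while (still_over_budget(kvm))
        if (!prepare_zap_oldest_mmu_page(kvm, &invalid_list))
            break;                        /* active list ran empty      */
    kvm_mmu_commit_zap_page(kvm, &invalid_list);  /* one flush, free all */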
pages to achieve the goal. */ + while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) + if (!prepare_zap_oldest_mmu_page(kvm, &invalid_list)) + break; - page = container_of(kvm->arch.active_mmu_pages.prev, - struct kvm_mmu_page, link); - kvm_mmu_prepare_zap_page(kvm, page, &invalid_list); - } kvm_mmu_commit_zap_page(kvm, &invalid_list); goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages; } @@ -2794,6 +2802,7 @@ exit: static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, gva_t gva, pfn_t *pfn, bool write, bool *writable); +static void make_mmu_pages_available(struct kvm_vcpu *vcpu); static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, gfn_t gfn, bool prefault) @@ -2835,7 +2844,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; - kvm_mmu_free_some_pages(vcpu); + make_mmu_pages_available(vcpu); if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, @@ -2913,7 +2922,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { spin_lock(&vcpu->kvm->mmu_lock); - kvm_mmu_free_some_pages(vcpu); + make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL, NULL); ++sp->root_count; @@ -2925,7 +2934,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); spin_lock(&vcpu->kvm->mmu_lock); - kvm_mmu_free_some_pages(vcpu); + make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL, @@ -2964,7 +2973,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); spin_lock(&vcpu->kvm->mmu_lock); - kvm_mmu_free_some_pages(vcpu); + make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, 0, ACC_ALL, NULL); root = __pa(sp->spt); @@ -2998,7 +3007,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) return 1; } spin_lock(&vcpu->kvm->mmu_lock); - kvm_mmu_free_some_pages(vcpu); + make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, 0, ACC_ALL, NULL); @@ -3304,7 +3313,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; - kvm_mmu_free_some_pages(vcpu); + make_mmu_pages_available(vcpu); if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); r = __direct_map(vcpu, gpa, write, map_writable, @@ -4006,17 +4015,17 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) } EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); -void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) +static void make_mmu_pages_available(struct kvm_vcpu *vcpu) { LIST_HEAD(invalid_list); - while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES && - !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { - struct kvm_mmu_page *sp; + if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES)) + return; + + while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) { + if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list)) + break; - sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, - struct kvm_mmu_page, link); - kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); ++vcpu->kvm->stat.mmu_recycled; } 
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); @@ -4185,17 +4194,22 @@ restart: spin_unlock(&kvm->mmu_lock); } -static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, - struct list_head *invalid_list) +void kvm_mmu_zap_mmio_sptes(struct kvm *kvm) { - struct kvm_mmu_page *page; + struct kvm_mmu_page *sp, *node; + LIST_HEAD(invalid_list); - if (list_empty(&kvm->arch.active_mmu_pages)) - return; + spin_lock(&kvm->mmu_lock); +restart: + list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { + if (!sp->mmio_cached) + continue; + if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list)) + goto restart; + } - page = container_of(kvm->arch.active_mmu_pages.prev, - struct kvm_mmu_page, link); - kvm_mmu_prepare_zap_page(kvm, page, invalid_list); + kvm_mmu_commit_zap_page(kvm, &invalid_list); + spin_unlock(&kvm->mmu_lock); } static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) @@ -4232,7 +4246,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); - kvm_mmu_remove_some_alloc_mmu_pages(kvm, &invalid_list); + prepare_zap_oldest_mmu_page(kvm, &invalid_list); kvm_mmu_commit_zap_page(kvm, &invalid_list); spin_unlock(&kvm->mmu_lock); diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 69871080e866..2adcbc2cac6d 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -57,14 +57,11 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) { - return kvm->arch.n_max_mmu_pages - - kvm->arch.n_used_mmu_pages; -} + if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages) + return kvm->arch.n_max_mmu_pages - + kvm->arch.n_used_mmu_pages; -static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) -{ - if (unlikely(kvm_mmu_available_pages(vcpu->kvm)< KVM_MIN_FREE_MMU_PAGES)) - __kvm_mmu_free_some_pages(vcpu); + return 0; } static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 105dd5bd550e..da20860b457a 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -627,7 +627,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, goto out_unlock; kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); - kvm_mmu_free_some_pages(vcpu); + make_mmu_pages_available(vcpu); if (!force_pt_level) transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); r = FNAME(fetch)(vcpu, addr, &walker, write_fault, diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index cfc258a6bf97..c53e797e7369 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -360,10 +360,12 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) return 1; } -int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) +int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc; + u32 index = msr_info->index; + u64 data = msr_info->data; switch (index) { case MSR_CORE_PERF_FIXED_CTR_CTRL: @@ -375,6 +377,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) } break; case MSR_CORE_PERF_GLOBAL_STATUS: + if (msr_info->host_initiated) { + pmu->global_status = data; + return 0; + } break; /* RO MSR */ case MSR_CORE_PERF_GLOBAL_CTRL: if (pmu->global_ctrl == data) @@ -386,7 +392,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) break; case MSR_CORE_PERF_GLOBAL_OVF_CTRL: if (!(data & 
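The mmu.h hunk above fixes a real hazard: n_used_mmu_pages can transiently exceed n_max_mmu_pages (for instance right after userspace shrinks the limit via KVM_SET_NR_MMU_PAGES, before the zap loop catches up), and with unsigned arithmetic the old unguarded subtraction would wrap around and report an enormous number of "available" pages, defeating make_mmu_pages_available()'s KVM_MIN_FREE_MMU_PAGES fast path. A two-line demonstration of the wrap:

    unsigned int n_max = 100, n_used = 103;
    unsigned int old_avail = n_max - n_used;          /* wraps: 4294967293 */
    unsigned int new_avail = n_max > n_used ? n_max - n_used : 0;  /* 0    */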
(pmu->global_ctrl_mask & ~(3ull<<62)))) { - pmu->global_status &= ~data; + if (!msr_info->host_initiated) + pmu->global_status &= ~data; pmu->global_ovf_ctrl = data; return 0; } @@ -394,7 +401,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) default: if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || (pmc = get_fixed_pmc(pmu, index))) { - data = (s64)(s32)data; + if (!msr_info->host_initiated) + data = (s64)(s32)data; pmc->counter += data - read_pmc(pmc); return 0; } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7d39d70647e3..a14a6eaf871d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1131,17 +1131,11 @@ static void init_vmcb(struct vcpu_svm *svm) init_seg(&save->gs); save->cs.selector = 0xf000; + save->cs.base = 0xffff0000; /* Executable/Readable Code Segment */ save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; save->cs.limit = 0xffff; - /* - * cs.base should really be 0xffff0000, but vmx can't handle that, so - * be consistent with it. - * - * Replace when we have real mode working for vmx. - */ - save->cs.base = 0xf0000; save->gdtr.limit = 0xffff; save->idtr.limit = 0xffff; @@ -1191,7 +1185,7 @@ static void init_vmcb(struct vcpu_svm *svm) enable_gif(svm); } -static int svm_vcpu_reset(struct kvm_vcpu *vcpu) +static void svm_vcpu_reset(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); u32 dummy; @@ -1199,16 +1193,8 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu) init_vmcb(svm); - if (!kvm_vcpu_is_bsp(vcpu)) { - kvm_rip_write(vcpu, 0); - svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12; - svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8; - } - kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); kvm_register_write(vcpu, VCPU_REGS_RDX, eax); - - return 0; } static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) @@ -3487,7 +3473,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) - printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " + printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " "exit_code 0x%x\n", __func__, svm->vmcb->control.exit_int_info, exit_code); @@ -3591,6 +3577,11 @@ static void svm_hwapic_isr_update(struct kvm *kvm, int isr) return; } +static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) +{ + return; +} + static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3641,7 +3632,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) return ret; } -static void enable_irq_window(struct kvm_vcpu *vcpu) +static int enable_irq_window(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3655,15 +3646,16 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) svm_set_vintr(svm); svm_inject_irq(svm, 0x0); } + return 0; } -static void enable_nmi_window(struct kvm_vcpu *vcpu) +static int enable_nmi_window(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) == HF_NMI_MASK) - return; /* IRET will cause a vm exit */ + return 0; /* IRET will cause a vm exit */ /* * Something prevents NMI from been injected. 
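enable_irq_window() and enable_nmi_window() changing from void to int establishes a new contract between the backends and the common x86 code: a zero return means the window request was armed, while a non-zero return (the VMX implementation later in this section returns -EBUSY when a nested guest must run first) asks the caller to force an immediate exit instead. SVM can always arm the window, so it simply returns 0. The presumed caller pattern (the x86.c side is not part of these hunks, so treat this as a sketch of the contract, not quoted code):

    if (kvm_x86_ops->enable_irq_window(vcpu) != 0)
        req_immediate_exit = true;  /* let L2 enter, then exit right away */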
Single step over possible @@ -3672,6 +3664,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) svm->nmi_singlestep = true; svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); update_db_bp_intercept(vcpu); + return 0; } static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) @@ -4247,6 +4240,11 @@ out: return ret; } +static void svm_handle_external_intr(struct kvm_vcpu *vcpu) +{ + local_irq_enable(); +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -4314,6 +4312,7 @@ static struct kvm_x86_ops svm_x86_ops = { .vm_has_apicv = svm_vm_has_apicv, .load_eoi_exitmap = svm_load_eoi_exitmap, .hwapic_isr_update = svm_hwapic_isr_update, + .sync_pir_to_irr = svm_sync_pir_to_irr, .set_tss_addr = svm_set_tss_addr, .get_tdp_level = get_npt_level, @@ -4342,6 +4341,7 @@ static struct kvm_x86_ops svm_x86_ops = { .set_tdp_cr3 = set_tdp_cr3, .check_intercept = svm_check_intercept, + .handle_external_intr = svm_handle_external_intr, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 867b81037f96..25a791ed21c8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -84,8 +84,11 @@ module_param(vmm_exclusive, bool, S_IRUGO); static bool __read_mostly fasteoi = 1; module_param(fasteoi, bool, S_IRUGO); -static bool __read_mostly enable_apicv_reg_vid; +static bool __read_mostly enable_apicv = 1; +module_param(enable_apicv, bool, S_IRUGO); +static bool __read_mostly enable_shadow_vmcs = 1; +module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); /* * If nested=1, nested virtualization is supported, i.e., guests may use * VMX and be a hypervisor for its own guests. If nested=0, guests may not @@ -298,7 +301,8 @@ struct __packed vmcs12 { u32 guest_activity_state; u32 guest_sysenter_cs; u32 host_ia32_sysenter_cs; - u32 padding32[8]; /* room for future expansion */ + u32 vmx_preemption_timer_value; + u32 padding32[7]; /* room for future expansion */ u16 virtual_processor_id; u16 guest_es_selector; u16 guest_cs_selector; @@ -351,6 +355,12 @@ struct nested_vmx { /* The host-usable pointer to the above */ struct page *current_vmcs12_page; struct vmcs12 *current_vmcs12; + struct vmcs *current_shadow_vmcs; + /* + * Indicates if the shadow vmcs must be updated with the + * data hold by vmcs12 + */ + bool sync_shadow_vmcs; /* vmcs02_list cache of VMCSs recently used to run L2 guests */ struct list_head vmcs02_pool; @@ -365,6 +375,31 @@ struct nested_vmx { struct page *apic_access_page; }; +#define POSTED_INTR_ON 0 +/* Posted-Interrupt Descriptor */ +struct pi_desc { + u32 pir[8]; /* Posted interrupt requested */ + u32 control; /* bit 0 of control is outstanding notification bit */ + u32 rsvd[7]; +} __aligned(64); + +static bool pi_test_and_set_on(struct pi_desc *pi_desc) +{ + return test_and_set_bit(POSTED_INTR_ON, + (unsigned long *)&pi_desc->control); +} + +static bool pi_test_and_clear_on(struct pi_desc *pi_desc) +{ + return test_and_clear_bit(POSTED_INTR_ON, + (unsigned long *)&pi_desc->control); +} + +static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) +{ + return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); +} + struct vcpu_vmx { struct kvm_vcpu vcpu; unsigned long host_rsp; @@ -377,6 +412,7 @@ struct vcpu_vmx { struct shared_msr_entry *guest_msrs; int nmsrs; int save_nmsrs; + unsigned long host_idt_base; #ifdef CONFIG_X86_64 u64 msr_host_kernel_gs_base; u64 msr_guest_kernel_gs_base; @@ -428,6 +464,9 @@ struct vcpu_vmx { bool rdtscp_enabled; + 
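struct pi_desc above mirrors the hardware posted-interrupt descriptor: a 256-bit PIR (one bit per vector) plus a control word whose bit 0 is the outstanding-notification (ON) flag, 64-byte aligned because the CPU itself reads and updates it. The helpers are thin wrappers around the generic atomic bitops; modelled in portable C11 (a sketch, not the kernel implementation):

    #include <stdatomic.h>
    #include <stdint.h>

    struct pi_desc_model {
        _Atomic uint32_t pir[8];  /* pending vectors, set by the poster   */
        _Atomic uint32_t control; /* bit 0 = ON: notification outstanding */
        uint32_t rsvd[7];
    } __attribute__((aligned(64)));

    /* Both return the *previous* bit, exactly like test_and_set_bit(). */
    static int set_pir(struct pi_desc_model *d, int vec)
    {
        return (atomic_fetch_or(&d->pir[vec / 32], 1u << (vec % 32))
                >> (vec % 32)) & 1;
    }

    static int set_on(struct pi_desc_model *d)
    {
        return atomic_fetch_or(&d->control, 1u) & 1u;
    }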
/* Posted interrupt descriptor */ + struct pi_desc pi_desc; + /* Support for a guest hypervisor (nested VMX) */ struct nested_vmx nested; }; @@ -451,6 +490,64 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ [number##_HIGH] = VMCS12_OFFSET(name)+4 + +static const unsigned long shadow_read_only_fields[] = { + /* + * We do NOT shadow fields that are modified when L0 + * traps and emulates any vmx instruction (e.g. VMPTRLD, + * VMXON...) executed by L1. + * For example, VM_INSTRUCTION_ERROR is read + * by L1 if a vmx instruction fails (part of the error path). + * Note the code assumes this logic. If for some reason + * we start shadowing these fields then we need to + * force a shadow sync when L0 emulates vmx instructions + * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified + * by nested_vmx_failValid) + */ + VM_EXIT_REASON, + VM_EXIT_INTR_INFO, + VM_EXIT_INSTRUCTION_LEN, + IDT_VECTORING_INFO_FIELD, + IDT_VECTORING_ERROR_CODE, + VM_EXIT_INTR_ERROR_CODE, + EXIT_QUALIFICATION, + GUEST_LINEAR_ADDRESS, + GUEST_PHYSICAL_ADDRESS +}; +static const int max_shadow_read_only_fields = + ARRAY_SIZE(shadow_read_only_fields); + +static const unsigned long shadow_read_write_fields[] = { + GUEST_RIP, + GUEST_RSP, + GUEST_CR0, + GUEST_CR3, + GUEST_CR4, + GUEST_INTERRUPTIBILITY_INFO, + GUEST_RFLAGS, + GUEST_CS_SELECTOR, + GUEST_CS_AR_BYTES, + GUEST_CS_LIMIT, + GUEST_CS_BASE, + GUEST_ES_BASE, + CR0_GUEST_HOST_MASK, + CR0_READ_SHADOW, + CR4_READ_SHADOW, + TSC_OFFSET, + EXCEPTION_BITMAP, + CPU_BASED_VM_EXEC_CONTROL, + VM_ENTRY_EXCEPTION_ERROR_CODE, + VM_ENTRY_INTR_INFO_FIELD, + VM_ENTRY_INSTRUCTION_LEN, + VM_ENTRY_EXCEPTION_ERROR_CODE, + HOST_FS_BASE, + HOST_GS_BASE, + HOST_FS_SELECTOR, + HOST_GS_SELECTOR +}; +static const int max_shadow_read_write_fields = + ARRAY_SIZE(shadow_read_write_fields); + static const unsigned short vmcs_field_to_offset_table[] = { FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), FIELD(GUEST_ES_SELECTOR, guest_es_selector), @@ -537,6 +634,7 @@ static const unsigned short vmcs_field_to_offset_table[] = { FIELD(GUEST_ACTIVITY_STATE, guest_activity_state), FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs), FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs), + FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value), FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask), FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask), FIELD(CR0_READ_SHADOW, cr0_read_shadow), @@ -624,6 +722,9 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); static bool guest_state_valid(struct kvm_vcpu *vcpu); static u32 vmx_segment_access_rights(struct kvm_segment *var); +static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); +static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); +static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -640,6 +741,8 @@ static unsigned long *vmx_msr_bitmap_legacy; static unsigned long *vmx_msr_bitmap_longmode; static unsigned long *vmx_msr_bitmap_legacy_x2apic; static unsigned long *vmx_msr_bitmap_longmode_x2apic; +static unsigned long *vmx_vmread_bitmap; +static unsigned long *vmx_vmwrite_bitmap; static bool cpu_has_load_ia32_efer; static bool cpu_has_load_perf_global_ctrl; @@ -782,6 +885,18 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void) SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; } +static inline bool cpu_has_vmx_posted_intr(void) +{ + return 
vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; +} + +static inline bool cpu_has_vmx_apicv(void) +{ + return cpu_has_vmx_apic_register_virt() && + cpu_has_vmx_virtual_intr_delivery() && + cpu_has_vmx_posted_intr(); +} + static inline bool cpu_has_vmx_flexpriority(void) { return cpu_has_vmx_tpr_shadow() && @@ -895,6 +1010,18 @@ static inline bool cpu_has_vmx_wbinvd_exit(void) SECONDARY_EXEC_WBINVD_EXITING; } +static inline bool cpu_has_vmx_shadow_vmcs(void) +{ + u64 vmx_msr; + rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); + /* check if the cpu supports writing r/o exit information fields */ + if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS)) + return false; + + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_SHADOW_VMCS; +} + static inline bool report_flexpriority(void) { return flexpriority_enabled; @@ -1790,7 +1917,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, u32 intr_info = nr | INTR_INFO_VALID_MASK; if (nr == PF_VECTOR && is_guest_mode(vcpu) && - nested_pf_handled(vcpu)) + !vmx->nested.nested_run_pending && nested_pf_handled(vcpu)) return; if (has_error_code) { @@ -2022,6 +2149,7 @@ static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high; static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; +static u32 nested_vmx_misc_low, nested_vmx_misc_high; static __init void nested_vmx_setup_ctls_msrs(void) { /* @@ -2040,30 +2168,40 @@ static __init void nested_vmx_setup_ctls_msrs(void) */ /* pin-based controls */ + rdmsr(MSR_IA32_VMX_PINBASED_CTLS, + nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high); /* * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR. */ - nested_vmx_pinbased_ctls_low = 0x16 ; - nested_vmx_pinbased_ctls_high = 0x16 | - PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | - PIN_BASED_VIRTUAL_NMIS; + nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; + nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | + PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS | + PIN_BASED_VMX_PREEMPTION_TIMER; + nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; - /* exit controls */ - nested_vmx_exit_ctls_low = 0; + /* + * Exit controls + * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and + * 17 must be 1. + */ + nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ #ifdef CONFIG_X86_64 nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE; #else nested_vmx_exit_ctls_high = 0; #endif + nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; /* entry controls */ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); - nested_vmx_entry_ctls_low = 0; + /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. 
*/ + nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; nested_vmx_entry_ctls_high &= VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; + nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; /* cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, @@ -2080,6 +2218,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | + CPU_BASED_PAUSE_EXITING | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; /* * We can allow some features even when not supported by the @@ -2094,7 +2233,14 @@ static __init void nested_vmx_setup_ctls_msrs(void) nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high); nested_vmx_secondary_ctls_low = 0; nested_vmx_secondary_ctls_high &= - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_WBINVD_EXITING; + + /* miscellaneous data */ + rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); + nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | + VMX_MISC_SAVE_EFER_LMA; + nested_vmx_misc_high = 0; } static inline bool vmx_control_verify(u32 control, u32 low, u32 high) @@ -2165,7 +2311,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) nested_vmx_entry_ctls_high); break; case MSR_IA32_VMX_MISC: - *pdata = 0; + *pdata = vmx_control_msr(nested_vmx_misc_low, + nested_vmx_misc_high); break; /* * These MSRs specify bits which the guest must keep fixed (on or off) @@ -2529,12 +2676,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) u32 _vmexit_control = 0; u32 _vmentry_control = 0; - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; - opt = PIN_BASED_VIRTUAL_NMIS; - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, - &_pin_based_exec_control) < 0) - return -EIO; - min = CPU_BASED_HLT_EXITING | #ifdef CONFIG_X86_64 CPU_BASED_CR8_LOAD_EXITING | @@ -2573,7 +2714,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + SECONDARY_EXEC_SHADOW_VMCS; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) @@ -2605,11 +2747,23 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) #ifdef CONFIG_X86_64 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; #endif - opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT; + opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT | + VM_EXIT_ACK_INTR_ON_EXIT; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, &_vmexit_control) < 0) return -EIO; + min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; + opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR; + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, + &_pin_based_exec_control) < 0) + return -EIO; + + if (!(_cpu_based_2nd_exec_control & + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) || + !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT)) + _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; + min = 0; opt = VM_ENTRY_LOAD_IA32_PAT; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, @@ -2762,6 +2916,8 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_vpid()) enable_vpid = 0; + if (!cpu_has_vmx_shadow_vmcs()) + enable_shadow_vmcs = 0; if (!cpu_has_vmx_ept() || !cpu_has_vmx_ept_4levels()) { @@ -2788,14 +2944,16 @@ static 
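All of these capability MSRs follow the same low/high convention: the low word gives the allowed-0 settings (bits that must be 1) and the high word the allowed-1 settings (bits that may be 1); vmx_control_verify() above checks a proposed control word against such a pair. One way to express the check, assuming (as holds in practice) that the must-be-one bits are also allowed-1:

    #include <stdint.h>

    static int control_ok(uint32_t control, uint32_t low, uint32_t high)
    {
        return (control & low) == low      /* all required bits set     */
            && (control & ~high) == 0;     /* no unsupported bits set   */
    }
    /* e.g. with low = 0x16 (bits 1, 2 and 4 forced on when VMX_BASIC
     * bit 55 is clear, per the comments above), control_ok(0, 0x16, 0xff)
     * fails while control_ok(0x16, 0x16, 0xff) succeeds. */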
__init int hardware_setup(void) if (!cpu_has_vmx_ple()) ple_gap = 0; - if (!cpu_has_vmx_apic_register_virt() || - !cpu_has_vmx_virtual_intr_delivery()) - enable_apicv_reg_vid = 0; + if (!cpu_has_vmx_apicv()) + enable_apicv = 0; - if (enable_apicv_reg_vid) + if (enable_apicv) kvm_x86_ops->update_cr8_intercept = NULL; - else + else { kvm_x86_ops->hwapic_irr_update = NULL; + kvm_x86_ops->deliver_posted_interrupt = NULL; + kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; + } if (nested) nested_vmx_setup_ctls_msrs(); @@ -2876,22 +3034,6 @@ static void enter_pmode(struct kvm_vcpu *vcpu) vmx->cpl = 0; } -static gva_t rmode_tss_base(struct kvm *kvm) -{ - if (!kvm->arch.tss_addr) { - struct kvm_memslots *slots; - struct kvm_memory_slot *slot; - gfn_t base_gfn; - - slots = kvm_memslots(kvm); - slot = id_to_memslot(slots, 0); - base_gfn = slot->base_gfn + slot->npages - 3; - - return base_gfn << PAGE_SHIFT; - } - return kvm->arch.tss_addr; -} - static void fix_rmode_seg(int seg, struct kvm_segment *save) { const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; @@ -2942,19 +3084,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu) /* * Very old userspace does not call KVM_SET_TSS_ADDR before entering - * vcpu. Call it here with phys address pointing 16M below 4G. + * vcpu. Warn the user that an update is overdue. */ - if (!vcpu->kvm->arch.tss_addr) { + if (!vcpu->kvm->arch.tss_addr) printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " "called before entering vcpu\n"); - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - vmx_set_tss_addr(vcpu->kvm, 0xfeffd000); - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - } vmx_segment_cache_clear(vmx); - vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); + vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr); vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); @@ -3214,7 +3352,9 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) */ if (!nested_vmx_allowed(vcpu)) return 1; - } else if (to_vmx(vcpu)->nested.vmxon) + } + if (to_vmx(vcpu)->nested.vmxon && + ((cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) return 1; vcpu->arch.cr4 = cr4; @@ -3550,7 +3690,7 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu) return true; /* real mode guest state checks */ - if (!is_protmode(vcpu)) { + if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) { if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) return false; if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) @@ -3599,7 +3739,7 @@ static int init_rmode_tss(struct kvm *kvm) int r, idx, ret = 0; idx = srcu_read_lock(&kvm->srcu); - fn = rmode_tss_base(kvm) >> PAGE_SHIFT; + fn = kvm->arch.tss_addr >> PAGE_SHIFT; r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); if (r < 0) goto out; @@ -3692,7 +3832,7 @@ static int alloc_apic_access_page(struct kvm *kvm) kvm_userspace_mem.flags = 0; kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; kvm_userspace_mem.memory_size = PAGE_SIZE; - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); if (r) goto out; @@ -3722,7 +3862,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) kvm_userspace_mem.guest_phys_addr = kvm->arch.ept_identity_map_addr; kvm_userspace_mem.memory_size = PAGE_SIZE; - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); if (r) goto out; @@ -3869,13 +4009,59 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) msr, 
MSR_TYPE_W); } +static int vmx_vm_has_apicv(struct kvm *kvm) +{ + return enable_apicv && irqchip_in_kernel(kvm); +} + +/* + * Send interrupt to vcpu via posted interrupt way. + * 1. If target vcpu is running(non-root mode), send posted interrupt + * notification to vcpu and hardware will sync PIR to vIRR atomically. + * 2. If target vcpu isn't running(root mode), kick it to pick up the + * interrupt from PIR in next vmentry. + */ +static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int r; + + if (pi_test_and_set_pir(vector, &vmx->pi_desc)) + return; + + r = pi_test_and_set_on(&vmx->pi_desc); + kvm_make_request(KVM_REQ_EVENT, vcpu); +#ifdef CONFIG_SMP + if (!r && (vcpu->mode == IN_GUEST_MODE)) + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), + POSTED_INTR_VECTOR); + else +#endif + kvm_vcpu_kick(vcpu); +} + +static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!pi_test_and_clear_on(&vmx->pi_desc)) + return; + + kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); +} + +static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu) +{ + return; +} + /* * Set up the vmcs's constant host-state fields, i.e., host-state fields that * will not change in the lifetime of the guest. * Note that host-state that does change is set elsewhere. E.g., host-state * that is set differently for each CPU is set in vmx_vcpu_load(), not here. */ -static void vmx_set_constant_host_state(void) +static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) { u32 low32, high32; unsigned long tmpl; @@ -3903,6 +4089,7 @@ static void vmx_set_constant_host_state(void) native_store_idt(&dt); vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ + vmx->host_idt_base = dt.address; vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */ @@ -3928,6 +4115,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); } +static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) +{ + u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; + + if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) + pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; + return pin_based_exec_ctrl; +} + static u32 vmx_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_exec_ctrl; @@ -3945,11 +4141,6 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) return exec_control; } -static int vmx_vm_has_apicv(struct kvm *kvm) -{ - return enable_apicv_reg_vid && irqchip_in_kernel(kvm); -} - static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; @@ -3971,6 +4162,12 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; + /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD + (handle_vmptrld). 
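vmx_deliver_posted_interrupt() above implements the two-case comment literally: set the PIR bit (if it was already set, the vector is already pending and nothing more is needed), set ON, and if ON was newly set while the target vCPU is executing guest code, send the notification IPI so the hardware ORs PIR into the virtual IRR without any exit; in every other case fall back to kicking the vCPU, which will sync PIR on its next entry. Condensed (a sketch that omits the KVM_REQ_EVENT bookkeeping):

    if (test_and_set_bit(vector, pir))
        return;                                  /* already posted      */
    if (!test_and_set_on(pi_desc) && vcpu->mode == IN_GUEST_MODE)
        send_ipi(vcpu->cpu, POSTED_INTR_VECTOR); /* hw injects directly */
    else
        kvm_vcpu_kick(vcpu);                     /* sync on next vmentry */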
+ We can NOT enable shadow_vmcs here because we don't have yet + a current VMCS12 + */ + exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; return exec_control; } @@ -3999,14 +4196,17 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a)); vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b)); + if (enable_shadow_vmcs) { + vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap)); + vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); + } if (cpu_has_vmx_msr_bitmap()) vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ /* Control */ - vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, - vmcs_config.pin_based_exec_ctrl); + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); @@ -4015,13 +4215,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmx_secondary_exec_control(vmx)); } - if (enable_apicv_reg_vid) { + if (vmx_vm_has_apicv(vmx->vcpu.kvm)) { vmcs_write64(EOI_EXIT_BITMAP0, 0); vmcs_write64(EOI_EXIT_BITMAP1, 0); vmcs_write64(EOI_EXIT_BITMAP2, 0); vmcs_write64(EOI_EXIT_BITMAP3, 0); vmcs_write16(GUEST_INTR_STATUS, 0); + + vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); + vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); } if (ple_gap) { @@ -4035,7 +4238,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ - vmx_set_constant_host_state(); + vmx_set_constant_host_state(vmx); #ifdef CONFIG_X86_64 rdmsrl(MSR_FS_BASE, a); vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */ @@ -4089,11 +4292,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) return 0; } -static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) +static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u64 msr; - int ret; vmx->rmode.vm86_active = 0; @@ -4109,12 +4311,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx_segment_cache_clear(vmx); seg_setup(VCPU_SREG_CS); - if (kvm_vcpu_is_bsp(&vmx->vcpu)) - vmcs_write16(GUEST_CS_SELECTOR, 0xf000); - else { - vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); - vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); - } + vmcs_write16(GUEST_CS_SELECTOR, 0xf000); + vmcs_write32(GUEST_CS_BASE, 0xffff0000); seg_setup(VCPU_SREG_DS); seg_setup(VCPU_SREG_ES); @@ -4137,10 +4335,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_SYSENTER_EIP, 0); vmcs_writel(GUEST_RFLAGS, 0x02); - if (kvm_vcpu_is_bsp(&vmx->vcpu)) - kvm_rip_write(vcpu, 0xfff0); - else - kvm_rip_write(vcpu, 0); + kvm_rip_write(vcpu, 0xfff0); vmcs_writel(GUEST_GDTR_BASE, 0); vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); @@ -4171,23 +4366,20 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); + if (vmx_vm_has_apicv(vcpu->kvm)) + memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); + if (vmx->vpid != 0) vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); vmx_set_cr4(&vmx->vcpu, 0); vmx_set_efer(&vmx->vcpu, 0); vmx_fpu_activate(&vmx->vcpu); update_exception_bitmap(&vmx->vcpu); vpid_sync_context(vmx); - - ret = 0; - - return ret; } /* @@ -4200,40 +4392,45 @@ static bool nested_exit_on_intr(struct kvm_vcpu 
*vcpu) PIN_BASED_EXT_INTR_MASK; } -static void enable_irq_window(struct kvm_vcpu *vcpu) +static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) +{ + return get_vmcs12(vcpu)->pin_based_vm_exec_control & + PIN_BASED_NMI_EXITING; +} + +static int enable_irq_window(struct kvm_vcpu *vcpu) { u32 cpu_based_vm_exec_control; - if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { + + if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) /* * We get here if vmx_interrupt_allowed() said we can't - * inject to L1 now because L2 must run. Ask L2 to exit - * right after entry, so we can inject to L1 more promptly. + * inject to L1 now because L2 must run. The caller will have + * to make L2 exit right after entry, so we can inject to L1 + * more promptly. */ - kvm_make_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); - return; - } + return -EBUSY; cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + return 0; } -static void enable_nmi_window(struct kvm_vcpu *vcpu) +static int enable_nmi_window(struct kvm_vcpu *vcpu) { u32 cpu_based_vm_exec_control; - if (!cpu_has_virtual_nmis()) { - enable_irq_window(vcpu); - return; - } + if (!cpu_has_virtual_nmis()) + return enable_irq_window(vcpu); + + if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) + return enable_irq_window(vcpu); - if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { - enable_irq_window(vcpu); - return; - } cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + return 0; } static void vmx_inject_irq(struct kvm_vcpu *vcpu) @@ -4294,16 +4491,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); } -static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) -{ - if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) - return 0; - - return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI - | GUEST_INTR_STATE_NMI)); -} - static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) { if (!cpu_has_virtual_nmis()) @@ -4333,18 +4520,52 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) } } +static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) +{ + if (is_guest_mode(vcpu)) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + + if (to_vmx(vcpu)->nested.nested_run_pending) + return 0; + if (nested_exit_on_nmi(vcpu)) { + nested_vmx_vmexit(vcpu); + vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; + vmcs12->vm_exit_intr_info = NMI_VECTOR | + INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK; + /* + * The NMI-triggered VM exit counts as injection: + * clear this one and block further NMIs. 
+ */ + vcpu->arch.nmi_pending = 0; + vmx_set_nmi_mask(vcpu, true); + return 0; + } + } + + if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) + return 0; + + return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI + | GUEST_INTR_STATE_NMI)); +} + static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { + if (is_guest_mode(vcpu)) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - if (to_vmx(vcpu)->nested.nested_run_pending || - (vmcs12->idt_vectoring_info_field & - VECTORING_INFO_VALID_MASK)) + + if (to_vmx(vcpu)->nested.nested_run_pending) return 0; - nested_vmx_vmexit(vcpu); - vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; - vmcs12->vm_exit_intr_info = 0; - /* fall through to normal code, but now in L1, not L2 */ + if (nested_exit_on_intr(vcpu)) { + nested_vmx_vmexit(vcpu); + vmcs12->vm_exit_reason = + EXIT_REASON_EXTERNAL_INTERRUPT; + vmcs12->vm_exit_intr_info = 0; + /* + * fall through to normal code, but now in L1, not L2 + */ + } } return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && @@ -4362,7 +4583,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) .flags = 0, }; - ret = kvm_set_memory_region(kvm, &tss_mem, false); + ret = kvm_set_memory_region(kvm, &tss_mem); if (ret) return ret; kvm->arch.tss_addr = addr; @@ -4603,34 +4824,50 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) { - if (to_vmx(vcpu)->nested.vmxon && - ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) - return 1; - if (is_guest_mode(vcpu)) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned long orig_val = val; + /* * We get here when L2 changed cr0 in a way that did not change * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), - * but did change L0 shadowed bits. This can currently happen - * with the TS bit: L0 may want to leave TS on (for lazy fpu - * loading) while pretending to allow the guest to change it. + * but did change L0 shadowed bits. So we first calculate the + * effective cr0 value that L1 would like to write into the + * hardware. It consists of the L2-owned bits from the new + * value combined with the L1-owned bits from L1's guest_cr0. 
*/ - if (kvm_set_cr0(vcpu, (val & vcpu->arch.cr0_guest_owned_bits) | - (vcpu->arch.cr0 & ~vcpu->arch.cr0_guest_owned_bits))) + val = (val & ~vmcs12->cr0_guest_host_mask) | + (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); + + /* TODO: will have to take unrestricted guest mode into + * account */ + if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) return 1; - vmcs_writel(CR0_READ_SHADOW, val); + + if (kvm_set_cr0(vcpu, val)) + return 1; + vmcs_writel(CR0_READ_SHADOW, orig_val); return 0; - } else + } else { + if (to_vmx(vcpu)->nested.vmxon && + ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) + return 1; return kvm_set_cr0(vcpu, val); + } } static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) { if (is_guest_mode(vcpu)) { - if (kvm_set_cr4(vcpu, (val & vcpu->arch.cr4_guest_owned_bits) | - (vcpu->arch.cr4 & ~vcpu->arch.cr4_guest_owned_bits))) + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned long orig_val = val; + + /* analogously to handle_set_cr0 */ + val = (val & ~vmcs12->cr4_guest_host_mask) | + (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask); + if (kvm_set_cr4(vcpu, val)) return 1; - vmcs_writel(CR4_READ_SHADOW, val); + vmcs_writel(CR4_READ_SHADOW, orig_val); return 0; } else return kvm_set_cr4(vcpu, val); @@ -5183,7 +5420,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) return 1; - err = emulate_instruction(vcpu, 0); + err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); if (err == EMULATE_DO_MMIO) { ret = 0; @@ -5259,8 +5496,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) } /* Create a new VMCS */ - item = (struct vmcs02_list *) - kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); + item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); if (!item) return NULL; item->vmcs02.vmcs = alloc_vmcs(); @@ -5309,6 +5545,9 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) free_loaded_vmcs(&vmx->vmcs01); } +static void nested_vmx_failValid(struct kvm_vcpu *vcpu, + u32 vm_instruction_error); + /* * Emulate the VMXON instruction. 
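The masking in handle_set_cr0() is worth a concrete number. Suppose L1 shadows only the TS bit (cr0_guest_host_mask = X86_CR0_TS = 0x8, so TS is L1-owned) and L1's guest_cr0 has TS clear; then an L2 write is rewritten as follows before being handed to kvm_set_cr0():

    unsigned long mask      = 0x8;         /* X86_CR0_TS, owned by L1      */
    unsigned long guest_cr0 = 0x80000031;  /* L1's view: PG|NE|ET|PE, TS=0 */
    unsigned long l2_val    = 0x8000003b;  /* L2's write adds MP and TS    */
    unsigned long effective = (l2_val & ~mask) | (guest_cr0 & mask);
    /* effective == 0x80000033: every bit is L2's (including MP) except
     * TS, which stays at L1's value; CR0_READ_SHADOW still receives the
     * original 0x8000003b so L2 reads back what it wrote. */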
* Currently, we just remember that VMX is active, and do not save or even @@ -5321,6 +5560,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) { struct kvm_segment cs; struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmcs *shadow_vmcs; /* The Intel VMX Instruction Reference lists a bunch of bits that * are prerequisite to running VMXON, most notably cr4.VMXE must be @@ -5344,6 +5584,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu) kvm_inject_gp(vcpu, 0); return 1; } + if (vmx->nested.vmxon) { + nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); + skip_emulated_instruction(vcpu); + return 1; + } + if (enable_shadow_vmcs) { + shadow_vmcs = alloc_vmcs(); + if (!shadow_vmcs) + return -ENOMEM; + /* mark vmcs as shadow */ + shadow_vmcs->revision_id |= (1u << 31); + /* init shadow vmcs */ + vmcs_clear(shadow_vmcs); + vmx->nested.current_shadow_vmcs = shadow_vmcs; + } INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); vmx->nested.vmcs02_num = 0; @@ -5384,6 +5639,25 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) return 1; } +static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) +{ + u32 exec_control; + if (enable_shadow_vmcs) { + if (vmx->nested.current_vmcs12 != NULL) { + /* copy to memory all shadowed fields in case + they were modified */ + copy_shadow_to_vmcs12(vmx); + vmx->nested.sync_shadow_vmcs = false; + exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); + vmcs_write64(VMCS_LINK_POINTER, -1ull); + } + } + kunmap(vmx->nested.current_vmcs12_page); + nested_release_page(vmx->nested.current_vmcs12_page); +} + /* * Free whatever needs to be freed from vmx->nested when L1 goes down, or * just stops using VMX. @@ -5394,11 +5668,12 @@ static void free_nested(struct vcpu_vmx *vmx) return; vmx->nested.vmxon = false; if (vmx->nested.current_vmptr != -1ull) { - kunmap(vmx->nested.current_vmcs12_page); - nested_release_page(vmx->nested.current_vmcs12_page); + nested_release_vmcs12(vmx); vmx->nested.current_vmptr = -1ull; vmx->nested.current_vmcs12 = NULL; } + if (enable_shadow_vmcs) + free_vmcs(vmx->nested.current_shadow_vmcs); /* Unpin physical memory we referred to in current vmcs02 */ if (vmx->nested.apic_access_page) { nested_release_page(vmx->nested.apic_access_page); @@ -5507,6 +5782,10 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu, X86_EFLAGS_SF | X86_EFLAGS_OF)) | X86_EFLAGS_ZF); get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; + /* + * We don't need to force a shadow sync because + * VM_INSTRUCTION_ERROR is not shadowed + */ } /* Emulate the VMCLEAR instruction */ @@ -5539,8 +5818,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) } if (vmptr == vmx->nested.current_vmptr) { - kunmap(vmx->nested.current_vmcs12_page); - nested_release_page(vmx->nested.current_vmcs12_page); + nested_release_vmcs12(vmx); vmx->nested.current_vmptr = -1ull; vmx->nested.current_vmcs12 = NULL; } @@ -5639,6 +5917,111 @@ static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu, } } + +static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu, + unsigned long field, u64 field_value){ + short offset = vmcs_field_to_offset(field); + char *p = ((char *) get_vmcs12(vcpu)) + offset; + if (offset < 0) + return false; + + switch (vmcs_field_type(field)) { + case VMCS_FIELD_TYPE_U16: + *(u16 *)p = field_value; + return true; + case VMCS_FIELD_TYPE_U32: + *(u32 *)p = field_value; + return true; + case VMCS_FIELD_TYPE_U64: + *(u64 *)p = field_value; + return true; + 
case VMCS_FIELD_TYPE_NATURAL_WIDTH: + *(natural_width *)p = field_value; + return true; + default: + return false; /* can never happen. */ + } + +} + +static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) +{ + int i; + unsigned long field; + u64 field_value; + struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; + unsigned long *fields = (unsigned long *)shadow_read_write_fields; + int num_fields = max_shadow_read_write_fields; + + vmcs_load(shadow_vmcs); + + for (i = 0; i < num_fields; i++) { + field = fields[i]; + switch (vmcs_field_type(field)) { + case VMCS_FIELD_TYPE_U16: + field_value = vmcs_read16(field); + break; + case VMCS_FIELD_TYPE_U32: + field_value = vmcs_read32(field); + break; + case VMCS_FIELD_TYPE_U64: + field_value = vmcs_read64(field); + break; + case VMCS_FIELD_TYPE_NATURAL_WIDTH: + field_value = vmcs_readl(field); + break; + } + vmcs12_write_any(&vmx->vcpu, field, field_value); + } + + vmcs_clear(shadow_vmcs); + vmcs_load(vmx->loaded_vmcs->vmcs); +} + +static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) +{ + unsigned long *fields[] = { + (unsigned long *)shadow_read_write_fields, + (unsigned long *)shadow_read_only_fields + }; + int num_lists = ARRAY_SIZE(fields); + int max_fields[] = { + max_shadow_read_write_fields, + max_shadow_read_only_fields + }; + int i, q; + unsigned long field; + u64 field_value = 0; + struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; + + vmcs_load(shadow_vmcs); + + for (q = 0; q < num_lists; q++) { + for (i = 0; i < max_fields[q]; i++) { + field = fields[q][i]; + vmcs12_read_any(&vmx->vcpu, field, &field_value); + + switch (vmcs_field_type(field)) { + case VMCS_FIELD_TYPE_U16: + vmcs_write16(field, (u16)field_value); + break; + case VMCS_FIELD_TYPE_U32: + vmcs_write32(field, (u32)field_value); + break; + case VMCS_FIELD_TYPE_U64: + vmcs_write64(field, (u64)field_value); + break; + case VMCS_FIELD_TYPE_NATURAL_WIDTH: + vmcs_writel(field, (long)field_value); + break; + } + } + } + + vmcs_clear(shadow_vmcs); + vmcs_load(vmx->loaded_vmcs->vmcs); +} + /* * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was * used before) all generate the same failure when it is missing. @@ -5703,8 +6086,6 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) gva_t gva; unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - char *p; - short offset; /* The value to write might be 32 or 64 bits, depending on L1's long * mode, and eventually we need to write that into a field of several * possible lengths. 
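The vmcs12_write_any()/copy_shadow_to_vmcs12() helpers above are a table-driven typed-field copy: a field ID maps through vmcs_field_to_offset() to an offset into struct vmcs12 plus a width class, and a switch on the width class picks the correctly sized store. A minimal userspace sketch of the same dispatch pattern follows; the demo struct, field IDs, and offset table are invented for illustration and are not the real VMCS layout.

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-in for struct vmcs12: fields of mixed width. */
    struct demo_vmcs {
        uint16_t guest_cs_selector;
        uint32_t exception_bitmap;
        uint64_t io_bitmap_a;
    };

    enum field_type { TYPE_U16, TYPE_U32, TYPE_U64 };

    /* Plays the role of vmcs_field_to_offset() plus vmcs_field_type(). */
    static const struct { enum field_type type; size_t offset; } fields[] = {
        { TYPE_U16, offsetof(struct demo_vmcs, guest_cs_selector) },
        { TYPE_U32, offsetof(struct demo_vmcs, exception_bitmap) },
        { TYPE_U64, offsetof(struct demo_vmcs, io_bitmap_a) },
    };

    static bool write_any(struct demo_vmcs *v, unsigned int field, uint64_t val)
    {
        char *p;

        if (field >= sizeof(fields) / sizeof(fields[0]))
            return false;                /* unsupported component */
        p = (char *)v + fields[field].offset;
        switch (fields[field].type) {    /* width-specific store */
        case TYPE_U16: *(uint16_t *)p = val; return true;
        case TYPE_U32: *(uint32_t *)p = val; return true;
        case TYPE_U64: *(uint64_t *)p = val; return true;
        }
        return false;
    }

    int main(void)
    {
        struct demo_vmcs v = { 0 };

        write_any(&v, 1, 0xffffffffdeadbeefull);  /* silently truncated to 32 bits */
        printf("exception_bitmap = %#x\n", (unsigned int)v.exception_bitmap);
        return 0;
    }

Keeping the width dispatch in one helper is what lets handle_vmwrite() in the next hunk shrink to a single call.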
The code below first zero-extends the value to 64 @@ -5741,28 +6122,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) return 1; } - offset = vmcs_field_to_offset(field); - if (offset < 0) { - nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); - skip_emulated_instruction(vcpu); - return 1; - } - p = ((char *) get_vmcs12(vcpu)) + offset; - - switch (vmcs_field_type(field)) { - case VMCS_FIELD_TYPE_U16: - *(u16 *)p = field_value; - break; - case VMCS_FIELD_TYPE_U32: - *(u32 *)p = field_value; - break; - case VMCS_FIELD_TYPE_U64: - *(u64 *)p = field_value; - break; - case VMCS_FIELD_TYPE_NATURAL_WIDTH: - *(natural_width *)p = field_value; - break; - default: + if (!vmcs12_write_any(vcpu, field, field_value)) { nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); skip_emulated_instruction(vcpu); return 1; @@ -5780,6 +6140,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) gva_t gva; gpa_t vmptr; struct x86_exception e; + u32 exec_control; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -5818,14 +6179,20 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) skip_emulated_instruction(vcpu); return 1; } - if (vmx->nested.current_vmptr != -1ull) { - kunmap(vmx->nested.current_vmcs12_page); - nested_release_page(vmx->nested.current_vmcs12_page); - } + if (vmx->nested.current_vmptr != -1ull) + nested_release_vmcs12(vmx); vmx->nested.current_vmptr = vmptr; vmx->nested.current_vmcs12 = new_vmcs12; vmx->nested.current_vmcs12_page = page; + if (enable_shadow_vmcs) { + exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + exec_control |= SECONDARY_EXEC_SHADOW_VMCS; + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); + vmcs_write64(VMCS_LINK_POINTER, + __pa(vmx->nested.current_shadow_vmcs)); + vmx->nested.sync_shadow_vmcs = true; + } } nested_vmx_succeed(vcpu); @@ -5908,6 +6275,52 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { static const int kvm_vmx_max_exit_handlers = ARRAY_SIZE(kvm_vmx_exit_handlers); +static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + unsigned long exit_qualification; + gpa_t bitmap, last_bitmap; + unsigned int port; + int size; + u8 b; + + if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING)) + return 1; + + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) + return 0; + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + port = exit_qualification >> 16; + size = (exit_qualification & 7) + 1; + + last_bitmap = (gpa_t)-1; + b = -1; + + while (size > 0) { + if (port < 0x8000) + bitmap = vmcs12->io_bitmap_a; + else if (port < 0x10000) + bitmap = vmcs12->io_bitmap_b; + else + return 1; + bitmap += (port & 0x7fff) / 8; + + if (last_bitmap != bitmap) + if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1)) + return 1; + if (b & (1 << (port & 7))) + return 1; + + port++; + size--; + last_bitmap = bitmap; + } + + return 0; +} + /* * Return 1 if we should exit from L2 to L1 to handle an MSR access, * rather than handle it ourselves in L0.
I.e., check whether L1 expressed @@ -5939,7 +6352,8 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, /* Then read the msr_index'th bit from this bitmap: */ if (msr_index < 1024*8) { unsigned char b; - kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1); + if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1)) + return 1; return 1 & (b >> (msr_index & 7)); } else return 1; /* let L1 handle the wrong parameter */ @@ -6033,10 +6447,10 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, */ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) { - u32 exit_reason = vmcs_read32(VM_EXIT_REASON); u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + u32 exit_reason = vmx->exit_reason; if (vmx->nested.nested_run_pending) return 0; @@ -6060,14 +6474,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_TRIPLE_FAULT: return 1; case EXIT_REASON_PENDING_INTERRUPT: + return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING); case EXIT_REASON_NMI_WINDOW: - /* - * prepare_vmcs02() set the CPU_BASED_VIRTUAL_INTR_PENDING bit - * (aka Interrupt Window Exiting) only when L1 turned it on, - * so if we got a PENDING_INTERRUPT exit, this must be for L1. - * Same for NMI Window Exiting. - */ - return 1; + return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING); case EXIT_REASON_TASK_SWITCH: return 1; case EXIT_REASON_CPUID: @@ -6097,8 +6506,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_DR_ACCESS: return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING); case EXIT_REASON_IO_INSTRUCTION: - /* TODO: support IO bitmaps */ - return 1; + return nested_vmx_exit_handled_io(vcpu, vmcs12); case EXIT_REASON_MSR_READ: case EXIT_REASON_MSR_WRITE: return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); @@ -6122,6 +6530,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_EPT_VIOLATION: case EXIT_REASON_EPT_MISCONFIG: return 0; + case EXIT_REASON_PREEMPTION_TIMER: + return vmcs12->pin_based_vm_exec_control & + PIN_BASED_VMX_PREEMPTION_TIMER; case EXIT_REASON_WBINVD: return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); case EXIT_REASON_XSETBV: @@ -6316,6 +6727,9 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { + if (!vmx_vm_has_apicv(vcpu->kvm)) + return; + vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); @@ -6346,6 +6760,52 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) } } +static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) +{ + u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + + /* + * If external interrupt exists, IF bit is set in rflags/eflags on the + * interrupt stack frame, and interrupt will be enabled on a return + * from interrupt handler. 
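Both nested_vmx_exit_handled_io() and the MSR-bitmap test in the hunks above reduce to the same idiom: read one guest byte at bit-index / 8 and test bit (index & 7). A hedged userspace model of the I/O-bitmap walk follows; the local arrays stand in for the guest pages that the real code fetches with kvm_read_guest(). Note how an access that straddles port 0x7fff is checked in both bitmaps.

    #include <stdint.h>
    #include <stdio.h>

    static uint8_t io_bitmap_a[4096];   /* ports 0x0000-0x7fff */
    static uint8_t io_bitmap_b[4096];   /* ports 0x8000-0xffff */

    /* Return 1 if any byte of the access [port, port + size) is trapped. */
    static int io_access_intercepted(unsigned int port, int size)
    {
        while (size > 0) {
            uint8_t *bitmap;

            if (port < 0x8000)
                bitmap = io_bitmap_a;
            else if (port < 0x10000)
                bitmap = io_bitmap_b;
            else
                return 1;   /* ran past 0xffff: always exit */

            if (bitmap[(port & 0x7fff) / 8] & (1 << (port & 7)))
                return 1;
            port++;
            size--;
        }
        return 0;
    }

    int main(void)
    {
        io_bitmap_b[0] |= 1;    /* L1 intercepts port 0x8000 */

        /* A 4-byte access at 0x7ffe spans both bitmaps and is caught. */
        printf("0x7ffe/4 -> %d\n", io_access_intercepted(0x7ffe, 4));
        printf("0x7ffc/2 -> %d\n", io_access_intercepted(0x7ffc, 2));
        return 0;
    }

The real function additionally carries last_bitmap and b so that consecutive ports falling in the same guest byte cost a single kvm_read_guest() call; the model above drops that cache for brevity.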
+ */ + if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK)) + == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) { + unsigned int vector; + unsigned long entry; + gate_desc *desc; + struct vcpu_vmx *vmx = to_vmx(vcpu); +#ifdef CONFIG_X86_64 + unsigned long tmp; +#endif + + vector = exit_intr_info & INTR_INFO_VECTOR_MASK; + desc = (gate_desc *)vmx->host_idt_base + vector; + entry = gate_offset(*desc); + asm volatile( +#ifdef CONFIG_X86_64 + "mov %%" _ASM_SP ", %[sp]\n\t" + "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" + "push $%c[ss]\n\t" + "push %[sp]\n\t" +#endif + "pushf\n\t" + "orl $0x200, (%%" _ASM_SP ")\n\t" + __ASM_SIZE(push) " $%c[cs]\n\t" + "call *%[entry]\n\t" + : +#ifdef CONFIG_X86_64 + [sp]"=&r"(tmp) +#endif + : + [entry]"r"(entry), + [ss]"i"(__KERNEL_DS), + [cs]"i"(__KERNEL_CS) + ); + } else + local_irq_enable(); +} + static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -6388,7 +6848,7 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); } -static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, +static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, u32 idt_vectoring_info, int instr_len_field, int error_code_field) @@ -6399,46 +6859,43 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; - vmx->vcpu.arch.nmi_injected = false; - kvm_clear_exception_queue(&vmx->vcpu); - kvm_clear_interrupt_queue(&vmx->vcpu); + vcpu->arch.nmi_injected = false; + kvm_clear_exception_queue(vcpu); + kvm_clear_interrupt_queue(vcpu); if (!idtv_info_valid) return; - kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu); + kvm_make_request(KVM_REQ_EVENT, vcpu); vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; switch (type) { case INTR_TYPE_NMI_INTR: - vmx->vcpu.arch.nmi_injected = true; + vcpu->arch.nmi_injected = true; /* * SDM 3: 27.7.1.2 (September 2008) * Clear bit "block by NMI" before VM entry if a NMI * delivery faulted. 
*/ - vmx_set_nmi_mask(&vmx->vcpu, false); + vmx_set_nmi_mask(vcpu, false); break; case INTR_TYPE_SOFT_EXCEPTION: - vmx->vcpu.arch.event_exit_inst_len = - vmcs_read32(instr_len_field); + vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); /* fall through */ case INTR_TYPE_HARD_EXCEPTION: if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { u32 err = vmcs_read32(error_code_field); - kvm_queue_exception_e(&vmx->vcpu, vector, err); + kvm_queue_exception_e(vcpu, vector, err); } else - kvm_queue_exception(&vmx->vcpu, vector); + kvm_queue_exception(vcpu, vector); break; case INTR_TYPE_SOFT_INTR: - vmx->vcpu.arch.event_exit_inst_len = - vmcs_read32(instr_len_field); + vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); /* fall through */ case INTR_TYPE_EXT_INTR: - kvm_queue_interrupt(&vmx->vcpu, vector, - type == INTR_TYPE_SOFT_INTR); + kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR); break; default: break; @@ -6447,18 +6904,14 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, static void vmx_complete_interrupts(struct vcpu_vmx *vmx) { - if (is_guest_mode(&vmx->vcpu)) - return; - __vmx_complete_interrupts(vmx, vmx->idt_vectoring_info, + __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info, VM_EXIT_INSTRUCTION_LEN, IDT_VECTORING_ERROR_CODE); } static void vmx_cancel_injection(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu)) - return; - __vmx_complete_interrupts(to_vmx(vcpu), + __vmx_complete_interrupts(vcpu, vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), VM_ENTRY_INSTRUCTION_LEN, VM_ENTRY_EXCEPTION_ERROR_CODE); @@ -6489,21 +6942,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long debugctlmsr; - if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - if (vmcs12->idt_vectoring_info_field & - VECTORING_INFO_VALID_MASK) { - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - vmcs12->idt_vectoring_info_field); - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, - vmcs12->vm_exit_instruction_len); - if (vmcs12->idt_vectoring_info_field & - VECTORING_INFO_DELIVER_CODE_MASK) - vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, - vmcs12->idt_vectoring_error_code); - } - } - /* Record the guest's net vcpu time for enforced NMI injections. 
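The __vmx_complete_interrupts() rework above keys everything off the packed IDT-vectoring information word: an 8-bit vector, a 3-bit event type, a deliver-error-code flag, and a valid bit. A standalone decode sketch using that architectural layout (bits 7:0 vector, 10:8 type, 11 error code, 31 valid; the macro names here only mirror the kernel's):

    #include <stdint.h>
    #include <stdio.h>

    #define INFO_VECTOR_MASK   0xffu
    #define INFO_TYPE_SHIFT    8
    #define INFO_TYPE_MASK     (7u << INFO_TYPE_SHIFT)
    #define INFO_DELIVER_CODE  (1u << 11)
    #define INFO_VALID         (1u << 31)

    static const char *const type_names[8] = {
        "external interrupt", "reserved", "NMI", "hardware exception",
        "software interrupt", "privileged software exception",
        "software exception", "other",
    };

    static void decode(uint32_t info)
    {
        if (!(info & INFO_VALID)) {
            puts("no event was being delivered");
            return;
        }
        printf("vector %u, %s%s\n",
               info & INFO_VECTOR_MASK,
               type_names[(info & INFO_TYPE_MASK) >> INFO_TYPE_SHIFT],
               (info & INFO_DELIVER_CODE) ? ", error code follows" : "");
    }

    int main(void)
    {
        /* page fault (#PF, vector 14) interrupted mid-delivery */
        decode(INFO_VALID | INFO_DELIVER_CODE |
               (3u << INFO_TYPE_SHIFT) | 14);
        decode(0);
        return 0;
    }

The same vector/type/valid layout is shared by the VM-entry and VM-exit interruption-information fields, which is why one helper can now serve both vmx_complete_interrupts() and vmx_cancel_injection().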
*/ if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) vmx->entry_time = ktime_get(); @@ -6513,6 +6951,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vmx->emulation_required) return; + if (vmx->nested.sync_shadow_vmcs) { + copy_vmcs12_to_shadow(vmx); + vmx->nested.sync_shadow_vmcs = false; + } + if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) @@ -6662,17 +7105,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); - if (is_guest_mode(vcpu)) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info; - if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) { - vmcs12->idt_vectoring_error_code = - vmcs_read32(IDT_VECTORING_ERROR_CODE); - vmcs12->vm_exit_instruction_len = - vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - } - } - vmx->loaded_vmcs->launched = 1; vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); @@ -6734,10 +7166,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) put_cpu(); if (err) goto free_vmcs; - if (vm_need_virtualize_apic_accesses(kvm)) + if (vm_need_virtualize_apic_accesses(kvm)) { err = alloc_apic_access_page(kvm); if (err) goto free_vmcs; + } if (enable_ept) { if (!kvm->arch.ept_identity_map_addr) @@ -6931,9 +7364,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->vm_entry_instruction_len); vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, vmcs12->guest_interruptibility_info); - vmcs_write32(GUEST_ACTIVITY_STATE, vmcs12->guest_activity_state); vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); - vmcs_writel(GUEST_DR7, vmcs12->guest_dr7); + kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags); vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, vmcs12->guest_pending_dbg_exceptions); @@ -6946,6 +7378,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) (vmcs_config.pin_based_exec_ctrl | vmcs12->pin_based_vm_exec_control)); + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, + vmcs12->vmx_preemption_timer_value); + /* * Whether page-faults are trapped is determined by a combination of * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. @@ -7016,7 +7452,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) * Other fields are different per CPU, and will be set later when * vmx_vcpu_load() is called, and when vmx_save_host_state() is called. 
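One way to read the sync_shadow_vmcs flag threaded through these hunks: it is a deferred write-back bit. VMPTRLD and the emulated VMEXIT paths mark the in-memory vmcs12 as newer than the shadow VMCS, and vmx_vcpu_run() flushes the difference with copy_vmcs12_to_shadow() just before entry, clearing the flag. A toy model of that lifecycle, with invented names:

    #include <stdbool.h>
    #include <stdio.h>

    struct cache {
        int soft_copy;      /* authoritative software state (vmcs12) */
        int hw_copy;        /* derived hardware state (shadow VMCS) */
        bool sync_needed;   /* plays the role of nested.sync_shadow_vmcs */
    };

    static void soft_write(struct cache *c, int v)
    {
        c->soft_copy = v;
        c->sync_needed = true;  /* set where the patch sets sync_shadow_vmcs */
    }

    static void enter_hw(struct cache *c)
    {
        if (c->sync_needed) {   /* the new check in vmx_vcpu_run() */
            c->hw_copy = c->soft_copy;
            c->sync_needed = false;
        }
        printf("entering with hw_copy=%d\n", c->hw_copy);
    }

    int main(void)
    {
        struct cache c = { 0, 0, false };

        soft_write(&c, 42);
        enter_hw(&c);   /* flushes once */
        enter_hw(&c);   /* already clean, no copy */
        return 0;
    }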
*/ - vmx_set_constant_host_state(); + vmx_set_constant_host_state(vmx); /* * HOST_RSP is normally set correctly in vmx_vcpu_run() just before @@ -7082,7 +7518,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) vcpu->arch.efer = vmcs12->guest_ia32_efer; - if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) + else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) vcpu->arch.efer |= (EFER_LMA | EFER_LME); else vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); @@ -7121,6 +7557,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) struct vcpu_vmx *vmx = to_vmx(vcpu); int cpu; struct loaded_vmcs *vmcs02; + bool ia32e; if (!nested_vmx_check_permission(vcpu) || !nested_vmx_check_vmcs12(vcpu)) @@ -7129,6 +7566,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) skip_emulated_instruction(vcpu); vmcs12 = get_vmcs12(vcpu); + if (enable_shadow_vmcs) + copy_shadow_to_vmcs12(vmx); + /* * The nested entry process starts with enforcing various prerequisites * on vmcs12 as required by the Intel SDM, and act appropriately when @@ -7146,6 +7586,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) return 1; } + if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) { + nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); + return 1; + } + if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) && !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) { /*TODO: Also verify bits beyond physical address width are 0*/ @@ -7204,6 +7649,45 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) } /* + * If the load IA32_EFER VM-entry control is 1, the following checks + * are performed on the field for the IA32_EFER MSR: + * - Bits reserved in the IA32_EFER MSR must be 0. + * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of + * the IA-32e mode guest VM-exit control. It must also be identical + * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to + * CR0.PG) is 1. + */ + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) { + ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; + if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) || + ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || + ((vmcs12->guest_cr0 & X86_CR0_PG) && + ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) { + nested_vmx_entry_failure(vcpu, vmcs12, + EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); + return 1; + } + } + + /* + * If the load IA32_EFER VM-exit control is 1, bits reserved in the + * IA32_EFER MSR must be 0 in the field for that register. In addition, + * the values of the LMA and LME bits in the field must each be that of + * the host address-space size VM-exit control. + */ + if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) { + ia32e = (vmcs12->vm_exit_controls & + VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; + if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || + ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || + ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) { + nested_vmx_entry_failure(vcpu, vmcs12, + EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); + return 1; + } + } + + /* * We're finally done with prerequisite checking, and can start with * the nested entry. 
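The two EFER validation blocks above encode SDM consistency rules: when the load-EFER control is set, EFER.LMA must mirror the IA-32e-mode (or host-address-space-size) control, and on the guest side LMA must additionally equal LME whenever CR0.PG is set. A standalone predicate for the guest-side rule, using the architectural bit positions; the reserved-bits test done by kvm_valid_efer() is elided:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define EFER_LME   (1ull << 8)
    #define EFER_LMA   (1ull << 10)
    #define X86_CR0_PG (1ull << 31)

    /*
     * ia32e: value of the "IA-32e mode guest" VM-entry control.
     * Returns true if the EFER field is consistent, mirroring the
     * checks nested_vmx_run() now performs before entering L2.
     */
    static bool guest_efer_consistent(uint64_t efer, uint64_t cr0, bool ia32e)
    {
        if (ia32e != !!(efer & EFER_LMA))
            return false;   /* LMA must match the entry control */
        if ((cr0 & X86_CR0_PG) && ia32e != !!(efer & EFER_LME))
            return false;   /* with paging on, LMA must equal LME */
        return true;
    }

    int main(void)
    {
        /* 64-bit guest with paging enabled: LMA and LME both set -> 1 */
        printf("%d\n", guest_efer_consistent(EFER_LMA | EFER_LME,
                                             X86_CR0_PG, true));
        /* entry control says 64-bit but EFER.LMA clear -> 0 */
        printf("%d\n", guest_efer_consistent(0, X86_CR0_PG, true));
        return 0;
    }

The host-side block is the same check with paging effectively assumed on, which is why it compares both LMA and LME against the control unconditionally.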
*/ @@ -7223,6 +7707,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) vcpu->cpu = cpu; put_cpu(); + vmx_segment_cache_clear(vmx); + vmcs12->launch_state = 1; prepare_vmcs02(vcpu, vmcs12); @@ -7273,6 +7759,48 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vcpu->arch.cr4_guest_owned_bits)); } +static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + u32 idt_vectoring; + unsigned int nr; + + if (vcpu->arch.exception.pending) { + nr = vcpu->arch.exception.nr; + idt_vectoring = nr | VECTORING_INFO_VALID_MASK; + + if (kvm_exception_is_soft(nr)) { + vmcs12->vm_exit_instruction_len = + vcpu->arch.event_exit_inst_len; + idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION; + } else + idt_vectoring |= INTR_TYPE_HARD_EXCEPTION; + + if (vcpu->arch.exception.has_error_code) { + idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK; + vmcs12->idt_vectoring_error_code = + vcpu->arch.exception.error_code; + } + + vmcs12->idt_vectoring_info_field = idt_vectoring; + } else if (vcpu->arch.nmi_pending) { + vmcs12->idt_vectoring_info_field = + INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; + } else if (vcpu->arch.interrupt.pending) { + nr = vcpu->arch.interrupt.nr; + idt_vectoring = nr | VECTORING_INFO_VALID_MASK; + + if (vcpu->arch.interrupt.soft) { + idt_vectoring |= INTR_TYPE_SOFT_INTR; + vmcs12->vm_entry_instruction_len = + vcpu->arch.event_exit_inst_len; + } else + idt_vectoring |= INTR_TYPE_EXT_INTR; + + vmcs12->idt_vectoring_info_field = idt_vectoring; + } +} + /* * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), @@ -7284,7 +7812,7 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) * exit-information fields only. Other fields are modified by L1 with VMWRITE, * which already writes to vmcs12 directly. 
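vmcs12_save_pending_event() in the hunk above is the inverse of the decode in __vmx_complete_interrupts(): it re-packs a pending exception, NMI, or interrupt into the vector/type/valid layout of IDT_VECTORING_INFO_FIELD so that L1 sees the event as an interrupted delivery. A compact sketch of that packing; the pending_event struct is invented, and the bit layout matches the decode sketch earlier:

    #include <stdint.h>
    #include <stdio.h>

    #define INFO_VALID          (1u << 31)
    #define INFO_DELIVER_CODE   (1u << 11)
    #define TYPE_EXT_INTR       (0u << 8)
    #define TYPE_NMI            (2u << 8)
    #define TYPE_HARD_EXCEPTION (3u << 8)
    #define NMI_VECTOR          2

    struct pending_event {
        enum { EV_NONE, EV_EXCEPTION, EV_NMI, EV_INTR } kind;
        unsigned int vector;
        int has_error_code;
    };

    static uint32_t pack_vectoring_info(const struct pending_event *ev)
    {
        switch (ev->kind) {
        case EV_EXCEPTION:
            return INFO_VALID | TYPE_HARD_EXCEPTION | ev->vector |
                   (ev->has_error_code ? INFO_DELIVER_CODE : 0);
        case EV_NMI:
            return INFO_VALID | TYPE_NMI | NMI_VECTOR;
        case EV_INTR:
            return INFO_VALID | TYPE_EXT_INTR | ev->vector;
        default:
            return 0;   /* nothing pending: field left clear */
        }
    }

    int main(void)
    {
        struct pending_event pf = { EV_EXCEPTION, 14, 1 };

        printf("%#x\n", (unsigned int)pack_vectoring_info(&pf)); /* 0x80000b0e */
        return 0;
    }

The soft-exception and soft-interrupt cases in the real function also stash the instruction length, which this sketch omits.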
*/ -void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) +static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { /* update guest state fields: */ vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); @@ -7332,16 +7860,19 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE); vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE); - vmcs12->guest_activity_state = vmcs_read32(GUEST_ACTIVITY_STATE); vmcs12->guest_interruptibility_info = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); vmcs12->guest_pending_dbg_exceptions = vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); + vmcs12->vm_entry_controls = + (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | + (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); + /* TODO: These cannot have changed unless we have MSR bitmaps and * the relevant bit asks not to trap the change */ vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); - if (vmcs12->vm_entry_controls & VM_EXIT_SAVE_IA32_PAT) + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); @@ -7349,21 +7880,38 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) /* update exit information fields: */ - vmcs12->vm_exit_reason = vmcs_read32(VM_EXIT_REASON); + vmcs12->vm_exit_reason = to_vmx(vcpu)->exit_reason; vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); - vmcs12->idt_vectoring_info_field = - vmcs_read32(IDT_VECTORING_INFO_FIELD); - vmcs12->idt_vectoring_error_code = - vmcs_read32(IDT_VECTORING_ERROR_CODE); + if ((vmcs12->vm_exit_intr_info & + (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == + (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) + vmcs12->vm_exit_intr_error_code = + vmcs_read32(VM_EXIT_INTR_ERROR_CODE); + vmcs12->idt_vectoring_info_field = 0; vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - /* clear vm-entry fields which are to be cleared on exit */ - if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) + if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) { + /* vm_entry_intr_info_field is cleared on exit. Emulate this + * instead of reading the real value. */ vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK; + + /* + * Transfer the event that L0 or L1 may have wanted to inject into + * L2 to IDT_VECTORING_INFO_FIELD. + */ + vmcs12_save_pending_event(vcpu, vmcs12); + } + + /* + * Drop what we picked up for L2 via vmx_complete_interrupts. It is + * preserved above and would only end up incorrectly in L1. + */ + vcpu->arch.nmi_injected = false; + kvm_clear_exception_queue(vcpu); + kvm_clear_interrupt_queue(vcpu); } /* @@ -7375,11 +7923,12 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) * Failures During or After Loading Guest State"). * This function should be called when the active VMCS is L1's (vmcs01).
*/ -void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) +static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) { if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) vcpu->arch.efer = vmcs12->host_ia32_efer; - if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) + else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) vcpu->arch.efer |= (EFER_LMA | EFER_LME); else vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); @@ -7387,6 +7936,7 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp); kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip); + vmx_set_rflags(vcpu, X86_EFLAGS_BIT1); /* * Note that calling vmx_set_cr0 is important, even if cr0 hasn't * actually changed, because it depends on the current state of @@ -7445,6 +7995,9 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, vmcs12->host_ia32_perf_global_ctrl); + + kvm_set_dr(vcpu, 7, 0x400); + vmcs_write64(GUEST_IA32_DEBUGCTL, 0); } /* @@ -7458,6 +8011,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) int cpu; struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + /* trying to cancel vmlaunch/vmresume is a bug */ + WARN_ON_ONCE(vmx->nested.nested_run_pending); + leave_guest_mode(vcpu); prepare_vmcs12(vcpu, vmcs12); @@ -7468,6 +8024,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) vcpu->cpu = cpu; put_cpu(); + vmx_segment_cache_clear(vmx); + /* if no vmcs02 cache requested, remove the one we used */ if (VMCS02_POOL_SIZE == 0) nested_free_vmcs02(vmx, vmx->nested.current_vmptr); @@ -7496,6 +8054,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR)); } else nested_vmx_succeed(vcpu); + if (enable_shadow_vmcs) + vmx->nested.sync_shadow_vmcs = true; } /* @@ -7513,6 +8073,8 @@ static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY; vmcs12->exit_qualification = qualification; nested_vmx_succeed(vcpu); + if (enable_shadow_vmcs) + to_vmx(vcpu)->nested.sync_shadow_vmcs = true; } static int vmx_check_intercept(struct kvm_vcpu *vcpu, @@ -7590,6 +8152,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .load_eoi_exitmap = vmx_load_eoi_exitmap, .hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_isr_update = vmx_hwapic_isr_update, + .sync_pir_to_irr = vmx_sync_pir_to_irr, + .deliver_posted_interrupt = vmx_deliver_posted_interrupt, .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, @@ -7618,6 +8182,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .set_tdp_cr3 = vmx_set_cr3, .check_intercept = vmx_check_intercept, + .handle_external_intr = vmx_handle_external_intr, }; static int __init vmx_init(void) @@ -7656,6 +8221,24 @@ static int __init vmx_init(void) (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_msr_bitmap_longmode_x2apic) goto out4; + vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_vmread_bitmap) + goto out5; + + vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_vmwrite_bitmap) + goto out6; + + memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); + memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); + /* shadowed read/write fields */ + for (i = 0; i < max_shadow_read_write_fields; i++) { + clear_bit(shadow_read_write_fields[i], vmx_vmwrite_bitmap); + 
clear_bit(shadow_read_write_fields[i], vmx_vmread_bitmap); + } + /* shadowed read only fields */ + for (i = 0; i < max_shadow_read_only_fields; i++) + clear_bit(shadow_read_only_fields[i], vmx_vmread_bitmap); /* * Allow direct access to the PC debug port (it is often used for I/O @@ -7674,7 +8257,7 @@ static int __init vmx_init(void) r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), __alignof__(struct vcpu_vmx), THIS_MODULE); if (r) - goto out3; + goto out7; #ifdef CONFIG_KEXEC rcu_assign_pointer(crash_vmclear_loaded_vmcss, @@ -7692,7 +8275,7 @@ static int __init vmx_init(void) memcpy(vmx_msr_bitmap_longmode_x2apic, vmx_msr_bitmap_longmode, PAGE_SIZE); - if (enable_apicv_reg_vid) { + if (enable_apicv) { for (msr = 0x800; msr <= 0x8ff; msr++) vmx_disable_intercept_msr_read_x2apic(msr); @@ -7722,6 +8305,12 @@ static int __init vmx_init(void) return 0; +out7: + free_page((unsigned long)vmx_vmwrite_bitmap); +out6: + free_page((unsigned long)vmx_vmread_bitmap); +out5: + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); out4: free_page((unsigned long)vmx_msr_bitmap_longmode); out3: @@ -7743,6 +8332,8 @@ static void __exit vmx_exit(void) free_page((unsigned long)vmx_msr_bitmap_longmode); free_page((unsigned long)vmx_io_bitmap_b); free_page((unsigned long)vmx_io_bitmap_a); + free_page((unsigned long)vmx_vmwrite_bitmap); + free_page((unsigned long)vmx_vmread_bitmap); #ifdef CONFIG_KEXEC rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e1721324c271..05a8b1a2300d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -162,8 +162,6 @@ u64 __read_mostly host_xcr0; static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); -static int kvm_vcpu_reset(struct kvm_vcpu *vcpu); - static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) { int i; @@ -263,6 +261,13 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) } EXPORT_SYMBOL_GPL(kvm_set_apic_base); +asmlinkage void kvm_spurious_fault(void) +{ + /* Fault while not rebooting. We want the trace. 
*/ + BUG(); +} +EXPORT_SYMBOL_GPL(kvm_spurious_fault); + #define EXCPT_BENIGN 0 #define EXCPT_CONTRIBUTORY 1 #define EXCPT_PF 2 @@ -840,23 +845,17 @@ static const u32 emulated_msrs[] = { MSR_IA32_MCG_CTL, }; -static int set_efer(struct kvm_vcpu *vcpu, u64 efer) +bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) { - u64 old_efer = vcpu->arch.efer; - if (efer & efer_reserved_bits) - return 1; - - if (is_paging(vcpu) - && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) - return 1; + return false; if (efer & EFER_FFXSR) { struct kvm_cpuid_entry2 *feat; feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) - return 1; + return false; } if (efer & EFER_SVME) { @@ -864,9 +863,24 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer) feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) - return 1; + return false; } + return true; +} +EXPORT_SYMBOL_GPL(kvm_valid_efer); + +static int set_efer(struct kvm_vcpu *vcpu, u64 efer) +{ + u64 old_efer = vcpu->arch.efer; + + if (!kvm_valid_efer(vcpu, efer)) + return 1; + + if (is_paging(vcpu) + && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) + return 1; + efer &= ~EFER_LMA; efer |= vcpu->arch.efer & EFER_LMA; @@ -1079,6 +1093,10 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) u32 thresh_lo, thresh_hi; int use_scaling = 0; + /* tsc_khz can be zero if TSC calibration fails */ + if (this_tsc_khz == 0) + return; + /* Compute a scale to convert nanoseconds in TSC cycles */ kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, &vcpu->arch.virtual_tsc_shift, @@ -1156,20 +1174,23 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) ns = get_kernel_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; - /* n.b - signed multiplication and division required */ - usdiff = data - kvm->arch.last_tsc_write; + if (vcpu->arch.virtual_tsc_khz) { + /* n.b - signed multiplication and division required */ + usdiff = data - kvm->arch.last_tsc_write; #ifdef CONFIG_X86_64 - usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; + usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; #else - /* do_div() only does unsigned */ - asm("idivl %2; xor %%edx, %%edx" - : "=A"(usdiff) - : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz)); + /* do_div() only does unsigned */ + asm("idivl %2; xor %%edx, %%edx" + : "=A"(usdiff) + : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz)); #endif - do_div(elapsed, 1000); - usdiff -= elapsed; - if (usdiff < 0) - usdiff = -usdiff; + do_div(elapsed, 1000); + usdiff -= elapsed; + if (usdiff < 0) + usdiff = -usdiff; + } else + usdiff = USEC_PER_SEC; /* disable TSC match window below */ /* * Special case: TSC write with a small delta (1 second) of virtual @@ -2034,7 +2055,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_P6_EVNTSEL0: case MSR_P6_EVNTSEL1: if (kvm_pmu_msr(vcpu, msr)) - return kvm_pmu_set_msr(vcpu, msr, data); + return kvm_pmu_set_msr(vcpu, msr_info); if (pr || data != 0) vcpu_unimpl(vcpu, "disabled perfctr wrmsr: " @@ -2080,7 +2101,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) return xen_hvm_config(vcpu, data); if (kvm_pmu_msr(vcpu, msr)) - return kvm_pmu_set_msr(vcpu, msr, data); + return kvm_pmu_set_msr(vcpu, msr_info); if (!ignore_msrs) { vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data); @@ -2479,7 +2500,6 @@ int kvm_dev_ioctl_check_extension(long ext) 
case KVM_CAP_USER_NMI: case KVM_CAP_REINJECT_CONTROL: case KVM_CAP_IRQ_INJECT_STATUS: - case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_IRQFD: case KVM_CAP_IOEVENTFD: case KVM_CAP_PIT2: @@ -2497,10 +2517,12 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_XSAVE: case KVM_CAP_ASYNC_PF: case KVM_CAP_GET_TSC_KHZ: - case KVM_CAP_PCI_2_3: case KVM_CAP_KVMCLOCK_CTRL: case KVM_CAP_READONLY_MEM: - case KVM_CAP_IRQFD_RESAMPLE: +#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT + case KVM_CAP_ASSIGN_DEV_IRQ: + case KVM_CAP_PCI_2_3: +#endif r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -2521,9 +2543,11 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PV_MMU: /* obsolete */ r = 0; break; +#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_IOMMU: r = iommu_present(&pci_bus_type); break; +#endif case KVM_CAP_MCE: r = KVM_MAX_MCE_BANKS; break; @@ -2679,6 +2703,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { + kvm_x86_ops->sync_pir_to_irr(vcpu); memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); return 0; @@ -2696,7 +2721,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - if (irq->irq < 0 || irq->irq >= KVM_NR_INTERRUPTS) + if (irq->irq >= KVM_NR_INTERRUPTS) return -EINVAL; if (irqchip_in_kernel(vcpu->kvm)) return -ENXIO; @@ -2819,10 +2844,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); events->nmi.pad = 0; - events->sipi_vector = vcpu->arch.sipi_vector; + events->sipi_vector = 0; /* never valid when reporting to user space */ events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING - | KVM_VCPUEVENT_VALID_SIPI_VECTOR | KVM_VCPUEVENT_VALID_SHADOW); memset(&events->reserved, 0, sizeof(events->reserved)); } @@ -2853,8 +2877,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.nmi_pending = events->nmi.pending; kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); - if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) - vcpu->arch.sipi_vector = events->sipi_vector; + if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR && + kvm_vcpu_has_lapic(vcpu)) + vcpu->arch.apic->sipi_vector = events->sipi_vector; kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -3478,13 +3503,15 @@ out: return r; } -int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event) +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, + bool line_status) { if (!irqchip_in_kernel(kvm)) return -ENXIO; irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, - irq_event->irq, irq_event->level); + irq_event->irq, irq_event->level, + line_status); return 0; } @@ -4752,11 +4779,15 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) } static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, - bool write_fault_to_shadow_pgtable) + bool write_fault_to_shadow_pgtable, + int emulation_type) { gpa_t gpa = cr2; pfn_t pfn; + if (emulation_type & EMULTYPE_NO_REEXECUTE) + return false; + if (!vcpu->arch.mmu.direct_map) { /* * Write permission should be allowed since only @@ -4899,8 +4930,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if (r != EMULATION_OK) { if (emulation_type & EMULTYPE_TRAP_UD) return EMULATE_FAIL; - if (reexecute_instruction(vcpu, cr2, - write_fault_to_spt)) + if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, + emulation_type)) return EMULATE_DONE; if (emulation_type & 
EMULTYPE_SKIP) return EMULATE_FAIL; @@ -4930,7 +4961,8 @@ restart: return EMULATE_DONE; if (r == EMULATION_FAILED) { - if (reexecute_instruction(vcpu, cr2, write_fault_to_spt)) + if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, + emulation_type)) return EMULATE_DONE; return handle_emulation_failure(vcpu); @@ -5641,14 +5673,20 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) #endif } -static void update_eoi_exitmap(struct kvm_vcpu *vcpu) +static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { u64 eoi_exit_bitmap[4]; + u32 tmr[8]; + + if (!kvm_apic_hw_enabled(vcpu->arch.apic)) + return; memset(eoi_exit_bitmap, 0, 32); + memset(tmr, 0, 32); - kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap); + kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr); kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); + kvm_apic_update_tmr(vcpu, tmr); } static int vcpu_enter_guest(struct kvm_vcpu *vcpu) @@ -5656,7 +5694,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) int r; bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && vcpu->run->request_interrupt_window; - bool req_immediate_exit = 0; + bool req_immediate_exit = false; if (vcpu->requests) { if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) @@ -5698,24 +5736,30 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) record_steal_time(vcpu); if (kvm_check_request(KVM_REQ_NMI, vcpu)) process_nmi(vcpu); - req_immediate_exit = - kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); if (kvm_check_request(KVM_REQ_PMU, vcpu)) kvm_handle_pmu_event(vcpu); if (kvm_check_request(KVM_REQ_PMI, vcpu)) kvm_deliver_pmi(vcpu); - if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu)) - update_eoi_exitmap(vcpu); + if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) + vcpu_scan_ioapic(vcpu); } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { + kvm_apic_accept_events(vcpu); + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + r = 1; + goto out; + } + inject_pending_event(vcpu); /* enable NMI/IRQ window open exits if needed */ if (vcpu->arch.nmi_pending) - kvm_x86_ops->enable_nmi_window(vcpu); + req_immediate_exit = + kvm_x86_ops->enable_nmi_window(vcpu) != 0; else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) - kvm_x86_ops->enable_irq_window(vcpu); + req_immediate_exit = + kvm_x86_ops->enable_irq_window(vcpu) != 0; if (kvm_lapic_enabled(vcpu)) { /* @@ -5794,7 +5838,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); - local_irq_enable(); + + /* Interrupt is enabled by handle_external_intr() */ + kvm_x86_ops->handle_external_intr(vcpu); ++vcpu->stat.exits; @@ -5843,16 +5889,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) int r; struct kvm *kvm = vcpu->kvm; - if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { - pr_debug("vcpu %d received sipi with vector # %x\n", - vcpu->vcpu_id, vcpu->arch.sipi_vector); - kvm_lapic_reset(vcpu); - r = kvm_vcpu_reset(vcpu); - if (r) - return r; - vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - } - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); r = vapic_enter(vcpu); if (r) { @@ -5869,8 +5905,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); kvm_vcpu_block(vcpu); vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); - if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) - { + if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) { + kvm_apic_accept_events(vcpu); switch(vcpu->arch.mp_state) { case KVM_MP_STATE_HALTED: vcpu->arch.mp_state = @@ -5878,7 +5914,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) case 
KVM_MP_STATE_RUNNABLE: vcpu->arch.apf.halted = false; break; - case KVM_MP_STATE_SIPI_RECEIVED: + case KVM_MP_STATE_INIT_RECEIVED: + break; default: r = -EINTR; break; @@ -6013,6 +6050,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { kvm_vcpu_block(vcpu); + kvm_apic_accept_events(vcpu); clear_bit(KVM_REQ_UNHALT, &vcpu->requests); r = -EAGAIN; goto out; @@ -6169,6 +6207,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { + kvm_apic_accept_events(vcpu); mp_state->mp_state = vcpu->arch.mp_state; return 0; } @@ -6176,7 +6215,15 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - vcpu->arch.mp_state = mp_state->mp_state; + if (!kvm_vcpu_has_lapic(vcpu) && + mp_state->mp_state != KVM_MP_STATE_RUNNABLE) + return -EINVAL; + + if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events); + } else + vcpu->arch.mp_state = mp_state->mp_state; kvm_make_request(KVM_REQ_EVENT, vcpu); return 0; } @@ -6475,9 +6522,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) r = vcpu_load(vcpu); if (r) return r; - r = kvm_vcpu_reset(vcpu); - if (r == 0) - r = kvm_mmu_setup(vcpu); + kvm_vcpu_reset(vcpu); + r = kvm_mmu_setup(vcpu); vcpu_put(vcpu); return r; @@ -6514,7 +6560,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_x86_ops->vcpu_free(vcpu); } -static int kvm_vcpu_reset(struct kvm_vcpu *vcpu) +void kvm_vcpu_reset(struct kvm_vcpu *vcpu) { atomic_set(&vcpu->arch.nmi_queued, 0); vcpu->arch.nmi_pending = 0; @@ -6541,7 +6587,18 @@ static int kvm_vcpu_reset(struct kvm_vcpu *vcpu) vcpu->arch.regs_avail = ~0; vcpu->arch.regs_dirty = ~0; - return kvm_x86_ops->vcpu_reset(vcpu); + kvm_x86_ops->vcpu_reset(vcpu); +} + +void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) +{ + struct kvm_segment cs; + + kvm_get_segment(vcpu, &cs, VCPU_SREG_CS); + cs.selector = vector << 8; + cs.base = vector << 12; + kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); + kvm_rip_write(vcpu, 0); } int kvm_arch_hardware_enable(void *garbage) @@ -6706,8 +6763,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) } vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; - if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) { + r = -ENOMEM; goto fail_free_mce_banks; + } r = fx_init(vcpu); if (r) @@ -6811,6 +6870,23 @@ void kvm_arch_sync_events(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { + if (current->mm == kvm->mm) { + /* + * Free memory regions allocated on behalf of userspace, + * unless the memory map has changed due to process exit + * or fd copying.
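Stepping back to kvm_vcpu_deliver_sipi_vector() in the hunk above: it implements the standard x86 AP startup convention, where the 8-bit SIPI vector selects a 4 KiB-aligned real-mode page. CS.selector becomes vector << 8, CS.base becomes vector << 12 (the selector times 16), and RIP is zeroed, so execution starts at physical address vector * 4096. A quick check of the arithmetic:

    #include <stdio.h>

    int main(void)
    {
        unsigned int vector = 0x9a;          /* example SIPI vector */
        unsigned int cs_sel = vector << 8;   /* real-mode selector */
        unsigned int cs_base = vector << 12; /* selector * 16 */
        unsigned int ip = 0;

        /* physical start address = base + ip = vector * 4096 */
        printf("CS=%04x base=%05x start=%05x\n",
               cs_sel, cs_base, cs_base + ip);  /* CS=9a00 base=9a000 */
        return 0;
    }

This is also why firmware places its AP trampoline below 1 MiB at a page-aligned address: only addresses of the form vector * 4096 are reachable as SIPI targets.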
+ */ + struct kvm_userspace_memory_region mem; + memset(&mem, 0, sizeof(mem)); + mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; + kvm_set_memory_region(kvm, &mem); + + mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; + kvm_set_memory_region(kvm, &mem); + + mem.slot = TSS_PRIVATE_MEMSLOT; + kvm_set_memory_region(kvm, &mem); + } kvm_iommu_unmap_guest(kvm); kfree(kvm->arch.vpic); kfree(kvm->arch.vioapic); @@ -6903,24 +6979,21 @@ out_free: int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - bool user_alloc) + enum kvm_mr_change change) { - int npages = memslot->npages; - /* * Only private memory slots need to be mapped here since * KVM_SET_MEMORY_REGION ioctl is no longer supported. */ - if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) { + if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) { unsigned long userspace_addr; /* * MAP_SHARED to prevent internal slot pages from being moved * by fork()/COW. */ - userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE, + userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0); @@ -6935,17 +7008,17 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc) + const struct kvm_memory_slot *old, + enum kvm_mr_change change) { - int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; + int nr_mmu_pages = 0; - if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) { + if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) { int ret; - ret = vm_munmap(old.userspace_addr, - old.npages * PAGE_SIZE); + ret = vm_munmap(old->userspace_addr, + old->npages * PAGE_SIZE); if (ret < 0) printk(KERN_WARNING "kvm_vm_ioctl_set_memory_region: " @@ -6962,14 +7035,14 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * Existing largepage mappings are destroyed here and new ones will * not be created until the end of the logging. */ - if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) + if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) kvm_mmu_slot_remove_write_access(kvm, mem->slot); /* * If memory slot is created, or moved, we need to clear all * mmio sptes. */ - if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) { - kvm_mmu_zap_all(kvm); + if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { + kvm_mmu_zap_mmio_sptes(kvm); kvm_reload_remote_mmus(kvm); } } @@ -6991,7 +7064,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && !vcpu->arch.apf.halted) || !list_empty_careful(&vcpu->async_pf.done) - || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED + || kvm_apic_has_events(vcpu) || atomic_read(&vcpu->arch.nmi_queued) || (kvm_arch_interrupt_allowed(vcpu) && kvm_cpu_has_interrupt(vcpu)); diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index 7fabc4c01993..facbf26bc6bb 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -373,6 +373,9 @@ int rsa_extract_mpi(void *context, size_t hdrlen, return 0; } +/* The keyIdentifier in AuthorityKeyIdentifier SEQUENCE is tag(CONT,PRIM,0) */ +#define SEQ_TAG_KEYID (ASN1_CONT << 6) + /* * Process certificate extensions that are used to qualify the certificate. 
*/ @@ -407,21 +410,57 @@ int x509_process_extension(void *context, size_t hdrlen, } if (ctx->last_oid == OID_authorityKeyIdentifier) { + size_t key_len; + /* Get hold of the CA key fingerprint */ if (vlen < 5) return -EBADMSG; - if (v[0] != (ASN1_SEQ | (ASN1_CONS << 5)) || - v[1] != vlen - 2 || - v[2] != (ASN1_CONT << 6) || - v[3] != vlen - 4) + + /* Authority Key Identifier must be a Constructed SEQUENCE */ + if (v[0] != (ASN1_SEQ | (ASN1_CONS << 5))) return -EBADMSG; - v += 4; - vlen -= 4; - f = kmalloc(vlen * 2 + 1, GFP_KERNEL); + /* Authority Key Identifier is not indefinite length */ + if (unlikely(vlen == ASN1_INDEFINITE_LENGTH)) + return -EBADMSG; + + if (vlen < ASN1_INDEFINITE_LENGTH) { + /* Short Form length */ + if (v[1] != vlen - 2 || + v[2] != SEQ_TAG_KEYID || + v[3] > vlen - 4) + return -EBADMSG; + + key_len = v[3]; + v += 4; + } else { + /* Long Form length */ + size_t seq_len = 0; + size_t sub = v[1] - ASN1_INDEFINITE_LENGTH; + + if (sub > 2) + return -EBADMSG; + + /* calculate the length from subsequent octets */ + v += 2; + for (i = 0; i < sub; i++) { + seq_len <<= 8; + seq_len |= v[i]; + } + + if (seq_len != vlen - 2 - sub || + v[sub] != SEQ_TAG_KEYID || + v[sub + 1] > vlen - 4 - sub) + return -EBADMSG; + + key_len = v[sub + 1]; + v += (sub + 2); + } + + f = kmalloc(key_len * 2 + 1, GFP_KERNEL); if (!f) return -ENOMEM; - for (i = 0; i < vlen; i++) + for (i = 0; i < key_len; i++) sprintf(f + i * 2, "%02x", v[i]); pr_debug("authority %s\n", f); ctx->cert->authority = f; diff --git a/drivers/char/ds1620.c b/drivers/char/ds1620.c index 544b4ce617f8..0fae5296e311 100644 --- a/drivers/char/ds1620.c +++ b/drivers/char/ds1620.c @@ -355,7 +355,7 @@ static const struct file_operations ds1620_proc_therm_fops = { .open = ds1620_proc_therm_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; #endif diff --git a/drivers/char/efirtc.c b/drivers/char/efirtc.c index ea54a6e3f5ad..e39e7402e623 100644 --- a/drivers/char/efirtc.c +++ b/drivers/char/efirtc.c @@ -369,7 +369,7 @@ static const struct file_operations efi_rtc_proc_fops = { .open = efi_rtc_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; static int __init diff --git a/drivers/char/genrtc.c b/drivers/char/genrtc.c index bc9b84d56ee4..4f943759d376 100644 --- a/drivers/char/genrtc.c +++ b/drivers/char/genrtc.c @@ -465,7 +465,7 @@ static const struct file_operations gen_rtc_proc_fops = { .open = gen_rtc_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; static int __init gen_rtc_proc_init(void) diff --git a/drivers/gpio/gpio-ucb1400.c b/drivers/gpio/gpio-ucb1400.c index 26405efe0f9f..6d0feb234d3c 100644 --- a/drivers/gpio/gpio-ucb1400.c +++ b/drivers/gpio/gpio-ucb1400.c @@ -12,8 +12,6 @@ #include <linux/module.h> #include <linux/ucb1400.h> -struct ucb1400_gpio_data *ucbdata; - static int ucb1400_gpio_dir_in(struct gpio_chip *gc, unsigned off) { struct ucb1400_gpio *gpio; @@ -50,7 +48,7 @@ static int ucb1400_gpio_probe(struct platform_device *dev) struct ucb1400_gpio *ucb = dev->dev.platform_data; int err = 0; - if (!(ucbdata && ucbdata->gpio_offset)) { + if (!(ucb && ucb->gpio_offset)) { err = -EINVAL; goto err; } @@ -58,7 +56,7 @@ static int ucb1400_gpio_probe(struct platform_device *dev) platform_set_drvdata(dev, ucb); ucb->gc.label = "ucb1400_gpio"; - ucb->gc.base = ucbdata->gpio_offset; + ucb->gc.base = ucb->gpio_offset; ucb->gc.ngpio = 10; ucb->gc.owner = THIS_MODULE; 
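Returning to the x509_cert_parser.c hunk above: the rewrite exists because the AuthorityKeyIdentifier SEQUENCE may encode its length in DER short form (a single byte below 0x80) or long form (0x80 + n followed by n length octets), and the old code only handled the short form. A hedged standalone sketch of the two-form length decode, with error handling reduced to a -1 sentinel:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /*
     * Decode a DER definite length starting at p[0] (the octet after the
     * tag). Returns the content length and stores the number of length
     * octets consumed in *hdr, or returns -1 on malformed input.
     */
    static long der_length(const uint8_t *p, size_t avail, size_t *hdr)
    {
        if (avail < 1)
            return -1;
        if (p[0] < 0x80) {              /* short form */
            *hdr = 1;
            return p[0];
        }
        size_t sub = p[0] - 0x80;       /* number of subsequent octets */
        if (sub == 0 || sub > 2 || avail < 1 + sub)
            return -1;                  /* indefinite or oversized */
        long len = 0;
        for (size_t i = 0; i < sub; i++)
            len = (len << 8) | p[1 + i];
        *hdr = 1 + sub;
        return len;
    }

    int main(void)
    {
        const uint8_t short_form[] = { 0x16 };
        const uint8_t long_form[]  = { 0x82, 0x01, 0x00 };
        size_t hdr;

        printf("%ld\n", der_length(short_form, 1, &hdr));  /* 22 */
        printf("%ld\n", der_length(long_form, 3, &hdr));   /* 256 */
        return 0;
    }

The patch additionally checks that the inner keyIdentifier element fits inside the outer SEQUENCE (the v[3] > vlen - 4 style comparisons), which this sketch leaves out.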
@@ -72,8 +70,8 @@ static int ucb1400_gpio_probe(struct platform_device *dev) if (err) goto err; - if (ucbdata && ucbdata->gpio_setup) - err = ucbdata->gpio_setup(&dev->dev, ucb->gc.ngpio); + if (ucb && ucb->gpio_setup) + err = ucb->gpio_setup(&dev->dev, ucb->gc.ngpio); err: return err; @@ -85,8 +83,8 @@ static int ucb1400_gpio_remove(struct platform_device *dev) int err = 0; struct ucb1400_gpio *ucb = platform_get_drvdata(dev); - if (ucbdata && ucbdata->gpio_teardown) { - err = ucbdata->gpio_teardown(&dev->dev, ucb->gc.ngpio); + if (ucb && ucb->gpio_teardown) { + err = ucb->gpio_teardown(&dev->dev, ucb->gc.ngpio); if (err) return err; } @@ -103,11 +101,6 @@ static struct platform_driver ucb1400_gpio_driver = { }, }; -void __init ucb1400_gpio_set_data(struct ucb1400_gpio_data *data) -{ - ucbdata = data; -} - module_platform_driver(ucb1400_gpio_driver); MODULE_DESCRIPTION("Philips UCB1400 GPIO driver"); diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index ac0500667000..6a195d5e90ff 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -628,4 +628,16 @@ config KEYBOARD_W90P910 To compile this driver as a module, choose M here: the module will be called w90p910_keypad. +config KEYBOARD_CROS_EC + tristate "ChromeOS EC keyboard" + select INPUT_MATRIXKMAP + depends on MFD_CROS_EC + help + Say Y here to enable the matrix keyboard used by ChromeOS devices + and implemented on the ChromeOS EC. You must enable one bus option + (MFD_CROS_EC_I2C or MFD_CROS_EC_SPI) to use this. + + To compile this driver as a module, choose M here: the + module will be called cros_ec_keyb. + endif diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile index 49b16453d00e..0c43e8cf8d0e 100644 --- a/drivers/input/keyboard/Makefile +++ b/drivers/input/keyboard/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_KEYBOARD_AMIGA) += amikbd.o obj-$(CONFIG_KEYBOARD_ATARI) += atakbd.o obj-$(CONFIG_KEYBOARD_ATKBD) += atkbd.o obj-$(CONFIG_KEYBOARD_BFIN) += bf54x-keys.o +obj-$(CONFIG_KEYBOARD_CROS_EC) += cros_ec_keyb.o obj-$(CONFIG_KEYBOARD_DAVINCI) += davinci_keyscan.o obj-$(CONFIG_KEYBOARD_EP93XX) += ep93xx_keypad.o obj-$(CONFIG_KEYBOARD_GOLDFISH_EVENTS) += goldfish_events.o diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c new file mode 100644 index 000000000000..49557f27bfa6 --- /dev/null +++ b/drivers/input/keyboard/cros_ec_keyb.c @@ -0,0 +1,334 @@ +/* + * ChromeOS EC keyboard driver + * + * Copyright (C) 2012 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This driver uses the Chrome OS EC byte-level message-based protocol for + * communicating the keyboard state (which keys are pressed) from a keyboard EC + * to the AP over some bus (such as i2c, lpc, spi). The EC does debouncing, + * but everything else (including deghosting) is done here. The main + * motivation for this is to keep the EC firmware as simple as possible, since + * it cannot be easily upgraded and EC flash/IRAM space is relatively + * expensive. 
+ */ + +#include <linux/module.h> +#include <linux/i2c.h> +#include <linux/input.h> +#include <linux/kernel.h> +#include <linux/notifier.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/input/matrix_keypad.h> +#include <linux/mfd/cros_ec.h> +#include <linux/mfd/cros_ec_commands.h> + +/* + * @rows: Number of rows in the keypad + * @cols: Number of columns in the keypad + * @row_shift: log2 of the number of columns, rounded up + * @keymap_data: Matrix keymap data used to convert to keyscan values + * @ghost_filter: true to enable the matrix key-ghosting filter + * @dev: Device pointer + * @idev: Input device + * @ec: Top level ChromeOS device to use to talk to EC + * @notifier: interrupt event notifier for transport devices + */ +struct cros_ec_keyb { + unsigned int rows; + unsigned int cols; + int row_shift; + const struct matrix_keymap_data *keymap_data; + bool ghost_filter; + + struct device *dev; + struct input_dev *idev; + struct cros_ec_device *ec; + struct notifier_block notifier; +}; + + +static bool cros_ec_keyb_row_has_ghosting(struct cros_ec_keyb *ckdev, + uint8_t *buf, int row) +{ + int pressed_in_row = 0; + int row_has_teeth = 0; + int col, mask; + + mask = 1 << row; + for (col = 0; col < ckdev->cols; col++) { + if (buf[col] & mask) { + pressed_in_row++; + row_has_teeth |= buf[col] & ~mask; + if (pressed_in_row > 1 && row_has_teeth) { + /* ghosting */ + dev_dbg(ckdev->dev, + "ghost found at: r%d c%d, pressed %d, teeth 0x%x\n", + row, col, pressed_in_row, + row_has_teeth); + return true; + } + } + } + + return false; +} + +/* + * Returns true when there is at least one combination of pressed keys that + * results in ghosting. + */ +static bool cros_ec_keyb_has_ghosting(struct cros_ec_keyb *ckdev, uint8_t *buf) +{ + int row; + + /* + * Ghosting happens if for any pressed key X there are other keys + * pressed both in the same row and column of X as, for instance, + * in the following diagram: + * + * . . Y . g . + * . . . . . . + * . . . . . . + * . . X . Z . + * + * In this case only X, Y, and Z are pressed, but g appears to be + * pressed too (see Wikipedia). + * + * We can detect ghosting in a single pass (*) over the keyboard state + * by maintaining two arrays. pressed_in_row counts how many pressed + * keys we have found in a row. row_has_teeth is true if any of the + * pressed keys for this row has other pressed keys in its column. If + * at any point of the scan we find that a row has multiple pressed + * keys, and at least one of them is at the intersection with a column + * with multiple pressed keys, we're sure there is ghosting. + * Conversely, if there is ghosting, we will detect such a situation for + * at least one key during the pass. + * + * (*) This looks linear in the number of keys, but it's not. We can + * cheat because the number of rows is small. + */ + for (row = 0; row < ckdev->rows; row++) + if (cros_ec_keyb_row_has_ghosting(ckdev, buf, row)) + return true; + + return false; +} + +/* + * Compares the new keyboard state to the old one and produces key + * press/release events accordingly. The keyboard state is 13 bytes (one byte + * per column). + */ +static void cros_ec_keyb_process(struct cros_ec_keyb *ckdev, + uint8_t *kb_state, int len) +{ + struct input_dev *idev = ckdev->idev; + int col, row; + int new_state; + int num_cols; + + num_cols = len; + + if (ckdev->ghost_filter && cros_ec_keyb_has_ghosting(ckdev, kb_state)) { + /* + * Simple-minded solution: ignore this state.
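A userspace rendering of the ghost test implemented by cros_ec_keyb_row_has_ghosting() above: the state is one byte per column with one bit per row, and a row is ghosted once it holds more than one pressed key while at least one of those keys shares a column with a key in another row. The sketch assumes the 8-row, 13-column layout from the binding example.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define ROWS 8
    #define COLS 13

    static bool row_has_ghosting(const uint8_t *buf, int row)
    {
        int pressed_in_row = 0;
        uint8_t row_has_teeth = 0;  /* other rows pressed in the same cols */
        uint8_t mask = 1 << row;

        for (int col = 0; col < COLS; col++) {
            if (buf[col] & mask) {
                pressed_in_row++;
                row_has_teeth |= buf[col] & ~mask;
                if (pressed_in_row > 1 && row_has_teeth)
                    return true;
            }
        }
        return false;
    }

    static bool has_ghosting(const uint8_t *buf)
    {
        for (int row = 0; row < ROWS; row++)
            if (row_has_ghosting(buf, row))
                return true;
        return false;
    }

    int main(void)
    {
        uint8_t buf[COLS] = { 0 };

        buf[2] = 0x09;  /* keys at (r0,c2) and (r3,c2) */
        buf[4] = 0x08;  /* key at (r3,c4): the L-shape reads as a ghost */
        printf("%d\n", has_ghosting(buf));  /* 1 */
        return 0;
    }

The driver runs this once per EC scan and, as the comment in cros_ec_keyb_process() explains, simply drops any state that fails the test.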
The obvious + * improvement is to only ignore changes to keys involved in + * the ghosting, but process the other changes. + */ + dev_dbg(ckdev->dev, "ghosting found\n"); + return; + } + + for (col = 0; col < ckdev->cols; col++) { + for (row = 0; row < ckdev->rows; row++) { + int pos = MATRIX_SCAN_CODE(row, col, ckdev->row_shift); + const unsigned short *keycodes = idev->keycode; + int code; + + code = keycodes[pos]; + new_state = kb_state[col] & (1 << row); + if (!!new_state != test_bit(code, idev->key)) { + dev_dbg(ckdev->dev, + "changed: [r%d c%d]: byte %02x\n", + row, col, new_state); + + input_report_key(idev, code, new_state); + } + } + } + input_sync(ckdev->idev); +} + +static int cros_ec_keyb_open(struct input_dev *dev) +{ + struct cros_ec_keyb *ckdev = input_get_drvdata(dev); + + return blocking_notifier_chain_register(&ckdev->ec->event_notifier, + &ckdev->notifier); +} + +static void cros_ec_keyb_close(struct input_dev *dev) +{ + struct cros_ec_keyb *ckdev = input_get_drvdata(dev); + + blocking_notifier_chain_unregister(&ckdev->ec->event_notifier, + &ckdev->notifier); +} + +static int cros_ec_keyb_get_state(struct cros_ec_keyb *ckdev, uint8_t *kb_state) +{ + return ckdev->ec->command_recv(ckdev->ec, EC_CMD_MKBP_STATE, + kb_state, ckdev->cols); +} + +static int cros_ec_keyb_work(struct notifier_block *nb, + unsigned long state, void *_notify) +{ + int ret; + struct cros_ec_keyb *ckdev = container_of(nb, struct cros_ec_keyb, + notifier); + uint8_t kb_state[ckdev->cols]; + + ret = cros_ec_keyb_get_state(ckdev, kb_state); + if (ret >= 0) + cros_ec_keyb_process(ckdev, kb_state, ret); + + return NOTIFY_DONE; +} + +/* Clear any keys in the buffer */ +static void cros_ec_keyb_clear_keyboard(struct cros_ec_keyb *ckdev) +{ + uint8_t old_state[ckdev->cols]; + uint8_t new_state[ckdev->cols]; + unsigned long duration; + int i, ret; + + /* + * Keep reading until we see that the scan state does not change. + * That indicates that we are done. + * + * Assume that the EC keyscan buffer is at most 32 deep. 
+ */ + duration = jiffies; + ret = cros_ec_keyb_get_state(ckdev, new_state); + for (i = 1; !ret && i < 32; i++) { + memcpy(old_state, new_state, sizeof(old_state)); + ret = cros_ec_keyb_get_state(ckdev, new_state); + if (0 == memcmp(old_state, new_state, sizeof(old_state))) + break; + } + duration = jiffies - duration; + dev_info(ckdev->dev, "Discarded %d keyscan(s) in %dus\n", i, + jiffies_to_usecs(duration)); +} + +static int cros_ec_keyb_probe(struct platform_device *pdev) +{ + struct cros_ec_device *ec = dev_get_drvdata(pdev->dev.parent); + struct device *dev = ec->dev; + struct cros_ec_keyb *ckdev; + struct input_dev *idev; + struct device_node *np; + int err; + + np = pdev->dev.of_node; + if (!np) + return -ENODEV; + + ckdev = devm_kzalloc(&pdev->dev, sizeof(*ckdev), GFP_KERNEL); + if (!ckdev) + return -ENOMEM; + err = matrix_keypad_parse_of_params(&pdev->dev, &ckdev->rows, + &ckdev->cols); + if (err) + return err; + + idev = devm_input_allocate_device(&pdev->dev); + if (!idev) + return -ENOMEM; + + ckdev->ec = ec; + ckdev->notifier.notifier_call = cros_ec_keyb_work; + ckdev->dev = dev; + dev_set_drvdata(&pdev->dev, ckdev); + + idev->name = ec->ec_name; + idev->phys = ec->phys_name; + __set_bit(EV_REP, idev->evbit); + + idev->id.bustype = BUS_VIRTUAL; + idev->id.version = 1; + idev->id.product = 0; + idev->dev.parent = &pdev->dev; + idev->open = cros_ec_keyb_open; + idev->close = cros_ec_keyb_close; + + ckdev->ghost_filter = of_property_read_bool(np, + "google,needs-ghost-filter"); + + err = matrix_keypad_build_keymap(NULL, NULL, ckdev->rows, ckdev->cols, + NULL, idev); + if (err) { + dev_err(dev, "cannot build key matrix\n"); + return err; + } + + ckdev->row_shift = get_count_order(ckdev->cols); + + input_set_capability(idev, EV_MSC, MSC_SCAN); + input_set_drvdata(idev, ckdev); + ckdev->idev = idev; + err = input_register_device(ckdev->idev); + if (err) { + dev_err(dev, "cannot register input device\n"); + return err; + } + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int cros_ec_keyb_resume(struct device *dev) +{ + struct cros_ec_keyb *ckdev = dev_get_drvdata(dev); + + /* + * When the EC is not a wake source, then it could not have caused the + * resume, so we clear the EC's key scan buffer. If the EC was a + * wake source (e.g. the lid is open and the user might press a key to + * wake) then the key scan buffer should be preserved. 
+ */ + if (ckdev->ec->was_wake_device) + cros_ec_keyb_clear_keyboard(ckdev); + + return 0; +} + +#endif + +static SIMPLE_DEV_PM_OPS(cros_ec_keyb_pm_ops, NULL, cros_ec_keyb_resume); + +static struct platform_driver cros_ec_keyb_driver = { + .probe = cros_ec_keyb_probe, + .driver = { + .name = "cros-ec-keyb", + .pm = &cros_ec_keyb_pm_ops, + }, +}; + +module_platform_driver(cros_ec_keyb_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ChromeOS EC keyboard driver"); +MODULE_ALIAS("platform:cros-ec-keyb"); diff --git a/drivers/input/keyboard/lpc32xx-keys.c b/drivers/input/keyboard/lpc32xx-keys.c index 1b8add6cfb9d..42181435fe67 100644 --- a/drivers/input/keyboard/lpc32xx-keys.c +++ b/drivers/input/keyboard/lpc32xx-keys.c @@ -144,12 +144,13 @@ static int lpc32xx_parse_dt(struct device *dev, { struct device_node *np = dev->of_node; u32 rows = 0, columns = 0; + int err; - of_property_read_u32(np, "keypad,num-rows", &rows); - of_property_read_u32(np, "keypad,num-columns", &columns); - if (!rows || rows != columns) { - dev_err(dev, - "rows and columns must be specified and be equal!\n"); + err = matrix_keypad_parse_of_params(dev, &rows, &columns); + if (err) + return err; + if (rows != columns) { + dev_err(dev, "rows and columns must be equal!\n"); return -EINVAL; } diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c index e25b022692cd..1b289092f4e3 100644 --- a/drivers/input/keyboard/omap4-keypad.c +++ b/drivers/input/keyboard/omap4-keypad.c @@ -215,18 +215,12 @@ static int omap4_keypad_parse_dt(struct device *dev, struct omap4_keypad *keypad_data) { struct device_node *np = dev->of_node; + int err; - if (!np) { - dev_err(dev, "missing DT data"); - return -EINVAL; - } - - of_property_read_u32(np, "keypad,num-rows", &keypad_data->rows); - of_property_read_u32(np, "keypad,num-columns", &keypad_data->cols); - if (!keypad_data->rows || !keypad_data->cols) { - dev_err(dev, "number of keypad rows/columns not specified\n"); - return -EINVAL; - } + err = matrix_keypad_parse_of_params(dev, &keypad_data->rows, + &keypad_data->cols); + if (err) + return err; if (of_get_property(np, "linux,input-no-autorepeat", NULL)) keypad_data->no_autorepeat = true; diff --git a/drivers/input/keyboard/tca8418_keypad.c b/drivers/input/keyboard/tca8418_keypad.c index a34cc6714e5b..55c15304ddbc 100644 --- a/drivers/input/keyboard/tca8418_keypad.c +++ b/drivers/input/keyboard/tca8418_keypad.c @@ -288,8 +288,11 @@ static int tca8418_keypad_probe(struct i2c_client *client, irq_is_gpio = pdata->irq_is_gpio; } else { struct device_node *np = dev->of_node; - of_property_read_u32(np, "keypad,num-rows", &rows); - of_property_read_u32(np, "keypad,num-columns", &cols); + int err; + + err = matrix_keypad_parse_of_params(dev, &rows, &cols); + if (err) + return err; rep = of_property_read_bool(np, "keypad,autorepeat"); } diff --git a/drivers/input/matrix-keymap.c b/drivers/input/matrix-keymap.c index 3ae496ea5fe6..08b61f506db6 100644 --- a/drivers/input/matrix-keymap.c +++ b/drivers/input/matrix-keymap.c @@ -50,6 +50,26 @@ static bool matrix_keypad_map_key(struct input_dev *input_dev, } #ifdef CONFIG_OF +int matrix_keypad_parse_of_params(struct device *dev, + unsigned int *rows, unsigned int *cols) +{ + struct device_node *np = dev->of_node; + + if (!np) { + dev_err(dev, "missing DT data"); + return -EINVAL; + } + of_property_read_u32(np, "keypad,num-rows", rows); + of_property_read_u32(np, "keypad,num-columns", cols); + if (!*rows || !*cols) { + dev_err(dev, "number of keypad rows/columns not 
specified\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(matrix_keypad_parse_of_params);
+
 static int matrix_keypad_parse_of_keymap(const char *propname,
 					 unsigned int rows, unsigned int cols,
 					 struct input_dev *input_dev)
diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c
index 770479df8657..86b822806e95 100644
--- a/drivers/input/misc/hp_sdc_rtc.c
+++ b/drivers/input/misc/hp_sdc_rtc.c
@@ -515,7 +515,7 @@ static const struct file_operations hp_sdc_rtc_proc_fops = {
 	.open = hp_sdc_rtc_proc_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = seq_release,
+	.release = single_release,
 };
 
 static int hp_sdc_rtc_ioctl(struct file *file,
diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c
index 893fc1ba6ead..31ca55548ef9 100644
--- a/drivers/mfd/88pm860x-core.c
+++ b/drivers/mfd/88pm860x-core.c
@@ -1144,17 +1144,15 @@ static int pm860x_probe(struct i2c_client *client,
 			return -ENOMEM;
 		ret = pm860x_dt_init(node, &client->dev, pdata);
 		if (ret)
-			goto err;
+			return ret;
 	} else if (!pdata) {
 		pr_info("No platform data in %s!\n", __func__);
 		return -EINVAL;
 	}
 
 	chip = kzalloc(sizeof(struct pm860x_chip), GFP_KERNEL);
-	if (chip == NULL) {
-		ret = -ENOMEM;
-		goto err;
-	}
+	if (chip == NULL)
+		return -ENOMEM;
 
 	chip->id = verify_addr(client);
 	chip->regmap = regmap_init_i2c(client, &pm860x_regmap_config);
@@ -1194,10 +1192,6 @@ static int pm860x_probe(struct i2c_client *client,
 	pm860x_device_init(chip, pdata);
 	return 0;
-
-err:
-	if (node)
-		devm_kfree(&client->dev, pdata);
-	return ret;
 }
 
 static int pm860x_remove(struct i2c_client *client)
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index ca86581d02ce..d9aed1593e5d 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -10,19 +10,240 @@ config MFD_CORE
 	select IRQ_DOMAIN
 	default n
 
-config MFD_88PM860X
-	bool "Support Marvell 88PM8606/88PM8607"
+config MFD_CS5535
+	tristate "AMD CS5535 and CS5536 southbridge core functions"
+	select MFD_CORE
+	depends on PCI && X86
+	---help---
+	  This is the core driver for CS5535/CS5536 MFD functions. This is
+	  necessary for using the board's GPIO and MFGPT functionality.
+
+config MFD_AS3711
+	bool "AMS AS3711"
+	select MFD_CORE
+	select REGMAP_I2C
+	select REGMAP_IRQ
 	depends on I2C=y && GENERIC_HARDIRQS
+	help
+	  Support for the AS3711 PMIC from AMS
+
+config PMIC_ADP5520
+	bool "Analog Devices ADP5520/01 MFD PMIC Core Support"
+	depends on I2C=y
+	help
+	  Say yes here to add support for Analog Devices AD5520 and ADP5501,
+	  Multifunction Power Management IC. This includes
+	  the I2C driver and the core APIs _only_, you have to select
+	  individual components like LCD backlight, LEDs, GPIOs and Keypad
+	  under the corresponding menus.
+
+config MFD_AAT2870_CORE
+	bool "AnalogicTech AAT2870"
+	select MFD_CORE
+	depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS
+	help
+	  If you say yes here you get support for the AAT2870.
+	  This driver provides common support for accessing the device,
+	  additional drivers must be enabled in order to use the
+	  functionality of the device.
+
+config MFD_CROS_EC
+	tristate "ChromeOS Embedded Controller"
+	select MFD_CORE
+	help
+	  If you say Y here you get support for the ChromeOS Embedded
+	  Controller (EC) providing keyboard, battery and power services.
+	  You also need to enable the driver for the bus you are using. The
+	  protocol for talking to the EC is defined by the bus driver.
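The cros_ec_keyb driver earlier in this series shows the consumer side of this interface in full. As a compact, hedged sketch of the same pattern (only the cros_ec_device members event_notifier and command_recv and the EC_CMD_MKBP_STATE command come from this series; my_ec_client, my_ec_event, my_ec_client_attach and MY_KEYB_COLS are illustrative names), a minimal bus-agnostic consumer registers on the EC's notifier chain and pulls the key state on each event:

#include <linux/kernel.h>
#include <linux/notifier.h>
#include <linux/mfd/cros_ec.h>
#include <linux/mfd/cros_ec_commands.h>

#define MY_KEYB_COLS	13	/* one byte of key state per matrix column */

struct my_ec_client {
	struct cros_ec_device *ec;	/* handed down by the parent MFD */
	struct notifier_block nb;
};

/* Runs off the EC's blocking notifier whenever the EC raises an event. */
static int my_ec_event(struct notifier_block *nb, unsigned long state,
		       void *unused)
{
	struct my_ec_client *cl = container_of(nb, struct my_ec_client, nb);
	uint8_t kb_state[MY_KEYB_COLS];

	/* Pull the latest matrix scan, as cros_ec_keyb_get_state() does. */
	if (cl->ec->command_recv(cl->ec, EC_CMD_MKBP_STATE,
				 kb_state, sizeof(kb_state)) >= 0) {
		/* ... decode kb_state[col] & (1 << row) for each key ... */
	}

	return NOTIFY_DONE;
}

static int my_ec_client_attach(struct my_ec_client *cl)
{
	cl->nb.notifier_call = my_ec_event;
	return blocking_notifier_chain_register(&cl->ec->event_notifier,
						&cl->nb);
}

Unregistering mirrors this with blocking_notifier_chain_unregister(), as cros_ec_keyb_close() does above.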
+ +config MFD_CROS_EC_I2C + tristate "ChromeOS Embedded Controller (I2C)" + depends on MFD_CROS_EC && I2C + + help + If you say Y here, you get support for talking to the ChromeOS + EC through an I2C bus. This uses a simple byte-level protocol with + a checksum. Failing accesses will be retried three times to + improve reliability. + +config MFD_CROS_EC_SPI + tristate "ChromeOS Embedded Controller (SPI)" + depends on MFD_CROS_EC && SPI + + ---help--- + If you say Y here, you get support for talking to the ChromeOS EC + through a SPI bus, using a byte-level protocol. Since the EC's + response time cannot be guaranteed, we support ignoring + 'pre-amble' bytes before the response actually starts. + +config MFD_ASIC3 + bool "Compaq ASIC3" + depends on GENERIC_HARDIRQS && GPIOLIB && ARM + select MFD_CORE + ---help--- + This driver supports the ASIC3 multifunction chip found on many + PDAs (mainly iPAQ and HTC based ones) + +config PMIC_DA903X + bool "Dialog Semiconductor DA9030/DA9034 PMIC Support" + depends on I2C=y + help + Say yes here to support for Dialog Semiconductor DA9030 (a.k.a + ARAVA) and DA9034 (a.k.a MICCO), these are Power Management IC + usually found on PXA processors-based platforms. This includes + the I2C driver and the core APIs _only_, you have to select + individual components like LCD backlight, voltage regulators, + LEDs and battery-charger under the corresponding menus. + +config PMIC_DA9052 + bool + select MFD_CORE + +config MFD_DA9052_SPI + bool "Dialog Semiconductor DA9052/53 PMIC variants with SPI" + select REGMAP_SPI + select REGMAP_IRQ + select PMIC_DA9052 + depends on SPI_MASTER=y && GENERIC_HARDIRQS + help + Support for the Dialog Semiconductor DA9052 PMIC + when controlled using SPI. This driver provides common support + for accessing the device, additional drivers must be enabled in + order to use the functionality of the device. + +config MFD_DA9052_I2C + bool "Dialog Semiconductor DA9052/53 PMIC variants with I2C" select REGMAP_I2C + select REGMAP_IRQ + select PMIC_DA9052 + depends on I2C=y && GENERIC_HARDIRQS + help + Support for the Dialog Semiconductor DA9052 PMIC + when controlled using I2C. This driver provides common support + for accessing the device, additional drivers must be enabled in + order to use the functionality of the device. + +config MFD_DA9055 + bool "Dialog Semiconductor DA9055 PMIC Support" + select REGMAP_I2C + select REGMAP_IRQ select MFD_CORE + depends on I2C=y && GENERIC_HARDIRQS help - This supports for Marvell 88PM8606/88PM8607 Power Management IC. - This includes the I2C driver and the core APIs _only_, you have to - select individual components like voltage regulators, RTC and - battery-charger under the corresponding menus. + Say yes here for support of Dialog Semiconductor DA9055. This is + a Power Management IC. This driver provides common support for + accessing the device as well as the I2C interface to the chip itself. + Additional drivers must be enabled in order to use the functionality + of the device. + + This driver can be built as a module. If built as a module it will be + called "da9055" + +config MFD_MC13783 + tristate + +config MFD_MC13XXX + tristate + depends on (SPI_MASTER || I2C) && GENERIC_HARDIRQS + select MFD_CORE + select MFD_MC13783 + help + Enable support for the Freescale MC13783 and MC13892 PMICs. + This driver provides common support for accessing the device, + additional drivers must be enabled in order to use the + functionality of the device. 
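The MFD_CROS_EC_I2C entry above mentions a byte-level protocol with a checksum and three retries; the actual framing lives in cros_ec_i2c.c, which this excerpt does not include. The fragment below illustrates only that description, not the driver's real wire format: ec_i2c_xfer_once is a hypothetical single transfer, and the 8-bit additive checksum is an assumption made for the sketch.

#include <linux/types.h>
#include <linux/errno.h>

#define EC_I2C_RETRIES	3	/* "retried three times", per the help text */

/* 8-bit additive checksum over a message buffer (assumed layout). */
static u8 ec_msg_checksum(const u8 *buf, size_t len)
{
	u8 sum = 0;
	size_t i;

	for (i = 0; i < len; i++)
		sum += buf[i];
	return sum;
}

/*
 * Hypothetical single transfer: a real driver would do the I2C I/O here
 * and return a negative errno when ec_msg_checksum() over the received
 * payload does not match the checksum byte sent by the EC.
 */
int ec_i2c_xfer_once(const u8 *out, size_t out_len, u8 *in, size_t in_len);

/* Retry a failing transfer a bounded number of times. */
static int ec_i2c_xfer(const u8 *out, size_t out_len, u8 *in, size_t in_len)
{
	int ret = -EIO;
	int i;

	for (i = 0; i < EC_I2C_RETRIES && ret < 0; i++)
		ret = ec_i2c_xfer_once(out, out_len, in, in_len);
	return ret;
}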
+ +config MFD_MC13XXX_SPI + tristate "Freescale MC13783 and MC13892 SPI interface" + depends on SPI_MASTER && GENERIC_HARDIRQS + select REGMAP_SPI + select MFD_MC13XXX + help + Select this if your MC13xxx is connected via an SPI bus. + +config MFD_MC13XXX_I2C + tristate "Freescale MC13892 I2C interface" + depends on I2C && GENERIC_HARDIRQS + select REGMAP_I2C + select MFD_MC13XXX + help + Select this if your MC13xxx is connected via an I2C bus. + +config HTC_EGPIO + bool "HTC EGPIO support" + depends on GENERIC_HARDIRQS && GPIOLIB && ARM + help + This driver supports the CPLD egpio chip present on + several HTC phones. It provides basic support for input + pins, output pins, and irqs. + +config HTC_PASIC3 + tristate "HTC PASIC3 LED/DS1WM chip support" + select MFD_CORE + depends on GENERIC_HARDIRQS + help + This core driver provides register access for the LED/DS1WM + chips labeled "AIC2" and "AIC3", found on HTC Blueangel and + HTC Magician devices, respectively. Actual functionality is + handled by the leds-pasic3 and ds1wm drivers. + +config HTC_I2CPLD + bool "HTC I2C PLD chip support" + depends on I2C=y && GPIOLIB + help + If you say yes here you get support for the supposed CPLD + found on omap850 HTC devices like the HTC Wizard and HTC Herald. + This device provides input and output GPIOs through an I2C + interface to one or more sub-chips. + +config LPC_ICH + tristate "Intel ICH LPC" + depends on PCI && GENERIC_HARDIRQS + select MFD_CORE + help + The LPC bridge function of the Intel ICH provides support for + many functional units. This driver provides needed support for + other drivers to control these functions, currently GPIO and + watchdog. + +config LPC_SCH + tristate "Intel SCH LPC" + depends on PCI && GENERIC_HARDIRQS + select MFD_CORE + help + LPC bridge function of the Intel SCH provides support for + System Management Bus and General Purpose I/O. + +config MFD_INTEL_MSIC + bool "Intel MSIC" + depends on INTEL_SCU_IPC + select MFD_CORE + help + Select this option to enable access to Intel MSIC (Avatele + Passage) chip. This chip embeds audio, battery, GPIO, etc. + devices used in Intel Medfield platforms. + +config MFD_JANZ_CMODIO + tristate "Janz CMOD-IO PCI MODULbus Carrier Board" + select MFD_CORE + depends on PCI && GENERIC_HARDIRQS + help + This is the core driver for the Janz CMOD-IO PCI MODULbus + carrier board. This device is a PCI to MODULbus bridge which may + host many different types of MODULbus daughterboards, including + CAN and GPIO controllers. + +config MFD_JZ4740_ADC + bool "Janz JZ4740 ADC core" + select MFD_CORE + select GENERIC_IRQ_CHIP + depends on MACH_JZ4740 + help + Say yes here if you want support for the ADC unit in the JZ4740 SoC. + This driver is necessary for jz4740-battery and jz4740-hwmon driver. config MFD_88PM800 - tristate "Support Marvell 88PM800" + tristate "Marvell 88PM800" depends on I2C=y && GENERIC_HARDIRQS select REGMAP_I2C select REGMAP_IRQ @@ -34,7 +255,7 @@ config MFD_88PM800 battery-charger under the corresponding menus. config MFD_88PM805 - tristate "Support Marvell 88PM805" + tristate "Marvell 88PM805" depends on I2C=y && GENERIC_HARDIRQS select REGMAP_I2C select REGMAP_IRQ @@ -45,8 +266,242 @@ config MFD_88PM805 components like codec device, headset/Mic device under the corresponding menus. +config MFD_88PM860X + bool "Marvell 88PM8606/88PM8607" + depends on I2C=y && GENERIC_HARDIRQS + select REGMAP_I2C + select MFD_CORE + help + This supports for Marvell 88PM8606/88PM8607 Power Management IC. 
+ This includes the I2C driver and the core APIs _only_, you have to + select individual components like voltage regulators, RTC and + battery-charger under the corresponding menus. + +config MFD_MAX77686 + bool "Maxim Semiconductor MAX77686 PMIC Support" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + select REGMAP_I2C + select IRQ_DOMAIN + help + Say yes here to support for Maxim Semiconductor MAX77686. + This is a Power Management IC with RTC on chip. + This driver provides common support for accessing the device; + additional drivers must be enabled in order to use the functionality + of the device. + +config MFD_MAX77693 + bool "Maxim Semiconductor MAX77693 PMIC Support" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + select REGMAP_I2C + help + Say yes here to support for Maxim Semiconductor MAX77693. + This is a companion Power Management IC with Flash, Haptic, Charger, + and MUIC(Micro USB Interface Controller) controls on chip. + This driver provides common support for accessing the device; + additional drivers must be enabled in order to use the functionality + of the device. + +config MFD_MAX8907 + tristate "Maxim Semiconductor MAX8907 PMIC Support" + select MFD_CORE + depends on I2C=y && GENERIC_HARDIRQS + select REGMAP_I2C + select REGMAP_IRQ + help + Say yes here to support for Maxim Semiconductor MAX8907. This is + a Power Management IC. This driver provides common support for + accessing the device; additional drivers must be enabled in order + to use the functionality of the device. + +config MFD_MAX8925 + bool "Maxim Semiconductor MAX8925 PMIC Support" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + help + Say yes here to support for Maxim Semiconductor MAX8925. This is + a Power Management IC. This driver provides common support for + accessing the device, additional drivers must be enabled in order + to use the functionality of the device. + +config MFD_MAX8997 + bool "Maxim Semiconductor MAX8997/8966 PMIC Support" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + select IRQ_DOMAIN + help + Say yes here to support for Maxim Semiconductor MAX8997/8966. + This is a Power Management IC with RTC, Flash, Fuel Gauge, Haptic, + MUIC controls on chip. + This driver provides common support for accessing the device; + additional drivers must be enabled in order to use the functionality + of the device. + +config MFD_MAX8998 + bool "Maxim Semiconductor MAX8998/National LP3974 PMIC Support" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + help + Say yes here to support for Maxim Semiconductor MAX8998 and + National Semiconductor LP3974. This is a Power Management IC. + This driver provides common support for accessing the device, + additional drivers must be enabled in order to use the functionality + of the device. + +config EZX_PCAP + bool "Motorola EZXPCAP Support" + depends on GENERIC_HARDIRQS && SPI_MASTER + help + This enables the PCAP ASIC present on EZX Phones. This is + needed for MMC, TouchScreen, Sound, USB, etc.. + +config MFD_VIPERBOARD + tristate "Nano River Technologies Viperboard" + select MFD_CORE + depends on USB && GENERIC_HARDIRQS + default n + help + Say yes here if you want support for Nano River Technologies + Viperboard. + There are mfd cell drivers available for i2c master, adc and + both gpios found on the board. The spi part does not yet + have a driver. + You need to select the mfd cell drivers separately. + The drivers do not support all features the board exposes. 
+ +config MFD_RETU + tristate "Nokia Retu and Tahvo multi-function device" + select MFD_CORE + depends on I2C && GENERIC_HARDIRQS + select REGMAP_IRQ + help + Retu and Tahvo are a multi-function devices found on Nokia + Internet Tablets (770, N800 and N810). + +config MFD_PCF50633 + tristate "NXP PCF50633" + depends on I2C + select REGMAP_I2C + help + Say yes here if you have NXP PCF50633 chip on your board. + This core driver provides register access and IRQ handling + facilities, and registers devices for the various functions + so that function-specific drivers can bind to them. + +config PCF50633_ADC + tristate "NXP PCF50633 ADC" + depends on MFD_PCF50633 + help + Say yes here if you want to include support for ADC in the + NXP PCF50633 chip. + +config PCF50633_GPIO + tristate "NXP PCF50633 GPIO" + depends on MFD_PCF50633 + help + Say yes here if you want to include support GPIO for pins on + the PCF50633 chip. + +config UCB1400_CORE + tristate "Philips UCB1400 Core driver" + depends on AC97_BUS + depends on GPIOLIB + help + This enables support for the Philips UCB1400 core functions. + The UCB1400 is an AC97 audio codec. + + To compile this driver as a module, choose M here: the + module will be called ucb1400_core. + +config MFD_PM8XXX + tristate + +config MFD_PM8921_CORE + tristate "Qualcomm PM8921 PMIC chip" + depends on SSBI && BROKEN + select MFD_CORE + select MFD_PM8XXX + help + If you say yes to this option, support will be included for the + built-in PM8921 PMIC chip. + + This is required if your board has a PM8921 and uses its features, + such as: MPPs, GPIOs, regulators, interrupts, and PWM. + + Say M here if you want to include support for PM8921 chip as a module. + This will build a module called "pm8921-core". + +config MFD_PM8XXX_IRQ + bool "Qualcomm PM8xxx IRQ features" + depends on MFD_PM8XXX + default y if MFD_PM8XXX + help + This is the IRQ driver for Qualcomm PM 8xxx PMIC chips. + + This is required to use certain other PM 8xxx features, such as GPIO + and MPP. + +config MFD_RDC321X + tristate "RDC R-321x southbridge" + select MFD_CORE + depends on PCI && GENERIC_HARDIRQS + help + Say yes here if you want to have support for the RDC R-321x SoC + southbridge which provides access to GPIOs and Watchdog using the + southbridge PCI device configuration space. + +config MFD_RTSX_PCI + tristate "Realtek PCI-E card reader" + depends on PCI && GENERIC_HARDIRQS + select MFD_CORE + help + This supports for Realtek PCI-Express card reader including rts5209, + rts5229, rtl8411, etc. Realtek card reader supports access to many + types of memory cards, such as Memory Stick, Memory Stick Pro, + Secure Digital and MultiMediaCard. + +config MFD_RC5T583 + bool "Ricoh RC5T583 Power Management system device" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + select REGMAP_I2C + help + Select this option to get support for the RICOH583 Power + Management system device. + This driver provides common support for accessing the device + through i2c interface. The device supports multiple sub-devices + like GPIO, interrupts, RTC, LDO and DCDC regulators, onkey. + Additional drivers must be enabled in order to use the + different functionality of the device. + +config MFD_SEC_CORE + bool "SAMSUNG Electronics PMIC Series Support" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + select REGMAP_I2C + select REGMAP_IRQ + help + Support for the Samsung Electronics MFD series. 
+ This driver provides common support for accessing the device, + additional drivers must be enabled in order to use the functionality + of the device + +config MFD_SI476X_CORE + tristate "Silicon Laboratories 4761/64/68 AM/FM radio." + depends on I2C + select MFD_CORE + select REGMAP_I2C + help + This is the core driver for the SI476x series of AM/FM + radio. This MFD driver connects the radio-si476x V4L2 module + and the si476x audio codec. + + To compile this driver as a module, choose M here: the + module will be called si476x-core. + config MFD_SM501 - tristate "Support for Silicon Motion SM501" + tristate "Silicon Motion SM501" ---help--- This is the core driver for the Silicon Motion SM501 multimedia companion chip. This device is a multifunction device which may @@ -63,46 +518,147 @@ config MFD_SM501_GPIO lines on the SM501. The platform data is used to supply the base number for the first GPIO line to register. -config MFD_RTSX_PCI - tristate "Support for Realtek PCI-E card reader" - depends on PCI && GENERIC_HARDIRQS +config MFD_SMSC + bool "SMSC ECE1099 series chips" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + select REGMAP_I2C + help + If you say yes here you get support for the + ece1099 chips from SMSC. + + To compile this driver as a module, choose M here: the + module will be called smsc. + +config ABX500_CORE + bool "ST-Ericsson ABX500 Mixed Signal Circuit register functions" + default y if ARCH_U300 || ARCH_U8500 + help + Say yes here if you have the ABX500 Mixed Signal IC family + chips. This core driver expose register access functions. + Functionality specific drivers using these functions can + remain unchanged when IC changes. Binding of the functions to + actual register access is done by the IC core driver. + +config AB3100_CORE + bool "ST-Ericsson AB3100 Mixed Signal Circuit core functions" + depends on I2C=y && ABX500_CORE && GENERIC_HARDIRQS select MFD_CORE + default y if ARCH_U300 help - This supports for Realtek PCI-Express card reader including rts5209, - rts5229, rtl8411, etc. Realtek card reader supports access to many - types of memory cards, such as Memory Stick, Memory Stick Pro, - Secure Digital and MultiMediaCard. + Select this to enable the AB3100 Mixed Signal IC core + functionality. This connects to a AB3100 on the I2C bus + and expose a number of symbols needed for dependent devices + to read and write registers and subscribe to events from + this multi-functional IC. This is needed to use other features + of the AB3100 such as battery-backed RTC, charging control, + LEDs, vibrator, system power and temperature, power management + and ALSA sound. -config MFD_ASIC3 - bool "Support for Compaq ASIC3" - depends on GENERIC_HARDIRQS && GPIOLIB && ARM +config AB3100_OTP + tristate "ST-Ericsson AB3100 OTP functions" + depends on AB3100_CORE + default y if AB3100_CORE + help + Select this to enable the AB3100 Mixed Signal IC OTP (one-time + programmable memory) support. This exposes a sysfs file to read + out OTP values. + +config AB8500_CORE + bool "ST-Ericsson AB8500 Mixed Signal Power Management chip" + depends on GENERIC_HARDIRQS && ABX500_CORE && MFD_DB8500_PRCMU + select POWER_SUPPLY select MFD_CORE - ---help--- - This driver supports the ASIC3 multifunction chip found on many - PDAs (mainly iPAQ and HTC based ones) + select IRQ_DOMAIN + help + Select this option to enable access to AB8500 power management + chip. This connects to U8500 either on the SSP/SPI bus (deprecated + since hardware version v1.0) or the I2C bus via PRCMU. 
It also adds
+	  the irq_chip parts for handling the Mixed Signal chip events.
+	  This chip embeds various other multimedia functionalities as well.
 
-config MFD_DAVINCI_VOICECODEC
-	tristate
+config AB8500_DEBUG
+	bool "Enable debug info via debugfs"
+	depends on AB8500_CORE && DEBUG_FS
+	default y if DEBUG_FS
+	help
+	  Select this option if you want debug information using the debug
+	  filesystem, debugfs.
+
+config AB8500_GPADC
+	bool "ST-Ericsson AB8500 GPADC driver"
+	depends on AB8500_CORE && REGULATOR_AB8500
+	default y
+	help
+	  AB8500 GPADC driver used to convert Acc and battery/ac/usb voltage
+
+config MFD_DB8500_PRCMU
+	bool "ST-Ericsson DB8500 Power Reset Control Management Unit"
+	depends on UX500_SOC_DB8500
 	select MFD_CORE
+	help
+	  Select this option to enable support for the DB8500 Power Reset
+	  and Control Management Unit. This is basically an autonomous
+	  system controller running an XP70 microprocessor, which is accessed
+	  through a register map.
 
-config MFD_DM355EVM_MSP
-	bool "DaVinci DM355 EVM microcontroller"
-	depends on I2C=y && MACH_DAVINCI_DM355_EVM
+config MFD_STMPE
+	bool "STMicroelectronics STMPE"
+	depends on (I2C=y || SPI_MASTER=y) && GENERIC_HARDIRQS
+	select MFD_CORE
 	help
-	  This driver supports the MSP430 microcontroller used on these
-	  boards. MSP430 firmware manages resets and power sequencing,
-	  inputs from buttons and the IR remote, LEDs, an RTC, and more.
+	  Support for the STMPE family of I/O Expanders from
+	  STMicroelectronics.
 
-config MFD_TI_SSP
-	tristate "TI Sequencer Serial Port support"
-	depends on ARCH_DAVINCI_TNETV107X && GENERIC_HARDIRQS
+	  Currently supported devices are:
+
+	  STMPE811: GPIO, Touchscreen
+	  STMPE1601: GPIO, Keypad
+	  STMPE1801: GPIO, Keypad
+	  STMPE2401: GPIO, Keypad
+	  STMPE2403: GPIO, Keypad
+
+	  This driver provides common support for accessing the device,
+	  additional drivers must be enabled in order to use the functionality
+	  of the device. Currently available sub drivers are:
+
+	  GPIO: stmpe-gpio
+	  Keypad: stmpe-keypad
+	  Touchscreen: stmpe-ts
+
+menu "STMicroelectronics STMPE Interface Drivers"
+depends on MFD_STMPE
+
+config STMPE_I2C
+	bool "STMicroelectronics STMPE I2C Interface"
+	depends on I2C=y
+	default y
+	help
+	  This is used to enable I2C interface of STMPE
+
+config STMPE_SPI
+	bool "STMicroelectronics STMPE SPI Interface"
+	depends on SPI_MASTER
+	help
+	  This is used to enable SPI interface of STMPE
+endmenu
+
+config MFD_STA2X11
+	bool "STMicroelectronics STA2X11"
+	depends on STA2X11 && GENERIC_HARDIRQS
 	select MFD_CORE
-	---help---
-	  Say Y here if you want support for the Sequencer Serial Port
-	  in a Texas Instruments TNETV107X SoC.
+	select REGMAP_MMIO
 
-	  To compile this driver as a module, choose M here: the
-	  module will be called ti-ssp.
+config MFD_SYSCON
+	bool "System Controller Register R/W Based on Regmap"
+	select REGMAP_MMIO
+	help
+	  Select this option to enable accessing system control registers
+	  via regmap.
+
+config MFD_DAVINCI_VOICECODEC
+	tristate
+	select MFD_CORE
 
 config MFD_TI_AM335X_TSCADC
 	tristate "TI ADC / Touch Screen chip support"
@@ -116,60 +672,56 @@ config MFD_TI_AM335X_TSCADC
 	  To compile this driver as a module, choose M here: the
 	  module will be called ti_am335x_tscadc.
 
-config HTC_EGPIO
-	bool "HTC EGPIO support"
-	depends on GENERIC_HARDIRQS && GPIOLIB && ARM
+config MFD_DM355EVM_MSP
+	bool "TI DaVinci DM355 EVM microcontroller"
+	depends on I2C=y && MACH_DAVINCI_DM355_EVM
 	help
-	  This driver supports the CPLD egpio chip present on
-	  several HTC phones. It provides basic support for input
-	  pins, output pins, and irqs.
+	  This driver supports the MSP430 microcontroller used on these
+	  boards. MSP430 firmware manages resets and power sequencing,
+	  inputs from buttons and the IR remote, LEDs, an RTC, and more.
 
-config HTC_PASIC3
-	tristate "HTC PASIC3 LED/DS1WM chip support"
+config MFD_LP8788
+	bool "TI LP8788 Power Management Unit Driver"
+	depends on I2C=y && GENERIC_HARDIRQS
 	select MFD_CORE
-	depends on GENERIC_HARDIRQS
-	help
-	  This core driver provides register access for the LED/DS1WM
-	  chips labeled "AIC2" and "AIC3", found on HTC Blueangel and
-	  HTC Magician devices, respectively. Actual functionality is
-	  handled by the leds-pasic3 and ds1wm drivers.
-
-config HTC_I2CPLD
-	bool "HTC I2C PLD chip support"
-	depends on I2C=y && GPIOLIB
+	select REGMAP_I2C
+	select IRQ_DOMAIN
 	help
-	  If you say yes here you get support for the supposed CPLD
-	  found on omap850 HTC devices like the HTC Wizard and HTC Herald.
-	  This device provides input and output GPIOs through an I2C
-	  interface to one or more sub-chips.
+	  TI LP8788 PMU supports regulators, battery charger, RTC,
+	  ADC, backlight driver and current sinks.
 
-config UCB1400_CORE
-	tristate "Philips UCB1400 Core driver"
-	depends on AC97_BUS
-	depends on GPIOLIB
+config MFD_OMAP_USB_HOST
+	bool "TI OMAP USBHS core and TLL driver"
+	depends on USB_EHCI_HCD_OMAP || USB_OHCI_HCD_OMAP3
+	default y
 	help
-	  This enables support for the Philips UCB1400 core functions.
-	  The UCB1400 is an AC97 audio codec.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called ucb1400_core.
+	  This is the core driver for the OMAP EHCI and OHCI drivers.
+	  This MFD driver does the required setup functionalities for
+	  OMAP USB Host drivers.
 
-config MFD_LM3533
-	tristate "LM3533 Lighting Power chip"
-	depends on I2C
+config MFD_PALMAS
+	bool "TI Palmas series chips"
 	select MFD_CORE
 	select REGMAP_I2C
-	depends on GENERIC_HARDIRQS
+	select REGMAP_IRQ
+	depends on I2C=y && GENERIC_HARDIRQS
 	help
-	  Say yes here to enable support for National Semiconductor / TI
-	  LM3533 Lighting Power chips.
+	  If you say yes here you get support for the Palmas
+	  series of PMIC chips from Texas Instruments.
 
-	  This driver provides common support for accessing the device;
-	  additional drivers must be enabled in order to use the LED,
-	  backlight or ambient-light-sensor functionality of the device.
+config MFD_TI_SSP
+	tristate "TI Sequencer Serial Port support"
+	depends on ARCH_DAVINCI_TNETV107X && GENERIC_HARDIRQS
+	select MFD_CORE
+	---help---
+	  Say Y here if you want support for the Sequencer Serial Port
+	  in a Texas Instruments TNETV107X SoC.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ti-ssp.
 
 config TPS6105X
-	tristate "TPS61050/61052 Boost Converters"
+	tristate "TI TPS61050/61052 Boost Converters"
 	depends on I2C
 	select REGULATOR
 	select MFD_CORE
@@ -182,7 +734,7 @@ config TPS6105X
 	  also contains a GPIO pin.
 
 config TPS65010
-	tristate "TPS6501x Power Management chips"
+	tristate "TI TPS6501x Power Management chips"
 	depends on I2C && GPIOLIB
 	default y if MACH_OMAP_H2 || MACH_OMAP_H3 || MACH_OMAP_OSK
 	help
@@ -195,7 +747,7 @@ config TPS65010
 	  will be called tps65010.
 
 config TPS6507X
-	tristate "TPS6507x Power Management / Touch Screen chips"
+	tristate "TI TPS6507x Power Management / Touch Screen chips"
 	select MFD_CORE
 	depends on I2C && GENERIC_HARDIRQS
 	help
@@ -206,8 +758,24 @@ config TPS6507X
 	  This driver can also be built as a module. If so, the module
 	  will be called tps6507x.
+config TPS65911_COMPARATOR + tristate + +config MFD_TPS65090 + bool "TI TPS65090 Power Management chips" + depends on I2C=y && GENERIC_HARDIRQS + select MFD_CORE + select REGMAP_I2C + select REGMAP_IRQ + help + If you say yes here you get support for the TPS65090 series of + Power Management chips. + This driver provides common support for accessing the device, + additional drivers must be enabled in order to use the + functionality of the device. + config MFD_TPS65217 - tristate "TPS65217 Power Management / White LED chips" + tristate "TI TPS65217 Power Management / White LED chips" depends on I2C && GENERIC_HARDIRQS select MFD_CORE select REGMAP_I2C @@ -222,7 +790,7 @@ config MFD_TPS65217 will be called tps65217. config MFD_TPS6586X - bool "TPS6586x Power Management chips" + bool "TI TPS6586x Power Management chips" depends on I2C=y && GENERIC_HARDIRQS select MFD_CORE select REGMAP_I2C @@ -237,7 +805,7 @@ config MFD_TPS6586X will be called tps6586x. config MFD_TPS65910 - bool "TPS65910 Power Management chip" + bool "TI TPS65910 Power Management chip" depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS select MFD_CORE select REGMAP_I2C @@ -248,11 +816,14 @@ config MFD_TPS65910 Power Management chips. config MFD_TPS65912 - bool + bool "TI TPS65912 Power Management chip" depends on GPIOLIB + help + If you say yes here you get support for the TPS65912 series of + PM chips. config MFD_TPS65912_I2C - bool "TPS65912 Power Management chip with I2C" + bool "TI TPS65912 Power Management chip with I2C" select MFD_CORE select MFD_TPS65912 depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS @@ -261,7 +832,7 @@ config MFD_TPS65912_I2C PM chips with I2C interface. config MFD_TPS65912_SPI - bool "TPS65912 Power Management chip with SPI" + bool "TI TPS65912 Power Management chip with SPI" select MFD_CORE select MFD_TPS65912 depends on SPI_MASTER && GPIOLIB && GENERIC_HARDIRQS @@ -283,18 +854,8 @@ config MFD_TPS80031 ADC, RTC, 2 PWM, System Voltage Regulator/Battery Charger with Power Path from USB, 32K clock generator. -config MENELAUS - bool "Texas Instruments TWL92330/Menelaus PM chip" - depends on I2C=y && ARCH_OMAP2 - help - If you say yes here you get support for the Texas Instruments - TWL92330/Menelaus Power Management chip. This include voltage - regulators, Dual slot memory card transceivers, real-time clock - and other features that are often used in portable devices like - cell phones and PDAs. - config TWL4030_CORE - bool "Texas Instruments TWL4030/TWL5030/TWL6030/TPS659x0 Support" + bool "TI TWL4030/TWL5030/TWL6030/TPS659x0 Support" depends on I2C=y && GENERIC_HARDIRQS select IRQ_DOMAIN select REGMAP_I2C @@ -310,7 +871,7 @@ config TWL4030_CORE versions) and many other features. config TWL4030_MADC - tristate "Texas Instruments TWL4030 MADC" + tristate "TI TWL4030 MADC" depends on TWL4030_CORE help This driver provides support for triton TWL4030-MADC. The @@ -320,7 +881,7 @@ config TWL4030_MADC named twl4030-madc config TWL4030_POWER - bool "Support power resources on TWL4030 family chips" + bool "TI TWL4030 power resources" depends on TWL4030_CORE && ARM help Say yes here if you want to use the power resources on the @@ -333,13 +894,13 @@ config TWL4030_POWER or reset when a sleep, wakeup or warm reset event occurs. 
 config MFD_TWL4030_AUDIO
-	bool
+	bool "TI TWL4030 Audio"
 	depends on TWL4030_CORE && GENERIC_HARDIRQS
 	select MFD_CORE
 	default n
 
 config TWL6040_CORE
-	bool "Support for TWL6040 audio codec"
+	bool "TI TWL6040 audio codec"
	depends on I2C=y && GENERIC_HARDIRQS
 	select MFD_CORE
 	select REGMAP_I2C
@@ -352,48 +913,53 @@ config TWL6040_CORE
 	  additional drivers must be enabled in order to use the functionality
 	  of the device (audio, vibra).
 
-config MFD_STMPE
-	bool "Support STMicroelectronics STMPE"
-	depends on (I2C=y || SPI_MASTER=y) && GENERIC_HARDIRQS
-	select MFD_CORE
+config MENELAUS
+	bool "TI TWL92330/Menelaus PM chip"
+	depends on I2C=y && ARCH_OMAP2
 	help
-	  Support for the STMPE family of I/O Expanders from
-	  STMicroelectronics.
-
-	  Currently supported devices are:
-
-	  STMPE811: GPIO, Touchscreen
-	  STMPE1601: GPIO, Keypad
-	  STMPE2401: GPIO, Keypad
-	  STMPE2403: GPIO, Keypad
+	  If you say yes here you get support for the Texas Instruments
+	  TWL92330/Menelaus Power Management chip. This includes voltage
+	  regulators, Dual slot memory card transceivers, real-time clock
+	  and other features that are often used in portable devices like
+	  cell phones and PDAs.
 
-	  This driver provides common support for accessing the device,
-	  additional drivers must be enabled in order to use the functionality
-	  of the device. Currently available sub drivers are:
+config MFD_WL1273_CORE
+	tristate "TI WL1273 FM radio"
+	depends on I2C && GENERIC_HARDIRQS
+	select MFD_CORE
+	default n
+	help
+	  This is the core driver for the TI WL1273 FM radio. This MFD
+	  driver connects the radio-wl1273 V4L2 module and the wl1273
+	  audio codec.
 
-	  GPIO: stmpe-gpio
-	  Keypad: stmpe-keypad
-	  Touchscreen: stmpe-ts
+config MFD_LM3533
+	tristate "TI/National Semiconductor LM3533 Lighting Power chip"
+	depends on I2C
+	select MFD_CORE
+	select REGMAP_I2C
+	depends on GENERIC_HARDIRQS
+	help
+	  Say yes here to enable support for National Semiconductor / TI
+	  LM3533 Lighting Power chips.
 
-menu "STMPE Interface Drivers"
-depends on MFD_STMPE
+	  This driver provides common support for accessing the device;
+	  additional drivers must be enabled in order to use the LED,
+	  backlight or ambient-light-sensor functionality of the device.
 
-config STMPE_I2C
-	bool "STMPE I2C Inteface"
-	depends on I2C=y
-	default y
-	help
-	  This is used to enable I2C interface of STMPE
+config MFD_TIMBERDALE
	tristate "Timberdale FPGA"
+	select MFD_CORE
+	depends on PCI && GPIOLIB
+	---help---
+	  This is the core driver for the timberdale FPGA. This device is a
+	  multifunction device which exposes numerous platform devices.
 
-config STMPE_SPI
-	bool "STMPE SPI Inteface"
-	depends on SPI_MASTER
-	help
-	  This is used to enable SPI interface of STMPE
-endmenu
+	  The timberdale FPGA can be found on the Intel Atom development board
+	  for in-vehicle infotainment, called Russellville.
 
 config MFD_TC3589X
-	bool "Support Toshiba TC35892 and variants"
+	bool "Toshiba TC35892 and variants"
 	depends on I2C=y && GENERIC_HARDIRQS
 	select MFD_CORE
 	help
@@ -408,27 +974,15 @@ config MFD_TMIO
 	default n
 
 config MFD_T7L66XB
-	bool "Support Toshiba T7L66XB"
+	bool "Toshiba T7L66XB"
 	depends on ARM && HAVE_CLK && GENERIC_HARDIRQS
 	select MFD_CORE
 	select MFD_TMIO
 	help
 	  Support for Toshiba Mobile IO Controller T7L66XB
 
-config MFD_SMSC
-	bool "Support for the SMSC ECE1099 series chips"
-	depends on I2C=y && GENERIC_HARDIRQS
-	select MFD_CORE
-	select REGMAP_I2C
-	help
-	  If you say yes here you get support for the
-	  ece1099 chips from SMSC.
- - To compile this driver as a module, choose M here: the - module will be called smsc. - config MFD_TC6387XB - bool "Support Toshiba TC6387XB" + bool "Toshiba TC6387XB" depends on ARM && HAVE_CLK select MFD_CORE select MFD_TMIO @@ -436,7 +990,7 @@ config MFD_TC6387XB Support for Toshiba Mobile IO Controller TC6387XB config MFD_TC6393XB - bool "Support Toshiba TC6393XB" + bool "Toshiba TC6393XB" depends on ARM && HAVE_CLK select GPIOLIB select MFD_CORE @@ -444,165 +998,14 @@ config MFD_TC6393XB help Support for Toshiba Mobile IO Controller TC6393XB -config PMIC_DA903X - bool "Dialog Semiconductor DA9030/DA9034 PMIC Support" - depends on I2C=y - help - Say yes here to support for Dialog Semiconductor DA9030 (a.k.a - ARAVA) and DA9034 (a.k.a MICCO), these are Power Management IC - usually found on PXA processors-based platforms. This includes - the I2C driver and the core APIs _only_, you have to select - individual components like LCD backlight, voltage regulators, - LEDs and battery-charger under the corresponding menus. - -config PMIC_DA9052 - bool - select MFD_CORE - -config MFD_DA9052_SPI - bool "Support Dialog Semiconductor DA9052/53 PMIC variants with SPI" - select REGMAP_SPI - select REGMAP_IRQ - select PMIC_DA9052 - depends on SPI_MASTER=y && GENERIC_HARDIRQS - help - Support for the Dialog Semiconductor DA9052 PMIC - when controlled using SPI. This driver provides common support - for accessing the device, additional drivers must be enabled in - order to use the functionality of the device. - -config MFD_DA9052_I2C - bool "Support Dialog Semiconductor DA9052/53 PMIC variants with I2C" - select REGMAP_I2C - select REGMAP_IRQ - select PMIC_DA9052 - depends on I2C=y && GENERIC_HARDIRQS - help - Support for the Dialog Semiconductor DA9052 PMIC - when controlled using I2C. This driver provides common support - for accessing the device, additional drivers must be enabled in - order to use the functionality of the device. - -config MFD_DA9055 - bool "Dialog Semiconductor DA9055 PMIC Support" - select REGMAP_I2C - select REGMAP_IRQ - select PMIC_DA9055 - select MFD_CORE - depends on I2C=y && GENERIC_HARDIRQS - help - Say yes here for support of Dialog Semiconductor DA9055. This is - a Power Management IC. This driver provides common support for - accessing the device as well as the I2C interface to the chip itself. - Additional drivers must be enabled in order to use the functionality - of the device. - - This driver can be built as a module. If built as a module it will be - called "da9055" - -config PMIC_ADP5520 - bool "Analog Devices ADP5520/01 MFD PMIC Core Support" - depends on I2C=y - help - Say yes here to add support for Analog Devices AD5520 and ADP5501, - Multifunction Power Management IC. This includes - the I2C driver and the core APIs _only_, you have to select - individual components like LCD backlight, LEDs, GPIOs and Kepad - under the corresponding menus. - -config MFD_LP8788 - bool "Texas Instruments LP8788 Power Management Unit Driver" - depends on I2C=y && GENERIC_HARDIRQS - select MFD_CORE - select REGMAP_I2C - select IRQ_DOMAIN - help - TI LP8788 PMU supports regulators, battery charger, RTC, - ADC, backlight driver and current sinks. - -config MFD_MAX77686 - bool "Maxim Semiconductor MAX77686 PMIC Support" - depends on I2C=y && GENERIC_HARDIRQS - select MFD_CORE - select REGMAP_I2C - select IRQ_DOMAIN - help - Say yes here to support for Maxim Semiconductor MAX77686. - This is a Power Management IC with RTC on chip. 
- This driver provides common support for accessing the device; - additional drivers must be enabled in order to use the functionality - of the device. - -config MFD_MAX77693 - bool "Maxim Semiconductor MAX77693 PMIC Support" - depends on I2C=y && GENERIC_HARDIRQS - select MFD_CORE - select REGMAP_I2C - help - Say yes here to support for Maxim Semiconductor MAX77693. - This is a companion Power Management IC with Flash, Haptic, Charger, - and MUIC(Micro USB Interface Controller) controls on chip. - This driver provides common support for accessing the device; - additional drivers must be enabled in order to use the functionality - of the device. - -config MFD_MAX8907 - tristate "Maxim Semiconductor MAX8907 PMIC Support" - select MFD_CORE - depends on I2C=y && GENERIC_HARDIRQS - select REGMAP_I2C - select REGMAP_IRQ - help - Say yes here to support for Maxim Semiconductor MAX8907. This is - a Power Management IC. This driver provides common support for - accessing the device; additional drivers must be enabled in order - to use the functionality of the device. - -config MFD_MAX8925 - bool "Maxim Semiconductor MAX8925 PMIC Support" - depends on I2C=y && GENERIC_HARDIRQS - select MFD_CORE - help - Say yes here to support for Maxim Semiconductor MAX8925. This is - a Power Management IC. This driver provides common support for - accessing the device, additional drivers must be enabled in order - to use the functionality of the device. - -config MFD_MAX8997 - bool "Maxim Semiconductor MAX8997/8966 PMIC Support" - depends on I2C=y && GENERIC_HARDIRQS - select MFD_CORE - select IRQ_DOMAIN - help - Say yes here to support for Maxim Semiconductor MAX8997/8966. - This is a Power Management IC with RTC, Flash, Fuel Gauge, Haptic, - MUIC controls on chip. - This driver provides common support for accessing the device; - additional drivers must be enabled in order to use the functionality - of the device. - -config MFD_MAX8998 - bool "Maxim Semiconductor MAX8998/National LP3974 PMIC Support" - depends on I2C=y && GENERIC_HARDIRQS - select MFD_CORE - help - Say yes here to support for Maxim Semiconductor MAX8998 and - National Semiconductor LP3974. This is a Power Management IC. - This driver provides common support for accessing the device, - additional drivers must be enabled in order to use the functionality - of the device. - -config MFD_SEC_CORE - bool "SAMSUNG Electronics PMIC Series Support" - depends on I2C=y && GENERIC_HARDIRQS +config MFD_VX855 + tristate "VIA VX855/VX875 integrated south bridge" + depends on PCI && GENERIC_HARDIRQS select MFD_CORE - select REGMAP_I2C - select REGMAP_IRQ help - Support for the Samsung Electronics MFD series. - This driver provides common support for accessing the device, - additional drivers must be enabled in order to use the functionality - of the device + Say yes here to enable support for various functions of the + VIA VX855/VX875 south bridge. You will need to enable the vx855_spi + and/or vx855_gpio drivers for this to do anything useful. config MFD_ARIZONA select REGMAP @@ -611,7 +1014,7 @@ config MFD_ARIZONA bool config MFD_ARIZONA_I2C - tristate "Support Wolfson Microelectronics Arizona platform with I2C" + tristate "Wolfson Microelectronics Arizona platform with I2C" select MFD_ARIZONA select MFD_CORE select REGMAP_I2C @@ -621,7 +1024,7 @@ config MFD_ARIZONA_I2C core functionality controlled via I2C. 
config MFD_ARIZONA_SPI - tristate "Support Wolfson Microelectronics Arizona platform with SPI" + tristate "Wolfson Microelectronics Arizona platform with SPI" select MFD_ARIZONA select MFD_CORE select REGMAP_SPI @@ -631,19 +1034,19 @@ config MFD_ARIZONA_SPI core functionality controlled via I2C. config MFD_WM5102 - bool "Support Wolfson Microelectronics WM5102" + bool "Wolfson Microelectronics WM5102" depends on MFD_ARIZONA help Support for Wolfson Microelectronics WM5102 low power audio SoC config MFD_WM5110 - bool "Support Wolfson Microelectronics WM5110" + bool "Wolfson Microelectronics WM5110" depends on MFD_ARIZONA help Support for Wolfson Microelectronics WM5110 low power audio SoC config MFD_WM8400 - bool "Support Wolfson Microelectronics WM8400" + bool "Wolfson Microelectronics WM8400" select MFD_CORE depends on I2C=y && GENERIC_HARDIRQS select REGMAP_I2C @@ -658,7 +1061,7 @@ config MFD_WM831X depends on GENERIC_HARDIRQS config MFD_WM831X_I2C - bool "Support Wolfson Microelectronics WM831x/2x PMICs with I2C" + bool "Wolfson Microelectronics WM831x/2x PMICs with I2C" select MFD_CORE select MFD_WM831X select REGMAP_I2C @@ -671,7 +1074,7 @@ config MFD_WM831X_I2C order to use the functionality of the device. config MFD_WM831X_SPI - bool "Support Wolfson Microelectronics WM831x/2x PMICs with SPI" + bool "Wolfson Microelectronics WM831x/2x PMICs with SPI" select MFD_CORE select MFD_WM831X select REGMAP_SPI @@ -687,56 +1090,8 @@ config MFD_WM8350 bool depends on GENERIC_HARDIRQS -config MFD_WM8350_CONFIG_MODE_0 - bool - depends on MFD_WM8350 - -config MFD_WM8350_CONFIG_MODE_1 - bool - depends on MFD_WM8350 - -config MFD_WM8350_CONFIG_MODE_2 - bool - depends on MFD_WM8350 - -config MFD_WM8350_CONFIG_MODE_3 - bool - depends on MFD_WM8350 - -config MFD_WM8351_CONFIG_MODE_0 - bool - depends on MFD_WM8350 - -config MFD_WM8351_CONFIG_MODE_1 - bool - depends on MFD_WM8350 - -config MFD_WM8351_CONFIG_MODE_2 - bool - depends on MFD_WM8350 - -config MFD_WM8351_CONFIG_MODE_3 - bool - depends on MFD_WM8350 - -config MFD_WM8352_CONFIG_MODE_0 - bool - depends on MFD_WM8350 - -config MFD_WM8352_CONFIG_MODE_1 - bool - depends on MFD_WM8350 - -config MFD_WM8352_CONFIG_MODE_2 - bool - depends on MFD_WM8350 - -config MFD_WM8352_CONFIG_MODE_3 - bool - depends on MFD_WM8350 - config MFD_WM8350_I2C - bool "Support Wolfson Microelectronics WM8350 with I2C" + bool "Wolfson Microelectronics WM8350 with I2C" select MFD_WM8350 depends on I2C=y && GENERIC_HARDIRQS help @@ -747,7 +1102,7 @@ config MFD_WM8350_I2C selected to enable support for the functionality of the chip. config MFD_WM8994 - bool "Support Wolfson Microelectronics WM8994" + bool "Wolfson Microelectronics WM8994" select MFD_CORE select REGMAP_I2C select REGMAP_IRQ @@ -760,365 +1115,6 @@ config MFD_WM8994 core support for the WM8994, in order to use the actual functionaltiy of the device other drivers must be enabled. -config MFD_PCF50633 - tristate "Support for NXP PCF50633" - depends on I2C - select REGMAP_I2C - help - Say yes here if you have NXP PCF50633 chip on your board. - This core driver provides register access and IRQ handling - facilities, and registers devices for the various functions - so that function-specific drivers can bind to them. - -config PCF50633_ADC - tristate "Support for NXP PCF50633 ADC" - depends on MFD_PCF50633 - help - Say yes here if you want to include support for ADC in the - NXP PCF50633 chip. 
- -config PCF50633_GPIO - tristate "Support for NXP PCF50633 GPIO" - depends on MFD_PCF50633 - help - Say yes here if you want to include support GPIO for pins on - the PCF50633 chip. - -config MFD_MC13783 - tristate - -config MFD_MC13XXX - tristate - depends on (SPI_MASTER || I2C) && GENERIC_HARDIRQS - select MFD_CORE - select MFD_MC13783 - help - Enable support for the Freescale MC13783 and MC13892 PMICs. - This driver provides common support for accessing the device, - additional drivers must be enabled in order to use the - functionality of the device. - -config MFD_MC13XXX_SPI - tristate "Freescale MC13783 and MC13892 SPI interface" - depends on SPI_MASTER && GENERIC_HARDIRQS - select REGMAP_SPI - select MFD_MC13XXX - help - Select this if your MC13xxx is connected via an SPI bus. - -config MFD_MC13XXX_I2C - tristate "Freescale MC13892 I2C interface" - depends on I2C && GENERIC_HARDIRQS - select REGMAP_I2C - select MFD_MC13XXX - help - Select this if your MC13xxx is connected via an I2C bus. - -config ABX500_CORE - bool "ST-Ericsson ABX500 Mixed Signal Circuit register functions" - default y if ARCH_U300 || ARCH_U8500 - help - Say yes here if you have the ABX500 Mixed Signal IC family - chips. This core driver expose register access functions. - Functionality specific drivers using these functions can - remain unchanged when IC changes. Binding of the functions to - actual register access is done by the IC core driver. - -config AB3100_CORE - bool "ST-Ericsson AB3100 Mixed Signal Circuit core functions" - depends on I2C=y && ABX500_CORE && GENERIC_HARDIRQS - select MFD_CORE - default y if ARCH_U300 - help - Select this to enable the AB3100 Mixed Signal IC core - functionality. This connects to a AB3100 on the I2C bus - and expose a number of symbols needed for dependent devices - to read and write registers and subscribe to events from - this multi-functional IC. This is needed to use other features - of the AB3100 such as battery-backed RTC, charging control, - LEDs, vibrator, system power and temperature, power management - and ALSA sound. - -config AB3100_OTP - tristate "ST-Ericsson AB3100 OTP functions" - depends on AB3100_CORE - default y if AB3100_CORE - help - Select this to enable the AB3100 Mixed Signal IC OTP (one-time - programmable memory) support. This exposes a sysfs file to read - out OTP values. - -config EZX_PCAP - bool "PCAP Support" - depends on GENERIC_HARDIRQS && SPI_MASTER - help - This enables the PCAP ASIC present on EZX Phones. This is - needed for MMC, TouchScreen, Sound, USB, etc.. - -config AB8500_CORE - bool "ST-Ericsson AB8500 Mixed Signal Power Management chip" - depends on GENERIC_HARDIRQS && ABX500_CORE && MFD_DB8500_PRCMU - select POWER_SUPPLY - select MFD_CORE - select IRQ_DOMAIN - help - Select this option to enable access to AB8500 power management - chip. This connects to U8500 either on the SSP/SPI bus (deprecated - since hardware version v1.0) or the I2C bus via PRCMU. It also adds - the irq_chip parts for handling the Mixed Signal chip events. - This chip embeds various other multimedia funtionalities as well. - -config AB8500_DEBUG - bool "Enable debug info via debugfs" - depends on AB8500_CORE && DEBUG_FS - default y if DEBUG_FS - help - Select this option if you want debug information using the debug - filesystem, debugfs. 
- -config AB8500_GPADC - bool "AB8500 GPADC driver" - depends on AB8500_CORE && REGULATOR_AB8500 - default y - help - AB8500 GPADC driver used to convert Acc and battery/ac/usb voltage - -config MFD_DB8500_PRCMU - bool "ST-Ericsson DB8500 Power Reset Control Management Unit" - depends on UX500_SOC_DB8500 - select MFD_CORE - help - Select this option to enable support for the DB8500 Power Reset - and Control Management Unit. This is basically an autonomous - system controller running an XP70 microprocessor, which is accessed - through a register map. - -config MFD_CS5535 - tristate "Support for CS5535 and CS5536 southbridge core functions" - select MFD_CORE - depends on PCI && X86 - ---help--- - This is the core driver for CS5535/CS5536 MFD functions. This is - necessary for using the board's GPIO and MFGPT functionality. - -config MFD_TIMBERDALE - tristate "Support for the Timberdale FPGA" - select MFD_CORE - depends on PCI && GPIOLIB - ---help--- - This is the core driver for the timberdale FPGA. This device is a - multifunction device which exposes numerous platform devices. - - The timberdale FPGA can be found on the Intel Atom development board - for in-vehicle infontainment, called Russellville. - -config LPC_SCH - tristate "Intel SCH LPC" - depends on PCI && GENERIC_HARDIRQS - select MFD_CORE - help - LPC bridge function of the Intel SCH provides support for - System Management Bus and General Purpose I/O. - -config LPC_ICH - tristate "Intel ICH LPC" - depends on PCI && GENERIC_HARDIRQS - select MFD_CORE - help - The LPC bridge function of the Intel ICH provides support for - many functional units. This driver provides needed support for - other drivers to control these functions, currently GPIO and - watchdog. - -config MFD_RDC321X - tristate "Support for RDC-R321x southbridge" - select MFD_CORE - depends on PCI && GENERIC_HARDIRQS - help - Say yes here if you want to have support for the RDC R-321x SoC - southbridge which provides access to GPIOs and Watchdog using the - southbridge PCI device configuration space. - -config MFD_JANZ_CMODIO - tristate "Support for Janz CMOD-IO PCI MODULbus Carrier Board" - select MFD_CORE - depends on PCI && GENERIC_HARDIRQS - help - This is the core driver for the Janz CMOD-IO PCI MODULbus - carrier board. This device is a PCI to MODULbus bridge which may - host many different types of MODULbus daughterboards, including - CAN and GPIO controllers. - -config MFD_JZ4740_ADC - bool "Support for the JZ4740 SoC ADC core" - select MFD_CORE - select GENERIC_IRQ_CHIP - depends on MACH_JZ4740 - help - Say yes here if you want support for the ADC unit in the JZ4740 SoC. - This driver is necessary for jz4740-battery and jz4740-hwmon driver. - -config MFD_VX855 - tristate "Support for VIA VX855/VX875 integrated south bridge" - depends on PCI && GENERIC_HARDIRQS - select MFD_CORE - help - Say yes here to enable support for various functions of the - VIA VX855/VX875 south bridge. You will need to enable the vx855_spi - and/or vx855_gpio drivers for this to do anything useful. - -config MFD_WL1273_CORE - tristate "Support for TI WL1273 FM radio." - depends on I2C && GENERIC_HARDIRQS - select MFD_CORE - default n - help - This is the core driver for the TI WL1273 FM radio. This MFD - driver connects the radio-wl1273 V4L2 module and the wl1273 - audio codec. - -config MFD_OMAP_USB_HOST - bool "Support OMAP USBHS core and TLL driver" - depends on USB_EHCI_HCD_OMAP || USB_OHCI_HCD_OMAP3 - default y - help - This is the core driver for the OAMP EHCI and OHCI drivers. 
- This MFD driver does the required setup functionalities for - OMAP USB Host drivers. - -config MFD_PM8XXX - tristate - -config MFD_PM8921_CORE - tristate "Qualcomm PM8921 PMIC chip" - depends on SSBI && BROKEN - select MFD_CORE - select MFD_PM8XXX - help - If you say yes to this option, support will be included for the - built-in PM8921 PMIC chip. - - This is required if your board has a PM8921 and uses its features, - such as: MPPs, GPIOs, regulators, interrupts, and PWM. - - Say M here if you want to include support for PM8921 chip as a module. - This will build a module called "pm8921-core". - -config MFD_PM8XXX_IRQ - bool "Support for Qualcomm PM8xxx IRQ features" - depends on MFD_PM8XXX - default y if MFD_PM8XXX - help - This is the IRQ driver for Qualcomm PM 8xxx PMIC chips. - - This is required to use certain other PM 8xxx features, such as GPIO - and MPP. - -config TPS65911_COMPARATOR - tristate - -config MFD_TPS65090 - bool "TPS65090 Power Management chips" - depends on I2C=y && GENERIC_HARDIRQS - select MFD_CORE - select REGMAP_I2C - select REGMAP_IRQ - help - If you say yes here you get support for the TPS65090 series of - Power Management chips. - This driver provides common support for accessing the device, - additional drivers must be enabled in order to use the - functionality of the device. - -config MFD_AAT2870_CORE - bool "Support for the AnalogicTech AAT2870" - select MFD_CORE - depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS - help - If you say yes here you get support for the AAT2870. - This driver provides common support for accessing the device, - additional drivers must be enabled in order to use the - functionality of the device. - -config MFD_INTEL_MSIC - bool "Support for Intel MSIC" - depends on INTEL_SCU_IPC - select MFD_CORE - help - Select this option to enable access to Intel MSIC (Avatele - Passage) chip. This chip embeds audio, battery, GPIO, etc. - devices used in Intel Medfield platforms. - -config MFD_RC5T583 - bool "Ricoh RC5T583 Power Management system device" - depends on I2C=y && GENERIC_HARDIRQS - select MFD_CORE - select REGMAP_I2C - help - Select this option to get support for the RICOH583 Power - Management system device. - This driver provides common support for accessing the device - through i2c interface. The device supports multiple sub-devices - like GPIO, interrupts, RTC, LDO and DCDC regulators, onkey. - Additional drivers must be enabled in order to use the - different functionality of the device. - -config MFD_STA2X11 - bool "STA2X11 multi function device support" - depends on STA2X11 && GENERIC_HARDIRQS - select MFD_CORE - select REGMAP_MMIO - -config MFD_SYSCON - bool "System Controller Register R/W Based on Regmap" - depends on OF - select REGMAP_MMIO - help - Select this option to enable accessing system control registers - via regmap. - -config MFD_PALMAS - bool "Support for the TI Palmas series chips" - select MFD_CORE - select REGMAP_I2C - select REGMAP_IRQ - depends on I2C=y && GENERIC_HARDIRQS - help - If you say yes here you get support for the Palmas - series of PMIC chips from Texas Instruments. - -config MFD_VIPERBOARD - tristate "Support for Nano River Technologies Viperboard" - select MFD_CORE - depends on USB && GENERIC_HARDIRQS - default n - help - Say yes here if you want support for Nano River Technologies - Viperboard. - There are mfd cell drivers available for i2c master, adc and - both gpios found on the board. The spi part does not yet - have a driver. - You need to select the mfd cell drivers separately. 
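(The "mfd cell drivers" mentioned above follow the general MFD pattern used throughout this file: a core driver enumerates its functions as mfd_cell entries and registers them as child platform devices. A minimal hypothetical sketch, using the mfd_add_devices() signature as it appears in the cros_ec driver later in this patch:

	#include <linux/mfd/core.h>
	#include <linux/platform_device.h>

	static struct mfd_cell foo_cells[] = {
		{ .name = "foo-gpio" },	/* bound by platform drivers */
		{ .name = "foo-adc" },	/* with matching names */
	};

	static int foo_probe(struct platform_device *pdev)
	{
		/* id 0, no shared resources, no IRQ base or domain */
		return mfd_add_devices(&pdev->dev, 0, foo_cells,
				       ARRAY_SIZE(foo_cells), NULL, 0, NULL);
	}

Each cell then shows up as its own platform device, which is why the help texts above keep repeating that the function drivers must be enabled separately.)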
- The drivers do not support all features the board exposes. - -config MFD_RETU - tristate "Support for Retu multi-function device" - select MFD_CORE - depends on I2C && GENERIC_HARDIRQS - select REGMAP_IRQ - help - Retu is a multi-function device found on Nokia Internet Tablets - (770, N800 and N810). - -config MFD_AS3711 - bool "Support for AS3711" - select MFD_CORE - select REGMAP_I2C - select REGMAP_IRQ - depends on I2C=y && GENERIC_HARDIRQS - help - Support for the AS3711 PMIC from AMS - endmenu endif diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index b90409c23664..718e94a2a9a7 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -8,8 +8,11 @@ obj-$(CONFIG_MFD_88PM800) += 88pm800.o 88pm80x.o obj-$(CONFIG_MFD_88PM805) += 88pm805.o 88pm80x.o obj-$(CONFIG_MFD_SM501) += sm501.o obj-$(CONFIG_MFD_ASIC3) += asic3.o tmio_core.o +obj-$(CONFIG_MFD_CROS_EC) += cros_ec.o +obj-$(CONFIG_MFD_CROS_EC_I2C) += cros_ec_i2c.o +obj-$(CONFIG_MFD_CROS_EC_SPI) += cros_ec_spi.o -rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o +rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o obj-$(CONFIG_MFD_RTSX_PCI) += rtsx_pci.o obj-$(CONFIG_HTC_EGPIO) += htc-egpio.o @@ -131,6 +134,10 @@ obj-$(CONFIG_MFD_JZ4740_ADC) += jz4740-adc.o obj-$(CONFIG_MFD_TPS6586X) += tps6586x.o obj-$(CONFIG_MFD_VX855) += vx855.o obj-$(CONFIG_MFD_WL1273_CORE) += wl1273-core.o + +si476x-core-y := si476x-cmd.o si476x-prop.o si476x-i2c.o +obj-$(CONFIG_MFD_SI476X_CORE) += si476x-core.o + obj-$(CONFIG_MFD_CS5535) += cs5535-mfd.o obj-$(CONFIG_MFD_OMAP_USB_HOST) += omap-usb-host.o omap-usb-tll.o obj-$(CONFIG_MFD_PM8921_CORE) += pm8921-core.o diff --git a/drivers/mfd/aat2870-core.c b/drivers/mfd/aat2870-core.c index f1beb4971f87..dfdb0a2b6835 100644 --- a/drivers/mfd/aat2870-core.c +++ b/drivers/mfd/aat2870-core.c @@ -367,12 +367,12 @@ static int aat2870_i2c_probe(struct i2c_client *client, int i, j; int ret = 0; - aat2870 = kzalloc(sizeof(struct aat2870_data), GFP_KERNEL); + aat2870 = devm_kzalloc(&client->dev, sizeof(struct aat2870_data), + GFP_KERNEL); if (!aat2870) { dev_err(&client->dev, "Failed to allocate memory for aat2870\n"); - ret = -ENOMEM; - goto out; + return -ENOMEM; } aat2870->dev = &client->dev; @@ -400,12 +400,12 @@ static int aat2870_i2c_probe(struct i2c_client *client, aat2870->init(aat2870); if (aat2870->en_pin >= 0) { - ret = gpio_request_one(aat2870->en_pin, GPIOF_OUT_INIT_HIGH, - "aat2870-en"); + ret = devm_gpio_request_one(&client->dev, aat2870->en_pin, + GPIOF_OUT_INIT_HIGH, "aat2870-en"); if (ret < 0) { dev_err(&client->dev, "Failed to request GPIO %d\n", aat2870->en_pin); - goto out_kfree; + return ret; } } @@ -436,11 +436,6 @@ static int aat2870_i2c_probe(struct i2c_client *client, out_disable: aat2870_disable(aat2870); - if (aat2870->en_pin >= 0) - gpio_free(aat2870->en_pin); -out_kfree: - kfree(aat2870); -out: return ret; } @@ -452,11 +447,8 @@ static int aat2870_i2c_remove(struct i2c_client *client) mfd_remove_devices(aat2870->dev); aat2870_disable(aat2870); - if (aat2870->en_pin >= 0) - gpio_free(aat2870->en_pin); if (aat2870->uninit) aat2870->uninit(aat2870); - kfree(aat2870); return 0; } diff --git a/drivers/mfd/ab3100-otp.c b/drivers/mfd/ab3100-otp.c index 8440010eb2b8..d7ce016029fa 100644 --- a/drivers/mfd/ab3100-otp.c +++ b/drivers/mfd/ab3100-otp.c @@ -248,19 +248,7 @@ static struct platform_driver ab3100_otp_driver = { .remove = __exit_p(ab3100_otp_remove), }; -static int __init ab3100_otp_init(void) -{ - return 
platform_driver_probe(&ab3100_otp_driver, - ab3100_otp_probe); -} - -static void __exit ab3100_otp_exit(void) -{ - platform_driver_unregister(&ab3100_otp_driver); -} - -module_init(ab3100_otp_init); -module_exit(ab3100_otp_exit); +module_platform_driver_probe(ab3100_otp_driver, ab3100_otp_probe); MODULE_AUTHOR("Linus Walleij <[email protected]>"); MODULE_DESCRIPTION("AB3100 OTP Readout Driver"); diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index f276352cc9ef..8e8a016effe9 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -458,22 +458,23 @@ static void update_latch_offset(u8 *offset, int i) static int ab8500_handle_hierarchical_line(struct ab8500 *ab8500, int latch_offset, u8 latch_val) { - int int_bit = __ffs(latch_val); - int line, i; + int int_bit, line, i; - do { - int_bit = __ffs(latch_val); + for (i = 0; i < ab8500->mask_size; i++) + if (ab8500->irq_reg_offset[i] == latch_offset) + break; - for (i = 0; i < ab8500->mask_size; i++) - if (ab8500->irq_reg_offset[i] == latch_offset) - break; + if (i >= ab8500->mask_size) { + dev_err(ab8500->dev, "Register offset 0x%2x not declared\n", + latch_offset); + return -ENXIO; + } - if (i >= ab8500->mask_size) { - dev_err(ab8500->dev, "Register offset 0x%2x not declared\n", - latch_offset); - return -ENXIO; - } + /* ignore masked out interrupts */ + latch_val &= ~ab8500->mask[i]; + while (latch_val) { + int_bit = __ffs(latch_val); line = (i << 3) + int_bit; latch_val &= ~(1 << int_bit); @@ -491,7 +492,7 @@ static int ab8500_handle_hierarchical_line(struct ab8500 *ab8500, line += 1; handle_nested_irq(ab8500->irq_base + line); - } while (latch_val); + } return 0; } @@ -1107,6 +1108,7 @@ static struct mfd_cell ab8500_devs[] = { }, { .name = "ab8500-usb", + .of_compatible = "stericsson,ab8500-usb", .num_resources = ARRAY_SIZE(ab8500_usb_resources), .resources = ab8500_usb_resources, }, diff --git a/drivers/mfd/ab8500-gpadc.c b/drivers/mfd/ab8500-gpadc.c index 65f72284185d..5e65b28a5d09 100644 --- a/drivers/mfd/ab8500-gpadc.c +++ b/drivers/mfd/ab8500-gpadc.c @@ -332,7 +332,7 @@ if (ad_value < 0) { return voltage; } -EXPORT_SYMBOL(ab8500_gpadc_convert); +EXPORT_SYMBOL(ab8500_gpadc_sw_hw_convert); /** * ab8500_gpadc_read_raw() - gpadc read diff --git a/drivers/mfd/ab8500-sysctrl.c b/drivers/mfd/ab8500-sysctrl.c index 272479cdb107..fbca1ced49fa 100644 --- a/drivers/mfd/ab8500-sysctrl.c +++ b/drivers/mfd/ab8500-sysctrl.c @@ -242,7 +242,7 @@ static int __init ab8500_sysctrl_init(void) { return platform_driver_register(&ab8500_sysctrl_driver); } -subsys_initcall(ab8500_sysctrl_init); +arch_initcall(ab8500_sysctrl_init); MODULE_AUTHOR("Mattias Nilsson <[email protected]"); MODULE_DESCRIPTION("AB8500 system control driver"); diff --git a/drivers/mfd/adp5520.c b/drivers/mfd/adp5520.c index 210dd038bb5a..0d2eba023439 100644 --- a/drivers/mfd/adp5520.c +++ b/drivers/mfd/adp5520.c @@ -36,6 +36,7 @@ struct adp5520_chip { struct blocking_notifier_head notifier_list; int irq; unsigned long id; + uint8_t mode; }; static int __adp5520_read(struct i2c_client *client, @@ -326,7 +327,10 @@ static int adp5520_suspend(struct device *dev) struct i2c_client *client = to_i2c_client(dev); struct adp5520_chip *chip = dev_get_drvdata(&client->dev); - adp5520_clr_bits(chip->dev, ADP5520_MODE_STATUS, ADP5520_nSTNBY); + adp5520_read(chip->dev, ADP5520_MODE_STATUS, &chip->mode); + /* All other bits are W1C */ + chip->mode &= ADP5520_BL_EN | ADP5520_DIM_EN | ADP5520_nSTNBY; + adp5520_write(chip->dev, ADP5520_MODE_STATUS, 0); return 0; } @@ 
-335,7 +339,7 @@ static int adp5520_resume(struct device *dev) struct i2c_client *client = to_i2c_client(dev); struct adp5520_chip *chip = dev_get_drvdata(&client->dev); - adp5520_set_bits(chip->dev, ADP5520_MODE_STATUS, ADP5520_nSTNBY); + adp5520_write(chip->dev, ADP5520_MODE_STATUS, chip->mode); return 0; } #endif @@ -360,17 +364,7 @@ static struct i2c_driver adp5520_driver = { .id_table = adp5520_id, }; -static int __init adp5520_init(void) -{ - return i2c_add_driver(&adp5520_driver); -} -module_init(adp5520_init); - -static void __exit adp5520_exit(void) -{ - i2c_del_driver(&adp5520_driver); -} -module_exit(adp5520_exit); +module_i2c_driver(adp5520_driver); MODULE_AUTHOR("Michael Hennerich <[email protected]>"); MODULE_DESCRIPTION("ADP5520(01) PMIC-MFD Driver"); diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c index b562c7bf8a46..6ab03043fd60 100644 --- a/drivers/mfd/arizona-core.c +++ b/drivers/mfd/arizona-core.c @@ -39,11 +39,21 @@ int arizona_clk32k_enable(struct arizona *arizona) arizona->clk32k_ref++; - if (arizona->clk32k_ref == 1) + if (arizona->clk32k_ref == 1) { + switch (arizona->pdata.clk32k_src) { + case ARIZONA_32KZ_MCLK1: + ret = pm_runtime_get_sync(arizona->dev); + if (ret != 0) + goto out; + break; + } + ret = regmap_update_bits(arizona->regmap, ARIZONA_CLOCK_32K_1, ARIZONA_CLK_32K_ENA, ARIZONA_CLK_32K_ENA); + } +out: if (ret != 0) arizona->clk32k_ref--; @@ -63,10 +73,17 @@ int arizona_clk32k_disable(struct arizona *arizona) arizona->clk32k_ref--; - if (arizona->clk32k_ref == 0) + if (arizona->clk32k_ref == 0) { regmap_update_bits(arizona->regmap, ARIZONA_CLOCK_32K_1, ARIZONA_CLK_32K_ENA, 0); + switch (arizona->pdata.clk32k_src) { + case ARIZONA_32KZ_MCLK1: + pm_runtime_put_sync(arizona->dev); + break; + } + } + mutex_unlock(&arizona->clk_lock); return ret; @@ -179,42 +196,134 @@ static irqreturn_t arizona_overclocked(int irq, void *data) return IRQ_HANDLED; } -static int arizona_wait_for_boot(struct arizona *arizona) +static int arizona_poll_reg(struct arizona *arizona, + int timeout, unsigned int reg, + unsigned int mask, unsigned int target) { - unsigned int reg; + unsigned int val = 0; int ret, i; + for (i = 0; i < timeout; i++) { + ret = regmap_read(arizona->regmap, reg, &val); + if (ret != 0) { + dev_err(arizona->dev, "Failed to read reg %u: %d\n", + reg, ret); + continue; + } + + if ((val & mask) == target) + return 0; + + msleep(1); + } + + dev_err(arizona->dev, "Polling reg %u timed out: %x\n", reg, val); + return -ETIMEDOUT; +} + +static int arizona_wait_for_boot(struct arizona *arizona) +{ + int ret; + /* * We can't use an interrupt as we need to runtime resume to do so, * we won't race with the interrupt handler as it'll be blocked on * runtime resume. 
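 *
 * (For reference: arizona_poll_reg(), added just above, wraps the
 * wait-for-bit idiom used here. Since it sleeps about 1ms per attempt,
 * its timeout argument is roughly a millisecond budget; condensed:
 *
 *	for (i = 0; i < timeout; i++) {
 *		if (!regmap_read(map, reg, &val) && (val & mask) == target)
 *			return 0;
 *		msleep(1);
 *	}
 *	return -ETIMEDOUT;
 *
 * so the 5 passed below bounds the boot wait at ~5ms, and the 25 used
 * for FLL lock in the hardware-patch sequence bounds it at ~25ms.)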
*/ - for (i = 0; i < 5; i++) { - msleep(1); + ret = arizona_poll_reg(arizona, 5, ARIZONA_INTERRUPT_RAW_STATUS_5, + ARIZONA_BOOT_DONE_STS, ARIZONA_BOOT_DONE_STS); - ret = regmap_read(arizona->regmap, - ARIZONA_INTERRUPT_RAW_STATUS_5, ®); - if (ret != 0) { - dev_err(arizona->dev, "Failed to read boot state: %d\n", - ret); - continue; - } + if (!ret) + regmap_write(arizona->regmap, ARIZONA_INTERRUPT_STATUS_5, + ARIZONA_BOOT_DONE_STS); - if (reg & ARIZONA_BOOT_DONE_STS) - break; + pm_runtime_mark_last_busy(arizona->dev); + + return ret; +} + +static int arizona_apply_hardware_patch(struct arizona* arizona) +{ + unsigned int fll, sysclk; + int ret, err; + + regcache_cache_bypass(arizona->regmap, true); + + /* Cache existing FLL and SYSCLK settings */ + ret = regmap_read(arizona->regmap, ARIZONA_FLL1_CONTROL_1, &fll); + if (ret != 0) { + dev_err(arizona->dev, "Failed to cache FLL settings: %d\n", + ret); + return ret; + } + ret = regmap_read(arizona->regmap, ARIZONA_SYSTEM_CLOCK_1, &sysclk); + if (ret != 0) { + dev_err(arizona->dev, "Failed to cache SYSCLK settings: %d\n", + ret); + return ret; } - if (reg & ARIZONA_BOOT_DONE_STS) { - regmap_write(arizona->regmap, ARIZONA_INTERRUPT_STATUS_5, - ARIZONA_BOOT_DONE_STS); - } else { - dev_err(arizona->dev, "Device boot timed out: %x\n", reg); - return -ETIMEDOUT; + /* Start up SYSCLK using the FLL in free running mode */ + ret = regmap_write(arizona->regmap, ARIZONA_FLL1_CONTROL_1, + ARIZONA_FLL1_ENA | ARIZONA_FLL1_FREERUN); + if (ret != 0) { + dev_err(arizona->dev, + "Failed to start FLL in freerunning mode: %d\n", + ret); + return ret; + } + ret = arizona_poll_reg(arizona, 25, ARIZONA_INTERRUPT_RAW_STATUS_5, + ARIZONA_FLL1_CLOCK_OK_STS, + ARIZONA_FLL1_CLOCK_OK_STS); + if (ret != 0) { + ret = -ETIMEDOUT; + goto err_fll; } - pm_runtime_mark_last_busy(arizona->dev); + ret = regmap_write(arizona->regmap, ARIZONA_SYSTEM_CLOCK_1, 0x0144); + if (ret != 0) { + dev_err(arizona->dev, "Failed to start SYSCLK: %d\n", ret); + goto err_fll; + } - return 0; + /* Start the write sequencer and wait for it to finish */ + ret = regmap_write(arizona->regmap, ARIZONA_WRITE_SEQUENCER_CTRL_0, + ARIZONA_WSEQ_ENA | ARIZONA_WSEQ_START | 160); + if (ret != 0) { + dev_err(arizona->dev, "Failed to start write sequencer: %d\n", + ret); + goto err_sysclk; + } + ret = arizona_poll_reg(arizona, 5, ARIZONA_WRITE_SEQUENCER_CTRL_1, + ARIZONA_WSEQ_BUSY, 0); + if (ret != 0) { + regmap_write(arizona->regmap, ARIZONA_WRITE_SEQUENCER_CTRL_0, + ARIZONA_WSEQ_ABORT); + ret = -ETIMEDOUT; + } + +err_sysclk: + err = regmap_write(arizona->regmap, ARIZONA_SYSTEM_CLOCK_1, sysclk); + if (err != 0) { + dev_err(arizona->dev, + "Failed to re-apply old SYSCLK settings: %d\n", + err); + } + +err_fll: + err = regmap_write(arizona->regmap, ARIZONA_FLL1_CONTROL_1, fll); + if (err != 0) { + dev_err(arizona->dev, + "Failed to re-apply old FLL settings: %d\n", + err); + } + + regcache_cache_bypass(arizona->regmap, false); + + if (ret != 0) + return ret; + else + return err; } #ifdef CONFIG_PM_RUNTIME @@ -233,20 +342,44 @@ static int arizona_runtime_resume(struct device *dev) regcache_cache_only(arizona->regmap, false); - ret = arizona_wait_for_boot(arizona); - if (ret != 0) { - regulator_disable(arizona->dcvdd); - return ret; + switch (arizona->type) { + case WM5102: + ret = wm5102_patch(arizona); + if (ret != 0) { + dev_err(arizona->dev, "Failed to apply patch: %d\n", + ret); + goto err; + } + + ret = arizona_apply_hardware_patch(arizona); + if (ret != 0) { + dev_err(arizona->dev, + "Failed to apply hardware 
patch: %d\n", + ret); + goto err; + } + break; + default: + ret = arizona_wait_for_boot(arizona); + if (ret != 0) { + goto err; + } + + break; } ret = regcache_sync(arizona->regmap); if (ret != 0) { dev_err(arizona->dev, "Failed to restore register cache\n"); - regulator_disable(arizona->dcvdd); - return ret; + goto err; } return 0; + +err: + regcache_cache_only(arizona->regmap, true); + regulator_disable(arizona->dcvdd); + return ret; } static int arizona_runtime_suspend(struct device *dev) @@ -371,6 +504,17 @@ int arizona_dev_init(struct arizona *arizona) goto err_early; } + if (arizona->pdata.reset) { + /* Start out with /RESET low to put the chip into reset */ + ret = gpio_request_one(arizona->pdata.reset, + GPIOF_DIR_OUT | GPIOF_INIT_LOW, + "arizona /RESET"); + if (ret != 0) { + dev_err(dev, "Failed to request /RESET: %d\n", ret); + goto err_early; + } + } + ret = regulator_bulk_enable(arizona->num_core_supplies, arizona->core_supplies); if (ret != 0) { @@ -386,16 +530,8 @@ int arizona_dev_init(struct arizona *arizona) } if (arizona->pdata.reset) { - /* Start out with /RESET low to put the chip into reset */ - ret = gpio_request_one(arizona->pdata.reset, - GPIOF_DIR_OUT | GPIOF_INIT_LOW, - "arizona /RESET"); - if (ret != 0) { - dev_err(dev, "Failed to request /RESET: %d\n", ret); - goto err_dcvdd; - } - gpio_set_value_cansleep(arizona->pdata.reset, 1); + msleep(1); } regcache_cache_only(arizona->regmap, false); @@ -424,6 +560,7 @@ int arizona_dev_init(struct arizona *arizona) arizona->type = WM5102; } apply_patch = wm5102_patch; + arizona->rev &= 0x7; break; #endif #ifdef CONFIG_MFD_WM5110 @@ -454,6 +591,8 @@ int arizona_dev_init(struct arizona *arizona) goto err_reset; } + msleep(1); + ret = regcache_sync(arizona->regmap); if (ret != 0) { dev_err(dev, "Failed to sync device: %d\n", ret); @@ -461,10 +600,24 @@ int arizona_dev_init(struct arizona *arizona) } } - ret = arizona_wait_for_boot(arizona); - if (ret != 0) { - dev_err(arizona->dev, "Device failed initial boot: %d\n", ret); - goto err_reset; + switch (arizona->type) { + case WM5102: + ret = regmap_read(arizona->regmap, 0x19, &val); + if (ret != 0) + dev_err(dev, + "Failed to check write sequencer state: %d\n", + ret); + else if (val & 0x01) + break; + /* Fall through */ + default: + ret = arizona_wait_for_boot(arizona); + if (ret != 0) { + dev_err(arizona->dev, + "Device failed initial boot: %d\n", ret); + goto err_reset; + } + break; } if (apply_patch) { @@ -474,6 +627,20 @@ int arizona_dev_init(struct arizona *arizona) ret); goto err_reset; } + + switch (arizona->type) { + case WM5102: + ret = arizona_apply_hardware_patch(arizona); + if (ret != 0) { + dev_err(arizona->dev, + "Failed to apply hardware patch: %d\n", + ret); + goto err_reset; + } + break; + default: + break; + } } for (i = 0; i < ARRAY_SIZE(arizona->pdata.gpio_defaults); i++) { @@ -498,6 +665,7 @@ int arizona_dev_init(struct arizona *arizona) regmap_update_bits(arizona->regmap, ARIZONA_CLOCK_32K_1, ARIZONA_CLK_32K_SRC_MASK, arizona->pdata.clk32k_src - 1); + arizona_clk32k_enable(arizona); break; case ARIZONA_32KZ_NONE: regmap_update_bits(arizona->regmap, ARIZONA_CLOCK_32K_1, @@ -511,10 +679,16 @@ int arizona_dev_init(struct arizona *arizona) } for (i = 0; i < ARIZONA_MAX_MICBIAS; i++) { - if (!arizona->pdata.micbias[i].mV) + if (!arizona->pdata.micbias[i].mV && + !arizona->pdata.micbias[i].bypass) continue; + /* Apply default for bypass mode */ + if (!arizona->pdata.micbias[i].mV) + arizona->pdata.micbias[i].mV = 2800; + val = (arizona->pdata.micbias[i].mV - 
1500) / 100; + val <<= ARIZONA_MICB1_LVL_SHIFT; if (arizona->pdata.micbias[i].ext_cap) @@ -526,10 +700,14 @@ int arizona_dev_init(struct arizona *arizona) if (arizona->pdata.micbias[i].fast_start) val |= ARIZONA_MICB1_RATE; + if (arizona->pdata.micbias[i].bypass) + val |= ARIZONA_MICB1_BYPASS; + regmap_update_bits(arizona->regmap, ARIZONA_MIC_BIAS_CTRL_1 + i, ARIZONA_MICB1_LVL_MASK | ARIZONA_MICB1_DISCH | + ARIZONA_MICB1_BYPASS | ARIZONA_MICB1_RATE, val); } @@ -610,10 +788,9 @@ err_irq: arizona_irq_exit(arizona); err_reset: if (arizona->pdata.reset) { - gpio_set_value_cansleep(arizona->pdata.reset, 1); + gpio_set_value_cansleep(arizona->pdata.reset, 0); gpio_free(arizona->pdata.reset); } -err_dcvdd: regulator_disable(arizona->dcvdd); err_enable: regulator_bulk_disable(arizona->num_core_supplies, diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c index 2bec5f0db3ee..64cd9b6dac92 100644 --- a/drivers/mfd/arizona-irq.c +++ b/drivers/mfd/arizona-irq.c @@ -94,6 +94,7 @@ static irqreturn_t arizona_ctrlif_err(int irq, void *data) static irqreturn_t arizona_irq_thread(int irq, void *data) { struct arizona *arizona = data; + bool poll; unsigned int val; int ret; @@ -103,20 +104,39 @@ static irqreturn_t arizona_irq_thread(int irq, void *data) return IRQ_NONE; } - /* Always handle the AoD domain */ - handle_nested_irq(irq_find_mapping(arizona->virq, 0)); + do { + poll = false; + + /* Always handle the AoD domain */ + handle_nested_irq(irq_find_mapping(arizona->virq, 0)); + + /* + * Check if one of the main interrupts is asserted and only + * check that domain if it is. + */ + ret = regmap_read(arizona->regmap, ARIZONA_IRQ_PIN_STATUS, + &val); + if (ret == 0 && val & ARIZONA_IRQ1_STS) { + handle_nested_irq(irq_find_mapping(arizona->virq, 1)); + } else if (ret != 0) { + dev_err(arizona->dev, + "Failed to read main IRQ status: %d\n", ret); + } - /* - * Check if one of the main interrupts is asserted and only - * check that domain if it is. - */ - ret = regmap_read(arizona->regmap, ARIZONA_IRQ_PIN_STATUS, &val); - if (ret == 0 && val & ARIZONA_IRQ1_STS) { - handle_nested_irq(irq_find_mapping(arizona->virq, 1)); - } else if (ret != 0) { - dev_err(arizona->dev, "Failed to read main IRQ status: %d\n", - ret); - } + /* + * Poll the IRQ pin status to see if we're really done + * if the interrupt controller can't do it for us. 
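 *
 * (Background: with a level-triggered interrupt the controller re-fires
 * while the line stays asserted, but with an edge trigger an event that
 * arrives while this thread is already running produces no new edge and
 * would be lost. Hence the loop: after handling, re-read the GPIO and go
 * around again while it is still at the active level, sketched for the
 * rising-edge case as
 *
 *	if (pdata->irq_flags & IRQF_TRIGGER_RISING &&
 *	    gpio_get_value_cansleep(pdata->irq_gpio))
 *		poll = true;	// line still high, events still pending
 *
 * which the code below applies for both polarities.)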
+ */ + if (!arizona->pdata.irq_gpio) { + break; + } else if (arizona->pdata.irq_flags & IRQF_TRIGGER_RISING && + gpio_get_value_cansleep(arizona->pdata.irq_gpio)) { + poll = true; + } else if (arizona->pdata.irq_flags & IRQF_TRIGGER_FALLING && + !gpio_get_value_cansleep(arizona->pdata.irq_gpio)) { + poll = true; + } + } while (poll); pm_runtime_mark_last_busy(arizona->dev); pm_runtime_put_autosuspend(arizona->dev); @@ -169,6 +189,7 @@ int arizona_irq_init(struct arizona *arizona) int ret, i; const struct regmap_irq_chip *aod, *irq; bool ctrlif_error = true; + struct irq_data *irq_data; switch (arizona->type) { #ifdef CONFIG_MFD_WM5102 @@ -192,7 +213,36 @@ int arizona_irq_init(struct arizona *arizona) return -EINVAL; } - if (arizona->pdata.irq_active_high) { + /* Disable all wake sources by default */ + regmap_write(arizona->regmap, ARIZONA_WAKE_CONTROL, 0); + + /* Read the flags from the interrupt controller if not specified */ + if (!arizona->pdata.irq_flags) { + irq_data = irq_get_irq_data(arizona->irq); + if (!irq_data) { + dev_err(arizona->dev, "Invalid IRQ: %d\n", + arizona->irq); + return -EINVAL; + } + + arizona->pdata.irq_flags = irqd_get_trigger_type(irq_data); + switch (arizona->pdata.irq_flags) { + case IRQF_TRIGGER_LOW: + case IRQF_TRIGGER_HIGH: + case IRQF_TRIGGER_RISING: + case IRQF_TRIGGER_FALLING: + break; + + case IRQ_TYPE_NONE: + default: + /* Device default */ + arizona->pdata.irq_flags = IRQF_TRIGGER_LOW; + break; + } + } + + if (arizona->pdata.irq_flags & (IRQF_TRIGGER_HIGH | + IRQF_TRIGGER_RISING)) { ret = regmap_update_bits(arizona->regmap, ARIZONA_IRQ_CTRL_1, ARIZONA_IRQ_POL, 0); if (ret != 0) { @@ -200,12 +250,10 @@ int arizona_irq_init(struct arizona *arizona) ret); goto err; } - - flags |= IRQF_TRIGGER_HIGH; - } else { - flags |= IRQF_TRIGGER_LOW; } + flags |= arizona->pdata.irq_flags; + /* Allocate a virtual IRQ domain to distribute to the regmap domains */ arizona->virq = irq_domain_add_linear(NULL, 2, &arizona_domain_ops, arizona); @@ -257,11 +305,31 @@ int arizona_irq_init(struct arizona *arizona) } } + /* Used to emulate edge trigger and to work around broken pinmux */ + if (arizona->pdata.irq_gpio) { + if (gpio_to_irq(arizona->pdata.irq_gpio) != arizona->irq) { + dev_warn(arizona->dev, "IRQ %d is not GPIO %d (%d)\n", + arizona->irq, arizona->pdata.irq_gpio, + gpio_to_irq(arizona->pdata.irq_gpio)); + arizona->irq = gpio_to_irq(arizona->pdata.irq_gpio); + } + + ret = devm_gpio_request_one(arizona->dev, + arizona->pdata.irq_gpio, + GPIOF_IN, "arizona IRQ"); + if (ret != 0) { + dev_err(arizona->dev, + "Failed to request IRQ GPIO %d:: %d\n", + arizona->pdata.irq_gpio, ret); + arizona->pdata.irq_gpio = 0; + } + } + ret = request_threaded_irq(arizona->irq, NULL, arizona_irq_thread, flags, "arizona", arizona); if (ret != 0) { - dev_err(arizona->dev, "Failed to request IRQ %d: %d\n", + dev_err(arizona->dev, "Failed to request primary IRQ %d: %d\n", arizona->irq, ret); goto err_main_irq; } diff --git a/drivers/mfd/arizona-spi.c b/drivers/mfd/arizona-spi.c index 1b9fdd698b03..b57e642d2b4a 100644 --- a/drivers/mfd/arizona-spi.c +++ b/drivers/mfd/arizona-spi.c @@ -67,7 +67,7 @@ static int arizona_spi_probe(struct spi_device *spi) static int arizona_spi_remove(struct spi_device *spi) { - struct arizona *arizona = dev_get_drvdata(&spi->dev); + struct arizona *arizona = spi_get_drvdata(spi); arizona_dev_exit(arizona); return 0; } diff --git a/drivers/mfd/as3711.c b/drivers/mfd/as3711.c index e994c9691124..01e414162702 100644 --- a/drivers/mfd/as3711.c +++ 
b/drivers/mfd/as3711.c @@ -112,16 +112,34 @@ static const struct regmap_config as3711_regmap_config = { .cache_type = REGCACHE_RBTREE, }; +#ifdef CONFIG_OF +static struct of_device_id as3711_of_match[] = { + {.compatible = "ams,as3711",}, + {} +}; +MODULE_DEVICE_TABLE(of, as3711_of_match); +#endif + static int as3711_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct as3711 *as3711; - struct as3711_platform_data *pdata = client->dev.platform_data; + struct as3711_platform_data *pdata; unsigned int id1, id2; int ret; - if (!pdata) - dev_dbg(&client->dev, "Platform data not found\n"); + if (!client->dev.of_node) { + pdata = client->dev.platform_data; + if (!pdata) + dev_dbg(&client->dev, "Platform data not found\n"); + } else { + pdata = devm_kzalloc(&client->dev, + sizeof(*pdata), GFP_KERNEL); + if (!pdata) { + dev_err(&client->dev, "Failed to allocate pdata\n"); + return -ENOMEM; + } + } as3711 = devm_kzalloc(&client->dev, sizeof(struct as3711), GFP_KERNEL); if (!as3711) { @@ -193,7 +211,8 @@ static struct i2c_driver as3711_i2c_driver = { .driver = { .name = "as3711", .owner = THIS_MODULE, - }, + .of_match_table = of_match_ptr(as3711_of_match), + }, .probe = as3711_i2c_probe, .remove = as3711_i2c_remove, .id_table = as3711_i2c_id, diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c new file mode 100644 index 000000000000..10cd14e35eb0 --- /dev/null +++ b/drivers/mfd/cros_ec.c @@ -0,0 +1,196 @@ +/* + * ChromeOS EC multi-function device + * + * Copyright (C) 2012 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * The ChromeOS EC multi function device is used to mux all the requests + * to the EC device for its multiple features: keyboard controller, + * battery charging and regulator control, firmware update. 
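 *
 * (The wire format built by cros_ec_prepare_tx() below is a small framed
 * packet: a version byte (EC_CMD_VERSION0 + msg->version), the command
 * code, the payload length, the payload itself, then an 8-bit checksum
 * over everything before it. A worked example, for a hypothetical
 * command 0x01, version 0, payload {0x10, 0x20}:
 *
 *	out[] = { EC_CMD_VERSION0, 0x01, 0x02, 0x10, 0x20, csum };
 *	csum  = (EC_CMD_VERSION0 + 0x01 + 0x02 + 0x10 + 0x20) & 0xff;
 *
 * so the receiving side can validate header and payload with a single
 * running sum, as the I2C and SPI transports in this series both do.)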
+ */ + +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/mfd/core.h> +#include <linux/mfd/cros_ec.h> +#include <linux/mfd/cros_ec_commands.h> + +int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, + struct cros_ec_msg *msg) +{ + uint8_t *out; + int csum, i; + + BUG_ON(msg->out_len > EC_HOST_PARAM_SIZE); + out = ec_dev->dout; + out[0] = EC_CMD_VERSION0 + msg->version; + out[1] = msg->cmd; + out[2] = msg->out_len; + csum = out[0] + out[1] + out[2]; + for (i = 0; i < msg->out_len; i++) + csum += out[EC_MSG_TX_HEADER_BYTES + i] = msg->out_buf[i]; + out[EC_MSG_TX_HEADER_BYTES + msg->out_len] = (uint8_t)(csum & 0xff); + + return EC_MSG_TX_PROTO_BYTES + msg->out_len; +} +EXPORT_SYMBOL(cros_ec_prepare_tx); + +static int cros_ec_command_sendrecv(struct cros_ec_device *ec_dev, + uint16_t cmd, void *out_buf, int out_len, + void *in_buf, int in_len) +{ + struct cros_ec_msg msg; + + msg.version = cmd >> 8; + msg.cmd = cmd & 0xff; + msg.out_buf = out_buf; + msg.out_len = out_len; + msg.in_buf = in_buf; + msg.in_len = in_len; + + return ec_dev->command_xfer(ec_dev, &msg); +} + +static int cros_ec_command_recv(struct cros_ec_device *ec_dev, + uint16_t cmd, void *buf, int buf_len) +{ + return cros_ec_command_sendrecv(ec_dev, cmd, NULL, 0, buf, buf_len); +} + +static int cros_ec_command_send(struct cros_ec_device *ec_dev, + uint16_t cmd, void *buf, int buf_len) +{ + return cros_ec_command_sendrecv(ec_dev, cmd, buf, buf_len, NULL, 0); +} + +static irqreturn_t ec_irq_thread(int irq, void *data) +{ + struct cros_ec_device *ec_dev = data; + + if (device_may_wakeup(ec_dev->dev)) + pm_wakeup_event(ec_dev->dev, 0); + + blocking_notifier_call_chain(&ec_dev->event_notifier, 1, ec_dev); + + return IRQ_HANDLED; +} + +static struct mfd_cell cros_devs[] = { + { + .name = "cros-ec-keyb", + .id = 1, + .of_compatible = "google,cros-ec-keyb", + }, +}; + +int cros_ec_register(struct cros_ec_device *ec_dev) +{ + struct device *dev = ec_dev->dev; + int err = 0; + + BLOCKING_INIT_NOTIFIER_HEAD(&ec_dev->event_notifier); + + ec_dev->command_send = cros_ec_command_send; + ec_dev->command_recv = cros_ec_command_recv; + ec_dev->command_sendrecv = cros_ec_command_sendrecv; + + if (ec_dev->din_size) { + ec_dev->din = kmalloc(ec_dev->din_size, GFP_KERNEL); + if (!ec_dev->din) { + err = -ENOMEM; + goto fail_din; + } + } + if (ec_dev->dout_size) { + ec_dev->dout = kmalloc(ec_dev->dout_size, GFP_KERNEL); + if (!ec_dev->dout) { + err = -ENOMEM; + goto fail_dout; + } + } + + if (!ec_dev->irq) { + dev_dbg(dev, "no valid IRQ: %d\n", ec_dev->irq); + goto fail_irq; + } + + err = request_threaded_irq(ec_dev->irq, NULL, ec_irq_thread, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + "chromeos-ec", ec_dev); + if (err) { + dev_err(dev, "request irq %d: error %d\n", ec_dev->irq, err); + goto fail_irq; + } + + err = mfd_add_devices(dev, 0, cros_devs, + ARRAY_SIZE(cros_devs), + NULL, ec_dev->irq, NULL); + if (err) { + dev_err(dev, "failed to add mfd devices\n"); + goto fail_mfd; + } + + dev_info(dev, "Chrome EC (%s)\n", ec_dev->name); + + return 0; + +fail_mfd: + free_irq(ec_dev->irq, ec_dev); +fail_irq: + kfree(ec_dev->dout); +fail_dout: + kfree(ec_dev->din); +fail_din: + return err; +} +EXPORT_SYMBOL(cros_ec_register); + +int cros_ec_remove(struct cros_ec_device *ec_dev) +{ + mfd_remove_devices(ec_dev->dev); + free_irq(ec_dev->irq, ec_dev); + kfree(ec_dev->dout); + kfree(ec_dev->din); + + return 0; +} +EXPORT_SYMBOL(cros_ec_remove); + +#ifdef CONFIG_PM_SLEEP +int cros_ec_suspend(struct cros_ec_device *ec_dev) 
+{ + struct device *dev = ec_dev->dev; + + if (device_may_wakeup(dev)) + ec_dev->wake_enabled = !enable_irq_wake(ec_dev->irq); + + disable_irq(ec_dev->irq); + ec_dev->was_wake_device = ec_dev->wake_enabled; + + return 0; +} +EXPORT_SYMBOL(cros_ec_suspend); + +int cros_ec_resume(struct cros_ec_device *ec_dev) +{ + enable_irq(ec_dev->irq); + + if (ec_dev->wake_enabled) { + disable_irq_wake(ec_dev->irq); + ec_dev->wake_enabled = 0; + } + + return 0; +} +EXPORT_SYMBOL(cros_ec_resume); + +#endif diff --git a/drivers/mfd/cros_ec_i2c.c b/drivers/mfd/cros_ec_i2c.c new file mode 100644 index 000000000000..123044608b63 --- /dev/null +++ b/drivers/mfd/cros_ec_i2c.c @@ -0,0 +1,201 @@ +/* + * ChromeOS EC multi-function device (I2C) + * + * Copyright (C) 2012 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/i2c.h> +#include <linux/interrupt.h> +#include <linux/mfd/cros_ec.h> +#include <linux/mfd/cros_ec_commands.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +static inline struct cros_ec_device *to_ec_dev(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + return i2c_get_clientdata(client); +} + +static int cros_ec_command_xfer(struct cros_ec_device *ec_dev, + struct cros_ec_msg *msg) +{ + struct i2c_client *client = ec_dev->priv; + int ret = -ENOMEM; + int i; + int packet_len; + u8 *out_buf = NULL; + u8 *in_buf = NULL; + u8 sum; + struct i2c_msg i2c_msg[2]; + + i2c_msg[0].addr = client->addr; + i2c_msg[0].flags = 0; + i2c_msg[1].addr = client->addr; + i2c_msg[1].flags = I2C_M_RD; + + /* + * allocate larger packet (one byte for checksum, one byte for + * length, and one for result code) + */ + packet_len = msg->in_len + 3; + in_buf = kzalloc(packet_len, GFP_KERNEL); + if (!in_buf) + goto done; + i2c_msg[1].len = packet_len; + i2c_msg[1].buf = (char *)in_buf; + + /* + * allocate larger packet (one byte for checksum, one for + * command code, one for length, and one for command version) + */ + packet_len = msg->out_len + 4; + out_buf = kzalloc(packet_len, GFP_KERNEL); + if (!out_buf) + goto done; + i2c_msg[0].len = packet_len; + i2c_msg[0].buf = (char *)out_buf; + + out_buf[0] = EC_CMD_VERSION0 + msg->version; + out_buf[1] = msg->cmd; + out_buf[2] = msg->out_len; + + /* copy message payload and compute checksum */ + sum = out_buf[0] + out_buf[1] + out_buf[2]; + for (i = 0; i < msg->out_len; i++) { + out_buf[3 + i] = msg->out_buf[i]; + sum += out_buf[3 + i]; + } + out_buf[3 + msg->out_len] = sum; + + /* send command to EC and read answer */ + ret = i2c_transfer(client->adapter, i2c_msg, 2); + if (ret < 0) { + dev_err(ec_dev->dev, "i2c transfer failed: %d\n", ret); + goto done; + } else if (ret != 2) { + dev_err(ec_dev->dev, "failed to get response: %d\n", ret); + ret = -EIO; + goto done; + } + + /* check response error code */ + if (i2c_msg[1].buf[0]) { + dev_warn(ec_dev->dev, "command 0x%02x returned an error %d\n", + msg->cmd, i2c_msg[1].buf[0]); + ret = -EINVAL; + goto done; + } + + /* copy response packet payload and compute checksum */ + sum = in_buf[0] + 
in_buf[1]; + for (i = 0; i < msg->in_len; i++) { + msg->in_buf[i] = in_buf[2 + i]; + sum += in_buf[2 + i]; + } + dev_dbg(ec_dev->dev, "packet: %*ph, sum = %02x\n", + i2c_msg[1].len, in_buf, sum); + if (sum != in_buf[2 + msg->in_len]) { + dev_err(ec_dev->dev, "bad packet checksum\n"); + ret = -EBADMSG; + goto done; + } + + ret = 0; + done: + kfree(in_buf); + kfree(out_buf); + return ret; +} + +static int cros_ec_probe_i2c(struct i2c_client *client, + const struct i2c_device_id *dev_id) +{ + struct device *dev = &client->dev; + struct cros_ec_device *ec_dev = NULL; + int err; + + ec_dev = devm_kzalloc(dev, sizeof(*ec_dev), GFP_KERNEL); + if (!ec_dev) + return -ENOMEM; + + i2c_set_clientdata(client, ec_dev); + ec_dev->name = "I2C"; + ec_dev->dev = dev; + ec_dev->priv = client; + ec_dev->irq = client->irq; + ec_dev->command_xfer = cros_ec_command_xfer; + ec_dev->ec_name = client->name; + ec_dev->phys_name = client->adapter->name; + ec_dev->parent = &client->dev; + + err = cros_ec_register(ec_dev); + if (err) { + dev_err(dev, "cannot register EC\n"); + return err; + } + + return 0; +} + +static int cros_ec_remove_i2c(struct i2c_client *client) +{ + struct cros_ec_device *ec_dev = i2c_get_clientdata(client); + + cros_ec_remove(ec_dev); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int cros_ec_i2c_suspend(struct device *dev) +{ + struct cros_ec_device *ec_dev = to_ec_dev(dev); + + return cros_ec_suspend(ec_dev); +} + +static int cros_ec_i2c_resume(struct device *dev) +{ + struct cros_ec_device *ec_dev = to_ec_dev(dev); + + return cros_ec_resume(ec_dev); +} +#endif + +static SIMPLE_DEV_PM_OPS(cros_ec_i2c_pm_ops, cros_ec_i2c_suspend, + cros_ec_i2c_resume); + +static const struct i2c_device_id cros_ec_i2c_id[] = { + { "cros-ec-i2c", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, cros_ec_i2c_id); + +static struct i2c_driver cros_ec_driver = { + .driver = { + .name = "cros-ec-i2c", + .owner = THIS_MODULE, + .pm = &cros_ec_i2c_pm_ops, + }, + .probe = cros_ec_probe_i2c, + .remove = cros_ec_remove_i2c, + .id_table = cros_ec_i2c_id, +}; + +module_i2c_driver(cros_ec_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ChromeOS EC multi function device"); diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c new file mode 100644 index 000000000000..19193cf1e7a1 --- /dev/null +++ b/drivers/mfd/cros_ec_spi.c @@ -0,0 +1,375 @@ +/* + * ChromeOS EC multi-function device (SPI) + * + * Copyright (C) 2012 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mfd/cros_ec.h> +#include <linux/mfd/cros_ec_commands.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spi/spi.h> + + +/* The header byte, which follows the preamble */ +#define EC_MSG_HEADER 0xec + +/* + * Number of EC preamble bytes we read at a time. Since it takes + * about 400-500us for the EC to respond there is not a lot of + * point in tuning this. 
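 *
 * (For scale: one preamble chunk is EC_MSG_PREAMBLE_COUNT = 32 bytes,
 * defined just below, so at the 5MHz rate cited later in this comment
 * each poll costs
 *
 *	32 bytes * 8 = 256 bits; 256 bits / 5 MHz = ~51us on the wire,
 *
 * an order of magnitude finer than the 400-500us response time.)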
If the EC could respond faster then + * we could increase this so that might expect the preamble and + * message to occur in a single transaction. However, the maximum + * SPI transfer size is 256 bytes, so at 5MHz we need a response + * time of perhaps <320us (200 bytes / 1600 bits). + */ +#define EC_MSG_PREAMBLE_COUNT 32 + +/* + * We must get a response from the EC in 5ms. This is a very long + * time, but the flash write command can take 2-3ms. The EC command + * processing is currently not very fast (about 500us). We could + * look at speeding this up and making the flash write command a + * 'slow' command, requiring a GET_STATUS wait loop, like flash + * erase. + */ +#define EC_MSG_DEADLINE_MS 5 + +/* + * Time between raising the SPI chip select (for the end of a + * transaction) and dropping it again (for the next transaction). + * If we go too fast, the EC will miss the transaction. It seems + * that 50us is enough with the 16MHz STM32 EC. + */ +#define EC_SPI_RECOVERY_TIME_NS (50 * 1000) + +/** + * struct cros_ec_spi - information about a SPI-connected EC + * + * @spi: SPI device we are connected to + * @last_transfer_ns: time that we last finished a transfer, or 0 if there + * if no record + */ +struct cros_ec_spi { + struct spi_device *spi; + s64 last_transfer_ns; +}; + +static void debug_packet(struct device *dev, const char *name, u8 *ptr, + int len) +{ +#ifdef DEBUG + int i; + + dev_dbg(dev, "%s: ", name); + for (i = 0; i < len; i++) + dev_cont(dev, " %02x", ptr[i]); +#endif +} + +/** + * cros_ec_spi_receive_response - Receive a response from the EC. + * + * This function has two phases: reading the preamble bytes (since if we read + * data from the EC before it is ready to send, we just get preamble) and + * reading the actual message. + * + * The received data is placed into ec_dev->din. + * + * @ec_dev: ChromeOS EC device + * @need_len: Number of message bytes we need to read + */ +static int cros_ec_spi_receive_response(struct cros_ec_device *ec_dev, + int need_len) +{ + struct cros_ec_spi *ec_spi = ec_dev->priv; + struct spi_transfer trans; + struct spi_message msg; + u8 *ptr, *end; + int ret; + unsigned long deadline; + int todo; + + /* Receive data until we see the header byte */ + deadline = jiffies + msecs_to_jiffies(EC_MSG_DEADLINE_MS); + do { + memset(&trans, '\0', sizeof(trans)); + trans.cs_change = 1; + trans.rx_buf = ptr = ec_dev->din; + trans.len = EC_MSG_PREAMBLE_COUNT; + + spi_message_init(&msg); + spi_message_add_tail(&trans, &msg); + ret = spi_sync(ec_spi->spi, &msg); + if (ret < 0) { + dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); + return ret; + } + + for (end = ptr + EC_MSG_PREAMBLE_COUNT; ptr != end; ptr++) { + if (*ptr == EC_MSG_HEADER) { + dev_dbg(ec_dev->dev, "msg found at %ld\n", + ptr - ec_dev->din); + break; + } + } + + if (time_after(jiffies, deadline)) { + dev_warn(ec_dev->dev, "EC failed to respond in time\n"); + return -ETIMEDOUT; + } + } while (ptr == end); + + /* + * ptr now points to the header byte. Copy any valid data to the + * start of our buffer + */ + todo = end - ++ptr; + BUG_ON(todo < 0 || todo > ec_dev->din_size); + todo = min(todo, need_len); + memmove(ec_dev->din, ptr, todo); + ptr = ec_dev->din + todo; + dev_dbg(ec_dev->dev, "need %d, got %d bytes from preamble\n", + need_len, todo); + need_len -= todo; + + /* Receive data until we have it all */ + while (need_len > 0) { + /* + * We can't support transfers larger than the SPI FIFO size + * unless we have DMA. We don't have DMA on the ISP SPI ports + * for Exynos. 
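 *
 * (An aside on cros_ec_command_spi_xfer() further below: its
 * inter-transaction guard is
 *
 *	delay = timespec_to_ns(&ts) - ec_spi->last_transfer_ns;
 *	if (delay < EC_SPI_RECOVERY_TIME_NS)
 *		ndelay(delay);
 *
 * i.e. it waits for the time that has already elapsed. Given the stated
 * goal of keeping chip select deasserted for EC_SPI_RECOVERY_TIME_NS
 * between transactions, the remainder looks like the intended operand:
 *
 *	ndelay(EC_SPI_RECOVERY_TIME_NS - delay);
 * )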
We need a way of asking SPI driver for + * maximum-supported transfer size. + */ + todo = min(need_len, 256); + dev_dbg(ec_dev->dev, "loop, todo=%d, need_len=%d, ptr=%ld\n", + todo, need_len, ptr - ec_dev->din); + + memset(&trans, '\0', sizeof(trans)); + trans.cs_change = 1; + trans.rx_buf = ptr; + trans.len = todo; + spi_message_init(&msg); + spi_message_add_tail(&trans, &msg); + + /* send command to EC and read answer */ + BUG_ON((u8 *)trans.rx_buf - ec_dev->din + todo > + ec_dev->din_size); + ret = spi_sync(ec_spi->spi, &msg); + if (ret < 0) { + dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); + return ret; + } + + debug_packet(ec_dev->dev, "interim", ptr, todo); + ptr += todo; + need_len -= todo; + } + + dev_dbg(ec_dev->dev, "loop done, ptr=%ld\n", ptr - ec_dev->din); + + return 0; +} + +/** + * cros_ec_command_spi_xfer - Transfer a message over SPI and receive the reply + * + * @ec_dev: ChromeOS EC device + * @ec_msg: Message to transfer + */ +static int cros_ec_command_spi_xfer(struct cros_ec_device *ec_dev, + struct cros_ec_msg *ec_msg) +{ + struct cros_ec_spi *ec_spi = ec_dev->priv; + struct spi_transfer trans; + struct spi_message msg; + int i, len; + u8 *ptr; + int sum; + int ret = 0, final_ret; + struct timespec ts; + + len = cros_ec_prepare_tx(ec_dev, ec_msg); + dev_dbg(ec_dev->dev, "prepared, len=%d\n", len); + + /* If it's too soon to do another transaction, wait */ + if (ec_spi->last_transfer_ns) { + struct timespec ts; + unsigned long delay; /* The delay completed so far */ + + ktime_get_ts(&ts); + delay = timespec_to_ns(&ts) - ec_spi->last_transfer_ns; + if (delay < EC_SPI_RECOVERY_TIME_NS) + ndelay(delay); + } + + /* Transmit phase - send our message */ + debug_packet(ec_dev->dev, "out", ec_dev->dout, len); + memset(&trans, '\0', sizeof(trans)); + trans.tx_buf = ec_dev->dout; + trans.len = len; + trans.cs_change = 1; + spi_message_init(&msg); + spi_message_add_tail(&trans, &msg); + ret = spi_sync(ec_spi->spi, &msg); + + /* Get the response */ + if (!ret) { + ret = cros_ec_spi_receive_response(ec_dev, + ec_msg->in_len + EC_MSG_TX_PROTO_BYTES); + } else { + dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); + } + + /* turn off CS */ + spi_message_init(&msg); + final_ret = spi_sync(ec_spi->spi, &msg); + ktime_get_ts(&ts); + ec_spi->last_transfer_ns = timespec_to_ns(&ts); + if (!ret) + ret = final_ret; + if (ret < 0) { + dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); + return ret; + } + + /* check response error code */ + ptr = ec_dev->din; + if (ptr[0]) { + dev_warn(ec_dev->dev, "command 0x%02x returned an error %d\n", + ec_msg->cmd, ptr[0]); + debug_packet(ec_dev->dev, "in_err", ptr, len); + return -EINVAL; + } + len = ptr[1]; + sum = ptr[0] + ptr[1]; + if (len > ec_msg->in_len) { + dev_err(ec_dev->dev, "packet too long (%d bytes, expected %d)", + len, ec_msg->in_len); + return -ENOSPC; + } + + /* copy response packet payload and compute checksum */ + for (i = 0; i < len; i++) { + sum += ptr[i + 2]; + if (ec_msg->in_len) + ec_msg->in_buf[i] = ptr[i + 2]; + } + sum &= 0xff; + + debug_packet(ec_dev->dev, "in", ptr, len + 3); + + if (sum != ptr[len + 2]) { + dev_err(ec_dev->dev, + "bad packet checksum, expected %02x, got %02x\n", + sum, ptr[len + 2]); + return -EBADMSG; + } + + return 0; +} + +static int cros_ec_probe_spi(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct cros_ec_device *ec_dev; + struct cros_ec_spi *ec_spi; + int err; + + spi->bits_per_word = 8; + spi->mode = SPI_MODE_0; + err = spi_setup(spi); + if (err < 0) + return 
err; + + ec_spi = devm_kzalloc(dev, sizeof(*ec_spi), GFP_KERNEL); + if (ec_spi == NULL) + return -ENOMEM; + ec_spi->spi = spi; + ec_dev = devm_kzalloc(dev, sizeof(*ec_dev), GFP_KERNEL); + if (!ec_dev) + return -ENOMEM; + + spi_set_drvdata(spi, ec_dev); + ec_dev->name = "SPI"; + ec_dev->dev = dev; + ec_dev->priv = ec_spi; + ec_dev->irq = spi->irq; + ec_dev->command_xfer = cros_ec_command_spi_xfer; + ec_dev->ec_name = ec_spi->spi->modalias; + ec_dev->phys_name = dev_name(&ec_spi->spi->dev); + ec_dev->parent = &ec_spi->spi->dev; + ec_dev->din_size = EC_MSG_BYTES + EC_MSG_PREAMBLE_COUNT; + ec_dev->dout_size = EC_MSG_BYTES; + + err = cros_ec_register(ec_dev); + if (err) { + dev_err(dev, "cannot register EC\n"); + return err; + } + + return 0; +} + +static int cros_ec_remove_spi(struct spi_device *spi) +{ + struct cros_ec_device *ec_dev; + + ec_dev = spi_get_drvdata(spi); + cros_ec_remove(ec_dev); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int cros_ec_spi_suspend(struct device *dev) +{ + struct cros_ec_device *ec_dev = dev_get_drvdata(dev); + + return cros_ec_suspend(ec_dev); +} + +static int cros_ec_spi_resume(struct device *dev) +{ + struct cros_ec_device *ec_dev = dev_get_drvdata(dev); + + return cros_ec_resume(ec_dev); +} +#endif + +static SIMPLE_DEV_PM_OPS(cros_ec_spi_pm_ops, cros_ec_spi_suspend, + cros_ec_spi_resume); + +static const struct spi_device_id cros_ec_spi_id[] = { + { "cros-ec-spi", 0 }, + { } +}; +MODULE_DEVICE_TABLE(spi, cros_ec_spi_id); + +static struct spi_driver cros_ec_driver_spi = { + .driver = { + .name = "cros-ec-spi", + .owner = THIS_MODULE, + .pm = &cros_ec_spi_pm_ops, + }, + .probe = cros_ec_probe_spi, + .remove = cros_ec_remove_spi, + .id_table = cros_ec_spi_id, +}; + +module_spi_driver(cros_ec_driver_spi); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ChromeOS EC multi function device (SPI)"); diff --git a/drivers/mfd/da903x.c b/drivers/mfd/da903x.c index 05176cd2862b..f1a316e0d6a6 100644 --- a/drivers/mfd/da903x.c +++ b/drivers/mfd/da903x.c @@ -499,7 +499,8 @@ static int da903x_probe(struct i2c_client *client, unsigned int tmp; int ret; - chip = kzalloc(sizeof(struct da903x_chip), GFP_KERNEL); + chip = devm_kzalloc(&client->dev, sizeof(struct da903x_chip), + GFP_KERNEL); if (chip == NULL) return -ENOMEM; @@ -515,33 +516,27 @@ static int da903x_probe(struct i2c_client *client, ret = chip->ops->init_chip(chip); if (ret) - goto out_free_chip; + return ret; /* mask and clear all IRQs */ chip->events_mask = 0xffffffff; chip->ops->mask_events(chip, chip->events_mask); chip->ops->read_events(chip, &tmp); - ret = request_irq(client->irq, da903x_irq_handler, + ret = devm_request_irq(&client->dev, client->irq, da903x_irq_handler, IRQF_TRIGGER_FALLING, "da903x", chip); if (ret) { dev_err(&client->dev, "failed to request irq %d\n", client->irq); - goto out_free_chip; + return ret; } ret = da903x_add_subdevs(chip, pdata); if (ret) - goto out_free_irq; + return ret; return 0; - -out_free_irq: - free_irq(client->irq, chip); -out_free_chip: - kfree(chip); - return ret; } static int da903x_remove(struct i2c_client *client) @@ -549,8 +544,6 @@ static int da903x_remove(struct i2c_client *client) struct da903x_chip *chip = i2c_get_clientdata(client); da903x_remove_subdevs(chip); - free_irq(client->irq, chip); - kfree(chip); return 0; } diff --git a/drivers/mfd/da9052-spi.c b/drivers/mfd/da9052-spi.c index 61d63b93576c..0680bcbc53de 100644 --- a/drivers/mfd/da9052-spi.c +++ b/drivers/mfd/da9052-spi.c @@ -38,7 +38,7 @@ static int da9052_spi_probe(struct spi_device *spi) 
da9052->dev = &spi->dev; da9052->chip_irq = spi->irq; - dev_set_drvdata(&spi->dev, da9052); + spi_set_drvdata(spi, da9052); da9052_regmap_config.read_flag_mask = 1; da9052_regmap_config.write_flag_mask = 0; @@ -60,7 +60,7 @@ static int da9052_spi_probe(struct spi_device *spi) static int da9052_spi_remove(struct spi_device *spi) { - struct da9052 *da9052 = dev_get_drvdata(&spi->dev); + struct da9052 *da9052 = spi_get_drvdata(spi); da9052_device_exit(da9052); return 0; diff --git a/drivers/mfd/da9055-core.c b/drivers/mfd/da9055-core.c index f56a1a9f7777..49cb23d37469 100644 --- a/drivers/mfd/da9055-core.c +++ b/drivers/mfd/da9055-core.c @@ -391,7 +391,7 @@ int da9055_device_init(struct da9055 *da9055) da9055->irq_base = pdata->irq_base; ret = regmap_add_irq_chip(da9055->regmap, da9055->chip_irq, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, da9055->irq_base, &da9055_regmap_irq_chip, &da9055->irq_data); if (ret < 0) diff --git a/drivers/mfd/davinci_voicecodec.c b/drivers/mfd/davinci_voicecodec.c index c0bcc872af4e..c60ab0c3c4db 100644 --- a/drivers/mfd/davinci_voicecodec.c +++ b/drivers/mfd/davinci_voicecodec.c @@ -177,17 +177,7 @@ static struct platform_driver davinci_vc_driver = { .remove = davinci_vc_remove, }; -static int __init davinci_vc_init(void) -{ - return platform_driver_probe(&davinci_vc_driver, davinci_vc_probe); -} -module_init(davinci_vc_init); - -static void __exit davinci_vc_exit(void) -{ - platform_driver_unregister(&davinci_vc_driver); -} -module_exit(davinci_vc_exit); +module_platform_driver_probe(davinci_vc_driver, davinci_vc_probe); MODULE_AUTHOR("Miguel Aguilar"); MODULE_DESCRIPTION("Texas Instruments DaVinci Voice Codec Core Interface"); diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c index 21434beb420a..319b8abe742b 100644 --- a/drivers/mfd/db8500-prcmu.c +++ b/drivers/mfd/db8500-prcmu.c @@ -24,6 +24,7 @@ #include <linux/jiffies.h> #include <linux/bitops.h> #include <linux/fs.h> +#include <linux/of.h> #include <linux/platform_device.h> #include <linux/uaccess.h> #include <linux/mfd/core.h> @@ -2704,6 +2705,7 @@ static void dbx500_fw_version_init(struct platform_device *pdev, { struct resource *res; void __iomem *tcpm_base; + u32 version; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "prcmu-tcpm"); @@ -2713,26 +2715,27 @@ static void dbx500_fw_version_init(struct platform_device *pdev, return; } tcpm_base = ioremap(res->start, resource_size(res)); - if (tcpm_base != NULL) { - u32 version; - - version = readl(tcpm_base + version_offset); - fw_info.version.project = (version & 0xFF); - fw_info.version.api_version = (version >> 8) & 0xFF; - fw_info.version.func_version = (version >> 16) & 0xFF; - fw_info.version.errata = (version >> 24) & 0xFF; - strncpy(fw_info.version.project_name, - fw_project_name(fw_info.version.project), - PRCMU_FW_PROJECT_NAME_LEN); - fw_info.valid = true; - pr_info("PRCMU firmware: %s(%d), version %d.%d.%d\n", - fw_info.version.project_name, - fw_info.version.project, - fw_info.version.api_version, - fw_info.version.func_version, - fw_info.version.errata); - iounmap(tcpm_base); + if (!tcpm_base) { + dev_err(&pdev->dev, "no prcmu tcpm mem region provided\n"); + return; } + + version = readl(tcpm_base + version_offset); + fw_info.version.project = (version & 0xFF); + fw_info.version.api_version = (version >> 8) & 0xFF; + fw_info.version.func_version = (version >> 16) & 0xFF; + fw_info.version.errata = (version >> 24) & 0xFF; + strncpy(fw_info.version.project_name, + 
fw_project_name(fw_info.version.project), + PRCMU_FW_PROJECT_NAME_LEN); + fw_info.valid = true; + pr_info("PRCMU firmware: %s(%d), version %d.%d.%d\n", + fw_info.version.project_name, + fw_info.version.project, + fw_info.version.api_version, + fw_info.version.func_version, + fw_info.version.errata); + iounmap(tcpm_base); } void __init db8500_prcmu_early_init(u32 phy_base, u32 size) @@ -3065,6 +3068,15 @@ static struct db8500_thsens_platform_data db8500_thsens_data = { .num_trips = 4, }; +static struct mfd_cell common_prcmu_devs[] = { + { + .name = "ux500_wdt", + .platform_data = &db8500_wdt_pdata, + .pdata_size = sizeof(db8500_wdt_pdata), + .id = -1, + }, +}; + static struct mfd_cell db8500_prcmu_devs[] = { { .name = "db8500-prcmu-regulators", @@ -3079,12 +3091,6 @@ static struct mfd_cell db8500_prcmu_devs[] = { .pdata_size = sizeof(db8500_cpufreq_table), }, { - .name = "ux500_wdt", - .platform_data = &db8500_wdt_pdata, - .pdata_size = sizeof(db8500_wdt_pdata), - .id = -1, - }, - { .name = "db8500-thermal", .num_resources = ARRAY_SIZE(db8500_thsens_resources), .resources = db8500_thsens_resources, @@ -3173,13 +3179,25 @@ static int db8500_prcmu_probe(struct platform_device *pdev) db8500_prcmu_update_cpufreq(); - err = mfd_add_devices(&pdev->dev, 0, db8500_prcmu_devs, - ARRAY_SIZE(db8500_prcmu_devs), NULL, 0, db8500_irq_domain); + err = mfd_add_devices(&pdev->dev, 0, common_prcmu_devs, + ARRAY_SIZE(common_prcmu_devs), NULL, 0, db8500_irq_domain); if (err) { pr_err("prcmu: Failed to add subdevices\n"); return err; } + /* TODO: Remove restriction when clk definitions are available. */ + if (!of_machine_is_compatible("st-ericsson,u8540")) { + err = mfd_add_devices(&pdev->dev, 0, db8500_prcmu_devs, + ARRAY_SIZE(db8500_prcmu_devs), NULL, 0, + db8500_irq_domain); + if (err) { + mfd_remove_devices(&pdev->dev); + pr_err("prcmu: Failed to add subdevices\n"); + goto no_irq_return; + } + } + err = db8500_prcmu_register_ab8500(&pdev->dev, pdata->ab_platdata, pdata->ab_irq); if (err) { diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c index b7a61f0f27a4..5502106ad515 100644 --- a/drivers/mfd/ezx-pcap.c +++ b/drivers/mfd/ezx-pcap.c @@ -393,7 +393,7 @@ static int pcap_add_subdev(struct pcap_chip *pcap, static int ezx_pcap_remove(struct spi_device *spi) { - struct pcap_chip *pcap = dev_get_drvdata(&spi->dev); + struct pcap_chip *pcap = spi_get_drvdata(spi); struct pcap_platform_data *pdata = spi->dev.platform_data; int i, adc_irq; @@ -403,7 +403,7 @@ static int ezx_pcap_remove(struct spi_device *spi) /* cleanup ADC */ adc_irq = pcap_to_irq(pcap, (pdata->config & PCAP_SECOND_PORT) ? PCAP_IRQ_ADCDONE2 : PCAP_IRQ_ADCDONE); - free_irq(adc_irq, pcap); + devm_free_irq(&spi->dev, adc_irq, pcap); mutex_lock(&pcap->adc_mutex); for (i = 0; i < PCAP_ADC_MAXQ; i++) kfree(pcap->adc_queue[i]); @@ -415,8 +415,6 @@ static int ezx_pcap_remove(struct spi_device *spi) destroy_workqueue(pcap->workqueue); - kfree(pcap); - return 0; } @@ -431,7 +429,7 @@ static int ezx_pcap_probe(struct spi_device *spi) if (!pdata) goto ret; - pcap = kzalloc(sizeof(*pcap), GFP_KERNEL); + pcap = devm_kzalloc(&spi->dev, sizeof(*pcap), GFP_KERNEL); if (!pcap) { ret = -ENOMEM; goto ret; @@ -441,14 +439,14 @@ static int ezx_pcap_probe(struct spi_device *spi) mutex_init(&pcap->adc_mutex); INIT_WORK(&pcap->isr_work, pcap_isr_work); INIT_WORK(&pcap->msr_work, pcap_msr_work); - dev_set_drvdata(&spi->dev, pcap); + spi_set_drvdata(spi, pcap); /* setup spi */ spi->bits_per_word = 32; spi->mode = SPI_MODE_0 | (pdata->config & PCAP_CS_AH ? 
SPI_CS_HIGH : 0); ret = spi_setup(spi); if (ret) - goto free_pcap; + goto ret; pcap->spi = spi; @@ -458,7 +456,7 @@ static int ezx_pcap_probe(struct spi_device *spi) if (!pcap->workqueue) { ret = -ENOMEM; dev_err(&spi->dev, "can't create pcap thread\n"); - goto free_pcap; + goto ret; } /* redirect interrupts to AP, except adcdone2 */ @@ -491,7 +489,8 @@ static int ezx_pcap_probe(struct spi_device *spi) adc_irq = pcap_to_irq(pcap, (pdata->config & PCAP_SECOND_PORT) ? PCAP_IRQ_ADCDONE2 : PCAP_IRQ_ADCDONE); - ret = request_irq(adc_irq, pcap_adc_irq, 0, "ADC", pcap); + ret = devm_request_irq(&spi->dev, adc_irq, pcap_adc_irq, 0, "ADC", + pcap); if (ret) goto free_irqchip; @@ -511,14 +510,12 @@ static int ezx_pcap_probe(struct spi_device *spi) remove_subdevs: device_for_each_child(&spi->dev, NULL, pcap_remove_subdev); /* free_adc: */ - free_irq(adc_irq, pcap); + devm_free_irq(&spi->dev, adc_irq, pcap); free_irqchip: for (i = pcap->irq_base; i < (pcap->irq_base + PCAP_NIRQS); i++) irq_set_chip_and_handler(i, NULL, NULL); /* destroy_workqueue: */ destroy_workqueue(pcap->workqueue); -free_pcap: - kfree(pcap); ret: return ret; } diff --git a/drivers/mfd/htc-pasic3.c b/drivers/mfd/htc-pasic3.c index 9e5453d21a68..0285fceb99a6 100644 --- a/drivers/mfd/htc-pasic3.c +++ b/drivers/mfd/htc-pasic3.c @@ -208,18 +208,7 @@ static struct platform_driver pasic3_driver = { .remove = pasic3_remove, }; -static int __init pasic3_base_init(void) -{ - return platform_driver_probe(&pasic3_driver, pasic3_probe); -} - -static void __exit pasic3_base_exit(void) -{ - platform_driver_unregister(&pasic3_driver); -} - -module_init(pasic3_base_init); -module_exit(pasic3_base_exit); +module_platform_driver_probe(pasic3_driver, pasic3_probe); MODULE_AUTHOR("Philipp Zabel <[email protected]>"); MODULE_DESCRIPTION("Core driver for HTC PASIC3"); diff --git a/drivers/mfd/intel_msic.c b/drivers/mfd/intel_msic.c index 1804331bd52c..5be3b5e13855 100644 --- a/drivers/mfd/intel_msic.c +++ b/drivers/mfd/intel_msic.c @@ -323,7 +323,8 @@ static int intel_msic_init_devices(struct intel_msic *msic) if (pdata->ocd) { unsigned gpio = pdata->ocd->gpio; - ret = gpio_request_one(gpio, GPIOF_IN, "ocd_gpio"); + ret = devm_gpio_request_one(&pdev->dev, gpio, + GPIOF_IN, "ocd_gpio"); if (ret) { dev_err(&pdev->dev, "failed to register OCD GPIO\n"); return ret; @@ -332,7 +333,6 @@ static int intel_msic_init_devices(struct intel_msic *msic) ret = gpio_to_irq(gpio); if (ret < 0) { dev_err(&pdev->dev, "no IRQ number for OCD GPIO\n"); - gpio_free(gpio); return ret; } @@ -359,8 +359,6 @@ static int intel_msic_init_devices(struct intel_msic *msic) fail: mfd_remove_devices(&pdev->dev); - if (pdata->ocd) - gpio_free(pdata->ocd->gpio); return ret; } @@ -368,12 +366,8 @@ fail: static void intel_msic_remove_devices(struct intel_msic *msic) { struct platform_device *pdev = msic->pdev; - struct intel_msic_platform_data *pdata = pdev->dev.platform_data; mfd_remove_devices(&pdev->dev); - - if (pdata->ocd) - gpio_free(pdata->ocd->gpio); } static int intel_msic_probe(struct platform_device *pdev) diff --git a/drivers/mfd/lm3533-core.c b/drivers/mfd/lm3533-core.c index ceebf2c1ea97..4b7e6dac1de8 100644 --- a/drivers/mfd/lm3533-core.c +++ b/drivers/mfd/lm3533-core.c @@ -496,8 +496,8 @@ static int lm3533_device_init(struct lm3533 *lm3533) dev_set_drvdata(lm3533->dev, lm3533); if (gpio_is_valid(lm3533->gpio_hwen)) { - ret = gpio_request_one(lm3533->gpio_hwen, GPIOF_OUT_INIT_LOW, - "lm3533-hwen"); + ret = devm_gpio_request_one(lm3533->dev, lm3533->gpio_hwen, + 
GPIOF_OUT_INIT_LOW, "lm3533-hwen"); if (ret < 0) { dev_err(lm3533->dev, "failed to request HWEN GPIO %d\n", @@ -528,8 +528,6 @@ err_unregister: mfd_remove_devices(lm3533->dev); err_disable: lm3533_disable(lm3533); - if (gpio_is_valid(lm3533->gpio_hwen)) - gpio_free(lm3533->gpio_hwen); return ret; } @@ -542,8 +540,6 @@ static void lm3533_device_exit(struct lm3533 *lm3533) mfd_remove_devices(lm3533->dev); lm3533_disable(lm3533); - if (gpio_is_valid(lm3533->gpio_hwen)) - gpio_free(lm3533->gpio_hwen); } static bool lm3533_readable_register(struct device *dev, unsigned int reg) diff --git a/drivers/mfd/max77686.c b/drivers/mfd/max77686.c index 4d73963cd8f0..1cbb17609c8b 100644 --- a/drivers/mfd/max77686.c +++ b/drivers/mfd/max77686.c @@ -46,7 +46,7 @@ static struct regmap_config max77686_regmap_config = { #ifdef CONFIG_OF static struct of_device_id max77686_pmic_dt_match[] = { - {.compatible = "maxim,max77686", .data = 0}, + {.compatible = "maxim,max77686", .data = NULL}, {}, }; diff --git a/drivers/mfd/mc13xxx-spi.c b/drivers/mfd/mc13xxx-spi.c index 3032bae20b62..77189daadf1e 100644 --- a/drivers/mfd/mc13xxx-spi.c +++ b/drivers/mfd/mc13xxx-spi.c @@ -131,7 +131,7 @@ static int mc13xxx_spi_probe(struct spi_device *spi) if (!mc13xxx) return -ENOMEM; - dev_set_drvdata(&spi->dev, mc13xxx); + spi_set_drvdata(spi, mc13xxx); spi->mode = SPI_MODE_0 | SPI_CS_HIGH; mc13xxx->dev = &spi->dev; @@ -144,7 +144,7 @@ static int mc13xxx_spi_probe(struct spi_device *spi) ret = PTR_ERR(mc13xxx->regmap); dev_err(mc13xxx->dev, "Failed to initialize register map: %d\n", ret); - dev_set_drvdata(&spi->dev, NULL); + spi_set_drvdata(spi, NULL); return ret; } @@ -164,7 +164,7 @@ static int mc13xxx_spi_probe(struct spi_device *spi) static int mc13xxx_spi_remove(struct spi_device *spi) { - struct mc13xxx *mc13xxx = dev_get_drvdata(&spi->dev); + struct mc13xxx *mc13xxx = spi_get_drvdata(spi); mc13xxx_common_cleanup(mc13xxx); diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index 4febc5c7fdee..759fae3ca7fb 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c @@ -1,8 +1,9 @@ /** * omap-usb-host.c - The USBHS core driver for OMAP EHCI & OHCI * - * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2011-2013 Texas Instruments Incorporated - http://www.ti.com * Author: Keshava Munegowda <[email protected]> + * Author: Roger Quadros <[email protected]> * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 of @@ -27,6 +28,9 @@ #include <linux/platform_device.h> #include <linux/platform_data/usb-omap.h> #include <linux/pm_runtime.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/err.h> #include "omap-usb.h" @@ -137,6 +141,49 @@ static inline u8 usbhs_readb(void __iomem *base, u8 reg) /*-------------------------------------------------------------------------*/ +/** + * Map 'enum usbhs_omap_port_mode' found in <linux/platform_data/usb-omap.h> + * to the device tree binding portN-mode found in + * 'Documentation/devicetree/bindings/mfd/omap-usb-host.txt' + */ +static const char * const port_modes[] = { + [OMAP_USBHS_PORT_MODE_UNUSED] = "", + [OMAP_EHCI_PORT_MODE_PHY] = "ehci-phy", + [OMAP_EHCI_PORT_MODE_TLL] = "ehci-tll", + [OMAP_EHCI_PORT_MODE_HSIC] = "ehci-hsic", + [OMAP_OHCI_PORT_MODE_PHY_6PIN_DATSE0] = "ohci-phy-6pin-datse0", + [OMAP_OHCI_PORT_MODE_PHY_6PIN_DPDM] = "ohci-phy-6pin-dpdm", + [OMAP_OHCI_PORT_MODE_PHY_3PIN_DATSE0] = 
"ohci-phy-3pin-datse0", + [OMAP_OHCI_PORT_MODE_PHY_4PIN_DPDM] = "ohci-phy-4pin-dpdm", + [OMAP_OHCI_PORT_MODE_TLL_6PIN_DATSE0] = "ohci-tll-6pin-datse0", + [OMAP_OHCI_PORT_MODE_TLL_6PIN_DPDM] = "ohci-tll-6pin-dpdm", + [OMAP_OHCI_PORT_MODE_TLL_3PIN_DATSE0] = "ohci-tll-3pin-datse0", + [OMAP_OHCI_PORT_MODE_TLL_4PIN_DPDM] = "ohci-tll-4pin-dpdm", + [OMAP_OHCI_PORT_MODE_TLL_2PIN_DATSE0] = "ohci-tll-2pin-datse0", + [OMAP_OHCI_PORT_MODE_TLL_2PIN_DPDM] = "ohci-tll-2pin-dpdm", +}; + +/** + * omap_usbhs_get_dt_port_mode - Get the 'enum usbhs_omap_port_mode' + * from the port mode string. + * @mode: The port mode string, usually obtained from device tree. + * + * The function returns the 'enum usbhs_omap_port_mode' that matches the + * provided port mode string as per the port_modes table. + * If no match is found it returns -ENODEV + */ +static const int omap_usbhs_get_dt_port_mode(const char *mode) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(port_modes); i++) { + if (!strcmp(mode, port_modes[i])) + return i; + } + + return -ENODEV; +} + static struct platform_device *omap_usbhs_alloc_child(const char *name, struct resource *res, int num_resources, void *pdata, size_t pdata_size, struct device *dev) @@ -278,7 +325,7 @@ static int usbhs_runtime_resume(struct device *dev) dev_dbg(dev, "usbhs_runtime_resume\n"); - omap_tll_enable(); + omap_tll_enable(pdata); if (!IS_ERR(omap->ehci_logic_fck)) clk_enable(omap->ehci_logic_fck); @@ -353,7 +400,7 @@ static int usbhs_runtime_suspend(struct device *dev) if (!IS_ERR(omap->ehci_logic_fck)) clk_disable(omap->ehci_logic_fck); - omap_tll_disable(); + omap_tll_disable(pdata); return 0; } @@ -430,24 +477,10 @@ static unsigned omap_usbhs_rev2_hostconfig(struct usbhs_hcd_omap *omap, static void omap_usbhs_init(struct device *dev) { struct usbhs_hcd_omap *omap = dev_get_drvdata(dev); - struct usbhs_omap_platform_data *pdata = omap->pdata; unsigned reg; dev_dbg(dev, "starting TI HSUSB Controller\n"); - if (pdata->phy_reset) { - if (gpio_is_valid(pdata->reset_gpio_port[0])) - gpio_request_one(pdata->reset_gpio_port[0], - GPIOF_OUT_INIT_LOW, "USB1 PHY reset"); - - if (gpio_is_valid(pdata->reset_gpio_port[1])) - gpio_request_one(pdata->reset_gpio_port[1], - GPIOF_OUT_INIT_LOW, "USB2 PHY reset"); - - /* Hold the PHY in RESET for enough time till DIR is high */ - udelay(10); - } - pm_runtime_get_sync(dev); reg = usbhs_read(omap->uhh_base, OMAP_UHH_HOSTCONFIG); @@ -476,36 +509,59 @@ static void omap_usbhs_init(struct device *dev) dev_dbg(dev, "UHH setup done, uhh_hostconfig=%x\n", reg); pm_runtime_put_sync(dev); - if (pdata->phy_reset) { - /* Hold the PHY in RESET for enough time till - * PHY is settled and ready - */ - udelay(10); +} + +static int usbhs_omap_get_dt_pdata(struct device *dev, + struct usbhs_omap_platform_data *pdata) +{ + int ret, i; + struct device_node *node = dev->of_node; - if (gpio_is_valid(pdata->reset_gpio_port[0])) - gpio_set_value_cansleep - (pdata->reset_gpio_port[0], 1); + ret = of_property_read_u32(node, "num-ports", &pdata->nports); + if (ret) + pdata->nports = 0; - if (gpio_is_valid(pdata->reset_gpio_port[1])) - gpio_set_value_cansleep - (pdata->reset_gpio_port[1], 1); + if (pdata->nports > OMAP3_HS_USB_PORTS) { + dev_warn(dev, "Too many num_ports <%d> in device tree. 
Max %d\n", + pdata->nports, OMAP3_HS_USB_PORTS); + return -ENODEV; } -} -static void omap_usbhs_deinit(struct device *dev) -{ - struct usbhs_hcd_omap *omap = dev_get_drvdata(dev); - struct usbhs_omap_platform_data *pdata = omap->pdata; + /* get port modes */ + for (i = 0; i < OMAP3_HS_USB_PORTS; i++) { + char prop[11]; + const char *mode; - if (pdata->phy_reset) { - if (gpio_is_valid(pdata->reset_gpio_port[0])) - gpio_free(pdata->reset_gpio_port[0]); + pdata->port_mode[i] = OMAP_USBHS_PORT_MODE_UNUSED; + + snprintf(prop, sizeof(prop), "port%d-mode", i + 1); + ret = of_property_read_string(node, prop, &mode); + if (ret < 0) + continue; + + ret = omap_usbhs_get_dt_port_mode(mode); + if (ret < 0) { + dev_warn(dev, "Invalid port%d-mode \"%s\" in device tree\n", + i, mode); + return -ENODEV; + } - if (gpio_is_valid(pdata->reset_gpio_port[1])) - gpio_free(pdata->reset_gpio_port[1]); + dev_dbg(dev, "port%d-mode: %s -> %d\n", i, mode, ret); + pdata->port_mode[i] = ret; } + + /* get flags */ + pdata->single_ulpi_bypass = of_property_read_bool(node, + "single-ulpi-bypass"); + + return 0; } +static struct of_device_id usbhs_child_match_table[] = { + { .compatible = "ti,omap-ehci", }, + { .compatible = "ti,omap-ohci", }, + { } +}; /** * usbhs_omap_probe - initialize TI-based HCDs @@ -522,26 +578,46 @@ static int usbhs_omap_probe(struct platform_device *pdev) int i; bool need_logic_fck; + if (dev->of_node) { + /* For DT boot we populate platform data from OF node */ + pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + ret = usbhs_omap_get_dt_pdata(dev, pdata); + if (ret) + return ret; + + dev->platform_data = pdata; + } + if (!pdata) { dev_err(dev, "Missing platform data\n"); return -ENODEV; } + if (pdata->nports > OMAP3_HS_USB_PORTS) { + dev_info(dev, "Too many num_ports <%d> in platform_data. 
Max %d\n", + pdata->nports, OMAP3_HS_USB_PORTS); + return -ENODEV; + } + omap = devm_kzalloc(dev, sizeof(*omap), GFP_KERNEL); if (!omap) { dev_err(dev, "Memory allocation failed\n"); return -ENOMEM; } - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "uhh"); - omap->uhh_base = devm_request_and_ioremap(dev, res); - if (!omap->uhh_base) { - dev_err(dev, "Resource request/ioremap failed\n"); - return -EADDRNOTAVAIL; - } + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + omap->uhh_base = devm_ioremap_resource(dev, res); + if (IS_ERR(omap->uhh_base)) + return PTR_ERR(omap->uhh_base); omap->pdata = pdata; + /* Initialize the TLL subsystem */ + omap_tll_init(pdata); + pm_runtime_enable(dev); platform_set_drvdata(pdev, omap); @@ -575,6 +651,7 @@ static int usbhs_omap_probe(struct platform_device *pdev) omap->usbhs_rev, omap->nports); break; } + pdata->nports = omap->nports; } i = sizeof(struct clk *) * omap->nports; @@ -700,17 +777,28 @@ static int usbhs_omap_probe(struct platform_device *pdev) } omap_usbhs_init(dev); - ret = omap_usbhs_alloc_children(pdev); - if (ret) { - dev_err(dev, "omap_usbhs_alloc_children failed\n"); - goto err_alloc; + + if (dev->of_node) { + ret = of_platform_populate(dev->of_node, + usbhs_child_match_table, NULL, dev); + + if (ret) { + dev_err(dev, "Failed to create DT children: %d\n", ret); + goto err_alloc; + } + + } else { + ret = omap_usbhs_alloc_children(pdev); + if (ret) { + dev_err(dev, "omap_usbhs_alloc_children failed: %d\n", + ret); + goto err_alloc; + } } return 0; err_alloc: - omap_usbhs_deinit(&pdev->dev); - for (i = 0; i < omap->nports; i++) { if (!IS_ERR(omap->utmi_clk[i])) clk_put(omap->utmi_clk[i]); @@ -744,6 +832,13 @@ err_mem: return ret; } +static int usbhs_omap_remove_child(struct device *dev, void *data) +{ + dev_info(dev, "unregistering\n"); + platform_device_unregister(to_platform_device(dev)); + return 0; +} + /** * usbhs_omap_remove - shutdown processing for UHH & TLL HCDs * @pdev: USB Host Controller being removed @@ -755,8 +850,6 @@ static int usbhs_omap_remove(struct platform_device *pdev) struct usbhs_hcd_omap *omap = platform_get_drvdata(pdev); int i; - omap_usbhs_deinit(&pdev->dev); - for (i = 0; i < omap->nports; i++) { if (!IS_ERR(omap->utmi_clk[i])) clk_put(omap->utmi_clk[i]); @@ -777,6 +870,8 @@ static int usbhs_omap_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); + /* remove children */ + device_for_each_child(&pdev->dev, NULL, usbhs_omap_remove_child); return 0; } @@ -785,16 +880,26 @@ static const struct dev_pm_ops usbhsomap_dev_pm_ops = { .runtime_resume = usbhs_runtime_resume, }; +static const struct of_device_id usbhs_omap_dt_ids[] = { + { .compatible = "ti,usbhs-host" }, + { } +}; + +MODULE_DEVICE_TABLE(of, usbhs_omap_dt_ids); + + static struct platform_driver usbhs_omap_driver = { .driver = { .name = (char *)usbhs_driver_name, .owner = THIS_MODULE, .pm = &usbhsomap_dev_pm_ops, + .of_match_table = of_match_ptr(usbhs_omap_dt_ids), }, .remove = usbhs_omap_remove, }; MODULE_AUTHOR("Keshava Munegowda <[email protected]>"); +MODULE_AUTHOR("Roger Quadros <[email protected]>"); MODULE_ALIAS("platform:" USBHS_DRIVER_NAME); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("usb host common core driver for omap EHCI and OHCI"); diff --git a/drivers/mfd/omap-usb-tll.c b/drivers/mfd/omap-usb-tll.c index 0aef1a768880..e59ac4cbac96 100644 --- a/drivers/mfd/omap-usb-tll.c +++ b/drivers/mfd/omap-usb-tll.c @@ -1,8 +1,9 @@ /** * omap-usb-tll.c - The USB TLL driver for OMAP EHCI & OHCI * - * Copyright (C) 2012 Texas 
Instruments Incorporated - http://www.ti.com + * Copyright (C) 2012-2013 Texas Instruments Incorporated - http://www.ti.com * Author: Keshava Munegowda <[email protected]> + * Author: Roger Quadros <[email protected]> * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 of @@ -27,6 +28,7 @@ #include <linux/err.h> #include <linux/pm_runtime.h> #include <linux/platform_data/usb-omap.h> +#include <linux/of.h> #define USBTLL_DRIVER_NAME "usbhs_tll" @@ -105,8 +107,8 @@ struct usbtll_omap { int nch; /* num. of channels */ - struct usbhs_omap_platform_data *pdata; struct clk **ch_clk; + void __iomem *base; }; /*-------------------------------------------------------------------------*/ @@ -210,14 +212,10 @@ static unsigned ohci_omap3_fslsmode(enum usbhs_omap_port_mode mode) static int usbtll_omap_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct usbhs_omap_platform_data *pdata = dev->platform_data; - void __iomem *base; struct resource *res; struct usbtll_omap *tll; - unsigned reg; int ret = 0; int i, ver; - bool needs_tll; dev_dbg(dev, "starting TI HSUSB TLL Controller\n"); @@ -227,26 +225,16 @@ static int usbtll_omap_probe(struct platform_device *pdev) return -ENOMEM; } - if (!pdata) { - dev_err(dev, "Platform data missing\n"); - return -ENODEV; - } - - tll->pdata = pdata; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - base = devm_request_and_ioremap(dev, res); - if (!base) { - ret = -EADDRNOTAVAIL; - dev_err(dev, "Resource request/ioremap failed:%d\n", ret); - return ret; - } + tll->base = devm_ioremap_resource(dev, res); + if (IS_ERR(tll->base)) + return PTR_ERR(tll->base); platform_set_drvdata(pdev, tll); pm_runtime_enable(dev); pm_runtime_get_sync(dev); - ver = usbtll_read(base, OMAP_USBTLL_REVISION); + ver = usbtll_read(tll->base, OMAP_USBTLL_REVISION); switch (ver) { case OMAP_USBTLL_REV1: case OMAP_USBTLL_REV4: @@ -283,11 +271,85 @@ static int usbtll_omap_probe(struct platform_device *pdev) dev_dbg(dev, "can't get clock : %s\n", clkname); } + pm_runtime_put_sync(dev); + /* only after this can omap_tll_enable/disable work */ + spin_lock(&tll_lock); + tll_dev = dev; + spin_unlock(&tll_lock); + + return 0; + +err_clk_alloc: + pm_runtime_put_sync(dev); + pm_runtime_disable(dev); + + return ret; +} + +/** + * usbtll_omap_remove - shutdown processing for UHH & TLL HCDs + * @pdev: USB Host Controller being removed + * + * Reverses the effect of usbtll_omap_probe(). 
+ */ +static int usbtll_omap_remove(struct platform_device *pdev) +{ + struct usbtll_omap *tll = platform_get_drvdata(pdev); + int i; + + spin_lock(&tll_lock); + tll_dev = NULL; + spin_unlock(&tll_lock); + + for (i = 0; i < tll->nch; i++) + if (!IS_ERR(tll->ch_clk[i])) + clk_put(tll->ch_clk[i]); + + pm_runtime_disable(&pdev->dev); + return 0; +} + +static const struct of_device_id usbtll_omap_dt_ids[] = { + { .compatible = "ti,usbhs-tll" }, + { } +}; + +MODULE_DEVICE_TABLE(of, usbtll_omap_dt_ids); + +static struct platform_driver usbtll_omap_driver = { + .driver = { + .name = (char *)usbtll_driver_name, + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(usbtll_omap_dt_ids), + }, + .probe = usbtll_omap_probe, + .remove = usbtll_omap_remove, +}; + +int omap_tll_init(struct usbhs_omap_platform_data *pdata) +{ + int i; + bool needs_tll; + unsigned reg; + struct usbtll_omap *tll; + + spin_lock(&tll_lock); + + if (!tll_dev) { + spin_unlock(&tll_lock); + return -ENODEV; + } + + tll = dev_get_drvdata(tll_dev); + needs_tll = false; for (i = 0; i < tll->nch; i++) needs_tll |= omap_usb_mode_needs_tll(pdata->port_mode[i]); + pm_runtime_get_sync(tll_dev); + if (needs_tll) { + void __iomem *base = tll->base; /* Program Common TLL register */ reg = usbtll_read(base, OMAP_TLL_SHARED_CONF); @@ -336,51 +398,29 @@ static int usbtll_omap_probe(struct platform_device *pdev) } } - pm_runtime_put_sync(dev); - /* only after this can omap_tll_enable/disable work */ - spin_lock(&tll_lock); - tll_dev = dev; + pm_runtime_put_sync(tll_dev); + spin_unlock(&tll_lock); return 0; - -err_clk_alloc: - pm_runtime_put_sync(dev); - pm_runtime_disable(dev); - - return ret; } +EXPORT_SYMBOL_GPL(omap_tll_init); -/** - * usbtll_omap_remove - shutdown processing for UHH & TLL HCDs - * @pdev: USB Host Controller being removed - * - * Reverses the effect of usbtll_omap_probe(). 
- */ -static int usbtll_omap_remove(struct platform_device *pdev) +int omap_tll_enable(struct usbhs_omap_platform_data *pdata) { - struct usbtll_omap *tll = platform_get_drvdata(pdev); int i; + struct usbtll_omap *tll; spin_lock(&tll_lock); - tll_dev = NULL; - spin_unlock(&tll_lock); - for (i = 0; i < tll->nch; i++) - if (!IS_ERR(tll->ch_clk[i])) - clk_put(tll->ch_clk[i]); - - pm_runtime_disable(&pdev->dev); - return 0; -} + if (!tll_dev) { + spin_unlock(&tll_lock); + return -ENODEV; + } -static int usbtll_runtime_resume(struct device *dev) -{ - struct usbtll_omap *tll = dev_get_drvdata(dev); - struct usbhs_omap_platform_data *pdata = tll->pdata; - int i; + tll = dev_get_drvdata(tll_dev); - dev_dbg(dev, "usbtll_runtime_resume\n"); + pm_runtime_get_sync(tll_dev); for (i = 0; i < tll->nch; i++) { if (omap_usb_mode_needs_tll(pdata->port_mode[i])) { @@ -391,22 +431,31 @@ static int usbtll_runtime_resume(struct device *dev) r = clk_enable(tll->ch_clk[i]); if (r) { - dev_err(dev, + dev_err(tll_dev, "Error enabling ch %d clock: %d\n", i, r); } } } + spin_unlock(&tll_lock); + return 0; } +EXPORT_SYMBOL_GPL(omap_tll_enable); -static int usbtll_runtime_suspend(struct device *dev) +int omap_tll_disable(struct usbhs_omap_platform_data *pdata) { - struct usbtll_omap *tll = dev_get_drvdata(dev); - struct usbhs_omap_platform_data *pdata = tll->pdata; int i; + struct usbtll_omap *tll; - dev_dbg(dev, "usbtll_runtime_suspend\n"); + spin_lock(&tll_lock); + + if (!tll_dev) { + spin_unlock(&tll_lock); + return -ENODEV; + } + + tll = dev_get_drvdata(tll_dev); for (i = 0; i < tll->nch; i++) { if (omap_usb_mode_needs_tll(pdata->port_mode[i])) { @@ -415,64 +464,16 @@ static int usbtll_runtime_suspend(struct device *dev) } } - return 0; -} - -static const struct dev_pm_ops usbtllomap_dev_pm_ops = { - SET_RUNTIME_PM_OPS(usbtll_runtime_suspend, - usbtll_runtime_resume, - NULL) -}; - -static struct platform_driver usbtll_omap_driver = { - .driver = { - .name = (char *)usbtll_driver_name, - .owner = THIS_MODULE, - .pm = &usbtllomap_dev_pm_ops, - }, - .probe = usbtll_omap_probe, - .remove = usbtll_omap_remove, -}; - -int omap_tll_enable(void) -{ - int ret; - - spin_lock(&tll_lock); - - if (!tll_dev) { - pr_err("%s: OMAP USB TLL not initialized\n", __func__); - ret = -ENODEV; - } else { - ret = pm_runtime_get_sync(tll_dev); - } - - spin_unlock(&tll_lock); - - return ret; -} -EXPORT_SYMBOL_GPL(omap_tll_enable); - -int omap_tll_disable(void) -{ - int ret; - - spin_lock(&tll_lock); - - if (!tll_dev) { - pr_err("%s: OMAP USB TLL not initialized\n", __func__); - ret = -ENODEV; - } else { - ret = pm_runtime_put_sync(tll_dev); - } + pm_runtime_put_sync(tll_dev); spin_unlock(&tll_lock); - return ret; + return 0; } EXPORT_SYMBOL_GPL(omap_tll_disable); MODULE_AUTHOR("Keshava Munegowda <[email protected]>"); +MODULE_AUTHOR("Roger Quadros <[email protected]>"); MODULE_ALIAS("platform:" USBHS_DRIVER_NAME); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("usb tll driver for TI OMAP EHCI and OHCI controllers"); diff --git a/drivers/mfd/omap-usb.h b/drivers/mfd/omap-usb.h index 972aa961b064..2a508b6aeac8 100644 --- a/drivers/mfd/omap-usb.h +++ b/drivers/mfd/omap-usb.h @@ -1,2 +1,3 @@ -extern int omap_tll_enable(void); -extern int omap_tll_disable(void); +extern int omap_tll_init(struct usbhs_omap_platform_data *pdata); +extern int omap_tll_enable(struct usbhs_omap_platform_data *pdata); +extern int omap_tll_disable(struct usbhs_omap_platform_data *pdata); diff --git a/drivers/mfd/palmas.c b/drivers/mfd/palmas.c index 
73bf76df1044..53e9fe638d32 100644 --- a/drivers/mfd/palmas.c +++ b/drivers/mfd/palmas.c @@ -278,20 +278,20 @@ static void palmas_dt_to_pdata(struct i2c_client *i2c, int ret; u32 prop; - ret = of_property_read_u32(node, "ti,mux_pad1", &prop); + ret = of_property_read_u32(node, "ti,mux-pad1", &prop); if (!ret) { pdata->mux_from_pdata = 1; pdata->pad1 = prop; } - ret = of_property_read_u32(node, "ti,mux_pad2", &prop); + ret = of_property_read_u32(node, "ti,mux-pad2", &prop); if (!ret) { pdata->mux_from_pdata = 1; pdata->pad2 = prop; } /* The default for this register is all masked */ - ret = of_property_read_u32(node, "ti,power_ctrl", &prop); + ret = of_property_read_u32(node, "ti,power-ctrl", &prop); if (!ret) pdata->power_ctrl = prop; else @@ -349,6 +349,7 @@ static int palmas_i2c_probe(struct i2c_client *i2c, ret = -ENOMEM; goto err; } + palmas->i2c_clients[i]->dev.of_node = of_node_get(node); } palmas->regmap[i] = devm_regmap_init_i2c(palmas->i2c_clients[i], &palmas_regmap_config[i]); diff --git a/drivers/mfd/retu-mfd.c b/drivers/mfd/retu-mfd.c index 3ba048655bf3..a1830986eeb7 100644 --- a/drivers/mfd/retu-mfd.c +++ b/drivers/mfd/retu-mfd.c @@ -1,5 +1,5 @@ /* - * Retu MFD driver + * Retu/Tahvo MFD driver * * Copyright (C) 2004, 2005 Nokia Corporation * @@ -33,7 +33,8 @@ #define RETU_REG_ASICR 0x00 /* ASIC ID and revision */ #define RETU_REG_ASICR_VILMA (1 << 7) /* Bit indicating Vilma */ #define RETU_REG_IDR 0x01 /* Interrupt ID */ -#define RETU_REG_IMR 0x02 /* Interrupt mask */ +#define RETU_REG_IMR 0x02 /* Interrupt mask (Retu) */ +#define TAHVO_REG_IMR 0x03 /* Interrupt mask (Tahvo) */ /* Interrupt sources */ #define RETU_INT_PWR 0 /* Power button */ @@ -84,6 +85,62 @@ static struct regmap_irq_chip retu_irq_chip = { /* Retu device registered for the power off. 
*/ static struct retu_dev *retu_pm_power_off; +static struct resource tahvo_usb_res[] = { + { + .name = "tahvo-usb", + .start = TAHVO_INT_VBUS, + .end = TAHVO_INT_VBUS, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct mfd_cell tahvo_devs[] = { + { + .name = "tahvo-usb", + .resources = tahvo_usb_res, + .num_resources = ARRAY_SIZE(tahvo_usb_res), + }, +}; + +static struct regmap_irq tahvo_irqs[] = { + [TAHVO_INT_VBUS] = { + .mask = 1 << TAHVO_INT_VBUS, + } +}; + +static struct regmap_irq_chip tahvo_irq_chip = { + .name = "TAHVO", + .irqs = tahvo_irqs, + .num_irqs = ARRAY_SIZE(tahvo_irqs), + .num_regs = 1, + .status_base = RETU_REG_IDR, + .mask_base = TAHVO_REG_IMR, + .ack_base = RETU_REG_IDR, +}; + +static const struct retu_data { + char *chip_name; + char *companion_name; + struct regmap_irq_chip *irq_chip; + struct mfd_cell *children; + int nchildren; +} retu_data[] = { + [0] = { + .chip_name = "Retu", + .companion_name = "Vilma", + .irq_chip = &retu_irq_chip, + .children = retu_devs, + .nchildren = ARRAY_SIZE(retu_devs), + }, + [1] = { + .chip_name = "Tahvo", + .companion_name = "Betty", + .irq_chip = &tahvo_irq_chip, + .children = tahvo_devs, + .nchildren = ARRAY_SIZE(tahvo_devs), + } +}; + int retu_read(struct retu_dev *rdev, u8 reg) { int ret; @@ -173,9 +230,14 @@ static struct regmap_config retu_config = { static int retu_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { + struct retu_data const *rdat; struct retu_dev *rdev; int ret; + if (i2c->addr > ARRAY_SIZE(retu_data)) + return -ENODEV; + rdat = &retu_data[i2c->addr - 1]; + rdev = devm_kzalloc(&i2c->dev, sizeof(*rdev), GFP_KERNEL); if (rdev == NULL) return -ENOMEM; @@ -190,25 +252,27 @@ static int retu_probe(struct i2c_client *i2c, const struct i2c_device_id *id) ret = retu_read(rdev, RETU_REG_ASICR); if (ret < 0) { - dev_err(rdev->dev, "could not read Retu revision: %d\n", ret); + dev_err(rdev->dev, "could not read %s revision: %d\n", + rdat->chip_name, ret); return ret; } - dev_info(rdev->dev, "Retu%s v%d.%d found\n", - (ret & RETU_REG_ASICR_VILMA) ? " & Vilma" : "", + dev_info(rdev->dev, "%s%s%s v%d.%d found\n", rdat->chip_name, + (ret & RETU_REG_ASICR_VILMA) ? " & " : "", + (ret & RETU_REG_ASICR_VILMA) ? rdat->companion_name : "", (ret >> 4) & 0x7, ret & 0xf); - /* Mask all RETU interrupts. */ - ret = retu_write(rdev, RETU_REG_IMR, 0xffff); + /* Mask all interrupts. 
*/ + ret = retu_write(rdev, rdat->irq_chip->mask_base, 0xffff); if (ret < 0) return ret; ret = regmap_add_irq_chip(rdev->regmap, i2c->irq, IRQF_ONESHOT, -1, - &retu_irq_chip, &rdev->irq_data); + rdat->irq_chip, &rdev->irq_data); if (ret < 0) return ret; - ret = mfd_add_devices(rdev->dev, -1, retu_devs, ARRAY_SIZE(retu_devs), + ret = mfd_add_devices(rdev->dev, -1, rdat->children, rdat->nchildren, NULL, regmap_irq_chip_get_base(rdev->irq_data), NULL); if (ret < 0) { @@ -216,7 +280,7 @@ static int retu_probe(struct i2c_client *i2c, const struct i2c_device_id *id) return ret; } - if (!pm_power_off) { + if (i2c->addr == 1 && !pm_power_off) { retu_pm_power_off = rdev; pm_power_off = retu_power_off; } @@ -240,6 +304,7 @@ static int retu_remove(struct i2c_client *i2c) static const struct i2c_device_id retu_id[] = { { "retu-mfd", 0 }, + { "tahvo-mfd", 0 }, { } }; MODULE_DEVICE_TABLE(i2c, retu_id); diff --git a/drivers/mfd/rts5249.c b/drivers/mfd/rts5249.c new file mode 100644 index 000000000000..15dc848bc081 --- /dev/null +++ b/drivers/mfd/rts5249.c @@ -0,0 +1,241 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + * + * Author: + * Wei WANG <[email protected]> + * No. 
128, West Shenhu Road, Suzhou Industry Park, Suzhou, China + */ + +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/mfd/rtsx_pci.h> + +#include "rtsx_pcr.h" + +static u8 rts5249_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & 0x0F; +} + +static int rts5249_extra_init_hw(struct rtsx_pcr *pcr) +{ + rtsx_pci_init_cmd(pcr); + + /* Configure GPIO as output */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02); + /* Switch LDO3318 source from DV33 to card_3v3 */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01); + /* LED shine disabled, set initial shine cycle period */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02); + /* Correct driving */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + SD30_CLK_DRIVE_SEL, 0xFF, 0x99); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + SD30_CMD_DRIVE_SEL, 0xFF, 0x99); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + SD30_DAT_DRIVE_SEL, 0xFF, 0x92); + + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5249_optimize_phy(struct rtsx_pcr *pcr) +{ + int err; + + err = rtsx_pci_write_phy_register(pcr, PHY_REG_REV, 0xFE46); + if (err < 0) + return err; + + msleep(1); + + return rtsx_pci_write_phy_register(pcr, PHY_BPCR, 0x05C0); +} + +static int rts5249_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02); +} + +static int rts5249_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00); +} + +static int rts5249_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08); +} + +static int rts5249_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00); +} + +static int rts5249_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_VCC_PARTIAL_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x02); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + msleep(5); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_VCC_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x06); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + return 0; +} + +static int rts5249_card_power_off(struct rtsx_pcr *pcr, int card) +{ + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_POWER_OFF); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x00); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5249_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + u8 clk_drive, cmd_drive, dat_drive; + + if (voltage == OUTPUT_3V3) { + err = rtsx_pci_write_phy_register(pcr, PHY_TUNE, 0x4FC0 | 0x24); + if (err < 0) + return err; + clk_drive = 0x99; + cmd_drive = 0x99; + dat_drive = 0x92; + } else if (voltage == OUTPUT_1V8) { + err = rtsx_pci_write_phy_register(pcr, PHY_BACR, 0x3C02); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, PHY_TUNE, 0x4C40 | 0x24); + if (err < 0) + return err; + clk_drive = 0xb3; + cmd_drive = 0xb3; + dat_drive = 0xb3; + } else { + return -EINVAL; + } + + /* set pad drive */ + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, 
SD30_CLK_DRIVE_SEL, + 0xFF, clk_drive); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CMD_DRIVE_SEL, + 0xFF, cmd_drive); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DAT_DRIVE_SEL, + 0xFF, dat_drive); + return rtsx_pci_send_cmd(pcr, 100); +} + +static const struct pcr_ops rts5249_pcr_ops = { + .extra_init_hw = rts5249_extra_init_hw, + .optimize_phy = rts5249_optimize_phy, + .turn_on_led = rts5249_turn_on_led, + .turn_off_led = rts5249_turn_off_led, + .enable_auto_blink = rts5249_enable_auto_blink, + .disable_auto_blink = rts5249_disable_auto_blink, + .card_power_on = rts5249_card_power_on, + .card_power_off = rts5249_card_power_off, + .switch_output_voltage = rts5249_switch_output_voltage, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5249_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0xAA), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5249_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5249_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5249_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +void rts5249_init_params(struct rtsx_pcr *pcr) +{ + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + pcr->ops = &rts5249_pcr_ops; + + pcr->ic_version = rts5249_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5249_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rts5249_sd_pull_ctl_disable_tbl; + pcr->ms_pull_ctl_enable_tbl = rts5249_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5249_ms_pull_ctl_disable_tbl; +} diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c index 2f12cc13489a..e968c01ca2ac 100644 --- a/drivers/mfd/rtsx_pcr.c +++ b/drivers/mfd/rtsx_pcr.c @@ -56,6 +56,7 @@ static DEFINE_PCI_DEVICE_TABLE(rtsx_pci_ids) = { { PCI_DEVICE(0x10EC, 0x5229), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { PCI_DEVICE(0x10EC, 0x5289), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { PCI_DEVICE(0x10EC, 0x5227), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5249), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { 0, } }; @@ -1033,6 +1034,10 @@ static int rtsx_pci_init_chip(struct rtsx_pcr *pcr) case 0x5227: rts5227_init_params(pcr); break; + + case 0x5249: + rts5249_init_params(pcr); + break; } dev_dbg(&(pcr->pci->dev), "PID: 0x%04x, IC version: 0x%02x\n", @@ -1138,7 +1143,7 @@ static int rtsx_pci_probe(struct pci_dev *pcidev, ret = rtsx_pci_acquire_irq(pcr); if (ret < 0) - goto free_dma; + goto disable_msi; pci_set_master(pcidev); synchronize_irq(pcr->irq); @@ -1162,7 +1167,9 @@ static int rtsx_pci_probe(struct pci_dev *pcidev, disable_irq: free_irq(pcr->irq, (void *)pcr); -free_dma: 
+disable_msi: + if (pcr->msi_en) + pci_disable_msi(pcr->pci); dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN, pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr); unmap: diff --git a/drivers/mfd/rtsx_pcr.h b/drivers/mfd/rtsx_pcr.h index 2b3ab8a04823..55fcfc25c4e4 100644 --- a/drivers/mfd/rtsx_pcr.h +++ b/drivers/mfd/rtsx_pcr.h @@ -32,5 +32,6 @@ void rts5209_init_params(struct rtsx_pcr *pcr); void rts5229_init_params(struct rtsx_pcr *pcr); void rtl8411_init_params(struct rtsx_pcr *pcr); void rts5227_init_params(struct rtsx_pcr *pcr); +void rts5249_init_params(struct rtsx_pcr *pcr); #endif diff --git a/drivers/mfd/si476x-cmd.c b/drivers/mfd/si476x-cmd.c new file mode 100644 index 000000000000..de48b4e88450 --- /dev/null +++ b/drivers/mfd/si476x-cmd.c @@ -0,0 +1,1553 @@ +/* + * drivers/mfd/si476x-cmd.c -- Subroutines implementing command + * protocol of si476x series of chips + * + * Copyright (C) 2012 Innovative Converged Devices(ICD) + * Copyright (C) 2013 Andrey Smirnov + * + * Author: Andrey Smirnov <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <linux/module.h> +#include <linux/completion.h> +#include <linux/delay.h> +#include <linux/atomic.h> +#include <linux/i2c.h> +#include <linux/device.h> +#include <linux/gpio.h> +#include <linux/videodev2.h> + +#include <linux/mfd/si476x-core.h> + +#define msb(x) ((u8)((u16) x >> 8)) +#define lsb(x) ((u8)((u16) x & 0x00FF)) + + + +#define CMD_POWER_UP 0x01 +#define CMD_POWER_UP_A10_NRESP 1 +#define CMD_POWER_UP_A10_NARGS 5 + +#define CMD_POWER_UP_A20_NRESP 1 +#define CMD_POWER_UP_A20_NARGS 5 + +#define POWER_UP_DELAY_MS 110 + +#define CMD_POWER_DOWN 0x11 +#define CMD_POWER_DOWN_A10_NRESP 1 + +#define CMD_POWER_DOWN_A20_NRESP 1 +#define CMD_POWER_DOWN_A20_NARGS 1 + +#define CMD_FUNC_INFO 0x12 +#define CMD_FUNC_INFO_NRESP 7 + +#define CMD_SET_PROPERTY 0x13 +#define CMD_SET_PROPERTY_NARGS 5 +#define CMD_SET_PROPERTY_NRESP 1 + +#define CMD_GET_PROPERTY 0x14 +#define CMD_GET_PROPERTY_NARGS 3 +#define CMD_GET_PROPERTY_NRESP 4 + +#define CMD_AGC_STATUS 0x17 +#define CMD_AGC_STATUS_NRESP_A10 2 +#define CMD_AGC_STATUS_NRESP_A20 6 + +#define PIN_CFG_BYTE(x) (0x7F & (x)) +#define CMD_DIG_AUDIO_PIN_CFG 0x18 +#define CMD_DIG_AUDIO_PIN_CFG_NARGS 4 +#define CMD_DIG_AUDIO_PIN_CFG_NRESP 5 + +#define CMD_ZIF_PIN_CFG 0x19 +#define CMD_ZIF_PIN_CFG_NARGS 4 +#define CMD_ZIF_PIN_CFG_NRESP 5 + +#define CMD_IC_LINK_GPO_CTL_PIN_CFG 0x1A +#define CMD_IC_LINK_GPO_CTL_PIN_CFG_NARGS 4 +#define CMD_IC_LINK_GPO_CTL_PIN_CFG_NRESP 5 + +#define CMD_ANA_AUDIO_PIN_CFG 0x1B +#define CMD_ANA_AUDIO_PIN_CFG_NARGS 1 +#define CMD_ANA_AUDIO_PIN_CFG_NRESP 2 + +#define CMD_INTB_PIN_CFG 0x1C +#define CMD_INTB_PIN_CFG_NARGS 2 +#define CMD_INTB_PIN_CFG_A10_NRESP 6 +#define CMD_INTB_PIN_CFG_A20_NRESP 3 + +#define CMD_FM_TUNE_FREQ 0x30 +#define CMD_FM_TUNE_FREQ_A10_NARGS 5 +#define CMD_FM_TUNE_FREQ_A20_NARGS 3 +#define CMD_FM_TUNE_FREQ_NRESP 1 + +#define CMD_FM_RSQ_STATUS 0x32 + +#define CMD_FM_RSQ_STATUS_A10_NARGS 1 +#define CMD_FM_RSQ_STATUS_A10_NRESP 17 +#define CMD_FM_RSQ_STATUS_A30_NARGS 1 +#define CMD_FM_RSQ_STATUS_A30_NRESP 23 + + +#define 
CMD_FM_SEEK_START 0x31 +#define CMD_FM_SEEK_START_NARGS 1 +#define CMD_FM_SEEK_START_NRESP 1 + +#define CMD_FM_RDS_STATUS 0x36 +#define CMD_FM_RDS_STATUS_NARGS 1 +#define CMD_FM_RDS_STATUS_NRESP 16 + +#define CMD_FM_RDS_BLOCKCOUNT 0x37 +#define CMD_FM_RDS_BLOCKCOUNT_NARGS 1 +#define CMD_FM_RDS_BLOCKCOUNT_NRESP 8 + +#define CMD_FM_PHASE_DIVERSITY 0x38 +#define CMD_FM_PHASE_DIVERSITY_NARGS 1 +#define CMD_FM_PHASE_DIVERSITY_NRESP 1 + +#define CMD_FM_PHASE_DIV_STATUS 0x39 +#define CMD_FM_PHASE_DIV_STATUS_NRESP 2 + +#define CMD_AM_TUNE_FREQ 0x40 +#define CMD_AM_TUNE_FREQ_NARGS 3 +#define CMD_AM_TUNE_FREQ_NRESP 1 + +#define CMD_AM_RSQ_STATUS 0x42 +#define CMD_AM_RSQ_STATUS_NARGS 1 +#define CMD_AM_RSQ_STATUS_NRESP 13 + +#define CMD_AM_SEEK_START 0x41 +#define CMD_AM_SEEK_START_NARGS 1 +#define CMD_AM_SEEK_START_NRESP 1 + + +#define CMD_AM_ACF_STATUS 0x45 +#define CMD_AM_ACF_STATUS_NRESP 6 +#define CMD_AM_ACF_STATUS_NARGS 1 + +#define CMD_FM_ACF_STATUS 0x35 +#define CMD_FM_ACF_STATUS_NRESP 8 +#define CMD_FM_ACF_STATUS_NARGS 1 + +#define CMD_MAX_ARGS_COUNT (10) + + +enum si476x_acf_status_report_bits { + SI476X_ACF_BLEND_INT = (1 << 4), + SI476X_ACF_HIBLEND_INT = (1 << 3), + SI476X_ACF_HICUT_INT = (1 << 2), + SI476X_ACF_CHBW_INT = (1 << 1), + SI476X_ACF_SOFTMUTE_INT = (1 << 0), + + SI476X_ACF_SMUTE = (1 << 0), + SI476X_ACF_SMATTN = 0b11111, + SI476X_ACF_PILOT = (1 << 7), + SI476X_ACF_STBLEND = ~SI476X_ACF_PILOT, +}; + +enum si476x_agc_status_report_bits { + SI476X_AGC_MXHI = (1 << 5), + SI476X_AGC_MXLO = (1 << 4), + SI476X_AGC_LNAHI = (1 << 3), + SI476X_AGC_LNALO = (1 << 2), +}; + +enum si476x_errors { + SI476X_ERR_BAD_COMMAND = 0x10, + SI476X_ERR_BAD_ARG1 = 0x11, + SI476X_ERR_BAD_ARG2 = 0x12, + SI476X_ERR_BAD_ARG3 = 0x13, + SI476X_ERR_BAD_ARG4 = 0x14, + SI476X_ERR_BUSY = 0x18, + SI476X_ERR_BAD_INTERNAL_MEMORY = 0x20, + SI476X_ERR_BAD_PATCH = 0x30, + SI476X_ERR_BAD_BOOT_MODE = 0x31, + SI476X_ERR_BAD_PROPERTY = 0x40, +}; + +static int si476x_core_parse_and_nag_about_error(struct si476x_core *core) +{ + int err; + char *cause; + u8 buffer[2]; + + if (core->revision != SI476X_REVISION_A10) { + err = si476x_core_i2c_xfer(core, SI476X_I2C_RECV, + buffer, sizeof(buffer)); + if (err == sizeof(buffer)) { + switch (buffer[1]) { + case SI476X_ERR_BAD_COMMAND: + cause = "Bad command"; + err = -EINVAL; + break; + case SI476X_ERR_BAD_ARG1: + cause = "Bad argument #1"; + err = -EINVAL; + break; + case SI476X_ERR_BAD_ARG2: + cause = "Bad argument #2"; + err = -EINVAL; + break; + case SI476X_ERR_BAD_ARG3: + cause = "Bad argument #3"; + err = -EINVAL; + break; + case SI476X_ERR_BAD_ARG4: + cause = "Bad argument #4"; + err = -EINVAL; + break; + case SI476X_ERR_BUSY: + cause = "Chip is busy"; + err = -EBUSY; + break; + case SI476X_ERR_BAD_INTERNAL_MEMORY: + cause = "Bad internal memory"; + err = -EIO; + break; + case SI476X_ERR_BAD_PATCH: + cause = "Bad patch"; + err = -EINVAL; + break; + case SI476X_ERR_BAD_BOOT_MODE: + cause = "Bad boot mode"; + err = -EINVAL; + break; + case SI476X_ERR_BAD_PROPERTY: + cause = "Bad property"; + err = -EINVAL; + break; + default: + cause = "Unknown"; + err = -EIO; + } + + dev_err(&core->client->dev, + "[Chip error status]: %s\n", cause); + } else { + dev_err(&core->client->dev, + "Failed to fetch error code\n"); + err = (err >= 0) ? 
-EIO : err; + } + } else { + err = -EIO; + } + + return err; +} + +/** + * si476x_core_send_command() - sends a command to si476x and waits its + * response + * @core: si476x_device structure for the device we are + * communicating with + * @command: command id + * @args: command arguments we are sending + * @argn: actual size of @args + * @response: buffer to place the expected response from the device + * @respn: actual size of @response + * @usecs: amount of time to wait before reading the response (in + * usecs) + * + * Function returns 0 on succsess and negative error code on + * failure + */ +static int si476x_core_send_command(struct si476x_core *core, + const u8 command, + const u8 args[], + const int argn, + u8 resp[], + const int respn, + const int usecs) +{ + struct i2c_client *client = core->client; + int err; + u8 data[CMD_MAX_ARGS_COUNT + 1]; + + if (argn > CMD_MAX_ARGS_COUNT) { + err = -ENOMEM; + goto exit; + } + + if (!client->adapter) { + err = -ENODEV; + goto exit; + } + + /* First send the command and its arguments */ + data[0] = command; + memcpy(&data[1], args, argn); + dev_dbg(&client->dev, "Command:\n %*ph\n", argn + 1, data); + + err = si476x_core_i2c_xfer(core, SI476X_I2C_SEND, + (char *) data, argn + 1); + if (err != argn + 1) { + dev_err(&core->client->dev, + "Error while sending command 0x%02x\n", + command); + err = (err >= 0) ? -EIO : err; + goto exit; + } + /* Set CTS to zero only after the command is send to avoid + * possible racing conditions when working in polling mode */ + atomic_set(&core->cts, 0); + + /* if (unlikely(command == CMD_POWER_DOWN) */ + if (!wait_event_timeout(core->command, + atomic_read(&core->cts), + usecs_to_jiffies(usecs) + 1)) + dev_warn(&core->client->dev, + "(%s) [CMD 0x%02x] Answer timeout.\n", + __func__, command); + + /* + When working in polling mode, for some reason the tuner will + report CTS bit as being set in the first status byte read, + but all the consequtive ones will return zeros until the + tuner is actually completed the POWER_UP command. To + workaround that we wait for second CTS to be reported + */ + if (unlikely(!core->client->irq && command == CMD_POWER_UP)) { + if (!wait_event_timeout(core->command, + atomic_read(&core->cts), + usecs_to_jiffies(usecs) + 1)) + dev_warn(&core->client->dev, + "(%s) Power up took too much time.\n", + __func__); + } + + /* Then get the response */ + err = si476x_core_i2c_xfer(core, SI476X_I2C_RECV, resp, respn); + if (err != respn) { + dev_err(&core->client->dev, + "Error while reading response for command 0x%02x\n", + command); + err = (err >= 0) ? 
-EIO : err; + goto exit; + } + dev_dbg(&client->dev, "Response:\n %*ph\n", respn, resp); + + err = 0; + + if (resp[0] & SI476X_ERR) { + dev_err(&core->client->dev, + "[CMD 0x%02x] Chip set error flag\n", command); + err = si476x_core_parse_and_nag_about_error(core); + goto exit; + } + + if (!(resp[0] & SI476X_CTS)) + err = -EBUSY; +exit: + return err; +} + +static int si476x_cmd_clear_stc(struct si476x_core *core) +{ + int err; + struct si476x_rsq_status_args args = { + .primary = false, + .rsqack = false, + .attune = false, + .cancel = false, + .stcack = true, + }; + + switch (core->power_up_parameters.func) { + case SI476X_FUNC_FM_RECEIVER: + err = si476x_core_cmd_fm_rsq_status(core, &args, NULL); + break; + case SI476X_FUNC_AM_RECEIVER: + err = si476x_core_cmd_am_rsq_status(core, &args, NULL); + break; + default: + err = -EINVAL; + } + + return err; +} + +static int si476x_cmd_tune_seek_freq(struct si476x_core *core, + uint8_t cmd, + const uint8_t args[], size_t argn, + uint8_t *resp, size_t respn) +{ + int err; + + + atomic_set(&core->stc, 0); + err = si476x_core_send_command(core, cmd, args, argn, resp, respn, + SI476X_TIMEOUT_TUNE); + if (!err) { + wait_event_killable(core->tuning, + atomic_read(&core->stc)); + si476x_cmd_clear_stc(core); + } + + return err; +} + +/** + * si476x_cmd_func_info() - send 'FUNC_INFO' command to the device + * @core: device to send the command to + * @info: struct si476x_func_info to fill all the information + * returned by the command + * + * The command requests the firmware and patch version for currently + * loaded firmware (dependent on the function of the device FM/AM/WB) + * + * Function returns 0 on succsess and negative error code on + * failure + */ +int si476x_core_cmd_func_info(struct si476x_core *core, + struct si476x_func_info *info) +{ + int err; + u8 resp[CMD_FUNC_INFO_NRESP]; + + err = si476x_core_send_command(core, CMD_FUNC_INFO, + NULL, 0, + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); + + info->firmware.major = resp[1]; + info->firmware.minor[0] = resp[2]; + info->firmware.minor[1] = resp[3]; + + info->patch_id = ((u16) resp[4] << 8) | resp[5]; + info->func = resp[6]; + + return err; +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_func_info); + +/** + * si476x_cmd_set_property() - send 'SET_PROPERTY' command to the device + * @core: device to send the command to + * @property: property address + * @value: property value + * + * Function returns 0 on succsess and negative error code on + * failure + */ +int si476x_core_cmd_set_property(struct si476x_core *core, + u16 property, u16 value) +{ + u8 resp[CMD_SET_PROPERTY_NRESP]; + const u8 args[CMD_SET_PROPERTY_NARGS] = { + 0x00, + msb(property), + lsb(property), + msb(value), + lsb(value), + }; + + return si476x_core_send_command(core, CMD_SET_PROPERTY, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_set_property); + +/** + * si476x_cmd_get_property() - send 'GET_PROPERTY' command to the device + * @core: device to send the command to + * @property: property address + * + * Function return the value of property as u16 on success or a + * negative error on failure + */ +int si476x_core_cmd_get_property(struct si476x_core *core, u16 property) +{ + int err; + u8 resp[CMD_GET_PROPERTY_NRESP]; + const u8 args[CMD_GET_PROPERTY_NARGS] = { + 0x00, + msb(property), + lsb(property), + }; + + err = si476x_core_send_command(core, CMD_GET_PROPERTY, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); + if (err < 
0) + return err; + else + return be16_to_cpup((__be16 *)(resp + 2)); +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_get_property); + +/** + * si476x_cmd_dig_audio_pin_cfg() - send 'DIG_AUDIO_PIN_CFG' command to + * the device + * @core: device to send the command to + * @dclk: DCLK pin function configuration: + * #SI476X_DCLK_NOOP - do not modify the behaviour + * #SI476X_DCLK_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * #SI476X_DCLK_DAUDIO - set the pin to be a part of digital + * audio interface + * @dfs: DFS pin function configuration: + * #SI476X_DFS_NOOP - do not modify the behaviour + * #SI476X_DFS_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_DFS_DAUDIO - set the pin to be a part of digital + * audio interface + * @dout - DOUT pin function configuration: + * SI476X_DOUT_NOOP - do not modify the behaviour + * SI476X_DOUT_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_DOUT_I2S_OUTPUT - set this pin to be digital out on I2S + * port 1 + * SI476X_DOUT_I2S_INPUT - set this pin to be digital in on I2S + * port 1 + * @xout - XOUT pin function configuration: + * SI476X_XOUT_NOOP - do not modify the behaviour + * SI476X_XOUT_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_XOUT_I2S_INPUT - set this pin to be digital in on I2S + * port 1 + * SI476X_XOUT_MODE_SELECT - set this pin to be the input that + * selects the mode of the I2S audio + * combiner (analog or HD) + * [SI4761/63/65/67 Only] + * + * Function returns 0 on success and negative error code on failure + */ +int si476x_core_cmd_dig_audio_pin_cfg(struct si476x_core *core, + enum si476x_dclk_config dclk, + enum si476x_dfs_config dfs, + enum si476x_dout_config dout, + enum si476x_xout_config xout) +{ + u8 resp[CMD_DIG_AUDIO_PIN_CFG_NRESP]; + const u8 args[CMD_DIG_AUDIO_PIN_CFG_NARGS] = { + PIN_CFG_BYTE(dclk), + PIN_CFG_BYTE(dfs), + PIN_CFG_BYTE(dout), + PIN_CFG_BYTE(xout), + }; + + return si476x_core_send_command(core, CMD_DIG_AUDIO_PIN_CFG, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_dig_audio_pin_cfg); + +/** + * si476x_cmd_zif_pin_cfg - send 'ZIF_PIN_CFG_COMMAND' + * @core - device to send the command to + * @iqclk - IQCL pin function configuration: + * SI476X_IQCLK_NOOP - do not modify the behaviour + * SI476X_IQCLK_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_IQCLK_IQ - set pin to be a part of I/Q interace + * in master mode + * @iqfs - IQFS pin function configuration: + * SI476X_IQFS_NOOP - do not modify the behaviour + * SI476X_IQFS_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_IQFS_IQ - set pin to be a part of I/Q interace + * in master mode + * @iout - IOUT pin function configuration: + * SI476X_IOUT_NOOP - do not modify the behaviour + * SI476X_IOUT_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_IOUT_OUTPUT - set pin to be I out + * @qout - QOUT pin function configuration: + * SI476X_QOUT_NOOP - do not modify the behaviour + * SI476X_QOUT_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_QOUT_OUTPUT - set pin to be Q out + * + * Function returns 0 on success and negative error code on failure + */ +int si476x_core_cmd_zif_pin_cfg(struct si476x_core *core, + enum si476x_iqclk_config iqclk, + enum si476x_iqfs_config iqfs, + enum si476x_iout_config iout, + enum si476x_qout_config qout) +{ 
+ u8 resp[CMD_ZIF_PIN_CFG_NRESP]; + const u8 args[CMD_ZIF_PIN_CFG_NARGS] = { + PIN_CFG_BYTE(iqclk), + PIN_CFG_BYTE(iqfs), + PIN_CFG_BYTE(iout), + PIN_CFG_BYTE(qout), + }; + + return si476x_core_send_command(core, CMD_ZIF_PIN_CFG, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_zif_pin_cfg); + +/** + * si476x_cmd_ic_link_gpo_ctl_pin_cfg - send + * 'IC_LINK_GPIO_CTL_PIN_CFG' comand to the device + * @core - device to send the command to + * @icin - ICIN pin function configuration: + * SI476X_ICIN_NOOP - do not modify the behaviour + * SI476X_ICIN_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_ICIN_GPO1_HIGH - set pin to be an output, drive it high + * SI476X_ICIN_GPO1_LOW - set pin to be an output, drive it low + * SI476X_ICIN_IC_LINK - set the pin to be a part of Inter-Chip link + * @icip - ICIP pin function configuration: + * SI476X_ICIP_NOOP - do not modify the behaviour + * SI476X_ICIP_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_ICIP_GPO1_HIGH - set pin to be an output, drive it high + * SI476X_ICIP_GPO1_LOW - set pin to be an output, drive it low + * SI476X_ICIP_IC_LINK - set the pin to be a part of Inter-Chip link + * @icon - ICON pin function configuration: + * SI476X_ICON_NOOP - do not modify the behaviour + * SI476X_ICON_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_ICON_I2S - set the pin to be a part of audio + * interface in slave mode (DCLK) + * SI476X_ICON_IC_LINK - set the pin to be a part of Inter-Chip link + * @icop - ICOP pin function configuration: + * SI476X_ICOP_NOOP - do not modify the behaviour + * SI476X_ICOP_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_ICOP_I2S - set the pin to be a part of audio + * interface in slave mode (DOUT) + * [Si4761/63/65/67 Only] + * SI476X_ICOP_IC_LINK - set the pin to be a part of Inter-Chip link + * + * Function returns 0 on success and negative error code on failure + */ +int si476x_core_cmd_ic_link_gpo_ctl_pin_cfg(struct si476x_core *core, + enum si476x_icin_config icin, + enum si476x_icip_config icip, + enum si476x_icon_config icon, + enum si476x_icop_config icop) +{ + u8 resp[CMD_IC_LINK_GPO_CTL_PIN_CFG_NRESP]; + const u8 args[CMD_IC_LINK_GPO_CTL_PIN_CFG_NARGS] = { + PIN_CFG_BYTE(icin), + PIN_CFG_BYTE(icip), + PIN_CFG_BYTE(icon), + PIN_CFG_BYTE(icop), + }; + + return si476x_core_send_command(core, CMD_IC_LINK_GPO_CTL_PIN_CFG, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_ic_link_gpo_ctl_pin_cfg); + +/** + * si476x_cmd_ana_audio_pin_cfg - send 'ANA_AUDIO_PIN_CFG' to the + * device + * @core - device to send the command to + * @lrout - LROUT pin function configuration: + * SI476X_LROUT_NOOP - do not modify the behaviour + * SI476X_LROUT_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_LROUT_AUDIO - set pin to be audio output + * SI476X_LROUT_MPX - set pin to be MPX output + * + * Function returns 0 on success and negative error code on failure + */ +int si476x_core_cmd_ana_audio_pin_cfg(struct si476x_core *core, + enum si476x_lrout_config lrout) +{ + u8 resp[CMD_ANA_AUDIO_PIN_CFG_NRESP]; + const u8 args[CMD_ANA_AUDIO_PIN_CFG_NARGS] = { + PIN_CFG_BYTE(lrout), + }; + + return si476x_core_send_command(core, CMD_ANA_AUDIO_PIN_CFG, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); +} 
+EXPORT_SYMBOL_GPL(si476x_core_cmd_ana_audio_pin_cfg); + + +/** + * si476x_cmd_intb_pin_cfg - send 'INTB_PIN_CFG' command to the device + * @core - device to send the command to + * @intb - INTB pin function configuration: + * SI476X_INTB_NOOP - do not modify the behaviour + * SI476X_INTB_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_INTB_DAUDIO - set pin to be a part of digital + * audio interface in slave mode + * SI476X_INTB_IRQ - set pin to be an interrupt request line + * @a1 - A1 pin function configuration: + * SI476X_A1_NOOP - do not modify the behaviour + * SI476X_A1_TRISTATE - put the pin in tristate condition, + * enable 1MOhm pulldown + * SI476X_A1_IRQ - set pin to be an interrupt request line + * + * Function returns 0 on success and negative error code on failure + */ +static int si476x_core_cmd_intb_pin_cfg_a10(struct si476x_core *core, + enum si476x_intb_config intb, + enum si476x_a1_config a1) +{ + u8 resp[CMD_INTB_PIN_CFG_A10_NRESP]; + const u8 args[CMD_INTB_PIN_CFG_NARGS] = { + PIN_CFG_BYTE(intb), + PIN_CFG_BYTE(a1), + }; + + return si476x_core_send_command(core, CMD_INTB_PIN_CFG, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); +} + +static int si476x_core_cmd_intb_pin_cfg_a20(struct si476x_core *core, + enum si476x_intb_config intb, + enum si476x_a1_config a1) +{ + u8 resp[CMD_INTB_PIN_CFG_A20_NRESP]; + const u8 args[CMD_INTB_PIN_CFG_NARGS] = { + PIN_CFG_BYTE(intb), + PIN_CFG_BYTE(a1), + }; + + return si476x_core_send_command(core, CMD_INTB_PIN_CFG, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); +} + + + +/** + * si476x_cmd_am_rsq_status - send 'AM_RSQ_STATUS' command to the + * device + * @core - device to send the command to + * @rsqack - if set command clears RSQINT, SNRINT, SNRLINT, RSSIHINT, + * RSSSILINT, BLENDINT, MULTHINT and MULTLINT + * @attune - when set the values in the status report are the values + * that were calculated at tune + * @cancel - abort ongoing seek/tune opertation + * @stcack - clear the STCINT bin in status register + * @report - all signal quality information retured by the command + * (if NULL then the output of the command is ignored) + * + * Function returns 0 on success and negative error code on failure + */ +int si476x_core_cmd_am_rsq_status(struct si476x_core *core, + struct si476x_rsq_status_args *rsqargs, + struct si476x_rsq_status_report *report) +{ + int err; + u8 resp[CMD_AM_RSQ_STATUS_NRESP]; + const u8 args[CMD_AM_RSQ_STATUS_NARGS] = { + rsqargs->rsqack << 3 | rsqargs->attune << 2 | + rsqargs->cancel << 1 | rsqargs->stcack, + }; + + err = si476x_core_send_command(core, CMD_AM_RSQ_STATUS, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); + /* + * Besides getting received signal quality information this + * command can be used to just acknowledge different interrupt + * flags in those cases it is useless to copy and parse + * received data so user can pass NULL, and thus avoid + * unnecessary copying. 
+ */ + if (!report) + return err; + + report->snrhint = 0b00001000 & resp[1]; + report->snrlint = 0b00000100 & resp[1]; + report->rssihint = 0b00000010 & resp[1]; + report->rssilint = 0b00000001 & resp[1]; + + report->bltf = 0b10000000 & resp[2]; + report->snr_ready = 0b00100000 & resp[2]; + report->rssiready = 0b00001000 & resp[2]; + report->afcrl = 0b00000010 & resp[2]; + report->valid = 0b00000001 & resp[2]; + + report->readfreq = be16_to_cpup((__be16 *)(resp + 3)); + report->freqoff = resp[5]; + report->rssi = resp[6]; + report->snr = resp[7]; + report->lassi = resp[9]; + report->hassi = resp[10]; + report->mult = resp[11]; + report->dev = resp[12]; + + return err; +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_am_rsq_status); + +int si476x_core_cmd_fm_acf_status(struct si476x_core *core, + struct si476x_acf_status_report *report) +{ + int err; + u8 resp[CMD_FM_ACF_STATUS_NRESP]; + const u8 args[CMD_FM_ACF_STATUS_NARGS] = { + 0x0, + }; + + if (!report) + return -EINVAL; + + err = si476x_core_send_command(core, CMD_FM_ACF_STATUS, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); + if (err < 0) + return err; + + report->blend_int = resp[1] & SI476X_ACF_BLEND_INT; + report->hblend_int = resp[1] & SI476X_ACF_HIBLEND_INT; + report->hicut_int = resp[1] & SI476X_ACF_HICUT_INT; + report->chbw_int = resp[1] & SI476X_ACF_CHBW_INT; + report->softmute_int = resp[1] & SI476X_ACF_SOFTMUTE_INT; + report->smute = resp[2] & SI476X_ACF_SMUTE; + report->smattn = resp[3] & SI476X_ACF_SMATTN; + report->chbw = resp[4]; + report->hicut = resp[5]; + report->hiblend = resp[6]; + report->pilot = resp[7] & SI476X_ACF_PILOT; + report->stblend = resp[7] & SI476X_ACF_STBLEND; + + return err; +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_acf_status); + +int si476x_core_cmd_am_acf_status(struct si476x_core *core, + struct si476x_acf_status_report *report) +{ + int err; + u8 resp[CMD_AM_ACF_STATUS_NRESP]; + const u8 args[CMD_AM_ACF_STATUS_NARGS] = { + 0x0, + }; + + if (!report) + return -EINVAL; + + err = si476x_core_send_command(core, CMD_AM_ACF_STATUS, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); + if (err < 0) + return err; + + report->blend_int = resp[1] & SI476X_ACF_BLEND_INT; + report->hblend_int = resp[1] & SI476X_ACF_HIBLEND_INT; + report->hicut_int = resp[1] & SI476X_ACF_HICUT_INT; + report->chbw_int = resp[1] & SI476X_ACF_CHBW_INT; + report->softmute_int = resp[1] & SI476X_ACF_SOFTMUTE_INT; + report->smute = resp[2] & SI476X_ACF_SMUTE; + report->smattn = resp[3] & SI476X_ACF_SMATTN; + report->chbw = resp[4]; + report->hicut = resp[5]; + + return err; +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_am_acf_status); + + +/** + * si476x_cmd_fm_seek_start - send 'FM_SEEK_START' command to the + * device + * @core - device to send the command to + * @seekup - if set the direction of the search is 'up' + * @wrap - if set seek wraps when hitting band limit + * + * This function begins search for a valid station. The station is + * considered valid when 'FM_VALID_SNR_THRESHOLD' and + * 'FM_VALID_RSSI_THRESHOLD' and 'FM_VALID_MAX_TUNE_ERROR' criteria + * are met. 
+ *
+ * Function returns 0 on success and negative error code on failure
+ */
+int si476x_core_cmd_fm_seek_start(struct si476x_core *core,
+ bool seekup, bool wrap)
+{
+ u8 resp[CMD_FM_SEEK_START_NRESP];
+ const u8 args[CMD_FM_SEEK_START_NARGS] = {
+ seekup << 3 | wrap << 2,
+ };
+
+ return si476x_cmd_tune_seek_freq(core, CMD_FM_SEEK_START,
+ args, sizeof(args),
+ resp, sizeof(resp));
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_seek_start);
+
+/**
+ * si476x_cmd_fm_rds_status - send 'FM_RDS_STATUS' command to the
+ * device
+ * @core - device to send the command to
+ * @status_only - if set the data is not removed from the RDS FIFO,
+ * RDSFIFOUSED is not decremented and all the other RDS
+ * data fields contain the last valid info received
+ * @mtfifo - if set the command clears the RDS receive FIFO
+ * @intack - if set the command clears the RDSINT bit
+ * @report - retrieved RDS status information (may be NULL when the
+ * caller only wants to acknowledge interrupts)
+ *
+ * Function returns 0 on success and negative error code on failure
+ */
+int si476x_core_cmd_fm_rds_status(struct si476x_core *core,
+ bool status_only,
+ bool mtfifo,
+ bool intack,
+ struct si476x_rds_status_report *report)
+{
+ int err;
+ u8 resp[CMD_FM_RDS_STATUS_NRESP];
+ const u8 args[CMD_FM_RDS_STATUS_NARGS] = {
+ status_only << 2 | mtfifo << 1 | intack,
+ };
+
+ err = si476x_core_send_command(core, CMD_FM_RDS_STATUS,
+ args, ARRAY_SIZE(args),
+ resp, ARRAY_SIZE(resp),
+ SI476X_DEFAULT_TIMEOUT);
+ /*
+ * Besides getting RDS status information this command can be
+ * used to just acknowledge different interrupt flags in those
+ * cases it is useless to copy and parse received data so user
+ * can pass NULL, and thus avoid unnecessary copying.
+ */
+ if (err < 0 || report == NULL)
+ return err;
+
+ report->rdstpptyint = 0b00010000 & resp[1];
+ report->rdspiint = 0b00001000 & resp[1];
+ report->rdssyncint = 0b00000010 & resp[1];
+ report->rdsfifoint = 0b00000001 & resp[1];
+
+ report->tpptyvalid = 0b00010000 & resp[2];
+ report->pivalid = 0b00001000 & resp[2];
+ report->rdssync = 0b00000010 & resp[2];
+ report->rdsfifolost = 0b00000001 & resp[2];
+
+ report->tp = 0b00100000 & resp[3];
+ report->pty = 0b00011111 & resp[3];
+
+ report->pi = be16_to_cpup((__be16 *)(resp + 4));
+ report->rdsfifoused = resp[6];
+
+ report->ble[V4L2_RDS_BLOCK_A] = 0b11000000 & resp[7];
+ report->ble[V4L2_RDS_BLOCK_B] = 0b00110000 & resp[7];
+ report->ble[V4L2_RDS_BLOCK_C] = 0b00001100 & resp[7];
+ report->ble[V4L2_RDS_BLOCK_D] = 0b00000011 & resp[7];
+
+ report->rds[V4L2_RDS_BLOCK_A].block = V4L2_RDS_BLOCK_A;
+ report->rds[V4L2_RDS_BLOCK_A].msb = resp[8];
+ report->rds[V4L2_RDS_BLOCK_A].lsb = resp[9];
+
+ report->rds[V4L2_RDS_BLOCK_B].block = V4L2_RDS_BLOCK_B;
+ report->rds[V4L2_RDS_BLOCK_B].msb = resp[10];
+ report->rds[V4L2_RDS_BLOCK_B].lsb = resp[11];
+
+ report->rds[V4L2_RDS_BLOCK_C].block = V4L2_RDS_BLOCK_C;
+ report->rds[V4L2_RDS_BLOCK_C].msb = resp[12];
+ report->rds[V4L2_RDS_BLOCK_C].lsb = resp[13];
+
+ report->rds[V4L2_RDS_BLOCK_D].block = V4L2_RDS_BLOCK_D;
+ report->rds[V4L2_RDS_BLOCK_D].msb = resp[14];
+ report->rds[V4L2_RDS_BLOCK_D].lsb = resp[15];
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_rds_status);
+
+int si476x_core_cmd_fm_rds_blockcount(struct si476x_core *core,
+ bool clear,
+ struct si476x_rds_blockcount_report *report)
+{
+ int err;
+ u8 resp[CMD_FM_RDS_BLOCKCOUNT_NRESP];
+ const u8 args[CMD_FM_RDS_BLOCKCOUNT_NARGS] = {
+ clear,
+ };
+
+ if (!report)
+ return -EINVAL;
+
+ err = si476x_core_send_command(core, CMD_FM_RDS_BLOCKCOUNT,
+ args, ARRAY_SIZE(args),
+ resp, ARRAY_SIZE(resp),
+ SI476X_DEFAULT_TIMEOUT);
+
+ if (!err) {
+ report->expected = be16_to_cpup((__be16 *)(resp + 2));
+ report->received = be16_to_cpup((__be16 *)(resp + 4));
+ report->uncorrectable = be16_to_cpup((__be16 *)(resp + 6));
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_rds_blockcount);
+
+int si476x_core_cmd_fm_phase_diversity(struct si476x_core *core,
+ enum si476x_phase_diversity_mode mode)
+{
+ u8 resp[CMD_FM_PHASE_DIVERSITY_NRESP];
+ const u8 args[CMD_FM_PHASE_DIVERSITY_NARGS] = {
+ mode & 0b111,
+ };
+
+ return si476x_core_send_command(core, CMD_FM_PHASE_DIVERSITY,
+ args, ARRAY_SIZE(args),
+ resp, ARRAY_SIZE(resp),
+ SI476X_DEFAULT_TIMEOUT);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_phase_diversity);
+/**
+ * si476x_core_cmd_fm_phase_div_status() - get the phase diversity
+ * status
+ *
+ * @core: si476x device
+ *
+ * NOTE: caller must hold the core lock
+ *
+ * Function returns the value of the status bit in case of success and
+ * negative error code in case of failure.
+ */
+int si476x_core_cmd_fm_phase_div_status(struct si476x_core *core)
+{
+ int err;
+ u8 resp[CMD_FM_PHASE_DIV_STATUS_NRESP];
+
+ err = si476x_core_send_command(core, CMD_FM_PHASE_DIV_STATUS,
+ NULL, 0,
+ resp, ARRAY_SIZE(resp),
+ SI476X_DEFAULT_TIMEOUT);
+
+ return (err < 0) ? err : resp[1];
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_phase_div_status);
+
+
+/**
+ * si476x_cmd_am_seek_start - send 'AM_SEEK_START' command to the
+ * device
+ * @core - device to send the command to
+ * @seekup - if set the direction of the search is 'up'
+ * @wrap - if set seek wraps when hitting band limit
+ *
+ * This function begins search for a valid station. The station is
+ * considered valid when 'FM_VALID_SNR_THRESHOLD' and
+ * 'FM_VALID_RSSI_THRESHOLD' and 'FM_VALID_MAX_TUNE_ERROR' criteria
+ * are met.
+ *
+ * Function returns 0 on success and negative error code on failure
+ */
+int si476x_core_cmd_am_seek_start(struct si476x_core *core,
+ bool seekup, bool wrap)
+{
+ u8 resp[CMD_AM_SEEK_START_NRESP];
+ const u8 args[CMD_AM_SEEK_START_NARGS] = {
+ seekup << 3 | wrap << 2,
+ };
+
+ return si476x_cmd_tune_seek_freq(core, CMD_AM_SEEK_START,
+ args, sizeof(args),
+ resp, sizeof(resp));
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_am_seek_start);
+
+
+
+static int si476x_core_cmd_power_up_a10(struct si476x_core *core,
+ struct si476x_power_up_args *puargs)
+{
+ u8 resp[CMD_POWER_UP_A10_NRESP];
+ const bool intsel = (core->pinmux.a1 == SI476X_A1_IRQ);
+ const bool ctsen = (core->client->irq != 0);
+ const u8 args[CMD_POWER_UP_A10_NARGS] = {
+ 0xF7, /* Reserved, always 0xF7 */
+ 0x3F & puargs->xcload, /* First two bits are reserved to be
+ * zeros */
+ ctsen << 7 | intsel << 6 | 0x07, /* Last five bits
+ * are reserved to
+ * be written as 0x7 */
+ puargs->func << 4 | puargs->freq,
+ 0x11, /* Reserved, always 0x11 */
+ };
+
+ return si476x_core_send_command(core, CMD_POWER_UP,
+ args, ARRAY_SIZE(args),
+ resp, ARRAY_SIZE(resp),
+ SI476X_TIMEOUT_POWER_UP);
+}
+
+static int si476x_core_cmd_power_up_a20(struct si476x_core *core,
+ struct si476x_power_up_args *puargs)
+{
+ u8 resp[CMD_POWER_UP_A20_NRESP];
+ const bool intsel = (core->pinmux.a1 == SI476X_A1_IRQ);
+ const bool ctsen = (core->client->irq != 0);
+ const u8 args[CMD_POWER_UP_A20_NARGS] = {
+ puargs->ibias6x << 7 | puargs->xstart,
+ 0x3F & puargs->xcload, /* First two bits are reserved to be
+ * zeros */
+ ctsen << 7 | intsel << 6 | puargs->fastboot << 5 |
+ puargs->xbiashc << 3 | puargs->xbias,
+ puargs->func << 4 | puargs->freq,
+ 0x10 | puargs->xmode,
+ };
+
+ return si476x_core_send_command(core, CMD_POWER_UP,
+
args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_TIMEOUT_POWER_UP); +} + +static int si476x_core_cmd_power_down_a10(struct si476x_core *core, + struct si476x_power_down_args *pdargs) +{ + u8 resp[CMD_POWER_DOWN_A10_NRESP]; + + return si476x_core_send_command(core, CMD_POWER_DOWN, + NULL, 0, + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); +} + +static int si476x_core_cmd_power_down_a20(struct si476x_core *core, + struct si476x_power_down_args *pdargs) +{ + u8 resp[CMD_POWER_DOWN_A20_NRESP]; + const u8 args[CMD_POWER_DOWN_A20_NARGS] = { + pdargs->xosc, + }; + return si476x_core_send_command(core, CMD_POWER_DOWN, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); +} + +static int si476x_core_cmd_am_tune_freq_a10(struct si476x_core *core, + struct si476x_tune_freq_args *tuneargs) +{ + + const int am_freq = tuneargs->freq; + u8 resp[CMD_AM_TUNE_FREQ_NRESP]; + const u8 args[CMD_AM_TUNE_FREQ_NARGS] = { + (tuneargs->hd << 6), + msb(am_freq), + lsb(am_freq), + }; + + return si476x_cmd_tune_seek_freq(core, CMD_AM_TUNE_FREQ, args, + sizeof(args), + resp, sizeof(resp)); +} + +static int si476x_core_cmd_am_tune_freq_a20(struct si476x_core *core, + struct si476x_tune_freq_args *tuneargs) +{ + const int am_freq = tuneargs->freq; + u8 resp[CMD_AM_TUNE_FREQ_NRESP]; + const u8 args[CMD_AM_TUNE_FREQ_NARGS] = { + (tuneargs->zifsr << 6) | (tuneargs->injside & 0b11), + msb(am_freq), + lsb(am_freq), + }; + + return si476x_cmd_tune_seek_freq(core, CMD_AM_TUNE_FREQ, + args, sizeof(args), + resp, sizeof(resp)); +} + +static int si476x_core_cmd_fm_rsq_status_a10(struct si476x_core *core, + struct si476x_rsq_status_args *rsqargs, + struct si476x_rsq_status_report *report) +{ + int err; + u8 resp[CMD_FM_RSQ_STATUS_A10_NRESP]; + const u8 args[CMD_FM_RSQ_STATUS_A10_NARGS] = { + rsqargs->rsqack << 3 | rsqargs->attune << 2 | + rsqargs->cancel << 1 | rsqargs->stcack, + }; + + err = si476x_core_send_command(core, CMD_FM_RSQ_STATUS, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); + /* + * Besides getting received signal quality information this + * command can be used to just acknowledge different interrupt + * flags in those cases it is useless to copy and parse + * received data so user can pass NULL, and thus avoid + * unnecessary copying. 
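+ *
+ * Note that the decoded fields below keep the raw masked byte
+ * values, so the flags read as "non-zero if set" rather than
+ * being normalized to 0 or 1.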
+ */ + if (err < 0 || report == NULL) + return err; + + report->multhint = 0b10000000 & resp[1]; + report->multlint = 0b01000000 & resp[1]; + report->snrhint = 0b00001000 & resp[1]; + report->snrlint = 0b00000100 & resp[1]; + report->rssihint = 0b00000010 & resp[1]; + report->rssilint = 0b00000001 & resp[1]; + + report->bltf = 0b10000000 & resp[2]; + report->snr_ready = 0b00100000 & resp[2]; + report->rssiready = 0b00001000 & resp[2]; + report->afcrl = 0b00000010 & resp[2]; + report->valid = 0b00000001 & resp[2]; + + report->readfreq = be16_to_cpup((__be16 *)(resp + 3)); + report->freqoff = resp[5]; + report->rssi = resp[6]; + report->snr = resp[7]; + report->lassi = resp[9]; + report->hassi = resp[10]; + report->mult = resp[11]; + report->dev = resp[12]; + report->readantcap = be16_to_cpup((__be16 *)(resp + 13)); + report->assi = resp[15]; + report->usn = resp[16]; + + return err; +} + +static int si476x_core_cmd_fm_rsq_status_a20(struct si476x_core *core, + struct si476x_rsq_status_args *rsqargs, + struct si476x_rsq_status_report *report) +{ + int err; + u8 resp[CMD_FM_RSQ_STATUS_A10_NRESP]; + const u8 args[CMD_FM_RSQ_STATUS_A30_NARGS] = { + rsqargs->primary << 4 | rsqargs->rsqack << 3 | + rsqargs->attune << 2 | rsqargs->cancel << 1 | + rsqargs->stcack, + }; + + err = si476x_core_send_command(core, CMD_FM_RSQ_STATUS, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); + /* + * Besides getting received signal quality information this + * command can be used to just acknowledge different interrupt + * flags in those cases it is useless to copy and parse + * received data so user can pass NULL, and thus avoid + * unnecessary copying. + */ + if (err < 0 || report == NULL) + return err; + + report->multhint = 0b10000000 & resp[1]; + report->multlint = 0b01000000 & resp[1]; + report->snrhint = 0b00001000 & resp[1]; + report->snrlint = 0b00000100 & resp[1]; + report->rssihint = 0b00000010 & resp[1]; + report->rssilint = 0b00000001 & resp[1]; + + report->bltf = 0b10000000 & resp[2]; + report->snr_ready = 0b00100000 & resp[2]; + report->rssiready = 0b00001000 & resp[2]; + report->afcrl = 0b00000010 & resp[2]; + report->valid = 0b00000001 & resp[2]; + + report->readfreq = be16_to_cpup((__be16 *)(resp + 3)); + report->freqoff = resp[5]; + report->rssi = resp[6]; + report->snr = resp[7]; + report->lassi = resp[9]; + report->hassi = resp[10]; + report->mult = resp[11]; + report->dev = resp[12]; + report->readantcap = be16_to_cpup((__be16 *)(resp + 13)); + report->assi = resp[15]; + report->usn = resp[16]; + + return err; +} + + +static int si476x_core_cmd_fm_rsq_status_a30(struct si476x_core *core, + struct si476x_rsq_status_args *rsqargs, + struct si476x_rsq_status_report *report) +{ + int err; + u8 resp[CMD_FM_RSQ_STATUS_A30_NRESP]; + const u8 args[CMD_FM_RSQ_STATUS_A30_NARGS] = { + rsqargs->primary << 4 | rsqargs->rsqack << 3 | + rsqargs->attune << 2 | rsqargs->cancel << 1 | + rsqargs->stcack, + }; + + err = si476x_core_send_command(core, CMD_FM_RSQ_STATUS, + args, ARRAY_SIZE(args), + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); + /* + * Besides getting received signal quality information this + * command can be used to just acknowledge different interrupt + * flags in those cases it is useless to copy and parse + * received data so user can pass NULL, and thus avoid + * unnecessary copying. 
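+ *
+ * Compared to the A10/A20 parsers above, the A30 report decoded
+ * below additionally carries the injection side, ISSI and the
+ * pilot/RDS/ASSI deviation fields plus the RDS PI code.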
+ */ + if (err < 0 || report == NULL) + return err; + + report->multhint = 0b10000000 & resp[1]; + report->multlint = 0b01000000 & resp[1]; + report->snrhint = 0b00001000 & resp[1]; + report->snrlint = 0b00000100 & resp[1]; + report->rssihint = 0b00000010 & resp[1]; + report->rssilint = 0b00000001 & resp[1]; + + report->bltf = 0b10000000 & resp[2]; + report->snr_ready = 0b00100000 & resp[2]; + report->rssiready = 0b00001000 & resp[2]; + report->injside = 0b00000100 & resp[2]; + report->afcrl = 0b00000010 & resp[2]; + report->valid = 0b00000001 & resp[2]; + + report->readfreq = be16_to_cpup((__be16 *)(resp + 3)); + report->freqoff = resp[5]; + report->rssi = resp[6]; + report->snr = resp[7]; + report->issi = resp[8]; + report->lassi = resp[9]; + report->hassi = resp[10]; + report->mult = resp[11]; + report->dev = resp[12]; + report->readantcap = be16_to_cpup((__be16 *)(resp + 13)); + report->assi = resp[15]; + report->usn = resp[16]; + + report->pilotdev = resp[17]; + report->rdsdev = resp[18]; + report->assidev = resp[19]; + report->strongdev = resp[20]; + report->rdspi = be16_to_cpup((__be16 *)(resp + 21)); + + return err; +} + +static int si476x_core_cmd_fm_tune_freq_a10(struct si476x_core *core, + struct si476x_tune_freq_args *tuneargs) +{ + u8 resp[CMD_FM_TUNE_FREQ_NRESP]; + const u8 args[CMD_FM_TUNE_FREQ_A10_NARGS] = { + (tuneargs->hd << 6) | (tuneargs->tunemode << 4) + | (tuneargs->smoothmetrics << 2), + msb(tuneargs->freq), + lsb(tuneargs->freq), + msb(tuneargs->antcap), + lsb(tuneargs->antcap) + }; + + return si476x_cmd_tune_seek_freq(core, CMD_FM_TUNE_FREQ, + args, sizeof(args), + resp, sizeof(resp)); +} + +static int si476x_core_cmd_fm_tune_freq_a20(struct si476x_core *core, + struct si476x_tune_freq_args *tuneargs) +{ + u8 resp[CMD_FM_TUNE_FREQ_NRESP]; + const u8 args[CMD_FM_TUNE_FREQ_A20_NARGS] = { + (tuneargs->hd << 6) | (tuneargs->tunemode << 4) + | (tuneargs->smoothmetrics << 2) | (tuneargs->injside), + msb(tuneargs->freq), + lsb(tuneargs->freq), + }; + + return si476x_cmd_tune_seek_freq(core, CMD_FM_TUNE_FREQ, + args, sizeof(args), + resp, sizeof(resp)); +} + +static int si476x_core_cmd_agc_status_a20(struct si476x_core *core, + struct si476x_agc_status_report *report) +{ + int err; + u8 resp[CMD_AGC_STATUS_NRESP_A20]; + + if (!report) + return -EINVAL; + + err = si476x_core_send_command(core, CMD_AGC_STATUS, + NULL, 0, + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); + if (err < 0) + return err; + + report->mxhi = resp[1] & SI476X_AGC_MXHI; + report->mxlo = resp[1] & SI476X_AGC_MXLO; + report->lnahi = resp[1] & SI476X_AGC_LNAHI; + report->lnalo = resp[1] & SI476X_AGC_LNALO; + report->fmagc1 = resp[2]; + report->fmagc2 = resp[3]; + report->pgagain = resp[4]; + report->fmwblang = resp[5]; + + return err; +} + +static int si476x_core_cmd_agc_status_a10(struct si476x_core *core, + struct si476x_agc_status_report *report) +{ + int err; + u8 resp[CMD_AGC_STATUS_NRESP_A10]; + + if (!report) + return -EINVAL; + + err = si476x_core_send_command(core, CMD_AGC_STATUS, + NULL, 0, + resp, ARRAY_SIZE(resp), + SI476X_DEFAULT_TIMEOUT); + if (err < 0) + return err; + + report->mxhi = resp[1] & SI476X_AGC_MXHI; + report->mxlo = resp[1] & SI476X_AGC_MXLO; + report->lnahi = resp[1] & SI476X_AGC_LNAHI; + report->lnalo = resp[1] & SI476X_AGC_LNALO; + + return err; +} + +typedef int (*tune_freq_func_t) (struct si476x_core *core, + struct si476x_tune_freq_args *tuneargs); + +static struct { + int (*power_up) (struct si476x_core *, + struct si476x_power_up_args *); + int (*power_down) (struct 
si476x_core *, + struct si476x_power_down_args *); + + tune_freq_func_t fm_tune_freq; + tune_freq_func_t am_tune_freq; + + int (*fm_rsq_status)(struct si476x_core *, + struct si476x_rsq_status_args *, + struct si476x_rsq_status_report *); + + int (*agc_status)(struct si476x_core *, + struct si476x_agc_status_report *); + int (*intb_pin_cfg)(struct si476x_core *core, + enum si476x_intb_config intb, + enum si476x_a1_config a1); +} si476x_cmds_vtable[] = { + [SI476X_REVISION_A10] = { + .power_up = si476x_core_cmd_power_up_a10, + .power_down = si476x_core_cmd_power_down_a10, + .fm_tune_freq = si476x_core_cmd_fm_tune_freq_a10, + .am_tune_freq = si476x_core_cmd_am_tune_freq_a10, + .fm_rsq_status = si476x_core_cmd_fm_rsq_status_a10, + .agc_status = si476x_core_cmd_agc_status_a10, + .intb_pin_cfg = si476x_core_cmd_intb_pin_cfg_a10, + }, + [SI476X_REVISION_A20] = { + .power_up = si476x_core_cmd_power_up_a20, + .power_down = si476x_core_cmd_power_down_a20, + .fm_tune_freq = si476x_core_cmd_fm_tune_freq_a20, + .am_tune_freq = si476x_core_cmd_am_tune_freq_a20, + .fm_rsq_status = si476x_core_cmd_fm_rsq_status_a20, + .agc_status = si476x_core_cmd_agc_status_a20, + .intb_pin_cfg = si476x_core_cmd_intb_pin_cfg_a20, + }, + [SI476X_REVISION_A30] = { + .power_up = si476x_core_cmd_power_up_a20, + .power_down = si476x_core_cmd_power_down_a20, + .fm_tune_freq = si476x_core_cmd_fm_tune_freq_a20, + .am_tune_freq = si476x_core_cmd_am_tune_freq_a20, + .fm_rsq_status = si476x_core_cmd_fm_rsq_status_a30, + .agc_status = si476x_core_cmd_agc_status_a20, + .intb_pin_cfg = si476x_core_cmd_intb_pin_cfg_a20, + }, +}; + +int si476x_core_cmd_power_up(struct si476x_core *core, + struct si476x_power_up_args *args) +{ + BUG_ON(core->revision > SI476X_REVISION_A30 || + core->revision == -1); + return si476x_cmds_vtable[core->revision].power_up(core, args); +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_power_up); + +int si476x_core_cmd_power_down(struct si476x_core *core, + struct si476x_power_down_args *args) +{ + BUG_ON(core->revision > SI476X_REVISION_A30 || + core->revision == -1); + return si476x_cmds_vtable[core->revision].power_down(core, args); +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_power_down); + +int si476x_core_cmd_fm_tune_freq(struct si476x_core *core, + struct si476x_tune_freq_args *args) +{ + BUG_ON(core->revision > SI476X_REVISION_A30 || + core->revision == -1); + return si476x_cmds_vtable[core->revision].fm_tune_freq(core, args); +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_tune_freq); + +int si476x_core_cmd_am_tune_freq(struct si476x_core *core, + struct si476x_tune_freq_args *args) +{ + BUG_ON(core->revision > SI476X_REVISION_A30 || + core->revision == -1); + return si476x_cmds_vtable[core->revision].am_tune_freq(core, args); +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_am_tune_freq); + +int si476x_core_cmd_fm_rsq_status(struct si476x_core *core, + struct si476x_rsq_status_args *args, + struct si476x_rsq_status_report *report) + +{ + BUG_ON(core->revision > SI476X_REVISION_A30 || + core->revision == -1); + return si476x_cmds_vtable[core->revision].fm_rsq_status(core, args, + report); +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_fm_rsq_status); + +int si476x_core_cmd_agc_status(struct si476x_core *core, + struct si476x_agc_status_report *report) + +{ + BUG_ON(core->revision > SI476X_REVISION_A30 || + core->revision == -1); + return si476x_cmds_vtable[core->revision].agc_status(core, report); +} +EXPORT_SYMBOL_GPL(si476x_core_cmd_agc_status); + +int si476x_core_cmd_intb_pin_cfg(struct si476x_core *core, + enum si476x_intb_config intb, + 
enum si476x_a1_config a1)
+{
+ BUG_ON(core->revision > SI476X_REVISION_A30 ||
+ core->revision == -1);
+
+ return si476x_cmds_vtable[core->revision].intb_pin_cfg(core, intb, a1);
+}
+EXPORT_SYMBOL_GPL(si476x_core_cmd_intb_pin_cfg);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Andrey Smirnov <[email protected]>");
+MODULE_DESCRIPTION("API for command exchange for si476x");
diff --git a/drivers/mfd/si476x-i2c.c b/drivers/mfd/si476x-i2c.c
new file mode 100644
index 000000000000..f5bc8e4bd4bf
--- /dev/null
+++ b/drivers/mfd/si476x-i2c.c
@@ -0,0 +1,886 @@
+/*
+ * drivers/mfd/si476x-i2c.c -- Core device driver for si476x MFD
+ * device
+ *
+ * Copyright (C) 2012 Innovative Converged Devices(ICD)
+ * Copyright (C) 2013 Andrey Smirnov
+ *
+ * Author: Andrey Smirnov <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+#include <linux/module.h>
+
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/regulator/consumer.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+
+#include <linux/mfd/si476x-core.h>
+
+#define SI476X_MAX_IO_ERRORS 10
+#define SI476X_DRIVER_RDS_FIFO_DEPTH 128
+
+/**
+ * si476x_core_config_pinmux() - pin function configuration function
+ *
+ * @core: Core device structure
+ *
+ * Configure the functions of the pins of the radio chip.
+ *
+ * The function returns zero in case of success or negative error code
+ * otherwise.
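+ *
+ * Internally this is a thin sequence over the individual
+ * *_pin_cfg commands (digital audio, ZIF, IC-Link/GPO, analog
+ * audio and INTB/A1); the first failing sub-command aborts the
+ * whole configuration.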
+ */
+static int si476x_core_config_pinmux(struct si476x_core *core)
+{
+ int err;
+ dev_dbg(&core->client->dev, "Configuring pinmux\n");
+ err = si476x_core_cmd_dig_audio_pin_cfg(core,
+ core->pinmux.dclk,
+ core->pinmux.dfs,
+ core->pinmux.dout,
+ core->pinmux.xout);
+ if (err < 0) {
+ dev_err(&core->client->dev,
+ "Failed to configure digital audio pins(err = %d)\n",
+ err);
+ return err;
+ }
+
+ err = si476x_core_cmd_zif_pin_cfg(core,
+ core->pinmux.iqclk,
+ core->pinmux.iqfs,
+ core->pinmux.iout,
+ core->pinmux.qout);
+ if (err < 0) {
+ dev_err(&core->client->dev,
+ "Failed to configure ZIF pins(err = %d)\n",
+ err);
+ return err;
+ }
+
+ err = si476x_core_cmd_ic_link_gpo_ctl_pin_cfg(core,
+ core->pinmux.icin,
+ core->pinmux.icip,
+ core->pinmux.icon,
+ core->pinmux.icop);
+ if (err < 0) {
+ dev_err(&core->client->dev,
+ "Failed to configure IC-Link/GPO pins(err = %d)\n",
+ err);
+ return err;
+ }
+
+ err = si476x_core_cmd_ana_audio_pin_cfg(core,
+ core->pinmux.lrout);
+ if (err < 0) {
+ dev_err(&core->client->dev,
+ "Failed to configure analog audio pins(err = %d)\n",
+ err);
+ return err;
+ }
+
+ err = si476x_core_cmd_intb_pin_cfg(core,
+ core->pinmux.intb,
+ core->pinmux.a1);
+ if (err < 0) {
+ dev_err(&core->client->dev,
+ "Failed to configure interrupt pins(err = %d)\n",
+ err);
+ return err;
+ }
+
+ return 0;
+}
+
+static inline void si476x_core_schedule_polling_work(struct si476x_core *core)
+{
+ schedule_delayed_work(&core->status_monitor,
+ usecs_to_jiffies(SI476X_STATUS_POLL_US));
+}
+
+/**
+ * si476x_core_start() - early chip startup function
+ * @core: Core device structure
+ * @soft: When set, this flag forces "soft" startup, where a "soft"
+ * startup is one done by sending the appropriate command instead
+ * of using the reset pin of the tuner
+ *
+ * Perform required startup sequence to correctly power
+ * up the chip and perform initial configuration. It does the
+ * following sequence of actions:
+ * 1. Claims and enables the power supplies VD and VIO1 required
+ * for operation of the chip's I2C interface.
+ * 2. Waits for 100us, pulls the reset line up, enables irq,
+ * waits for another 100us as it is specified by the
+ * datasheet.
+ * 3. Sends 'POWER_UP' command to the device with all provided
+ * information about power-up parameters.
+ * 4. Configures the pin multiplexer, disables digital audio and
+ * configures interrupt sources.
+ *
+ * The function returns zero in case of success or negative error code
+ * otherwise.
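+ *
+ * Illustrative call (a sketch; in this driver the function is
+ * normally reached through si476x_core_set_power_state() rather
+ * than being called directly):
+ *
+ * err = si476x_core_start(core, false);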
+ */
+int si476x_core_start(struct si476x_core *core, bool soft)
+{
+ struct i2c_client *client = core->client;
+ int err;
+
+ if (!soft) {
+ if (gpio_is_valid(core->gpio_reset))
+ gpio_set_value_cansleep(core->gpio_reset, 1);
+
+ if (client->irq)
+ enable_irq(client->irq);
+
+ udelay(100);
+
+ if (!client->irq) {
+ atomic_set(&core->is_alive, 1);
+ si476x_core_schedule_polling_work(core);
+ }
+ } else {
+ if (client->irq)
+ enable_irq(client->irq);
+ else {
+ atomic_set(&core->is_alive, 1);
+ si476x_core_schedule_polling_work(core);
+ }
+ }
+
+ err = si476x_core_cmd_power_up(core,
+ &core->power_up_parameters);
+
+ if (err < 0) {
+ dev_err(&core->client->dev,
+ "Power up failure(err = %d)\n",
+ err);
+ goto disable_irq;
+ }
+
+ if (client->irq)
+ atomic_set(&core->is_alive, 1);
+
+ err = si476x_core_config_pinmux(core);
+ if (err < 0) {
+ dev_err(&core->client->dev,
+ "Failed to configure pinmux(err = %d)\n",
+ err);
+ goto disable_irq;
+ }
+
+ if (client->irq) {
+ err = regmap_write(core->regmap,
+ SI476X_PROP_INT_CTL_ENABLE,
+ SI476X_RDSIEN |
+ SI476X_STCIEN |
+ SI476X_CTSIEN);
+ if (err < 0) {
+ dev_err(&core->client->dev,
+ "Failed to configure interrupt sources"
+ "(err = %d)\n", err);
+ goto disable_irq;
+ }
+ }
+
+ return 0;
+
+disable_irq:
+ if (err == -ENODEV)
+ atomic_set(&core->is_alive, 0);
+
+ if (client->irq)
+ disable_irq(client->irq);
+ else
+ cancel_delayed_work_sync(&core->status_monitor);
+
+ if (gpio_is_valid(core->gpio_reset))
+ gpio_set_value_cansleep(core->gpio_reset, 0);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_start);
+
+/**
+ * si476x_core_stop() - chip power-down function
+ * @core: Core device structure
+ * @soft: When set, function sends a POWER_DOWN command instead of
+ * bringing reset line low
+ *
+ * Power down the chip by performing the following actions:
+ * 1. Disable IRQ or stop the polling worker
+ * 2. Send the POWER_DOWN command if the power down is soft or bring
+ * reset line low if not.
+ *
+ * The function returns zero in case of success or negative error code
+ * otherwise.
+ */
+int si476x_core_stop(struct si476x_core *core, bool soft)
+{
+ int err = 0;
+ atomic_set(&core->is_alive, 0);
+
+ if (soft) {
+ /* TODO: This probably should be a configurable option,
+ * so it is possible to have the chips keep their
+ * oscillators running
+ */
+ struct si476x_power_down_args args = {
+ .xosc = false,
+ };
+ err = si476x_core_cmd_power_down(core, &args);
+ }
+
+ /* We couldn't disable those before
+ * 'si476x_core_cmd_power_down' since we expect to get CTS
+ * interrupt */
+ if (core->client->irq)
+ disable_irq(core->client->irq);
+ else
+ cancel_delayed_work_sync(&core->status_monitor);
+
+ if (!soft) {
+ if (gpio_is_valid(core->gpio_reset))
+ gpio_set_value_cansleep(core->gpio_reset, 0);
+ }
+ return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_stop);
+
+/**
+ * si476x_core_set_power_state() - set the level at which the power is
+ * supplied to the chip.
+ * @core: Core device structure
+ * @next_state: enum si476x_power_state describing power state to
+ * switch to.
+ *
+ * Switch on all the required power supplies
+ *
+ * This function returns 0 in case of success and negative error code
+ * otherwise.
+ */
+int si476x_core_set_power_state(struct si476x_core *core,
+ enum si476x_power_state next_state)
+{
+ /*
+ It is not clear from the datasheet if it is possible to
+ work with device if not all power domains are operational.
+ So for now the power-up policy is "power-up all the things!"
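+
+ Note that the switch below only implements the
+ SI476X_POWER_UP_FULL and SI476X_POWER_DOWN transitions; any
+ other requested state is treated as a programming error and
+ trips BUG().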
+ */
+ int err = 0;
+
+ if (core->power_state == SI476X_POWER_INCONSISTENT) {
+ dev_err(&core->client->dev,
+ "The device in inconsistent power state\n");
+ return -EINVAL;
+ }
+
+ if (next_state != core->power_state) {
+ switch (next_state) {
+ case SI476X_POWER_UP_FULL:
+ err = regulator_bulk_enable(ARRAY_SIZE(core->supplies),
+ core->supplies);
+ if (err < 0) {
+ core->power_state = SI476X_POWER_INCONSISTENT;
+ break;
+ }
+ /*
+ * Startup timing diagram recommends to have a
+ * 100 us delay between enabling of the power
+ * supplies and turning the tuner on.
+ */
+ udelay(100);
+
+ err = si476x_core_start(core, false);
+ if (err < 0)
+ goto disable_regulators;
+
+ core->power_state = next_state;
+ break;
+
+ case SI476X_POWER_DOWN:
+ core->power_state = next_state;
+ err = si476x_core_stop(core, false);
+ if (err < 0)
+ core->power_state = SI476X_POWER_INCONSISTENT;
+disable_regulators:
+ err = regulator_bulk_disable(ARRAY_SIZE(core->supplies),
+ core->supplies);
+ if (err < 0)
+ core->power_state = SI476X_POWER_INCONSISTENT;
+ break;
+ default:
+ BUG();
+ }
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_set_power_state);
+
+/**
+ * si476x_core_report_drainer_stop() - mark the completion of the RDS
+ * buffer drain process by the worker.
+ *
+ * @core: Core device structure
+ */
+static inline void si476x_core_report_drainer_stop(struct si476x_core *core)
+{
+ mutex_lock(&core->rds_drainer_status_lock);
+ core->rds_drainer_is_working = false;
+ mutex_unlock(&core->rds_drainer_status_lock);
+}
+
+/**
+ * si476x_core_start_rds_drainer_once() - start RDS drainer worker if
+ * there is none working, do nothing otherwise
+ *
+ * @core: Datastructure corresponding to the chip.
+ */
+static inline void si476x_core_start_rds_drainer_once(struct si476x_core *core)
+{
+ mutex_lock(&core->rds_drainer_status_lock);
+ if (!core->rds_drainer_is_working) {
+ core->rds_drainer_is_working = true;
+ schedule_work(&core->rds_fifo_drainer);
+ }
+ mutex_unlock(&core->rds_drainer_status_lock);
+}
+/**
+ * si476x_drain_rds_fifo() - RDS buffer drainer.
+ * @work: struct work_struct being passed to the function by the
+ * kernel.
+ *
+ * Drain the contents of the RDS FIFO of the chip into the driver's
+ * software FIFO.
+ */
+static void si476x_core_drain_rds_fifo(struct work_struct *work)
+{
+ int err;
+
+ struct si476x_core *core = container_of(work, struct si476x_core,
+ rds_fifo_drainer);
+
+ struct si476x_rds_status_report report;
+
+ si476x_core_lock(core);
+ err = si476x_core_cmd_fm_rds_status(core, true, false, false, &report);
+ if (!err) {
+ int i = report.rdsfifoused;
+ dev_dbg(&core->client->dev,
+ "%d elements in RDS FIFO. Draining.\n", i);
+ for (; i > 0; --i) {
+ err = si476x_core_cmd_fm_rds_status(core, false, false,
+ (i == 1), &report);
+ if (err < 0)
+ goto unlock;
+
+ kfifo_in(&core->rds_fifo, report.rds,
+ sizeof(report.rds));
+ dev_dbg(&core->client->dev, "RDS data:\n %*ph\n",
+ (int)sizeof(report.rds), report.rds);
+ }
+ dev_dbg(&core->client->dev, "Drrrrained!\n");
+ wake_up_interruptible(&core->rds_read_queue);
+ }
+
+unlock:
+ si476x_core_unlock(core);
+ si476x_core_report_drainer_stop(core);
+}
+
+/**
+ * si476x_core_pronounce_dead()
+ *
+ * @core: Core device structure
+ *
+ * Mark the device as being dead and wake up all potentially waiting
+ * threads of execution.
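+ *
+ * Concretely (see the body below) this wakes RDS readers blocked
+ * on rds_read_queue and any threads waiting on the CTS or STC
+ * flags, so that they can notice that is_alive is now zero.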
+ *
+ */
+static void si476x_core_pronounce_dead(struct si476x_core *core)
+{
+ dev_info(&core->client->dev, "Core device is dead.\n");
+
+ atomic_set(&core->is_alive, 0);
+
+ /* Wake up all possible waiting processes */
+ wake_up_interruptible(&core->rds_read_queue);
+
+ atomic_set(&core->cts, 1);
+ wake_up(&core->command);
+
+ atomic_set(&core->stc, 1);
+ wake_up(&core->tuning);
+}
+
+/**
+ * si476x_core_i2c_xfer()
+ *
+ * @core: Core device structure
+ * @type: Transfer type
+ * @buf: Transfer buffer for/with data
+ * @count: Transfer buffer size
+ *
+ * Perform an I2C transfer (either read or write) and keep a counter
+ * of I/O errors. If the error counter rises above the threshold
+ * pronounce device dead.
+ *
+ * The function returns zero on success or negative error code on
+ * failure.
+ */
+int si476x_core_i2c_xfer(struct si476x_core *core,
+ enum si476x_i2c_type type,
+ char *buf, int count)
+{
+ static int io_errors_count;
+ int err;
+ if (type == SI476X_I2C_SEND)
+ err = i2c_master_send(core->client, buf, count);
+ else
+ err = i2c_master_recv(core->client, buf, count);
+
+ if (err < 0) {
+ if (io_errors_count++ > SI476X_MAX_IO_ERRORS)
+ si476x_core_pronounce_dead(core);
+ } else {
+ io_errors_count = 0;
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(si476x_core_i2c_xfer);
+
+/**
+ * si476x_get_status()
+ * @core: Core device structure
+ *
+ * Get the status byte of the core device by performing a one byte I2C
+ * read.
+ *
+ * The function returns a status value or a negative error code on
+ * error.
+ */
+static int si476x_core_get_status(struct si476x_core *core)
+{
+ u8 response;
+ int err = si476x_core_i2c_xfer(core, SI476X_I2C_RECV,
+ &response, sizeof(response));
+
+ return (err < 0) ? err : response;
+}
+
+/**
+ * si476x_get_and_signal_status() - IRQ dispatcher
+ * @core: Core device structure
+ *
+ * Dispatch the arrived interrupt request based on the value of the
+ * status byte reported by the tuner.
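+ *
+ * The same dispatcher is shared by the threaded interrupt handler
+ * and by the polling worker used when no IRQ line is wired up, so
+ * both paths signal CTS, RDS and STC events identically.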
+ *
+ */
+static void si476x_core_get_and_signal_status(struct si476x_core *core)
+{
+ int status = si476x_core_get_status(core);
+ if (status < 0) {
+ dev_err(&core->client->dev, "Failed to get status\n");
+ return;
+ }
+
+ if (status & SI476X_CTS) {
+ /* Unfortunately completions could not be used for
+ * signalling CTS since this flag cannot be cleared
+ * in status byte, and therefore once it becomes true
+ * multiple calls to 'complete' would cause the
+ * commands following the current one to be completed
+ * before they actually are */
+ dev_dbg(&core->client->dev, "[interrupt] CTSINT\n");
+ atomic_set(&core->cts, 1);
+ wake_up(&core->command);
+ }
+
+ if (status & SI476X_FM_RDS_INT) {
+ dev_dbg(&core->client->dev, "[interrupt] RDSINT\n");
+ si476x_core_start_rds_drainer_once(core);
+ }
+
+ if (status & SI476X_STC_INT) {
+ dev_dbg(&core->client->dev, "[interrupt] STCINT\n");
+ atomic_set(&core->stc, 1);
+ wake_up(&core->tuning);
+ }
+}
+
+static void si476x_core_poll_loop(struct work_struct *work)
+{
+ struct si476x_core *core = SI476X_WORK_TO_CORE(work);
+
+ si476x_core_get_and_signal_status(core);
+
+ if (atomic_read(&core->is_alive))
+ si476x_core_schedule_polling_work(core);
+}
+
+static irqreturn_t si476x_core_interrupt(int irq, void *dev)
+{
+ struct si476x_core *core = dev;
+
+ si476x_core_get_and_signal_status(core);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * si476x_firmware_version_to_revision()
+ * @core: Core device structure
+ * @func: Function of the device (FM/AM/WB receiver)
+ * @major: Firmware major number
+ * @minor1: Firmware first minor number
+ * @minor2: Firmware second minor number
+ *
+ * Convert a chip's firmware version number into an offset that later
+ * will be used as an offset in the "vtable" of tuner functions
+ *
+ * This function returns a positive offset in case of success and a -1
+ * in case of failure.
+ */
+static int si476x_core_fwver_to_revision(struct si476x_core *core,
+ int func, int major,
+ int minor1, int minor2)
+{
+ switch (func) {
+ case SI476X_FUNC_FM_RECEIVER:
+ switch (major) {
+ case 5:
+ return SI476X_REVISION_A10;
+ case 8:
+ return SI476X_REVISION_A20;
+ case 10:
+ return SI476X_REVISION_A30;
+ default:
+ goto unknown_revision;
+ }
+ case SI476X_FUNC_AM_RECEIVER:
+ switch (major) {
+ case 5:
+ return SI476X_REVISION_A10;
+ case 7:
+ return SI476X_REVISION_A20;
+ case 9:
+ return SI476X_REVISION_A30;
+ default:
+ goto unknown_revision;
+ }
+ case SI476X_FUNC_WB_RECEIVER:
+ switch (major) {
+ case 3:
+ return SI476X_REVISION_A10;
+ case 5:
+ return SI476X_REVISION_A20;
+ case 7:
+ return SI476X_REVISION_A30;
+ default:
+ goto unknown_revision;
+ }
+ case SI476X_FUNC_BOOTLOADER:
+ default: /* FALLTHROUGH */
+ BUG();
+ return -1;
+ }
+
+unknown_revision:
+ dev_err(&core->client->dev,
+ "Unsupported version of the firmware: %d.%d.%d, "
+ "reverting to A10 compatible functions\n",
+ major, minor1, minor2);
+
+ return SI476X_REVISION_A10;
+}
+
+/**
+ * si476x_get_revision_info()
+ * @core: Core device structure
+ *
+ * Get the firmware version number of the device. It is done in
+ * the following three steps:
+ * 1. Power-up the device
+ * 2. Send the 'FUNC_INFO' command
+ * 3. Power the device down.
+ *
+ * The function returns zero on success and a negative error code on
+ * failure.
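+ *
+ * The revision obtained here is what later indexes
+ * si476x_cmds_vtable in si476x-cmd.c, selecting the
+ * revision-specific command implementations.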
+ */ +static int si476x_core_get_revision_info(struct si476x_core *core) +{ + int rval; + struct si476x_func_info info; + + si476x_core_lock(core); + rval = si476x_core_set_power_state(core, SI476X_POWER_UP_FULL); + if (rval < 0) + goto exit; + + rval = si476x_core_cmd_func_info(core, &info); + if (rval < 0) + goto power_down; + + core->revision = si476x_core_fwver_to_revision(core, info.func, + info.firmware.major, + info.firmware.minor[0], + info.firmware.minor[1]); +power_down: + si476x_core_set_power_state(core, SI476X_POWER_DOWN); +exit: + si476x_core_unlock(core); + + return rval; +} + +bool si476x_core_has_am(struct si476x_core *core) +{ + return core->chip_id == SI476X_CHIP_SI4761 || + core->chip_id == SI476X_CHIP_SI4764; +} +EXPORT_SYMBOL_GPL(si476x_core_has_am); + +bool si476x_core_has_diversity(struct si476x_core *core) +{ + return core->chip_id == SI476X_CHIP_SI4764; +} +EXPORT_SYMBOL_GPL(si476x_core_has_diversity); + +bool si476x_core_is_a_secondary_tuner(struct si476x_core *core) +{ + return si476x_core_has_diversity(core) && + (core->diversity_mode == SI476X_PHDIV_SECONDARY_ANTENNA || + core->diversity_mode == SI476X_PHDIV_SECONDARY_COMBINING); +} +EXPORT_SYMBOL_GPL(si476x_core_is_a_secondary_tuner); + +bool si476x_core_is_a_primary_tuner(struct si476x_core *core) +{ + return si476x_core_has_diversity(core) && + (core->diversity_mode == SI476X_PHDIV_PRIMARY_ANTENNA || + core->diversity_mode == SI476X_PHDIV_PRIMARY_COMBINING); +} +EXPORT_SYMBOL_GPL(si476x_core_is_a_primary_tuner); + +bool si476x_core_is_in_am_receiver_mode(struct si476x_core *core) +{ + return si476x_core_has_am(core) && + (core->power_up_parameters.func == SI476X_FUNC_AM_RECEIVER); +} +EXPORT_SYMBOL_GPL(si476x_core_is_in_am_receiver_mode); + +bool si476x_core_is_powered_up(struct si476x_core *core) +{ + return core->power_state == SI476X_POWER_UP_FULL; +} +EXPORT_SYMBOL_GPL(si476x_core_is_powered_up); + +static int si476x_core_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int rval; + struct si476x_core *core; + struct si476x_platform_data *pdata; + struct mfd_cell *cell; + int cell_num; + + core = devm_kzalloc(&client->dev, sizeof(*core), GFP_KERNEL); + if (!core) { + dev_err(&client->dev, + "failed to allocate 'struct si476x_core'\n"); + return -ENOMEM; + } + core->client = client; + + core->regmap = devm_regmap_init_si476x(core); + if (IS_ERR(core->regmap)) { + rval = PTR_ERR(core->regmap); + dev_err(&client->dev, + "Failed to allocate register map: %d\n", + rval); + return rval; + } + + i2c_set_clientdata(client, core); + + atomic_set(&core->is_alive, 0); + core->power_state = SI476X_POWER_DOWN; + + pdata = client->dev.platform_data; + if (pdata) { + memcpy(&core->power_up_parameters, + &pdata->power_up_parameters, + sizeof(core->power_up_parameters)); + + core->gpio_reset = -1; + if (gpio_is_valid(pdata->gpio_reset)) { + rval = gpio_request(pdata->gpio_reset, "si476x reset"); + if (rval) { + dev_err(&client->dev, + "Failed to request gpio: %d\n", rval); + return rval; + } + core->gpio_reset = pdata->gpio_reset; + gpio_direction_output(core->gpio_reset, 0); + } + + core->diversity_mode = pdata->diversity_mode; + memcpy(&core->pinmux, &pdata->pinmux, + sizeof(struct si476x_pinmux)); + } else { + dev_err(&client->dev, "No platform data provided\n"); + return -EINVAL; + } + + core->supplies[0].supply = "vd"; + core->supplies[1].supply = "va"; + core->supplies[2].supply = "vio1"; + core->supplies[3].supply = "vio2"; + + rval = devm_regulator_bulk_get(&client->dev, + 
ARRAY_SIZE(core->supplies),
+ core->supplies);
+ if (rval) {
+ dev_err(&client->dev, "Failed to get all of the regulators\n");
+ goto free_gpio;
+ }
+
+ mutex_init(&core->cmd_lock);
+ init_waitqueue_head(&core->command);
+ init_waitqueue_head(&core->tuning);
+
+ rval = kfifo_alloc(&core->rds_fifo,
+ SI476X_DRIVER_RDS_FIFO_DEPTH *
+ sizeof(struct v4l2_rds_data),
+ GFP_KERNEL);
+ if (rval) {
+ dev_err(&client->dev, "Could not allocate the FIFO\n");
+ goto free_gpio;
+ }
+ mutex_init(&core->rds_drainer_status_lock);
+ init_waitqueue_head(&core->rds_read_queue);
+ INIT_WORK(&core->rds_fifo_drainer, si476x_core_drain_rds_fifo);
+
+ if (client->irq) {
+ rval = devm_request_threaded_irq(&client->dev,
+ client->irq, NULL,
+ si476x_core_interrupt,
+ IRQF_TRIGGER_FALLING,
+ client->name, core);
+ if (rval < 0) {
+ dev_err(&client->dev, "Could not request IRQ %d\n",
+ client->irq);
+ goto free_kfifo;
+ }
+ disable_irq(client->irq);
+ dev_dbg(&client->dev, "IRQ requested.\n");
+
+ core->rds_fifo_depth = 20;
+ } else {
+ INIT_DELAYED_WORK(&core->status_monitor,
+ si476x_core_poll_loop);
+ dev_info(&client->dev,
+ "No IRQ number specified, will use polling\n");
+
+ core->rds_fifo_depth = 5;
+ }
+
+ core->chip_id = id->driver_data;
+
+ rval = si476x_core_get_revision_info(core);
+ if (rval < 0) {
+ rval = -ENODEV;
+ goto free_kfifo;
+ }
+
+ cell_num = 0;
+
+ cell = &core->cells[SI476X_RADIO_CELL];
+ cell->name = "si476x-radio";
+ cell_num++;
+
+#ifdef CONFIG_SND_SOC_SI476X
+ if ((core->chip_id == SI476X_CHIP_SI4761 ||
+ core->chip_id == SI476X_CHIP_SI4764) &&
+ core->pinmux.dclk == SI476X_DCLK_DAUDIO &&
+ core->pinmux.dfs == SI476X_DFS_DAUDIO &&
+ core->pinmux.dout == SI476X_DOUT_I2S_OUTPUT &&
+ core->pinmux.xout == SI476X_XOUT_TRISTATE) {
+ cell = &core->cells[SI476X_CODEC_CELL];
+ cell->name = "si476x-codec";
+ cell_num++;
+ }
+#endif
+ rval = mfd_add_devices(&client->dev,
+ (client->adapter->nr << 8) + client->addr,
+ core->cells, cell_num,
+ NULL, 0, NULL);
+ if (!rval)
+ return 0;
+
+free_kfifo:
+ kfifo_free(&core->rds_fifo);
+
+free_gpio:
+ if (gpio_is_valid(core->gpio_reset))
+ gpio_free(core->gpio_reset);
+
+ return rval;
+}
+
+static int si476x_core_remove(struct i2c_client *client)
+{
+ struct si476x_core *core = i2c_get_clientdata(client);
+
+ si476x_core_pronounce_dead(core);
+ mfd_remove_devices(&client->dev);
+
+ if (client->irq)
+ disable_irq(client->irq);
+ else
+ cancel_delayed_work_sync(&core->status_monitor);
+
+ kfifo_free(&core->rds_fifo);
+
+ if (gpio_is_valid(core->gpio_reset))
+ gpio_free(core->gpio_reset);
+
+ return 0;
+}
+
+
+static const struct i2c_device_id si476x_id[] = {
+ { "si4761", SI476X_CHIP_SI4761 },
+ { "si4764", SI476X_CHIP_SI4764 },
+ { "si4768", SI476X_CHIP_SI4768 },
+ { },
+};
+MODULE_DEVICE_TABLE(i2c, si476x_id);
+
+static struct i2c_driver si476x_core_driver = {
+ .driver = {
+ .name = "si476x-core",
+ .owner = THIS_MODULE,
+ },
+ .probe = si476x_core_probe,
+ .remove = si476x_core_remove,
+ .id_table = si476x_id,
+};
+module_i2c_driver(si476x_core_driver);
+
+
+MODULE_AUTHOR("Andrey Smirnov <[email protected]>");
+MODULE_DESCRIPTION("Si4761/64/68 AM/FM MFD core device driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/si476x-prop.c b/drivers/mfd/si476x-prop.c
new file mode 100644
index 000000000000..cfeffa6e15d9
--- /dev/null
+++ b/drivers/mfd/si476x-prop.c
@@ -0,0 +1,241 @@
+/*
+ * drivers/mfd/si476x-prop.c -- Subroutines to access
+ * properties of si476x chips
+ *
+ * Copyright (C) 2012 Innovative Converged Devices(ICD)
+ * Copyright (C) 2013 Andrey
Smirnov + * + * Author: Andrey Smirnov <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/module.h> + +#include <linux/mfd/si476x-core.h> + +struct si476x_property_range { + u16 low, high; +}; + +static bool si476x_core_element_is_in_array(u16 element, + const u16 array[], + size_t size) +{ + int i; + + for (i = 0; i < size; i++) + if (element == array[i]) + return true; + + return false; +} + +static bool si476x_core_element_is_in_range(u16 element, + const struct si476x_property_range range[], + size_t size) +{ + int i; + + for (i = 0; i < size; i++) + if (element <= range[i].high && element >= range[i].low) + return true; + + return false; +} + +static bool si476x_core_is_valid_property_a10(struct si476x_core *core, + u16 property) +{ + static const u16 valid_properties[] = { + 0x0000, + 0x0500, 0x0501, + 0x0600, + 0x0709, 0x070C, 0x070D, 0x70E, 0x710, + 0x0718, + 0x1207, 0x1208, + 0x2007, + 0x2300, + }; + + static const struct si476x_property_range valid_ranges[] = { + { 0x0200, 0x0203 }, + { 0x0300, 0x0303 }, + { 0x0400, 0x0404 }, + { 0x0700, 0x0707 }, + { 0x1100, 0x1102 }, + { 0x1200, 0x1204 }, + { 0x1300, 0x1306 }, + { 0x2000, 0x2005 }, + { 0x2100, 0x2104 }, + { 0x2106, 0x2106 }, + { 0x2200, 0x220E }, + { 0x3100, 0x3104 }, + { 0x3207, 0x320F }, + { 0x3300, 0x3304 }, + { 0x3500, 0x3517 }, + { 0x3600, 0x3617 }, + { 0x3700, 0x3717 }, + { 0x4000, 0x4003 }, + }; + + return si476x_core_element_is_in_range(property, valid_ranges, + ARRAY_SIZE(valid_ranges)) || + si476x_core_element_is_in_array(property, valid_properties, + ARRAY_SIZE(valid_properties)); +} + +static bool si476x_core_is_valid_property_a20(struct si476x_core *core, + u16 property) +{ + static const u16 valid_properties[] = { + 0x071B, + 0x1006, + 0x2210, + 0x3401, + }; + + static const struct si476x_property_range valid_ranges[] = { + { 0x2215, 0x2219 }, + }; + + return si476x_core_is_valid_property_a10(core, property) || + si476x_core_element_is_in_range(property, valid_ranges, + ARRAY_SIZE(valid_ranges)) || + si476x_core_element_is_in_array(property, valid_properties, + ARRAY_SIZE(valid_properties)); +} + +static bool si476x_core_is_valid_property_a30(struct si476x_core *core, + u16 property) +{ + static const u16 valid_properties[] = { + 0x071C, 0x071D, + 0x1007, 0x1008, + 0x220F, 0x2214, + 0x2301, + 0x3105, 0x3106, + 0x3402, + }; + + static const struct si476x_property_range valid_ranges[] = { + { 0x0405, 0x0411 }, + { 0x2008, 0x200B }, + { 0x2220, 0x2223 }, + { 0x3100, 0x3106 }, + }; + + return si476x_core_is_valid_property_a20(core, property) || + si476x_core_element_is_in_range(property, valid_ranges, + ARRAY_SIZE(valid_ranges)) || + si476x_core_element_is_in_array(property, valid_properties, + ARRAY_SIZE(valid_properties)); +} + +typedef bool (*valid_property_pred_t) (struct si476x_core *, u16); + +static bool si476x_core_is_valid_property(struct si476x_core *core, + u16 property) +{ + static const valid_property_pred_t is_valid_property[] = { + [SI476X_REVISION_A10] = si476x_core_is_valid_property_a10, + [SI476X_REVISION_A20] = si476x_core_is_valid_property_a20, + 
[SI476X_REVISION_A30] = si476x_core_is_valid_property_a30, + }; + + BUG_ON(core->revision > SI476X_REVISION_A30 || + core->revision == -1); + return is_valid_property[core->revision](core, property); +} + + +static bool si476x_core_is_readonly_property(struct si476x_core *core, + u16 property) +{ + BUG_ON(core->revision > SI476X_REVISION_A30 || + core->revision == -1); + + switch (core->revision) { + case SI476X_REVISION_A10: + return (property == 0x3200); + case SI476X_REVISION_A20: + return (property == 0x1006 || + property == 0x2210 || + property == 0x3200); + case SI476X_REVISION_A30: + return false; + } + + return false; +} + +static bool si476x_core_regmap_readable_register(struct device *dev, + unsigned int reg) +{ + struct i2c_client *client = to_i2c_client(dev); + struct si476x_core *core = i2c_get_clientdata(client); + + return si476x_core_is_valid_property(core, (u16) reg); + +} + +static bool si476x_core_regmap_writable_register(struct device *dev, + unsigned int reg) +{ + struct i2c_client *client = to_i2c_client(dev); + struct si476x_core *core = i2c_get_clientdata(client); + + return si476x_core_is_valid_property(core, (u16) reg) && + !si476x_core_is_readonly_property(core, (u16) reg); +} + + +static int si476x_core_regmap_write(void *context, unsigned int reg, + unsigned int val) +{ + return si476x_core_cmd_set_property(context, reg, val); +} + +static int si476x_core_regmap_read(void *context, unsigned int reg, + unsigned *val) +{ + struct si476x_core *core = context; + int err; + + err = si476x_core_cmd_get_property(core, reg); + if (err < 0) + return err; + + *val = err; + + return 0; +} + + +static const struct regmap_config si476x_regmap_config = { + .reg_bits = 16, + .val_bits = 16, + + .max_register = 0x4003, + + .writeable_reg = si476x_core_regmap_writable_register, + .readable_reg = si476x_core_regmap_readable_register, + + .reg_read = si476x_core_regmap_read, + .reg_write = si476x_core_regmap_write, + + .cache_type = REGCACHE_RBTREE, +}; + +struct regmap *devm_regmap_init_si476x(struct si476x_core *core) +{ + return devm_regmap_init(&core->client->dev, NULL, + core, &si476x_regmap_config); +} +EXPORT_SYMBOL_GPL(devm_regmap_init_si476x); diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c index 9bd33169a111..d70a343078fd 100644 --- a/drivers/mfd/sta2x11-mfd.c +++ b/drivers/mfd/sta2x11-mfd.c @@ -98,17 +98,6 @@ static int sta2x11_mfd_add(struct pci_dev *pdev, gfp_t flags) return 0; } -static int mfd_remove(struct pci_dev *pdev) -{ - struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev); - - if (!mfd) - return -ENODEV; - list_del(&mfd->list); - kfree(mfd); - return 0; -} - /* This function is exported and is not expected to fail */ u32 __sta2x11_mfd_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val, enum sta2x11_mfd_plat_dev index) diff --git a/drivers/mfd/stmpe-i2c.c b/drivers/mfd/stmpe-i2c.c index fd5fcb630685..0da02e11d58e 100644 --- a/drivers/mfd/stmpe-i2c.c +++ b/drivers/mfd/stmpe-i2c.c @@ -75,6 +75,7 @@ static const struct i2c_device_id stmpe_i2c_id[] = { { "stmpe801", STMPE801 }, { "stmpe811", STMPE811 }, { "stmpe1601", STMPE1601 }, + { "stmpe1801", STMPE1801 }, { "stmpe2401", STMPE2401 }, { "stmpe2403", STMPE2403 }, { } diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c index 973659f8abd9..a81badbaa917 100644 --- a/drivers/mfd/stmpe-spi.c +++ b/drivers/mfd/stmpe-spi.c @@ -103,7 +103,7 @@ stmpe_spi_probe(struct spi_device *spi) static int stmpe_spi_remove(struct spi_device *spi) { - struct stmpe *stmpe = dev_get_drvdata(&spi->dev); + 
struct stmpe *stmpe = spi_get_drvdata(spi); return stmpe_remove(stmpe); } diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c index 4b11202061be..bbccd514d3ec 100644 --- a/drivers/mfd/stmpe.c +++ b/drivers/mfd/stmpe.c @@ -19,6 +19,7 @@ #include <linux/pm.h> #include <linux/slab.h> #include <linux/mfd/core.h> +#include <linux/delay.h> #include "stmpe.h" static int __stmpe_enable(struct stmpe *stmpe, unsigned int blocks) @@ -643,6 +644,88 @@ static struct stmpe_variant_info stmpe1601 = { }; /* + * STMPE1801 + */ +static const u8 stmpe1801_regs[] = { + [STMPE_IDX_CHIP_ID] = STMPE1801_REG_CHIP_ID, + [STMPE_IDX_ICR_LSB] = STMPE1801_REG_INT_CTRL_LOW, + [STMPE_IDX_IER_LSB] = STMPE1801_REG_INT_EN_MASK_LOW, + [STMPE_IDX_ISR_LSB] = STMPE1801_REG_INT_STA_LOW, + [STMPE_IDX_GPMR_LSB] = STMPE1801_REG_GPIO_MP_LOW, + [STMPE_IDX_GPSR_LSB] = STMPE1801_REG_GPIO_SET_LOW, + [STMPE_IDX_GPCR_LSB] = STMPE1801_REG_GPIO_CLR_LOW, + [STMPE_IDX_GPDR_LSB] = STMPE1801_REG_GPIO_SET_DIR_LOW, + [STMPE_IDX_GPRER_LSB] = STMPE1801_REG_GPIO_RE_LOW, + [STMPE_IDX_GPFER_LSB] = STMPE1801_REG_GPIO_FE_LOW, + [STMPE_IDX_IEGPIOR_LSB] = STMPE1801_REG_INT_EN_GPIO_MASK_LOW, + [STMPE_IDX_ISGPIOR_LSB] = STMPE1801_REG_INT_STA_GPIO_LOW, +}; + +static struct stmpe_variant_block stmpe1801_blocks[] = { + { + .cell = &stmpe_gpio_cell, + .irq = STMPE1801_IRQ_GPIOC, + .block = STMPE_BLOCK_GPIO, + }, + { + .cell = &stmpe_keypad_cell, + .irq = STMPE1801_IRQ_KEYPAD, + .block = STMPE_BLOCK_KEYPAD, + }, +}; + +static int stmpe1801_enable(struct stmpe *stmpe, unsigned int blocks, + bool enable) +{ + unsigned int mask = 0; + if (blocks & STMPE_BLOCK_GPIO) + mask |= STMPE1801_MSK_INT_EN_GPIO; + + if (blocks & STMPE_BLOCK_KEYPAD) + mask |= STMPE1801_MSK_INT_EN_KPC; + + return __stmpe_set_bits(stmpe, STMPE1801_REG_INT_EN_MASK_LOW, mask, + enable ? 
mask : 0); +} + +static int stmpe1801_reset(struct stmpe *stmpe) +{ + unsigned long timeout; + int ret = 0; + + ret = __stmpe_set_bits(stmpe, STMPE1801_REG_SYS_CTRL, + STMPE1801_MSK_SYS_CTRL_RESET, STMPE1801_MSK_SYS_CTRL_RESET); + if (ret < 0) + return ret; + + timeout = jiffies + msecs_to_jiffies(100); + while (time_before(jiffies, timeout)) { + ret = __stmpe_reg_read(stmpe, STMPE1801_REG_SYS_CTRL); + if (ret < 0) + return ret; + if (!(ret & STMPE1801_MSK_SYS_CTRL_RESET)) + return 0; + usleep_range(100, 200); + }; + return -EIO; +} + +static struct stmpe_variant_info stmpe1801 = { + .name = "stmpe1801", + .id_val = STMPE1801_ID, + .id_mask = 0xfff0, + .num_gpios = 18, + .af_bits = 0, + .regs = stmpe1801_regs, + .blocks = stmpe1801_blocks, + .num_blocks = ARRAY_SIZE(stmpe1801_blocks), + .num_irqs = STMPE1801_NR_INTERNAL_IRQS, + .enable = stmpe1801_enable, + /* stmpe1801 do not have any gpio alternate function */ + .get_altfunc = NULL, +}; + +/* * STMPE24XX */ @@ -740,6 +823,7 @@ static struct stmpe_variant_info *stmpe_variant_info[STMPE_NBR_PARTS] = { [STMPE801] = &stmpe801, [STMPE811] = &stmpe811, [STMPE1601] = &stmpe1601, + [STMPE1801] = &stmpe1801, [STMPE2401] = &stmpe2401, [STMPE2403] = &stmpe2403, }; @@ -759,7 +843,7 @@ static irqreturn_t stmpe_irq(int irq, void *data) struct stmpe *stmpe = data; struct stmpe_variant_info *variant = stmpe->variant; int num = DIV_ROUND_UP(variant->num_irqs, 8); - u8 israddr = stmpe->regs[STMPE_IDX_ISR_MSB]; + u8 israddr; u8 isr[num]; int ret; int i; @@ -771,6 +855,11 @@ static irqreturn_t stmpe_irq(int irq, void *data) return IRQ_HANDLED; } + if (variant->id_val == STMPE1801_ID) + israddr = stmpe->regs[STMPE_IDX_ISR_LSB]; + else + israddr = stmpe->regs[STMPE_IDX_ISR_MSB]; + ret = stmpe_block_read(stmpe, israddr, num, isr); if (ret < 0) return IRQ_NONE; @@ -938,6 +1027,12 @@ static int stmpe_chip_init(struct stmpe *stmpe) if (ret) return ret; + if (id == STMPE1801_ID) { + ret = stmpe1801_reset(stmpe); + if (ret < 0) + return ret; + } + if (stmpe->irq >= 0) { if (id == STMPE801_ID) icr = STMPE801_REG_SYS_CTRL_INT_EN; @@ -1015,7 +1110,10 @@ void stmpe_of_probe(struct stmpe_platform_data *pdata, struct device_node *np) { struct device_node *child; - pdata->id = -1; + pdata->id = of_alias_get_id(np, "stmpe-i2c"); + if (pdata->id < 0) + pdata->id = -1; + pdata->irq_trigger = IRQF_TRIGGER_NONE; of_property_read_u32(np, "st,autosleep-timeout", @@ -1057,6 +1155,9 @@ int stmpe_probe(struct stmpe_client_info *ci, int partnum) return -ENOMEM; stmpe_of_probe(pdata, np); + + if (of_find_property(np, "interrupts", NULL) == NULL) + ci->irq = -1; } stmpe = devm_kzalloc(ci->dev, sizeof(struct stmpe), GFP_KERNEL); diff --git a/drivers/mfd/stmpe.h b/drivers/mfd/stmpe.h index 7b8e13f5b764..ff2b09ba8797 100644 --- a/drivers/mfd/stmpe.h +++ b/drivers/mfd/stmpe.h @@ -199,6 +199,55 @@ int stmpe_remove(struct stmpe *stmpe); #define STPME1601_AUTOSLEEP_ENABLE (1 << 3) /* + * STMPE1801 + */ +#define STMPE1801_ID 0xc110 +#define STMPE1801_NR_INTERNAL_IRQS 5 +#define STMPE1801_IRQ_KEYPAD_COMBI 4 +#define STMPE1801_IRQ_GPIOC 3 +#define STMPE1801_IRQ_KEYPAD_OVER 2 +#define STMPE1801_IRQ_KEYPAD 1 +#define STMPE1801_IRQ_WAKEUP 0 + +#define STMPE1801_REG_CHIP_ID 0x00 +#define STMPE1801_REG_SYS_CTRL 0x02 +#define STMPE1801_REG_INT_CTRL_LOW 0x04 +#define STMPE1801_REG_INT_EN_MASK_LOW 0x06 +#define STMPE1801_REG_INT_STA_LOW 0x08 +#define STMPE1801_REG_INT_EN_GPIO_MASK_LOW 0x0A +#define STMPE1801_REG_INT_EN_GPIO_MASK_MID 0x0B +#define STMPE1801_REG_INT_EN_GPIO_MASK_HIGH 0x0C +#define 
STMPE1801_REG_INT_STA_GPIO_LOW 0x0D +#define STMPE1801_REG_INT_STA_GPIO_MID 0x0E +#define STMPE1801_REG_INT_STA_GPIO_HIGH 0x0F +#define STMPE1801_REG_GPIO_SET_LOW 0x10 +#define STMPE1801_REG_GPIO_SET_MID 0x11 +#define STMPE1801_REG_GPIO_SET_HIGH 0x12 +#define STMPE1801_REG_GPIO_CLR_LOW 0x13 +#define STMPE1801_REG_GPIO_CLR_MID 0x14 +#define STMPE1801_REG_GPIO_CLR_HIGH 0x15 +#define STMPE1801_REG_GPIO_MP_LOW 0x16 +#define STMPE1801_REG_GPIO_MP_MID 0x17 +#define STMPE1801_REG_GPIO_MP_HIGH 0x18 +#define STMPE1801_REG_GPIO_SET_DIR_LOW 0x19 +#define STMPE1801_REG_GPIO_SET_DIR_MID 0x1A +#define STMPE1801_REG_GPIO_SET_DIR_HIGH 0x1B +#define STMPE1801_REG_GPIO_RE_LOW 0x1C +#define STMPE1801_REG_GPIO_RE_MID 0x1D +#define STMPE1801_REG_GPIO_RE_HIGH 0x1E +#define STMPE1801_REG_GPIO_FE_LOW 0x1F +#define STMPE1801_REG_GPIO_FE_MID 0x20 +#define STMPE1801_REG_GPIO_FE_HIGH 0x21 +#define STMPE1801_REG_GPIO_PULL_UP_LOW 0x22 +#define STMPE1801_REG_GPIO_PULL_UP_MID 0x23 +#define STMPE1801_REG_GPIO_PULL_UP_HIGH 0x24 + +#define STMPE1801_MSK_SYS_CTRL_RESET (1 << 7) + +#define STMPE1801_MSK_INT_EN_KPC (1 << 1) +#define STMPE1801_MSK_INT_EN_GPIO (1 << 3) + +/* * STMPE24xx */ diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c index 61aea6381cdf..962a6e17a01a 100644 --- a/drivers/mfd/syscon.c +++ b/drivers/mfd/syscon.c @@ -25,17 +25,15 @@ static struct platform_driver syscon_driver; struct syscon { - struct device *dev; void __iomem *base; struct regmap *regmap; }; -static int syscon_match(struct device *dev, void *data) +static int syscon_match_node(struct device *dev, void *data) { - struct syscon *syscon = dev_get_drvdata(dev); struct device_node *dn = data; - return (syscon->dev->of_node == dn) ? 1 : 0; + return (dev->of_node == dn) ? 1 : 0; } struct regmap *syscon_node_to_regmap(struct device_node *np) @@ -44,7 +42,7 @@ struct regmap *syscon_node_to_regmap(struct device_node *np) struct device *dev; dev = driver_find_device(&syscon_driver.driver, NULL, np, - syscon_match); + syscon_match_node); if (!dev) return ERR_PTR(-EPROBE_DEFER); @@ -70,6 +68,34 @@ struct regmap *syscon_regmap_lookup_by_compatible(const char *s) } EXPORT_SYMBOL_GPL(syscon_regmap_lookup_by_compatible); +static int syscon_match_pdevname(struct device *dev, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + const struct platform_device_id *id = platform_get_device_id(pdev); + + if (id) + if (!strcmp(id->name, (const char *)data)) + return 1; + + return !strcmp(dev_name(dev), (const char *)data); +} + +struct regmap *syscon_regmap_lookup_by_pdevname(const char *s) +{ + struct device *dev; + struct syscon *syscon; + + dev = driver_find_device(&syscon_driver.driver, NULL, (void *)s, + syscon_match_pdevname); + if (!dev) + return ERR_PTR(-EPROBE_DEFER); + + syscon = dev_get_drvdata(dev); + + return syscon->regmap; +} +EXPORT_SYMBOL_GPL(syscon_regmap_lookup_by_pdevname); + struct regmap *syscon_regmap_lookup_by_phandle(struct device_node *np, const char *property) { @@ -101,28 +127,22 @@ static struct regmap_config syscon_regmap_config = { static int syscon_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; struct syscon *syscon; - struct resource res; - int ret; - - if (!np) - return -ENOENT; + struct resource *res; - syscon = devm_kzalloc(dev, sizeof(struct syscon), - GFP_KERNEL); + syscon = devm_kzalloc(dev, sizeof(*syscon), GFP_KERNEL); if (!syscon) return -ENOMEM; - syscon->base = of_iomap(np, 0); - if (!syscon->base) - return -EADDRNOTAVAIL; + res = 
platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENOENT; - ret = of_address_to_resource(np, 0, &res); - if (ret) - return ret; + syscon->base = devm_ioremap(dev, res->start, resource_size(res)); + if (!syscon->base) + return -ENOMEM; - syscon_regmap_config.max_register = res.end - res.start - 3; + syscon_regmap_config.max_register = res->end - res->start - 3; syscon->regmap = devm_regmap_init_mmio(dev, syscon->base, &syscon_regmap_config); if (IS_ERR(syscon->regmap)) { @@ -130,25 +150,17 @@ static int syscon_probe(struct platform_device *pdev) return PTR_ERR(syscon->regmap); } - syscon->dev = dev; platform_set_drvdata(pdev, syscon); - dev_info(dev, "syscon regmap start 0x%x end 0x%x registered\n", - res.start, res.end); + dev_info(dev, "regmap %pR registered\n", res); return 0; } -static int syscon_remove(struct platform_device *pdev) -{ - struct syscon *syscon; - - syscon = platform_get_drvdata(pdev); - iounmap(syscon->base); - platform_set_drvdata(pdev, NULL); - - return 0; -} +static const struct platform_device_id syscon_ids[] = { + { "syscon", }, + { } +}; static struct platform_driver syscon_driver = { .driver = { @@ -157,7 +169,7 @@ static struct platform_driver syscon_driver = { .of_match_table = of_syscon_match, }, .probe = syscon_probe, - .remove = syscon_remove, + .id_table = syscon_ids, }; static int __init syscon_init(void) diff --git a/drivers/mfd/tc3589x.c b/drivers/mfd/tc3589x.c index ecc092c7f745..4cb92bb2aea2 100644 --- a/drivers/mfd/tc3589x.c +++ b/drivers/mfd/tc3589x.c @@ -350,7 +350,8 @@ static int tc3589x_probe(struct i2c_client *i2c, | I2C_FUNC_SMBUS_I2C_BLOCK)) return -EIO; - tc3589x = kzalloc(sizeof(struct tc3589x), GFP_KERNEL); + tc3589x = devm_kzalloc(&i2c->dev, sizeof(struct tc3589x), + GFP_KERNEL); if (!tc3589x) return -ENOMEM; @@ -366,33 +367,27 @@ static int tc3589x_probe(struct i2c_client *i2c, ret = tc3589x_chip_init(tc3589x); if (ret) - goto out_free; + return ret; ret = tc3589x_irq_init(tc3589x, np); if (ret) - goto out_free; + return ret; ret = request_threaded_irq(tc3589x->i2c->irq, NULL, tc3589x_irq, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, "tc3589x", tc3589x); if (ret) { dev_err(tc3589x->dev, "failed to request IRQ: %d\n", ret); - goto out_free; + return ret; } ret = tc3589x_device_init(tc3589x); if (ret) { dev_err(tc3589x->dev, "failed to add child devices\n"); - goto out_freeirq; + return ret; } return 0; - -out_freeirq: - free_irq(tc3589x->i2c->irq, tc3589x); -out_free: - kfree(tc3589x); - return ret; } static int tc3589x_remove(struct i2c_client *client) @@ -401,10 +396,6 @@ static int tc3589x_remove(struct i2c_client *client) mfd_remove_devices(tc3589x->dev); - free_irq(tc3589x->i2c->irq, tc3589x); - - kfree(tc3589x); - return 0; } diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c index 98edb5be85c6..fbd6ee67b5a5 100644 --- a/drivers/mfd/tps65090.c +++ b/drivers/mfd/tps65090.c @@ -56,12 +56,23 @@ #define TPS65090_INT2_MASK_OVERLOAD_FET6 6 #define TPS65090_INT2_MASK_OVERLOAD_FET7 7 +static struct resource charger_resources[] = { + { + .start = TPS65090_IRQ_VAC_STATUS_CHANGE, + .end = TPS65090_IRQ_VAC_STATUS_CHANGE, + .flags = IORESOURCE_IRQ, + } +}; + static struct mfd_cell tps65090s[] = { { .name = "tps65090-pmic", }, { .name = "tps65090-charger", + .num_resources = ARRAY_SIZE(charger_resources), + .resources = &charger_resources[0], + .of_compatible = "ti,tps65090-charger", }, }; diff --git a/drivers/mfd/twl4030-madc.c b/drivers/mfd/twl4030-madc.c index 942b666a2a07..42bd3ea5df3c 100644 --- 
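The syscon rework above (platform_get_resource() plus an id_table) exists so that early, non-DT platforms can register syscon blocks too. A minimal sketch of that usage, assuming a made-up base address and board-file call site; only syscon_regmap_lookup_by_pdevname() itself comes from this series:

#include <linux/err.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/mfd/syscon.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/sizes.h>

/* Hypothetical system-controller block; 0x10000000 is a placeholder. */
static struct resource board_syscon_res = DEFINE_RES_MEM(0x10000000, SZ_4K);

static int __init board_syscon_example(void)
{
	struct regmap *map;
	unsigned int val;

	/* Binds through the new "syscon" platform_device_id entry. */
	platform_device_register_simple("syscon", -1, &board_syscon_res, 1);

	/* Consumers now look the regmap up by device name, not phandle. */
	map = syscon_regmap_lookup_by_pdevname("syscon");
	if (IS_ERR(map))
		return PTR_ERR(map);	/* -EPROBE_DEFER until probed */

	return regmap_read(map, 0x0, &val);
}
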
a/drivers/mfd/twl4030-madc.c +++ b/drivers/mfd/twl4030-madc.c @@ -211,12 +211,14 @@ static int twl4030battery_current(int raw_volt) * @reg_base - Base address of the first channel * @Channels - 16 bit bitmap. If the bit is set, channel value is read * @buf - The channel values are stored here. if read fails error * value is stored + * @raw - Return raw values without conversion * Returns the number of successfully read channels. */ static int twl4030_madc_read_channels(struct twl4030_madc_data *madc, u8 reg_base, unsigned - long channels, int *buf) + long channels, int *buf, + bool raw) { int count = 0, count_req = 0, i; u8 reg; @@ -230,6 +232,10 @@ static int twl4030_madc_read_channels(struct twl4030_madc_data *madc, count_req++; continue; } + if (raw) { + count++; + continue; + } switch (i) { case 10: buf[i] = twl4030battery_current(buf[i]); @@ -371,7 +377,7 @@ static irqreturn_t twl4030_madc_threaded_irq_handler(int irq, void *_madc) method = &twl4030_conversion_methods[r->method]; /* Read results */ len = twl4030_madc_read_channels(madc, method->rbase, - r->channels, r->rbuf); + r->channels, r->rbuf, r->raw); /* Return results to caller */ if (r->func_cb != NULL) { r->func_cb(len, r->channels, r->rbuf); @@ -397,7 +403,7 @@ err_i2c: method = &twl4030_conversion_methods[r->method]; /* Read results */ len = twl4030_madc_read_channels(madc, method->rbase, - r->channels, r->rbuf); + r->channels, r->rbuf, r->raw); /* Return results to caller */ if (r->func_cb != NULL) { r->func_cb(len, r->channels, r->rbuf); @@ -585,7 +591,7 @@ int twl4030_madc_conversion(struct twl4030_madc_request *req) goto out; } ret = twl4030_madc_read_channels(twl4030_madc, method->rbase, - req->channels, req->rbuf); + req->channels, req->rbuf, req->raw); twl4030_madc->requests[req->method].active = 0; out: diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c index f361bf38a0aa..492ee2cd3400 100644 --- a/drivers/mfd/twl6040.c +++ b/drivers/mfd/twl6040.c @@ -554,7 +554,7 @@ static int twl6040_probe(struct i2c_client *client, twl6040->supplies[0].supply = "vio"; twl6040->supplies[1].supply = "v2v1"; - ret = regulator_bulk_get(&client->dev, TWL6040_NUM_SUPPLIES, + ret = devm_regulator_bulk_get(&client->dev, TWL6040_NUM_SUPPLIES, twl6040->supplies); if (ret != 0) { dev_err(&client->dev, "Failed to get supplies: %d\n", ret); @@ -564,7 +564,7 @@ static int twl6040_probe(struct i2c_client *client, ret = regulator_bulk_enable(TWL6040_NUM_SUPPLIES, twl6040->supplies); if (ret != 0) { dev_err(&client->dev, "Failed to enable supplies: %d\n", ret); - goto power_err; + goto regulator_get_err; } twl6040->dev = &client->dev; @@ -586,8 +586,8 @@ static int twl6040_probe(struct i2c_client *client, twl6040->audpwron = -EINVAL; if (gpio_is_valid(twl6040->audpwron)) { - ret = gpio_request_one(twl6040->audpwron, GPIOF_OUT_INIT_LOW, - "audpwron"); + ret = devm_gpio_request_one(&client->dev, twl6040->audpwron, + GPIOF_OUT_INIT_LOW, "audpwron"); if (ret) goto gpio_err; } @@ -596,14 +596,14 @@ static int twl6040_probe(struct i2c_client *client, IRQF_ONESHOT, 0, &twl6040_irq_chip, &twl6040->irq_data); if (ret < 0) - goto irq_init_err; + goto gpio_err; twl6040->irq_ready = regmap_irq_get_virq(twl6040->irq_data, TWL6040_IRQ_READY); twl6040->irq_th = regmap_irq_get_virq(twl6040->irq_data, TWL6040_IRQ_TH); - ret = request_threaded_irq(twl6040->irq_ready, NULL, + ret = devm_request_threaded_irq(twl6040->dev, twl6040->irq_ready, NULL, twl6040_readyint_handler, IRQF_ONESHOT, "twl6040_irq_ready", twl6040); if (ret) { @@ -611,7 +611,7 @@ static
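The new raw flag in the MADC hunks above lets callers skip the channel-specific post-processing (battery current, temperature) and get plain ADC counts; rx51_battery starts using it later in this series. A hedged sketch of such a request; the channel choice here is arbitrary:

#include <linux/errno.h>
#include <linux/i2c/twl4030-madc.h>

/* Illustrative only: read MADC channel 0 without unit conversion. */
static int example_madc_read_raw(void)
{
	struct twl4030_madc_request req = {
		.channels	= 1 << 0,
		.method		= TWL4030_MADC_SW1,
		.type		= TWL4030_MADC_WAIT,
		.func_cb	= NULL,
		.raw		= true,		/* field added by this patch */
	};
	int ret;

	ret = twl4030_madc_conversion(&req);
	if (ret <= 0)
		return -ENODATA;

	return req.rbuf[0];	/* raw counts, not a converted unit */
}
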
int twl6040_probe(struct i2c_client *client, goto readyirq_err; } - ret = request_threaded_irq(twl6040->irq_th, NULL, + ret = devm_request_threaded_irq(twl6040->dev, twl6040->irq_th, NULL, twl6040_thint_handler, IRQF_ONESHOT, "twl6040_irq_th", twl6040); if (ret) { @@ -681,18 +681,13 @@ static int twl6040_probe(struct i2c_client *client, return 0; mfd_err: - free_irq(twl6040->irq_th, twl6040); + devm_free_irq(&client->dev, twl6040->irq_th, twl6040); thirq_err: - free_irq(twl6040->irq_ready, twl6040); + devm_free_irq(&client->dev, twl6040->irq_ready, twl6040); readyirq_err: regmap_del_irq_chip(twl6040->irq, twl6040->irq_data); -irq_init_err: - if (gpio_is_valid(twl6040->audpwron)) - gpio_free(twl6040->audpwron); gpio_err: regulator_bulk_disable(TWL6040_NUM_SUPPLIES, twl6040->supplies); -power_err: - regulator_bulk_free(TWL6040_NUM_SUPPLIES, twl6040->supplies); regulator_get_err: i2c_set_clientdata(client, NULL); err: @@ -706,18 +701,14 @@ static int twl6040_remove(struct i2c_client *client) if (twl6040->power_count) twl6040_power(twl6040, 0); - if (gpio_is_valid(twl6040->audpwron)) - gpio_free(twl6040->audpwron); - - free_irq(twl6040->irq_ready, twl6040); - free_irq(twl6040->irq_th, twl6040); + devm_free_irq(&client->dev, twl6040->irq_ready, twl6040); + devm_free_irq(&client->dev, twl6040->irq_th, twl6040); regmap_del_irq_chip(twl6040->irq, twl6040->irq_data); mfd_remove_devices(&client->dev); i2c_set_clientdata(client, NULL); regulator_bulk_disable(TWL6040_NUM_SUPPLIES, twl6040->supplies); - regulator_bulk_free(TWL6040_NUM_SUPPLIES, twl6040->supplies); return 0; } diff --git a/drivers/mfd/ucb1400_core.c b/drivers/mfd/ucb1400_core.c index daf69527ed83..e9031fa9d53d 100644 --- a/drivers/mfd/ucb1400_core.c +++ b/drivers/mfd/ucb1400_core.c @@ -75,6 +75,11 @@ static int ucb1400_core_probe(struct device *dev) /* GPIO */ ucb_gpio.ac97 = ac97; + if (pdata) { + ucb_gpio.gpio_setup = pdata->gpio_setup; + ucb_gpio.gpio_teardown = pdata->gpio_teardown; + ucb_gpio.gpio_offset = pdata->gpio_offset; + } ucb->ucb1400_gpio = platform_device_alloc("ucb1400_gpio", -1); if (!ucb->ucb1400_gpio) { err = -ENOMEM; diff --git a/drivers/mfd/vexpress-config.c b/drivers/mfd/vexpress-config.c index 3c1723aa6225..84ce6b9daa3d 100644 --- a/drivers/mfd/vexpress-config.c +++ b/drivers/mfd/vexpress-config.c @@ -184,13 +184,14 @@ static int vexpress_config_schedule(struct vexpress_config_trans *trans) spin_lock_irqsave(&bridge->transactions_lock, flags); - vexpress_config_dump_trans("Executing", trans); - - if (list_empty(&bridge->transactions)) + if (list_empty(&bridge->transactions)) { + vexpress_config_dump_trans("Executing", trans); status = bridge->info->func_exec(trans->func->func, trans->offset, trans->write, trans->data); - else + } else { + vexpress_config_dump_trans("Queuing", trans); status = VEXPRESS_CONFIG_STATUS_WAIT; + } switch (status) { case VEXPRESS_CONFIG_STATUS_DONE: @@ -212,25 +213,31 @@ void vexpress_config_complete(struct vexpress_config_bridge *bridge, { struct vexpress_config_trans *trans; unsigned long flags; + const char *message = "Completed"; spin_lock_irqsave(&bridge->transactions_lock, flags); trans = list_first_entry(&bridge->transactions, struct vexpress_config_trans, list); - vexpress_config_dump_trans("Completed", trans); - trans->status = status; - list_del(&trans->list); - if (!list_empty(&bridge->transactions)) { - vexpress_config_dump_trans("Pending", trans); + do { + vexpress_config_dump_trans(message, trans); + list_del(&trans->list); + complete(&trans->completion); - 
bridge->info->func_exec(trans->func->func, trans->offset, - trans->write, trans->data); - } - spin_unlock_irqrestore(&bridge->transactions_lock, flags); + if (list_empty(&bridge->transactions)) + break; + + trans = list_first_entry(&bridge->transactions, + struct vexpress_config_trans, list); + vexpress_config_dump_trans("Executing pending", trans); + trans->status = bridge->info->func_exec(trans->func->func, + trans->offset, trans->write, trans->data); + message = "Finished pending"; + } while (trans->status == VEXPRESS_CONFIG_STATUS_DONE); - complete(&trans->completion); + spin_unlock_irqrestore(&bridge->transactions_lock, flags); } EXPORT_SYMBOL(vexpress_config_complete); diff --git a/drivers/mfd/vexpress-sysreg.c b/drivers/mfd/vexpress-sysreg.c index bf75e967a1f3..96a020b1dcd1 100644 --- a/drivers/mfd/vexpress-sysreg.c +++ b/drivers/mfd/vexpress-sysreg.c @@ -490,12 +490,12 @@ static int vexpress_sysreg_probe(struct platform_device *pdev) return err; } + vexpress_sysreg_dev = &pdev->dev; + platform_device_register_data(vexpress_sysreg_dev, "leds-gpio", PLATFORM_DEVID_AUTO, &vexpress_sysreg_leds_pdata, sizeof(vexpress_sysreg_leds_pdata)); - vexpress_sysreg_dev = &pdev->dev; - device_create_file(vexpress_sysreg_dev, &dev_attr_sys_id); return 0; diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c index f70c4956ff9d..155c4a1a6a99 100644 --- a/drivers/mfd/wm5102-tables.c +++ b/drivers/mfd/wm5102-tables.c @@ -10,6 +10,7 @@ * published by the Free Software Foundation. */ +#include <linux/device.h> #include <linux/module.h> #include <linux/mfd/arizona/core.h> @@ -57,31 +58,54 @@ static const struct reg_default wm5102_reva_patch[] = { }; static const struct reg_default wm5102_revb_patch[] = { + { 0x19, 0x0001 }, { 0x80, 0x0003 }, { 0x081, 0xE022 }, - { 0x410, 0x4080 }, - { 0x418, 0x4080 }, - { 0x420, 0x4080 }, - { 0x428, 0xC000 }, + { 0x410, 0x6080 }, + { 0x418, 0xa080 }, + { 0x420, 0xa080 }, + { 0x428, 0xe000 }, + { 0x443, 0xDC1A }, { 0x4B0, 0x0066 }, { 0x458, 0x000b }, { 0x212, 0x0000 }, + { 0x171, 0x0000 }, + { 0x35E, 0x000C }, + { 0x2D4, 0x0000 }, { 0x80, 0x0000 }, }; /* We use a function so we can use ARRAY_SIZE() */ int wm5102_patch(struct arizona *arizona) { + const struct reg_default *wm5102_patch; + int ret = 0; + int i, patch_size; + switch (arizona->rev) { case 0: - return regmap_register_patch(arizona->regmap, - wm5102_reva_patch, - ARRAY_SIZE(wm5102_reva_patch)); + wm5102_patch = wm5102_reva_patch; + patch_size = ARRAY_SIZE(wm5102_reva_patch); + break; default: - return regmap_register_patch(arizona->regmap, - wm5102_revb_patch, - ARRAY_SIZE(wm5102_revb_patch)); + wm5102_patch = wm5102_revb_patch; + patch_size = ARRAY_SIZE(wm5102_revb_patch); + } + + regcache_cache_bypass(arizona->regmap, true); + + for (i = 0; i < patch_size; i++) { + ret = regmap_write(arizona->regmap, wm5102_patch[i].reg, + wm5102_patch[i].def); + if (ret != 0) { + dev_err(arizona->dev, "Failed to write %x = %x: %d\n", + wm5102_patch[i].reg, wm5102_patch[i].def, ret); + goto out; + } } + +out: + regcache_cache_bypass(arizona->regmap, false); + return ret; } static const struct regmap_irq wm5102_aod_irqs[ARIZONA_NUM_IRQ] = { @@ -282,7 +306,7 @@ static const struct reg_default wm5102_reg_default[] = { { 0x00000155, 0x0000 }, /* R341 - Rate Estimator 4 */ { 0x00000156, 0x0000 }, /* R342 - Rate Estimator 5 */ { 0x00000161, 0x0000 }, /* R353 - Dynamic Frequency Scaling 1 */ - { 0x00000171, 0x0002 }, /* R369 - FLL1 Control 1 */ + { 0x00000171, 0x0000 }, /* R369 - FLL1 Control 1 */ { 0x00000172, 0x0008 }, /*
R370 - FLL1 Control 2 */ { 0x00000173, 0x0018 }, /* R371 - FLL1 Control 3 */ { 0x00000174, 0x007D }, /* R372 - FLL1 Control 4 */ @@ -366,7 +390,7 @@ static const struct reg_default wm5102_reg_default[] = { { 0x00000400, 0x0000 }, /* R1024 - Output Enables 1 */ { 0x00000408, 0x0000 }, /* R1032 - Output Rate 1 */ { 0x00000409, 0x0022 }, /* R1033 - Output Volume Ramp */ - { 0x00000410, 0x4080 }, /* R1040 - Output Path Config 1L */ + { 0x00000410, 0x6080 }, /* R1040 - Output Path Config 1L */ { 0x00000411, 0x0180 }, /* R1041 - DAC Digital Volume 1L */ { 0x00000412, 0x0081 }, /* R1042 - DAC Volume Limit 1L */ { 0x00000413, 0x0001 }, /* R1043 - Noise Gate Select 1L */ @@ -374,7 +398,7 @@ static const struct reg_default wm5102_reg_default[] = { { 0x00000415, 0x0180 }, /* R1045 - DAC Digital Volume 1R */ { 0x00000416, 0x0081 }, /* R1046 - DAC Volume Limit 1R */ { 0x00000417, 0x0002 }, /* R1047 - Noise Gate Select 1R */ - { 0x00000418, 0x4080 }, /* R1048 - Output Path Config 2L */ + { 0x00000418, 0xA080 }, /* R1048 - Output Path Config 2L */ { 0x00000419, 0x0180 }, /* R1049 - DAC Digital Volume 2L */ { 0x0000041A, 0x0081 }, /* R1050 - DAC Volume Limit 2L */ { 0x0000041B, 0x0004 }, /* R1051 - Noise Gate Select 2L */ @@ -382,11 +406,11 @@ static const struct reg_default wm5102_reg_default[] = { { 0x0000041D, 0x0180 }, /* R1053 - DAC Digital Volume 2R */ { 0x0000041E, 0x0081 }, /* R1054 - DAC Volume Limit 2R */ { 0x0000041F, 0x0008 }, /* R1055 - Noise Gate Select 2R */ - { 0x00000420, 0x4080 }, /* R1056 - Output Path Config 3L */ + { 0x00000420, 0xA080 }, /* R1056 - Output Path Config 3L */ { 0x00000421, 0x0180 }, /* R1057 - DAC Digital Volume 3L */ { 0x00000422, 0x0081 }, /* R1058 - DAC Volume Limit 3L */ { 0x00000423, 0x0010 }, /* R1059 - Noise Gate Select 3L */ - { 0x00000428, 0xC000 }, /* R1064 - Output Path Config 4L */ + { 0x00000428, 0xE000 }, /* R1064 - Output Path Config 4L */ { 0x00000429, 0x0180 }, /* R1065 - DAC Digital Volume 4L */ { 0x0000042A, 0x0081 }, /* R1066 - Out Volume 4L */ { 0x0000042B, 0x0040 }, /* R1067 - Noise Gate Select 4L */ @@ -401,7 +425,7 @@ static const struct reg_default wm5102_reg_default[] = { { 0x00000436, 0x0081 }, /* R1078 - DAC Volume Limit 5R */ { 0x00000437, 0x0200 }, /* R1079 - Noise Gate Select 5R */ { 0x00000450, 0x0000 }, /* R1104 - DAC AEC Control 1 */ - { 0x00000458, 0x0001 }, /* R1112 - Noise Gate Control */ + { 0x00000458, 0x000B }, /* R1112 - Noise Gate Control */ { 0x00000490, 0x0069 }, /* R1168 - PDM SPK1 CTRL 1 */ { 0x00000491, 0x0000 }, /* R1169 - PDM SPK1 CTRL 2 */ { 0x00000500, 0x000C }, /* R1280 - AIF1 BCLK Ctrl */ diff --git a/drivers/mfd/wm831x-spi.c b/drivers/mfd/wm831x-spi.c index 4e70e157a909..e7ed14f661d8 100644 --- a/drivers/mfd/wm831x-spi.c +++ b/drivers/mfd/wm831x-spi.c @@ -37,7 +37,7 @@ static int wm831x_spi_probe(struct spi_device *spi) spi->bits_per_word = 16; spi->mode = SPI_MODE_0; - dev_set_drvdata(&spi->dev, wm831x); + spi_set_drvdata(spi, wm831x); wm831x->dev = &spi->dev; wm831x->regmap = devm_regmap_init_spi(spi, &wm831x_regmap_config); @@ -53,7 +53,7 @@ static int wm831x_spi_probe(struct spi_device *spi) static int wm831x_spi_remove(struct spi_device *spi) { - struct wm831x *wm831x = dev_get_drvdata(&spi->dev); + struct wm831x *wm831x = spi_get_drvdata(spi); wm831x_device_exit(wm831x); @@ -69,7 +69,7 @@ static int wm831x_spi_suspend(struct device *dev) static void wm831x_spi_shutdown(struct spi_device *spi) { - struct wm831x *wm831x = dev_get_drvdata(&spi->dev); + struct wm831x *wm831x = spi_get_drvdata(spi); 
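The wm831x-spi hunk here is purely mechanical: the spi_set_drvdata()/spi_get_drvdata() helpers are thin wrappers over the generic device drvdata accessors, roughly as defined in <linux/spi/spi.h>:

static inline void spi_set_drvdata(struct spi_device *spi, void *data)
{
	dev_set_drvdata(&spi->dev, data);
}

static inline void *spi_get_drvdata(struct spi_device *spi)
{
	return dev_get_drvdata(&spi->dev);
}

Using the wrappers keeps drivers agnostic about where the SPI core stores per-device data.
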
wm831x_device_shutdown(wm831x); } diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c index 803e93fae56a..00e4fe2f3c75 100644 --- a/drivers/mfd/wm8994-core.c +++ b/drivers/mfd/wm8994-core.c @@ -19,6 +19,9 @@ #include <linux/err.h> #include <linux/delay.h> #include <linux/mfd/core.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_gpio.h> #include <linux/pm_runtime.h> #include <linux/regmap.h> #include <linux/regulator/consumer.h> @@ -191,7 +194,7 @@ static const char *wm8958_main_supplies[] = { "SPKVDD2", }; -#ifdef CONFIG_PM +#ifdef CONFIG_PM_RUNTIME static int wm8994_suspend(struct device *dev) { struct wm8994 *wm8994 = dev_get_drvdata(dev); @@ -396,6 +399,60 @@ static const struct reg_default wm1811_reva_patch[] = { { 0x102, 0x0 }, }; +#ifdef CONFIG_OF +static int wm8994_set_pdata_from_of(struct wm8994 *wm8994) +{ + struct device_node *np = wm8994->dev->of_node; + struct wm8994_pdata *pdata = &wm8994->pdata; + int i; + + if (!np) + return 0; + + if (of_property_read_u32_array(np, "wlf,gpio-cfg", pdata->gpio_defaults, + ARRAY_SIZE(pdata->gpio_defaults)) >= 0) { + for (i = 0; i < ARRAY_SIZE(pdata->gpio_defaults); i++) { + if (wm8994->pdata.gpio_defaults[i] == 0) + pdata->gpio_defaults[i] + = WM8994_CONFIGURE_GPIO; + } + } + + of_property_read_u32_array(np, "wlf,micbias-cfg", pdata->micbias, + ARRAY_SIZE(pdata->micbias)); + + pdata->lineout1_diff = true; + pdata->lineout2_diff = true; + if (of_find_property(np, "wlf,lineout1-se", NULL)) + pdata->lineout1_diff = false; + if (of_find_property(np, "wlf,lineout2-se", NULL)) + pdata->lineout2_diff = false; + + if (of_find_property(np, "wlf,lineout1-feedback", NULL)) + pdata->lineout1fb = true; + if (of_find_property(np, "wlf,lineout2-feedback", NULL)) + pdata->lineout2fb = true; + + if (of_find_property(np, "wlf,ldoena-always-driven", NULL)) + pdata->ldo_ena_always_driven = true; + + pdata->ldo[0].enable = of_get_named_gpio(np, "wlf,ldo1ena", 0); + if (pdata->ldo[0].enable < 0) + pdata->ldo[0].enable = 0; + + pdata->ldo[1].enable = of_get_named_gpio(np, "wlf,ldo2ena", 0); + if (pdata->ldo[1].enable < 0) + pdata->ldo[1].enable = 0; + + return 0; +} +#else +static int wm8994_set_pdata_from_of(struct wm8994 *wm8994) +{ + return 0; +} +#endif + /* * Instantiate the generic non-control parts of the device. 
*/ @@ -405,7 +462,7 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq) struct regmap_config *regmap_config; const struct reg_default *regmap_patch = NULL; const char *devname; - int ret, i, patch_regs; + int ret, i, patch_regs = 0; int pulls = 0; if (dev_get_platdata(wm8994->dev)) { @@ -414,6 +471,10 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq) } pdata = &wm8994->pdata; + ret = wm8994_set_pdata_from_of(wm8994); + if (ret != 0) + return ret; + dev_set_drvdata(wm8994->dev, wm8994); /* Add the on-chip regulators first for bootstrapping */ @@ -673,9 +734,9 @@ static void wm8994_device_exit(struct wm8994 *wm8994) } static const struct of_device_id wm8994_of_match[] = { - { .compatible = "wlf,wm1811", }, - { .compatible = "wlf,wm8994", }, - { .compatible = "wlf,wm8958", }, + { .compatible = "wlf,wm1811", .data = (void *)WM1811 }, + { .compatible = "wlf,wm8994", .data = (void *)WM8994 }, + { .compatible = "wlf,wm8958", .data = (void *)WM8958 }, { } }; MODULE_DEVICE_TABLE(of, wm8994_of_match); @@ -683,6 +744,7 @@ MODULE_DEVICE_TABLE(of, wm8994_of_match); static int wm8994_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { + const struct of_device_id *of_id; struct wm8994 *wm8994; int ret; @@ -693,7 +755,14 @@ static int wm8994_i2c_probe(struct i2c_client *i2c, i2c_set_clientdata(i2c, wm8994); wm8994->dev = &i2c->dev; wm8994->irq = i2c->irq; - wm8994->type = id->driver_data; + + if (i2c->dev.of_node) { + of_id = of_match_device(wm8994_of_match, &i2c->dev); + if (of_id) + wm8994->type = (int)of_id->data; + } else { + wm8994->type = id->driver_data; + } wm8994->regmap = devm_regmap_init_i2c(i2c, &wm8994_base_regmap_config); if (IS_ERR(wm8994->regmap)) { @@ -724,15 +793,16 @@ static const struct i2c_device_id wm8994_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, wm8994_i2c_id); -static UNIVERSAL_DEV_PM_OPS(wm8994_pm_ops, wm8994_suspend, wm8994_resume, - NULL); +static const struct dev_pm_ops wm8994_pm_ops = { + SET_RUNTIME_PM_OPS(wm8994_suspend, wm8994_resume, NULL) +}; static struct i2c_driver wm8994_i2c_driver = { .driver = { .name = "wm8994", .owner = THIS_MODULE, .pm = &wm8994_pm_ops, - .of_match_table = wm8994_of_match, + .of_match_table = of_match_ptr(wm8994_of_match), }, .probe = wm8994_i2c_probe, .remove = wm8994_i2c_remove, diff --git a/drivers/mtd/chips/gen_probe.c b/drivers/mtd/chips/gen_probe.c index 3b9a2843c5f8..74dbb6bcf488 100644 --- a/drivers/mtd/chips/gen_probe.c +++ b/drivers/mtd/chips/gen_probe.c @@ -204,14 +204,16 @@ static inline struct mtd_info *cfi_cmdset_unknown(struct map_info *map, struct cfi_private *cfi = map->fldrv_priv; __u16 type = primary?cfi->cfiq->P_ID:cfi->cfiq->A_ID; #ifdef CONFIG_MODULES - char probename[16+sizeof(MODULE_SYMBOL_PREFIX)]; + char probename[sizeof(VMLINUX_SYMBOL_STR(cfi_cmdset_%4.4X))]; cfi_cmdset_fn_t *probe_function; - sprintf(probename, MODULE_SYMBOL_PREFIX "cfi_cmdset_%4.4X", type); + sprintf(probename, VMLINUX_SYMBOL_STR(cfi_cmdset_%4.4X), type); probe_function = __symbol_get(probename); if (!probe_function) { - request_module(probename + sizeof(MODULE_SYMBOL_PREFIX) - 1); + char modname[sizeof("cfi_cmdset_%4.4X")]; + sprintf(modname, "cfi_cmdset_%4.4X", type); + request_module(modname); probe_function = __symbol_get(probename); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index c59ec3ddaa66..3cd397d60434 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5204,7 
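The dev_pm_ops change in the wm8994 hunk above is subtle: UNIVERSAL_DEV_PM_OPS wires the same callbacks into both system sleep and runtime PM, while the replacement table is runtime-only, matching the new CONFIG_PM_RUNTIME guard on the callbacks. A sketch of the two shapes with placeholder callbacks:

#include <linux/device.h>
#include <linux/pm.h>

static int example_suspend(struct device *dev) { return 0; }
static int example_resume(struct device *dev) { return 0; }

/* Runtime PM only, as wm8994 now uses. */
static const struct dev_pm_ops example_pm_ops = {
	SET_RUNTIME_PM_OPS(example_suspend, example_resume, NULL)
};

/* The old form also populated the system-sleep slots. */
static UNIVERSAL_DEV_PM_OPS(example_universal_pm_ops,
			    example_suspend, example_resume, NULL);
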
+5204,7 @@ static pci_ers_result_t eeh_slot_reset(struct pci_dev *pdev) if (t4_wait_dev_ready(adap) < 0) return PCI_ERS_RESULT_DISCONNECT; - if (t4_fw_hello(adap, adap->fn, adap->fn, MASTER_MUST, NULL)) + if (t4_fw_hello(adap, adap->fn, adap->fn, MASTER_MUST, NULL) < 0) return PCI_ERS_RESULT_DISCONNECT; adap->flags |= FW_OK; if (adap_init1(adap, &c)) diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 234ce6f07544..f544b297c9ab 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -327,6 +327,7 @@ enum vf_state { #define BE_FLAGS_LINK_STATUS_INIT 1 #define BE_FLAGS_WORKER_SCHEDULED (1 << 3) +#define BE_FLAGS_NAPI_ENABLED (1 << 9) #define BE_UC_PMAC_COUNT 30 #define BE_VF_UC_PMAC_COUNT 2 #define BE_FLAGS_QNQ_ASYNC_EVT_RCVD (1 << 11) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 25d3290b8cac..e1e5bb9d9054 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -961,19 +961,8 @@ int be_cmd_cq_create(struct be_adapter *adapter, struct be_queue_info *cq, OPCODE_COMMON_CQ_CREATE, sizeof(*req), wrb, NULL); req->num_pages = cpu_to_le16(PAGES_4K_SPANNED(q_mem->va, q_mem->size)); - if (lancer_chip(adapter)) { - req->hdr.version = 2; - req->page_size = 1; /* 1 for 4K */ - AMAP_SET_BITS(struct amap_cq_context_lancer, nodelay, ctxt, - no_delay); - AMAP_SET_BITS(struct amap_cq_context_lancer, count, ctxt, - __ilog2_u32(cq->len/256)); - AMAP_SET_BITS(struct amap_cq_context_lancer, valid, ctxt, 1); - AMAP_SET_BITS(struct amap_cq_context_lancer, eventable, - ctxt, 1); - AMAP_SET_BITS(struct amap_cq_context_lancer, eqid, - ctxt, eq->id); - } else { + + if (BEx_chip(adapter)) { AMAP_SET_BITS(struct amap_cq_context_be, coalescwm, ctxt, coalesce_wm); AMAP_SET_BITS(struct amap_cq_context_be, nodelay, @@ -983,6 +972,18 @@ int be_cmd_cq_create(struct be_adapter *adapter, struct be_queue_info *cq, AMAP_SET_BITS(struct amap_cq_context_be, valid, ctxt, 1); AMAP_SET_BITS(struct amap_cq_context_be, eventable, ctxt, 1); AMAP_SET_BITS(struct amap_cq_context_be, eqid, ctxt, eq->id); + } else { + req->hdr.version = 2; + req->page_size = 1; /* 1 for 4K */ + AMAP_SET_BITS(struct amap_cq_context_v2, nodelay, ctxt, + no_delay); + AMAP_SET_BITS(struct amap_cq_context_v2, count, ctxt, + __ilog2_u32(cq->len/256)); + AMAP_SET_BITS(struct amap_cq_context_v2, valid, ctxt, 1); + AMAP_SET_BITS(struct amap_cq_context_v2, eventable, + ctxt, 1); + AMAP_SET_BITS(struct amap_cq_context_v2, eqid, + ctxt, eq->id); } be_dws_cpu_to_le(ctxt, sizeof(req->context)); @@ -1763,10 +1764,12 @@ int be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value) req->if_id = cpu_to_le32(adapter->if_handle); if (flags & IFF_PROMISC) { req->if_flags_mask = cpu_to_le32(BE_IF_FLAGS_PROMISCUOUS | - BE_IF_FLAGS_VLAN_PROMISCUOUS); + BE_IF_FLAGS_VLAN_PROMISCUOUS | + BE_IF_FLAGS_MCAST_PROMISCUOUS); if (value == ON) req->if_flags = cpu_to_le32(BE_IF_FLAGS_PROMISCUOUS | - BE_IF_FLAGS_VLAN_PROMISCUOUS); + BE_IF_FLAGS_VLAN_PROMISCUOUS | + BE_IF_FLAGS_MCAST_PROMISCUOUS); } else if (flags & IFF_ALLMULTI) { req->if_flags_mask = req->if_flags = cpu_to_le32(BE_IF_FLAGS_MCAST_PROMISCUOUS); @@ -2084,7 +2087,7 @@ int lancer_cmd_write_object(struct be_adapter *adapter, struct be_dma_mem *cmd, spin_unlock_bh(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->flash_compl, - msecs_to_jiffies(30000))) + msecs_to_jiffies(60000))) status = -1; else status = 
adapter->flash_status; diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index a855668e0cc5..025bdb0d1764 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -381,7 +381,7 @@ struct amap_cq_context_be { u8 rsvd5[32]; /* dword 3*/ } __packed; -struct amap_cq_context_lancer { +struct amap_cq_context_v2 { u8 rsvd0[12]; /* dword 0*/ u8 coalescwm[2]; /* dword 0*/ u8 nodelay; /* dword 0*/ diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 5733cde88e2c..3d4461adb3b4 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -85,6 +85,7 @@ static const struct be_ethtool_stat et_stats[] = { {DRVSTAT_INFO(tx_pauseframes)}, {DRVSTAT_INFO(tx_controlframes)}, {DRVSTAT_INFO(rx_priority_pause_frames)}, + {DRVSTAT_INFO(tx_priority_pauseframes)}, /* Received packets dropped when an internal fifo going into * main packet buffer tank (PMEM) overflows. */ diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 4babc8a4a543..6c52a60dcdb7 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -410,6 +410,7 @@ static void populate_be_v1_stats(struct be_adapter *adapter) drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop; drvs->tx_pauseframes = port_stats->tx_pauseframes; drvs->tx_controlframes = port_stats->tx_controlframes; + drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes; drvs->jabber_events = port_stats->jabber_events; drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf; drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr; @@ -471,11 +472,26 @@ static void accumulate_16bit_val(u32 *acc, u16 val) ACCESS_ONCE(*acc) = newacc; } +void populate_erx_stats(struct be_adapter *adapter, + struct be_rx_obj *rxo, + u32 erx_stat) +{ + if (!BEx_chip(adapter)) + rx_stats(rxo)->rx_drops_no_frags = erx_stat; + else + /* below erx HW counter can actually wrap around after + * 65535. Driver accumulates a 32-bit value + */ + accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags, + (u16)erx_stat); +} + void be_parse_stats(struct be_adapter *adapter) { struct be_erx_stats_v1 *erx = be_erx_stats_from_cmd(adapter); struct be_rx_obj *rxo; int i; + u32 erx_stat; if (lancer_chip(adapter)) { populate_lancer_stats(adapter); @@ -488,12 +504,8 @@ void be_parse_stats(struct be_adapter *adapter) /* as erx_v1 is longer than v0, ok to use v1 for v0 access */ for_all_rx_queues(adapter, rxo, i) { - /* below erx HW counter can actually wrap around after - * 65535. 
Driver accumulates a 32-bit value - */ - accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags, - (u16)erx->rx_drops_no_fragments \ - [rxo->q.id]); + erx_stat = erx->rx_drops_no_fragments[rxo->q.id]; + populate_erx_stats(adapter, rxo, erx_stat); } } } @@ -2378,7 +2390,7 @@ static uint be_num_rss_want(struct be_adapter *adapter) return num; } -static void be_msix_enable(struct be_adapter *adapter) +static int be_msix_enable(struct be_adapter *adapter) { #define BE_MIN_MSIX_VECTORS 1 int i, status, num_vec, num_roce_vec = 0; @@ -2403,13 +2415,17 @@ static void be_msix_enable(struct be_adapter *adapter) goto done; } else if (status >= BE_MIN_MSIX_VECTORS) { num_vec = status; - if (pci_enable_msix(adapter->pdev, adapter->msix_entries, - num_vec) == 0) + status = pci_enable_msix(adapter->pdev, adapter->msix_entries, + num_vec); + if (!status) goto done; } dev_warn(dev, "MSIx enable failed\n"); - return; + /* INTx is not supported in VFs, so fail probe if enable_msix fails */ + if (!be_physfn(adapter)) + return status; + return 0; done: if (be_roce_supported(adapter)) { if (num_vec > num_roce_vec) { @@ -2423,7 +2439,7 @@ done: } else adapter->num_msix_vec = num_vec; dev_info(dev, "enabled %d MSI-x vector(s)\n", adapter->num_msix_vec); - return; + return 0; } static inline int be_msix_vec_get(struct be_adapter *adapter, @@ -2536,8 +2552,11 @@ static int be_close(struct net_device *netdev) be_roce_dev_close(adapter); - for_all_evt_queues(adapter, eqo, i) - napi_disable(&eqo->napi); + if (adapter->flags & BE_FLAGS_NAPI_ENABLED) { + for_all_evt_queues(adapter, eqo, i) + napi_disable(&eqo->napi); + adapter->flags &= ~BE_FLAGS_NAPI_ENABLED; + } be_async_mcc_disable(adapter); @@ -2631,7 +2650,9 @@ static int be_open(struct net_device *netdev) if (status) goto err; - be_irq_register(adapter); + status = be_irq_register(adapter); + if (status) + goto err; for_all_rx_queues(adapter, rxo, i) be_cq_notify(adapter, rxo->cq.id, true, 0); @@ -2645,6 +2666,7 @@ static int be_open(struct net_device *netdev) napi_enable(&eqo->napi); be_eq_notify(adapter, eqo->q.id, true, false, 0); } + adapter->flags |= BE_FLAGS_NAPI_ENABLED; status = be_cmd_link_status_query(adapter, NULL, &link_status, 0); if (!status) @@ -3100,7 +3122,9 @@ static int be_setup(struct be_adapter *adapter) if (status) goto err; - be_msix_enable(adapter); + status = be_msix_enable(adapter); + if (status) + goto err; status = be_evt_queues_create(adapter); if (status) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 256ae789c143..d175bbd3ffd3 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -2496,10 +2496,12 @@ static struct sk_buff *receive_copy(struct sky2_port *sky2, skb->ip_summed = re->skb->ip_summed; skb->csum = re->skb->csum; skb->rxhash = re->skb->rxhash; + skb->vlan_proto = re->skb->vlan_proto; skb->vlan_tci = re->skb->vlan_tci; pci_dma_sync_single_for_device(sky2->hw->pdev, re->data_addr, length, PCI_DMA_FROMDEVICE); + re->skb->vlan_proto = 0; re->skb->vlan_tci = 0; re->skb->rxhash = 0; re->skb->ip_summed = CHECKSUM_NONE; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 59c43918883e..21a5b291b4b3 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -555,8 +555,8 @@ static int cpsw_poll(struct napi_struct *napi, int budget) cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); prim_cpsw = cpsw_get_slave_priv(priv, 0); if (prim_cpsw->irq_enabled == false) { - cpsw_enable_irq(priv); 
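The two-line swap in the cpsw hunk here is an ordering fix: once cpsw_enable_irq() runs, the interrupt handler can fire immediately (possibly on another CPU), so irq_enabled must already read true at that point. The shape of the hazard, with illustrative names:

#include <linux/interrupt.h>

struct example_priv {
	bool irq_enabled;
};

static irqreturn_t example_isr(int irq, void *dev_id)
{
	struct example_priv *priv = dev_id;

	/* May run as soon as the line is enabled; with the old ordering
	 * it could still observe irq_enabled == false. */
	if (!priv->irq_enabled)
		return IRQ_NONE;

	return IRQ_HANDLED;
}

static void example_start(struct example_priv *priv, int irq)
{
	priv->irq_enabled = true;	/* publish state first ... */
	enable_irq(irq);		/* ... then let the handler run */
}
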
prim_cpsw->irq_enabled = true; + cpsw_enable_irq(priv); } } diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index f7f623a5390e..577c72d5f369 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -100,6 +100,9 @@ int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb, netdev_err(dev->net, "asix_rx_fixup() Bad RX Length %d\n", rx->size); kfree_skb(rx->ax_skb); + rx->ax_skb = NULL; + rx->size = 0U; + return 0; } diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 09699054b54f..03e8a15d7deb 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -256,8 +256,9 @@ static int mdio_read(struct net_device *dev, int phy_id, int loc) static void mdio_write(struct net_device *dev, int phy_id, int loc, int val) { pegasus_t *pegasus = netdev_priv(dev); + u16 data = val; - write_mii_word(pegasus, phy_id, loc, (__u16 *)&val); + write_mii_word(pegasus, phy_id, loc, &data); } static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 5a88e72090ce..834e405fb57a 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -548,6 +548,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x0265, 4)}, /* ONDA MT8205 4G LTE */ {QMI_FIXED_INTF(0x19d2, 0x0284, 4)}, /* ZTE MF880 */ {QMI_FIXED_INTF(0x19d2, 0x0326, 4)}, /* ZTE MF821D */ + {QMI_FIXED_INTF(0x19d2, 0x0412, 4)}, /* Telewell TW-LTE 4G */ {QMI_FIXED_INTF(0x19d2, 0x1008, 4)}, /* ZTE (Vodafone) K3570-Z */ {QMI_FIXED_INTF(0x19d2, 0x1010, 4)}, /* ZTE (Vodafone) K3571-Z */ {QMI_FIXED_INTF(0x19d2, 0x1012, 4)}, diff --git a/drivers/net/wireless/atmel.c b/drivers/net/wireless/atmel.c index 23a3498f14d4..830bb1d1f957 100644 --- a/drivers/net/wireless/atmel.c +++ b/drivers/net/wireless/atmel.c @@ -1502,7 +1502,7 @@ static const struct file_operations atmel_proc_fops = { .open = atmel_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; static const struct net_device_ops atmel_netdev_ops = { diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c index 19c45e363aa7..d6033a8e5dea 100644 --- a/drivers/net/wireless/hostap/hostap_ap.c +++ b/drivers/net/wireless/hostap/hostap_ap.c @@ -89,7 +89,7 @@ static const struct file_operations ap_debug_proc_fops = { .open = ap_debug_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; #endif /* PRISM2_NO_PROCFS_DEBUG */ @@ -1116,7 +1116,7 @@ static const struct file_operations prism2_sta_proc_fops = { .open = prism2_sta_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; static void handle_add_proc_queue(struct work_struct *work) diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c index 507ab99eef4e..6307a4e36c85 100644 --- a/drivers/net/wireless/hostap/hostap_hw.c +++ b/drivers/net/wireless/hostap/hostap_hw.c @@ -2957,7 +2957,7 @@ static const struct file_operations prism2_registers_proc_fops = { .open = prism2_registers_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; #endif /* PRISM2_NO_PROCFS_DEBUG */ diff --git a/drivers/net/wireless/hostap/hostap_proc.c b/drivers/net/wireless/hostap/hostap_proc.c index 7491dab2c105..aa7ad3a7a69b 100644 --- a/drivers/net/wireless/hostap/hostap_proc.c +++ 
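The long run of .release conversions on both sides of this point (atmel, hostap, megaraid, comedi, the staging drivers, the USB gadget drivers) all fix the same leak: single_open() kmallocs a struct seq_operations that only single_release() frees, so pairing it with plain seq_release() leaks that allocation on every close. The correct pairing, as a minimal sketch:

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int example_show(struct seq_file *m, void *v)
{
	seq_puts(m, "example\n");
	return 0;
}

static int example_open(struct inode *inode, struct file *file)
{
	return single_open(file, example_show, NULL);
}

static const struct file_operations example_proc_fops = {
	.open		= example_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,	/* pairs with single_open() */
};
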
b/drivers/net/wireless/hostap/hostap_proc.c @@ -52,7 +52,7 @@ static const struct file_operations prism2_debug_proc_fops = { .open = prism2_debug_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; #endif /* PRISM2_NO_PROCFS_DEBUG */ @@ -103,7 +103,7 @@ static const struct file_operations prism2_stats_proc_fops = { .open = prism2_stats_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; @@ -265,7 +265,7 @@ static const struct file_operations prism2_crypt_proc_fops = { .open = prism2_crypt_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index a2865f17c667..37984e6d4e99 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -51,9 +51,17 @@ * This is the maximum slots a skb can have. If a guest sends a skb * which exceeds this limit it is considered malicious. */ -#define MAX_SKB_SLOTS_DEFAULT 20 -static unsigned int max_skb_slots = MAX_SKB_SLOTS_DEFAULT; -module_param(max_skb_slots, uint, 0444); +#define FATAL_SKB_SLOTS_DEFAULT 20 +static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT; +module_param(fatal_skb_slots, uint, 0444); + +/* + * To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX, indicating + * the maximum slots a valid packet can use. Now this value is defined + * to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to be supported by + * all backends. + */ +#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN typedef unsigned int pending_ring_idx_t; #define INVALID_PENDING_RING_IDX (~0U) @@ -928,18 +936,20 @@ static void netbk_fatal_tx_err(struct xenvif *vif) static int netbk_count_requests(struct xenvif *vif, struct xen_netif_tx_request *first, - RING_IDX first_idx, struct xen_netif_tx_request *txp, int work_to_do) { RING_IDX cons = vif->tx.req_cons; int slots = 0; int drop_err = 0; + int more_data; if (!(first->flags & XEN_NETTXF_more_data)) return 0; do { + struct xen_netif_tx_request dropped_tx = { 0 }; + if (slots >= work_to_do) { netdev_err(vif->dev, "Asked for %d slots but exceeds this limit\n", @@ -951,28 +961,32 @@ static int netbk_count_requests(struct xenvif *vif, /* This guest is really using too many slots and * considered malicious. */ - if (unlikely(slots >= max_skb_slots)) { + if (unlikely(slots >= fatal_skb_slots)) { netdev_err(vif->dev, "Malicious frontend using %d slots, threshold %u\n", - slots, max_skb_slots); + slots, fatal_skb_slots); netbk_fatal_tx_err(vif); return -E2BIG; } /* Xen network protocol had implicit dependency on - * MAX_SKB_FRAGS. XEN_NETIF_NR_SLOTS_MIN is set to the - * historical MAX_SKB_FRAGS value 18 to honor the same - * behavior as before. Any packet using more than 18 - * slots but less than max_skb_slots slots is dropped + * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to + * the historical MAX_SKB_FRAGS value 18 to honor the + * same behavior as before. 
Any packet using more than + * 18 slots but less than fatal_skb_slots slots is + * dropped */ - if (!drop_err && slots >= XEN_NETIF_NR_SLOTS_MIN) { + if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) { if (net_ratelimit()) netdev_dbg(vif->dev, "Too many slots (%d) exceeding limit (%d), dropping packet\n", - slots, XEN_NETIF_NR_SLOTS_MIN); + slots, XEN_NETBK_LEGACY_SLOTS_MAX); drop_err = -E2BIG; } + if (drop_err) + txp = &dropped_tx; + memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots), sizeof(*txp)); @@ -1002,10 +1016,16 @@ static int netbk_count_requests(struct xenvif *vif, netbk_fatal_tx_err(vif); return -EINVAL; } - } while ((txp++)->flags & XEN_NETTXF_more_data); + + more_data = txp->flags & XEN_NETTXF_more_data; + + if (!drop_err) + txp++; + + } while (more_data); if (drop_err) { - netbk_tx_err(vif, first, first_idx + slots); + netbk_tx_err(vif, first, cons + slots); return drop_err; } @@ -1042,7 +1062,7 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk, struct pending_tx_info *first = NULL; /* At this point shinfo->nr_frags is in fact the number of - * slots, which can be as large as XEN_NETIF_NR_SLOTS_MIN. + * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. */ nr_slots = shinfo->nr_frags; @@ -1404,12 +1424,12 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) struct sk_buff *skb; int ret; - while ((nr_pending_reqs(netbk) + XEN_NETIF_NR_SLOTS_MIN + while ((nr_pending_reqs(netbk) + XEN_NETBK_LEGACY_SLOTS_MAX < MAX_PENDING_REQS) && !list_empty(&netbk->net_schedule_list)) { struct xenvif *vif; struct xen_netif_tx_request txreq; - struct xen_netif_tx_request txfrags[max_skb_slots]; + struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX]; struct page *page; struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; u16 pending_idx; @@ -1470,8 +1490,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) continue; } - ret = netbk_count_requests(vif, &txreq, idx, - txfrags, work_to_do); + ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do); if (unlikely(ret < 0)) continue; @@ -1498,7 +1517,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) pending_idx = netbk->pending_ring[index]; data_len = (txreq.size > PKT_PROT_LEN && - ret < XEN_NETIF_NR_SLOTS_MIN) ? + ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? 
PKT_PROT_LEN : txreq.size; skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN, @@ -1777,7 +1796,7 @@ static inline int rx_work_todo(struct xen_netbk *netbk) static inline int tx_work_todo(struct xen_netbk *netbk) { - if ((nr_pending_reqs(netbk) + XEN_NETIF_NR_SLOTS_MIN + if ((nr_pending_reqs(netbk) + XEN_NETBK_LEGACY_SLOTS_MAX < MAX_PENDING_REQS) && !list_empty(&netbk->net_schedule_list)) return 1; @@ -1862,11 +1881,11 @@ static int __init netback_init(void) if (!xen_domain()) return -ENODEV; - if (max_skb_slots < XEN_NETIF_NR_SLOTS_MIN) { + if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) { printk(KERN_INFO - "xen-netback: max_skb_slots too small (%d), bump it to XEN_NETIF_NR_SLOTS_MIN (%d)\n", - max_skb_slots, XEN_NETIF_NR_SLOTS_MIN); - max_skb_slots = XEN_NETIF_NR_SLOTS_MIN; + "xen-netback: fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n", + fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX); + fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX; } xen_netbk_group_nr = num_online_cpus(); diff --git a/drivers/power/rx51_battery.c b/drivers/power/rx51_battery.c index 1a1dcb831a17..cbde1d6d3228 100644 --- a/drivers/power/rx51_battery.c +++ b/drivers/power/rx51_battery.c @@ -42,6 +42,7 @@ static int rx51_battery_read_adc(int channel) req.method = TWL4030_MADC_SW1; req.func_cb = NULL; req.type = TWL4030_MADC_WAIT; + req.raw = true; if (twl4030_madc_conversion(&req) <= 0) return -ENODATA; diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 178836ec252b..bf07c3a188d4 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -345,7 +345,6 @@ struct memory_increment { struct list_head list; u16 rn; int standby; - int usecount; }; struct assign_storage_sccb { @@ -463,21 +462,10 @@ static int sclp_mem_change_state(unsigned long start, unsigned long size, break; if (start > istart + rzm - 1) continue; - if (online) { - if (incr->usecount++) - continue; - /* - * Don't break the loop if one assign fails. Loop may - * be walked again on CANCEL and we can't save - * information if state changed before or not. - * So continue and increase usecount for all increments. - */ + if (online) rc |= sclp_assign_storage(incr->rn); - } else { - if (--incr->usecount) - continue; + else sclp_unassign_storage(incr->rn); - } } return rc ? -EIO : 0; } @@ -561,8 +549,6 @@ static void __init sclp_add_standby_memory(void) add_memory_merged(0); } -#define MEM_SCT_SIZE (1UL << SECTION_SIZE_BITS) - static void __init insert_increment(u16 rn, int standby, int assigned) { struct memory_increment *incr, *new_incr; @@ -574,8 +560,6 @@ static void __init insert_increment(u16 rn, int standby, int assigned) return; new_incr->rn = rn; new_incr->standby = standby; - if (!standby) - new_incr->usecount = rzm > MEM_SCT_SIZE ? 
rzm/MEM_SCT_SIZE : 1; last_rn = 0; prev = &sclp_mem_list; list_for_each_entry(incr, &sclp_mem_list, list) { diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 22820610022c..9e5e14686e75 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -426,7 +426,7 @@ static int zcore_memmap_open(struct inode *inode, struct file *filp) GFP_KERNEL); if (!chunk_array) return -ENOMEM; - detect_memory_layout(chunk_array); + detect_memory_layout(chunk_array, 0); buf = kzalloc(MEMORY_CHUNKS * CHUNK_INFO_SIZE, GFP_KERNEL); if (!buf) { kfree(chunk_array); @@ -557,7 +557,7 @@ static void __init set_lc_mask(struct save_area *map) /* * Initialize dump globals for a given architecture */ -static int __init sys_info_init(enum arch_id arch) +static int __init sys_info_init(enum arch_id arch, unsigned long mem_end) { int rc; @@ -579,7 +579,7 @@ static int __init sys_info_init(enum arch_id arch) rc = init_cpu_info(arch); if (rc) return rc; - sys_info.mem_size = real_memory_size; + sys_info.mem_size = mem_end; return 0; } @@ -601,7 +601,7 @@ static int __init check_sdias(void) return 0; } -static int __init get_mem_size(unsigned long *mem) +static int __init get_mem_info(unsigned long *mem, unsigned long *end) { int i; struct mem_chunk *chunk_array; @@ -610,33 +610,31 @@ static int __init get_mem_size(unsigned long *mem) GFP_KERNEL); if (!chunk_array) return -ENOMEM; - detect_memory_layout(chunk_array); + detect_memory_layout(chunk_array, 0); for (i = 0; i < MEMORY_CHUNKS; i++) { if (chunk_array[i].size == 0) break; *mem += chunk_array[i].size; + *end = max(*end, chunk_array[i].addr + chunk_array[i].size); } kfree(chunk_array); return 0; } -static int __init zcore_header_init(int arch, struct zcore_header *hdr) +static void __init zcore_header_init(int arch, struct zcore_header *hdr, + unsigned long mem_size) { - int rc, i; - unsigned long memory = 0; u32 prefix; + int i; if (arch == ARCH_S390X) hdr->arch_id = DUMP_ARCH_S390X; else hdr->arch_id = DUMP_ARCH_S390; - rc = get_mem_size(&memory); - if (rc) - return rc; - hdr->mem_size = memory; - hdr->rmem_size = memory; + hdr->mem_size = mem_size; + hdr->rmem_size = mem_size; hdr->mem_end = sys_info.mem_size; - hdr->num_pages = memory / PAGE_SIZE; + hdr->num_pages = mem_size / PAGE_SIZE; hdr->tod = get_tod_clock(); get_cpu_id(&hdr->cpu_id); for (i = 0; zfcpdump_save_areas[i]; i++) { @@ -647,7 +645,6 @@ static int __init zcore_header_init(int arch, struct zcore_header *hdr) hdr->lc_vec[hdr->cpu_cnt] = prefix; hdr->cpu_cnt++; } - return 0; } /* @@ -682,9 +679,11 @@ static int __init zcore_reipl_init(void) static int __init zcore_init(void) { + unsigned long mem_size, mem_end; unsigned char arch; int rc; + mem_size = mem_end = 0; if (ipl_info.type != IPL_TYPE_FCP_DUMP) return -ENODATA; if (OLDMEM_BASE) @@ -727,13 +726,14 @@ static int __init zcore_init(void) } #endif /* CONFIG_64BIT */ - rc = sys_info_init(arch); + rc = get_mem_info(&mem_size, &mem_end); if (rc) goto fail; - rc = zcore_header_init(arch, &zcore_header); + rc = sys_info_init(arch, mem_end); if (rc) goto fail; + zcore_header_init(arch, &zcore_header, mem_size); rc = zcore_reipl_init(); if (rc) diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index 2d2a966a3b39..a9fe3de2dec1 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -1,7 +1,7 @@ /* * S/390 common I/O routines -- blacklisting of specific devices * - * Copyright IBM Corp. 1999, 2002 + * Copyright IBM Corp. 
1999, 2013 * Author(s): Ingo Adlung ([email protected]) * Cornelia Huck ([email protected]) * Arnd Bergmann ([email protected]) @@ -17,8 +17,9 @@ #include <linux/ctype.h> #include <linux/device.h> -#include <asm/cio.h> #include <asm/uaccess.h> +#include <asm/cio.h> +#include <asm/ipl.h> #include "blacklist.h" #include "cio.h" @@ -172,6 +173,29 @@ static int blacklist_parse_parameters(char *str, range_action action, to_cssid = __MAX_CSSID; to_ssid = __MAX_SSID; to = __MAX_SUBCHANNEL; + } else if (strcmp(parm, "ipldev") == 0) { + if (ipl_info.type == IPL_TYPE_CCW) { + from_cssid = 0; + from_ssid = ipl_info.data.ccw.dev_id.ssid; + from = ipl_info.data.ccw.dev_id.devno; + } else if (ipl_info.type == IPL_TYPE_FCP || + ipl_info.type == IPL_TYPE_FCP_DUMP) { + from_cssid = 0; + from_ssid = ipl_info.data.fcp.dev_id.ssid; + from = ipl_info.data.fcp.dev_id.devno; + } else { + continue; + } + to_cssid = from_cssid; + to_ssid = from_ssid; + to = from; + } else if (strcmp(parm, "condev") == 0) { + if (console_devno == -1) + continue; + + from_cssid = to_cssid = 0; + from_ssid = to_ssid = 0; + from = to = console_devno; } else { rc = parse_busid(strsep(&parm, "-"), &from_cssid, &from_ssid, &from, msgtrigger); diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index b8b340ac5332..9de41aa14896 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -954,15 +954,11 @@ EXPORT_SYMBOL(ap_driver_unregister); void ap_bus_force_rescan(void) { - /* Delete the AP bus rescan timer. */ - del_timer(&ap_config_timer); - - /* processing a synchonuous bus rescan */ - ap_scan_bus(NULL); - - /* Setup the AP bus rescan timer again. */ - ap_config_timer.expires = jiffies + ap_config_time * HZ; - add_timer(&ap_config_timer); + /* reconfigure the AP bus rescan timer. 
*/ + mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ); + /* processing an asynchronous bus rescan */ + queue_work(ap_work_queue, &ap_config_work); + flush_work(&ap_config_work); } EXPORT_SYMBOL(ap_bus_force_rescan); @@ -1305,8 +1301,9 @@ static void ap_scan_bus(struct work_struct *unused) int rc, i; ap_query_configuration(); - if (ap_select_domain() != 0) + if (ap_select_domain() != 0) { return; + } for (i = 0; i < AP_DEVICES; i++) { qid = AP_MKQID(i, ap_domain_index); dev = bus_find_device(&ap_bus_type, NULL, diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 6711e65764b5..2ea6165366b6 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -443,29 +443,30 @@ static int __init test_devices_support(unsigned long addr) } /* * Init function for virtio - * devices are in a single page above top of "normal" mem + * devices are in a single page above top of "normal" + standby mem */ static int __init kvm_devices_init(void) { int rc; + unsigned long total_memory_size = sclp_get_rzm() * sclp_get_rnmax(); if (!MACHINE_IS_KVM) return -ENODEV; - if (test_devices_support(real_memory_size) < 0) + if (test_devices_support(total_memory_size) < 0) return -ENODEV; - rc = vmem_add_mapping(real_memory_size, PAGE_SIZE); + rc = vmem_add_mapping(total_memory_size, PAGE_SIZE); if (rc) return rc; - kvm_devices = (void *) real_memory_size; + kvm_devices = (void *) total_memory_size; kvm_root = root_device_register("kvm_s390"); if (IS_ERR(kvm_root)) { rc = PTR_ERR(kvm_root); printk(KERN_ERR "Could not register kvm_s390 root device"); - vmem_remove_mapping(real_memory_size, PAGE_SIZE); + vmem_remove_mapping(total_memory_size, PAGE_SIZE); return rc; } diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c index fb877b59ec57..779dc5136291 100644 --- a/drivers/s390/kvm/virtio_ccw.c +++ b/drivers/s390/kvm/virtio_ccw.c @@ -31,6 +31,7 @@ #include <asm/irq.h> #include <asm/cio.h> #include <asm/ccwdev.h> +#include <asm/virtio-ccw.h> /* * virtio related functions */ @@ -77,12 +78,9 @@ struct virtio_ccw_vq_info { void *queue; struct vq_info_block *info_block; struct list_head node; + long cookie; }; -#define KVM_VIRTIO_CCW_RING_ALIGN 4096 - -#define KVM_S390_VIRTIO_CCW_NOTIFY 3 - #define CCW_CMD_SET_VQ 0x13 #define CCW_CMD_VDEV_RESET 0x33 #define CCW_CMD_SET_IND 0x43 @@ -135,8 +133,11 @@ static int ccw_io_helper(struct virtio_ccw_device *vcdev, do { spin_lock_irqsave(get_ccwdev_lock(vcdev->cdev), flags); ret = ccw_device_start(vcdev->cdev, ccw, intparm, 0, 0); - if (!ret) + if (!ret) { + if (!vcdev->curr_io) + vcdev->err = 0; vcdev->curr_io |= flag; + } spin_unlock_irqrestore(get_ccwdev_lock(vcdev->cdev), flags); cpu_relax(); } while (ret == -EBUSY); @@ -145,15 +146,18 @@ static int ccw_io_helper(struct virtio_ccw_device *vcdev, } static inline long do_kvm_notify(struct subchannel_id schid, - unsigned long queue_index) + unsigned long queue_index, + long cookie) { register unsigned long __nr asm("1") = KVM_S390_VIRTIO_CCW_NOTIFY; register struct subchannel_id __schid asm("2") = schid; register unsigned long __index asm("3") = queue_index; register long __rc asm("2"); + register long __cookie asm("4") = cookie; asm volatile ("diag 2,4,0x500\n" - : "=d" (__rc) : "d" (__nr), "d" (__schid), "d" (__index) + : "=d" (__rc) : "d" (__nr), "d" (__schid), "d" (__index), + "d"(__cookie) : "memory", "cc"); return __rc; } @@ -166,7 +170,7 @@ static void virtio_ccw_kvm_notify(struct virtqueue *vq) vcdev = to_vc_device(info->vq->vdev); 
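The extra register in the diag 0x500 sequence above carries a notify cookie: the hypercall returns either a negative error or a host-side lookup hint, which the guest stores per virtqueue and hands back on its next notification so the host can find the queue quickly. The guest-side pattern, with an illustrative stand-in for the actual diag call:

#include <asm/cio.h>	/* struct subchannel_id */

/* example_notify() stands in for the do_kvm_notify() diag shown above. */
static long example_notify(struct subchannel_id schid,
			   unsigned long queue_index, long cookie);

struct example_vq_info {
	long cookie;	/* starts at 0: no hint from the host yet */
};

static void example_kick(struct example_vq_info *info,
			 struct subchannel_id schid,
			 unsigned long queue_index)
{
	/* Store whatever comes back: a fresh hint on success, or a
	 * negative value if the notification failed. */
	info->cookie = example_notify(schid, queue_index, info->cookie);
}
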
ccw_device_get_schid(vcdev->cdev, &schid); - do_kvm_notify(schid, vq->index); + info->cookie = do_kvm_notify(schid, vq->index, info->cookie); } static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev, diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 7373255aa1e8..846f475f62c1 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -2770,7 +2770,7 @@ static const struct file_operations mega_proc_fops = { .open = mega_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; /* diff --git a/drivers/staging/comedi/proc.c b/drivers/staging/comedi/proc.c index db790f9fc9db..886c202de9ab 100644 --- a/drivers/staging/comedi/proc.c +++ b/drivers/staging/comedi/proc.c @@ -86,7 +86,7 @@ static const struct file_operations comedi_proc_fops = { .open = comedi_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; void comedi_proc_init(void) diff --git a/drivers/staging/csr/io.c b/drivers/staging/csr/io.c index f9b5c22c00b8..fe4a7ba2acc9 100644 --- a/drivers/staging/csr/io.c +++ b/drivers/staging/csr/io.c @@ -95,7 +95,7 @@ static const struct file_operations uf_proc_fops = { .open = uf_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; #endif /* CONFIG_PROC_FS */ diff --git a/drivers/staging/cxt1e1/sbeproc.c b/drivers/staging/cxt1e1/sbeproc.c index 49f10f0b7d29..9361dd8ce125 100644 --- a/drivers/staging/cxt1e1/sbeproc.c +++ b/drivers/staging/cxt1e1/sbeproc.c @@ -189,7 +189,7 @@ static const struct file_operations sbecom_proc_fops = { .open = sbecom_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; /* diff --git a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_proc.c b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_proc.c index 21b369e0150f..94e426e4d98b 100644 --- a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_proc.c +++ b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_proc.c @@ -158,7 +158,7 @@ static const struct file_operations ft1000_proc_fops = { .open = ft1000_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; static int ft1000NotifyProc(struct notifier_block *this, unsigned long event, diff --git a/drivers/staging/ft1000/ft1000-usb/ft1000_proc.c b/drivers/staging/ft1000/ft1000-usb/ft1000_proc.c index d8294d6c9560..eca6f0292b4b 100644 --- a/drivers/staging/ft1000/ft1000-usb/ft1000_proc.c +++ b/drivers/staging/ft1000/ft1000-usb/ft1000_proc.c @@ -160,7 +160,7 @@ static const struct file_operations ft1000_proc_fops = { .open = ft1000_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; static int diff --git a/drivers/staging/rtl8187se/r8180_core.c b/drivers/staging/rtl8187se/r8180_core.c index f7c1d9905ec6..ca691550436a 100644 --- a/drivers/staging/rtl8187se/r8180_core.c +++ b/drivers/staging/rtl8187se/r8180_core.c @@ -306,7 +306,7 @@ static const struct file_operations rtl8180_proc_fops = { .open = rtl8180_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; /* diff --git a/drivers/staging/rtl8192u/r8192U_core.c b/drivers/staging/rtl8192u/r8192U_core.c index 145923397556..71f5cde9ed1c 100644 --- a/drivers/staging/rtl8192u/r8192U_core.c +++ b/drivers/staging/rtl8192u/r8192U_core.c @@ -647,7 +647,7 @@ static const struct file_operations rtl8192_proc_fops = { .open = rtl8192_proc_open, 
.read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; /* diff --git a/drivers/staging/wlags49_h2/wl_main.c b/drivers/staging/wlags49_h2/wl_main.c index c4264e8c877d..f28f15baea96 100644 --- a/drivers/staging/wlags49_h2/wl_main.c +++ b/drivers/staging/wlags49_h2/wl_main.c @@ -160,7 +160,7 @@ static const struct file_operations scull_read_procmem_fops = { .open = scull_read_procmem_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; #endif /* SCULL_USE_PROC */ diff --git a/drivers/tty/serial/sunsu.c b/drivers/tty/serial/sunsu.c index 451687cb9685..0d8465728473 100644 --- a/drivers/tty/serial/sunsu.c +++ b/drivers/tty/serial/sunsu.c @@ -1592,6 +1592,7 @@ static int __init sunsu_init(void) static void __exit sunsu_exit(void) { + platform_driver_unregister(&su_driver); if (sunsu_reg.nr) sunserial_unregister_minors(&sunsu_reg, sunsu_reg.nr); } diff --git a/drivers/usb/gadget/fsl_udc_core.c b/drivers/usb/gadget/fsl_udc_core.c index 2d8c1cfea699..a766a4ca1cb7 100644 --- a/drivers/usb/gadget/fsl_udc_core.c +++ b/drivers/usb/gadget/fsl_udc_core.c @@ -2211,7 +2211,7 @@ static const struct file_operations fsl_proc_fops = { .open = fsl_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; #define create_proc_file() proc_create(proc_filename, 0, NULL, &fsl_proc_fops) diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c index 480eeb7cfd92..52dd6cc6c0aa 100644 --- a/drivers/usb/gadget/goku_udc.c +++ b/drivers/usb/gadget/goku_udc.c @@ -1214,7 +1214,7 @@ static const struct file_operations udc_proc_fops = { .open = udc_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; #endif /* CONFIG_USB_GADGET_DEBUG_FILES */ diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 01443ce43ee7..13ddec92341c 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -61,15 +61,6 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) /* This is an autofs submount, we can't expire it */ if (autofs_type_indirect(sbi->type)) goto done; - - /* - * Otherwise it's an offset mount and we need to check - * if we can umount its mount, if there is one. 
- */ - if (!d_mountpoint(path.dentry)) { - status = 0; - goto done; - } } /* Update the expiry counter if fs is busy */ diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 9bd16255dd9c..085da86e07c2 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -408,7 +408,7 @@ done: return NULL; } -int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) +static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h index a8ece9a33aef..2c9e62c2bfd0 100644 --- a/include/asm-generic/cputime_nsecs.h +++ b/include/asm-generic/cputime_nsecs.h @@ -16,21 +16,27 @@ #ifndef _ASM_GENERIC_CPUTIME_NSECS_H #define _ASM_GENERIC_CPUTIME_NSECS_H +#include <linux/math64.h> + typedef u64 __nocast cputime_t; typedef u64 __nocast cputime64_t; #define cputime_one_jiffy jiffies_to_cputime(1) +#define cputime_div(__ct, divisor) div_u64((__force u64)__ct, divisor) +#define cputime_div_rem(__ct, divisor, remainder) \ + div_u64_rem((__force u64)__ct, divisor, remainder); + /* * Convert cputime <-> jiffies (HZ) */ #define cputime_to_jiffies(__ct) \ - ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) + cputime_div(__ct, NSEC_PER_SEC / HZ) #define cputime_to_scaled(__ct) (__ct) #define jiffies_to_cputime(__jif) \ (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ)) #define cputime64_to_jiffies64(__ct) \ - ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) + cputime_div(__ct, NSEC_PER_SEC / HZ) #define jiffies64_to_cputime64(__jif) \ (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ)) @@ -45,7 +51,7 @@ typedef u64 __nocast cputime64_t; * Convert cputime <-> microseconds */ #define cputime_to_usecs(__ct) \ - ((__force u64)(__ct) / NSEC_PER_USEC) + cputime_div(__ct, NSEC_PER_USEC) #define usecs_to_cputime(__usecs) \ (__force cputime_t)((__usecs) * NSEC_PER_USEC) #define usecs_to_cputime64(__usecs) \ @@ -55,7 +61,7 @@ typedef u64 __nocast cputime64_t; * Convert cputime <-> seconds */ #define cputime_to_secs(__ct) \ - ((__force u64)(__ct) / NSEC_PER_SEC) + cputime_div(__ct, NSEC_PER_SEC) #define secs_to_cputime(__secs) \ (__force cputime_t)((__secs) * NSEC_PER_SEC) @@ -69,8 +75,10 @@ static inline cputime_t timespec_to_cputime(const struct timespec *val) } static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val) { - val->tv_sec = (__force u64) ct / NSEC_PER_SEC; - val->tv_nsec = (__force u64) ct % NSEC_PER_SEC; + u32 rem; + + val->tv_sec = cputime_div_rem(ct, NSEC_PER_SEC, &rem); + val->tv_nsec = rem; } /* @@ -83,15 +91,17 @@ static inline cputime_t timeval_to_cputime(const struct timeval *val) } static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val) { - val->tv_sec = (__force u64) ct / NSEC_PER_SEC; - val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC; + u32 rem; + + val->tv_sec = cputime_div_rem(ct, NSEC_PER_SEC, &rem); + val->tv_usec = rem / NSEC_PER_USEC; } /* * Convert cputime <-> clock (USER_HZ) */ #define cputime_to_clock_t(__ct) \ - ((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ)) + cputime_div(__ct, (NSEC_PER_SEC / USER_HZ)) #define clock_t_to_cputime(__x) \ (__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ)) diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index 0501fa3f783d..cccc86ecfeaa 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -1,4 +1,5 @@ #include <uapi/asm-generic/unistd.h> +#include 
<linux/export.h> /* * These are required system calls, we should diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index afa12c7a025c..eb58d2d7d971 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -52,13 +52,7 @@ #define LOAD_OFFSET 0 #endif -#ifndef SYMBOL_PREFIX -#define VMLINUX_SYMBOL(sym) sym -#else -#define PASTE2(x,y) x##y -#define PASTE(x,y) PASTE2(x,y) -#define VMLINUX_SYMBOL(sym) PASTE(SYMBOL_PREFIX, sym) -#endif +#include <linux/export.h> /* Align . to a 8 byte boundary equals to maximum function alignment. */ #define ALIGN_FUNCTION() . = ALIGN(8) diff --git a/include/linux/export.h b/include/linux/export.h index 696c0f48afc7..412cd509effe 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -5,17 +5,24 @@ * to reduce the amount of pointless cruft we feed to gcc when only * exporting a simple symbol or two. * - * If you feel the need to add #include <linux/foo.h> to this file - * then you are doing something wrong and should go away silently. + * Try not to add #includes here. It slows compilation and makes kernel + * hackers place grumpy comments in header files. */ /* Some toolchains use a `_' prefix for all user symbols. */ -#ifdef CONFIG_SYMBOL_PREFIX -#define MODULE_SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX +#ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX +#define __VMLINUX_SYMBOL(x) _##x +#define __VMLINUX_SYMBOL_STR(x) "_" #x #else -#define MODULE_SYMBOL_PREFIX "" +#define __VMLINUX_SYMBOL(x) x +#define __VMLINUX_SYMBOL_STR(x) #x #endif +/* Indirect, so macros are expanded before pasting. */ +#define VMLINUX_SYMBOL(x) __VMLINUX_SYMBOL(x) +#define VMLINUX_SYMBOL_STR(x) __VMLINUX_SYMBOL_STR(x) + +#ifndef __ASSEMBLY__ struct kernel_symbol { unsigned long value; @@ -51,7 +58,7 @@ extern struct module __this_module; __CRC_SYMBOL(sym, sec) \ static const char __kstrtab_##sym[] \ __attribute__((section("__ksymtab_strings"), aligned(1))) \ - = MODULE_SYMBOL_PREFIX #sym; \ + = VMLINUX_SYMBOL_STR(sym); \ static const struct kernel_symbol __ksymtab_##sym \ __used \ __attribute__((section("___ksymtab" sec "+" #sym), unused)) \ @@ -85,5 +92,6 @@ extern struct module __this_module; #define EXPORT_UNUSED_SYMBOL_GPL(sym) #endif /* CONFIG_MODULES */ +#endif /* !__ASSEMBLY__ */ #endif /* _LINUX_EXPORT_H */ diff --git a/include/linux/i2c/twl4030-madc.h b/include/linux/i2c/twl4030-madc.h index 530e11ba0738..01f595107048 100644 --- a/include/linux/i2c/twl4030-madc.h +++ b/include/linux/i2c/twl4030-madc.h @@ -39,6 +39,7 @@ struct twl4030_madc_conversion_method { * @do_avgP: sample the input channel for 4 consecutive cycles * @method: RT, SW1, SW2 * @type: Polling or interrupt based method + * @raw: Return raw value, do not convert it */ struct twl4030_madc_request { @@ -48,6 +49,7 @@ struct twl4030_madc_request { u16 type; bool active; bool result_pending; + bool raw; int rbuf[TWL4030_MADC_MAX_CHANNELS]; void (*func_cb)(int len, int channels, int *buf); }; diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h index 5f3aa6b11bfa..27e06acc509a 100644 --- a/include/linux/input/matrix_keypad.h +++ b/include/linux/input/matrix_keypad.h @@ -81,4 +81,23 @@ int matrix_keypad_build_keymap(const struct matrix_keymap_data *keymap_data, unsigned short *keymap, struct input_dev *input_dev); +#ifdef CONFIG_OF +/** + * matrix_keypad_parse_of_params() - Read parameters from matrix-keypad node + * + * @dev: Device containing of_node + * @rows: Returns number of matrix rows + * @cols: Returns number of 
matrix columns + * @return 0 if OK, <0 on error + */ +int matrix_keypad_parse_of_params(struct device *dev, + unsigned int *rows, unsigned int *cols); +#else +static inline int matrix_keypad_parse_of_params(struct device *dev, + unsigned int *rows, unsigned int *cols) +{ + return -ENOSYS; +} +#endif /* CONFIG_OF */ + #endif /* _MATRIX_KEYPAD_H */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 6d1844f393c0..e96329ceb28c 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -786,13 +786,6 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } /* Trap pasters of __FUNCTION__ at compile-time */ #define __FUNCTION__ (__func__) -/* This helps us to avoid #ifdef CONFIG_SYMBOL_PREFIX */ -#ifdef CONFIG_SYMBOL_PREFIX -#define SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX -#else -#define SYMBOL_PREFIX "" -#endif - /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */ #ifdef CONFIG_FTRACE_MCOUNT_RECORD # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c13958251927..f0eea07d2c2b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -117,14 +117,13 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_APF_HALT 12 #define KVM_REQ_STEAL_UPDATE 13 #define KVM_REQ_NMI 14 -#define KVM_REQ_IMMEDIATE_EXIT 15 -#define KVM_REQ_PMU 16 -#define KVM_REQ_PMI 17 -#define KVM_REQ_WATCHDOG 18 -#define KVM_REQ_MASTERCLOCK_UPDATE 19 -#define KVM_REQ_MCLOCK_INPROGRESS 20 -#define KVM_REQ_EPR_EXIT 21 -#define KVM_REQ_EOIBITMAP 22 +#define KVM_REQ_PMU 15 +#define KVM_REQ_PMI 16 +#define KVM_REQ_WATCHDOG 17 +#define KVM_REQ_MASTERCLOCK_UPDATE 18 +#define KVM_REQ_MCLOCK_INPROGRESS 19 +#define KVM_REQ_EPR_EXIT 20 +#define KVM_REQ_SCAN_IOAPIC 21 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 @@ -133,6 +132,9 @@ struct kvm; struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; +extern raw_spinlock_t kvm_lock; +extern struct list_head vm_list; + struct kvm_io_range { gpa_t addr; int len; @@ -149,6 +151,7 @@ struct kvm_io_bus { enum kvm_bus { KVM_MMIO_BUS, KVM_PIO_BUS, + KVM_VIRTIO_CCW_NOTIFY_BUS, KVM_NR_BUSES }; @@ -252,6 +255,7 @@ struct kvm_vcpu { bool dy_eligible; } spin_loop; #endif + bool preempted; struct kvm_vcpu_arch arch; }; @@ -285,7 +289,8 @@ struct kvm_kernel_irq_routing_entry { u32 gsi; u32 type; int (*set)(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level); + struct kvm *kvm, int irq_source_id, int level, + bool line_status); union { struct { unsigned irqchip; @@ -296,10 +301,10 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING struct kvm_irq_routing_table { - int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS]; + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; struct kvm_kernel_irq_routing_entry *rt_entries; u32 nr_rt_entries; /* @@ -385,6 +390,7 @@ struct kvm { long mmu_notifier_count; #endif long tlbs_dirty; + struct list_head devices; }; #define kvm_err(fmt, ...) 
\ @@ -424,6 +430,19 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); int __must_check vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +int kvm_irqfd_init(void); +void kvm_irqfd_exit(void); +#else +static inline int kvm_irqfd_init(void) +{ + return 0; +} + +static inline void kvm_irqfd_exit(void) +{ +} +#endif int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, struct module *module); void kvm_exit(void); @@ -452,24 +471,39 @@ id_to_memslot(struct kvm_memslots *slots, int id) return slot; } +/* + * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: + * - create a new memory slot + * - delete an existing memory slot + * - modify an existing memory slot + * -- move it in the guest physical memory space + * -- just change its flags + * + * Since flags can be changed by some of these operations, the following + * differentiation is the best we can do for __kvm_set_memory_region(): + */ +enum kvm_mr_change { + KVM_MR_CREATE, + KVM_MR_DELETE, + KVM_MR_MOVE, + KVM_MR_FLAGS_ONLY, +}; + int kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc); + struct kvm_userspace_memory_region *mem); int __kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc); + struct kvm_userspace_memory_region *mem); void kvm_arch_free_memslot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont); int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - bool user_alloc); + enum kvm_mr_change change); void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - bool user_alloc); + const struct kvm_memory_slot *old, + enum kvm_mr_change change); bool kvm_largepages_enabled(void); void kvm_disable_largepages(void); /* flush all memory translations */ @@ -539,7 +573,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_reload_remote_mmus(struct kvm *kvm); void kvm_make_mclock_inprogress_request(struct kvm *kvm); -void kvm_make_update_eoibitmap_request(struct kvm *kvm); +void kvm_make_scan_ioapic_request(struct kvm *kvm); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); @@ -555,10 +589,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, - struct - kvm_userspace_memory_region *mem, - bool user_alloc); -int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level); + struct kvm_userspace_memory_region *mem); +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, + bool line_status); long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); @@ -632,7 +665,6 @@ static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); void kvm_arch_destroy_vm(struct kvm *kvm); -void kvm_free_all_assigned_devices(struct kvm *kvm); void kvm_arch_sync_events(struct kvm *kvm); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); @@ -684,15 +716,11 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask); -#ifdef __KVM_HAVE_IOAPIC 
-void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, - union kvm_ioapic_redirect_entry *entry, - unsigned long *deliver_bitmask); -#endif -int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level); +int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, + bool line_status); int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, - int irq_source_id, int level); + int irq_source_id, int level, bool line_status); bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, @@ -705,7 +733,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); /* For vcpu->arch.iommu_flags */ #define KVM_IOMMU_CACHE_COHERENCY 0x1 -#ifdef CONFIG_IOMMU_API +#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot); int kvm_iommu_map_guest(struct kvm *kvm); @@ -714,7 +742,7 @@ int kvm_assign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev); int kvm_deassign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev); -#else /* CONFIG_IOMMU_API */ +#else static inline int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) { @@ -726,28 +754,11 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm, { } -static inline int kvm_iommu_map_guest(struct kvm *kvm) -{ - return -ENODEV; -} - static inline int kvm_iommu_unmap_guest(struct kvm *kvm) { return 0; } - -static inline int kvm_assign_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - return 0; -} - -static inline int kvm_deassign_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - return 0; -} -#endif /* CONFIG_IOMMU_API */ +#endif static inline void __guest_enter(void) { @@ -921,7 +932,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) } #endif -#ifdef KVM_CAP_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING #define KVM_MAX_IRQ_ROUTES 1024 @@ -930,6 +941,9 @@ int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, unsigned flags); +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue); void kvm_free_irq_routing(struct kvm *kvm); int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); @@ -998,11 +1012,13 @@ static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; } #endif -#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT +#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, unsigned long arg); +void kvm_free_all_assigned_devices(struct kvm *kvm); + #else static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, @@ -1011,6 +1027,8 @@ static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, return -ENOTTY; } +static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {} + #endif static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) @@ -1028,6 +1046,46 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) } } +extern bool kvm_rebooting; + +struct kvm_device_ops; + +struct kvm_device { + struct kvm_device_ops *ops; + struct kvm *kvm; + 
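/* back-end specific state, set up by ops->create() and torn down by ops->destroy() */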
void *private; + struct list_head vm_node; +}; + +/* create, destroy, and name are mandatory */ +struct kvm_device_ops { + const char *name; + int (*create)(struct kvm_device *dev, u32 type); + + /* + * Destroy is responsible for freeing dev. + * + * Destroy may be called before or after destructors are called + * on emulated I/O regions, depending on whether a reference is + * held by a vcpu or other kvm component that gets destroyed + * after the emulated I/O. + */ + void (*destroy)(struct kvm_device *dev); + + int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); + int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); + int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); + long (*ioctl)(struct kvm_device *dev, unsigned int ioctl, + unsigned long arg); +}; + +void kvm_device_get(struct kvm_device *dev); +void kvm_device_put(struct kvm_device *dev); +struct kvm_device *kvm_device_from_filp(struct file *filp); + +extern struct kvm_device_ops kvm_mpic_ops; +extern struct kvm_device_ops kvm_xics_ops; + #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) diff --git a/include/linux/linkage.h b/include/linux/linkage.h index de09dec25ec3..d3e8ad23a8e0 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -3,6 +3,7 @@ #include <linux/compiler.h> #include <linux/stringify.h> +#include <linux/export.h> #include <asm/linkage.h> #ifdef __cplusplus @@ -15,21 +16,18 @@ #define asmlinkage CPP_ASMLINKAGE #endif -#ifdef CONFIG_SYMBOL_PREFIX -#define __SYMBOL_NAME(x) CONFIG_SYMBOL_PREFIX __stringify(x) -#else -#define __SYMBOL_NAME(x) __stringify(x) -#endif - #ifndef cond_syscall -#define cond_syscall(x) asm(".weak\t" __SYMBOL_NAME(x) \ - "\n\t.set\t" __SYMBOL_NAME(x) "," __SYMBOL_NAME(sys_ni_syscall)); +#define cond_syscall(x) asm( \ + ".weak " VMLINUX_SYMBOL_STR(x) "\n\t" \ + ".set " VMLINUX_SYMBOL_STR(x) "," \ + VMLINUX_SYMBOL_STR(sys_ni_syscall)) #endif #ifndef SYSCALL_ALIAS -#define SYSCALL_ALIAS(alias, name) \ - asm ("\t.globl " __SYMBOL_NAME(alias) \ - "\n\t.set\t" __SYMBOL_NAME(alias) "," __SYMBOL_NAME(name)) +#define SYSCALL_ALIAS(alias, name) asm( \ + ".globl " VMLINUX_SYMBOL_STR(alias) "\n\t" \ + ".set " VMLINUX_SYMBOL_STR(alias) "," \ + VMLINUX_SYMBOL_STR(name)) #endif #define __page_aligned_data __section(.data..page_aligned) __aligned(PAGE_SIZE) diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index a0f940987a3e..80dead1f7100 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -78,6 +78,7 @@ struct arizona_micbias { unsigned int ext_cap:1; /** External capacitor fitted */ unsigned int discharge:1; /** Actively discharge */ unsigned int fast_start:1; /** Enable aggressive startup ramp rate */ + unsigned int bypass:1; /** Use bypass mode */ }; struct arizona_micd_config { @@ -104,7 +105,8 @@ struct arizona_pdata { /** If a direct 32kHz clock is provided on an MCLK specify it here */ int clk32k_src; - bool irq_active_high; /** IRQ polarity */ + /** Mode for primary IRQ (defaults to active low) */ + unsigned int irq_flags; /* Base GPIO */ int gpio_base; @@ -183,6 +185,9 @@ struct arizona_pdata { /** Haptic actuator type */ unsigned int hap_act; + + /** GPIO for primary IRQ (used for edge triggered emulation) */ + int irq_gpio; }; #endif diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h new file mode 100644 index 000000000000..032af7fc5b2e --- /dev/null +++ 
b/include/linux/mfd/cros_ec.h @@ -0,0 +1,170 @@ +/* + * ChromeOS EC multi-function device + * + * Copyright (C) 2012 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __LINUX_MFD_CROS_EC_H +#define __LINUX_MFD_CROS_EC_H + +#include <linux/mfd/cros_ec_commands.h> + +/* + * Command interface between EC and AP, for LPC, I2C and SPI interfaces. + */ +enum { + EC_MSG_TX_HEADER_BYTES = 3, + EC_MSG_TX_TRAILER_BYTES = 1, + EC_MSG_TX_PROTO_BYTES = EC_MSG_TX_HEADER_BYTES + + EC_MSG_TX_TRAILER_BYTES, + EC_MSG_RX_PROTO_BYTES = 3, + + /* Max length of messages */ + EC_MSG_BYTES = EC_HOST_PARAM_SIZE + EC_MSG_TX_PROTO_BYTES, + +}; + +/** + * struct cros_ec_msg - A message sent to the EC, and its reply + * + * @version: Command version number (often 0) + * @cmd: Command to send (EC_CMD_...) + * @out_buf: Outgoing payload (to EC) + * @out_len: Outgoing length + * @in_buf: Incoming payload (from EC) + * @in_len: Incoming length + */ +struct cros_ec_msg { + u8 version; + u8 cmd; + uint8_t *out_buf; + int out_len; + uint8_t *in_buf; + int in_len; +}; + +/** + * struct cros_ec_device - Information about a ChromeOS EC device + * + * @name: Name of this EC interface + * @priv: Private data + * @irq: Interrupt to use + * @din: input buffer (from EC) + * @dout: output buffer (to EC) + * \note + * These two buffers will always be dword-aligned and include enough + * space for up to 7 word-alignment bytes also, so we can ensure that + * the body of the message is always dword-aligned (64-bit). + * + * We use this alignment to keep ARM and x86 happy. Probably word + * alignment would be OK; there might be a small performance advantage + * to using dword. + * @din_size: size of din buffer + * @dout_size: size of dout buffer + * @command_send: send a command + * @command_recv: receive a command + * @ec_name: name of EC device (e.g. 'chromeos-ec') + * @phys_name: name of physical comms layer (e.g. 'i2c-4') + * @parent: pointer to parent device (e.g.
i2c or spi device) + * @dev: Device pointer + * @dev_lock: Lock to prevent concurrent access + * @wake_enabled: true if this device can wake the system from sleep + * @was_wake_device: true if this device was set to wake the system from + * sleep at the last suspend + * @event_notifier: interrupt event notifier for transport devices + */ +struct cros_ec_device { + const char *name; + void *priv; + int irq; + uint8_t *din; + uint8_t *dout; + int din_size; + int dout_size; + int (*command_send)(struct cros_ec_device *ec, + uint16_t cmd, void *out_buf, int out_len); + int (*command_recv)(struct cros_ec_device *ec, + uint16_t cmd, void *in_buf, int in_len); + int (*command_sendrecv)(struct cros_ec_device *ec, + uint16_t cmd, void *out_buf, int out_len, + void *in_buf, int in_len); + int (*command_xfer)(struct cros_ec_device *ec, + struct cros_ec_msg *msg); + + const char *ec_name; + const char *phys_name; + struct device *parent; + + /* These are --private-- fields - do not assign */ + struct device *dev; + struct mutex dev_lock; + bool wake_enabled; + bool was_wake_device; + struct blocking_notifier_head event_notifier; +}; + +/** + * cros_ec_suspend - Handle a suspend operation for the ChromeOS EC device + * + * This can be called by drivers to handle a suspend event. + * + * @ec_dev: Device to suspend + * @return 0 if ok, -ve on error + */ +int cros_ec_suspend(struct cros_ec_device *ec_dev); + +/** + * cros_ec_resume - Handle a resume operation for the ChromeOS EC device + * + * This can be called by drivers to handle a resume event. + * + * @ec_dev: Device to resume + * @return 0 if ok, -ve on error + */ +int cros_ec_resume(struct cros_ec_device *ec_dev); + +/** + * cros_ec_prepare_tx - Prepare an outgoing message in the output buffer + * + * This is intended to be used by all ChromeOS EC drivers, but at present + * only SPI uses it. Once LPC uses the same protocol it can start using it. + * I2C could use it now, with a refactor of the existing code. + * + * @ec_dev: Device to use + * @msg: Message to write + */ +int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, + struct cros_ec_msg *msg); + +/** + * cros_ec_remove - Remove a ChromeOS EC + * + * Call this to deregister a ChromeOS EC. After this you should call + * cros_ec_free(). + * + * @ec_dev: Device to remove + * @return 0 if ok, -ve on error + */ +int cros_ec_remove(struct cros_ec_device *ec_dev); + +/** + * cros_ec_register - Register a new ChromeOS EC, using the provided info + * + * Before calling this, allocate a pointer to a new device and then fill + * in all the fields up to the --private-- marker. + * + * @ec_dev: Device to register + * @return 0 if ok, -ve on error + */ +int cros_ec_register(struct cros_ec_device *ec_dev); + +#endif /* __LINUX_MFD_CROS_EC_H */ diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h new file mode 100644 index 000000000000..86fd06953bcd --- /dev/null +++ b/include/linux/mfd/cros_ec_commands.h @@ -0,0 +1,1369 @@ +/* + * Host communication command constants for ChromeOS EC + * + * Copyright (C) 2012 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * The ChromeOS EC multi-function device is used to mux all the requests + * to the EC device for its multiple features: keyboard controller, + * battery charging and regulator control, firmware update. + * + * NOTE: This file is copied verbatim from the ChromeOS EC Open Source + * project in an attempt to make future updates easy to make. + */ + +#ifndef __CROS_EC_COMMANDS_H +#define __CROS_EC_COMMANDS_H + +/* + * Protocol overview + * + * request: CMD [ P0 P1 P2 ... Pn S ] + * response: ERR [ P0 P1 P2 ... Pn S ] + * + * where the bytes are defined as follows: + * - CMD is the command code. (defined by EC_CMD_ constants) + * - ERR is the error code. (defined by EC_RES_ constants) + * - Px is the optional payload. + * It is not sent if the error code is not success. + * (defined by ec_params_ and ec_response_ structures) + * - S is the checksum which is the sum of all payload bytes. + * + * On LPC, CMD and ERR are sent/received at EC_LPC_ADDR_KERNEL|USER_CMD + * and the payloads are sent/received at EC_LPC_ADDR_KERNEL|USER_PARAM. + * On I2C, all bytes are sent serially in the same message. + */ + +/* Current version of this protocol */ +#define EC_PROTO_VERSION 0x00000002 + +/* Command version mask */ +#define EC_VER_MASK(version) (1UL << (version)) + +/* I/O addresses for ACPI commands */ +#define EC_LPC_ADDR_ACPI_DATA 0x62 +#define EC_LPC_ADDR_ACPI_CMD 0x66 + +/* I/O addresses for host command */ +#define EC_LPC_ADDR_HOST_DATA 0x200 +#define EC_LPC_ADDR_HOST_CMD 0x204 + +/* I/O addresses for host command args and params */ +#define EC_LPC_ADDR_HOST_ARGS 0x800 +#define EC_LPC_ADDR_HOST_PARAM 0x804 +#define EC_HOST_PARAM_SIZE 0x0fc /* Size of param area in bytes */ + +/* I/O addresses for host command params, old interface */ +#define EC_LPC_ADDR_OLD_PARAM 0x880 +#define EC_OLD_PARAM_SIZE 0x080 /* Size of param area in bytes */ + +/* EC command register bit functions */ +#define EC_LPC_CMDR_DATA (1 << 0) /* Data ready for host to read */ +#define EC_LPC_CMDR_PENDING (1 << 1) /* Write pending to EC */ +#define EC_LPC_CMDR_BUSY (1 << 2) /* EC is busy processing a command */ +#define EC_LPC_CMDR_CMD (1 << 3) /* Last host write was a command */ +#define EC_LPC_CMDR_ACPI_BRST (1 << 4) /* Burst mode (not used) */ +#define EC_LPC_CMDR_SCI (1 << 5) /* SCI event is pending */ +#define EC_LPC_CMDR_SMI (1 << 6) /* SMI event is pending */ + +#define EC_LPC_ADDR_MEMMAP 0x900 +#define EC_MEMMAP_SIZE 255 /* ACPI IO buffer max is 255 bytes */ +#define EC_MEMMAP_TEXT_MAX 8 /* Size of a string in the memory map */ + +/* The offset address of each type of data in mapped memory.
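+ * Each constant below is a byte offset within the EC_LPC_ADDR_MEMMAP window.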
*/ +#define EC_MEMMAP_TEMP_SENSOR 0x00 /* Temp sensors */ +#define EC_MEMMAP_FAN 0x10 /* Fan speeds */ +#define EC_MEMMAP_TEMP_SENSOR_B 0x18 /* Temp sensors (second set) */ +#define EC_MEMMAP_ID 0x20 /* 'E' 'C' */ +#define EC_MEMMAP_ID_VERSION 0x22 /* Version of data in 0x20 - 0x2f */ +#define EC_MEMMAP_THERMAL_VERSION 0x23 /* Version of data in 0x00 - 0x1f */ +#define EC_MEMMAP_BATTERY_VERSION 0x24 /* Version of data in 0x40 - 0x7f */ +#define EC_MEMMAP_SWITCHES_VERSION 0x25 /* Version of data in 0x30 - 0x33 */ +#define EC_MEMMAP_EVENTS_VERSION 0x26 /* Version of data in 0x34 - 0x3f */ +#define EC_MEMMAP_HOST_CMD_FLAGS 0x27 /* Host command interface flags */ +#define EC_MEMMAP_SWITCHES 0x30 +#define EC_MEMMAP_HOST_EVENTS 0x34 +#define EC_MEMMAP_BATT_VOLT 0x40 /* Battery Present Voltage */ +#define EC_MEMMAP_BATT_RATE 0x44 /* Battery Present Rate */ +#define EC_MEMMAP_BATT_CAP 0x48 /* Battery Remaining Capacity */ +#define EC_MEMMAP_BATT_FLAG 0x4c /* Battery State, defined below */ +#define EC_MEMMAP_BATT_DCAP 0x50 /* Battery Design Capacity */ +#define EC_MEMMAP_BATT_DVLT 0x54 /* Battery Design Voltage */ +#define EC_MEMMAP_BATT_LFCC 0x58 /* Battery Last Full Charge Capacity */ +#define EC_MEMMAP_BATT_CCNT 0x5c /* Battery Cycle Count */ +#define EC_MEMMAP_BATT_MFGR 0x60 /* Battery Manufacturer String */ +#define EC_MEMMAP_BATT_MODEL 0x68 /* Battery Model Number String */ +#define EC_MEMMAP_BATT_SERIAL 0x70 /* Battery Serial Number String */ +#define EC_MEMMAP_BATT_TYPE 0x78 /* Battery Type String */ + +/* Number of temp sensors at EC_MEMMAP_TEMP_SENSOR */ +#define EC_TEMP_SENSOR_ENTRIES 16 +/* + * Number of temp sensors at EC_MEMMAP_TEMP_SENSOR_B. + * + * Valid only if EC_MEMMAP_THERMAL_VERSION returns >= 2. + */ +#define EC_TEMP_SENSOR_B_ENTRIES 8 +#define EC_TEMP_SENSOR_NOT_PRESENT 0xff +#define EC_TEMP_SENSOR_ERROR 0xfe +#define EC_TEMP_SENSOR_NOT_POWERED 0xfd +#define EC_TEMP_SENSOR_NOT_CALIBRATED 0xfc +/* + * The offset of temperature value stored in mapped memory. This allows + * reporting a temperature range of 200K to 454K = -73C to 181C. + */ +#define EC_TEMP_SENSOR_OFFSET 200 + +#define EC_FAN_SPEED_ENTRIES 4 /* Number of fans at EC_MEMMAP_FAN */ +#define EC_FAN_SPEED_NOT_PRESENT 0xffff /* Entry not present */ +#define EC_FAN_SPEED_STALLED 0xfffe /* Fan stalled */ + +/* Battery bit flags at EC_MEMMAP_BATT_FLAG. */ +#define EC_BATT_FLAG_AC_PRESENT 0x01 +#define EC_BATT_FLAG_BATT_PRESENT 0x02 +#define EC_BATT_FLAG_DISCHARGING 0x04 +#define EC_BATT_FLAG_CHARGING 0x08 +#define EC_BATT_FLAG_LEVEL_CRITICAL 0x10 + +/* Switch flags at EC_MEMMAP_SWITCHES */ +#define EC_SWITCH_LID_OPEN 0x01 +#define EC_SWITCH_POWER_BUTTON_PRESSED 0x02 +#define EC_SWITCH_WRITE_PROTECT_DISABLED 0x04 +/* Recovery requested via keyboard */ +#define EC_SWITCH_KEYBOARD_RECOVERY 0x08 +/* Recovery requested via dedicated signal (from servo board) */ +#define EC_SWITCH_DEDICATED_RECOVERY 0x10 +/* Was fake developer mode switch; now unused. Remove in next refactor. */ +#define EC_SWITCH_IGNORE0 0x20 + +/* Host command interface flags */ +/* Host command interface supports LPC args (LPC interface only) */ +#define EC_HOST_CMD_FLAG_LPC_ARGS_SUPPORTED 0x01 + +/* Wireless switch flags */ +#define EC_WIRELESS_SWITCH_WLAN 0x01 +#define EC_WIRELESS_SWITCH_BLUETOOTH 0x02 + +/* + * This header file is used in coreboot both in C and ACPI code. The ACPI code + * is pre-processed to handle constants but the ASL compiler is unable to + * handle actual C code so keep it separate. 
+ */ +#ifndef __ACPI__ + +/* LPC command status byte masks */ +/* EC has written a byte in the data register and host hasn't read it yet */ +#define EC_LPC_STATUS_TO_HOST 0x01 +/* Host has written a command/data byte and the EC hasn't read it yet */ +#define EC_LPC_STATUS_FROM_HOST 0x02 +/* EC is processing a command */ +#define EC_LPC_STATUS_PROCESSING 0x04 +/* Last write to EC was a command, not data */ +#define EC_LPC_STATUS_LAST_CMD 0x08 +/* EC is in burst mode. Unsupported by Chrome EC, so this bit is never set */ +#define EC_LPC_STATUS_BURST_MODE 0x10 +/* SCI event is pending (requesting SCI query) */ +#define EC_LPC_STATUS_SCI_PENDING 0x20 +/* SMI event is pending (requesting SMI query) */ +#define EC_LPC_STATUS_SMI_PENDING 0x40 +/* (reserved) */ +#define EC_LPC_STATUS_RESERVED 0x80 + +/* + * EC is busy. This covers both the EC processing a command and the host + * having written a new command that the EC hasn't picked up yet. + */ +#define EC_LPC_STATUS_BUSY_MASK \ + (EC_LPC_STATUS_FROM_HOST | EC_LPC_STATUS_PROCESSING) + +/* Host command response codes */ +enum ec_status { + EC_RES_SUCCESS = 0, + EC_RES_INVALID_COMMAND = 1, + EC_RES_ERROR = 2, + EC_RES_INVALID_PARAM = 3, + EC_RES_ACCESS_DENIED = 4, + EC_RES_INVALID_RESPONSE = 5, + EC_RES_INVALID_VERSION = 6, + EC_RES_INVALID_CHECKSUM = 7, + EC_RES_IN_PROGRESS = 8, /* Accepted, command in progress */ + EC_RES_UNAVAILABLE = 9, /* No response available */ + EC_RES_TIMEOUT = 10, /* We got a timeout */ + EC_RES_OVERFLOW = 11, /* Table / data overflow */ +}; + +/* + * Host event codes. Note these are 1-based, not 0-based, because ACPI query + * EC command uses code 0 to mean "no event pending". We explicitly specify + * each value in the enum listing so they won't change if we delete/insert an + * item or rearrange the list (it needs to be stable across platforms, not + * just within a single compiled instance). + */ +enum host_event_code { + EC_HOST_EVENT_LID_CLOSED = 1, + EC_HOST_EVENT_LID_OPEN = 2, + EC_HOST_EVENT_POWER_BUTTON = 3, + EC_HOST_EVENT_AC_CONNECTED = 4, + EC_HOST_EVENT_AC_DISCONNECTED = 5, + EC_HOST_EVENT_BATTERY_LOW = 6, + EC_HOST_EVENT_BATTERY_CRITICAL = 7, + EC_HOST_EVENT_BATTERY = 8, + EC_HOST_EVENT_THERMAL_THRESHOLD = 9, + EC_HOST_EVENT_THERMAL_OVERLOAD = 10, + EC_HOST_EVENT_THERMAL = 11, + EC_HOST_EVENT_USB_CHARGER = 12, + EC_HOST_EVENT_KEY_PRESSED = 13, + /* + * EC has finished initializing the host interface. The host can check + * for this event after sending an EC_CMD_REBOOT_EC command to + * determine when the EC is ready to accept subsequent commands. + */ + EC_HOST_EVENT_INTERFACE_READY = 14, + /* Keyboard recovery combo has been pressed */ + EC_HOST_EVENT_KEYBOARD_RECOVERY = 15, + + /* Shutdown due to thermal overload */ + EC_HOST_EVENT_THERMAL_SHUTDOWN = 16, + /* Shutdown due to battery level too low */ + EC_HOST_EVENT_BATTERY_SHUTDOWN = 17, + + /* + * The high bit of the event mask is not used as a host event code. If + * it reads back as set, then the entire event mask should be + * considered invalid by the host. This can happen when reading the + * raw event status via EC_MEMMAP_HOST_EVENTS but the LPC interface is + * not initialized on the EC, or improperly configured on the host.
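+ * A host can sanity-check the mask it reads by testing + * EC_HOST_EVENT_MASK(EC_HOST_EVENT_INVALID) and discarding the value if + * that bit is set.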
+ */ + EC_HOST_EVENT_INVALID = 32 +}; +/* Host event mask */ +#define EC_HOST_EVENT_MASK(event_code) (1UL << ((event_code) - 1)) + +/* Arguments at EC_LPC_ADDR_HOST_ARGS */ +struct ec_lpc_host_args { + uint8_t flags; + uint8_t command_version; + uint8_t data_size; + /* + * Checksum; sum of command + flags + command_version + data_size + + * all params/response data bytes. + */ + uint8_t checksum; +} __packed; + +/* Flags for ec_lpc_host_args.flags */ +/* + * Args are from host. Data area at EC_LPC_ADDR_HOST_PARAM contains command + * params. + * + * If EC gets a command and this flag is not set, this is an old-style command. + * Command version is 0 and params from host are at EC_LPC_ADDR_OLD_PARAM with + * unknown length. EC must respond with an old-style response (that is, + * without setting EC_HOST_ARGS_FLAG_TO_HOST). + */ +#define EC_HOST_ARGS_FLAG_FROM_HOST 0x01 +/* + * Args are from EC. Data area at EC_LPC_ADDR_HOST_PARAM contains response. + * + * If EC responds to a command and this flag is not set, this is an old-style + * response. Command version is 0 and response data from EC is at + * EC_LPC_ADDR_OLD_PARAM with unknown length. + */ +#define EC_HOST_ARGS_FLAG_TO_HOST 0x02 + +/* + * Notes on commands: + * + * Each command is an 8-bit command value. Commands which take params or + * return response data specify structs for that data. If no struct is + * specified, the command does not input or output data, respectively. + * Parameter/response length is implicit in the structs. Some underlying + * communication protocols (I2C, SPI) may add length or checksum headers, but + * those are implementation-dependent and not defined here. + */ + +/*****************************************************************************/ +/* General / test commands */ + +/* + * Get protocol version, used to deal with non-backward compatible protocol + * changes. + */ +#define EC_CMD_PROTO_VERSION 0x00 + +struct ec_response_proto_version { + uint32_t version; +} __packed; + +/* + * Hello. This is a simple command to test that the EC is responsive to + * commands. + */ +#define EC_CMD_HELLO 0x01 + +struct ec_params_hello { + uint32_t in_data; /* Pass anything here */ +} __packed; + +struct ec_response_hello { + uint32_t out_data; /* Output will be in_data + 0x01020304 */ +} __packed; + +/* Get version number */ +#define EC_CMD_GET_VERSION 0x02 + +enum ec_current_image { + EC_IMAGE_UNKNOWN = 0, + EC_IMAGE_RO, + EC_IMAGE_RW +}; + +struct ec_response_get_version { + /* Null-terminated version strings for RO, RW */ + char version_string_ro[32]; + char version_string_rw[32]; + char reserved[32]; /* Was previously RW-B string */ + uint32_t current_image; /* One of ec_current_image */ +} __packed; + +/* Read test */ +#define EC_CMD_READ_TEST 0x03 + +struct ec_params_read_test { + uint32_t offset; /* Starting value for read buffer */ + uint32_t size; /* Size to read in bytes */ +} __packed; + +struct ec_response_read_test { + uint32_t data[32]; +} __packed; + +/* + * Get build information + * + * Response is null-terminated string. + */ +#define EC_CMD_GET_BUILD_INFO 0x04 + +/* Get chip info */ +#define EC_CMD_GET_CHIP_INFO 0x05 + +struct ec_response_get_chip_info { + /* Null-terminated strings */ + char vendor[32]; + char name[32]; + char revision[32]; /* Mask version */ +} __packed; + +/* Get board HW version */ +#define EC_CMD_GET_BOARD_VERSION 0x06 + +struct ec_response_board_version { + uint16_t board_version; /* A monotonically increasing number. */ +} __packed; + +/* + * Read memory-mapped data.
+ * + * This is an alternate interface to memory-mapped data for bus protocols + * which don't support direct-mapped memory - I2C, SPI, etc. + * + * Response is params.size bytes of data. + */ +#define EC_CMD_READ_MEMMAP 0x07 + +struct ec_params_read_memmap { + uint8_t offset; /* Offset in memmap (EC_MEMMAP_*) */ + uint8_t size; /* Size to read in bytes */ +} __packed; + +/* Read versions supported for a command */ +#define EC_CMD_GET_CMD_VERSIONS 0x08 + +struct ec_params_get_cmd_versions { + uint8_t cmd; /* Command to check */ +} __packed; + +struct ec_response_get_cmd_versions { + /* + * Mask of supported versions; use EC_VER_MASK() to compare with a + * desired version. + */ + uint32_t version_mask; +} __packed; + +/* + * Check EC communications status (busy). This is needed on i2c/spi but not + * on lpc since it has its own out-of-band busy indicator. + * + * lpc must read the status from the command register. Attempting this on + * lpc will overwrite the args/parameter space and corrupt its data. + */ +#define EC_CMD_GET_COMMS_STATUS 0x09 + +/* Avoid using ec_status which is for return values */ +enum ec_comms_status { + EC_COMMS_STATUS_PROCESSING = 1 << 0, /* Processing cmd */ +}; + +struct ec_response_get_comms_status { + uint32_t flags; /* Mask of enum ec_comms_status */ +} __packed; + + +/*****************************************************************************/ +/* Flash commands */ + +/* Get flash info */ +#define EC_CMD_FLASH_INFO 0x10 + +struct ec_response_flash_info { + /* Usable flash size, in bytes */ + uint32_t flash_size; + /* + * Write block size. Write offset and size must be a multiple + * of this. + */ + uint32_t write_block_size; + /* + * Erase block size. Erase offset and size must be a multiple + * of this. + */ + uint32_t erase_block_size; + /* + * Protection block size. Protection offset and size must be a + * multiple of this. + */ + uint32_t protect_block_size; +} __packed; + +/* + * Read flash + * + * Response is params.size bytes of data. + */ +#define EC_CMD_FLASH_READ 0x11 + +struct ec_params_flash_read { + uint32_t offset; /* Byte offset to read */ + uint32_t size; /* Size to read in bytes */ +} __packed; + +/* Write flash */ +#define EC_CMD_FLASH_WRITE 0x12 + +struct ec_params_flash_write { + uint32_t offset; /* Byte offset to write */ + uint32_t size; /* Size to write in bytes */ + /* + * Data to write. Could really use EC_PARAM_SIZE - 8, but tidiest to + * use a power of 2 so writes stay aligned. + */ + uint8_t data[64]; +} __packed; + +/* Erase flash */ +#define EC_CMD_FLASH_ERASE 0x13 + +struct ec_params_flash_erase { + uint32_t offset; /* Byte offset to erase */ + uint32_t size; /* Size to erase in bytes */ +} __packed; + +/* + * Get/set flash protection. + * + * If mask!=0, sets/clears the requested bits of flags. Depending on the + * firmware write protect GPIO, not all flags will take effect immediately; + * some flags require a subsequent hard reset to take effect. Check the + * returned flags bits to see what actually happened. + * + * If mask=0, simply returns the current flags state. + */ +#define EC_CMD_FLASH_PROTECT 0x15 +#define EC_VER_FLASH_PROTECT 1 /* Command version 1 */ + +/* Flags for flash protection */ +/* RO flash code protected when the EC boots */ +#define EC_FLASH_PROTECT_RO_AT_BOOT (1 << 0) +/* + * RO flash code protected now. If this bit is set, at-boot status cannot + * be changed. + */ +#define EC_FLASH_PROTECT_RO_NOW (1 << 1) +/* Entire flash code protected now, until reboot.
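+ * (That is, the RO and RW regions together.)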
*/ +#define EC_FLASH_PROTECT_ALL_NOW (1 << 2) +/* Flash write protect GPIO is asserted now */ +#define EC_FLASH_PROTECT_GPIO_ASSERTED (1 << 3) +/* Error - at least one bank of flash is stuck locked, and cannot be unlocked */ +#define EC_FLASH_PROTECT_ERROR_STUCK (1 << 4) +/* + * Error - flash protection is in inconsistent state. At least one bank of + * flash which should be protected is not protected. Usually fixed by + * re-requesting the desired flags, or by a hard reset if that fails. + */ +#define EC_FLASH_PROTECT_ERROR_INCONSISTENT (1 << 5) +/* Entire flash code protected when the EC boots */ +#define EC_FLASH_PROTECT_ALL_AT_BOOT (1 << 6) + +struct ec_params_flash_protect { + uint32_t mask; /* Bits in flags to apply */ + uint32_t flags; /* New flags to apply */ +} __packed; + +struct ec_response_flash_protect { + /* Current value of flash protect flags */ + uint32_t flags; + /* + * Flags which are valid on this platform. This allows the caller + * to distinguish between flags which aren't set vs. flags which can't + * be set on this platform. + */ + uint32_t valid_flags; + /* Flags which can be changed given the current protection state */ + uint32_t writable_flags; +} __packed; + +/* + * Note: commands 0x14 - 0x19 version 0 were old commands to get/set flash + * write protect. These commands may be reused with version > 0. + */ + +/* Get the region offset/size */ +#define EC_CMD_FLASH_REGION_INFO 0x16 +#define EC_VER_FLASH_REGION_INFO 1 + +enum ec_flash_region { + /* Region which holds read-only EC image */ + EC_FLASH_REGION_RO, + /* Region which holds rewritable EC image */ + EC_FLASH_REGION_RW, + /* + * Region which should be write-protected in the factory (a superset of + * EC_FLASH_REGION_RO) + */ + EC_FLASH_REGION_WP_RO, +}; + +struct ec_params_flash_region_info { + uint32_t region; /* enum ec_flash_region */ +} __packed; + +struct ec_response_flash_region_info { + uint32_t offset; + uint32_t size; +} __packed; + +/* Read/write VbNvContext */ +#define EC_CMD_VBNV_CONTEXT 0x17 +#define EC_VER_VBNV_CONTEXT 1 +#define EC_VBNV_BLOCK_SIZE 16 + +enum ec_vbnvcontext_op { + EC_VBNV_CONTEXT_OP_READ, + EC_VBNV_CONTEXT_OP_WRITE, +}; + +struct ec_params_vbnvcontext { + uint32_t op; + uint8_t block[EC_VBNV_BLOCK_SIZE]; +} __packed; + +struct ec_response_vbnvcontext { + uint8_t block[EC_VBNV_BLOCK_SIZE]; +} __packed; + +/*****************************************************************************/ +/* PWM commands */ + +/* Get fan target RPM */ +#define EC_CMD_PWM_GET_FAN_TARGET_RPM 0x20 + +struct ec_response_pwm_get_fan_rpm { + uint32_t rpm; +} __packed; + +/* Set target fan RPM */ +#define EC_CMD_PWM_SET_FAN_TARGET_RPM 0x21 + +struct ec_params_pwm_set_fan_target_rpm { + uint32_t rpm; +} __packed; + +/* Get keyboard backlight */ +#define EC_CMD_PWM_GET_KEYBOARD_BACKLIGHT 0x22 + +struct ec_response_pwm_get_keyboard_backlight { + uint8_t percent; + uint8_t enabled; +} __packed; + +/* Set keyboard backlight */ +#define EC_CMD_PWM_SET_KEYBOARD_BACKLIGHT 0x23 + +struct ec_params_pwm_set_keyboard_backlight { + uint8_t percent; +} __packed; + +/* Set target fan PWM duty cycle */ +#define EC_CMD_PWM_SET_FAN_DUTY 0x24 + +struct ec_params_pwm_set_fan_duty { + uint32_t percent; +} __packed; + +/*****************************************************************************/ +/* + * Lightbar commands. This looks worse than it is. Since we only use one HOST + * command to say "talk to the lightbar", we put the "and tell it to do X" part + * into a subcommand.
We'll make separate structs for subcommands with + * different input args, so that we know how much to expect. + */ +#define EC_CMD_LIGHTBAR_CMD 0x28 + +struct rgb_s { + uint8_t r, g, b; +}; + +#define LB_BATTERY_LEVELS 4 +/* List of tweakable parameters. NOTE: It's __packed so it can be sent in a + * host command, but the alignment is the same regardless. Keep it that way. + */ +struct lightbar_params { + /* Timing */ + int google_ramp_up; + int google_ramp_down; + int s3s0_ramp_up; + int s0_tick_delay[2]; /* AC=0/1 */ + int s0a_tick_delay[2]; /* AC=0/1 */ + int s0s3_ramp_down; + int s3_sleep_for; + int s3_ramp_up; + int s3_ramp_down; + + /* Oscillation */ + uint8_t new_s0; + uint8_t osc_min[2]; /* AC=0/1 */ + uint8_t osc_max[2]; /* AC=0/1 */ + uint8_t w_ofs[2]; /* AC=0/1 */ + + /* Brightness limits based on the backlight and AC. */ + uint8_t bright_bl_off_fixed[2]; /* AC=0/1 */ + uint8_t bright_bl_on_min[2]; /* AC=0/1 */ + uint8_t bright_bl_on_max[2]; /* AC=0/1 */ + + /* Battery level thresholds */ + uint8_t battery_threshold[LB_BATTERY_LEVELS - 1]; + + /* Map [AC][battery_level] to color index */ + uint8_t s0_idx[2][LB_BATTERY_LEVELS]; /* AP is running */ + uint8_t s3_idx[2][LB_BATTERY_LEVELS]; /* AP is sleeping */ + + /* Color palette */ + struct rgb_s color[8]; /* 0-3 are Google colors */ +} __packed; + +struct ec_params_lightbar { + uint8_t cmd; /* Command (see enum lightbar_command) */ + union { + struct { + /* no args */ + } dump, off, on, init, get_seq, get_params; + + struct num { + uint8_t num; + } brightness, seq, demo; + + struct reg { + uint8_t ctrl, reg, value; + } reg; + + struct rgb { + uint8_t led, red, green, blue; + } rgb; + + struct lightbar_params set_params; + }; +} __packed; + +struct ec_response_lightbar { + union { + struct dump { + struct { + uint8_t reg; + uint8_t ic0; + uint8_t ic1; + } vals[23]; + } dump; + + struct get_seq { + uint8_t num; + } get_seq; + + struct lightbar_params get_params; + + struct { + /* no return params */ + } off, on, init, brightness, seq, reg, rgb, demo, set_params; + }; +} __packed; + +/* Lightbar commands */ +enum lightbar_command { + LIGHTBAR_CMD_DUMP = 0, + LIGHTBAR_CMD_OFF = 1, + LIGHTBAR_CMD_ON = 2, + LIGHTBAR_CMD_INIT = 3, + LIGHTBAR_CMD_BRIGHTNESS = 4, + LIGHTBAR_CMD_SEQ = 5, + LIGHTBAR_CMD_REG = 6, + LIGHTBAR_CMD_RGB = 7, + LIGHTBAR_CMD_GET_SEQ = 8, + LIGHTBAR_CMD_DEMO = 9, + LIGHTBAR_CMD_GET_PARAMS = 10, + LIGHTBAR_CMD_SET_PARAMS = 11, + LIGHTBAR_NUM_CMDS +}; + +/*****************************************************************************/ +/* Verified boot commands */ + +/* + * Note: command code 0x29 version 0 was VBOOT_CMD in Link EVT; it may be + * reused for other purposes with version > 0. 
+ */ + +/* Verified boot hash command */ +#define EC_CMD_VBOOT_HASH 0x2A + +struct ec_params_vboot_hash { + uint8_t cmd; /* enum ec_vboot_hash_cmd */ + uint8_t hash_type; /* enum ec_vboot_hash_type */ + uint8_t nonce_size; /* Nonce size; may be 0 */ + uint8_t reserved0; /* Reserved; set 0 */ + uint32_t offset; /* Offset in flash to hash */ + uint32_t size; /* Number of bytes to hash */ + uint8_t nonce_data[64]; /* Nonce data; ignored if nonce_size=0 */ +} __packed; + +struct ec_response_vboot_hash { + uint8_t status; /* enum ec_vboot_hash_status */ + uint8_t hash_type; /* enum ec_vboot_hash_type */ + uint8_t digest_size; /* Size of hash digest in bytes */ + uint8_t reserved0; /* Ignore; will be 0 */ + uint32_t offset; /* Offset in flash which was hashed */ + uint32_t size; /* Number of bytes hashed */ + uint8_t hash_digest[64]; /* Hash digest data */ +} __packed; + +enum ec_vboot_hash_cmd { + EC_VBOOT_HASH_GET = 0, /* Get current hash status */ + EC_VBOOT_HASH_ABORT = 1, /* Abort calculating current hash */ + EC_VBOOT_HASH_START = 2, /* Start computing a new hash */ + EC_VBOOT_HASH_RECALC = 3, /* Synchronously compute a new hash */ +}; + +enum ec_vboot_hash_type { + EC_VBOOT_HASH_TYPE_SHA256 = 0, /* SHA-256 */ +}; + +enum ec_vboot_hash_status { + EC_VBOOT_HASH_STATUS_NONE = 0, /* No hash (not started, or aborted) */ + EC_VBOOT_HASH_STATUS_DONE = 1, /* Finished computing a hash */ + EC_VBOOT_HASH_STATUS_BUSY = 2, /* Busy computing a hash */ +}; + +/* + * Special values for offset for EC_VBOOT_HASH_START and EC_VBOOT_HASH_RECALC. + * If one of these is specified, the EC will automatically update offset and + * size to the correct values for the specified image (RO or RW). + */ +#define EC_VBOOT_HASH_OFFSET_RO 0xfffffffe +#define EC_VBOOT_HASH_OFFSET_RW 0xfffffffd + +/*****************************************************************************/ +/* USB charging control commands */ + +/* Set USB port charging mode */ +#define EC_CMD_USB_CHARGE_SET_MODE 0x30 + +struct ec_params_usb_charge_set_mode { + uint8_t usb_port_id; + uint8_t mode; +} __packed; + +/*****************************************************************************/ +/* Persistent storage for host */ + +/* Maximum bytes that can be read/written in a single command */ +#define EC_PSTORE_SIZE_MAX 64 + +/* Get persistent storage info */ +#define EC_CMD_PSTORE_INFO 0x40 + +struct ec_response_pstore_info { + /* Persistent storage size, in bytes */ + uint32_t pstore_size; + /* Access size; read/write offset and size must be a multiple of this */ + uint32_t access_size; +} __packed; + +/* + * Read persistent storage + * + * Response is params.size bytes of data. 
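+ * + * Offset and size must be a multiple of the access_size reported by + * EC_CMD_PSTORE_INFO.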
+ */ +#define EC_CMD_PSTORE_READ 0x41 + +struct ec_params_pstore_read { + uint32_t offset; /* Byte offset to read */ + uint32_t size; /* Size to read in bytes */ +} __packed; + +/* Write persistent storage */ +#define EC_CMD_PSTORE_WRITE 0x42 + +struct ec_params_pstore_write { + uint32_t offset; /* Byte offset to write */ + uint32_t size; /* Size to write in bytes */ + uint8_t data[EC_PSTORE_SIZE_MAX]; +} __packed; + +/*****************************************************************************/ +/* Real-time clock */ + +/* RTC params and response structures */ +struct ec_params_rtc { + uint32_t time; +} __packed; + +struct ec_response_rtc { + uint32_t time; +} __packed; + +/* These use ec_response_rtc */ +#define EC_CMD_RTC_GET_VALUE 0x44 +#define EC_CMD_RTC_GET_ALARM 0x45 + +/* These all use ec_params_rtc */ +#define EC_CMD_RTC_SET_VALUE 0x46 +#define EC_CMD_RTC_SET_ALARM 0x47 + +/*****************************************************************************/ +/* Port80 log access */ + +/* Get last port80 code from previous boot */ +#define EC_CMD_PORT80_LAST_BOOT 0x48 + +struct ec_response_port80_last_boot { + uint16_t code; +} __packed; + +/*****************************************************************************/ +/* Thermal engine commands */ + +/* Set threshold value */ +#define EC_CMD_THERMAL_SET_THRESHOLD 0x50 + +struct ec_params_thermal_set_threshold { + uint8_t sensor_type; + uint8_t threshold_id; + uint16_t value; +} __packed; + +/* Get threshold value */ +#define EC_CMD_THERMAL_GET_THRESHOLD 0x51 + +struct ec_params_thermal_get_threshold { + uint8_t sensor_type; + uint8_t threshold_id; +} __packed; + +struct ec_response_thermal_get_threshold { + uint16_t value; +} __packed; + +/* Toggle automatic fan control */ +#define EC_CMD_THERMAL_AUTO_FAN_CTRL 0x52 + +/* Get TMP006 calibration data */ +#define EC_CMD_TMP006_GET_CALIBRATION 0x53 + +struct ec_params_tmp006_get_calibration { + uint8_t index; +} __packed; + +struct ec_response_tmp006_get_calibration { + float s0; + float b0; + float b1; + float b2; +} __packed; + +/* Set TMP006 calibration data */ +#define EC_CMD_TMP006_SET_CALIBRATION 0x54 + +struct ec_params_tmp006_set_calibration { + uint8_t index; + uint8_t reserved[3]; /* Reserved; set 0 */ + float s0; + float b0; + float b1; + float b2; +} __packed; + +/*****************************************************************************/ +/* MKBP - Matrix KeyBoard Protocol */ + +/* + * Read key state + * + * Returns raw data for keyboard cols; see ec_response_mkbp_info.cols for + * expected response size.
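+ * (One byte per column, one bit per row.)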
+ */
+#define EC_CMD_MKBP_STATE 0x60
+
+/* Provide information about the matrix: number of rows and columns */
+#define EC_CMD_MKBP_INFO 0x61
+
+struct ec_response_mkbp_info {
+	uint32_t rows;
+	uint32_t cols;
+	uint8_t switches;
+} __packed;
+
+/* Simulate key press */
+#define EC_CMD_MKBP_SIMULATE_KEY 0x62
+
+struct ec_params_mkbp_simulate_key {
+	uint8_t col;
+	uint8_t row;
+	uint8_t pressed;
+} __packed;
+
+/* Configure keyboard scanning */
+#define EC_CMD_MKBP_SET_CONFIG 0x64
+#define EC_CMD_MKBP_GET_CONFIG 0x65
+
+/* flags */
+enum mkbp_config_flags {
+	EC_MKBP_FLAGS_ENABLE = 1,	/* Enable keyboard scanning */
+};
+
+enum mkbp_config_valid {
+	EC_MKBP_VALID_SCAN_PERIOD		= 1 << 0,
+	EC_MKBP_VALID_POLL_TIMEOUT		= 1 << 1,
+	EC_MKBP_VALID_MIN_POST_SCAN_DELAY	= 1 << 3,
+	EC_MKBP_VALID_OUTPUT_SETTLE		= 1 << 4,
+	EC_MKBP_VALID_DEBOUNCE_DOWN		= 1 << 5,
+	EC_MKBP_VALID_DEBOUNCE_UP		= 1 << 6,
+	EC_MKBP_VALID_FIFO_MAX_DEPTH		= 1 << 7,
+};
+
+/* Configuration for our key scanning algorithm */
+struct ec_mkbp_config {
+	uint32_t valid_mask;	/* valid fields */
+	uint8_t flags;		/* some flags (enum mkbp_config_flags) */
+	uint8_t valid_flags;	/* which flags are valid */
+	uint16_t scan_period_us;	/* period between start of scans */
+	/* revert to interrupt mode after no activity for this long */
+	uint32_t poll_timeout_us;
+	/*
+	 * minimum post-scan relax time. Once we finish a scan we check
+	 * the time until we are due to start the next one. If this time is
+	 * shorter than this field, we use this instead.
+	 */
+	uint16_t min_post_scan_delay_us;
+	/* delay between setting up output and waiting for it to settle */
+	uint16_t output_settle_us;
+	uint16_t debounce_down_us;	/* time for debounce on key down */
+	uint16_t debounce_up_us;	/* time for debounce on key up */
+	/* maximum depth to allow for fifo (0 = no keyscan output) */
+	uint8_t fifo_max_depth;
+} __packed;
+
+struct ec_params_mkbp_set_config {
+	struct ec_mkbp_config config;
+} __packed;
+
+struct ec_response_mkbp_get_config {
+	struct ec_mkbp_config config;
+} __packed;
+
+/* Run the key scan emulation */
+#define EC_CMD_KEYSCAN_SEQ_CTRL 0x66
+
+enum ec_keyscan_seq_cmd {
+	EC_KEYSCAN_SEQ_STATUS = 0,	/* Get status information */
+	EC_KEYSCAN_SEQ_CLEAR = 1,	/* Clear sequence */
+	EC_KEYSCAN_SEQ_ADD = 2,		/* Add item to sequence */
+	EC_KEYSCAN_SEQ_START = 3,	/* Start running sequence */
+	EC_KEYSCAN_SEQ_COLLECT = 4,	/* Collect sequence summary data */
+};
+
+enum ec_collect_flags {
+	/*
+	 * Indicates this scan was processed by the EC. Due to timing, some
+	 * scans may be skipped.
+	 */
+	EC_KEYSCAN_SEQ_FLAG_DONE = 1 << 0,
+};
+
+struct ec_collect_item {
+	uint8_t flags;	/* some flags (enum ec_collect_flags) */
+};
+
+struct ec_params_keyscan_seq_ctrl {
+	uint8_t cmd;	/* Command to send (enum ec_keyscan_seq_cmd) */
+	union {
+		struct {
+			uint8_t active;		/* still active */
+			uint8_t num_items;	/* number of items */
+			/* Current item being presented */
+			uint8_t cur_item;
+		} status;
+		struct {
+			/*
+			 * Absolute time for this scan, measured from the
+			 * start of the sequence.
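+			 * Values are in microseconds (hence the _us
+			 * suffix below); the first item of a sequence
+			 * would presumably use 0 here, though this
+			 * header does not spell that out.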
+ */ + uint32_t time_us; + uint8_t scan[0]; /* keyscan data */ + } add; + struct { + uint8_t start_item; /* First item to return */ + uint8_t num_items; /* Number of items to return */ + } collect; + }; +} __packed; + +struct ec_result_keyscan_seq_ctrl { + union { + struct { + uint8_t num_items; /* Number of items */ + /* Data for each item */ + struct ec_collect_item item[0]; + } collect; + }; +} __packed; + +/*****************************************************************************/ +/* Temperature sensor commands */ + +/* Read temperature sensor info */ +#define EC_CMD_TEMP_SENSOR_GET_INFO 0x70 + +struct ec_params_temp_sensor_get_info { + uint8_t id; +} __packed; + +struct ec_response_temp_sensor_get_info { + char sensor_name[32]; + uint8_t sensor_type; +} __packed; + +/*****************************************************************************/ + +/* + * Note: host commands 0x80 - 0x87 are reserved to avoid conflict with ACPI + * commands accidentally sent to the wrong interface. See the ACPI section + * below. + */ + +/*****************************************************************************/ +/* Host event commands */ + +/* + * Host event mask params and response structures, shared by all of the host + * event commands below. + */ +struct ec_params_host_event_mask { + uint32_t mask; +} __packed; + +struct ec_response_host_event_mask { + uint32_t mask; +} __packed; + +/* These all use ec_response_host_event_mask */ +#define EC_CMD_HOST_EVENT_GET_B 0x87 +#define EC_CMD_HOST_EVENT_GET_SMI_MASK 0x88 +#define EC_CMD_HOST_EVENT_GET_SCI_MASK 0x89 +#define EC_CMD_HOST_EVENT_GET_WAKE_MASK 0x8d + +/* These all use ec_params_host_event_mask */ +#define EC_CMD_HOST_EVENT_SET_SMI_MASK 0x8a +#define EC_CMD_HOST_EVENT_SET_SCI_MASK 0x8b +#define EC_CMD_HOST_EVENT_CLEAR 0x8c +#define EC_CMD_HOST_EVENT_SET_WAKE_MASK 0x8e +#define EC_CMD_HOST_EVENT_CLEAR_B 0x8f + +/*****************************************************************************/ +/* Switch commands */ + +/* Enable/disable LCD backlight */ +#define EC_CMD_SWITCH_ENABLE_BKLIGHT 0x90 + +struct ec_params_switch_enable_backlight { + uint8_t enabled; +} __packed; + +/* Enable/disable WLAN/Bluetooth */ +#define EC_CMD_SWITCH_ENABLE_WIRELESS 0x91 + +struct ec_params_switch_enable_wireless { + uint8_t enabled; +} __packed; + +/*****************************************************************************/ +/* GPIO commands. Only available on EC if write protect has been disabled. */ + +/* Set GPIO output value */ +#define EC_CMD_GPIO_SET 0x92 + +struct ec_params_gpio_set { + char name[32]; + uint8_t val; +} __packed; + +/* Get GPIO value */ +#define EC_CMD_GPIO_GET 0x93 + +struct ec_params_gpio_get { + char name[32]; +} __packed; +struct ec_response_gpio_get { + uint8_t val; +} __packed; + +/*****************************************************************************/ +/* I2C commands. Only available when flash write protect is unlocked. */ + +/* Read I2C bus */ +#define EC_CMD_I2C_READ 0x94 + +struct ec_params_i2c_read { + uint16_t addr; + uint8_t read_size; /* Either 8 or 16. */ + uint8_t port; + uint8_t offset; +} __packed; +struct ec_response_i2c_read { + uint16_t data; +} __packed; + +/* Write I2C bus */ +#define EC_CMD_I2C_WRITE 0x95 + +struct ec_params_i2c_write { + uint16_t data; + uint16_t addr; + uint8_t write_size; /* Either 8 or 16. */ + uint8_t port; + uint8_t offset; +} __packed; + +/*****************************************************************************/ +/* Charge state commands. 
Only available when flash write protect is unlocked. */
+
+/* Force charge state machine to stop in idle mode */
+#define EC_CMD_CHARGE_FORCE_IDLE 0x96
+
+struct ec_params_force_idle {
+	uint8_t enabled;
+} __packed;
+
+/*****************************************************************************/
+/* Console commands. Only available when flash write protect is unlocked. */
+
+/* Snapshot console output buffer for use by EC_CMD_CONSOLE_READ. */
+#define EC_CMD_CONSOLE_SNAPSHOT 0x97
+
+/*
+ * Read next chunk of data from saved snapshot.
+ *
+ * Response is a null-terminated string; an empty string indicates there is
+ * no more output remaining.
+ */
+#define EC_CMD_CONSOLE_READ 0x98
+
+/*****************************************************************************/
+
+/*
+ * Cut off battery power output if the battery supports it.
+ *
+ * For an unsupported battery, simply don't implement this command and let
+ * the EC return EC_RES_INVALID_COMMAND.
+ */
+#define EC_CMD_BATTERY_CUT_OFF 0x99
+
+/*****************************************************************************/
+/* Temporary debug commands. TODO: remove this crosbug.com/p/13849 */
+
+/*
+ * Dump charge state machine context.
+ *
+ * Response is a binary dump of charge state machine context.
+ */
+#define EC_CMD_CHARGE_DUMP 0xa0
+
+/*
+ * Set maximum battery charging current.
+ */
+#define EC_CMD_CHARGE_CURRENT_LIMIT 0xa1
+
+struct ec_params_current_limit {
+	uint32_t limit;
+} __packed;
+
+/*****************************************************************************/
+/* System commands */
+
+/*
+ * TODO: this is a confusing name, since it doesn't necessarily reboot the EC.
+ * Rename to "set image" or something similar.
+ */
+#define EC_CMD_REBOOT_EC 0xd2
+
+/* Command */
+enum ec_reboot_cmd {
+	EC_REBOOT_CANCEL = 0,        /* Cancel a pending reboot */
+	EC_REBOOT_JUMP_RO = 1,       /* Jump to RO without rebooting */
+	EC_REBOOT_JUMP_RW = 2,       /* Jump to RW without rebooting */
+	/* (command 3 was jump to RW-B) */
+	EC_REBOOT_COLD = 4,          /* Cold-reboot */
+	EC_REBOOT_DISABLE_JUMP = 5,  /* Disable jump until next reboot */
+	EC_REBOOT_HIBERNATE = 6      /* Hibernate EC */
+};
+
+/* Flags for ec_params_reboot_ec.reboot_flags */
+#define EC_REBOOT_FLAG_RESERVED0 (1 << 0) /* Was recovery request */
+#define EC_REBOOT_FLAG_ON_AP_SHUTDOWN (1 << 1) /* Reboot after AP shutdown */
+
+struct ec_params_reboot_ec {
+	uint8_t cmd; /* enum ec_reboot_cmd */
+	uint8_t flags; /* See EC_REBOOT_FLAG_* */
+} __packed;
+
+/*
+ * Get information on last EC panic.
+ *
+ * Returns variable-length platform-dependent panic information. See panic.h
+ * for details.
+ */
+#define EC_CMD_GET_PANIC_INFO 0xd3
+
+/*****************************************************************************/
+/*
+ * ACPI commands
+ *
+ * These are valid ONLY on the ACPI command/data port.
+ */
+
+/*
+ * ACPI Read Embedded Controller
+ *
+ * This reads from ACPI memory space on the EC (EC_ACPI_MEM_*).
+ *
+ * Use the following sequence:
+ *
+ * - Write EC_CMD_ACPI_READ to EC_LPC_ADDR_ACPI_CMD
+ * - Wait for EC_LPC_CMDR_PENDING bit to clear
+ * - Write address to EC_LPC_ADDR_ACPI_DATA
+ * - Wait for EC_LPC_CMDR_DATA bit to set
+ * - Read value from EC_LPC_ADDR_ACPI_DATA
+ */
+#define EC_CMD_ACPI_READ 0x80
+
+/*
+ * ACPI Write Embedded Controller
+ *
+ * This writes to ACPI memory space on the EC (EC_ACPI_MEM_*).
+ *
+ * Use the following sequence:
+ *
+ * - Write EC_CMD_ACPI_WRITE to EC_LPC_ADDR_ACPI_CMD
+ * - Wait for EC_LPC_CMDR_PENDING bit to clear
+ * - Write address to EC_LPC_ADDR_ACPI_DATA
+ * - Wait for EC_LPC_CMDR_PENDING bit to clear
+ * - Write value to EC_LPC_ADDR_ACPI_DATA
+ */
+#define EC_CMD_ACPI_WRITE 0x81
+
+/*
+ * ACPI Query Embedded Controller
+ *
+ * This clears the lowest-order bit in the currently pending host events, and
+ * sets the result code to the 1-based index of the bit (event 0x00000001 = 1,
+ * event 0x80000000 = 32), or 0 if no event was pending.
+ */
+#define EC_CMD_ACPI_QUERY_EVENT 0x84
+
+/* Valid addresses in ACPI memory space, for read/write commands */
+/* Memory space version; set to EC_ACPI_MEM_VERSION_CURRENT */
+#define EC_ACPI_MEM_VERSION 0x00
+/*
+ * Test location; writing a value here updates the test complement byte to
+ * (0xff - value).
+ */
+#define EC_ACPI_MEM_TEST 0x01
+/* Test complement; writes here are ignored. */
+#define EC_ACPI_MEM_TEST_COMPLIMENT 0x02
+/* Keyboard backlight brightness percent (0 - 100) */
+#define EC_ACPI_MEM_KEYBOARD_BACKLIGHT 0x03
+
+/* Current version of ACPI memory address space */
+#define EC_ACPI_MEM_VERSION_CURRENT 1
+
+
+/*****************************************************************************/
+/*
+ * Special commands
+ *
+ * These do not follow the normal rules for commands. See each command for
+ * details.
+ */
+
+/*
+ * Reboot NOW
+ *
+ * This command will work even when the EC LPC interface is busy, because the
+ * reboot command is processed at interrupt level. Note that when the EC
+ * reboots, the host will reboot too, so there is no response to this command.
+ *
+ * Use EC_CMD_REBOOT_EC to reboot the EC more politely.
+ */
+#define EC_CMD_REBOOT 0xd1 /* Think "die" */
+
+/*
+ * Resend last response (not supported on LPC).
+ *
+ * Returns EC_RES_UNAVAILABLE if there is no response available - for example,
+ * there was no previous command, or the previous command's response was too
+ * big to save.
+ */
+#define EC_CMD_RESEND_RESPONSE 0xdb
+
+/*
+ * This header byte on a command indicates version 0. Any header byte less
+ * than this means that we are talking to an old EC which doesn't support
+ * versioning. In that case, we assume version 0.
+ *
+ * Header bytes greater than this indicate a later version. For example,
+ * EC_CMD_VERSION0 + 1 means we are using version 1.
+ *
+ * The old EC interface must not use commands 0xdc or higher.
+ */
+#define EC_CMD_VERSION0 0xdc
+
+#endif /* !__ACPI__ */
+
+#endif /* __CROS_EC_COMMANDS_H */
diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h
index ecddc5173c7c..8f21daf62fb5 100644
--- a/include/linux/mfd/palmas.h
+++ b/include/linux/mfd/palmas.h
@@ -1,9 +1,10 @@
 /*
  * TI Palmas
  *
- * Copyright 2011 Texas Instruments Inc.
+ * Copyright 2011-2013 Texas Instruments Inc.
* * Author: Graeme Gregory <[email protected]> + * Author: Ian Lartey <[email protected]> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -22,6 +23,15 @@ #define PALMAS_NUM_CLIENTS 3 +/* The ID_REVISION NUMBERS */ +#define PALMAS_CHIP_OLD_ID 0x0000 +#define PALMAS_CHIP_ID 0xC035 +#define PALMAS_CHIP_CHARGER_ID 0xC036 + +#define is_palmas(a) (((a) == PALMAS_CHIP_OLD_ID) || \ + ((a) == PALMAS_CHIP_ID)) +#define is_palmas_charger(a) ((a) == PALMAS_CHIP_CHARGER_ID) + struct palmas_pmic; struct palmas_gpadc; struct palmas_resource; diff --git a/include/linux/mfd/retu.h b/include/linux/mfd/retu.h index 1e2715d5b836..65471c4a3926 100644 --- a/include/linux/mfd/retu.h +++ b/include/linux/mfd/retu.h @@ -1,5 +1,5 @@ /* - * Retu MFD driver interface + * Retu/Tahvo MFD driver interface * * This file is subject to the terms and conditions of the GNU General * Public License. See the file "COPYING" in the main directory of this @@ -19,4 +19,10 @@ int retu_write(struct retu_dev *, u8, u16); #define RETU_REG_CC1 0x0d /* Common control register 1 */ #define RETU_REG_STATUS 0x16 /* Status register */ +/* Interrupt sources */ +#define TAHVO_INT_VBUS 0 /* VBUS state */ + +/* Interrupt status */ +#define TAHVO_STAT_VBUS (1 << TAHVO_INT_VBUS) + #endif /* __LINUX_MFD_RETU_H */ diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index 26ea7f1b7caf..86bc635f8385 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -500,6 +500,8 @@ #define BPP_POWER_15_PERCENT_ON 0x08 #define BPP_POWER_ON 0x00 #define BPP_POWER_MASK 0x0F +#define SD_VCC_PARTIAL_POWER_ON 0x02 +#define SD_VCC_POWER_ON 0x00 /* PWR_GATE_CTRL */ #define PWR_GATE_EN 0x01 @@ -689,6 +691,40 @@ #define IMAGE_FLAG_ADDR0 0xCE80 #define IMAGE_FLAG_ADDR1 0xCE81 +/* Phy register */ +#define PHY_PCR 0x00 +#define PHY_RCR0 0x01 +#define PHY_RCR1 0x02 +#define PHY_RCR2 0x03 +#define PHY_RTCR 0x04 +#define PHY_RDR 0x05 +#define PHY_TCR0 0x06 +#define PHY_TCR1 0x07 +#define PHY_TUNE 0x08 +#define PHY_IMR 0x09 +#define PHY_BPCR 0x0A +#define PHY_BIST 0x0B +#define PHY_RAW_L 0x0C +#define PHY_RAW_H 0x0D +#define PHY_RAW_DATA 0x0E +#define PHY_HOST_CLK_CTRL 0x0F +#define PHY_DMR 0x10 +#define PHY_BACR 0x11 +#define PHY_IER 0x12 +#define PHY_BCSR 0x13 +#define PHY_BPR 0x14 +#define PHY_BPNR2 0x15 +#define PHY_BPNR 0x16 +#define PHY_BRNR2 0x17 +#define PHY_BENR 0x18 +#define PHY_REG_REV 0x19 +#define PHY_FLD0 0x1A +#define PHY_FLD1 0x1B +#define PHY_FLD2 0x1C +#define PHY_FLD3 0x1D +#define PHY_FLD4 0x1E +#define PHY_DUM_REG 0x1F + #define rtsx_pci_init_cmd(pcr) ((pcr)->ci = 0) struct rtsx_pcr; diff --git a/include/linux/mfd/si476x-core.h b/include/linux/mfd/si476x-core.h new file mode 100644 index 000000000000..ba89b94e4a56 --- /dev/null +++ b/include/linux/mfd/si476x-core.h @@ -0,0 +1,533 @@ +/* + * include/media/si476x-core.h -- Common definitions for si476x core + * device + * + * Copyright (C) 2012 Innovative Converged Devices(ICD) + * Copyright (C) 2013 Andrey Smirnov + * + * Author: Andrey Smirnov <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#ifndef SI476X_CORE_H
+#define SI476X_CORE_H
+
+#include <linux/kfifo.h>
+#include <linux/atomic.h>
+#include <linux/i2c.h>
+#include <linux/regmap.h>
+#include <linux/mutex.h>
+#include <linux/mfd/core.h>
+#include <linux/videodev2.h>
+#include <linux/regulator/consumer.h>
+
+#include <linux/mfd/si476x-platform.h>
+#include <linux/mfd/si476x-reports.h>
+
+/* Command Timeouts */
+#define SI476X_DEFAULT_TIMEOUT 100000
+#define SI476X_TIMEOUT_TUNE 700000
+#define SI476X_TIMEOUT_POWER_UP 330000
+#define SI476X_STATUS_POLL_US 0
+
+/* -------------------- si476x-i2c.c ----------------------- */
+
+enum si476x_freq_supported_chips {
+	SI476X_CHIP_SI4761 = 1,
+	SI476X_CHIP_SI4764,
+	SI476X_CHIP_SI4768,
+};
+
+enum si476x_part_revisions {
+	SI476X_REVISION_A10 = 0,
+	SI476X_REVISION_A20 = 1,
+	SI476X_REVISION_A30 = 2,
+};
+
+enum si476x_mfd_cells {
+	SI476X_RADIO_CELL = 0,
+	SI476X_CODEC_CELL,
+	SI476X_MFD_CELLS,
+};
+
+/**
+ * enum si476x_power_state - possible power state of the si476x
+ * device.
+ *
+ * @SI476X_POWER_DOWN: In this state all regulators are turned off
+ * and the reset line is pulled low. The device is completely
+ * inactive.
+ * @SI476X_POWER_UP_FULL: In this state all the power regulators are
+ * turned on, the reset line is pulled high, the IRQ line is enabled
+ * (polling is active in the polling use scenario) and the device is
+ * turned on with the POWER_UP command. The device is ready to be used.
+ * @SI476X_POWER_INCONSISTENT: This state indicates that the previous
+ * power down was inconsistent, meaning some of the regulators were
+ * not turned off, and thus the device cannot be used again without
+ * power-cycling.
+ */
+enum si476x_power_state {
+	SI476X_POWER_DOWN = 0,
+	SI476X_POWER_UP_FULL = 1,
+	SI476X_POWER_INCONSISTENT = 2,
+};
+
+/**
+ * struct si476x_core - internal data structure representing the
+ * underlying "core" device which all the MFD cell-devices use.
+ *
+ * @client: Actual I2C client used to transfer commands to the chip.
+ * @chip_id: Last digit of the chip model (e.g. "1" for SI4761).
+ * @cells: MFD cell devices created by this driver.
+ * @cmd_lock: Mutex used to serialize all the requests to the core
+ * device. This field should not be used directly. Instead
+ * si476x_core_lock()/si476x_core_unlock() should be used to get
+ * exclusive access to the "core" device.
+ * @users: Active users counter (used by the radio cell).
+ * @rds_read_queue: Wait queue used to wait for RDS data.
+ * @rds_fifo: FIFO in which all the RDS data received from the chip is
+ * placed.
+ * @rds_fifo_drainer: Worker that drains on-chip RDS FIFO.
+ * @rds_drainer_is_working: Flag used for launching only one instance
+ * of the @rds_fifo_drainer.
+ * @rds_drainer_status_lock: Lock used to guard access to the
+ * @rds_drainer_is_working variable.
+ * @command: Wait queue for waiting on command completion.
+ * @cts: Clear To Send flag set upon receiving first status with CTS
+ * set.
+ * @tuning: Wait queue used for waiting for tune/seek command
+ * completion.
+ * @stc: Similar to @cts, but for the STC bit of the status value.
+ * @power_up_parameters: Parameters used as argument for POWER_UP
+ * command when the device is started.
+ * @state: Current power state of the device.
+ * @supplies: Structure containing handles to all power supplies used
+ * by the device (NULL ones are ignored).
+ * @gpio_reset: GPIO pin connected to the RSTB pin of the chip.
+ * @pinmux: Chip's configurable pins configuration.
+ * @diversity_mode: Chip's role when functioning in diversity mode.
+ * @status_monitor: Polling worker used in the polling use case
+ * scenario (when an IRQ is not available).
+ * @revision: Chip's running firmware revision number (used for correct
+ * command set support).
+ */
+
+struct si476x_core {
+	struct i2c_client *client;
+	struct regmap *regmap;
+	int chip_id;
+	struct mfd_cell cells[SI476X_MFD_CELLS];
+
+	struct mutex cmd_lock; /* for serializing fm radio operations */
+	atomic_t users;
+
+	wait_queue_head_t rds_read_queue;
+	struct kfifo rds_fifo;
+	struct work_struct rds_fifo_drainer;
+	bool rds_drainer_is_working;
+	struct mutex rds_drainer_status_lock;
+
+	wait_queue_head_t command;
+	atomic_t cts;
+
+	wait_queue_head_t tuning;
+	atomic_t stc;
+
+	struct si476x_power_up_args power_up_parameters;
+
+	enum si476x_power_state power_state;
+
+	struct regulator_bulk_data supplies[4];
+
+	int gpio_reset;
+
+	struct si476x_pinmux pinmux;
+	enum si476x_phase_diversity_mode diversity_mode;
+
+	atomic_t is_alive;
+
+	struct delayed_work status_monitor;
+#define SI476X_WORK_TO_CORE(w) container_of(to_delayed_work(w),	\
+					    struct si476x_core,	\
+					    status_monitor)
+
+	int revision;
+
+	int rds_fifo_depth;
+};
+
+static inline struct si476x_core *i2c_mfd_cell_to_core(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev->parent);
+	return i2c_get_clientdata(client);
+}
+
+
+/**
+ * si476x_core_lock() - lock the core device to get an exclusive access
+ * to it.
+ */
+static inline void si476x_core_lock(struct si476x_core *core)
+{
+	mutex_lock(&core->cmd_lock);
+}
+
+/**
+ * si476x_core_unlock() - unlock the core device to relinquish an
+ * exclusive access to it.
+ */
+static inline void si476x_core_unlock(struct si476x_core *core)
+{
+	mutex_unlock(&core->cmd_lock);
+}
+
+/* *_TUNE_FREQ family of commands accept frequency in multiples of
+   10kHz */
+static inline u16 hz_to_si476x(struct si476x_core *core, int freq)
+{
+	u16 result;
+
+	switch (core->power_up_parameters.func) {
+	default:
+	case SI476X_FUNC_FM_RECEIVER:
+		result = freq / 10000;
+		break;
+	case SI476X_FUNC_AM_RECEIVER:
+		result = freq / 1000;
+		break;
+	}
+
+	return result;
+}
+
+static inline int si476x_to_hz(struct si476x_core *core, u16 freq)
+{
+	int result;
+
+	switch (core->power_up_parameters.func) {
+	default:
+	case SI476X_FUNC_FM_RECEIVER:
+		result = freq * 10000;
+		break;
+	case SI476X_FUNC_AM_RECEIVER:
+		result = freq * 1000;
+		break;
+	}
+
+	return result;
+}
+
+/* Since the V4L2_TUNER_CAP_LOW flag is supplied, the V4L2 subsystem
+ * measures frequency in 62.5 Hz units */
+
+static inline int hz_to_v4l2(int freq)
+{
+	return (freq * 10) / 625;
+}
+
+static inline int v4l2_to_hz(int freq)
+{
+	return (freq * 625) / 10;
+}
+
+static inline u16 v4l2_to_si476x(struct si476x_core *core, int freq)
+{
+	return hz_to_si476x(core, v4l2_to_hz(freq));
+}
+
+static inline int si476x_to_v4l2(struct si476x_core *core, u16 freq)
+{
+	return hz_to_v4l2(si476x_to_hz(core, freq));
+}
+
+
+
+/**
+ * struct si476x_func_info - structure containing result of the
+ * FUNC_INFO command.
+ *
+ * @firmware.major: Firmware major number.
+ * @firmware.minor[...]: Firmware minor numbers.
+ * @patch_id:
+ * @func: Mode tuner is working in.
+ */
+struct si476x_func_info {
+	struct {
+		u8 major, minor[2];
+	} firmware;
+	u16 patch_id;
+	enum si476x_func func;
+};
+
+/**
+ * struct si476x_power_down_args - structure used to pass parameters
+ * to the POWER_DOWN command
+ *
+ * @xosc: true - Power down, but leave the oscillator running.
+ *        false - Full power down.
+ */
+struct si476x_power_down_args {
+	bool xosc;
+};
+
+/**
+ * enum si476x_tunemode - enum representing possible tune modes for
+ * the chip.
+ * @SI476X_TM_VALIDATED_NORMAL_TUNE: Unconditionally stay on the new
+ * channel after tune, tune status is valid.
+ * @SI476X_TM_INVALIDATED_FAST_TUNE: Unconditionally stay on the new
+ * channel after tune, tune status invalid.
+ * @SI476X_TM_VALIDATED_AF_TUNE: Jump back to the previous channel if
+ * metric thresholds are not met.
+ * @SI476X_TM_VALIDATED_AF_CHECK: Unconditionally jump back to the
+ * previous channel.
+ */
+enum si476x_tunemode {
+	SI476X_TM_VALIDATED_NORMAL_TUNE = 0,
+	SI476X_TM_INVALIDATED_FAST_TUNE = 1,
+	SI476X_TM_VALIDATED_AF_TUNE = 2,
+	SI476X_TM_VALIDATED_AF_CHECK = 3,
+};
+
+/**
+ * enum si476x_smoothmetrics - enum containing the possible settings
+ * for audio transitioning of the chip
+ * @SI476X_SM_INITIALIZE_AUDIO: Initialize audio state to match this
+ * new channel
+ * @SI476X_SM_TRANSITION_AUDIO: Transition audio state from previous
+ * channel values to the new values
+ */
+enum si476x_smoothmetrics {
+	SI476X_SM_INITIALIZE_AUDIO = 0,
+	SI476X_SM_TRANSITION_AUDIO = 1,
+};
+
+/**
+ * struct si476x_rds_status_report - the structure representing the
+ * response to the 'FM_RD_STATUS' command
+ * @rdstpptyint: Traffic program flag (TP) and/or program type (PTY)
+ * code has changed.
+ * @rdspiint: Program identification (PI) code has changed.
+ * @rdssyncint: RDS synchronization has changed.
+ * @rdsfifoint: RDS was received and the RDS FIFO has at least
+ * 'FM_RDS_INTERRUPT_FIFO_COUNT' elements in it.
+ * @tpptyvalid: TP flag and PTY code are valid.
+ * @pivalid: PI code is valid.
+ * @rdssync: RDS is currently synchronized.
+ * @rdsfifolost: One or more RDS groups have been lost/discarded.
+ * @tp: Current channel's TP flag.
+ * @pty: Current channel's PTY code.
+ * @pi: Current channel's PI code.
+ * @rdsfifoused: Number of blocks remaining in the RDS FIFO (0 if
+ * empty).
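+ *
+ * A minimal sketch of draining the FIFO with
+ * si476x_core_cmd_fm_rds_status(), declared later in this header; the
+ * bool arguments are assumptions and error handling is omitted:
+ *
+ *	struct si476x_rds_status_report report;
+ *	do {
+ *		si476x_core_cmd_fm_rds_status(core, true, false, true,
+ *					      &report);
+ *	} while (report.rdsfifoused);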
+ */ +struct si476x_rds_status_report { + bool rdstpptyint, rdspiint, rdssyncint, rdsfifoint; + bool tpptyvalid, pivalid, rdssync, rdsfifolost; + bool tp; + + u8 pty; + u16 pi; + + u8 rdsfifoused; + u8 ble[4]; + + struct v4l2_rds_data rds[4]; +}; + +struct si476x_rsq_status_args { + bool primary; + bool rsqack; + bool attune; + bool cancel; + bool stcack; +}; + +enum si476x_injside { + SI476X_INJSIDE_AUTO = 0, + SI476X_INJSIDE_LOW = 1, + SI476X_INJSIDE_HIGH = 2, +}; + +struct si476x_tune_freq_args { + bool zifsr; + bool hd; + enum si476x_injside injside; + int freq; + enum si476x_tunemode tunemode; + enum si476x_smoothmetrics smoothmetrics; + int antcap; +}; + +int si476x_core_stop(struct si476x_core *, bool); +int si476x_core_start(struct si476x_core *, bool); +int si476x_core_set_power_state(struct si476x_core *, enum si476x_power_state); +bool si476x_core_has_am(struct si476x_core *); +bool si476x_core_has_diversity(struct si476x_core *); +bool si476x_core_is_a_secondary_tuner(struct si476x_core *); +bool si476x_core_is_a_primary_tuner(struct si476x_core *); +bool si476x_core_is_in_am_receiver_mode(struct si476x_core *core); +bool si476x_core_is_powered_up(struct si476x_core *core); + +enum si476x_i2c_type { + SI476X_I2C_SEND, + SI476X_I2C_RECV +}; + +int si476x_core_i2c_xfer(struct si476x_core *, + enum si476x_i2c_type, + char *, int); + + +/* -------------------- si476x-cmd.c ----------------------- */ + +int si476x_core_cmd_func_info(struct si476x_core *, struct si476x_func_info *); +int si476x_core_cmd_set_property(struct si476x_core *, u16, u16); +int si476x_core_cmd_get_property(struct si476x_core *, u16); +int si476x_core_cmd_dig_audio_pin_cfg(struct si476x_core *, + enum si476x_dclk_config, + enum si476x_dfs_config, + enum si476x_dout_config, + enum si476x_xout_config); +int si476x_core_cmd_zif_pin_cfg(struct si476x_core *, + enum si476x_iqclk_config, + enum si476x_iqfs_config, + enum si476x_iout_config, + enum si476x_qout_config); +int si476x_core_cmd_ic_link_gpo_ctl_pin_cfg(struct si476x_core *, + enum si476x_icin_config, + enum si476x_icip_config, + enum si476x_icon_config, + enum si476x_icop_config); +int si476x_core_cmd_ana_audio_pin_cfg(struct si476x_core *, + enum si476x_lrout_config); +int si476x_core_cmd_intb_pin_cfg(struct si476x_core *, enum si476x_intb_config, + enum si476x_a1_config); +int si476x_core_cmd_fm_seek_start(struct si476x_core *, bool, bool); +int si476x_core_cmd_am_seek_start(struct si476x_core *, bool, bool); +int si476x_core_cmd_fm_rds_status(struct si476x_core *, bool, bool, bool, + struct si476x_rds_status_report *); +int si476x_core_cmd_fm_rds_blockcount(struct si476x_core *, bool, + struct si476x_rds_blockcount_report *); +int si476x_core_cmd_fm_tune_freq(struct si476x_core *, + struct si476x_tune_freq_args *); +int si476x_core_cmd_am_tune_freq(struct si476x_core *, + struct si476x_tune_freq_args *); +int si476x_core_cmd_am_rsq_status(struct si476x_core *, + struct si476x_rsq_status_args *, + struct si476x_rsq_status_report *); +int si476x_core_cmd_fm_rsq_status(struct si476x_core *, + struct si476x_rsq_status_args *, + struct si476x_rsq_status_report *); +int si476x_core_cmd_power_up(struct si476x_core *, + struct si476x_power_up_args *); +int si476x_core_cmd_power_down(struct si476x_core *, + struct si476x_power_down_args *); +int si476x_core_cmd_fm_phase_div_status(struct si476x_core *); +int si476x_core_cmd_fm_phase_diversity(struct si476x_core *, + enum si476x_phase_diversity_mode); + +int si476x_core_cmd_fm_acf_status(struct si476x_core *, + 
struct si476x_acf_status_report *); +int si476x_core_cmd_am_acf_status(struct si476x_core *, + struct si476x_acf_status_report *); +int si476x_core_cmd_agc_status(struct si476x_core *, + struct si476x_agc_status_report *); + +enum si476x_power_grid_type { + SI476X_POWER_GRID_50HZ = 0, + SI476X_POWER_GRID_60HZ, +}; + +/* Properties */ + +enum si476x_interrupt_flags { + SI476X_STCIEN = (1 << 0), + SI476X_ACFIEN = (1 << 1), + SI476X_RDSIEN = (1 << 2), + SI476X_RSQIEN = (1 << 3), + + SI476X_ERRIEN = (1 << 6), + SI476X_CTSIEN = (1 << 7), + + SI476X_STCREP = (1 << 8), + SI476X_ACFREP = (1 << 9), + SI476X_RDSREP = (1 << 10), + SI476X_RSQREP = (1 << 11), +}; + +enum si476x_rdsint_sources { + SI476X_RDSTPPTY = (1 << 4), + SI476X_RDSPI = (1 << 3), + SI476X_RDSSYNC = (1 << 1), + SI476X_RDSRECV = (1 << 0), +}; + +enum si476x_status_response_bits { + SI476X_CTS = (1 << 7), + SI476X_ERR = (1 << 6), + /* Status response for WB receiver */ + SI476X_WB_ASQ_INT = (1 << 4), + SI476X_RSQ_INT = (1 << 3), + /* Status response for FM receiver */ + SI476X_FM_RDS_INT = (1 << 2), + SI476X_ACF_INT = (1 << 1), + SI476X_STC_INT = (1 << 0), +}; + +/* -------------------- si476x-prop.c ----------------------- */ + +enum si476x_common_receiver_properties { + SI476X_PROP_INT_CTL_ENABLE = 0x0000, + SI476X_PROP_DIGITAL_IO_INPUT_SAMPLE_RATE = 0x0200, + SI476X_PROP_DIGITAL_IO_INPUT_FORMAT = 0x0201, + SI476X_PROP_DIGITAL_IO_OUTPUT_SAMPLE_RATE = 0x0202, + SI476X_PROP_DIGITAL_IO_OUTPUT_FORMAT = 0x0203, + + SI476X_PROP_SEEK_BAND_BOTTOM = 0x1100, + SI476X_PROP_SEEK_BAND_TOP = 0x1101, + SI476X_PROP_SEEK_FREQUENCY_SPACING = 0x1102, + + SI476X_PROP_VALID_MAX_TUNE_ERROR = 0x2000, + SI476X_PROP_VALID_SNR_THRESHOLD = 0x2003, + SI476X_PROP_VALID_RSSI_THRESHOLD = 0x2004, +}; + +enum si476x_am_receiver_properties { + SI476X_PROP_AUDIO_PWR_LINE_FILTER = 0x0303, +}; + +enum si476x_fm_receiver_properties { + SI476X_PROP_AUDIO_DEEMPHASIS = 0x0302, + + SI476X_PROP_FM_RDS_INTERRUPT_SOURCE = 0x4000, + SI476X_PROP_FM_RDS_INTERRUPT_FIFO_COUNT = 0x4001, + SI476X_PROP_FM_RDS_CONFIG = 0x4002, +}; + +enum si476x_prop_audio_pwr_line_filter_bits { + SI476X_PROP_PWR_HARMONICS_MASK = 0x001f, + SI476X_PROP_PWR_GRID_MASK = 0x0100, + SI476X_PROP_PWR_ENABLE_MASK = 0x0200, + SI476X_PROP_PWR_GRID_50HZ = 0x0000, + SI476X_PROP_PWR_GRID_60HZ = 0x0100, +}; + +enum si476x_prop_fm_rds_config_bits { + SI476X_PROP_RDSEN_MASK = 0x1, + SI476X_PROP_RDSEN = 0x1, +}; + + +struct regmap *devm_regmap_init_si476x(struct si476x_core *); + +#endif /* SI476X_CORE_H */ diff --git a/include/linux/mfd/si476x-platform.h b/include/linux/mfd/si476x-platform.h new file mode 100644 index 000000000000..88bb93b7a9d5 --- /dev/null +++ b/include/linux/mfd/si476x-platform.h @@ -0,0 +1,267 @@ +/* + * include/media/si476x-platform.h -- Platform data specific definitions + * + * Copyright (C) 2013 Andrey Smirnov + * + * Author: Andrey Smirnov <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + +#ifndef __SI476X_PLATFORM_H__ +#define __SI476X_PLATFORM_H__ + +/* It is possible to select one of the four adresses using pins A0 + * and A1 on SI476x */ +#define SI476X_I2C_ADDR_1 0x60 +#define SI476X_I2C_ADDR_2 0x61 +#define SI476X_I2C_ADDR_3 0x62 +#define SI476X_I2C_ADDR_4 0x63 + +enum si476x_iqclk_config { + SI476X_IQCLK_NOOP = 0, + SI476X_IQCLK_TRISTATE = 1, + SI476X_IQCLK_IQ = 21, +}; +enum si476x_iqfs_config { + SI476X_IQFS_NOOP = 0, + SI476X_IQFS_TRISTATE = 1, + SI476X_IQFS_IQ = 21, +}; +enum si476x_iout_config { + SI476X_IOUT_NOOP = 0, + SI476X_IOUT_TRISTATE = 1, + SI476X_IOUT_OUTPUT = 22, +}; +enum si476x_qout_config { + SI476X_QOUT_NOOP = 0, + SI476X_QOUT_TRISTATE = 1, + SI476X_QOUT_OUTPUT = 22, +}; + +enum si476x_dclk_config { + SI476X_DCLK_NOOP = 0, + SI476X_DCLK_TRISTATE = 1, + SI476X_DCLK_DAUDIO = 10, +}; + +enum si476x_dfs_config { + SI476X_DFS_NOOP = 0, + SI476X_DFS_TRISTATE = 1, + SI476X_DFS_DAUDIO = 10, +}; + +enum si476x_dout_config { + SI476X_DOUT_NOOP = 0, + SI476X_DOUT_TRISTATE = 1, + SI476X_DOUT_I2S_OUTPUT = 12, + SI476X_DOUT_I2S_INPUT = 13, +}; + +enum si476x_xout_config { + SI476X_XOUT_NOOP = 0, + SI476X_XOUT_TRISTATE = 1, + SI476X_XOUT_I2S_INPUT = 13, + SI476X_XOUT_MODE_SELECT = 23, +}; + +enum si476x_icin_config { + SI476X_ICIN_NOOP = 0, + SI476X_ICIN_TRISTATE = 1, + SI476X_ICIN_GPO1_HIGH = 2, + SI476X_ICIN_GPO1_LOW = 3, + SI476X_ICIN_IC_LINK = 30, +}; + +enum si476x_icip_config { + SI476X_ICIP_NOOP = 0, + SI476X_ICIP_TRISTATE = 1, + SI476X_ICIP_GPO2_HIGH = 2, + SI476X_ICIP_GPO2_LOW = 3, + SI476X_ICIP_IC_LINK = 30, +}; + +enum si476x_icon_config { + SI476X_ICON_NOOP = 0, + SI476X_ICON_TRISTATE = 1, + SI476X_ICON_I2S = 10, + SI476X_ICON_IC_LINK = 30, +}; + +enum si476x_icop_config { + SI476X_ICOP_NOOP = 0, + SI476X_ICOP_TRISTATE = 1, + SI476X_ICOP_I2S = 10, + SI476X_ICOP_IC_LINK = 30, +}; + + +enum si476x_lrout_config { + SI476X_LROUT_NOOP = 0, + SI476X_LROUT_TRISTATE = 1, + SI476X_LROUT_AUDIO = 2, + SI476X_LROUT_MPX = 3, +}; + + +enum si476x_intb_config { + SI476X_INTB_NOOP = 0, + SI476X_INTB_TRISTATE = 1, + SI476X_INTB_DAUDIO = 10, + SI476X_INTB_IRQ = 40, +}; + +enum si476x_a1_config { + SI476X_A1_NOOP = 0, + SI476X_A1_TRISTATE = 1, + SI476X_A1_IRQ = 40, +}; + + +struct si476x_pinmux { + enum si476x_dclk_config dclk; + enum si476x_dfs_config dfs; + enum si476x_dout_config dout; + enum si476x_xout_config xout; + + enum si476x_iqclk_config iqclk; + enum si476x_iqfs_config iqfs; + enum si476x_iout_config iout; + enum si476x_qout_config qout; + + enum si476x_icin_config icin; + enum si476x_icip_config icip; + enum si476x_icon_config icon; + enum si476x_icop_config icop; + + enum si476x_lrout_config lrout; + + enum si476x_intb_config intb; + enum si476x_a1_config a1; +}; + +enum si476x_ibias6x { + SI476X_IBIAS6X_OTHER = 0, + SI476X_IBIAS6X_RCVR1_NON_4MHZ_CLK = 1, +}; + +enum si476x_xstart { + SI476X_XSTART_MULTIPLE_TUNER = 0x11, + SI476X_XSTART_NORMAL = 0x77, +}; + +enum si476x_freq { + SI476X_FREQ_4_MHZ = 0, + SI476X_FREQ_37P209375_MHZ = 1, + SI476X_FREQ_36P4_MHZ = 2, + SI476X_FREQ_37P8_MHZ = 3, +}; + +enum si476x_xmode { + SI476X_XMODE_CRYSTAL_RCVR1 = 1, + SI476X_XMODE_EXT_CLOCK = 2, + SI476X_XMODE_CRYSTAL_RCVR2_3 = 3, +}; + +enum si476x_xbiashc { + SI476X_XBIASHC_SINGLE_RECEIVER = 0, + SI476X_XBIASHC_MULTIPLE_RECEIVER = 1, +}; + +enum si476x_xbias { + SI476X_XBIAS_RCVR2_3 = 0, + SI476X_XBIAS_4MHZ_RCVR1 = 3, + SI476X_XBIAS_RCVR1 = 7, +}; + +enum si476x_func { + SI476X_FUNC_BOOTLOADER = 0, + SI476X_FUNC_FM_RECEIVER = 1, + SI476X_FUNC_AM_RECEIVER = 2, + 
SI476X_FUNC_WB_RECEIVER = 3,
+};
+
+
+/**
+ * @xcload: Selects the amount of additional on-chip capacitance to
+ *          be connected between XTAL1 and gnd and between XTAL2 and
+ *          GND. One half of the capacitance value shown here is the
+ *          additional load capacitance presented to the xtal. The
+ *          minimum step size is 0.277 pF. Recommended value is 0x28
+ *          but it will be layout dependent. Range is 0–0x3F i.e.
+ *          (0–16.33 pF)
+ * @ctsien: enable CTSINT (interrupt request when the CTS condition
+ *          arises) when set
+ * @intsel: when set, the A1 pin becomes the interrupt pin; otherwise,
+ *          INTB is the interrupt pin
+ * @func:   selects the boot function of the device, i.e.
+ *          SI476X_BOOTLOADER  - Boot loader
+ *          SI476X_FM_RECEIVER - FM receiver
+ *          SI476X_AM_RECEIVER - AM receiver
+ *          SI476X_WB_RECEIVER - Weatherband receiver
+ * @freq:   oscillator's crystal frequency:
+ *          SI476X_XTAL_37P209375_MHZ - 37.209375 Mhz
+ *          SI476X_XTAL_36P4_MHZ      - 36.4 Mhz
+ *          SI476X_XTAL_37P8_MHZ      - 37.8 Mhz
+ */
+struct si476x_power_up_args {
+	enum si476x_ibias6x ibias6x;
+	enum si476x_xstart xstart;
+	u8 xcload;
+	bool fastboot;
+	enum si476x_xbiashc xbiashc;
+	enum si476x_xbias xbias;
+	enum si476x_func func;
+	enum si476x_freq freq;
+	enum si476x_xmode xmode;
+};
+
+
+/**
+ * enum si476x_phase_diversity_mode - possible phase diversity modes
+ * for SI4764/5/6/7 chips.
+ *
+ * @SI476X_PHDIV_DISABLED:		Phase diversity feature is
+ *					disabled.
+ * @SI476X_PHDIV_PRIMARY_COMBINING:	Tuner works as a primary tuner
+ *					in combination with a
+ *					secondary one.
+ * @SI476X_PHDIV_PRIMARY_ANTENNA:	Tuner works as a primary tuner
+ *					using only its own antenna.
+ * @SI476X_PHDIV_SECONDARY_ANTENNA:	Tuner works as a primary tuner
+ *					using the secondary tuner's
+ *					antenna.
+ * @SI476X_PHDIV_SECONDARY_COMBINING:	Tuner works as a secondary
+ *					tuner in combination with the
+ *					primary one.
+ */
+enum si476x_phase_diversity_mode {
+	SI476X_PHDIV_DISABLED = 0,
+	SI476X_PHDIV_PRIMARY_COMBINING = 1,
+	SI476X_PHDIV_PRIMARY_ANTENNA = 2,
+	SI476X_PHDIV_SECONDARY_ANTENNA = 3,
+	SI476X_PHDIV_SECONDARY_COMBINING = 5,
+};
+
+
+/*
+ * Platform dependent definition
+ */
+struct si476x_platform_data {
+	int gpio_reset; /* < 0 if not used */
+
+	struct si476x_power_up_args power_up_parameters;
+	enum si476x_phase_diversity_mode diversity_mode;
+
+	struct si476x_pinmux pinmux;
+};
+
+
+#endif /* __SI476X_PLATFORM_H__ */
diff --git a/include/linux/mfd/si476x-reports.h b/include/linux/mfd/si476x-reports.h
new file mode 100644
index 000000000000..e0b9455a79c0
--- /dev/null
+++ b/include/linux/mfd/si476x-reports.h
@@ -0,0 +1,163 @@
+/*
+ * include/media/si476x-reports.h -- Definitions of the data formats
+ * returned by debugfs hooks
+ *
+ * Copyright (C) 2013 Andrey Smirnov
+ *
+ * Author: Andrey Smirnov <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#ifndef __SI476X_REPORTS_H__
+#define __SI476X_REPORTS_H__
+
+/**
+ * struct si476x_rsq_status_report - structure containing received
+ * signal quality
+ * @multhint: Multipath Detect High.
+ *            true  - Indicates that the value is below
+ *                    FM_RSQ_MULTIPATH_HIGH_THRESHOLD
+ *            false - Indicates that the value is above
+ *                    FM_RSQ_MULTIPATH_HIGH_THRESHOLD
+ * @multlint: Multipath Detect Low.
+ *            true  - Indicates that the value is below
+ *                    FM_RSQ_MULTIPATH_LOW_THRESHOLD
+ *            false - Indicates that the value is above
+ *                    FM_RSQ_MULTIPATH_LOW_THRESHOLD
+ * @snrhint:  SNR Detect High.
+ *            true  - Indicates that the value is below
+ *                    FM_RSQ_SNR_HIGH_THRESHOLD
+ *            false - Indicates that the value is above
+ *                    FM_RSQ_SNR_HIGH_THRESHOLD
+ * @snrlint:  SNR Detect Low.
+ *            true  - Indicates that the value is below
+ *                    FM_RSQ_SNR_LOW_THRESHOLD
+ *            false - Indicates that the value is above
+ *                    FM_RSQ_SNR_LOW_THRESHOLD
+ * @rssihint: RSSI Detect High.
+ *            true  - Indicates that the value is below
+ *                    FM_RSQ_RSSI_HIGH_THRESHOLD
+ *            false - Indicates that the value is above
+ *                    FM_RSQ_RSSI_HIGH_THRESHOLD
+ * @rssilint: RSSI Detect Low.
+ *            true  - Indicates that the value is below
+ *                    FM_RSQ_RSSI_LOW_THRESHOLD
+ *            false - Indicates that the value is above
+ *                    FM_RSQ_RSSI_LOW_THRESHOLD
+ * @bltf:     Band Limit.
+ *            Set if the seek command hit the band limit or wrapped to
+ *            the original frequency.
+ * @snr_ready: SNR measurement in progress.
+ * @rssiready: RSSI measurement in progress.
+ * @afcrl:    Set if FREQOFF >= MAX_TUNE_ERROR
+ * @valid:    Set if the channel is valid
+ *             rssi < FM_VALID_RSSI_THRESHOLD
+ *             snr < FM_VALID_SNR_THRESHOLD
+ *             tune_error < FM_VALID_MAX_TUNE_ERROR
+ * @readfreq: Current tuned frequency.
+ * @freqoff:  Signed frequency offset.
+ * @rssi:     Received Signal Strength Indicator (dBuV).
+ * @snr:      RF SNR Indicator (dB).
+ * @lassi:
+ * @hassi:    Low/High side Adjacent (100 kHz) Channel Strength Indicator
+ * @mult:     Multipath indicator
+ * @dev:      Who knows? But values may vary.
+ * @readantcap: Antenna tuning capacity value.
+ * @assi: Adjacent Channel(+/- 200kHz) Strength Indicator + * @usn: Ultrasonic Noise Inticator in -DBFS + */ +struct si476x_rsq_status_report { + __u8 multhint, multlint; + __u8 snrhint, snrlint; + __u8 rssihint, rssilint; + __u8 bltf; + __u8 snr_ready; + __u8 rssiready; + __u8 injside; + __u8 afcrl; + __u8 valid; + + __u16 readfreq; + __s8 freqoff; + __s8 rssi; + __s8 snr; + __s8 issi; + __s8 lassi, hassi; + __s8 mult; + __u8 dev; + __u16 readantcap; + __s8 assi; + __s8 usn; + + __u8 pilotdev; + __u8 rdsdev; + __u8 assidev; + __u8 strongdev; + __u16 rdspi; +} __packed; + +/** + * si476x_acf_status_report - ACF report results + * + * @blend_int: If set, indicates that stereo separation has crossed + * below the blend threshold as set by FM_ACF_BLEND_THRESHOLD + * @hblend_int: If set, indicates that HiBlend cutoff frequency is + * lower than threshold as set by FM_ACF_HBLEND_THRESHOLD + * @hicut_int: If set, indicates that HiCut cutoff frequency is lower + * than the threshold set by ACF_ + + */ +struct si476x_acf_status_report { + __u8 blend_int; + __u8 hblend_int; + __u8 hicut_int; + __u8 chbw_int; + __u8 softmute_int; + __u8 smute; + __u8 smattn; + __u8 chbw; + __u8 hicut; + __u8 hiblend; + __u8 pilot; + __u8 stblend; +} __packed; + +enum si476x_fmagc { + SI476X_FMAGC_10K_OHM = 0, + SI476X_FMAGC_800_OHM = 1, + SI476X_FMAGC_400_OHM = 2, + SI476X_FMAGC_200_OHM = 4, + SI476X_FMAGC_100_OHM = 8, + SI476X_FMAGC_50_OHM = 16, + SI476X_FMAGC_25_OHM = 32, + SI476X_FMAGC_12P5_OHM = 64, + SI476X_FMAGC_6P25_OHM = 128, +}; + +struct si476x_agc_status_report { + __u8 mxhi; + __u8 mxlo; + __u8 lnahi; + __u8 lnalo; + __u8 fmagc1; + __u8 fmagc2; + __u8 pgagain; + __u8 fmwblang; +} __packed; + +struct si476x_rds_blockcount_report { + __u16 expected; + __u16 received; + __u16 uncorrectable; +} __packed; + +#endif /* __SI476X_REPORTS_H__ */ diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h index 383ac1512a39..48395a69a7e9 100644 --- a/include/linux/mfd/stmpe.h +++ b/include/linux/mfd/stmpe.h @@ -26,6 +26,7 @@ enum stmpe_partnum { STMPE801, STMPE811, STMPE1601, + STMPE1801, STMPE2401, STMPE2403, STMPE_NBR_PARTS @@ -39,6 +40,7 @@ enum { STMPE_IDX_CHIP_ID, STMPE_IDX_ICR_LSB, STMPE_IDX_IER_LSB, + STMPE_IDX_ISR_LSB, STMPE_IDX_ISR_MSB, STMPE_IDX_GPMR_LSB, STMPE_IDX_GPSR_LSB, @@ -49,6 +51,7 @@ enum { STMPE_IDX_GPFER_LSB, STMPE_IDX_GPAFR_U_MSB, STMPE_IDX_IEGPIOR_LSB, + STMPE_IDX_ISGPIOR_LSB, STMPE_IDX_ISGPIOR_MSB, STMPE_IDX_MAX, }; diff --git a/include/linux/mfd/syscon.h b/include/linux/mfd/syscon.h index 6aeb6b8da64d..b473577f36db 100644 --- a/include/linux/mfd/syscon.h +++ b/include/linux/mfd/syscon.h @@ -15,8 +15,11 @@ #ifndef __LINUX_MFD_SYSCON_H__ #define __LINUX_MFD_SYSCON_H__ +struct device_node; + extern struct regmap *syscon_node_to_regmap(struct device_node *np); extern struct regmap *syscon_regmap_lookup_by_compatible(const char *s); +extern struct regmap *syscon_regmap_lookup_by_pdevname(const char *s); extern struct regmap *syscon_regmap_lookup_by_phandle( struct device_node *np, const char *property); diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h index 998628a2b08b..3f43069413e7 100644 --- a/include/linux/mfd/tps65090.h +++ b/include/linux/mfd/tps65090.h @@ -27,6 +27,7 @@ /* TPS65090 IRQs */ enum { + TPS65090_IRQ_INTERRUPT, TPS65090_IRQ_VAC_STATUS_CHANGE, TPS65090_IRQ_VSYS_STATUS_CHANGE, TPS65090_IRQ_BAT_STATUS_CHANGE, diff --git a/include/linux/module.h b/include/linux/module.h index ead1b5719a12..46f1ea01e6f6 100644 --- a/include/linux/module.h +++ 
b/include/linux/module.h @@ -190,7 +190,7 @@ extern int modules_disabled; /* for sysctl */ /* Get/put a kernel symbol (calls must be symmetric) */ void *__symbol_get(const char *symbol); void *__symbol_get_gpl(const char *symbol); -#define symbol_get(x) ((typeof(&x))(__symbol_get(MODULE_SYMBOL_PREFIX #x))) +#define symbol_get(x) ((typeof(&x))(__symbol_get(VMLINUX_SYMBOL_STR(x)))) /* modules using other modules: kdb wants to see this. */ struct module_use { @@ -453,7 +453,7 @@ extern void __module_put_and_exit(struct module *mod, long code) #ifdef CONFIG_MODULE_UNLOAD unsigned long module_refcount(struct module *mod); void __symbol_put(const char *symbol); -#define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x) +#define symbol_put(x) __symbol_put(VMLINUX_SYMBOL_STR(x)) void symbol_put_addr(void *addr); /* Sometimes we know we already have a refcount, and it's easier not diff --git a/include/linux/of.h b/include/linux/of.h index fb2002f3c7dc..1b671c3809b8 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -356,6 +356,11 @@ static inline struct device_node *of_find_node_by_name(struct device_node *from, return NULL; } +static inline struct device_node *of_get_parent(const struct device_node *node) +{ + return NULL; +} + static inline bool of_have_populated_dt(void) { return false; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e0373d26c244..f463a46424e2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -788,6 +788,12 @@ static inline int __perf_event_disable(void *info) { return -1; } static inline void perf_event_task_tick(void) { } #endif +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL) +extern bool perf_event_can_stop_tick(void); +#else +static inline bool perf_event_can_stop_tick(void) { return true; } +#endif + #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) extern void perf_restore_debug_store(void); #else diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 60bac697a91b..7794d75ed155 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -123,6 +123,8 @@ void run_posix_cpu_timers(struct task_struct *task); void posix_cpu_timers_exit(struct task_struct *task); void posix_cpu_timers_exit_group(struct task_struct *task); +bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk); + void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, cputime_t *newval, cputime_t *oldval); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9ed2c9a4de45..4ccd68e49b00 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1000,4 +1000,11 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kfree_rcu(ptr, rcu_head) \ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) +#ifdef CONFIG_RCU_NOCB_CPU +extern bool rcu_is_nocb_cpu(int cpu); +#else +static inline bool rcu_is_nocb_cpu(int cpu) { return false; } +#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ + + #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 6f950048b6e9..4800e9d1864c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -231,7 +231,7 @@ extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(int cpu); -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void nohz_balance_enter_idle(int cpu); extern void set_cpu_sd_state_idle(void); extern int 
get_nohz_timer_target(void); @@ -1764,13 +1764,13 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, } #endif -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON void calc_load_enter_idle(void); void calc_load_exit_idle(void); #else static inline void calc_load_enter_idle(void) { } static inline void calc_load_exit_idle(void) { } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ #ifndef CONFIG_CPUMASK_OFFSTACK static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) @@ -1856,10 +1856,17 @@ extern void idle_task_exit(void); static inline void idle_task_exit(void) {} #endif -#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) -extern void wake_up_idle_cpu(int cpu); +#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) +extern void wake_up_nohz_cpu(int cpu); #else -static inline void wake_up_idle_cpu(int cpu) { } +static inline void wake_up_nohz_cpu(int cpu) { } +#endif + +#ifdef CONFIG_NO_HZ_FULL +extern bool sched_can_stop_tick(void); +extern u64 scheduler_tick_max_deferment(void); +#else +static inline bool sched_can_stop_tick(void) { return false; } #endif #ifdef CONFIG_SCHED_AUTOGROUP diff --git a/include/linux/tick.h b/include/linux/tick.h index 553272e6af55..9180f4b85e6d 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -82,7 +82,7 @@ extern int tick_program_event(ktime_t expires, int force); extern void tick_setup_sched_timer(void); # endif -# if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS +# if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS extern void tick_cancel_sched_timer(int cpu); # else static inline void tick_cancel_sched_timer(int cpu) { } @@ -123,7 +123,7 @@ static inline void tick_check_idle(int cpu) { } static inline int tick_oneshot_mode_active(void) { return 0; } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ -# ifdef CONFIG_NO_HZ +# ifdef CONFIG_NO_HZ_COMMON DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched); static inline int tick_nohz_tick_stopped(void) @@ -138,7 +138,7 @@ extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); -# else /* !CONFIG_NO_HZ */ +# else /* !CONFIG_NO_HZ_COMMON */ static inline int tick_nohz_tick_stopped(void) { return 0; @@ -155,7 +155,24 @@ static inline ktime_t tick_nohz_get_sleep_length(void) } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } -# endif /* !NO_HZ */ +# endif /* !CONFIG_NO_HZ_COMMON */ + +#ifdef CONFIG_NO_HZ_FULL +extern void tick_nohz_init(void); +extern int tick_nohz_full_cpu(int cpu); +extern void tick_nohz_full_check(void); +extern void tick_nohz_full_kick(void); +extern void tick_nohz_full_kick_all(void); +extern void tick_nohz_task_switch(struct task_struct *tsk); +#else +static inline void tick_nohz_init(void) { } +static inline int tick_nohz_full_cpu(int cpu) { return 0; } +static inline void tick_nohz_full_check(void) { } +static inline void tick_nohz_full_kick(void) { } +static inline void tick_nohz_full_kick_all(void) { } +static inline void tick_nohz_task_switch(struct task_struct *tsk) { } +#endif + # ifdef CONFIG_CPU_IDLE_GOV_MENU extern void menu_hrtimer_cancel(void); diff --git a/include/linux/ucb1400.h b/include/linux/ucb1400.h index d21b33c4c6ca..2e9ee4d1c676 100644 --- a/include/linux/ucb1400.h +++ b/include/linux/ucb1400.h @@ -83,15 +83,12 @@ #define UCB_ID 0x7e #define UCB_ID_1400 0x4304 -struct 
ucb1400_gpio_data { - int gpio_offset; - int (*gpio_setup)(struct device *dev, int ngpio); - int (*gpio_teardown)(struct device *dev, int ngpio); -}; - struct ucb1400_gpio { struct gpio_chip gc; struct snd_ac97 *ac97; + int gpio_offset; + int (*gpio_setup)(struct device *dev, int ngpio); + int (*gpio_teardown)(struct device *dev, int ngpio); }; struct ucb1400_ts { @@ -110,6 +107,9 @@ struct ucb1400 { struct ucb1400_pdata { int irq; + int gpio_offset; + int (*gpio_setup)(struct device *dev, int ngpio); + int (*gpio_teardown)(struct device *dev, int ngpio); }; static inline u16 ucb1400_reg_read(struct snd_ac97 *ac97, u16 reg) @@ -162,10 +162,4 @@ static inline void ucb1400_adc_disable(struct snd_ac97 *ac97) unsigned int ucb1400_adc_read(struct snd_ac97 *ac97, u16 adc_channel, int adcsync); -#ifdef CONFIG_GPIO_UCB1400 -void __init ucb1400_gpio_set_data(struct ucb1400_gpio_data *data); -#else -static inline void ucb1400_gpio_set_data(struct ucb1400_gpio_data *data) {} -#endif - #endif diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 19911dddaeb7..7005d1109ec9 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -37,7 +37,7 @@ TRACE_EVENT(kvm_userspace_exit, __entry->errno < 0 ? -__entry->errno : __entry->reason) ); -#if defined(__KVM_HAVE_IRQ_LINE) +#if defined(CONFIG_HAVE_KVM_IRQCHIP) TRACE_EVENT(kvm_set_irq, TP_PROTO(unsigned int gsi, int level, int irq_source_id), TP_ARGS(gsi, level, irq_source_id), @@ -122,6 +122,10 @@ TRACE_EVENT(kvm_msi_set_irq, {KVM_IRQCHIP_PIC_SLAVE, "PIC slave"}, \ {KVM_IRQCHIP_IOAPIC, "IOAPIC"} +#endif /* defined(__KVM_HAVE_IOAPIC) */ + +#if defined(CONFIG_HAVE_KVM_IRQCHIP) + TRACE_EVENT(kvm_ack_irq, TP_PROTO(unsigned int irqchip, unsigned int pin), TP_ARGS(irqchip, pin), @@ -136,14 +140,18 @@ TRACE_EVENT(kvm_ack_irq, __entry->pin = pin; ), +#ifdef kvm_irqchips TP_printk("irqchip %s pin %u", __print_symbolic(__entry->irqchip, kvm_irqchips), __entry->pin) +#else + TP_printk("irqchip %d pin %u", __entry->irqchip, __entry->pin) +#endif ); +#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */ -#endif /* defined(__KVM_HAVE_IOAPIC) */ #define KVM_TRACE_MMIO_READ_UNSATISFIED 0 #define KVM_TRACE_MMIO_READ 1 diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 8d219470624f..68c2c2000f02 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -323,6 +323,27 @@ TRACE_EVENT(itimer_expire, (int) __entry->pid, (unsigned long long)__entry->now) ); +#ifdef CONFIG_NO_HZ_COMMON +TRACE_EVENT(tick_stop, + + TP_PROTO(int success, char *error_msg), + + TP_ARGS(success, error_msg), + + TP_STRUCT__entry( + __field( int , success ) + __string( msg, error_msg ) + ), + + TP_fast_assign( + __entry->success = success; + __assign_str(msg, error_msg); + ), + + TP_printk("success=%s msg=%s", __entry->success ? 
"yes" : "no", __get_str(msg)) +); +#endif + #endif /* _TRACE_TIMER_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 3c56ba3d80c1..a5c86fc34a37 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -449,12 +449,15 @@ enum { kvm_ioeventfd_flag_nr_datamatch, kvm_ioeventfd_flag_nr_pio, kvm_ioeventfd_flag_nr_deassign, + kvm_ioeventfd_flag_nr_virtio_ccw_notify, kvm_ioeventfd_flag_nr_max, }; #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch) #define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio) #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) +#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \ + (1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify) #define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 1) @@ -558,9 +561,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_MP_STATE 14 #define KVM_CAP_COALESCED_MMIO 15 #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ -#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT #define KVM_CAP_DEVICE_ASSIGNMENT 17 -#endif #define KVM_CAP_IOMMU 18 #ifdef __KVM_HAVE_MSI #define KVM_CAP_DEVICE_MSI 20 @@ -576,13 +577,9 @@ struct kvm_ppc_smmu_info { #ifdef __KVM_HAVE_PIT #define KVM_CAP_REINJECT_CONTROL 24 #endif -#ifdef __KVM_HAVE_IOAPIC #define KVM_CAP_IRQ_ROUTING 25 -#endif #define KVM_CAP_IRQ_INJECT_STATUS 26 -#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT #define KVM_CAP_DEVICE_DEASSIGNMENT 27 -#endif #ifdef __KVM_HAVE_MSIX #define KVM_CAP_DEVICE_MSIX 28 #endif @@ -665,6 +662,10 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_EPR 86 #define KVM_CAP_ARM_PSCI 87 #define KVM_CAP_ARM_SET_DEVICE_ADDR 88 +#define KVM_CAP_DEVICE_CTRL 89 +#define KVM_CAP_IRQ_MPIC 90 +#define KVM_CAP_PPC_RTAS 91 +#define KVM_CAP_IRQ_XICS 92 #ifdef KVM_CAP_IRQ_ROUTING @@ -818,6 +819,28 @@ struct kvm_arm_device_addr { }; /* + * Device control API, available with KVM_CAP_DEVICE_CTRL + */ +#define KVM_CREATE_DEVICE_TEST 1 + +struct kvm_create_device { + __u32 type; /* in: KVM_DEV_TYPE_xxx */ + __u32 fd; /* out: device handle */ + __u32 flags; /* in: KVM_CREATE_DEVICE_xxx */ +}; + +struct kvm_device_attr { + __u32 flags; /* no flags currently defined */ + __u32 group; /* device-defined */ + __u64 attr; /* group-defined */ + __u64 addr; /* userspace address of attr data */ +}; + +#define KVM_DEV_TYPE_FSL_MPIC_20 1 +#define KVM_DEV_TYPE_FSL_MPIC_42 2 +#define KVM_DEV_TYPE_XICS 3 + +/* * ioctls for VM fds */ #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) @@ -904,6 +927,16 @@ struct kvm_s390_ucas_mapping { #define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd) /* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */ #define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) +/* Available with KVM_CAP_PPC_RTAS */ +#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args) + +/* ioctl for vm fd */ +#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) + +/* ioctls for fds returned by KVM_CREATE_DEVICE */ +#define KVM_SET_DEVICE_ATTR _IOW(KVMIO, 0xe1, struct kvm_device_attr) +#define KVM_GET_DEVICE_ATTR _IOW(KVMIO, 0xe2, struct kvm_device_attr) +#define KVM_HAS_DEVICE_ATTR _IOW(KVMIO, 0xe3, struct kvm_device_attr) /* * ioctls for vcpu fds diff --git a/init/Kconfig b/init/Kconfig index a76d13189e47..9d3a7887a6d3 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -302,7 +302,7 @@ choice # Kind of a stub config for the pure tick based cputime accounting 
config TICK_CPU_ACCOUNTING bool "Simple tick based cputime accounting" - depends on !S390 + depends on !S390 && !NO_HZ_FULL help This is the basic tick based cputime accounting that maintains statistics about user, system and idle time spent on per jiffies @@ -312,7 +312,7 @@ config TICK_CPU_ACCOUNTING config VIRT_CPU_ACCOUNTING_NATIVE bool "Deterministic task and CPU time accounting" - depends on HAVE_VIRT_CPU_ACCOUNTING + depends on HAVE_VIRT_CPU_ACCOUNTING && !NO_HZ_FULL select VIRT_CPU_ACCOUNTING help Select this option to enable more accurate task and CPU time @@ -342,7 +342,7 @@ config VIRT_CPU_ACCOUNTING_GEN config IRQ_TIME_ACCOUNTING bool "Fine granularity task level IRQ time accounting" - depends on HAVE_IRQ_TIME_ACCOUNTING + depends on HAVE_IRQ_TIME_ACCOUNTING && !NO_HZ_FULL help Select this option to enable fine granularity task irq time accounting. This is done by reading a timestamp on each @@ -576,7 +576,7 @@ config RCU_FANOUT_EXACT config RCU_FAST_NO_HZ bool "Accelerate last non-dyntick-idle CPU's grace periods" - depends on NO_HZ && SMP + depends on NO_HZ_COMMON && SMP default n help This option permits CPUs to enter dynticks-idle state even if @@ -687,7 +687,7 @@ choice config RCU_NOCB_CPU_NONE bool "No build_forced no-CBs CPUs" - depends on RCU_NOCB_CPU + depends on RCU_NOCB_CPU && !NO_HZ_FULL help This option does not force any of the CPUs to be no-CBs CPUs. Only CPUs designated by the rcu_nocbs= boot parameter will be @@ -695,7 +695,7 @@ config RCU_NOCB_CPU_NONE config RCU_NOCB_CPU_ZERO bool "CPU 0 is a build_forced no-CBs CPU" - depends on RCU_NOCB_CPU + depends on RCU_NOCB_CPU && !NO_HZ_FULL help This option forces CPU 0 to be a no-CBs CPU. Additional CPUs may be designated as no-CBs CPUs using the rcu_nocbs= boot diff --git a/init/main.c b/init/main.c index ceed17aaedfd..9484f4ba88d0 100644 --- a/init/main.c +++ b/init/main.c @@ -544,6 +544,7 @@ asmlinkage void __init start_kernel(void) idr_init_cache(); perf_event_init(); rcu_init(); + tick_nohz_init(); radix_tree_init(); /* init some links before init_ISA_irqs() */ early_irq_init(); diff --git a/ipc/sem.c b/ipc/sem.c index 4734e9c2a98a..899b598b63be 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -264,12 +264,13 @@ static inline void sem_unlock(struct sem_array *sma, int locknum) struct sem *sem = sma->sem_base + locknum; spin_unlock(&sem->lock); } - rcu_read_unlock(); } /* * sem_lock_(check_) routines are called in the paths where the rw_mutex * is not held. + * + * The caller holds the RCU read lock. 
*/ static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns, int id, struct sembuf *sops, int nsops, int *locknum) @@ -277,12 +278,9 @@ static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp; struct sem_array *sma; - rcu_read_lock(); ipcp = ipc_obtain_object(&sem_ids(ns), id); - if (IS_ERR(ipcp)) { - sma = ERR_CAST(ipcp); - goto err; - } + if (IS_ERR(ipcp)) + return ERR_CAST(ipcp); sma = container_of(ipcp, struct sem_array, sem_perm); *locknum = sem_lock(sma, sops, nsops); @@ -294,10 +292,7 @@ static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns, return container_of(ipcp, struct sem_array, sem_perm); sem_unlock(sma, *locknum); - sma = ERR_PTR(-EINVAL); -err: - rcu_read_unlock(); - return sma; + return ERR_PTR(-EINVAL); } static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id) @@ -323,15 +318,13 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns static inline void sem_lock_and_putref(struct sem_array *sma) { - rcu_read_lock(); sem_lock(sma, NULL, -1); ipc_rcu_putref(sma); } static inline void sem_putref(struct sem_array *sma) { - sem_lock_and_putref(sma); - sem_unlock(sma, -1); + ipc_rcu_putref(sma); } static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) @@ -435,6 +428,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) sma->sem_nsems = nsems; sma->sem_ctime = get_seconds(); sem_unlock(sma, -1); + rcu_read_unlock(); return sma->sem_perm.id; } @@ -874,6 +868,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) /* Remove the semaphore set from the IDR */ sem_rmid(ns, sma); sem_unlock(sma, -1); + rcu_read_unlock(); wake_up_sem_queue_do(&tasks); ns->used_sems -= sma->sem_nsems; @@ -953,8 +948,8 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, memset(&tbuf, 0, sizeof(tbuf)); + rcu_read_lock(); if (cmd == SEM_STAT) { - rcu_read_lock(); sma = sem_obtain_object(ns, semid); if (IS_ERR(sma)) { err = PTR_ERR(sma); @@ -962,7 +957,6 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, } id = sma->sem_perm.id; } else { - rcu_read_lock(); sma = sem_obtain_object_check(ns, semid); if (IS_ERR(sma)) { err = PTR_ERR(sma); @@ -1055,6 +1049,7 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, /* maybe some queued-up processes were waiting for this */ do_smart_update(sma, NULL, 0, 0, &tasks); sem_unlock(sma, -1); + rcu_read_unlock(); wake_up_sem_queue_do(&tasks); return 0; } @@ -1081,17 +1076,12 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, nsems = sma->sem_nsems; err = -EACCES; - if (ipcperms(ns, &sma->sem_perm, - cmd == SETALL ? S_IWUGO : S_IRUGO)) { - rcu_read_unlock(); - goto out_wakeup; - } + if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? 
S_IWUGO : S_IRUGO)) + goto out_rcu_wakeup; err = security_sem_semctl(sma, cmd); - if (err) { - rcu_read_unlock(); - goto out_wakeup; - } + if (err) + goto out_rcu_wakeup; err = -EACCES; switch (cmd) { @@ -1104,19 +1094,23 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, if(nsems > SEMMSL_FAST) { if (!ipc_rcu_getref(sma)) { sem_unlock(sma, -1); + rcu_read_unlock(); err = -EIDRM; goto out_free; } sem_unlock(sma, -1); + rcu_read_unlock(); sem_io = ipc_alloc(sizeof(ushort)*nsems); if(sem_io == NULL) { sem_putref(sma); return -ENOMEM; } + rcu_read_lock(); sem_lock_and_putref(sma); if (sma->sem_perm.deleted) { sem_unlock(sma, -1); + rcu_read_unlock(); err = -EIDRM; goto out_free; } @@ -1124,6 +1118,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, for (i = 0; i < sma->sem_nsems; i++) sem_io[i] = sma->sem_base[i].semval; sem_unlock(sma, -1); + rcu_read_unlock(); err = 0; if(copy_to_user(array, sem_io, nsems*sizeof(ushort))) err = -EFAULT; @@ -1161,9 +1156,11 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, goto out_free; } } + rcu_read_lock(); sem_lock_and_putref(sma); if (sma->sem_perm.deleted) { sem_unlock(sma, -1); + rcu_read_unlock(); err = -EIDRM; goto out_free; } @@ -1185,10 +1182,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, /* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */ } err = -EINVAL; - if (semnum < 0 || semnum >= nsems) { - rcu_read_unlock(); - goto out_wakeup; - } + if (semnum < 0 || semnum >= nsems) + goto out_rcu_wakeup; sem_lock(sma, NULL, -1); curr = &sma->sem_base[semnum]; @@ -1210,7 +1205,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, out_unlock: sem_unlock(sma, -1); -out_wakeup: +out_rcu_wakeup: + rcu_read_unlock(); wake_up_sem_queue_do(&tasks); out_free: if(sem_io != fast_sem_io) @@ -1272,7 +1268,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid, err = security_sem_semctl(sma, cmd); if (err) { rcu_read_unlock(); - goto out_unlock; + goto out_up; } switch(cmd){ @@ -1295,6 +1291,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid, out_unlock: sem_unlock(sma, -1); + rcu_read_unlock(); out_up: up_write(&sem_ids(ns).rw_mutex); return err; @@ -1443,9 +1440,11 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) } /* step 3: Acquire the lock on semaphore array */ + rcu_read_lock(); sem_lock_and_putref(sma); if (sma->sem_perm.deleted) { sem_unlock(sma, -1); + rcu_read_unlock(); kfree(new); un = ERR_PTR(-EIDRM); goto out; @@ -1472,7 +1471,6 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) success: spin_unlock(&ulp->lock); - rcu_read_lock(); sem_unlock(sma, -1); out: return un; @@ -1579,22 +1577,16 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, } error = -EFBIG; - if (max >= sma->sem_nsems) { - rcu_read_unlock(); - goto out_wakeup; - } + if (max >= sma->sem_nsems) + goto out_rcu_wakeup; error = -EACCES; - if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) { - rcu_read_unlock(); - goto out_wakeup; - } + if (ipcperms(ns, &sma->sem_perm, alter ? 
S_IWUGO : S_IRUGO)) + goto out_rcu_wakeup; error = security_sem_semop(sma, sops, nsops, alter); - if (error) { - rcu_read_unlock(); - goto out_wakeup; - } + if (error) + goto out_rcu_wakeup; /* * semid identifiers are not unique - find_alloc_undo may have @@ -1648,6 +1640,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, sleep_again: current->state = TASK_INTERRUPTIBLE; sem_unlock(sma, locknum); + rcu_read_unlock(); if (timeout) jiffies_left = schedule_timeout(jiffies_left); @@ -1669,6 +1662,7 @@ sleep_again: goto out_free; } + rcu_read_lock(); sma = sem_obtain_lock(ns, semid, sops, nsops, &locknum); /* @@ -1680,6 +1674,7 @@ sleep_again: * Array removed? If yes, leave without sem_unlock(). */ if (IS_ERR(sma)) { + rcu_read_unlock(); goto out_free; } @@ -1709,7 +1704,8 @@ sleep_again: out_unlock_free: sem_unlock(sma, locknum); -out_wakeup: +out_rcu_wakeup: + rcu_read_unlock(); wake_up_sem_queue_do(&tasks); out_free: if(sops != fast_sops) @@ -1801,6 +1797,7 @@ void exit_sem(struct task_struct *tsk) * exactly the same semid. Nothing to do. */ sem_unlock(sma, -1); + rcu_read_unlock(); continue; } @@ -1841,6 +1838,7 @@ void exit_sem(struct task_struct *tsk) INIT_LIST_HEAD(&tasks); do_smart_update(sma, NULL, 0, 1, &tasks); sem_unlock(sma, -1); + rcu_read_unlock(); wake_up_sem_queue_do(&tasks); kfree_rcu(un, rcu); diff --git a/kernel/Makefile b/kernel/Makefile index d1574d47cf27..271fd3119af9 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -176,7 +176,7 @@ signing_key.priv signing_key.x509: x509.genkey openssl req -new -nodes -utf8 -$(CONFIG_MODULE_SIG_HASH) -days 36500 \ -batch -x509 -config x509.genkey \ -outform DER -out signing_key.x509 \ - -keyout signing_key.priv + -keyout signing_key.priv 2>&1 @echo "###" @echo "### Key pair generated." @echo "###" diff --git a/kernel/events/core.c b/kernel/events/core.c index 3820e3cefbae..6b41c1899a8b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -18,6 +18,7 @@ #include <linux/poll.h> #include <linux/slab.h> #include <linux/hash.h> +#include <linux/tick.h> #include <linux/sysfs.h> #include <linux/dcache.h> #include <linux/percpu.h> @@ -685,8 +686,12 @@ static void perf_pmu_rotate_start(struct pmu *pmu) WARN_ON(!irqs_disabled()); - if (list_empty(&cpuctx->rotation_list)) + if (list_empty(&cpuctx->rotation_list)) { + int was_empty = list_empty(head); list_add(&cpuctx->rotation_list, head); + if (was_empty) + tick_nohz_full_kick(); + } } static void get_ctx(struct perf_event_context *ctx) @@ -2591,6 +2596,16 @@ done: list_del_init(&cpuctx->rotation_list); } +#ifdef CONFIG_NO_HZ_FULL +bool perf_event_can_stop_tick(void) +{ + if (list_empty(&__get_cpu_var(rotation_list))) + return true; + else + return false; +} +#endif + void perf_event_task_tick(void) { struct list_head *head = &__get_cpu_var(rotation_list); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 97fddb09762b..cd55144270b5 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -326,11 +326,16 @@ void rb_free(struct ring_buffer *rb) } #else +static int data_page_nr(struct ring_buffer *rb) +{ + return rb->nr_pages << page_order(rb); +} struct page * perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) { - if (pgoff > (1UL << page_order(rb))) + /* The '>' counts in the user page. 
*/ + if (pgoff > data_page_nr(rb)) return NULL; return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE); @@ -350,10 +355,11 @@ static void rb_free_work(struct work_struct *work) int i, nr; rb = container_of(work, struct ring_buffer, work); - nr = 1 << page_order(rb); + nr = data_page_nr(rb); base = rb->user_page; - for (i = 0; i < nr + 1; i++) + /* The '<=' counts in the user page. */ + for (i = 0; i <= nr; i++) perf_mmap_unmark_page(base + (i * PAGE_SIZE)); vfree(base); @@ -387,7 +393,7 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) rb->user_page = all_buf; rb->data_pages[0] = all_buf + PAGE_SIZE; rb->page_order = ilog2(nr_pages); - rb->nr_pages = 1; + rb->nr_pages = !!nr_pages; ring_buffer_init(rb, watermark, flags); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 609d8ff38b74..fd4b13b131f8 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -172,7 +172,7 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, */ static int hrtimer_get_target(int this_cpu, int pinned) { -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) return get_nohz_timer_target(); #endif @@ -1125,7 +1125,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) } EXPORT_SYMBOL_GPL(hrtimer_get_remaining); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /** * hrtimer_get_next_event - get the time until next expiry event * diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 2169feeba529..3127ad52cdb2 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -84,9 +84,11 @@ static int is_ksym_addr(unsigned long addr) /* * Expand a compressed symbol data into the resulting uncompressed string, + * if uncompressed string is too long (>= maxlen), it will be truncated, * given the offset to where the symbol is in the compressed stream. */ -static unsigned int kallsyms_expand_symbol(unsigned int off, char *result) +static unsigned int kallsyms_expand_symbol(unsigned int off, + char *result, size_t maxlen) { int len, skipped_first = 0; const u8 *tptr, *data; @@ -113,15 +115,20 @@ static unsigned int kallsyms_expand_symbol(unsigned int off, char *result) while (*tptr) { if (skipped_first) { + if (maxlen <= 1) + goto tail; *result = *tptr; result++; + maxlen--; } else skipped_first = 1; tptr++; } } - *result = '\0'; +tail: + if (maxlen) + *result = '\0'; /* Return to offset to the next symbol. 
*/ return off; @@ -176,7 +183,7 @@ unsigned long kallsyms_lookup_name(const char *name) unsigned int off; for (i = 0, off = 0; i < kallsyms_num_syms; i++) { - off = kallsyms_expand_symbol(off, namebuf); + off = kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); if (strcmp(namebuf, name) == 0) return kallsyms_addresses[i]; @@ -195,7 +202,7 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, int ret; for (i = 0, off = 0; i < kallsyms_num_syms; i++) { - off = kallsyms_expand_symbol(off, namebuf); + off = kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); ret = fn(data, namebuf, NULL, kallsyms_addresses[i]); if (ret != 0) return ret; @@ -294,7 +301,8 @@ const char *kallsyms_lookup(unsigned long addr, pos = get_symbol_pos(addr, symbolsize, offset); /* Grab name */ - kallsyms_expand_symbol(get_symbol_offset(pos), namebuf); + kallsyms_expand_symbol(get_symbol_offset(pos), + namebuf, KSYM_NAME_LEN); if (modname) *modname = NULL; return namebuf; @@ -315,7 +323,8 @@ int lookup_symbol_name(unsigned long addr, char *symname) pos = get_symbol_pos(addr, NULL, NULL); /* Grab name */ - kallsyms_expand_symbol(get_symbol_offset(pos), symname); + kallsyms_expand_symbol(get_symbol_offset(pos), + symname, KSYM_NAME_LEN); return 0; } /* See if it's in a module. */ @@ -333,7 +342,8 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size, pos = get_symbol_pos(addr, size, offset); /* Grab name */ - kallsyms_expand_symbol(get_symbol_offset(pos), name); + kallsyms_expand_symbol(get_symbol_offset(pos), + name, KSYM_NAME_LEN); modname[0] = '\0'; return 0; } @@ -463,7 +473,7 @@ static unsigned long get_ksymbol_core(struct kallsym_iter *iter) iter->type = kallsyms_get_symbol_type(off); - off = kallsyms_expand_symbol(off, iter->name); + off = kallsyms_expand_symbol(off, iter->name, ARRAY_SIZE(iter->name)); return off - iter->nameoff; } diff --git a/kernel/modsign_certificate.S b/kernel/modsign_certificate.S index 246b4c6e6135..4a9a86d12c8b 100644 --- a/kernel/modsign_certificate.S +++ b/kernel/modsign_certificate.S @@ -1,15 +1,8 @@ -/* SYMBOL_PREFIX defined on commandline from CONFIG_SYMBOL_PREFIX */ -#ifndef SYMBOL_PREFIX -#define ASM_SYMBOL(sym) sym -#else -#define PASTE2(x,y) x##y -#define PASTE(x,y) PASTE2(x,y) -#define ASM_SYMBOL(sym) PASTE(SYMBOL_PREFIX, sym) -#endif +#include <linux/export.h> #define GLOBAL(name) \ - .globl ASM_SYMBOL(name); \ - ASM_SYMBOL(name): + .globl VMLINUX_SYMBOL(name); \ + VMLINUX_SYMBOL(name): .section ".init.data","aw" diff --git a/kernel/module.c b/kernel/module.c index 0925c9a71975..b049939177f6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1209,10 +1209,11 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, /* Since this should be found in kernel (which can't be removed), * no locking is necessary. */ - if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL, + if (!find_symbol(VMLINUX_SYMBOL_STR(module_layout), NULL, &crc, true, false)) BUG(); - return check_version(sechdrs, versindex, "module_layout", mod, crc, + return check_version(sechdrs, versindex, + VMLINUX_SYMBOL_STR(module_layout), mod, crc, NULL); } @@ -1861,12 +1862,12 @@ static void free_module(struct module *mod) { trace_module_free(mod); - /* Delete from various lists */ - mutex_lock(&module_mutex); - stop_machine(__unlink_module, mod, NULL); - mutex_unlock(&module_mutex); mod_sysfs_teardown(mod); + /* We leave it in list to prevent duplicate loads, but make sure + * that noone uses it while it's being deconstructed. 
*/ + mod->state = MODULE_STATE_UNFORMED; + /* Remove dynamic debug info */ ddebug_remove_module(mod->name); @@ -1879,6 +1880,11 @@ static void free_module(struct module *mod) /* Free any allocated parameters. */ destroy_params(mod->kp, mod->num_kp); + /* Now we can delete it from the lists */ + mutex_lock(&module_mutex); + stop_machine(__unlink_module, mod, NULL); + mutex_unlock(&module_mutex); + /* This may be NULL, but that's OK */ unset_module_init_ro_nx(mod); module_free(mod, mod->module_init); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 8fd709c9bb58..42670e9b44e0 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -10,6 +10,8 @@ #include <linux/kernel_stat.h> #include <trace/events/timer.h> #include <linux/random.h> +#include <linux/tick.h> +#include <linux/workqueue.h> /* * Called after updating RLIMIT_CPU to run cpu timer and update @@ -153,6 +155,21 @@ static void bump_cpu_timer(struct k_itimer *timer, } } +/** + * task_cputime_zero - Check a task_cputime struct for all zero fields. + * + * @cputime: The struct to compare. + * + * Checks @cputime to see if all fields are zero. Returns true if all fields + * are zero, false if any field is nonzero. + */ +static inline int task_cputime_zero(const struct task_cputime *cputime) +{ + if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime) + return 1; + return 0; +} + static inline cputime_t prof_ticks(struct task_struct *p) { cputime_t utime, stime; @@ -636,6 +653,37 @@ static int cpu_timer_sample_group(const clockid_t which_clock, return 0; } +#ifdef CONFIG_NO_HZ_FULL +static void nohz_kick_work_fn(struct work_struct *work) +{ + tick_nohz_full_kick_all(); +} + +static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn); + +/* + * We need the IPIs to be sent from sane process context. + * The posix cpu timers are always set with irqs disabled. + */ +static void posix_cpu_timer_kick_nohz(void) +{ + schedule_work(&nohz_kick_work); +} + +bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk) +{ + if (!task_cputime_zero(&tsk->cputime_expires)) + return false; + + if (tsk->signal->cputimer.running) + return false; + + return true; +} +#else +static inline void posix_cpu_timer_kick_nohz(void) { } +#endif + /* * Guts of sys_timer_settime for CPU timers. * This is called with the timer locked and interrupts disabled. @@ -794,6 +842,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, sample_to_timespec(timer->it_clock, old_incr, &old->it_interval); } + if (!ret) + posix_cpu_timer_kick_nohz(); return ret; } @@ -1008,21 +1058,6 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, } } -/** - * task_cputime_zero - Check a task_cputime struct for all zero fields. - * - * @cputime: The struct to compare. - * - * Checks @cputime to see if all fields are zero. Returns true if all fields - * are zero, false if any field is nonzero. - */ -static inline int task_cputime_zero(const struct task_cputime *cputime) -{ - if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime) - return 1; - return 0; -} - /* * Check for any per-thread CPU timers that have fired and move them * off the tsk->*_timers list onto the firing list. Per-thread timers @@ -1336,6 +1371,13 @@ void run_posix_cpu_timers(struct task_struct *tsk) cpu_timer_fire(timer); spin_unlock(&timer->it_lock); } + + /* + * In case some timers were rescheduled after the queue got emptied, + * wake up full dynticks CPUs. 
+ */ + if (tsk->signal->cputimer.running) + posix_cpu_timer_kick_nohz(); } /* @@ -1366,7 +1408,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, } if (!*newval) - return; + goto out; *newval += now.cpu; } @@ -1384,6 +1426,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, tsk->signal->cputime_expires.virt_exp = *newval; break; } +out: + posix_cpu_timer_kick_nohz(); } static int do_cpu_nanosleep(const clockid_t which_clock, int flags, diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d8534308fd05..16ea67925015 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -799,6 +799,16 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) rdp->offline_fqs++; return 1; } + + /* + * There is a possibility that a CPU in adaptive-ticks state + * might run in the kernel with the scheduling-clock tick disabled + * for an extended time period. Invoke rcu_kick_nohz_cpu() to + * force the CPU to restart the scheduling-clock tick in this + * CPU is in this state. + */ + rcu_kick_nohz_cpu(rdp->cpu); + return 0; } @@ -1820,7 +1830,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { /* No-CBs CPUs do not have orphanable callbacks. */ - if (is_nocb_cpu(rdp->cpu)) + if (rcu_is_nocb_cpu(rdp->cpu)) return; /* @@ -2892,10 +2902,10 @@ static void _rcu_barrier(struct rcu_state *rsp) * corresponding CPU's preceding callbacks have been invoked. */ for_each_possible_cpu(cpu) { - if (!cpu_online(cpu) && !is_nocb_cpu(cpu)) + if (!cpu_online(cpu) && !rcu_is_nocb_cpu(cpu)) continue; rdp = per_cpu_ptr(rsp->rda, cpu); - if (is_nocb_cpu(cpu)) { + if (rcu_is_nocb_cpu(cpu)) { _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, rsp->n_barrier_done); atomic_inc(&rsp->barrier_cpu_count); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 14ee40795d6f..da77a8f57ff9 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -530,13 +530,13 @@ static int rcu_nocb_needs_gp(struct rcu_state *rsp); static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq); static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp); static void rcu_init_one_nocb(struct rcu_node *rnp); -static bool is_nocb_cpu(int cpu); static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, bool lazy); static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, struct rcu_data *rdp); static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp); +static void rcu_kick_nohz_cpu(int cpu); static bool init_nocb_callback_list(struct rcu_data *rdp); #endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index d084ae3f281c..170814dc418f 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -28,6 +28,7 @@ #include <linux/gfp.h> #include <linux/oom.h> #include <linux/smpboot.h> +#include <linux/tick.h> #define RCU_KTHREAD_PRIO 1 @@ -1705,7 +1706,7 @@ static void rcu_prepare_for_idle(int cpu) return; /* If this is a no-CBs CPU, no callbacks, just return. */ - if (is_nocb_cpu(cpu)) + if (rcu_is_nocb_cpu(cpu)) return; /* @@ -1747,7 +1748,7 @@ static void rcu_cleanup_after_idle(int cpu) struct rcu_data *rdp; struct rcu_state *rsp; - if (is_nocb_cpu(cpu)) + if (rcu_is_nocb_cpu(cpu)) return; rcu_try_advance_all_cbs(); for_each_rcu_flavor(rsp) { @@ -2052,7 +2053,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) } /* Is the specified CPU a no-CPUs CPU? 
*/ -static bool is_nocb_cpu(int cpu) +bool rcu_is_nocb_cpu(int cpu) { if (have_rcu_nocb_mask) return cpumask_test_cpu(cpu, rcu_nocb_mask); @@ -2110,7 +2111,7 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, bool lazy) { - if (!is_nocb_cpu(rdp->cpu)) + if (!rcu_is_nocb_cpu(rdp->cpu)) return 0; __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy); if (__is_kfree_rcu_offset((unsigned long)rhp->func)) @@ -2134,7 +2135,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, long qll = rsp->qlen_lazy; /* If this is not a no-CBs CPU, tell the caller to do it the old way. */ - if (!is_nocb_cpu(smp_processor_id())) + if (!rcu_is_nocb_cpu(smp_processor_id())) return 0; rsp->qlen = 0; rsp->qlen_lazy = 0; @@ -2306,11 +2307,6 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) { } -static bool is_nocb_cpu(int cpu) -{ - return false; -} - static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, bool lazy) { @@ -2337,3 +2333,20 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) } #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ + +/* + * An adaptive-ticks CPU can potentially execute in kernel mode for an + * arbitrarily long period of time with the scheduling-clock tick turned + * off. RCU will be paying attention to this CPU because it is in the + * kernel, but the CPU cannot be guaranteed to be executing the RCU state + * machine because the scheduling-clock tick has been disabled. Therefore, + * if an adaptive-ticks CPU is failing to respond to the current grace + * period and has not be idle from an RCU perspective, kick it. + */ +static void rcu_kick_nohz_cpu(int cpu) +{ +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_cpu(cpu)) + smp_send_reschedule(cpu); +#endif /* #ifdef CONFIG_NO_HZ_FULL */ +} diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 49099e81c87b..cf6c17412932 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -95,7 +95,7 @@ static const struct file_operations rcubarrier_fops = { .open = rcubarrier_open, .read = seq_read, .llseek = no_llseek, - .release = seq_release, + .release = single_release, }; #ifdef CONFIG_RCU_BOOST @@ -206,7 +206,7 @@ static const struct file_operations rcuexp_fops = { .open = rcuexp_open, .read = seq_read, .llseek = no_llseek, - .release = seq_release, + .release = single_release, }; #ifdef CONFIG_RCU_BOOST @@ -306,7 +306,7 @@ static const struct file_operations rcuhier_fops = { .open = rcuhier_open, .read = seq_read, .llseek = no_llseek, - .release = seq_release, + .release = single_release, }; static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp) @@ -348,7 +348,7 @@ static const struct file_operations rcugp_fops = { .open = rcugp_open, .read = seq_read, .llseek = no_llseek, - .release = seq_release, + .release = single_release, }; static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5662f58f0b69..58453b8272fd 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -544,7 +544,7 @@ void resched_cpu(int cpu) raw_spin_unlock_irqrestore(&rq->lock, flags); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * In the semi idle case, use the nearest busy cpu for migrating timers * from an idle cpu. This is good for power-savings. @@ -582,7 +582,7 @@ unlock: * account when the CPU goes back to idle and evaluates the timer * wheel for the next timer event. 
*/ -void wake_up_idle_cpu(int cpu) +static void wake_up_idle_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); @@ -612,20 +612,56 @@ void wake_up_idle_cpu(int cpu) smp_send_reschedule(cpu); } +static bool wake_up_full_nohz_cpu(int cpu) +{ + if (tick_nohz_full_cpu(cpu)) { + if (cpu != smp_processor_id() || + tick_nohz_tick_stopped()) + smp_send_reschedule(cpu); + return true; + } + + return false; +} + +void wake_up_nohz_cpu(int cpu) +{ + if (!wake_up_full_nohz_cpu(cpu)) + wake_up_idle_cpu(cpu); +} + static inline bool got_nohz_idle_kick(void) { int cpu = smp_processor_id(); return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)); } -#else /* CONFIG_NO_HZ */ +#else /* CONFIG_NO_HZ_COMMON */ static inline bool got_nohz_idle_kick(void) { return false; } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ + +#ifdef CONFIG_NO_HZ_FULL +bool sched_can_stop_tick(void) +{ + struct rq *rq; + + rq = this_rq(); + + /* Make sure rq->nr_running update is visible after the IPI */ + smp_rmb(); + + /* More than one running task need preemption */ + if (rq->nr_running > 1) + return false; + + return true; +} +#endif /* CONFIG_NO_HZ_FULL */ void sched_avg_update(struct rq *rq) { @@ -1357,7 +1393,8 @@ static void sched_ttwu_pending(void) void scheduler_ipi(void) { - if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) + if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick() + && !tick_nohz_full_cpu(smp_processor_id())) return; /* @@ -1374,6 +1411,7 @@ void scheduler_ipi(void) * somewhat pessimize the simple resched case. */ irq_enter(); + tick_nohz_full_check(); sched_ttwu_pending(); /* @@ -1855,6 +1893,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) kprobe_flush_task(prev); put_task_struct(prev); } + + tick_nohz_task_switch(current); } #ifdef CONFIG_SMP @@ -2118,7 +2158,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) return load >> FSHIFT; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Handle NO_HZ for the global load-average. * @@ -2344,12 +2384,12 @@ static void calc_global_nohz(void) smp_wmb(); calc_load_idx++; } -#else /* !CONFIG_NO_HZ */ +#else /* !CONFIG_NO_HZ_COMMON */ static inline long calc_load_fold_idle(void) { return 0; } static inline void calc_global_nohz(void) { } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * calc_load - update the avenrun load estimates 10 ticks after the @@ -2509,7 +2549,7 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, sched_avg_update(this_rq); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * There is no sane way to deal with nohz on smp when using jiffies because the * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading @@ -2569,7 +2609,7 @@ void update_cpu_load_nohz(void) } raw_spin_unlock(&this_rq->lock); } -#endif /* CONFIG_NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * Called from scheduler_tick() @@ -2696,7 +2736,34 @@ void scheduler_tick(void) rq->idle_balance = idle_cpu(cpu); trigger_load_balance(rq, cpu); #endif + rq_last_tick_reset(rq); +} + +#ifdef CONFIG_NO_HZ_FULL +/** + * scheduler_tick_max_deferment + * + * Keep at least one tick per second when a single + * active task is running because the scheduler doesn't + * yet completely support full dynticks environment. + * + * This makes sure that uptime, CFS vruntime, load + * balancing, etc... continue to move forward, even + * with a very low granularity. 
+ */ +u64 scheduler_tick_max_deferment(void) +{ + struct rq *rq = this_rq(); + unsigned long next, now = ACCESS_ONCE(jiffies); + + next = rq->last_sched_tick + HZ; + + if (time_before_eq(next, now)) + return 0; + + return jiffies_to_usecs(next - now) * NSEC_PER_USEC; } +#endif notrace unsigned long get_parent_ip(unsigned long addr) { @@ -6951,9 +7018,12 @@ void __init sched_init(void) INIT_LIST_HEAD(&rq->cfs_tasks); rq_attach_root(rq, &def_root_domain); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON rq->nohz_flags = 0; #endif +#ifdef CONFIG_NO_HZ_FULL + rq->last_sched_tick = 0; +#endif #endif init_rq_hrtick(rq); atomic_set(&rq->nr_iowait, 0); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8bf7081b1ec5..c61a614465c8 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5355,7 +5355,7 @@ out_unlock: return 0; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * idle load balancing details * - When one of the busy CPUs notice that there may be an idle rebalancing @@ -5572,9 +5572,9 @@ out: rq->next_balance = next_balance; } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* - * In CONFIG_NO_HZ case, the idle balance kickee will do the + * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the * rebalancing for all the cpus for whom scheduler ticks are stopped. */ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) @@ -5717,7 +5717,7 @@ void trigger_load_balance(struct rq *rq, int cpu) if (time_after_eq(jiffies, rq->next_balance) && likely(!on_null_domain(cpu))) raise_softirq(SCHED_SOFTIRQ); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu))) nohz_balancer_kick(cpu); #endif @@ -6187,7 +6187,7 @@ __init void init_sched_fair_class(void) #ifdef CONFIG_SMP open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON nohz.next_balance = jiffies; zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); cpu_notifier(sched_ilb_notifier, 0); diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index b8ce77328341..d8da01008d39 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c @@ -17,6 +17,7 @@ select_task_rq_idle(struct task_struct *p, int sd_flag, int flags) static void pre_schedule_idle(struct rq *rq, struct task_struct *prev) { idle_exit_fair(rq); + rq_last_tick_reset(rq); } static void post_schedule_idle(struct rq *rq) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 4c225c4c7111..ce39224d6155 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -5,6 +5,7 @@ #include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/stop_machine.h> +#include <linux/tick.h> #include "cpupri.h" #include "cpuacct.h" @@ -405,10 +406,13 @@ struct rq { #define CPU_LOAD_IDX_MAX 5 unsigned long cpu_load[CPU_LOAD_IDX_MAX]; unsigned long last_load_update_tick; -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON u64 nohz_stamp; unsigned long nohz_flags; #endif +#ifdef CONFIG_NO_HZ_FULL + unsigned long last_sched_tick; +#endif int skip_clock_update; /* capture load from *all* tasks on this cpu: */ @@ -1072,6 +1076,16 @@ static inline u64 steal_ticks(u64 steal) static inline void inc_nr_running(struct rq *rq) { rq->nr_running++; + +#ifdef CONFIG_NO_HZ_FULL + if (rq->nr_running == 2) { + if (tick_nohz_full_cpu(rq->cpu)) { + /* Order rq->nr_running write against the IPI */ + smp_wmb(); + smp_send_reschedule(rq->cpu); + } + } +#endif } static inline void dec_nr_running(struct rq *rq) @@ -1079,6 +1093,13 @@ static inline 
void dec_nr_running(struct rq *rq) rq->nr_running--; } +static inline void rq_last_tick_reset(struct rq *rq) +{ +#ifdef CONFIG_NO_HZ_FULL + rq->last_sched_tick = jiffies; +#endif +} + extern void update_rq_clock(struct rq *rq); extern void activate_task(struct rq *rq, struct task_struct *p, int flags); @@ -1299,7 +1320,7 @@ extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq); extern void account_cfs_bandwidth_used(int enabled, int was_enabled); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON enum rq_nohz_flag_bits { NOHZ_TICK_STOPPED, NOHZ_BALANCE_KICK, diff --git a/kernel/softirq.c b/kernel/softirq.c index aa82723c7202..b5197dcb0dad 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -329,6 +329,19 @@ static inline void invoke_softirq(void) wakeup_softirqd(); } +static inline void tick_irq_exit(void) +{ +#ifdef CONFIG_NO_HZ_COMMON + int cpu = smp_processor_id(); + + /* Make sure that timer wheel updates are propagated */ + if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) { + if (!in_interrupt()) + tick_nohz_irq_exit(); + } +#endif +} + /* * Exit an interrupt context. Process softirqs if needed and possible: */ @@ -346,11 +359,7 @@ void irq_exit(void) if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); -#ifdef CONFIG_NO_HZ - /* Make sure that timer wheel updates are propagated */ - if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) - tick_nohz_irq_exit(); -#endif + tick_irq_exit(); rcu_irq_exit(); } diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 24510d84efd7..e4c07b0692bb 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -64,20 +64,88 @@ config GENERIC_CMOS_UPDATE if GENERIC_CLOCKEVENTS menu "Timers subsystem" -# Core internal switch. Selected by NO_HZ / HIGH_RES_TIMERS. This is +# Core internal switch. Selected by NO_HZ_COMMON / HIGH_RES_TIMERS. This is # only related to the tick functionality. Oneshot clockevent devices # are supported independ of this. config TICK_ONESHOT bool -config NO_HZ - bool "Tickless System (Dynamic Ticks)" +config NO_HZ_COMMON + bool depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS select TICK_ONESHOT + +choice + prompt "Timer tick handling" + default NO_HZ_IDLE if NO_HZ + +config HZ_PERIODIC + bool "Periodic timer ticks (constant rate, no dynticks)" + help + This option keeps the tick running periodically at a constant + rate, even when the CPU doesn't need it. + +config NO_HZ_IDLE + bool "Idle dynticks system (tickless idle)" + depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS + select NO_HZ_COMMON + help + This option enables a tickless idle system: timer interrupts + will only trigger on an as-needed basis when the system is idle. + This is usually interesting for energy saving. + + Most of the time you want to say Y here. + +config NO_HZ_FULL + bool "Full dynticks system (tickless)" + # NO_HZ_COMMON dependency + depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS + # We need at least one periodic CPU for timekeeping + depends on SMP + # RCU_USER_QS dependency + depends on HAVE_CONTEXT_TRACKING + # VIRT_CPU_ACCOUNTING_GEN dependency + depends on 64BIT + select NO_HZ_COMMON + select RCU_USER_QS + select RCU_NOCB_CPU + select VIRT_CPU_ACCOUNTING_GEN + select CONTEXT_TRACKING_FORCE + select IRQ_WORK + help + Adaptively try to shutdown the tick whenever possible, even when + the CPU is running tasks. Typically this requires running a single + task on the CPU. 
Chances for running tickless are maximized when + the task mostly runs in userspace and has few kernel activity. + + You need to fill up the nohz_full boot parameter with the + desired range of dynticks CPUs. + + This is implemented at the expense of some overhead in user <-> kernel + transitions: syscalls, exceptions and interrupts. Even when it's + dynamically off. + + Say N. + +endchoice + +config NO_HZ_FULL_ALL + bool "Full dynticks system on all CPUs by default" + depends on NO_HZ_FULL + help + If the user doesn't pass the nohz_full boot option to + define the range of full dynticks CPUs, consider that all + CPUs in the system are full dynticks by default. + Note the boot CPU will still be kept outside the range to + handle the timekeeping duty. + +config NO_HZ + bool "Old Idle dynticks config" + depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS help - This option enables a tickless system: timer interrupts will - only trigger on an as-needed basis both when the system is - busy and when the system is idle. + This is the old config entry that enables dynticks idle. + We keep it around for a little while to enforce backward + compatibility with older config files. config HIGH_RES_TIMERS bool "High Resolution Timer Support" diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 61d00a8cdf2f..206bbfb34e09 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -693,7 +693,8 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) bc->event_handler = tick_handle_oneshot_broadcast; /* Take the do_timer update */ - tick_do_timer_cpu = cpu; + if (!tick_nohz_full_cpu(cpu)) + tick_do_timer_cpu = cpu; /* * We must be careful here. There might be other CPUs diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 6176a3e45709..5d3fb100bc06 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -163,7 +163,10 @@ static void tick_setup_device(struct tick_device *td, * this cpu: */ if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { - tick_do_timer_cpu = cpu; + if (!tick_nohz_full_cpu(cpu)) + tick_do_timer_cpu = cpu; + else + tick_do_timer_cpu = TICK_DO_TIMER_NONE; tick_next_period = ktime_get(); tick_period = ktime_set(0, NSEC_PER_SEC / HZ); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 225f8bf19095..bc67d4245e1d 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -21,11 +21,15 @@ #include <linux/sched.h> #include <linux/module.h> #include <linux/irq_work.h> +#include <linux/posix-timers.h> +#include <linux/perf_event.h> #include <asm/irq_regs.h> #include "tick-internal.h" +#include <trace/events/timer.h> + /* * Per cpu nohz control structure */ @@ -104,7 +108,7 @@ static void tick_sched_do_timer(ktime_t now) { int cpu = smp_processor_id(); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Check if the do_timer duty was dropped. We don't care about * concurrency: This happens only when the cpu in charge went @@ -112,7 +116,8 @@ static void tick_sched_do_timer(ktime_t now) * this duty, then the jiffies update is still serialized by * jiffies_lock. 
*/ - if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) + if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE) + && !tick_nohz_full_cpu(cpu)) tick_do_timer_cpu = cpu; #endif @@ -123,7 +128,7 @@ static void tick_sched_do_timer(ktime_t now) static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) { -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * When we are idle and the tick is stopped, we have to touch * the watchdog as we might not schedule for a really long @@ -142,10 +147,226 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) profile_tick(CPU_PROFILING); } +#ifdef CONFIG_NO_HZ_FULL +static cpumask_var_t nohz_full_mask; +bool have_nohz_full_mask; + +static bool can_stop_full_tick(void) +{ + WARN_ON_ONCE(!irqs_disabled()); + + if (!sched_can_stop_tick()) { + trace_tick_stop(0, "more than 1 task in runqueue\n"); + return false; + } + + if (!posix_cpu_timers_can_stop_tick(current)) { + trace_tick_stop(0, "posix timers running\n"); + return false; + } + + if (!perf_event_can_stop_tick()) { + trace_tick_stop(0, "perf events running\n"); + return false; + } + + /* sched_clock_tick() needs us? */ +#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK + /* + * TODO: kick full dynticks CPUs when + * sched_clock_stable is set. + */ + if (!sched_clock_stable) { + trace_tick_stop(0, "unstable sched clock\n"); + return false; + } +#endif + + return true; +} + +static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now); + +/* + * Re-evaluate the need for the tick on the current CPU + * and restart it if necessary. + */ +void tick_nohz_full_check(void) +{ + struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + + if (tick_nohz_full_cpu(smp_processor_id())) { + if (ts->tick_stopped && !is_idle_task(current)) { + if (!can_stop_full_tick()) + tick_nohz_restart_sched_tick(ts, ktime_get()); + } + } +} + +static void nohz_full_kick_work_func(struct irq_work *work) +{ + tick_nohz_full_check(); +} + +static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { + .func = nohz_full_kick_work_func, +}; + +/* + * Kick the current CPU if it's full dynticks in order to force it to + * re-evaluate its dependency on the tick and restart it if necessary. + */ +void tick_nohz_full_kick(void) +{ + if (tick_nohz_full_cpu(smp_processor_id())) + irq_work_queue(&__get_cpu_var(nohz_full_kick_work)); +} + +static void nohz_full_kick_ipi(void *info) +{ + tick_nohz_full_check(); +} + +/* + * Kick all full dynticks CPUs in order to force these to re-evaluate + * their dependency on the tick and restart it if necessary. + */ +void tick_nohz_full_kick_all(void) +{ + if (!have_nohz_full_mask) + return; + + preempt_disable(); + smp_call_function_many(nohz_full_mask, + nohz_full_kick_ipi, NULL, false); + preempt_enable(); +} + +/* + * Re-evaluate the need for the tick as we switch the current task. + * It might need the tick due to per task/process properties: + * perf events, posix cpu timers, ... + */ +void tick_nohz_task_switch(struct task_struct *tsk) +{ + unsigned long flags; + + local_irq_save(flags); + + if (!tick_nohz_full_cpu(smp_processor_id())) + goto out; + + if (tick_nohz_tick_stopped() && !can_stop_full_tick()) + tick_nohz_full_kick(); + +out: + local_irq_restore(flags); +} + +int tick_nohz_full_cpu(int cpu) +{ + if (!have_nohz_full_mask) + return 0; + + return cpumask_test_cpu(cpu, nohz_full_mask); +} + +/* Parse the boot-time nohz CPU list from the kernel parameters. 
*/ +static int __init tick_nohz_full_setup(char *str) +{ + int cpu; + + alloc_bootmem_cpumask_var(&nohz_full_mask); + if (cpulist_parse(str, nohz_full_mask) < 0) { + pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); + return 1; + } + + cpu = smp_processor_id(); + if (cpumask_test_cpu(cpu, nohz_full_mask)) { + pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); + cpumask_clear_cpu(cpu, nohz_full_mask); + } + have_nohz_full_mask = true; + + return 1; +} +__setup("nohz_full=", tick_nohz_full_setup); + +static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DOWN_PREPARE: + /* + * If we handle the timekeeping duty for full dynticks CPUs, + * we can't safely shutdown that CPU. + */ + if (have_nohz_full_mask && tick_do_timer_cpu == cpu) + return -EINVAL; + break; + } + return NOTIFY_OK; +} + +/* + * Worst case string length in chunks of CPU range seems 2 steps + * separations: 0,2,4,6,... + * This is NR_CPUS + sizeof('\0') + */ +static char __initdata nohz_full_buf[NR_CPUS + 1]; + +static int tick_nohz_init_all(void) +{ + int err = -1; + +#ifdef CONFIG_NO_HZ_FULL_ALL + if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) { + pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); + return err; + } + err = 0; + cpumask_setall(nohz_full_mask); + cpumask_clear_cpu(smp_processor_id(), nohz_full_mask); + have_nohz_full_mask = true; +#endif + return err; +} + +void __init tick_nohz_init(void) +{ + int cpu; + + if (!have_nohz_full_mask) { + if (tick_nohz_init_all() < 0) + return; + } + + cpu_notifier(tick_nohz_cpu_down_callback, 0); + + /* Make sure full dynticks CPU are also RCU nocbs */ + for_each_cpu(cpu, nohz_full_mask) { + if (!rcu_is_nocb_cpu(cpu)) { + pr_warning("NO_HZ: CPU %d is not RCU nocb: " + "cleared from nohz_full range", cpu); + cpumask_clear_cpu(cpu, nohz_full_mask); + } + } + + cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); + pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); +} +#else +#define have_nohz_full_mask (0) +#endif + /* * NOHZ - aka dynamic tick functionality */ -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * NO HZ enabled ? */ @@ -345,11 +566,12 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, delta_jiffies = rcu_delta_jiffies; } } + /* - * Do not stop the tick, if we are only one off - * or if the cpu is required for rcu + * Do not stop the tick, if we are only one off (or less) + * or if the cpu is required for RCU: */ - if (!ts->tick_stopped && delta_jiffies == 1) + if (!ts->tick_stopped && delta_jiffies <= 1) goto out; /* Schedule the tick, if we are at least one jiffie off */ @@ -378,6 +600,13 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, time_delta = KTIME_MAX; } +#ifdef CONFIG_NO_HZ_FULL + if (!ts->inidle) { + time_delta = min(time_delta, + scheduler_tick_max_deferment()); + } +#endif + /* * calculate the expiry time for the next timer wheel * timer. 
delta_jiffies >= NEXT_TIMER_MAX_DELTA signals @@ -421,6 +650,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, ts->last_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; + trace_tick_stop(1, " "); } /* @@ -457,6 +687,24 @@ out: return ret; } +static void tick_nohz_full_stop_tick(struct tick_sched *ts) +{ +#ifdef CONFIG_NO_HZ_FULL + int cpu = smp_processor_id(); + + if (!tick_nohz_full_cpu(cpu) || is_idle_task(current)) + return; + + if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE) + return; + + if (!can_stop_full_tick()) + return; + + tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); +#endif +} + static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) { /* @@ -489,6 +737,21 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) return false; } + if (have_nohz_full_mask) { + /* + * Keep the tick alive to guarantee timekeeping progression + * if there are full dynticks CPUs around + */ + if (tick_do_timer_cpu == cpu) + return false; + /* + * Boot safety: make sure the timekeeping duty has been + * assigned before entering dyntick-idle mode, + */ + if (tick_do_timer_cpu == TICK_DO_TIMER_NONE) + return false; + } + return true; } @@ -568,12 +831,13 @@ void tick_nohz_irq_exit(void) { struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); - if (!ts->inidle) - return; - - /* Cancel the timer because CPU already waken up from the C-states*/ - menu_hrtimer_cancel(); - __tick_nohz_idle_enter(ts); + if (ts->inidle) { + /* Cancel the timer because CPU already waken up from the C-states*/ + menu_hrtimer_cancel(); + __tick_nohz_idle_enter(ts); + } else { + tick_nohz_full_stop_tick(ts); + } } /** @@ -802,7 +1066,7 @@ static inline void tick_check_nohz(int cpu) static inline void tick_nohz_switch_to_nohz(void) { } static inline void tick_check_nohz(int cpu) { } -#endif /* NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * Called from irq_enter to notify about the possible interruption of idle() @@ -887,14 +1151,14 @@ void tick_setup_sched_timer(void) now = ktime_get(); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (tick_nohz_enabled) ts->nohz_mode = NOHZ_MODE_HIGHRES; #endif } #endif /* HIGH_RES_TIMERS */ -#if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS +#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS void tick_cancel_sched_timer(int cpu) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); diff --git a/kernel/timer.c b/kernel/timer.c index 09bca8ce9771..a860bba34412 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -739,7 +739,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, cpu = smp_processor_id(); -#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) +#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) cpu = get_nohz_timer_target(); #endif @@ -931,14 +931,14 @@ void add_timer_on(struct timer_list *timer, int cpu) debug_activate(timer, timer->expires); internal_add_timer(base, timer); /* - * Check whether the other CPU is idle and needs to be - * triggered to reevaluate the timer wheel when nohz is - * active. We are protected against the other CPU fiddling + * Check whether the other CPU is in dynticks mode and needs + * to be triggered to reevaluate the timer wheel. + * We are protected against the other CPU fiddling * with the timer by holding the timer base lock. This also - * makes sure that a CPU on the way to idle can not evaluate - * the timer wheel. 
+ * makes sure that a CPU on the way to stop its tick can not + * evaluate the timer wheel. */ - wake_up_idle_cpu(cpu); + wake_up_nohz_cpu(cpu); spin_unlock_irqrestore(&base->lock, flags); } EXPORT_SYMBOL_GPL(add_timer_on); @@ -1189,7 +1189,7 @@ static inline void __run_timers(struct tvec_base *base) spin_unlock_irq(&base->lock); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Find out when the next timer event is due to happen. This * is used on S/390 to stop all activity when a CPU is idle. diff --git a/lib/oid_registry.c b/lib/oid_registry.c index d8de11f45908..318f382a010d 100644 --- a/lib/oid_registry.c +++ b/lib/oid_registry.c @@ -9,6 +9,7 @@ * 2 of the Licence, or (at your option) any later version. */ +#include <linux/module.h> #include <linux/export.h> #include <linux/oid_registry.h> #include <linux/kernel.h> @@ -16,6 +17,10 @@ #include <linux/bug.h> #include "oid_registry_data.c" +MODULE_DESCRIPTION("OID Registry"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + /** * look_up_OID - Find an OID registration for the specified data * @data: Binary representation of the OID diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 8af508536d36..3a8c8fd63c88 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -628,7 +628,7 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev, netdev_features_t features) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - u32 old_features = features; + netdev_features_t old_features = features; features &= real_dev->vlan_features; features |= NETIF_F_RXCSUM; diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index c3530a81a33b..950663d4d330 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -107,7 +107,7 @@ static void br_tcn_timer_expired(unsigned long arg) br_debug(br, "tcn timer expired\n"); spin_lock(&br->lock); - if (br->dev->flags & IFF_UP) { + if (!br_is_root_bridge(br) && (br->dev->flags & IFF_UP)) { br_transmit_tcn(br); mod_timer(&br->tcn_timer,jiffies + br->bridge_hello_time); diff --git a/net/core/dev.c b/net/core/dev.c index 4040673f806a..40b1fadaf637 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2456,7 +2456,7 @@ EXPORT_SYMBOL(netif_skb_features); * 2. skb is fragmented and the device does not support SG. 
*/ static inline int skb_needs_linearize(struct sk_buff *skb, - int features) + netdev_features_t features) { return skb_is_nonlinear(skb) && ((skb_has_frag_list(skb) && diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 5a934ef90f8b..22efdaa76ebf 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1421,7 +1421,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) void __user *useraddr = ifr->ifr_data; u32 ethcmd; int rc; - u32 old_features; + netdev_features_t old_features; if (!dev || !netif_device_present(dev)) return -ENODEV; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c61b3bb87a16..d01be2a3ae53 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1293,6 +1293,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | 0))) goto out; diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index d2d5a99fba09..cc22363965d2 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -121,6 +121,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, int ghl = GRE_HEADER_SECTION; struct gre_base_hdr *greh; int mac_len = skb->mac_len; + __be16 protocol = skb->protocol; int tnl_hlen; bool csum; @@ -150,7 +151,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, /* setup inner skb. */ if (greh->protocol == htons(ETH_P_TEB)) { - struct ethhdr *eth = eth_hdr(skb); + struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb); skb->protocol = eth->h_proto; } else { skb->protocol = greh->protocol; @@ -199,6 +200,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); skb->mac_len = mac_len; + skb->protocol = protocol; } while ((skb = skb->next)); out: return segs; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6abbe6455129..0ae038a4c7a8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2311,8 +2311,10 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, struct sk_buff *segs = ERR_PTR(-EINVAL); int mac_len = skb->mac_len; int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); - int outer_hlen; + struct ethhdr *inner_eth = (struct ethhdr *)skb_inner_mac_header(skb); + __be16 protocol = skb->protocol; netdev_features_t enc_features; + int outer_hlen; if (unlikely(!pskb_may_pull(skb, tnl_hlen))) goto out; @@ -2322,6 +2324,8 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); skb->mac_len = skb_inner_network_offset(skb); + inner_eth = (struct ethhdr *)skb_mac_header(skb); + skb->protocol = inner_eth->h_proto; /* segment inner packet. 
*/ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); @@ -2358,6 +2362,7 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, } skb->ip_summed = CHECKSUM_NONE; + skb->protocol = protocol; } while ((skb = skb->next)); out: return segs; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index dd5cd49b0e09..8ec1bca7f859 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -742,36 +742,33 @@ static void prb_open_block(struct tpacket_kbdq_core *pkc1, smp_rmb(); - if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) { + /* We could have just memset this but we will lose the + * flexibility of making the priv area sticky + */ - /* We could have just memset this but we will lose the - * flexibility of making the priv area sticky - */ - BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++; - BLOCK_NUM_PKTS(pbd1) = 0; - BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); - getnstimeofday(&ts); - h1->ts_first_pkt.ts_sec = ts.tv_sec; - h1->ts_first_pkt.ts_nsec = ts.tv_nsec; - pkc1->pkblk_start = (char *)pbd1; - pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); - BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); - BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN; - pbd1->version = pkc1->version; - pkc1->prev = pkc1->nxt_offset; - pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size; - prb_thaw_queue(pkc1); - _prb_refresh_rx_retire_blk_timer(pkc1); + BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++; + BLOCK_NUM_PKTS(pbd1) = 0; + BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); - smp_wmb(); + getnstimeofday(&ts); - return; - } + h1->ts_first_pkt.ts_sec = ts.tv_sec; + h1->ts_first_pkt.ts_nsec = ts.tv_nsec; - WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n", - pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num); - dump_stack(); - BUG(); + pkc1->pkblk_start = (char *)pbd1; + pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); + + BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); + BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN; + + pbd1->version = pkc1->version; + pkc1->prev = pkc1->nxt_offset; + pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size; + + prb_thaw_queue(pkc1); + _prb_refresh_rx_retire_blk_timer(pkc1); + + smp_wmb(); } /* @@ -862,10 +859,6 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, prb_close_block(pkc, pbd, po, status); return; } - - WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd); - dump_stack(); - BUG(); } static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc, diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 25e159c2feb4..e5f3da507823 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -584,8 +584,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, { int bp_index; - /* - * Prepare broadcast link message for reliable transmission, + /* Prepare broadcast link message for reliable transmission, * if first time trying to send it; * preparation is skipped for broadcast link protocol messages * since they are sent in an unreliable manner and don't need it @@ -611,30 +610,43 @@ static int tipc_bcbearer_send(struct sk_buff *buf, for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary; struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary; + struct tipc_bearer *b = p; + struct sk_buff *tbuf; if (!p) - break; /* no more bearers to try */ + break; /* No more bearers to try */ + + if (tipc_bearer_blocked(p)) { + if (!s || tipc_bearer_blocked(s)) + continue; /* Can't 
use either bearer */ + b = s; + } - tipc_nmap_diff(&bcbearer->remains, &p->nodes, &bcbearer->remains_new); + tipc_nmap_diff(&bcbearer->remains, &b->nodes, + &bcbearer->remains_new); if (bcbearer->remains_new.count == bcbearer->remains.count) - continue; /* bearer pair doesn't add anything */ + continue; /* Nothing added by bearer pair */ - if (!tipc_bearer_blocked(p)) - tipc_bearer_send(p, buf, &p->bcast_addr); - else if (s && !tipc_bearer_blocked(s)) - /* unable to send on primary bearer */ - tipc_bearer_send(s, buf, &s->bcast_addr); - else - /* unable to send on either bearer */ - continue; + if (bp_index == 0) { + /* Use original buffer for first bearer */ + tipc_bearer_send(b, buf, &b->bcast_addr); + } else { + /* Avoid concurrent buffer access */ + tbuf = pskb_copy(buf, GFP_ATOMIC); + if (!tbuf) + break; + tipc_bearer_send(b, tbuf, &b->bcast_addr); + kfree_skb(tbuf); /* Bearer keeps a clone */ + } + /* Swap bearers for next packet */ if (s) { bcbearer->bpairs[bp_index].primary = s; bcbearer->bpairs[bp_index].secondary = p; } if (bcbearer->remains_new.count == 0) - break; /* all targets reached */ + break; /* All targets reached */ bcbearer->remains = bcbearer->remains_new; } diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 0e801c3cdaf8..d5d859c80729 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -211,7 +211,8 @@ $(obj)/%.i: $(src)/%.c FORCE cmd_gensymtypes = \ $(CPP) -D__GENKSYMS__ $(c_flags) $< | \ - $(GENKSYMS) $(if $(1), -T $(2)) -a $(ARCH) \ + $(GENKSYMS) $(if $(1), -T $(2)) \ + $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX)) \ $(if $(KBUILD_PRESERVE),-p) \ -r $(firstword $(wildcard $(2:.symtypes=.symref) /dev/null)) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 3e73dfd838cd..51bb3de680b6 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -119,13 +119,6 @@ _c_flags += $(if $(patsubst n%,, \ $(CFLAGS_GCOV)) endif -ifdef CONFIG_SYMBOL_PREFIX -_sym_flags = -DSYMBOL_PREFIX=$(patsubst "%",%,$(CONFIG_SYMBOL_PREFIX)) -_cpp_flags += $(_sym_flags) -_a_flags += $(_sym_flags) -endif - - # If building the kernel in a separate objtree expand all occurrences # of -Idir to -I$(srctree)/dir except for absolute paths (starting with '/'). diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index cf82c832458f..8dcdca27d836 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -60,7 +60,8 @@ kernelsymfile := $(objtree)/Module.symvers modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers # Step 1), find all modules listed in $(MODVERDIR)/ -__modules := $(sort $(shell grep -h '\.ko$$' /dev/null $(wildcard $(MODVERDIR)/*.mod))) +MODLISTCMD := find $(MODVERDIR) -name '*.mod' | xargs -r grep -h '\.ko$$' | sort -u +__modules := $(shell $(MODLISTCMD)) modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o))) # Stop after building .o files if NOFINAL is set. Makes compile tests quicker @@ -78,12 +79,13 @@ modpost = scripts/mod/modpost \ $(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S) \ $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) +# We can go over command line length here, so be careful. 
quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules - cmd_modpost = $(modpost) -s + cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/.o/' | $(modpost) -s -T - PHONY += __modpost __modpost: $(modules:.ko=.o) FORCE - $(call cmd,modpost) $(wildcard vmlinux) $(filter-out FORCE,$^) + $(call cmd,modpost) $(wildcard vmlinux) quiet_cmd_kernel-mod = MODPOST $@ cmd_kernel-mod = $(modpost) $@ diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c index d25e4a118d37..88632df4381b 100644 --- a/scripts/genksyms/genksyms.c +++ b/scripts/genksyms/genksyms.c @@ -45,7 +45,6 @@ int in_source_file; static int flag_debug, flag_dump_defs, flag_reference, flag_dump_types, flag_preserve, flag_warnings; -static const char *arch = ""; static const char *mod_prefix = ""; static int errors; @@ -731,7 +730,7 @@ static void genksyms_usage(void) { fputs("Usage:\n" "genksyms [-adDTwqhV] > /path/to/.tmp_obj.ver\n" "\n" #ifdef __GNU_LIBRARY__ - " -a, --arch Select architecture\n" + " -s, --symbol-prefix Select symbol prefix\n" " -d, --debug Increment the debug level (repeatable)\n" " -D, --dump Dump expanded symbol defs (for debugging only)\n" " -r, --reference file Read reference symbols from a file\n" @@ -742,7 +741,7 @@ static void genksyms_usage(void) " -h, --help Print this message\n" " -V, --version Print the release version\n" #else /* __GNU_LIBRARY__ */ - " -a Select architecture\n" + " -s Select symbol prefix\n" " -d Increment the debug level (repeatable)\n" " -D Dump expanded symbol defs (for debugging only)\n" " -r file Read reference symbols from a file\n" @@ -763,7 +762,7 @@ int main(int argc, char **argv) #ifdef __GNU_LIBRARY__ struct option long_opts[] = { - {"arch", 1, 0, 'a'}, + {"symbol-prefix", 1, 0, 's'}, {"debug", 0, 0, 'd'}, {"warnings", 0, 0, 'w'}, {"quiet", 0, 0, 'q'}, @@ -776,14 +775,14 @@ int main(int argc, char **argv) {0, 0, 0, 0} }; - while ((o = getopt_long(argc, argv, "a:dwqVDr:T:ph", + while ((o = getopt_long(argc, argv, "s:dwqVDr:T:ph", &long_opts[0], NULL)) != EOF) #else /* __GNU_LIBRARY__ */ - while ((o = getopt(argc, argv, "a:dwqVDr:T:ph")) != EOF) + while ((o = getopt(argc, argv, "s:dwqVDr:T:ph")) != EOF) #endif /* __GNU_LIBRARY__ */ switch (o) { - case 'a': - arch = optarg; + case 's': + mod_prefix = optarg; break; case 'd': flag_debug++; @@ -826,9 +825,6 @@ int main(int argc, char **argv) genksyms_usage(); return 1; } - if ((strcmp(arch, "h8300") == 0) || (strcmp(arch, "blackfin") == 0) || - (strcmp(arch, "metag") == 0)) - mod_prefix = "_"; { extern int yydebug; extern int yy_flex_debug; diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 3d569d6022c2..014994936b1c 100644 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -74,9 +74,8 @@ kallsyms() info KSYM ${2} local kallsymopt; - if [ -n "${CONFIG_SYMBOL_PREFIX}" ]; then - kallsymopt="${kallsymopt} \ - --symbol-prefix=${CONFIG_SYMBOL_PREFIX}" + if [ -n "${CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX}" ]; then + kallsymopt="${kallsymopt} --symbol-prefix=_" fi if [ -n "${CONFIG_KALLSYMS_ALL}" ]; then diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 78b30c1548e9..a4be8e112bb6 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -15,17 +15,12 @@ #include <stdio.h> #include <ctype.h> #include <string.h> +#include <limits.h> +#include <stdbool.h> #include "modpost.h" #include "../../include/generated/autoconf.h" #include "../../include/linux/license.h" - -/* Some toolchains use a `_' prefix for all user symbols. 
*/ -#ifdef CONFIG_SYMBOL_PREFIX -#define MODULE_SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX -#else -#define MODULE_SYMBOL_PREFIX "" -#endif - +#include "../../include/linux/export.h" /* Are we using CONFIG_MODVERSIONS? */ int modversions = 0; @@ -85,6 +80,14 @@ PRINTF void merror(const char *fmt, ...) va_end(arglist); } +static inline bool strends(const char *str, const char *postfix) +{ + if (strlen(str) < strlen(postfix)) + return false; + + return strcmp(str + strlen(str) - strlen(postfix), postfix) == 0; +} + static int is_vmlinux(const char *modname) { const char *myname; @@ -120,22 +123,20 @@ static struct module *find_module(char *modname) return mod; } -static struct module *new_module(char *modname) +static struct module *new_module(const char *modname) { struct module *mod; - char *p, *s; + char *p; mod = NOFAIL(malloc(sizeof(*mod))); memset(mod, 0, sizeof(*mod)); p = NOFAIL(strdup(modname)); /* strip trailing .o */ - s = strrchr(p, '.'); - if (s != NULL) - if (strcmp(s, ".o") == 0) { - *s = '\0'; - mod->is_dot_o = 1; - } + if (strends(p, ".o")) { + p[strlen(p) - 2] = '\0'; + mod->is_dot_o = 1; + } /* add to list */ mod->name = p; @@ -562,7 +563,7 @@ static void parse_elf_finish(struct elf_info *info) static int ignore_undef_symbol(struct elf_info *info, const char *symname) { /* ignore __this_module, it will be resolved shortly */ - if (strcmp(symname, MODULE_SYMBOL_PREFIX "__this_module") == 0) + if (strcmp(symname, VMLINUX_SYMBOL_STR(__this_module)) == 0) return 1; /* ignore global offset table */ if (strcmp(symname, "_GLOBAL_OFFSET_TABLE_") == 0) @@ -583,8 +584,8 @@ static int ignore_undef_symbol(struct elf_info *info, const char *symname) return 0; } -#define CRC_PFX MODULE_SYMBOL_PREFIX "__crc_" -#define KSYMTAB_PFX MODULE_SYMBOL_PREFIX "__ksymtab_" +#define CRC_PFX VMLINUX_SYMBOL_STR(__crc_) +#define KSYMTAB_PFX VMLINUX_SYMBOL_STR(__ksymtab_) static void handle_modversions(struct module *mod, struct elf_info *info, Elf_Sym *sym, const char *symname) @@ -637,14 +638,15 @@ static void handle_modversions(struct module *mod, struct elf_info *info, } #endif - if (memcmp(symname, MODULE_SYMBOL_PREFIX, - strlen(MODULE_SYMBOL_PREFIX)) == 0) { - mod->unres = - alloc_symbol(symname + - strlen(MODULE_SYMBOL_PREFIX), - ELF_ST_BIND(sym->st_info) == STB_WEAK, - mod->unres); - } +#ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX + if (symname[0] != '_') + break; + else + symname++; +#endif + mod->unres = alloc_symbol(symname, + ELF_ST_BIND(sym->st_info) == STB_WEAK, + mod->unres); break; default: /* All exported symbols */ @@ -652,9 +654,9 @@ static void handle_modversions(struct module *mod, struct elf_info *info, sym_add_exported(symname + strlen(KSYMTAB_PFX), mod, export); } - if (strcmp(symname, MODULE_SYMBOL_PREFIX "init_module") == 0) + if (strcmp(symname, VMLINUX_SYMBOL_STR(init_module)) == 0) mod->has_init = 1; - if (strcmp(symname, MODULE_SYMBOL_PREFIX "cleanup_module") == 0) + if (strcmp(symname, VMLINUX_SYMBOL_STR(cleanup_module)) == 0) mod->has_cleanup = 1; break; } @@ -1762,6 +1764,27 @@ static void read_symbols(char *modname) mod->unres = alloc_symbol("module_layout", 0, mod->unres); } +static void read_symbols_from_files(const char *filename) +{ + FILE *in = stdin; + char fname[PATH_MAX]; + + if (strcmp(filename, "-") != 0) { + in = fopen(filename, "r"); + if (!in) + fatal("Can't open filenames file %s: %m", filename); + } + + while (fgets(fname, PATH_MAX, in) != NULL) { + if (strends(fname, "\n")) + fname[strlen(fname)-1] = '\0'; + read_symbols(fname); + } + + if (in != stdin) + 
fclose(in); +} + #define SZ 500 /* We first write the generated file into memory using the @@ -1934,7 +1957,8 @@ static int add_versions(struct buffer *b, struct module *mod) s->name, mod->name); continue; } - buf_printf(b, "\t{ %#8x, \"%s\" },\n", s->crc, s->name); + buf_printf(b, "\t{ %#8x, __VMLINUX_SYMBOL_STR(%s) },\n", + s->crc, s->name); } buf_printf(b, "};\n"); @@ -2122,13 +2146,13 @@ int main(int argc, char **argv) struct module *mod; struct buffer buf = { }; char *kernel_read = NULL, *module_read = NULL; - char *dump_write = NULL; + char *dump_write = NULL, *files_source = NULL; int opt; int err; struct ext_sym_list *extsym_iter; struct ext_sym_list *extsym_start = NULL; - while ((opt = getopt(argc, argv, "i:I:e:msSo:awM:K:")) != -1) { + while ((opt = getopt(argc, argv, "i:I:e:msST:o:awM:K:")) != -1) { switch (opt) { case 'i': kernel_read = optarg; @@ -2160,6 +2184,9 @@ int main(int argc, char **argv) case 'S': sec_mismatch_verbose = 0; break; + case 'T': + files_source = optarg; + break; case 'w': warn_unresolved = 1; break; @@ -2182,6 +2209,9 @@ int main(int argc, char **argv) while (optind < argc) read_symbols(argv[optind++]); + if (files_source) + read_symbols_from_files(files_source); + for (mod = modules; mod; mod = mod->next) { if (mod->skip) continue; diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index d01b24b72c61..779262f59e25 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -6,6 +6,9 @@ config HAVE_KVM config HAVE_KVM_IRQCHIP bool +config HAVE_KVM_IRQ_ROUTING + bool + config HAVE_KVM_EVENTFD bool select EVENTFD diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 3642239252b0..8db43701016f 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -80,11 +80,12 @@ kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev, spin_lock(&assigned_dev->intx_mask_lock); if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) kvm_set_irq(assigned_dev->kvm, - assigned_dev->irq_source_id, vector, 1); + assigned_dev->irq_source_id, vector, 1, + false); spin_unlock(&assigned_dev->intx_mask_lock); } else kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - vector, 1); + vector, 1, false); } static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) @@ -165,7 +166,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) container_of(kian, struct kvm_assigned_dev_kernel, ack_notifier); - kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); + kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false); spin_lock(&dev->intx_mask_lock); @@ -188,7 +189,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) if (reassert) kvm_set_irq(dev->kvm, dev->irq_source_id, - dev->guest_irq, 1); + dev->guest_irq, 1, false); } spin_unlock(&dev->intx_mask_lock); @@ -202,7 +203,7 @@ static void deassign_guest_irq(struct kvm *kvm, &assigned_dev->ack_notifier); kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - assigned_dev->guest_irq, 0); + assigned_dev->guest_irq, 0, false); if (assigned_dev->irq_source_id != -1) kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); @@ -901,7 +902,7 @@ static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) { kvm_set_irq(match->kvm, match->irq_source_id, - match->guest_irq, 0); + match->guest_irq, 0, false); /* * Masking at hardware-level is performed on demand, * i.e. when an IRQ actually arrives at the host. 
@@ -982,36 +983,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, goto out; break; } -#ifdef KVM_CAP_IRQ_ROUTING - case KVM_SET_GSI_ROUTING: { - struct kvm_irq_routing routing; - struct kvm_irq_routing __user *urouting; - struct kvm_irq_routing_entry *entries; - - r = -EFAULT; - if (copy_from_user(&routing, argp, sizeof(routing))) - goto out; - r = -EINVAL; - if (routing.nr >= KVM_MAX_IRQ_ROUTES) - goto out; - if (routing.flags) - goto out; - r = -ENOMEM; - entries = vmalloc(routing.nr * sizeof(*entries)); - if (!entries) - goto out; - r = -EFAULT; - urouting = argp; - if (copy_from_user(entries, urouting->entries, - routing.nr * sizeof(*entries))) - goto out_free_irq_routing; - r = kvm_set_irq_routing(kvm, entries, routing.nr, - routing.flags); - out_free_irq_routing: - vfree(entries); - break; - } -#endif /* KVM_CAP_IRQ_ROUTING */ #ifdef __KVM_HAVE_MSIX case KVM_ASSIGN_SET_MSIX_NR: { struct kvm_assigned_msix_nr entry_nr; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index adb17f266b28..64ee720b75c7 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -35,7 +35,7 @@ #include "iodev.h" -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING /* * -------------------------------------------------------------------- * irqfd: Allows an fd to be used to inject an interrupt to the guest @@ -100,11 +100,13 @@ irqfd_inject(struct work_struct *work) struct kvm *kvm = irqfd->kvm; if (!irqfd->resampler) { - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1, + false); + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0, + false); } else kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, - irqfd->gsi, 1); + irqfd->gsi, 1, false); } /* @@ -121,7 +123,7 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) resampler = container_of(kian, struct _irqfd_resampler, notifier); kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, - resampler->notifier.gsi, 0); + resampler->notifier.gsi, 0, false); rcu_read_lock(); @@ -146,7 +148,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd) list_del(&resampler->link); kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, - resampler->notifier.gsi, 0); + resampler->notifier.gsi, 0, false); kfree(resampler); } @@ -225,7 +227,8 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) irq = rcu_dereference(irqfd->irq_entry); /* An event has been signaled, inject an interrupt */ if (irq) - kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1); + kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, + false); else schedule_work(&irqfd->inject); rcu_read_unlock(); @@ -430,7 +433,7 @@ fail: void kvm_eventfd_init(struct kvm *kvm) { -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING spin_lock_init(&kvm->irqfds.lock); INIT_LIST_HEAD(&kvm->irqfds.items); INIT_LIST_HEAD(&kvm->irqfds.resampler_list); @@ -439,7 +442,7 @@ kvm_eventfd_init(struct kvm *kvm) INIT_LIST_HEAD(&kvm->ioeventfds); } -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING /* * shutdown any irqfd's that match fd+gsi */ @@ -543,7 +546,7 @@ void kvm_irq_routing_update(struct kvm *kvm, * aggregated from all vm* instances. We need our own isolated single-thread * queue to prevent deadlock against flushing the normal work-queue. 
*/ -static int __init irqfd_module_init(void) +int kvm_irqfd_init(void) { irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup"); if (!irqfd_cleanup_wq) @@ -552,13 +555,10 @@ static int __init irqfd_module_init(void) return 0; } -static void __exit irqfd_module_exit(void) +void kvm_irqfd_exit(void) { destroy_workqueue(irqfd_cleanup_wq); } - -module_init(irqfd_module_init); -module_exit(irqfd_module_exit); #endif /* @@ -577,6 +577,7 @@ struct _ioeventfd { struct eventfd_ctx *eventfd; u64 datamatch; struct kvm_io_device dev; + u8 bus_idx; bool wildcard; }; @@ -669,7 +670,8 @@ ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p) struct _ioeventfd *_p; list_for_each_entry(_p, &kvm->ioeventfds, list) - if (_p->addr == p->addr && _p->length == p->length && + if (_p->bus_idx == p->bus_idx && + _p->addr == p->addr && _p->length == p->length && (_p->wildcard || p->wildcard || _p->datamatch == p->datamatch)) return true; @@ -677,15 +679,24 @@ ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p) return false; } +static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags) +{ + if (flags & KVM_IOEVENTFD_FLAG_PIO) + return KVM_PIO_BUS; + if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY) + return KVM_VIRTIO_CCW_NOTIFY_BUS; + return KVM_MMIO_BUS; +} + static int kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { - int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; - enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; + enum kvm_bus bus_idx; struct _ioeventfd *p; struct eventfd_ctx *eventfd; int ret; + bus_idx = ioeventfd_bus_from_flags(args->flags); /* must be natural-word sized */ switch (args->len) { case 1: @@ -717,6 +728,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) INIT_LIST_HEAD(&p->list); p->addr = args->addr; + p->bus_idx = bus_idx; p->length = args->len; p->eventfd = eventfd; @@ -760,12 +772,12 @@ fail: static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { - int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; - enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; + enum kvm_bus bus_idx; struct _ioeventfd *p, *tmp; struct eventfd_ctx *eventfd; int ret = -ENOENT; + bus_idx = ioeventfd_bus_from_flags(args->flags); eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) return PTR_ERR(eventfd); @@ -775,7 +787,8 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); - if (p->eventfd != eventfd || + if (p->bus_idx != bus_idx || + p->eventfd != eventfd || p->addr != args->addr || p->length != args->len || p->wildcard != wildcard) diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 5ba005c00e2f..2d682977ce82 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -50,7 +50,8 @@ #else #define ioapic_debug(fmt, arg...) 
#endif -static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq); +static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq, + bool line_status); static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, unsigned long addr, @@ -90,7 +91,80 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, return result; } -static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) +static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) +{ + ioapic->rtc_status.pending_eoi = 0; + bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS); +} + +static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) +{ + bool new_val, old_val; + struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + union kvm_ioapic_redirect_entry *e; + + e = &ioapic->redirtbl[RTC_GSI]; + if (!kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, + e->fields.dest_mode)) + return; + + new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector); + old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); + + if (new_val == old_val) + return; + + if (new_val) { + __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); + ioapic->rtc_status.pending_eoi++; + } else { + __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); + ioapic->rtc_status.pending_eoi--; + } + + WARN_ON(ioapic->rtc_status.pending_eoi < 0); +} + +void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) +{ + struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + + spin_lock(&ioapic->lock); + __rtc_irq_eoi_tracking_restore_one(vcpu); + spin_unlock(&ioapic->lock); +} + +static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic) +{ + struct kvm_vcpu *vcpu; + int i; + + if (RTC_GSI >= IOAPIC_NUM_PINS) + return; + + rtc_irq_eoi_tracking_reset(ioapic); + kvm_for_each_vcpu(i, vcpu, ioapic->kvm) + __rtc_irq_eoi_tracking_restore_one(vcpu); +} + +static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu) +{ + if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) + --ioapic->rtc_status.pending_eoi; + + WARN_ON(ioapic->rtc_status.pending_eoi < 0); +} + +static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic) +{ + if (ioapic->rtc_status.pending_eoi > 0) + return true; /* coalesced */ + + return false; +} + +static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx, + bool line_status) { union kvm_ioapic_redirect_entry *pent; int injected = -1; @@ -98,7 +172,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) pent = &ioapic->redirtbl[idx]; if (!pent->fields.mask) { - injected = ioapic_deliver(ioapic, idx); + injected = ioapic_deliver(ioapic, idx, line_status); if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) pent->fields.remote_irr = 1; } @@ -119,41 +193,48 @@ static void update_handled_vectors(struct kvm_ioapic *ioapic) smp_wmb(); } -void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, - u64 *eoi_exit_bitmap) +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, + u32 *tmr) { struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; union kvm_ioapic_redirect_entry *e; - struct kvm_lapic_irq irqe; int index; spin_lock(&ioapic->lock); - /* traverse ioapic entry to set eoi exit bitmap*/ for (index = 0; index < IOAPIC_NUM_PINS; index++) { e = &ioapic->redirtbl[index]; if (!e->fields.mask && (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, - index))) { - irqe.dest_id = e->fields.dest_id; - irqe.vector = e->fields.vector; - irqe.dest_mode = 
e->fields.dest_mode; - irqe.delivery_mode = e->fields.delivery_mode << 8; - kvm_calculate_eoi_exitmap(vcpu, &irqe, eoi_exit_bitmap); + index) || index == RTC_GSI)) { + if (kvm_apic_match_dest(vcpu, NULL, 0, + e->fields.dest_id, e->fields.dest_mode)) { + __set_bit(e->fields.vector, + (unsigned long *)eoi_exit_bitmap); + if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG) + __set_bit(e->fields.vector, + (unsigned long *)tmr); + } } } spin_unlock(&ioapic->lock); } -EXPORT_SYMBOL_GPL(kvm_ioapic_calculate_eoi_exitmap); -void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm) +#ifdef CONFIG_X86 +void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; - if (!kvm_apic_vid_enabled(kvm) || !ioapic) + if (!ioapic) return; - kvm_make_update_eoibitmap_request(kvm); + kvm_make_scan_ioapic_request(kvm); } +#else +void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) +{ + return; +} +#endif static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { @@ -195,16 +276,17 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG && ioapic->irr & (1 << index)) - ioapic_service(ioapic, index); - kvm_ioapic_make_eoibitmap_request(ioapic->kvm); + ioapic_service(ioapic, index, false); + kvm_vcpu_request_scan_ioapic(ioapic->kvm); break; } } -static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) +static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status) { union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; struct kvm_lapic_irq irqe; + int ret; ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " "vector=%x trig_mode=%x\n", @@ -220,11 +302,19 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) irqe.level = 1; irqe.shorthand = 0; - return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); + if (irq == RTC_GSI && line_status) { + BUG_ON(ioapic->rtc_status.pending_eoi != 0); + ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, + ioapic->rtc_status.dest_map); + ioapic->rtc_status.pending_eoi = ret; + } else + ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL); + + return ret; } int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, - int level) + int level, bool line_status) { u32 old_irr; u32 mask = 1 << irq; @@ -244,13 +334,20 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, ret = 1; } else { int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); + + if (irq == RTC_GSI && line_status && + rtc_irq_check_coalesced(ioapic)) { + ret = 0; /* coalesced */ + goto out; + } ioapic->irr |= mask; if ((edge && old_irr != ioapic->irr) || (!edge && !entry.fields.remote_irr)) - ret = ioapic_service(ioapic, irq); + ret = ioapic_service(ioapic, irq, line_status); else ret = 0; /* report coalesced interrupt */ } +out: trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); spin_unlock(&ioapic->lock); @@ -267,8 +364,8 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id) spin_unlock(&ioapic->lock); } -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, - int trigger_mode) +static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, + struct kvm_ioapic *ioapic, int vector, int trigger_mode) { int i; @@ -278,6 +375,8 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, if (ent->fields.vector != vector) continue; + if (i == RTC_GSI) + rtc_irq_eoi(ioapic, vcpu); /* * 
We are dropping lock while calling ack notifiers because ack * notifier callbacks for assigned devices call into IOAPIC @@ -296,7 +395,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); ent->fields.remote_irr = 0; if (!ent->fields.mask && (ioapic->irr & (1 << i))) - ioapic_service(ioapic, i); + ioapic_service(ioapic, i, false); } } @@ -307,12 +406,12 @@ bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) return test_bit(vector, ioapic->handled_vectors); } -void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) +void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode) { - struct kvm_ioapic *ioapic = kvm->arch.vioapic; + struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; spin_lock(&ioapic->lock); - __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); + __kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode); spin_unlock(&ioapic->lock); } @@ -410,7 +509,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, break; #ifdef CONFIG_IA64 case IOAPIC_REG_EOI: - __kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG); + __kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG); break; #endif @@ -431,6 +530,7 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic) ioapic->ioregsel = 0; ioapic->irr = 0; ioapic->id = 0; + rtc_irq_eoi_tracking_reset(ioapic); update_handled_vectors(ioapic); } @@ -496,7 +596,8 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) spin_lock(&ioapic->lock); memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); update_handled_vectors(ioapic); - kvm_ioapic_make_eoibitmap_request(kvm); + kvm_vcpu_request_scan_ioapic(kvm); + kvm_rtc_eoi_tracking_restore_all(ioapic); spin_unlock(&ioapic->lock); return 0; } diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 0400a466c50c..615d8c995c3c 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -34,6 +34,17 @@ struct kvm_vcpu; #define IOAPIC_INIT 0x5 #define IOAPIC_EXTINT 0x7 +#ifdef CONFIG_X86 +#define RTC_GSI 8 +#else +#define RTC_GSI -1U +#endif + +struct rtc_status { + int pending_eoi; + DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS); +}; + struct kvm_ioapic { u64 base_address; u32 ioregsel; @@ -47,6 +58,7 @@ struct kvm_ioapic { void (*ack_notifier)(void *opaque, int irq); spinlock_t lock; DECLARE_BITMAP(handled_vectors, 256); + struct rtc_status rtc_status; }; #ifdef DEBUG @@ -67,24 +79,25 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) return kvm->arch.vioapic; } +void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, int dest, int dest_mode); int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); -void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); +void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, + int trigger_mode); bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_destroy(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, - int level); + int level, bool line_status); void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq); + struct kvm_lapic_irq *irq, unsigned long *dest_map); int kvm_get_ioapic(struct 
kvm *kvm, struct kvm_ioapic_state *state); int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); -void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm); -void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, - u64 *eoi_exit_bitmap); - +void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, + u32 *tmr); #endif diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index e9073cf4d040..e2e6b4473a96 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -35,7 +35,8 @@ #include "ioapic.h" static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level) + struct kvm *kvm, int irq_source_id, int level, + bool line_status) { #ifdef CONFIG_X86 struct kvm_pic *pic = pic_irqchip(kvm); @@ -46,10 +47,12 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, } static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level) + struct kvm *kvm, int irq_source_id, int level, + bool line_status) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; - return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level); + return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level, + line_status); } inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) @@ -63,7 +66,7 @@ inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) } int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq) + struct kvm_lapic_irq *irq, unsigned long *dest_map) { int i, r = -1; struct kvm_vcpu *vcpu, *lowest = NULL; @@ -74,7 +77,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, irq->delivery_mode = APIC_DM_FIXED; } - if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r)) + if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) return r; kvm_for_each_vcpu(i, vcpu, kvm) { @@ -88,7 +91,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, if (!kvm_is_dm_lowest_prio(irq)) { if (r < 0) r = 0; - r += kvm_apic_set_irq(vcpu, irq); + r += kvm_apic_set_irq(vcpu, irq, dest_map); } else if (kvm_lapic_enabled(vcpu)) { if (!lowest) lowest = vcpu; @@ -98,7 +101,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, } if (lowest) - r = kvm_apic_set_irq(lowest, irq); + r = kvm_apic_set_irq(lowest, irq, dest_map); return r; } @@ -121,7 +124,7 @@ static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, } int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level) + struct kvm *kvm, int irq_source_id, int level, bool line_status) { struct kvm_lapic_irq irq; @@ -130,7 +133,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, kvm_set_msi_irq(e, &irq); - return kvm_irq_delivery_to_apic(kvm, NULL, &irq); + return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL); } @@ -142,63 +145,12 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, kvm_set_msi_irq(e, &irq); - if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r)) + if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) return r; else return -EWOULDBLOCK; } -int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) -{ - struct kvm_kernel_irq_routing_entry route; - - if (!irqchip_in_kernel(kvm) || msi->flags != 0) - return -EINVAL; - - route.msi.address_lo = msi->address_lo; - route.msi.address_hi = msi->address_hi; - route.msi.data = 
msi->data; - - return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1); -} - -/* - * Return value: - * < 0 Interrupt was ignored (masked or not delivered for other reasons) - * = 0 Interrupt was coalesced (previous irq is still pending) - * > 0 Number of CPUs interrupt was delivered to - */ -int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) -{ - struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; - int ret = -1, i = 0; - struct kvm_irq_routing_table *irq_rt; - - trace_kvm_set_irq(irq, level, irq_source_id); - - /* Not possible to detect if the guest uses the PIC or the - * IOAPIC. So set the bit in both. The guest will ignore - * writes to the unused one. - */ - rcu_read_lock(); - irq_rt = rcu_dereference(kvm->irq_routing); - if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, &irq_rt->map[irq], link) - irq_set[i++] = *e; - rcu_read_unlock(); - - while(i--) { - int r; - r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level); - if (r < 0) - continue; - - ret = r + ((ret < 0) ? 0 : ret); - } - - return ret; -} - /* * Deliver an IRQ in an atomic context if we can, or return a failure, * user can retry in a process context. @@ -236,63 +188,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) return ret; } -bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi; - - rcu_read_lock(); - gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) { - rcu_read_unlock(); - return true; - } - - rcu_read_unlock(); - - return false; -} -EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); - -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi; - - trace_kvm_ack_irq(irqchip, pin); - - rcu_read_lock(); - gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; - if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); - rcu_read_unlock(); -} - -void kvm_register_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); - mutex_unlock(&kvm->irq_lock); - kvm_ioapic_make_eoibitmap_request(kvm); -} - -void kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) -{ - mutex_lock(&kvm->irq_lock); - hlist_del_init_rcu(&kian->link); - mutex_unlock(&kvm->irq_lock); - synchronize_rcu(); - kvm_ioapic_make_eoibitmap_request(kvm); -} - int kvm_request_irq_source_id(struct kvm *kvm) { unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; @@ -376,34 +271,14 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, rcu_read_unlock(); } -void kvm_free_irq_routing(struct kvm *kvm) -{ - /* Called only during vm destruction. Nobody can use the pointer - at this stage */ - kfree(kvm->irq_routing); -} - -static int setup_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, - const struct kvm_irq_routing_entry *ue) +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; int delta; unsigned max_pin; - struct kvm_kernel_irq_routing_entry *ei; - /* - * Do not allow GSI to be mapped to the same irqchip more than once. 
- * Allow only one to one mapping between GSI and MSI. - */ - hlist_for_each_entry(ei, &rt->map[ue->gsi], link) - if (ei->type == KVM_IRQ_ROUTING_MSI || - ue->type == KVM_IRQ_ROUTING_MSI || - ue->u.irqchip.irqchip == ei->irqchip.irqchip) - return r; - - e->gsi = ue->gsi; - e->type = ue->type; switch (ue->type) { case KVM_IRQ_ROUTING_IRQCHIP: delta = 0; @@ -440,69 +315,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, goto out; } - hlist_add_head(&e->link, &rt->map[e->gsi]); r = 0; out: return r; } - -int kvm_set_irq_routing(struct kvm *kvm, - const struct kvm_irq_routing_entry *ue, - unsigned nr, - unsigned flags) -{ - struct kvm_irq_routing_table *new, *old; - u32 i, j, nr_rt_entries = 0; - int r; - - for (i = 0; i < nr; ++i) { - if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES) - return -EINVAL; - nr_rt_entries = max(nr_rt_entries, ue[i].gsi); - } - - nr_rt_entries += 1; - - new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)) - + (nr * sizeof(struct kvm_kernel_irq_routing_entry)), - GFP_KERNEL); - - if (!new) - return -ENOMEM; - - new->rt_entries = (void *)&new->map[nr_rt_entries]; - - new->nr_rt_entries = nr_rt_entries; - for (i = 0; i < 3; i++) - for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++) - new->chip[i][j] = -1; - - for (i = 0; i < nr; ++i) { - r = -EINVAL; - if (ue->flags) - goto out; - r = setup_routing_entry(new, &new->rt_entries[i], ue); - if (r) - goto out; - ++ue; - } - - mutex_lock(&kvm->irq_lock); - old = kvm->irq_routing; - kvm_irq_routing_update(kvm, new); - mutex_unlock(&kvm->irq_lock); - - synchronize_rcu(); - - new = old; - r = 0; - -out: - kfree(new); - return r; -} - #define IOAPIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) } diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c new file mode 100644 index 000000000000..20dc9e4a8f6c --- /dev/null +++ b/virt/kvm/irqchip.c @@ -0,0 +1,237 @@ +/* + * irqchip.c: Common API for in kernel interrupt controllers + * Copyright (c) 2007, Intel Corporation. + * Copyright 2010 Red Hat, Inc. and/or its affiliates. + * Copyright (c) 2013, Alexander Graf <[email protected]> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * This file is derived from virt/kvm/irq_comm.c. 
+ * + * Authors: + * Yaozu (Eddie) Dong <[email protected]> + * Alexander Graf <[email protected]> + */ + +#include <linux/kvm_host.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <trace/events/kvm.h> +#include "irq.h" + +bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi; + + rcu_read_lock(); + gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) { + rcu_read_unlock(); + return true; + } + + rcu_read_unlock(); + + return false; +} +EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + struct kvm_irq_ack_notifier *kian; + int gsi; + + trace_kvm_ack_irq(irqchip, pin); + + rcu_read_lock(); + gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + if (gsi != -1) + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); + rcu_read_unlock(); +} + +void kvm_register_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); + mutex_unlock(&kvm->irq_lock); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} + +void kvm_unregister_irq_ack_notifier(struct kvm *kvm, + struct kvm_irq_ack_notifier *kian) +{ + mutex_lock(&kvm->irq_lock); + hlist_del_init_rcu(&kian->link); + mutex_unlock(&kvm->irq_lock); + synchronize_rcu(); +#ifdef __KVM_HAVE_IOAPIC + kvm_vcpu_request_scan_ioapic(kvm); +#endif +} + +int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) +{ + struct kvm_kernel_irq_routing_entry route; + + if (!irqchip_in_kernel(kvm) || msi->flags != 0) + return -EINVAL; + + route.msi.address_lo = msi->address_lo; + route.msi.address_hi = msi->address_hi; + route.msi.data = msi->data; + + return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); +} + +/* + * Return value: + * < 0 Interrupt was ignored (masked or not delivered for other reasons) + * = 0 Interrupt was coalesced (previous irq is still pending) + * > 0 Number of CPUs interrupt was delivered to + */ +int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, + bool line_status) +{ + struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; + int ret = -1, i = 0; + struct kvm_irq_routing_table *irq_rt; + + trace_kvm_set_irq(irq, level, irq_source_id); + + /* Not possible to detect if the guest uses the PIC or the + * IOAPIC. So set the bit in both. The guest will ignore + * writes to the unused one. + */ + rcu_read_lock(); + irq_rt = rcu_dereference(kvm->irq_routing); + if (irq < irq_rt->nr_rt_entries) + hlist_for_each_entry(e, &irq_rt->map[irq], link) + irq_set[i++] = *e; + rcu_read_unlock(); + + while(i--) { + int r; + r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level, + line_status); + if (r < 0) + continue; + + ret = r + ((ret < 0) ? 0 : ret); + } + + return ret; +} + +void kvm_free_irq_routing(struct kvm *kvm) +{ + /* Called only during vm destruction. Nobody can use the pointer + at this stage */ + kfree(kvm->irq_routing); +} + +static int setup_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + struct kvm_kernel_irq_routing_entry *ei; + + /* + * Do not allow GSI to be mapped to the same irqchip more than once. 
+ * Allow only one to one mapping between GSI and MSI. + */ + hlist_for_each_entry(ei, &rt->map[ue->gsi], link) + if (ei->type == KVM_IRQ_ROUTING_MSI || + ue->type == KVM_IRQ_ROUTING_MSI || + ue->u.irqchip.irqchip == ei->irqchip.irqchip) + return r; + + e->gsi = ue->gsi; + e->type = ue->type; + r = kvm_set_routing_entry(rt, e, ue); + if (r) + goto out; + + hlist_add_head(&e->link, &rt->map[e->gsi]); + r = 0; +out: + return r; +} + +int kvm_set_irq_routing(struct kvm *kvm, + const struct kvm_irq_routing_entry *ue, + unsigned nr, + unsigned flags) +{ + struct kvm_irq_routing_table *new, *old; + u32 i, j, nr_rt_entries = 0; + int r; + + for (i = 0; i < nr; ++i) { + if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES) + return -EINVAL; + nr_rt_entries = max(nr_rt_entries, ue[i].gsi); + } + + nr_rt_entries += 1; + + new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)) + + (nr * sizeof(struct kvm_kernel_irq_routing_entry)), + GFP_KERNEL); + + if (!new) + return -ENOMEM; + + new->rt_entries = (void *)&new->map[nr_rt_entries]; + + new->nr_rt_entries = nr_rt_entries; + for (i = 0; i < KVM_NR_IRQCHIPS; i++) + for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++) + new->chip[i][j] = -1; + + for (i = 0; i < nr; ++i) { + r = -EINVAL; + if (ue->flags) + goto out; + r = setup_routing_entry(new, &new->rt_entries[i], ue); + if (r) + goto out; + ++ue; + } + + mutex_lock(&kvm->irq_lock); + old = kvm->irq_routing; + kvm_irq_routing_update(kvm, new); + mutex_unlock(&kvm->irq_lock); + + synchronize_rcu(); + + new = old; + r = 0; + +out: + kfree(new); + return r; +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f18013f09e68..45f09362ee7b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -217,9 +217,9 @@ void kvm_make_mclock_inprogress_request(struct kvm *kvm) make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); } -void kvm_make_update_eoibitmap_request(struct kvm *kvm) +void kvm_make_scan_ioapic_request(struct kvm *kvm) { - make_all_cpus_request(kvm, KVM_REQ_EOIBITMAP); + make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); } int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) @@ -244,6 +244,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) kvm_vcpu_set_in_spin_loop(vcpu, false); kvm_vcpu_set_dy_eligible(vcpu, false); + vcpu->preempted = false; r = kvm_arch_vcpu_init(vcpu); if (r < 0) @@ -503,6 +504,7 @@ static struct kvm *kvm_create_vm(unsigned long type) mutex_init(&kvm->irq_lock); mutex_init(&kvm->slots_lock); atomic_set(&kvm->users_count, 1); + INIT_LIST_HEAD(&kvm->devices); r = kvm_init_mmu_notifier(kvm); if (r) @@ -580,6 +582,19 @@ void kvm_free_physmem(struct kvm *kvm) kfree(kvm->memslots); } +static void kvm_destroy_devices(struct kvm *kvm) +{ + struct list_head *node, *tmp; + + list_for_each_safe(node, tmp, &kvm->devices) { + struct kvm_device *dev = + list_entry(node, struct kvm_device, vm_node); + + list_del(node); + dev->ops->destroy(dev); + } +} + static void kvm_destroy_vm(struct kvm *kvm) { int i; @@ -599,6 +614,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_arch_flush_shadow_all(kvm); #endif kvm_arch_destroy_vm(kvm); + kvm_destroy_devices(kvm); kvm_free_physmem(kvm); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); @@ -719,24 +735,6 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, } /* - * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: - * - create a new memory slot - * - delete an existing memory slot - * - modify an existing memory slot - * -- move it in the guest physical memory 
space - * -- just change its flags - * - * Since flags can be changed by some of these operations, the following - * differentiation is the best we can do for __kvm_set_memory_region(): - */ -enum kvm_mr_change { - KVM_MR_CREATE, - KVM_MR_DELETE, - KVM_MR_MOVE, - KVM_MR_FLAGS_ONLY, -}; - -/* * Allocate some memory and give it an address in the guest physical address * space. * @@ -745,8 +743,7 @@ enum kvm_mr_change { * Must be called holding mmap_sem for write. */ int __kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { int r; gfn_t base_gfn; @@ -767,7 +764,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (mem->guest_phys_addr & (PAGE_SIZE - 1)) goto out; /* We can read the guest memory with __xxx_user() later on. */ - if (user_alloc && + if ((mem->slot < KVM_USER_MEM_SLOTS) && ((mem->userspace_addr & (PAGE_SIZE - 1)) || !access_ok(VERIFY_WRITE, (void __user *)(unsigned long)mem->userspace_addr, @@ -875,7 +872,7 @@ int __kvm_set_memory_region(struct kvm *kvm, slots = old_memslots; } - r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); + r = kvm_arch_prepare_memory_region(kvm, &new, mem, change); if (r) goto out_slots; @@ -915,7 +912,7 @@ int __kvm_set_memory_region(struct kvm *kvm, old_memslots = install_new_memslots(kvm, slots, &new); - kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); + kvm_arch_commit_memory_region(kvm, mem, &old, change); kvm_free_physmem_slot(&old, &new); kfree(old_memslots); @@ -932,26 +929,23 @@ out: EXPORT_SYMBOL_GPL(__kvm_set_memory_region); int kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { int r; mutex_lock(&kvm->slots_lock); - r = __kvm_set_memory_region(kvm, mem, user_alloc); + r = __kvm_set_memory_region(kvm, mem); mutex_unlock(&kvm->slots_lock); return r; } EXPORT_SYMBOL_GPL(kvm_set_memory_region); int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, - struct - kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { if (mem->slot >= KVM_USER_MEM_SLOTS) return -EINVAL; - return kvm_set_memory_region(kvm, mem, user_alloc); + return kvm_set_memory_region(kvm, mem); } int kvm_get_dirty_log(struct kvm *kvm, @@ -1099,7 +1093,7 @@ static int kvm_read_hva_atomic(void *data, void __user *hva, int len) return __copy_from_user_inatomic(data, hva, len); } -int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, +static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int write, struct page **page) { int flags = FOLL_TOUCH | FOLL_NOWAIT | FOLL_HWPOISON | FOLL_GET; @@ -1719,6 +1713,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) smp_send_reschedule(cpu); put_cpu(); } +EXPORT_SYMBOL_GPL(kvm_vcpu_kick); #endif /* !CONFIG_S390 */ void kvm_resched(struct kvm_vcpu *vcpu) @@ -1816,6 +1811,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; } else if (pass && i > last_boosted_vcpu) break; + if (!ACCESS_ONCE(vcpu->preempted)) + continue; if (vcpu == me) continue; if (waitqueue_active(&vcpu->wq)) @@ -2204,6 +2201,119 @@ out: } #endif +static int kvm_device_ioctl_attr(struct kvm_device *dev, + int (*accessor)(struct kvm_device *dev, + struct kvm_device_attr *attr), + unsigned long arg) +{ + struct kvm_device_attr attr; + + if (!accessor) + return -EPERM; + + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + return -EFAULT; + + return 
accessor(dev, &attr); +} + +static long kvm_device_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + struct kvm_device *dev = filp->private_data; + + switch (ioctl) { + case KVM_SET_DEVICE_ATTR: + return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); + case KVM_GET_DEVICE_ATTR: + return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg); + case KVM_HAS_DEVICE_ATTR: + return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg); + default: + if (dev->ops->ioctl) + return dev->ops->ioctl(dev, ioctl, arg); + + return -ENOTTY; + } +} + +static int kvm_device_release(struct inode *inode, struct file *filp) +{ + struct kvm_device *dev = filp->private_data; + struct kvm *kvm = dev->kvm; + + kvm_put_kvm(kvm); + return 0; +} + +static const struct file_operations kvm_device_fops = { + .unlocked_ioctl = kvm_device_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = kvm_device_ioctl, +#endif + .release = kvm_device_release, +}; + +struct kvm_device *kvm_device_from_filp(struct file *filp) +{ + if (filp->f_op != &kvm_device_fops) + return NULL; + + return filp->private_data; +} + +static int kvm_ioctl_create_device(struct kvm *kvm, + struct kvm_create_device *cd) +{ + struct kvm_device_ops *ops = NULL; + struct kvm_device *dev; + bool test = cd->flags & KVM_CREATE_DEVICE_TEST; + int ret; + + switch (cd->type) { +#ifdef CONFIG_KVM_MPIC + case KVM_DEV_TYPE_FSL_MPIC_20: + case KVM_DEV_TYPE_FSL_MPIC_42: + ops = &kvm_mpic_ops; + break; +#endif +#ifdef CONFIG_KVM_XICS + case KVM_DEV_TYPE_XICS: + ops = &kvm_xics_ops; + break; +#endif + default: + return -ENODEV; + } + + if (test) + return 0; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->ops = ops; + dev->kvm = kvm; + + ret = ops->create(dev, cd->type); + if (ret < 0) { + kfree(dev); + return ret; + } + + ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR); + if (ret < 0) { + ops->destroy(dev); + return ret; + } + + list_add(&dev->vm_node, &kvm->devices); + kvm_get_kvm(kvm); + cd->fd = ret; + return 0; +} + static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2225,7 +2335,7 @@ static long kvm_vm_ioctl(struct file *filp, sizeof kvm_userspace_mem)) goto out; - r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, true); + r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem); break; } case KVM_GET_DIRTY_LOG: { @@ -2304,7 +2414,8 @@ static long kvm_vm_ioctl(struct file *filp, if (copy_from_user(&irq_event, argp, sizeof irq_event)) goto out; - r = kvm_vm_ioctl_irq_line(kvm, &irq_event); + r = kvm_vm_ioctl_irq_line(kvm, &irq_event, + ioctl == KVM_IRQ_LINE_STATUS); if (r) goto out; @@ -2318,6 +2429,54 @@ static long kvm_vm_ioctl(struct file *filp, break; } #endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_SET_GSI_ROUTING: { + struct kvm_irq_routing routing; + struct kvm_irq_routing __user *urouting; + struct kvm_irq_routing_entry *entries; + + r = -EFAULT; + if (copy_from_user(&routing, argp, sizeof(routing))) + goto out; + r = -EINVAL; + if (routing.nr >= KVM_MAX_IRQ_ROUTES) + goto out; + if (routing.flags) + goto out; + r = -ENOMEM; + entries = vmalloc(routing.nr * sizeof(*entries)); + if (!entries) + goto out; + r = -EFAULT; + urouting = argp; + if (copy_from_user(entries, urouting->entries, + routing.nr * sizeof(*entries))) + goto out_free_irq_routing; + r = kvm_set_irq_routing(kvm, entries, routing.nr, + routing.flags); + out_free_irq_routing: + vfree(entries); + break; + } +#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */ + case 
KVM_CREATE_DEVICE: { + struct kvm_create_device cd; + + r = -EFAULT; + if (copy_from_user(&cd, argp, sizeof(cd))) + goto out; + + r = kvm_ioctl_create_device(kvm, &cd); + if (r) + goto out; + + r = -EFAULT; + if (copy_to_user(argp, &cd, sizeof(cd))) + goto out; + + r = 0; + break; + } default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); if (r == -ENOTTY) @@ -2447,8 +2606,11 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) #ifdef CONFIG_HAVE_KVM_MSI case KVM_CAP_SIGNAL_MSI: #endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQFD_RESAMPLE: +#endif return 1; -#ifdef KVM_CAP_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING case KVM_CAP_IRQ_ROUTING: return KVM_MAX_IRQ_ROUTES; #endif @@ -2618,14 +2780,6 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, return NOTIFY_OK; } - -asmlinkage void kvm_spurious_fault(void) -{ - /* Fault while not rebooting. We want the trace. */ - BUG(); -} -EXPORT_SYMBOL_GPL(kvm_spurious_fault); - static int kvm_reboot(struct notifier_block *notifier, unsigned long val, void *v) { @@ -2658,7 +2812,7 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) kfree(bus); } -int kvm_io_bus_sort_cmp(const void *p1, const void *p2) +static int kvm_io_bus_sort_cmp(const void *p1, const void *p2) { const struct kvm_io_range *r1 = p1; const struct kvm_io_range *r2 = p2; @@ -2670,7 +2824,7 @@ return 0; } -int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, +static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, gpa_t addr, int len) { bus->range[bus->dev_count++] = (struct kvm_io_range) { @@ -2685,7 +2839,7 @@ int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, return 0; } -int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, +static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, gpa_t addr, int len) { struct kvm_io_range *range, key; @@ -2929,6 +3083,8 @@ struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) static void kvm_sched_in(struct preempt_notifier *pn, int cpu) { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + if (vcpu->preempted) + vcpu->preempted = false; kvm_arch_vcpu_load(vcpu, cpu); } @@ -2938,6 +3094,8 @@ static void kvm_sched_out(struct preempt_notifier *pn, { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + if (current->state == TASK_RUNNING) + vcpu->preempted = true; kvm_arch_vcpu_put(vcpu); } @@ -2947,6 +3105,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, int r; int cpu; + r = kvm_irqfd_init(); + if (r) + goto out_irqfd; r = kvm_arch_init(opaque); if (r) goto out_fail; @@ -3027,6 +3188,8 @@ out_free_0a: out_free_0: kvm_arch_exit(); out_fail: + kvm_irqfd_exit(); +out_irqfd: return r; } EXPORT_SYMBOL_GPL(kvm_init); @@ -3043,6 +3206,7 @@ void kvm_exit(void) on_each_cpu(hardware_disable_nolock, NULL, 1); kvm_arch_hardware_unsetup(); kvm_arch_exit(); + kvm_irqfd_exit(); free_cpumask_var(cpus_hardware_enabled); } EXPORT_SYMBOL_GPL(kvm_exit);
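
Notes on a few recurring patterns in the diff above. The sketches below are written for this summary, not taken from the patches (except where noted), and any helper names they introduce are hypothetical stand-ins.

1. Feature masks widened to netdev_features_t (net/8021q/vlan_dev.c, net/core/dev.c, net/core/ethtool.c). netdev_features_t is 64 bits wide, so keeping a copy in a u32 silently drops any feature bit at position 32 or above; that is the whole bug these hunks fix. A minimal userspace demonstration, with the bit position chosen arbitrarily:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t netdev_features_t;     /* 64-bit, as in linux/netdev_features.h */

int main(void)
{
        netdev_features_t features = 1ULL << 35;  /* hypothetical high feature bit */
        uint32_t old_features = features;         /* the old, lossy storage type */

        printf("u64 copy kept bit 35: %d\n", (features & (1ULL << 35)) != 0);
        printf("u32 copy kept bit 35: %d\n",
               ((netdev_features_t)old_features & (1ULL << 35)) != 0);
        return 0;
}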
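
2. Tunnel GSO protocol fix-ups (net/ipv4/gre.c, net/ipv4/udp.c). Both gre_gso_segment() and skb_udp_tunnel_segment() now stash the outer skb->protocol, switch it to the inner protocol so the payload segments as the right type, and restore the outer value on every segment produced. A kernel-style sketch of that dance; inner_protocol() and segment_inner() are placeholders for the header lookup and the inner GSO call, not real symbols:

static struct sk_buff *tunnel_gso_segment(struct sk_buff *skb)
{
        __be16 outer_protocol = skb->protocol; /* 1. remember the outer type */
        struct sk_buff *segs;

        skb->protocol = inner_protocol(skb);   /* 2. segment as the inner type */
        segs = segment_inner(skb);
        if (IS_ERR_OR_NULL(segs))
                return segs;

        skb = segs;
        do {
                /* 3. every segment leaves here as an outer packet again */
                skb->protocol = outer_protocol;
        } while ((skb = skb->next));

        return segs;
}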
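
3. Broadcast send without shared buffers (net/tipc/bcast.c). tipc_bcbearer_send() may hand the original buffer only to the first bearer; every further bearer gets its own pskb_copy(), since a bearer's send path keeps a clone and two bearers must not manipulate one skb concurrently. A reduced kernel-style sketch, with send_one() standing in for tipc_bearer_send():

static int broadcast_to_bearers(struct sk_buff *buf, int nr_bearers)
{
        struct sk_buff *tbuf;
        int i;

        for (i = 0; i < nr_bearers; i++) {
                if (i == 0) {
                        /* the first bearer may consume the original buffer */
                        send_one(i, buf);
                        continue;
                }
                /* later bearers get a private copy, so two send paths never
                 * manipulate the same skb concurrently */
                tbuf = pskb_copy(buf, GFP_ATOMIC);
                if (!tbuf)
                        return -ENOMEM;
                send_one(i, tbuf);
                kfree_skb(tbuf);        /* the bearer keeps its own clone */
        }
        return 0;
}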
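
4. Symbol-prefix handling (scripts/, include/linux/export.h). The free-form CONFIG_SYMBOL_PREFIX string is replaced by the boolean CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX, and modpost now compares against VMLINUX_SYMBOL_STR(sym) instead of pasting MODULE_SYMBOL_PREFIX strings. The macro itself is not visible in this diff; the shape below is reconstructed from memory of export.h in this era and should be treated as an assumption:

/* Assumed shape of the export.h helper modpost relies on above; the
 * two-level expansion lets macro arguments expand before stringification. */
#ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX
#define __VMLINUX_SYMBOL_STR(x) "_" #x
#else
#define __VMLINUX_SYMBOL_STR(x) #x
#endif
#define VMLINUX_SYMBOL_STR(x) __VMLINUX_SYMBOL_STR(x)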
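
5. strends() (scripts/mod/modpost.c). The new helper replaces the strrchr() dance in new_module() and also strips the trailing newline in read_symbols_from_files(). The function below is verbatim from the hunk above; the main() harness is added only to make the example self-contained:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool strends(const char *str, const char *postfix)
{
        if (strlen(str) < strlen(postfix))
                return false;

        return strcmp(str + strlen(str) - strlen(postfix), postfix) == 0;
}

int main(void)
{
        char name[] = "drivers/net/dummy.o";

        if (strends(name, ".o"))
                name[strlen(name) - 2] = '\0';  /* strip trailing ".o" */
        printf("%s\n", name);                   /* prints: drivers/net/dummy */
        return 0;
}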
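
6. modpost -T (scripts/Makefile.modpost). As the added comment says, the module list can exceed the command-line length limit, so it is now piped one name per line into "modpost -s -T -" and read from stdin. A condensed, self-contained version of the reading loop; handle_object() stands in for modpost's read_symbols():

#include <limits.h>
#include <stdio.h>
#include <string.h>

static void handle_object(const char *name)
{
        printf("would read symbols from %s\n", name);
}

static void read_names(FILE *in)
{
        char fname[PATH_MAX];

        while (fgets(fname, sizeof(fname), in) != NULL) {
                size_t len = strlen(fname);

                if (len && fname[len - 1] == '\n')
                        fname[len - 1] = '\0';  /* strip the newline */
                handle_object(fname);
        }
}

int main(void)
{
        read_names(stdin);      /* "-T -" means: take the list from stdin */
        return 0;
}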
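
7. RTC interrupt coalescing (virt/kvm/ioapic.c). The IOAPIC now tracks, for the RTC GSI, which vCPUs still owe an EOI (rtc_status.dest_map) and how many (pending_eoi); a new RTC interrupt raised while the count is non-zero is reported as coalesced instead of injected again, which lets userspace re-inject lost ticks accurately. A deliberately reduced model of the bookkeeping (the real code uses a KVM_MAX_VCPUS-sized bitmap and runs under the ioapic spinlock):

#include <stdbool.h>

struct rtc_status {
        int pending_eoi;        /* EOIs still outstanding */
        unsigned long dest_map; /* one bit per vCPU; at most 64 vCPUs here */
};

static bool rtc_irq_check_coalesced(const struct rtc_status *rtc)
{
        return rtc->pending_eoi > 0;    /* some vCPU has not EOI'd yet */
}

static void rtc_irq_delivered(struct rtc_status *rtc, int vcpu_id)
{
        rtc->dest_map |= 1UL << vcpu_id;
        rtc->pending_eoi++;
}

static void rtc_irq_eoi(struct rtc_status *rtc, int vcpu_id)
{
        if (rtc->dest_map & (1UL << vcpu_id)) {
                rtc->dest_map &= ~(1UL << vcpu_id);
                rtc->pending_eoi--;
        }
}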
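
8. Directed yield filtering (virt/kvm/kvm_main.c). The preempt notifiers now record in vcpu->preempted whether a vCPU was scheduled out while still runnable, and kvm_vcpu_on_spin() skips candidates that were not. A reduced sketch of the three touch points; the structures are trimmed to the one relevant field:

#include <stdbool.h>

struct vcpu {
        bool preempted;
};

/* sched-out notifier: the task was still runnable, so it was preempted */
static void vcpu_sched_out(struct vcpu *v, bool was_running)
{
        if (was_running)
                v->preempted = true;
}

/* sched-in notifier: the vCPU is running again */
static void vcpu_sched_in(struct vcpu *v)
{
        v->preempted = false;
}

static bool worth_yielding_to(const struct vcpu *v, const struct vcpu *me)
{
        if (v == me)
                return false;
        /* a vCPU that was not preempted is running or sleeping voluntarily;
         * donating our timeslice to it cannot unblock a lock holder */
        return v->preempted;
}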