432 files changed, 7751 insertions, 1938 deletions
diff --git a/.gitignore b/.gitignore index 7587ef56b92d..8f5422cba6e2 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,7 @@ *.lz4 *.lzma *.lzo +*.mod *.mod.c *.o *.o.* diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 46b826fcb5ad..7ccd158b3894 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2545,7 +2545,7 @@ mem_encrypt=on: Activate SME mem_encrypt=off: Do not activate SME - Refer to Documentation/virtual/kvm/amd-memory-encryption.rst + Refer to Documentation/virt/kvm/amd-memory-encryption.rst for details on when memory encryption can be activated. mem_sleep_default= [SUSPEND] Default system suspend mode: diff --git a/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml b/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml index fc2f63860cc8..be32f087c529 100644 --- a/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml +++ b/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml @@ -42,6 +42,7 @@ properties: patternProperties: "^.*@[0-9a-fA-F]+$": + type: object properties: reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml index cf494a08b837..9692b7f719f5 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml @@ -114,42 +114,47 @@ patternProperties: examples: - | - adc@0 { - compatible = "adi,ad7124-4"; - reg = <0>; - spi-max-frequency = <5000000>; - interrupts = <25 2>; - interrupt-parent = <&gpio>; - refin1-supply = <&adc_vref>; - clocks = <&ad7124_mclk>; - clock-names = "mclk"; - + spi { #address-cells = <1>; #size-cells = <0>; - channel@0 { + adc@0 { + compatible = "adi,ad7124-4"; reg = <0>; - diff-channels = <0 1>; - adi,reference-select = <0>; - adi,buffered-positive; - }; - - channel@1 { - reg = <1>; - bipolar; - diff-channels = <2 3>; - adi,reference-select = <0>; - adi,buffered-positive; - adi,buffered-negative; - }; - - channel@2 { - reg = <2>; - diff-channels = <4 5>; - }; - - channel@3 { - reg = <3>; - diff-channels = <6 7>; + spi-max-frequency = <5000000>; + interrupts = <25 2>; + interrupt-parent = <&gpio>; + refin1-supply = <&adc_vref>; + clocks = <&ad7124_mclk>; + clock-names = "mclk"; + + #address-cells = <1>; + #size-cells = <0>; + + channel@0 { + reg = <0>; + diff-channels = <0 1>; + adi,reference-select = <0>; + adi,buffered-positive; + }; + + channel@1 { + reg = <1>; + bipolar; + diff-channels = <2 3>; + adi,reference-select = <0>; + adi,buffered-positive; + adi,buffered-negative; + }; + + channel@2 { + reg = <2>; + diff-channels = <4 5>; + }; + + channel@3 { + reg = <3>; + diff-channels = <6 7>; + }; }; }; diff --git a/Documentation/devicetree/bindings/iio/adc/avia-hx711.yaml b/Documentation/devicetree/bindings/iio/adc/avia-hx711.yaml index 8a4100ceeaf2..d76ece97c76c 100644 --- a/Documentation/devicetree/bindings/iio/adc/avia-hx711.yaml +++ b/Documentation/devicetree/bindings/iio/adc/avia-hx711.yaml @@ -61,6 +61,6 @@ examples: compatible = "avia,hx711"; sck-gpios = <&gpio3 10 GPIO_ACTIVE_HIGH>; dout-gpios = <&gpio0 7 GPIO_ACTIVE_HIGH>; - avdd-suppy = <&avdd>; + avdd-supply = <&avdd>; clock-frequency = <100000>; }; diff --git a/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml b/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml index e5a411518be1..b5b3cf5b1ac2 
100644 --- a/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml +++ b/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml @@ -55,6 +55,7 @@ patternProperties: "^pinctrl-[0-9]+$": true "^nand@[a-f0-9]+$": + type: object properties: reg: minimum: 0 diff --git a/Documentation/devicetree/bindings/mtd/nand-controller.yaml b/Documentation/devicetree/bindings/mtd/nand-controller.yaml index 199ba5ac2a06..d261b7096c69 100644 --- a/Documentation/devicetree/bindings/mtd/nand-controller.yaml +++ b/Documentation/devicetree/bindings/mtd/nand-controller.yaml @@ -40,6 +40,7 @@ properties: patternProperties: "^nand@[a-f0-9]$": + type: object properties: reg: description: diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml index 61a110a7db8a..125599a2dc5e 100644 --- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2400-pinctrl.yaml @@ -22,7 +22,9 @@ description: |+ properties: compatible: - enum: [ aspeed,ast2400-pinctrl, aspeed,g4-pinctrl ] + enum: + - aspeed,ast2400-pinctrl + - aspeed,g4-pinctrl patternProperties: '^.*$': diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml index cf561bd55128..3e6d85318577 100644 --- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2500-pinctrl.yaml @@ -22,7 +22,9 @@ description: |+ properties: compatible: - enum: [ aspeed,ast2500-pinctrl, aspeed,g5-pinctrl ] + enum: + - aspeed,ast2500-pinctrl + - aspeed,g5-pinctrl aspeed,external-nodes: minItems: 2 maxItems: 2 @@ -74,9 +76,6 @@ required: examples: - | - compatible = "simple-bus"; - ranges; - apb { compatible = "simple-bus"; #address-cells = <1>; @@ -89,7 +88,7 @@ examples: pinctrl: pinctrl { compatible = "aspeed,g5-pinctrl"; - aspeed,external-nodes = <&gfx &lhc>; + aspeed,external-nodes = <&gfx>, <&lhc>; pinctrl_i2c3_default: i2c3_default { function = "I2C3"; diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml index 06c4b66c3ee6..91d3e78b3395 100644 --- a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml @@ -55,6 +55,7 @@ properties: patternProperties: '^gpio@[0-9a-f]*$': + type: object properties: gpio-controller: true '#gpio-cells': @@ -113,8 +114,10 @@ patternProperties: - st,bank-name '-[0-9]*$': + type: object patternProperties: '^pins': + type: object description: | A pinctrl node should contain at least one subnode representing the pinctrl group available on the machine. 
Each subnode will list the @@ -194,6 +197,7 @@ required: examples: - | #include <dt-bindings/pinctrl/stm32-pinfunc.h> + #include <dt-bindings/mfd/stm32f4-rcc.h> //Example 1 pinctrl@40020000 { #address-cells = <1>; @@ -207,6 +211,7 @@ examples: #gpio-cells = <2>; reg = <0x0 0x400>; resets = <&reset_ahb1 0>; + clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOA)>; st,bank-name = "GPIOA"; }; }; @@ -224,6 +229,7 @@ examples: #gpio-cells = <2>; reg = <0x1000 0x400>; resets = <&reset_ahb1 0>; + clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOB)>; st,bank-name = "GPIOB"; gpio-ranges = <&pinctrl 0 0 16>; }; @@ -233,6 +239,7 @@ examples: #gpio-cells = <2>; reg = <0x2000 0x400>; resets = <&reset_ahb1 0>; + clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOC)>; st,bank-name = "GPIOC"; ngpios = <5>; gpio-ranges = <&pinctrl 0 16 3>, diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml index f97a4ecd7b91..c899111aa5e3 100644 --- a/Documentation/devicetree/bindings/riscv/cpus.yaml +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -10,97 +10,76 @@ maintainers: - Paul Walmsley <[email protected]> - Palmer Dabbelt <[email protected]> -allOf: - - $ref: /schemas/cpus.yaml# - properties: - $nodename: - const: cpus - description: Container of cpu nodes - - '#address-cells': - const: 1 - description: | - A single unsigned 32-bit integer uniquely identifies each RISC-V - hart in a system. (See the "reg" node under the "cpu" node, - below). - - '#size-cells': - const: 0 + compatible: + items: + - enum: + - sifive,rocket0 + - sifive,e5 + - sifive,e51 + - sifive,u54-mc + - sifive,u54 + - sifive,u5 + - const: riscv + description: + Identifies that the hart uses the RISC-V instruction set + and identifies the type of the hart. + + mmu-type: + allOf: + - $ref: "/schemas/types.yaml#/definitions/string" + - enum: + - riscv,sv32 + - riscv,sv39 + - riscv,sv48 + description: + Identifies the MMU address translation mode used on this + hart. These values originate from the RISC-V Privileged + Specification document, available from + https://riscv.org/specifications/ + + riscv,isa: + allOf: + - $ref: "/schemas/types.yaml#/definitions/string" + - enum: + - rv64imac + - rv64imafdc + description: + Identifies the specific RISC-V instruction set architecture + supported by the hart. These are documented in the RISC-V + User-Level ISA document, available from + https://riscv.org/specifications/ + + timebase-frequency: + type: integer + minimum: 1 + description: + Specifies the clock frequency of the system timer in Hz. + This value is common to all harts on a single system image. + + interrupt-controller: + type: object + description: Describes the CPU's local interrupt controller -patternProperties: - '^cpu@[0-9a-f]+$': properties: - compatible: - type: array - items: - - enum: - - sifive,rocket0 - - sifive,e5 - - sifive,e51 - - sifive,u54-mc - - sifive,u54 - - sifive,u5 - - const: riscv - description: - Identifies that the hart uses the RISC-V instruction set - and identifies the type of the hart. - - mmu-type: - allOf: - - $ref: "/schemas/types.yaml#/definitions/string" - - enum: - - riscv,sv32 - - riscv,sv39 - - riscv,sv48 - description: - Identifies the MMU address translation mode used on this - hart. 
These values originate from the RISC-V Privileged - Specification document, available from - https://riscv.org/specifications/ - - riscv,isa: - allOf: - - $ref: "/schemas/types.yaml#/definitions/string" - - enum: - - rv64imac - - rv64imafdc - description: - Identifies the specific RISC-V instruction set architecture - supported by the hart. These are documented in the RISC-V - User-Level ISA document, available from - https://riscv.org/specifications/ + '#interrupt-cells': + const: 1 - timebase-frequency: - type: integer - minimum: 1 - description: - Specifies the clock frequency of the system timer in Hz. - This value is common to all harts on a single system image. - - interrupt-controller: - type: object - description: Describes the CPU's local interrupt controller - - properties: - '#interrupt-cells': - const: 1 - - compatible: - const: riscv,cpu-intc - - interrupt-controller: true + compatible: + const: riscv,cpu-intc - required: - - '#interrupt-cells' - - compatible - - interrupt-controller + interrupt-controller: true required: - - riscv,isa - - timebase-frequency + - '#interrupt-cells' + - compatible - interrupt-controller +required: + - riscv,isa + - timebase-frequency + - interrupt-controller + examples: - | // Example 1: SiFive Freedom U540G Development Kit diff --git a/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml b/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml index c374fd4923a6..6d1329c28170 100644 --- a/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml +++ b/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml @@ -50,6 +50,7 @@ properties: patternProperties: "^.*@[0-9a-f]+": + type: object properties: reg: items: diff --git a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml index bda7a5befd8b..f36c46d236d7 100644 --- a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml +++ b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml @@ -55,6 +55,7 @@ properties: patternProperties: "^.*@[0-9a-f]+": + type: object properties: reg: items: diff --git a/Documentation/dontdiff b/Documentation/dontdiff index 5eba889ea84d..9f4392876099 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff @@ -30,6 +30,7 @@ *.lzo *.mo *.moc +*.mod *.mod.c *.o *.o.* diff --git a/Documentation/driver-api/ntb.rst b/Documentation/driver-api/ntb.rst index 074a423c853c..87d1372da879 100644 --- a/Documentation/driver-api/ntb.rst +++ b/Documentation/driver-api/ntb.rst @@ -200,6 +200,33 @@ Debugfs Files: This file is used to read and write peer scratchpads. See *spad* for details. +NTB MSI Test Client (ntb\_msi\_test) +------------------------------------ + +The MSI test client serves to test and debug the MSI library which +allows for passing MSI interrupts across NTB memory windows. The +test client is interacted with through the debugfs filesystem: + +* *debugfs*/ntb\_tool/*hw*/ + A directory in debugfs will be created for each + NTB device probed by the tool. This directory is shortened to *hw* + below. +* *hw*/port + This file describes the local port number +* *hw*/irq*_occurrences + One occurrences file exists for each interrupt and, when read, + returns the number of times the interrupt has been triggered. 
+* *hw*/peer*/port + This file describes the port number for each peer +* *hw*/peer*/count + This file describes the number of interrupts that can be + triggered on each peer +* *hw*/peer*/trigger + Writing an interrupt number (any number less than the value + specified in count) will trigger the interrupt on the + specified peer. That peer's interrupt's occurrence file + should be incremented. + NTB Hardware Drivers ==================== diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 209672010fb4..6b7a41cfcaed 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -436,7 +436,7 @@ for the inode. If d_make_root(inode) is passed a NULL inode it returns NULL and also requires no further error handling. Typical usage is: inode = foofs_new_inode(....); - s->s_root = d_make_inode(inode); + s->s_root = d_make_root(inode); if (!s->s_root) /* Nothing needed for the inode cleanup */ return -ENOMEM; diff --git a/Documentation/hwmon/k8temp.rst b/Documentation/hwmon/k8temp.rst index 72da12aa17e5..fe9109521056 100644 --- a/Documentation/hwmon/k8temp.rst +++ b/Documentation/hwmon/k8temp.rst @@ -9,7 +9,7 @@ Supported chips: Addresses scanned: PCI space - Datasheet: http://support.amd.com/us/Processor_TechDocs/32559.pdf + Datasheet: http://www.amd.com/system/files/TechDocs/32559.pdf Author: Rudolf Marek diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst index ce9b99c004ae..61b2181ed3ea 100644 --- a/Documentation/kbuild/kbuild.rst +++ b/Documentation/kbuild/kbuild.rst @@ -38,12 +38,11 @@ Additional options to the assembler (for built-in and modules). AFLAGS_MODULE ------------- -Additional module specific options to use for $(AS). +Additional assembler options for modules. AFLAGS_KERNEL ------------- -Additional options for $(AS) when used for assembler -code for code that is compiled as built-in. +Additional assembler options for built-in. KCFLAGS ------- diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index f31158457753..f4f0f7ffde2b 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -328,7 +328,7 @@ more details, with real examples. variable $(KBUILD_CFLAGS) and uses it for compilation flags for the entire tree. - asflags-y specifies options for assembling with $(AS). + asflags-y specifies assembler options. Example:: @@ -490,7 +490,7 @@ more details, with real examples. as-instr checks if the assembler reports a specific instruction and then outputs either option1 or option2 C escapes are supported in the test instruction - Note: as-instr-option uses KBUILD_AFLAGS for $(AS) options + Note: as-instr-option uses KBUILD_AFLAGS for assembler options cc-option cc-option is used to check if $(CC) supports a given option, and if @@ -906,7 +906,7 @@ When kbuild executes, the following steps are followed (roughly): vmlinux. The usage of $(call if_changed,xxx) will be described later. KBUILD_AFLAGS - $(AS) assembler flags + Assembler flags Default value - see top level Makefile Append or modify as required per architecture. @@ -949,16 +949,16 @@ When kbuild executes, the following steps are followed (roughly): to 'y' when selected. KBUILD_AFLAGS_KERNEL - $(AS) options specific for built-in + Assembler options specific for built-in $(KBUILD_AFLAGS_KERNEL) contains extra C compiler flags used to compile resident kernel code. 
KBUILD_AFLAGS_MODULE - Options for $(AS) when building modules + Assembler options specific for modules $(KBUILD_AFLAGS_MODULE) is used to add arch-specific options that - are used for $(AS). + are used for assembler. From commandline AFLAGS_MODULE shall be used (see kbuild.txt). diff --git a/Documentation/virtual/index.rst b/Documentation/virt/index.rst index 062ffb527043..062ffb527043 100644 --- a/Documentation/virtual/index.rst +++ b/Documentation/virt/index.rst diff --git a/Documentation/virtual/kvm/amd-memory-encryption.rst b/Documentation/virt/kvm/amd-memory-encryption.rst index d18c97b4e140..d18c97b4e140 100644 --- a/Documentation/virtual/kvm/amd-memory-encryption.rst +++ b/Documentation/virt/kvm/amd-memory-encryption.rst diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virt/kvm/api.txt index 2cd6250b2896..2d067767b617 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virt/kvm/api.txt @@ -3781,7 +3781,7 @@ encrypted VMs. Currently, this ioctl is used for issuing Secure Encrypted Virtualization (SEV) commands on AMD Processors. The SEV commands are defined in -Documentation/virtual/kvm/amd-memory-encryption.rst. +Documentation/virt/kvm/amd-memory-encryption.rst. 4.111 KVM_MEMORY_ENCRYPT_REG_REGION @@ -4090,17 +4090,22 @@ Parameters: struct kvm_pmu_event_filter (in) Returns: 0 on success, -1 on error struct kvm_pmu_event_filter { - __u32 action; - __u32 nevents; - __u64 events[0]; + __u32 action; + __u32 nevents; + __u32 fixed_counter_bitmap; + __u32 flags; + __u32 pad[4]; + __u64 events[0]; }; This ioctl restricts the set of PMU events that the guest can program. The argument holds a list of events which will be allowed or denied. The eventsel+umask of each event the guest attempts to program is compared against the events field to determine whether the guest should have access. -This only affects general purpose counters; fixed purpose counters can -be disabled by changing the perfmon CPUID leaf. +The events field only controls general purpose counters; fixed purpose +counters are controlled by the fixed_counter_bitmap. + +No flags are defined yet, the field must be zero. 
Valid values for 'action': #define KVM_PMU_EVENT_ALLOW 0 diff --git a/Documentation/virtual/kvm/arm/hyp-abi.txt b/Documentation/virt/kvm/arm/hyp-abi.txt index a20a0bee268d..a20a0bee268d 100644 --- a/Documentation/virtual/kvm/arm/hyp-abi.txt +++ b/Documentation/virt/kvm/arm/hyp-abi.txt diff --git a/Documentation/virtual/kvm/arm/psci.txt b/Documentation/virt/kvm/arm/psci.txt index 559586fc9d37..559586fc9d37 100644 --- a/Documentation/virtual/kvm/arm/psci.txt +++ b/Documentation/virt/kvm/arm/psci.txt diff --git a/Documentation/virtual/kvm/cpuid.rst b/Documentation/virt/kvm/cpuid.rst index 01b081f6e7ea..01b081f6e7ea 100644 --- a/Documentation/virtual/kvm/cpuid.rst +++ b/Documentation/virt/kvm/cpuid.rst diff --git a/Documentation/virtual/kvm/devices/README b/Documentation/virt/kvm/devices/README index 34a69834124a..34a69834124a 100644 --- a/Documentation/virtual/kvm/devices/README +++ b/Documentation/virt/kvm/devices/README diff --git a/Documentation/virtual/kvm/devices/arm-vgic-its.txt b/Documentation/virt/kvm/devices/arm-vgic-its.txt index eeaa95b893a8..eeaa95b893a8 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic-its.txt +++ b/Documentation/virt/kvm/devices/arm-vgic-its.txt diff --git a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt b/Documentation/virt/kvm/devices/arm-vgic-v3.txt index ff290b43c8e5..ff290b43c8e5 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt +++ b/Documentation/virt/kvm/devices/arm-vgic-v3.txt diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virt/kvm/devices/arm-vgic.txt index 97b6518148f8..97b6518148f8 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virt/kvm/devices/arm-vgic.txt diff --git a/Documentation/virtual/kvm/devices/mpic.txt b/Documentation/virt/kvm/devices/mpic.txt index 8257397adc3c..8257397adc3c 100644 --- a/Documentation/virtual/kvm/devices/mpic.txt +++ b/Documentation/virt/kvm/devices/mpic.txt diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virt/kvm/devices/s390_flic.txt index a4e20a090174..a4e20a090174 100644 --- a/Documentation/virtual/kvm/devices/s390_flic.txt +++ b/Documentation/virt/kvm/devices/s390_flic.txt diff --git a/Documentation/virtual/kvm/devices/vcpu.txt b/Documentation/virt/kvm/devices/vcpu.txt index 2b5dab16c4f2..2b5dab16c4f2 100644 --- a/Documentation/virtual/kvm/devices/vcpu.txt +++ b/Documentation/virt/kvm/devices/vcpu.txt diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virt/kvm/devices/vfio.txt index 528c77c8022c..528c77c8022c 100644 --- a/Documentation/virtual/kvm/devices/vfio.txt +++ b/Documentation/virt/kvm/devices/vfio.txt diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virt/kvm/devices/vm.txt index 4ffb82b02468..4ffb82b02468 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virt/kvm/devices/vm.txt diff --git a/Documentation/virtual/kvm/devices/xics.txt b/Documentation/virt/kvm/devices/xics.txt index 42864935ac5d..42864935ac5d 100644 --- a/Documentation/virtual/kvm/devices/xics.txt +++ b/Documentation/virt/kvm/devices/xics.txt diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virt/kvm/devices/xive.txt index 9a24a4525253..9a24a4525253 100644 --- a/Documentation/virtual/kvm/devices/xive.txt +++ b/Documentation/virt/kvm/devices/xive.txt diff --git a/Documentation/virtual/kvm/halt-polling.txt b/Documentation/virt/kvm/halt-polling.txt index 4f791b128dd2..4f791b128dd2 100644 --- a/Documentation/virtual/kvm/halt-polling.txt +++ 
b/Documentation/virt/kvm/halt-polling.txt diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virt/kvm/hypercalls.txt index da210651f714..5f6d291bd004 100644 --- a/Documentation/virtual/kvm/hypercalls.txt +++ b/Documentation/virt/kvm/hypercalls.txt @@ -18,7 +18,7 @@ S390: number in R1. For further information on the S390 diagnose call as supported by KVM, - refer to Documentation/virtual/kvm/s390-diag.txt. + refer to Documentation/virt/kvm/s390-diag.txt. PowerPC: It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers. @@ -26,7 +26,7 @@ S390: KVM hypercalls uses 4 byte opcode, that are patched with 'hypercall-instructions' property inside the device tree's /hypervisor node. - For more information refer to Documentation/virtual/kvm/ppc-pv.txt + For more information refer to Documentation/virt/kvm/ppc-pv.txt MIPS: KVM hypercalls use the HYPCALL instruction with code 0 and the hypercall diff --git a/Documentation/virtual/kvm/index.rst b/Documentation/virt/kvm/index.rst index 0b206a06f5be..0b206a06f5be 100644 --- a/Documentation/virtual/kvm/index.rst +++ b/Documentation/virt/kvm/index.rst diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virt/kvm/locking.txt index 635cd6eaf714..635cd6eaf714 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virt/kvm/locking.txt diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virt/kvm/mmu.txt index 2efe0efc516e..1b9880dfba0a 100644 --- a/Documentation/virtual/kvm/mmu.txt +++ b/Documentation/virt/kvm/mmu.txt @@ -298,7 +298,7 @@ Handling a page fault is performed as follows: vcpu->arch.mmio_gfn, and call the emulator - If both P bit and R/W bit of error code are set, this could possibly be handled as a "fast page fault" (fixed without taking the MMU lock). See - the description in Documentation/virtual/kvm/locking.txt. + the description in Documentation/virt/kvm/locking.txt. - if needed, walk the guest page tables to determine the guest translation (gva->gpa or ngpa->gpa) - if permissions are insufficient, reflect the fault back to the guest diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virt/kvm/msr.txt index df1f4338b3ca..df1f4338b3ca 100644 --- a/Documentation/virtual/kvm/msr.txt +++ b/Documentation/virt/kvm/msr.txt diff --git a/Documentation/virtual/kvm/nested-vmx.txt b/Documentation/virt/kvm/nested-vmx.txt index 97eb1353e962..97eb1353e962 100644 --- a/Documentation/virtual/kvm/nested-vmx.txt +++ b/Documentation/virt/kvm/nested-vmx.txt diff --git a/Documentation/virtual/kvm/ppc-pv.txt b/Documentation/virt/kvm/ppc-pv.txt index e26115ce4258..e26115ce4258 100644 --- a/Documentation/virtual/kvm/ppc-pv.txt +++ b/Documentation/virt/kvm/ppc-pv.txt diff --git a/Documentation/virtual/kvm/review-checklist.txt b/Documentation/virt/kvm/review-checklist.txt index a83b27635fdd..499af499e296 100644 --- a/Documentation/virtual/kvm/review-checklist.txt +++ b/Documentation/virt/kvm/review-checklist.txt @@ -7,7 +7,7 @@ Review checklist for kvm patches 2. Patches should be against kvm.git master branch. 3. If the patch introduces or modifies a new userspace API: - - the API must be documented in Documentation/virtual/kvm/api.txt + - the API must be documented in Documentation/virt/kvm/api.txt - the API must be discoverable using KVM_CHECK_EXTENSION 4. New state must include support for save/restore. 
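The kvm_pmu_event_filter extension to api.txt above is easiest to see from userspace. A minimal sketch, assuming an already-created VM fd; KVM_SET_PMU_EVENT_FILTER and KVM_PMU_EVENT_DENY come from the uapi headers (only KVM_PMU_EVENT_ALLOW appears in the hunk), and the event value passed in is a placeholder rather than a real eventsel/umask encoding:

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Deny the guest one general-purpose PMU event via the vm ioctl. */
static int pmu_deny_event(int vm_fd, __u64 eventsel_and_umask)
{
	struct kvm_pmu_event_filter *filter;
	int ret;

	/* events[] is a flexible array member: allocate room for one entry */
	filter = calloc(1, sizeof(*filter) + sizeof(__u64));
	if (!filter)
		return -1;

	filter->action = KVM_PMU_EVENT_DENY;	/* block the listed events */
	filter->nevents = 1;
	filter->flags = 0;			/* must be zero, per the text above */
	filter->events[0] = eventsel_and_umask;	/* placeholder encoding */

	ret = ioctl(vm_fd, KVM_SET_PMU_EVENT_FILTER, filter);
	free(filter);
	return ret;
}

Note that only events[] is consulted for general purpose counters; fixed purpose counters are governed by fixed_counter_bitmap, matching the documentation change in the hunk.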
diff --git a/Documentation/virtual/kvm/s390-diag.txt b/Documentation/virt/kvm/s390-diag.txt index 7c52e5f8b210..7c52e5f8b210 100644 --- a/Documentation/virtual/kvm/s390-diag.txt +++ b/Documentation/virt/kvm/s390-diag.txt diff --git a/Documentation/virtual/kvm/timekeeping.txt b/Documentation/virt/kvm/timekeeping.txt index 76808a17ad84..76808a17ad84 100644 --- a/Documentation/virtual/kvm/timekeeping.txt +++ b/Documentation/virt/kvm/timekeeping.txt diff --git a/Documentation/virtual/kvm/vcpu-requests.rst b/Documentation/virt/kvm/vcpu-requests.rst index 5feb3706a7ae..5feb3706a7ae 100644 --- a/Documentation/virtual/kvm/vcpu-requests.rst +++ b/Documentation/virt/kvm/vcpu-requests.rst diff --git a/Documentation/virtual/paravirt_ops.rst b/Documentation/virt/paravirt_ops.rst index 6b789d27cead..6b789d27cead 100644 --- a/Documentation/virtual/paravirt_ops.rst +++ b/Documentation/virt/paravirt_ops.rst diff --git a/Documentation/virtual/uml/UserModeLinux-HOWTO.txt b/Documentation/virt/uml/UserModeLinux-HOWTO.txt index 87b80f589e1c..87b80f589e1c 100644 --- a/Documentation/virtual/uml/UserModeLinux-HOWTO.txt +++ b/Documentation/virt/uml/UserModeLinux-HOWTO.txt diff --git a/MAINTAINERS b/MAINTAINERS index bd3fe4fe13c4..4e2a525e22c0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8808,7 +8808,7 @@ L: [email protected] W: http://www.linux-kvm.org T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git S: Supported -F: Documentation/virtual/kvm/ +F: Documentation/virt/kvm/ F: include/trace/events/kvm.h F: include/uapi/asm-generic/kvm* F: include/uapi/linux/kvm* @@ -8878,6 +8878,8 @@ F: arch/s390/include/asm/gmap.h F: arch/s390/include/asm/kvm* F: arch/s390/kvm/ F: arch/s390/mm/gmap.c +F: tools/testing/selftests/kvm/s390x/ +F: tools/testing/selftests/kvm/*/s390x/ KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86) M: Paolo Bonzini <[email protected]> @@ -12131,10 +12133,11 @@ F: Documentation/driver-api/parport*.rst PARAVIRT_OPS INTERFACE M: Juergen Gross <[email protected]> -M: Alok Kataria <[email protected]> +M: Thomas Hellstrom <[email protected]> +M: "VMware, Inc." <[email protected]> S: Supported -F: Documentation/virtual/paravirt_ops.txt +F: Documentation/virt/paravirt_ops.txt F: arch/*/kernel/paravirt* F: arch/*/include/asm/paravirt*.h F: include/linux/hypervisor.h @@ -13722,6 +13725,7 @@ F: drivers/mtd/nand/raw/r852.c F: drivers/mtd/nand/raw/r852.h RISC-V ARCHITECTURE +M: Paul Walmsley <[email protected]> M: Palmer Dabbelt <[email protected]> M: Albert Ou <[email protected]> @@ -16851,7 +16855,7 @@ W: http://user-mode-linux.sourceforge.net Q: https://patchwork.ozlabs.org/project/linux-um/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml.git S: Maintained -F: Documentation/virtual/uml/ +F: Documentation/virt/uml/ F: arch/um/ F: arch/x86/um/ F: fs/hostfs/ @@ -17177,7 +17181,8 @@ S: Maintained F: drivers/misc/vmw_balloon.c VMWARE HYPERVISOR INTERFACE -M: Alok Kataria <[email protected]> +M: Thomas Hellstrom <[email protected]> +M: "VMware, Inc." <[email protected]> S: Supported F: arch/x86/kernel/cpu/vmware.c diff --git a/Makefile b/Makefile --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 -PATCHLEVEL = 2 +PATCHLEVEL = 3 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Bobtail Squid # *DOCUMENTATION* @@ -486,11 +486,6 @@ export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL export KBUILD_ARFLAGS -# When compiling out-of-tree modules, put MODVERDIR in the module -# tree rather than in the kernel tree. The kernel tree might -# even be read-only.
-export MODVERDIR := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_versions - # Files to ignore in find ... statements export RCS_FIND_IGNORE := \( -name SCCS -o -name BitKeeper -o -name .svn -o \ @@ -887,6 +882,12 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) # change __FILE__ to the relative path from the srctree KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) +# ensure -fcf-protection is disabled when using retpoline as it is +# incompatible with -mindirect-branch=thunk-extern +ifdef CONFIG_RETPOLINE +KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) +endif + # use the deterministic mode of AR if available KBUILD_ARFLAGS := $(call ar-option,D) @@ -900,10 +901,8 @@ KBUILD_CPPFLAGS += $(ARCH_CPPFLAGS) $(KCPPFLAGS) KBUILD_AFLAGS += $(ARCH_AFLAGS) $(KAFLAGS) KBUILD_CFLAGS += $(ARCH_CFLAGS) $(KCFLAGS) -# Use --build-id when available. -LDFLAGS_BUILD_ID := $(call ld-option, --build-id) -KBUILD_LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID) -LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID) +KBUILD_LDFLAGS_MODULE += --build-id +LDFLAGS_vmlinux += --build-id ifeq ($(CONFIG_STRIP_ASM_SYMS),y) LDFLAGS_vmlinux += $(call ld-option, -X,) @@ -1031,8 +1030,8 @@ vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_OBJS) $(KBUILD_VMLINUX_LIBS) # Recurse until adjust_autoksyms.sh is satisfied PHONY += autoksyms_recursive -autoksyms_recursive: $(vmlinux-deps) ifdef CONFIG_TRIM_UNUSED_KSYMS +autoksyms_recursive: $(vmlinux-deps) modules.order $(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh \ "$(MAKE) -f $(srctree)/Makefile vmlinux" endif @@ -1074,7 +1073,7 @@ $(sort $(vmlinux-deps)): $(vmlinux-dirs) ; PHONY += $(vmlinux-dirs) $(vmlinux-dirs): prepare - $(Q)$(MAKE) $(build)=$@ need-builtin=1 + $(Q)$(MAKE) $(build)=$@ need-builtin=1 need-modorder=1 filechk_kernel.release = \ echo "$(KERNELVERSION)$$($(CONFIG_SHELL) $(srctree)/scripts/setlocalversion $(srctree))" @@ -1096,7 +1095,7 @@ scripts: scripts_basic scripts_dtc # archprepare is used in arch Makefiles and when processed asm symlink, # version.h and scripts_basic is processed / created. 
-PHONY += prepare archprepare prepare1 prepare3 +PHONY += prepare archprepare prepare3 # prepare3 is used to check if we are building in a separate output directory, # and if so do: @@ -1113,11 +1112,8 @@ ifdef building_out_of_srctree fi; endif -prepare1: prepare3 outputmakefile asm-generic $(version_h) $(autoksyms_h) \ - include/generated/utsrelease.h - $(cmd_crmodverdir) - -archprepare: archheaders archscripts prepare1 scripts +archprepare: archheaders archscripts scripts prepare3 outputmakefile \ + asm-generic $(version_h) $(autoksyms_h) include/generated/utsrelease.h prepare0: archprepare $(Q)$(MAKE) $(build)=scripts/mod @@ -1331,8 +1327,8 @@ _modinst_: rm -f $(MODLIB)/build ; \ ln -s $(CURDIR) $(MODLIB)/build ; \ fi - @cp -f $(objtree)/modules.order $(MODLIB)/ - @cp -f $(objtree)/modules.builtin $(MODLIB)/ + @sed 's:^:kernel/:' modules.order > $(MODLIB)/modules.order + @sed 's:^:kernel/:' modules.builtin > $(MODLIB)/modules.builtin @cp -f $(objtree)/modules.builtin.modinfo $(MODLIB)/ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst @@ -1373,18 +1369,22 @@ endif # CONFIG_MODULES # make distclean Remove editor backup files, patch leftover files and the like # Directories & files removed with 'make clean' -CLEAN_DIRS += $(MODVERDIR) include/ksym +CLEAN_DIRS += include/ksym CLEAN_FILES += modules.builtin.modinfo # Directories & files removed with 'make mrproper' MRPROPER_DIRS += include/config include/generated \ arch/$(SRCARCH)/include/generated .tmp_objdiff MRPROPER_FILES += .config .config.old .version \ - Module.symvers tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS \ + Module.symvers \ signing_key.pem signing_key.priv signing_key.x509 \ x509.genkey extra_certificates signing_key.x509.keyid \ signing_key.x509.signer vmlinux-gdb.py +# Directories & files removed with 'make distclean' +DISTCLEAN_DIRS += +DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS + # clean - Delete most, but leave enough to build external modules # clean: rm-dirs := $(CLEAN_DIRS) @@ -1417,9 +1417,14 @@ mrproper: clean $(mrproper-dirs) # distclean # +distclean: rm-dirs := $(wildcard $(DISTCLEAN_DIRS)) +distclean: rm-files := $(wildcard $(DISTCLEAN_FILES)) + PHONY += distclean distclean: mrproper + $(call cmd,rmdirs) + $(call cmd,rmfiles) @find $(srctree) $(RCS_FIND_IGNORE) \ \( -name '*.orig' -o -name '*.rej' -o -name '*~' \ -o -name '*.bak' -o -name '#*#' -o -name '*%' \ @@ -1609,7 +1614,7 @@ $(objtree)/Module.symvers: module-dirs := $(addprefix _module_,$(KBUILD_EXTMOD)) PHONY += $(module-dirs) modules $(module-dirs): prepare $(objtree)/Module.symvers - $(Q)$(MAKE) $(build)=$(patsubst _module_%,%,$@) + $(Q)$(MAKE) $(build)=$(patsubst _module_%,%,$@) need-modorder=1 modules: $(module-dirs) @$(kecho) ' Building modules, stage 2.'; @@ -1634,7 +1639,6 @@ PHONY += $(clean-dirs) clean $(clean-dirs): $(Q)$(MAKE) $(clean)=$(patsubst _clean_%,%,$@) -clean: rm-dirs := $(MODVERDIR) clean: rm-files := $(KBUILD_EXTMOD)/Module.symvers PHONY += help @@ -1648,8 +1652,6 @@ help: @echo '' PHONY += prepare -prepare: - $(cmd_crmodverdir) endif # KBUILD_EXTMOD clean: $(clean-dirs) @@ -1660,7 +1662,7 @@ clean: $(clean-dirs) -o -name '*.ko.*' \ -o -name '*.dtb' -o -name '*.dtb.S' -o -name '*.dt.yaml' \ -o -name '*.dwo' -o -name '*.lst' \ - -o -name '*.su' \ + -o -name '*.su' -o -name '*.mod' \ -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \ -o -name '*.lex.c' -o -name '*.tab.[ch]' \ -o -name '*.asn1.[ch]' \ @@ -1765,8 +1767,6 @@ build-dir = $(patsubst %/,%,$(dir $(build-target))) $(Q)$(MAKE) $(build)=$(build-dir) 
$(build-target) %.symtypes: prepare FORCE $(Q)$(MAKE) $(build)=$(build-dir) $(build-target) -%.ko: %.o - $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost # Modules PHONY += / @@ -1789,11 +1789,6 @@ quiet_cmd_depmod = DEPMOD $(KERNELRELEASE) cmd_depmod = $(CONFIG_SHELL) $(srctree)/scripts/depmod.sh $(DEPMOD) \ $(KERNELRELEASE) -# Create temporary dir for module support files -# clean it up only when building all modules -cmd_crmodverdir = $(Q)mkdir -p $(MODVERDIR) \ - $(if $(KBUILD_MODULES),; rm -f $(MODVERDIR)/*) - # read saved command lines for existing targets existing-targets := $(wildcard $(sort $(targets))) diff --git a/arch/Kconfig b/arch/Kconfig index ac0fba400ded..a7b57dd42c26 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -796,6 +796,9 @@ config ARCH_NO_COHERENT_DMA_MMAP config ARCH_NO_PREEMPT bool +config ARCH_SUPPORTS_RT + bool + config CPU_NO_EFFICIENT_FFS def_bool n diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index ca85df247775..87b7769214e0 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -13,8 +13,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8 ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ -z max-page-size=4096 -nostdlib -shared $(ldflags-y) \ - $(call ld-option, --hash-style=sysv) \ - $(call ld-option, --build-id) \ + --hash-style=sysv --build-id \ -T obj-$(CONFIG_VDSO) += vdso.o diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 288c14d30b45..60a4c6239712 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -96,8 +96,8 @@ VDSO_LDFLAGS := $(VDSO_CPPFLAGS) VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft -VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--hash-style=sysv) -VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--build-id) +VDSO_LDFLAGS += -Wl,--hash-style=sysv +VDSO_LDFLAGS += -Wl,--build-id VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd) diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h index 3c9e1bd9a3e9..d6544dc71258 100644 --- a/arch/hexagon/include/asm/pgalloc.h +++ b/arch/hexagon/include/asm/pgalloc.h @@ -11,6 +11,8 @@ #include <asm/mem-layout.h> #include <asm/atomic.h> +#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */ + #define check_pgt_cache() do {} while (0) extern unsigned long long kmap_generation; @@ -46,38 +48,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) free_page((unsigned long) pgd); } -static inline struct page *pte_alloc_one(struct mm_struct *mm) -{ - struct page *pte; - - pte = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!pte) - return NULL; - if (!pgtable_page_ctor(pte)) { - __free_page(pte); - return NULL; - } - return pte; -} - -/* _kernel variant gets to use a different allocator */ -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - gfp_t flags = GFP_KERNEL | __GFP_ZERO; - return (pte_t *) __get_free_page(flags); -} - -static inline void pte_free(struct mm_struct *mm, struct page *pte) -{ - pgtable_page_dtor(pte); - __free_page(pte); -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long)pte); -} - static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) { diff --git a/arch/parisc/include/asm/kprobes.h b/arch/parisc/include/asm/kprobes.h index 
e09cf2deeafe..904034da4974 100644 --- a/arch/parisc/include/asm/kprobes.h +++ b/arch/parisc/include/asm/kprobes.h @@ -50,6 +50,10 @@ struct kprobe_ctlblk { int __kprobes parisc_kprobe_break_handler(struct pt_regs *regs); int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs); +static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + return 0; +} #endif /* CONFIG_KPROBES */ #endif /* _PARISC_KPROBES_H */ diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index ba67893a1d72..df46b0e5a915 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -63,7 +63,7 @@ ENTRY_CFI(flush_tlb_all_local) /* Flush Instruction Tlb */ - LDREG ITLB_SID_BASE(%r1), %r20 +88: LDREG ITLB_SID_BASE(%r1), %r20 LDREG ITLB_SID_STRIDE(%r1), %r21 LDREG ITLB_SID_COUNT(%r1), %r22 LDREG ITLB_OFF_BASE(%r1), %arg0 @@ -103,6 +103,7 @@ fitonemiddle: /* Loop if LOOP = 1 */ add %r21, %r20, %r20 /* increment space */ fitdone: + ALTERNATIVE(88b, fitdone, ALT_COND_NO_SPLIT_TLB, INSN_NOP) /* Flush Data Tlb */ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d8dcd8820369..77f6ebf97113 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -121,6 +121,7 @@ config PPC select ARCH_32BIT_OFF_T if PPC32 select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_DMA_MMAP_PGPROT select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 463c63a9fcf1..11112023e327 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -302,9 +302,14 @@ #define H_SCM_UNBIND_MEM 0x3F0 #define H_SCM_QUERY_BLOCK_MEM_BINDING 0x3F4 #define H_SCM_QUERY_LOGICAL_MEM_BINDING 0x3F8 -#define H_SCM_MEM_QUERY 0x3FC -#define H_SCM_BLOCK_CLEAR 0x400 -#define MAX_HCALL_OPCODE H_SCM_BLOCK_CLEAR +#define H_SCM_UNBIND_ALL 0x3FC +#define H_SCM_HEALTH 0x400 +#define H_SCM_PERFORMANCE_STATS 0x418 +#define MAX_HCALL_OPCODE H_SCM_PERFORMANCE_STATS + +/* Scope args for H_SCM_UNBIND_ALL */ +#define H_UNBIND_SCOPE_ALL (0x1) +#define H_UNBIND_SCOPE_DRC (0x2) /* H_VIOCTL functions */ #define H_GET_VIOA_DUMP_SIZE 0x01 diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h index dc9a1ca70edf..c6bbe9778d3c 100644 --- a/arch/powerpc/include/asm/pmc.h +++ b/arch/powerpc/include/asm/pmc.h @@ -27,11 +27,10 @@ static inline void ppc_set_pmu_inuse(int inuse) #ifdef CONFIG_PPC_PSERIES get_lppaca()->pmcregs_in_use = inuse; #endif - } else { + } #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - get_paca()->pmcregs_in_use = inuse; + get_paca()->pmcregs_in_use = inuse; #endif - } #endif } diff --git a/arch/powerpc/include/uapi/asm/kvm_para.h b/arch/powerpc/include/uapi/asm/kvm_para.h index 01555c6ae0f5..be48c2215fa2 100644 --- a/arch/powerpc/include/uapi/asm/kvm_para.h +++ b/arch/powerpc/include/uapi/asm/kvm_para.h @@ -31,7 +31,7 @@ * Struct fields are always 32 or 64 bit aligned, depending on them being 32 * or 64 bit wide respectively. 
* - * See Documentation/virtual/kvm/ppc-pv.txt + * See Documentation/virt/kvm/ppc-pv.txt */ struct kvm_vcpu_arch_shared { __u64 scratch1; diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 56dfa7a2a6f2..ea0c69236789 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -49,7 +49,8 @@ obj-y := cputable.o ptrace.o syscalls.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ - of_platform.o prom_parse.o + of_platform.o prom_parse.o \ + dma-common.o obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ paca.o nvram_64.o firmware.o diff --git a/arch/powerpc/kernel/dma-common.c b/arch/powerpc/kernel/dma-common.c new file mode 100644 index 000000000000..dc7ef6b17b69 --- /dev/null +++ b/arch/powerpc/kernel/dma-common.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Contains common dma routines for all powerpc platforms. + * + * Copyright (C) 2019 Shawn Anastasio. + */ + +#include <linux/mm.h> +#include <linux/dma-noncoherent.h> + +pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, + unsigned long attrs) +{ + if (!dev_is_dma_coherent(dev)) + return pgprot_noncached(prot); + return prot; +} diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index f50b708d6d77..98600b276f76 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1198,6 +1198,9 @@ SYSCALL_DEFINE0(rt_sigreturn) goto bad; if (MSR_TM_ACTIVE(msr_hi<<32)) { + /* Trying to start TM on non TM system */ + if (!cpu_has_feature(CPU_FTR_TM)) + goto bad; /* We only recheckpoint on return if we're * transaction. */ diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 2f80e270c7b0..117515564ec7 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -771,6 +771,11 @@ SYSCALL_DEFINE0(rt_sigreturn) if (MSR_TM_ACTIVE(msr)) { /* We recheckpoint on return. */ struct ucontext __user *uc_transact; + + /* Trying to start TM on non TM system */ + if (!cpu_has_feature(CPU_FTR_TM)) + goto badframe; + if (__get_user(uc_transact, &uc->uc_link)) goto badframe; if (restore_tm_sigcontexts(current, &uc->uc_mcontext, diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ec1804f822af..cde3f5a4b3e4 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3569,9 +3569,18 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb()); if (kvmhv_on_pseries()) { + /* + * We need to save and restore the guest visible part of the + * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor + * doesn't do this for us. Note only required if pseries since + * this is done in kvmhv_load_hv_regs_and_go() below otherwise. 
+ */ + unsigned long host_psscr; /* call our hypervisor to load up HV regs and go */ struct hv_guest_state hvregs; + host_psscr = mfspr(SPRN_PSSCR_PR); + mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr); kvmhv_save_hv_regs(vcpu, &hvregs); hvregs.lpcr = lpcr; vcpu->arch.regs.msr = vcpu->arch.shregs.msr; @@ -3590,6 +3599,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vcpu->arch.shregs.msr = vcpu->arch.regs.msr; vcpu->arch.shregs.dar = mfspr(SPRN_DAR); vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR); + vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR); + mtspr(SPRN_PSSCR_PR, host_psscr); /* H_CEDE has to be handled now, not later */ if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested && @@ -3654,6 +3665,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vcpu->arch.vpa.dirty = 1; save_pmu = lp->pmcregs_in_use; } + /* Must save pmu if this guest is capable of running nested guests */ + save_pmu |= nesting_enabled(vcpu->kvm); kvmhv_save_guest_pmu(vcpu, save_pmu); diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 6ca0d7376a9f..e3ba67095895 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1986,10 +1986,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) xive->single_escalation = xive_native_has_single_escalation(); - if (ret) { - kfree(xive); + if (ret) return ret; - } return 0; } diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 5596c8ec221a..a998823f68a3 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -1090,9 +1090,9 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) xive->ops = &kvmppc_xive_native_ops; if (ret) - kfree(xive); + return ret; - return ret; + return 0; } /* diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 9a5963e07a82..b8ad14bb1170 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1899,11 +1899,20 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, * * For guests on platforms before POWER9, we clamp the it limit to 1G * to avoid some funky things such as RTAS bugs etc... + * + * On POWER9 we limit to 1TB in case the host erroneously told us that + * the RMA was >1TB. Effective address bits 0:23 are treated as zero + * (meaning the access is aliased to zero i.e. addr = addr % 1TB) + * for virtual real mode addressing and so it doesn't make sense to + * have an area larger than 1TB as it can't be addressed. 
*/ if (!early_cpu_has_feature(CPU_FTR_HVMODE)) { ppc64_rma_size = first_memblock_size; if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) ppc64_rma_size = min_t(u64, ppc64_rma_size, 0x40000000); + else + ppc64_rma_size = min_t(u64, ppc64_rma_size, + 1UL << SID_SHIFT_1T); /* Finally limit subsequent allocations */ memblock_set_current_limit(ppc64_rma_size); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 9259337d7374..9191a66b3bc5 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -239,7 +239,7 @@ void __init paging_init(void) #ifdef CONFIG_ZONE_DMA max_zone_pfns[ZONE_DMA] = min(max_low_pfn, - ((1UL << ARCH_ZONE_DMA_BITS) - 1) >> PAGE_SHIFT); + 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT)); #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_HIGHMEM diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index c8ec670ee924..2c07908359b2 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -11,6 +11,7 @@ #include <linux/sched.h> #include <linux/libnvdimm.h> #include <linux/platform_device.h> +#include <linux/delay.h> #include <asm/plpar_wrappers.h> @@ -43,8 +44,9 @@ struct papr_scm_priv { static int drc_pmem_bind(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; - uint64_t rc, token; uint64_t saved = 0; + uint64_t token; + int64_t rc; /* * When the hypervisor cannot map all the requested memory in a single @@ -64,6 +66,10 @@ static int drc_pmem_bind(struct papr_scm_priv *p) } while (rc == H_BUSY); if (rc) { + /* H_OVERLAP needs a separate error path */ + if (rc == H_OVERLAP) + return -EBUSY; + dev_err(&p->pdev->dev, "bind err: %lld\n", rc); return -ENXIO; } @@ -78,22 +84,36 @@ static int drc_pmem_bind(struct papr_scm_priv *p) static int drc_pmem_unbind(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; - uint64_t rc, token; + uint64_t token = 0; + int64_t rc; - token = 0; + dev_dbg(&p->pdev->dev, "unbind drc %x\n", p->drc_index); - /* NB: unbind has the same retry requirements mentioned above */ + /* NB: unbind has the same retry requirements as drc_pmem_bind() */ do { - rc = plpar_hcall(H_SCM_UNBIND_MEM, ret, p->drc_index, - p->bound_addr, p->blocks, token); + + /* Unbind of all SCM resources associated with drcIndex */ + rc = plpar_hcall(H_SCM_UNBIND_ALL, ret, H_UNBIND_SCOPE_DRC, + p->drc_index, token); token = ret[0]; - cond_resched(); + + /* Check if we are stalled for some time */ + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } else if (rc == H_BUSY) { + cond_resched(); + } + } while (rc == H_BUSY); if (rc) dev_err(&p->pdev->dev, "unbind error: %lld\n", rc); + else + dev_dbg(&p->pdev->dev, "unbind drc %x complete\n", + p->drc_index); - return !!rc; + return rc == H_SUCCESS ? 
0 : -ENXIO; } static int papr_scm_meta_get(struct papr_scm_priv *p, @@ -389,6 +409,14 @@ static int papr_scm_probe(struct platform_device *pdev) /* request the hypervisor to bind this region to somewhere in memory */ rc = drc_pmem_bind(p); + + /* If phyp says drc memory still bound then force unbound and retry */ + if (rc == -EBUSY) { + dev_warn(&pdev->dev, "Retrying bind after unbinding\n"); + drc_pmem_unbind(p); + rc = drc_pmem_bind(p); + } + if (rc) goto err; diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 082c7e1c20f0..1cdb39575eae 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -479,7 +479,7 @@ static int xive_find_target_in_mask(const struct cpumask *mask, * Now go through the entire mask until we find a valid * target. */ - for (;;) { + do { /* * We re-check online as the fallback case passes us * an untested affinity mask @@ -487,12 +487,11 @@ static int xive_find_target_in_mask(const struct cpumask *mask, if (cpu_online(cpu) && xive_try_pick_target(cpu)) return cpu; cpu = cpumask_next(cpu, mask); - if (cpu == first) - break; /* Wrap around */ if (cpu >= nr_cpu_ids) cpu = cpumask_first(mask); - } + } while (cpu != first); + return -1; } diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index f8b3b07e4247..7a117be8297c 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -34,8 +34,6 @@ else KBUILD_LDFLAGS += -melf32lriscv endif -KBUILD_CFLAGS += -Wall # ISA string setting riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index 40983491b95f..9bf63f0ab253 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -217,5 +217,20 @@ #size-cells = <0>; status = "disabled"; }; + eth0: ethernet@10090000 { + compatible = "sifive,fu540-c000-gem"; + interrupt-parent = <&plic0>; + interrupts = <53>; + reg = <0x0 0x10090000 0x0 0x2000 + 0x0 0x100a0000 0x0 0x1000>; + local-mac-address = [00 00 00 00 00 00]; + clock-names = "pclk", "hclk"; + clocks = <&prci PRCI_CLK_GEMGXLPLL>, + <&prci PRCI_CLK_GEMGXLPLL>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + }; }; diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 0b55c53c08c7..93d68cbd64fe 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -76,3 +76,12 @@ disable-wp; }; }; + +&eth0 { + status = "okay"; + phy-mode = "gmii"; + phy-handle = <&phy0>; + phy0: ethernet-phy@0 { + reg = <0>; + }; +}; diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 1efaeddf1e4b..16970f246860 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -22,6 +22,7 @@ generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mm-arch-hooks.h +generic-y += msi.h generic-y += percpu.h generic-y += preempt.h generic-y += sections.h diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h index 0e2eeeb1fd27..13ce76cc5aff 100644 --- a/arch/riscv/include/uapi/asm/unistd.h +++ b/arch/riscv/include/uapi/asm/unistd.h @@ -18,6 +18,7 @@ #ifdef __LP64__ #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SET_GET_RLIMIT +#define __ARCH_WANT_SYS_CLONE3 #endif /* __LP64__ */ #include <asm-generic/unistd.h> diff --git a/arch/s390/Kconfig
b/arch/s390/Kconfig index 5d8570ed6cab..a4ad2733eedf 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -189,6 +189,7 @@ config S390 select VIRT_CPU_ACCOUNTING select ARCH_HAS_SCALED_CPUTIME select HAVE_NMI + select ARCH_HAS_FORCE_DMA_UNENCRYPTED select SWIOTLB select GENERIC_ALLOCATOR diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 9dde4d7d8704..b5fd6e85657c 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1224,28 +1224,11 @@ no_timer: void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) { - /* - * We cannot move this into the if, as the CPU might be already - * in kvm_vcpu_block without having the waitqueue set (polling) - */ vcpu->valid_wakeup = true; + kvm_vcpu_wake_up(vcpu); + /* - * This is mostly to document, that the read in swait_active could - * be moved before other stores, leading to subtle races. - * All current users do not store or use an atomic like update - */ - smp_mb__after_atomic(); - if (swait_active(&vcpu->wq)) { - /* - * The vcpu gave up the cpu voluntarily, mark it as a good - * yield-candidate. - */ - vcpu->preempted = true; - swake_up_one(&vcpu->wq); - vcpu->stat.halt_wakeup++; - } - /* - * The VCPU might not be sleeping but is executing the VSIE. Let's + * The VCPU might not be sleeping but rather executing VSIE. Let's * kick it, so it leaves the SIE to process the request. */ kvm_s390_vsie_kick(vcpu); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 4e5bbe328594..20340a03ad90 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -30,7 +30,7 @@ #include <linux/export.h> #include <linux/cma.h> #include <linux/gfp.h> -#include <linux/dma-mapping.h> +#include <linux/dma-direct.h> #include <asm/processor.h> #include <linux/uaccess.h> #include <asm/pgtable.h> @@ -161,6 +161,11 @@ bool sev_active(void) return is_prot_virt_guest(); } +bool force_dma_unencrypted(struct device *dev) +{ + return sev_active(); +} + /* protected virtualization */ static void pv_init(void) { diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index 5a9e4e1f9f81..324a23947585 100644 --- a/arch/sparc/vdso/Makefile +++ b/arch/sparc/vdso/Makefile @@ -115,8 +115,7 @@ quiet_cmd_vdso = VDSO $@ -T $(filter %.lds,$^) $(filter %.o,$^) && \ sh $(srctree)/$(src)/checkundef.sh '$(OBJDUMP)' '$@' -VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \ - $(call ld-option, --build-id) -Bsymbolic +VDSO_LDFLAGS = -shared --hash-style=both --build-id -Bsymbolic GCOV_PROFILE := n # diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 78772870facd..222855cc0158 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1526,6 +1526,7 @@ config AMD_MEM_ENCRYPT depends on X86_64 && CPU_SUP_AMD select DYNAMIC_PHYSICAL_MASK select ARCH_USE_MEMREMAP_PROT + select ARCH_HAS_FORCE_DMA_UNENCRYPTED ---help--- Say yes to enable support for the encryption of system memory. 
This requires an AMD processor that supports Secure Memory diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 220d1279d0e2..d6662fdef300 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -384,14 +384,11 @@ struct boot_params *make_boot_params(struct efi_config *c) struct apm_bios_info *bi; struct setup_header *hdr; efi_loaded_image_t *image; - void *options, *handle; + void *handle; efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID; int options_size = 0; efi_status_t status; char *cmdline_ptr; - u16 *s2; - u8 *s1; - int i; unsigned long ramdisk_addr; unsigned long ramdisk_size; @@ -494,8 +491,6 @@ static void add_e820ext(struct boot_params *params, struct setup_data *e820ext, u32 nr_entries) { struct setup_data *data; - efi_status_t status; - unsigned long size; e820ext->type = SETUP_E820_EXT; e820ext->len = nr_entries * sizeof(struct boot_e820_entry); @@ -677,8 +672,6 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, void *priv) { const char *signature; - __u32 nr_desc; - efi_status_t status; struct exit_boot_struct *p = priv; signature = efi_is_64bit() ? EFI64_LOADER_SIGNATURE @@ -747,7 +740,6 @@ struct boot_params * efi_main(struct efi_config *c, struct boot_params *boot_params) { struct desc_ptr *gdt = NULL; - efi_loaded_image_t *image; struct setup_header *hdr = &boot_params->hdr; efi_status_t status; struct desc_struct *desc; diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 24e65a0f756d..53ac0cb2396d 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -17,6 +17,7 @@ #include "pgtable.h" #include "../string.h" #include "../voffset.h" +#include <asm/bootparam_utils.h> /* * WARNING!! diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index d2f184165934..c8181392f70d 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -23,7 +23,6 @@ #include <asm/page.h> #include <asm/boot.h> #include <asm/bootparam.h> -#include <asm/bootparam_utils.h> #define BOOT_CTYPE_H #include <linux/acpi.h> diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index f8debf7aeb4c..5f2d03067ae5 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -40,7 +40,6 @@ int cmdline_find_option_bool(const char *option); static unsigned long find_trampoline_placement(void) { unsigned long bios_start = 0, ebda_start = 0; - unsigned long trampoline_start; struct boot_e820_entry *entry; char *signature; int i; diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 9f1f9e3b8230..830bd984182b 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -343,3 +343,9 @@ For 32-bit we have the following conventions - kernel is built with .Lafter_call_\@: #endif .endm + +#ifdef CONFIG_PARAVIRT_XXL +#define GET_CR2_INTO(reg) GET_CR2_INTO_AX ; _ASM_MOV %_ASM_AX, reg +#else +#define GET_CR2_INTO(reg) _ASM_MOV %cr2, reg +#endif diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 90b473297299..2bb986f305ac 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -294,9 +294,11 @@ .Lfinished_frame_\@: .endm -.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 +.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 cld +.if \skip_gs == 0 PUSH_GS +.endif FIXUP_FRAME pushl %fs pushl %es @@ -313,13 +315,13 @@ movl %edx, %es movl $(__KERNEL_PERCPU), %edx movl %edx, %fs +.if \skip_gs 
== 0 SET_KERNEL_GS %edx - +.endif /* Switch to kernel stack if necessary */ .if \switch_stacks > 0 SWITCH_TO_KERNEL_STACK .endif - .endm .macro SAVE_ALL_NMI cr3_reg:req @@ -1441,39 +1443,46 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR, ENTRY(page_fault) ASM_CLAC - pushl $do_page_fault - ALIGN - jmp common_exception + pushl $0; /* %gs's slot on the stack */ + + SAVE_ALL switch_stacks=1 skip_gs=1 + + ENCODE_FRAME_POINTER + UNWIND_ESPFIX_STACK + + /* fixup %gs */ + GS_TO_REG %ecx + REG_TO_PTGS %ecx + SET_KERNEL_GS %ecx + + GET_CR2_INTO(%ecx) # might clobber %eax + + /* fixup orig %eax */ + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart + + TRACE_IRQS_OFF + movl %esp, %eax # pt_regs pointer + call do_page_fault + jmp ret_from_exception END(page_fault) common_exception: /* the function address is in %gs's slot on the stack */ - FIXUP_FRAME - pushl %fs - pushl %es - pushl %ds - pushl %eax - movl $(__USER_DS), %eax - movl %eax, %ds - movl %eax, %es - movl $(__KERNEL_PERCPU), %eax - movl %eax, %fs - pushl %ebp - pushl %edi - pushl %esi - pushl %edx - pushl %ecx - pushl %ebx - SWITCH_TO_KERNEL_STACK + SAVE_ALL switch_stacks=1 skip_gs=1 ENCODE_FRAME_POINTER - cld UNWIND_ESPFIX_STACK + + /* fixup %gs */ GS_TO_REG %ecx movl PT_GS(%esp), %edi # get the function address - movl PT_ORIG_EAX(%esp), %edx # get the error code - movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart REG_TO_PTGS %ecx SET_KERNEL_GS %ecx + + /* fixup orig %eax */ + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart + TRACE_IRQS_OFF movl %esp, %eax # pt_regs pointer CALL_NOSPEC %edi diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 35a66fcfcb91..3f5a978a02a7 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -864,18 +864,84 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8) +.macro idtentry_part do_sym, has_error_code:req, read_cr2:req, paranoid:req, shift_ist=-1, ist_offset=0 + + .if \paranoid + call paranoid_entry + /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ + .else + call error_entry + .endif + UNWIND_HINT_REGS + + .if \read_cr2 + /* + * Store CR2 early so subsequent faults cannot clobber it. Use R12 as + * intermediate storage as RDX can be clobbered in enter_from_user_mode(). + * GET_CR2_INTO can clobber RAX. 
+ */ + GET_CR2_INTO(%r12); + .endif + + .if \shift_ist != -1 + TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */ + .else + TRACE_IRQS_OFF + .endif + + .if \paranoid == 0 + testb $3, CS(%rsp) + jz .Lfrom_kernel_no_context_tracking_\@ + CALL_enter_from_user_mode +.Lfrom_kernel_no_context_tracking_\@: + .endif + + movq %rsp, %rdi /* pt_regs pointer */ + + .if \has_error_code + movq ORIG_RAX(%rsp), %rsi /* get error code */ + movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ + .else + xorl %esi, %esi /* no error code */ + .endif + + .if \shift_ist != -1 + subq $\ist_offset, CPU_TSS_IST(\shift_ist) + .endif + + .if \read_cr2 + movq %r12, %rdx /* Move CR2 into 3rd argument */ + .endif + + call \do_sym + + .if \shift_ist != -1 + addq $\ist_offset, CPU_TSS_IST(\shift_ist) + .endif + + .if \paranoid + /* this procedure expects the "no swapgs" flag in ebx */ + jmp paranoid_exit + .else + jmp error_exit + .endif + +.endm + /** * idtentry - Generate an IDT entry stub * @sym: Name of the generated entry point - * @do_sym: C function to be called - * @has_error_code: True if this IDT vector has an error code on the stack - * @paranoid: non-zero means that this vector may be invoked from + * @do_sym: C function to be called + * @has_error_code: True if this IDT vector has an error code on the stack + * @paranoid: non-zero means that this vector may be invoked from * kernel mode with user GSBASE and/or user CR3. * 2 is special -- see below. * @shift_ist: Set to an IST index if entries from kernel mode should - * decrement the IST stack so that nested entries get a + * decrement the IST stack so that nested entries get a * fresh stack. (This is for #DB, which has a nasty habit - * of recursing.) + * of recursing.) + * @create_gap: create a 6-word stack gap when coming from kernel mode. + * @read_cr2: load CR2 into the 3rd argument; done before calling any C code * * idtentry generates an IDT stub that sets up a usable kernel context, * creates struct pt_regs, and calls @do_sym. The stub has the following @@ -900,15 +966,19 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt * @paranoid == 2 is special: the stub will never switch stacks. This is for * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
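The read_cr2 plumbing above exists because CR2 must be latched before any C code (or a nested fault) can overwrite it; idtentry_part parks it in %r12 and hands it to the handler as the third argument. A minimal user-space sketch of the idea, in which cr2, might_fault() and do_page_fault() are invented stand-ins rather than kernel code:

	#include <stdio.h>

	/* 'cr2' models the %cr2 register, which any nested page fault
	 * overwrites. Latching it before calling into C preserves the
	 * original fault address, mirroring GET_CR2_INTO(%r12) followed
	 * by 'movq %r12, %rdx' in idtentry_part. */
	static unsigned long cr2;

	static void might_fault(void)
	{
		cr2 = 0xdead;	/* a nested fault clobbers %cr2 ... */
	}

	static void do_page_fault(unsigned long error_code, unsigned long address)
	{
		might_fault();	/* ... but the latched copy survives */
		printf("fault at %#lx (error %#lx)\n", address, error_code);
	}

	int main(void)
	{
		cr2 = 0x1000;			/* address reported by hardware */
		unsigned long latched = cr2;	/* latch early, as the stub does */
		do_page_fault(0, latched);
		return 0;
	}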
*/ -.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 +.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0 ENTRY(\sym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 /* Sanity check */ - .if \shift_ist != -1 && \paranoid == 0 + .if \shift_ist != -1 && \paranoid != 1 .error "using shift_ist requires paranoid=1" .endif + .if \create_gap && \paranoid + .error "using create_gap requires paranoid=0" + .endif + ASM_CLAC .if \has_error_code == 0 @@ -934,47 +1004,7 @@ ENTRY(\sym) .Lfrom_usermode_no_gap_\@: .endif - .if \paranoid - call paranoid_entry - .else - call error_entry - .endif - UNWIND_HINT_REGS - /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ - - .if \paranoid - .if \shift_ist != -1 - TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */ - .else - TRACE_IRQS_OFF - .endif - .endif - - movq %rsp, %rdi /* pt_regs pointer */ - - .if \has_error_code - movq ORIG_RAX(%rsp), %rsi /* get error code */ - movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ - .else - xorl %esi, %esi /* no error code */ - .endif - - .if \shift_ist != -1 - subq $\ist_offset, CPU_TSS_IST(\shift_ist) - .endif - - call \do_sym - - .if \shift_ist != -1 - addq $\ist_offset, CPU_TSS_IST(\shift_ist) - .endif - - /* these procedures expect "no swapgs" flag in ebx */ - .if \paranoid - jmp paranoid_exit - .else - jmp error_exit - .endif + idtentry_part \do_sym, \has_error_code, \read_cr2, \paranoid, \shift_ist, \ist_offset .if \paranoid == 1 /* @@ -983,21 +1013,9 @@ ENTRY(\sym) * run in real process context if user_mode(regs). */ .Lfrom_usermode_switch_stack_\@: - call error_entry - - movq %rsp, %rdi /* pt_regs pointer */ - - .if \has_error_code - movq ORIG_RAX(%rsp), %rsi /* get error code */ - movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ - .else - xorl %esi, %esi /* no error code */ + idtentry_part \do_sym, \has_error_code, \read_cr2, paranoid=0 .endif - call \do_sym - - jmp error_exit - .endif _ASM_NOKPROBE(\sym) END(\sym) .endm @@ -1007,7 +1025,7 @@ idtentry overflow do_overflow has_error_code=0 idtentry bounds do_bounds has_error_code=0 idtentry invalid_op do_invalid_op has_error_code=0 idtentry device_not_available do_device_not_available has_error_code=0 -idtentry double_fault do_double_fault has_error_code=1 paranoid=2 +idtentry double_fault do_double_fault has_error_code=1 paranoid=2 read_cr2=1 idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 idtentry invalid_TSS do_invalid_TSS has_error_code=1 idtentry segment_not_present do_segment_not_present has_error_code=1 @@ -1179,10 +1197,10 @@ idtentry xendebug do_debug has_error_code=0 #endif idtentry general_protection do_general_protection has_error_code=1 -idtentry page_fault do_page_fault has_error_code=1 +idtentry page_fault do_page_fault has_error_code=1 read_cr2=1 #ifdef CONFIG_KVM_GUEST -idtentry async_page_fault do_async_page_fault has_error_code=1 +idtentry async_page_fault do_async_page_fault has_error_code=1 read_cr2=1 #endif #ifdef CONFIG_X86_MCE @@ -1281,18 +1299,9 @@ ENTRY(error_entry) movq %rax, %rsp /* switch stack */ ENCODE_FRAME_POINTER pushq %r12 - - /* - * We need to tell lockdep that IRQs are off. We can't do this until - * we fix gsbase, and we should do it before enter_from_user_mode - * (which can take locks). 
- */ - TRACE_IRQS_OFF - CALL_enter_from_user_mode ret .Lerror_entry_done: - TRACE_IRQS_OFF ret /* diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index cfdca8b42c70..cc20465b2867 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -12,9 +12,7 @@ /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ .macro THUNK name, func, put_ret_addr_in_rdi=0 - .globl \name - .type \name, @function -\name: + ENTRY(\name) pushq %rbp movq %rsp, %rbp @@ -35,6 +33,7 @@ call \func jmp .L_restore + ENDPROC(\name) _ASM_NOKPROBE(\name) .endm diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 34773395139a..8df549138193 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -176,9 +176,8 @@ quiet_cmd_vdso = VDSO $@ -T $(filter %.lds,$^) $(filter %.o,$^) && \ sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' -VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \ - $(call ld-option, --build-id) $(call ld-option, --eh-frame-hdr) \ - -Bsymbolic +VDSO_LDFLAGS = -shared --hash-style=both --build-id \ + $(call ld-option, --eh-frame-hdr) -Bsymbolic GCOV_PROFILE := n quiet_cmd_vdso_and_check = VDSO $@ diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 0e033ef11a9f..0d258688c8cf 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -60,8 +60,17 @@ static int hv_cpu_init(unsigned int cpu) if (!hv_vp_assist_page) return 0; - if (!*hvp) - *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); + /* + * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's Section + * 5.2.1 "GPA Overlay Pages"). Here it must be zeroed out to make sure + * we always write the EOI MSR in hv_apic_eoi_write() *after* the + * EOI optimization is disabled in hv_cpu_die(), otherwise a CPU may + * not be stopped in the case of CPU offlining and the VM will hang. + */ + if (!*hvp) { + *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO, + PAGE_KERNEL); + } if (*hvp) { u64 val; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 050e5f9ebf81..e647aa095867 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -49,7 +49,7 @@ static inline void generic_apic_probe(void) #ifdef CONFIG_X86_LOCAL_APIC -extern unsigned int apic_verbosity; +extern int apic_verbosity; extern int local_apic_timer_c2_ok; extern int disable_apic; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0cc5b611a113..7b0a4ee77313 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -607,15 +607,16 @@ struct kvm_vcpu_arch { /* * QEMU userspace and the guest each have their own FPU state. - * In vcpu_run, we switch between the user, maintained in the - * task_struct struct, and guest FPU contexts. While running a VCPU, - * the VCPU thread will have the guest FPU context. + * In vcpu_run, we switch between the user and guest FPU contexts. + * While running a VCPU, the VCPU thread will have the guest FPU + * context. * * Note that while the PKRU state lives inside the fpu registers, * it is switched out separately at VMENTER and VMEXIT time. The * "guest_fpu" state here contains the guest FPU context, with the * host PKRU bits. */ + struct fpu *user_fpu; struct fpu *guest_fpu; u64 xcr0; @@ -1496,25 +1497,29 @@ enum { #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ?
1 : 0) #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) +asmlinkage void __noreturn kvm_spurious_fault(void); + /* * Hardware virtualization extension instructions may fault if a * reboot turns off virtualization while processes are running. - * Trap the fault and ignore the instruction if that happens. + * Usually after catching the fault we just panic; during reboot + * instead the instruction is ignored. */ -asmlinkage void kvm_spurious_fault(void); - -#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ - "666: " insn "\n\t" \ - "668: \n\t" \ - ".pushsection .fixup, \"ax\" \n" \ - "667: \n\t" \ - cleanup_insn "\n\t" \ - "cmpb $0, kvm_rebooting \n\t" \ - "jne 668b \n\t" \ - __ASM_SIZE(push) " $666b \n\t" \ - "jmp kvm_spurious_fault \n\t" \ - ".popsection \n\t" \ - _ASM_EXTABLE(666b, 667b) +#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ + "666: \n\t" \ + insn "\n\t" \ + "jmp 668f \n\t" \ + "667: \n\t" \ + "call kvm_spurious_fault \n\t" \ + "668: \n\t" \ + ".pushsection .fixup, \"ax\" \n\t" \ + "700: \n\t" \ + cleanup_insn "\n\t" \ + "cmpb $0, kvm_rebooting\n\t" \ + "je 667b \n\t" \ + "jmp 668b \n\t" \ + ".popsection \n\t" \ + _ASM_EXTABLE(666b, 700b) #define __kvm_handle_fault_on_reboot(insn) \ ____kvm_handle_fault_on_reboot(insn, "") diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 5ed3cf1c3934..9b4df6eaa11a 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -92,7 +92,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); extern void kvm_disable_steal_time(void); -void do_async_page_fault(struct pt_regs *regs, unsigned long error_code); +void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); #ifdef CONFIG_PARAVIRT_SPINLOCKS void __init kvm_spinlock_init(void); diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index c25c38a05c1c..dce26f1d13e1 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -116,7 +116,7 @@ static inline void write_cr0(unsigned long x) static inline unsigned long read_cr2(void) { - return PVOP_CALL0(unsigned long, mmu.read_cr2); + return PVOP_CALLEE0(unsigned long, mmu.read_cr2); } static inline void write_cr2(unsigned long x) @@ -746,6 +746,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); PV_RESTORE_ALL_CALLER_REGS \ FRAME_END \ "ret;" \ + ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \ ".popsection") /* Get a reference to a callee-save function */ @@ -909,13 +910,7 @@ extern void default_banner(void); ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_ops+PV_CPU_swapgs); \ ) -#endif - -#define GET_CR2_INTO_RAX \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2); -#ifdef CONFIG_PARAVIRT_XXL #define USERGS_SYSRET64 \ PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64), \ ANNOTATE_RETPOLINE_SAFE; \ @@ -929,9 +924,19 @@ extern void default_banner(void); call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #endif -#endif +#endif /* CONFIG_PARAVIRT_XXL */ +#endif /* CONFIG_X86_64 */ + +#ifdef CONFIG_PARAVIRT_XXL + +#define GET_CR2_INTO_AX \ + PARA_SITE(PARA_PATCH(PV_MMU_read_cr2), \ + ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2); \ + ) + +#endif /* CONFIG_PARAVIRT_XXL */ -#endif /* CONFIG_X86_32 */ #endif /* __ASSEMBLY__ */ #else /* CONFIG_PARAVIRT 
*/ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 946f8f1f1efc..639b2df445ee 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -220,7 +220,7 @@ struct pv_mmu_ops { void (*exit_mmap)(struct mm_struct *mm); #ifdef CONFIG_PARAVIRT_XXL - unsigned long (*read_cr2)(void); + struct paravirt_callee_save read_cr2; void (*write_cr2)(unsigned long); unsigned long (*read_cr3)(void); diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index f2bd284abc16..b25e633033c3 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -74,14 +74,14 @@ dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code); dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code); dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code); #ifdef CONFIG_X86_64 -dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code); +dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long address); asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); asmlinkage __visible notrace struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s); void __init trap_init(void); #endif dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code); -dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code); +dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code); dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code); dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code); diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index e901b0ab116f..503d3f42da16 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -435,9 +435,12 @@ struct kvm_nested_state { /* for KVM_CAP_PMU_EVENT_FILTER */ struct kvm_pmu_event_filter { - __u32 action; - __u32 nevents; - __u64 events[0]; + __u32 action; + __u32 nevents; + __u32 fixed_counter_bitmap; + __u32 flags; + __u32 pad[4]; + __u64 events[0]; }; #define KVM_PMU_EVENT_ALLOW 0 diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 1bd91cb7b320..f5291362da1a 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -183,7 +183,7 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); /* * Debug level, exported for io_apic.c */ -unsigned int apic_verbosity; +int apic_verbosity; int pic_mode; diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index da64452584b0..5c7ee3df4d0b 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -76,6 +76,7 @@ static void __used common(void) BLANK(); OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); + OFFSET(XEN_vcpu_info_arch_cr2, vcpu_info, arch.cr2); #endif BLANK(); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index e69408bf664b..7da2bcd2b8eb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -86,9 +86,9 @@ static bool _e820__mapped_any(struct e820_table *table, continue; if (entry->addr >= end || entry->addr + entry->size <= start) continue; - return 1; + return true; } - return 0; + return false; } bool e820__mapped_raw_any(u64 start, u64 end, enum 
e820_type type) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index bcd206c8ac90..a6342c899be5 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -29,9 +29,7 @@ #ifdef CONFIG_PARAVIRT_XXL #include <asm/asm-offsets.h> #include <asm/paravirt.h> -#define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg #else -#define GET_CR2_INTO(reg) movq %cr2, reg #define INTERRUPT_RETURN iretq #endif @@ -253,10 +251,10 @@ END(secondary_startup_64) * start_secondary() via .Ljump_to_C_code. */ ENTRY(start_cpu0) - movq initial_stack(%rip), %rsp UNWIND_HINT_EMPTY + movq initial_stack(%rip), %rsp jmp .Ljump_to_C_code -ENDPROC(start_cpu0) +END(start_cpu0) #endif /* Both SMP bootup and ACPI suspend change these variables */ @@ -323,7 +321,7 @@ early_idt_handler_common: cmpq $14,%rsi /* Page fault? */ jnz 10f - GET_CR2_INTO(%rdi) /* Can clobber any volatile register if pv */ + GET_CR2_INTO(%rdi) /* can clobber %rax if pv */ call early_make_pgtable andl %eax,%eax jz 20f /* All good */ diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 82caf01b63dd..b7f34fe2171e 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -242,23 +242,23 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason); NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason); dotraplinkage void -do_async_page_fault(struct pt_regs *regs, unsigned long error_code) +do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) { enum ctx_state prev_state; switch (kvm_read_and_reset_pf_reason()) { default: - do_page_fault(regs, error_code); + do_page_fault(regs, error_code, address); break; case KVM_PV_REASON_PAGE_NOT_PRESENT: /* page is swapped out by the host. */ prev_state = exception_enter(); - kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs)); + kvm_async_pf_task_wait((u32)address, !user_mode(regs)); exception_exit(prev_state); break; case KVM_PV_REASON_PAGE_READY: rcu_irq_enter(); - kvm_async_pf_task_wake((u32)read_cr2()); + kvm_async_pf_task_wake((u32)address); rcu_irq_exit(); break; } @@ -838,6 +838,7 @@ asm( "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);" "setne %al;" "ret;" +".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;" ".popsection"); #endif diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 1bfe5c6e6cfe..afac7ccce72f 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -546,17 +546,15 @@ void __init default_get_smp_config(unsigned int early) * local APIC has default address */ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - return; + goto out; } pr_info("Default MP configuration #%d\n", mpf->feature1); construct_default_ISA_mptable(mpf->feature1); } else if (mpf->physptr) { - if (check_physptr(mpf, early)) { - early_memunmap(mpf, sizeof(*mpf)); - return; - } + if (check_physptr(mpf, early)) + goto out; } else BUG(); @@ -565,7 +563,7 @@ void __init default_get_smp_config(unsigned int early) /* * Only use the first configuration found. 
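The default_get_smp_config() rework here funnels every early_memunmap()/return pair through a single 'out:' label, the usual single-exit cleanup idiom. A standalone sketch of the pattern, all names invented:

	#include <stdio.h>

	/* Single-exit cleanup: every early bail-out becomes 'goto out',
	 * so the mapping is released exactly once on every path. */

	static void *memmap(void)	{ return "mapping"; }
	static void memunmap(void *p)	{ printf("unmapped %s\n", (char *)p); }

	static void get_config(int early)
	{
		void *mpf = memmap();

		if (early)
			goto out;	/* was: memunmap(mpf); return; */

		printf("parsing configuration\n");
	out:
		memunmap(mpf);
	}

	int main(void)
	{
		get_config(1);
		get_config(0);
		return 0;
	}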
*/ - +out: early_memunmap(mpf, sizeof(*mpf)); } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 98039d7fb998..0aa6256eedd8 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -370,7 +370,7 @@ struct paravirt_patch_template pv_ops = { .mmu.exit_mmap = paravirt_nop, #ifdef CONFIG_PARAVIRT_XXL - .mmu.read_cr2 = native_read_cr2, + .mmu.read_cr2 = __PV_IS_CALLEE_SAVE(native_read_cr2), .mmu.write_cr2 = native_write_cr2, .mmu.read_cr3 = __native_read_cr3, .mmu.write_cr3 = native_write_cr3, diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 250e4c4ac6d9..af64519b2695 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -143,17 +143,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) void release_thread(struct task_struct *dead_task) { - if (dead_task->mm) { -#ifdef CONFIG_MODIFY_LDT_SYSCALL - if (dead_task->mm->context.ldt) { - pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", - dead_task->comm, - dead_task->mm->context.ldt->entries, - dead_task->mm->context.ldt->nr_entries); - BUG(); - } -#endif - } + WARN_ON(dead_task->mm); } enum which_selector { diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 71691a8310e7..0fdbe89d0754 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -369,12 +369,22 @@ static int putreg(struct task_struct *child, case offsetof(struct user_regs_struct,fs_base): if (value >= TASK_SIZE_MAX) return -EIO; - x86_fsbase_write_task(child, value); + /* + * When changing the FS base, use do_arch_prctl_64() + * to set the index to zero and to set the base + * as requested. + */ + if (child->thread.fsbase != value) + return do_arch_prctl_64(child, ARCH_SET_FS, value); return 0; case offsetof(struct user_regs_struct,gs_base): + /* + * Exactly the same here as the %fs handling above. + */ if (value >= TASK_SIZE_MAX) return -EIO; - x86_gsbase_write_task(child, value); + if (child->thread.gsbase != value) + return do_arch_prctl_64(child, ARCH_SET_GS, value); return 0; #endif } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 87095a477154..4bb0f8447112 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -313,13 +313,10 @@ __visible void __noreturn handle_stack_overflow(const char *message, #ifdef CONFIG_X86_64 /* Runs on IST stack */ -dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) +dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2) { static const char str[] = "double fault"; struct task_struct *tsk = current; -#ifdef CONFIG_VMAP_STACK - unsigned long cr2; -#endif #ifdef CONFIG_X86_ESPFIX64 extern unsigned char native_irq_return_iret[]; @@ -415,7 +412,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) * stack even if the actual trigger for the double fault was * something else. 
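do_double_fault() now receives CR2 as an argument latched by the entry stub, so the hunk just below drops the local read_cr2() call. The stack-overflow heuristic it feeds is simple: report an overflow when the fault address lies within one page below the task stack. A toy version, with the constant and helper invented:

	#include <stdio.h>

	#define PAGE_SIZE 4096UL

	/* Mirrors '(unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE':
	 * true when cr2 falls in the page immediately below the stack. The
	 * unsigned subtraction wraps for addresses above the stack base, so
	 * those compare as 'not an overflow'. */
	static int is_stack_overflow(unsigned long stack_base, unsigned long cr2)
	{
		return stack_base - 1 - cr2 < PAGE_SIZE;
	}

	int main(void)
	{
		unsigned long stack_base = 0x100000;

		printf("%d\n", is_stack_overflow(stack_base, stack_base - 16));	/* 1 */
		printf("%d\n", is_stack_overflow(stack_base, 0x50000));		/* 0 */
		return 0;
	}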
*/ - cr2 = read_cr2(); if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE) handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2); #endif diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index ead681210306..22c2720cd948 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -368,9 +368,13 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | F(MD_CLEAR); + /* cpuid 7.1.eax */ + const u32 kvm_cpuid_7_1_eax_x86_features = + F(AVX512_BF16); + switch (index) { case 0: - entry->eax = 0; + entry->eax = min(entry->eax, 1u); entry->ebx &= kvm_cpuid_7_0_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_7_0_EBX); /* TSC_ADJUST is emulated */ @@ -394,6 +398,12 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) */ entry->edx |= F(ARCH_CAPABILITIES); break; + case 1: + entry->eax &= kvm_cpuid_7_1_eax_x86_features; + entry->ebx = 0; + entry->ecx = 0; + entry->edx = 0; + break; default: WARN_ON_ONCE(1); entry->eax = 0; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8e409ad448f9..718f7d9afedc 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -312,29 +312,42 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); -#define FOP_FUNC(name) \ +#define __FOP_FUNC(name) \ ".align " __stringify(FASTOP_SIZE) " \n\t" \ ".type " name ", @function \n\t" \ name ":\n\t" -#define FOP_RET "ret \n\t" +#define FOP_FUNC(name) \ + __FOP_FUNC(#name) + +#define __FOP_RET(name) \ + "ret \n\t" \ + ".size " name ", .-" name "\n\t" + +#define FOP_RET(name) \ + __FOP_RET(#name) #define FOP_START(op) \ extern void em_##op(struct fastop *fake); \ asm(".pushsection .text, \"ax\" \n\t" \ ".global em_" #op " \n\t" \ - FOP_FUNC("em_" #op) + ".align " __stringify(FASTOP_SIZE) " \n\t" \ + "em_" #op ":\n\t" #define FOP_END \ ".popsection") +#define __FOPNOP(name) \ + __FOP_FUNC(name) \ + __FOP_RET(name) + #define FOPNOP() \ - FOP_FUNC(__stringify(__UNIQUE_ID(nop))) \ - FOP_RET + __FOPNOP(__stringify(__UNIQUE_ID(nop))) #define FOP1E(op, dst) \ - FOP_FUNC(#op "_" #dst) \ - "10: " #op " %" #dst " \n\t" FOP_RET + __FOP_FUNC(#op "_" #dst) \ + "10: " #op " %" #dst " \n\t" \ + __FOP_RET(#op "_" #dst) #define FOP1EEX(op, dst) \ FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception) @@ -366,8 +379,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); FOP_END #define FOP2E(op, dst, src) \ - FOP_FUNC(#op "_" #dst "_" #src) \ - #op " %" #src ", %" #dst " \n\t" FOP_RET + __FOP_FUNC(#op "_" #dst "_" #src) \ + #op " %" #src ", %" #dst " \n\t" \ + __FOP_RET(#op "_" #dst "_" #src) #define FASTOP2(op) \ FOP_START(op) \ @@ -405,8 +419,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); FOP_END #define FOP3E(op, dst, src, src2) \ - FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \ - #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET + __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \ + #op " %" #src2 ", %" #src ", %" #dst " \n\t"\ + __FOP_RET(#op "_" #dst "_" #src "_" #src2) /* 3-operand, word-only, src2=cl */ #define FASTOP3WCL(op) \ @@ -423,7 +438,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); ".type " #op ", @function \n\t" \ #op ": \n\t" \ #op " %al \n\t" \ - FOP_RET + __FOP_RET(#op) asm(".pushsection .fixup, \"ax\"\n" ".global kvm_fastop_exception \n" @@ -449,7 +464,10 @@ 
FOP_SETCC(setle) FOP_SETCC(setnle) FOP_END; -FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET +FOP_START(salc) +FOP_FUNC(salc) +"pushf; sbb %al, %al; popf \n\t" +FOP_RET(salc) FOP_END; /* diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index a39e38f13029..c10a8b10b203 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1594,7 +1594,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) { u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS; uint16_t code, rep_idx, rep_cnt; - bool fast, longmode, rep; + bool fast, rep; /* * hypercall generates UD from non zero cpl and real mode @@ -1605,9 +1605,14 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) return 1; } - longmode = is_64_bit_mode(vcpu); - - if (!longmode) { +#ifdef CONFIG_X86_64 + if (is_64_bit_mode(vcpu)) { + param = kvm_rcx_read(vcpu); + ingpa = kvm_rdx_read(vcpu); + outgpa = kvm_r8_read(vcpu); + } else +#endif + { param = ((u64)kvm_rdx_read(vcpu) << 32) | (kvm_rax_read(vcpu) & 0xffffffff); ingpa = ((u64)kvm_rbx_read(vcpu) << 32) | @@ -1615,13 +1620,6 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) outgpa = ((u64)kvm_rdi_read(vcpu) << 32) | (kvm_rsi_read(vcpu) & 0xffffffff); } -#ifdef CONFIG_X86_64 - else { - param = kvm_rcx_read(vcpu); - ingpa = kvm_rdx_read(vcpu); - outgpa = kvm_r8_read(vcpu); - } -#endif code = param & 0xffff; fast = !!(param & HV_HYPERCALL_FAST_BIT); diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 1add1bc881e2..d859ae8890d0 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -45,11 +45,6 @@ #include "lapic.h" #include "irq.h" -#if 0 -#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) -#else -#define ioapic_debug(fmt, arg...) -#endif static int ioapic_service(struct kvm_ioapic *vioapic, int irq, bool line_status); @@ -294,7 +289,6 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) default: index = (ioapic->ioregsel - 0x10) >> 1; - ioapic_debug("change redir index %x val %x\n", index, val); if (index >= IOAPIC_NUM_PINS) return; e = &ioapic->redirtbl[index]; @@ -343,12 +337,6 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) entry->fields.remote_irr)) return -1; - ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " - "vector=%x trig_mode=%x\n", - entry->fields.dest_id, entry->fields.dest_mode, - entry->fields.delivery_mode, entry->fields.vector, - entry->fields.trig_mode); - irqe.dest_id = entry->fields.dest_id; irqe.vector = entry->fields.vector; irqe.dest_mode = entry->fields.dest_mode; @@ -515,7 +503,6 @@ static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, if (!ioapic_in_range(ioapic, addr)) return -EOPNOTSUPP; - ioapic_debug("addr %lx\n", (unsigned long)addr); ASSERT(!(addr & 0xf)); /* check alignment */ addr &= 0xff; @@ -558,8 +545,6 @@ static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, if (!ioapic_in_range(ioapic, addr)) return -EOPNOTSUPP; - ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n", - (void*)addr, len, val); ASSERT(!(addr & 0xf)); /* check alignment */ switch (len) { diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a232e76d8f23..0aa158657f20 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -52,9 +52,6 @@ #define PRIu64 "u" #define PRIo64 "o" -/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ -#define apic_debug(fmt, arg...) 
do {} while (0) - /* 14 is the version for Xeon and Pentium 8.4.8*/ #define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16)) #define LAPIC_MMIO_LENGTH (1 << 12) @@ -121,6 +118,17 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic) return apic->vcpu->vcpu_id; } +bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu) +{ + return pi_inject_timer && kvm_vcpu_apicv_active(vcpu); +} +EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt); + +static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu) +{ + return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE; +} + static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) { switch (map->mode) { @@ -627,7 +635,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) { u8 val; if (pv_eoi_get_user(vcpu, &val) < 0) - apic_debug("Can't read EOI MSR value: 0x%llx\n", + printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n", (unsigned long long)vcpu->arch.pv_eoi.msr_val); return val & 0x1; } @@ -635,7 +643,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) { if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { - apic_debug("Can't set EOI MSR value: 0x%llx\n", + printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n", (unsigned long long)vcpu->arch.pv_eoi.msr_val); return; } @@ -645,7 +653,7 @@ static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) { if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { - apic_debug("Can't clear EOI MSR value: 0x%llx\n", + printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n", (unsigned long long)vcpu->arch.pv_eoi.msr_val); return; } @@ -679,9 +687,6 @@ static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr) else ppr = isrv & 0xf0; - apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x", - apic, ppr, isr, isrv); - *new_ppr = ppr; if (old_ppr != ppr) kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr); @@ -758,8 +763,6 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) return ((logical_id >> 4) == (mda >> 4)) && (logical_id & mda & 0xf) != 0; default: - apic_debug("Bad DFR vcpu %d: %08x\n", - apic->vcpu->vcpu_id, kvm_lapic_get_reg(apic, APIC_DFR)); return false; } } @@ -798,10 +801,6 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, struct kvm_lapic *target = vcpu->arch.apic; u32 mda = kvm_apic_mda(vcpu, dest, source, target); - apic_debug("target %p, source %p, dest 0x%x, " - "dest_mode 0x%x, short_hand 0x%x\n", - target, source, dest, dest_mode, short_hand); - ASSERT(target); switch (short_hand) { case APIC_DEST_NOSHORT: @@ -816,8 +815,6 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, case APIC_DEST_ALLBUT: return target != source; default: - apic_debug("kvm: apic: Bad dest shorthand value %x\n", - short_hand); return false; } } @@ -1095,15 +1092,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, smp_wmb(); kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_vcpu_kick(vcpu); - } else { - apic_debug("Ignoring de-assert INIT to vcpu %d\n", - vcpu->vcpu_id); } break; case APIC_DM_STARTUP: - apic_debug("SIPI to vcpu %d vector 0x%02x\n", - vcpu->vcpu_id, vector); result = 1; apic->sipi_vector = vector; /* make sure sipi_vector is visible for the receiver */ @@ -1221,14 +1213,6 @@ static void apic_send_ipi(struct kvm_lapic *apic) trace_kvm_apic_ipi(icr_low, irq.dest_id); - apic_debug("icr_high 0x%x, 
icr_low 0x%x, " - "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " - "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, " - "msi_redir_hint 0x%x\n", - icr_high, icr_low, irq.shorthand, irq.dest_id, - irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, - irq.vector, irq.msi_redir_hint); - kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); } @@ -1282,7 +1266,6 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) switch (offset) { case APIC_ARBPRI: - apic_debug("Access APIC ARBPRI register which is for P6\n"); break; case APIC_TMCCT: /* Timer CCR */ @@ -1349,11 +1332,8 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, if (!apic_x2apic_mode(apic)) valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI); - if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) { - apic_debug("KVM_APIC_READ: read reserved register %x\n", - offset); + if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) return 1; - } result = __apic_read(apic, offset & ~0xf); @@ -1411,9 +1391,6 @@ static void update_divide_count(struct kvm_lapic *apic) tmp1 = tdcr & 0xf; tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; apic->divide_count = 0x1 << (tmp2 & 0x7); - - apic_debug("timer divide count is 0x%x\n", - apic->divide_count); } static void limit_periodic_timer_frequency(struct kvm_lapic *apic) @@ -1455,29 +1432,6 @@ static void apic_update_lvtt(struct kvm_lapic *apic) } } -static void apic_timer_expired(struct kvm_lapic *apic) -{ - struct kvm_vcpu *vcpu = apic->vcpu; - struct swait_queue_head *q = &vcpu->wq; - struct kvm_timer *ktimer = &apic->lapic_timer; - - if (atomic_read(&apic->lapic_timer.pending)) - return; - - atomic_inc(&apic->lapic_timer.pending); - kvm_set_pending_timer(vcpu); - - /* - * For x86, the atomic_inc() is serialized, thus - * using swait_active() is safe. - */ - if (swait_active(q)) - swake_up_one(q); - - if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use) - ktimer->expired_tscdeadline = ktimer->tscdeadline; -} - /* * On APICv, this test will cause a busy wait * during a higher-priority task. 
@@ -1551,7 +1505,7 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, apic->lapic_timer.timer_advance_ns = timer_advance_ns; } -void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) +static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; u64 guest_tsc, tsc_deadline; @@ -1559,9 +1513,6 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) if (apic->lapic_timer.expired_tscdeadline == 0) return; - if (!lapic_timer_int_injected(vcpu)) - return; - tsc_deadline = apic->lapic_timer.expired_tscdeadline; apic->lapic_timer.expired_tscdeadline = 0; guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); @@ -1573,8 +1524,57 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) if (unlikely(!apic->lapic_timer.timer_advance_adjust_done)) adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta); } + +void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) +{ + if (lapic_timer_int_injected(vcpu)) + __kvm_wait_lapic_expire(vcpu); +} EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire); +static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic) +{ + struct kvm_timer *ktimer = &apic->lapic_timer; + + kvm_apic_local_deliver(apic, APIC_LVTT); + if (apic_lvtt_tscdeadline(apic)) + ktimer->tscdeadline = 0; + if (apic_lvtt_oneshot(apic)) { + ktimer->tscdeadline = 0; + ktimer->target_expiration = 0; + } +} + +static void apic_timer_expired(struct kvm_lapic *apic) +{ + struct kvm_vcpu *vcpu = apic->vcpu; + struct swait_queue_head *q = &vcpu->wq; + struct kvm_timer *ktimer = &apic->lapic_timer; + + if (atomic_read(&apic->lapic_timer.pending)) + return; + + if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use) + ktimer->expired_tscdeadline = ktimer->tscdeadline; + + if (kvm_use_posted_timer_interrupt(apic->vcpu)) { + if (apic->lapic_timer.timer_advance_ns) + __kvm_wait_lapic_expire(vcpu); + kvm_apic_inject_pending_timer_irqs(apic); + return; + } + + atomic_inc(&apic->lapic_timer.pending); + kvm_set_pending_timer(vcpu); + + /* + * For x86, the atomic_inc() is serialized, thus + * using swait_active() is safe. 
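The relocated apic_timer_expired() gains a fast path: when posted timer interrupts are usable (pi_inject_timer set and APICv active) and the vCPU is in guest mode, the timer interrupt is injected directly rather than marked pending and followed by a kick. A toy model of that branch structure, with the struct and function bodies invented:

	#include <stdbool.h>
	#include <stdio.h>

	struct vcpu {
		bool apicv_active;
		bool in_guest_mode;
	};

	static bool pi_inject_timer = true;

	/* analogue of kvm_can_post_timer_interrupt() */
	static bool can_post_timer_interrupt(const struct vcpu *v)
	{
		return pi_inject_timer && v->apicv_active;
	}

	static void timer_expired(const struct vcpu *v)
	{
		if (can_post_timer_interrupt(v) && v->in_guest_mode) {
			printf("inject timer interrupt directly (posted)\n");
			return;
		}
		printf("mark pending, kick the vCPU, wake any waiter\n");
	}

	int main(void)
	{
		struct vcpu v = { .apicv_active = true, .in_guest_mode = true };

		timer_expired(&v);
		v.in_guest_mode = false;
		timer_expired(&v);
		return 0;
	}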
+ */ + if (swait_active(q)) + swake_up_one(q); +} + static void start_sw_tscdeadline(struct kvm_lapic *apic) { struct kvm_timer *ktimer = &apic->lapic_timer; @@ -1601,7 +1601,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) likely(ns > apic->lapic_timer.timer_advance_ns)) { expire = ktime_add_ns(now, ns); expire = ktime_sub_ns(expire, ktimer->timer_advance_ns); - hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_PINNED); + hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS); } else apic_timer_expired(apic); @@ -1648,16 +1648,6 @@ static bool set_target_expiration(struct kvm_lapic *apic) limit_periodic_timer_frequency(apic); - apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" - PRIx64 ", " - "timer initial count 0x%x, period %lldns, " - "expire @ 0x%016" PRIx64 ".\n", __func__, - APIC_BUS_CYCLE_NS, ktime_to_ns(now), - kvm_lapic_get_reg(apic, APIC_TMICT), - apic->lapic_timer.period, - ktime_to_ns(ktime_add_ns(now, - apic->lapic_timer.period))); - apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + nsec_to_cycles(apic->vcpu, apic->lapic_timer.period); apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period); @@ -1703,7 +1693,7 @@ static void start_sw_period(struct kvm_lapic *apic) hrtimer_start(&apic->lapic_timer.timer, apic->lapic_timer.target_expiration, - HRTIMER_MODE_ABS_PINNED); + HRTIMER_MODE_ABS); } bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) @@ -1860,8 +1850,6 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) { apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode; if (lvt0_in_nmi_mode) { - apic_debug("Receive NMI setting on APIC_LVT0 " - "for cpu %d\n", apic->vcpu->vcpu_id); atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); } else atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode); @@ -1975,8 +1963,6 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_TDCR: { uint32_t old_divisor = apic->divide_count; - if (val & 4) - apic_debug("KVM_WRITE:TDCR %x\n", val); kvm_lapic_set_reg(apic, APIC_TDCR, val); update_divide_count(apic); if (apic->divide_count != old_divisor && @@ -1988,10 +1974,8 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) break; } case APIC_ESR: - if (apic_x2apic_mode(apic) && val != 0) { - apic_debug("KVM_WRITE:ESR not zero %x\n", val); + if (apic_x2apic_mode(apic) && val != 0) ret = 1; - } break; case APIC_SELF_IPI: @@ -2004,8 +1988,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) ret = 1; break; } - if (ret) - apic_debug("Local APIC Write to read-only register %x\n", reg); + return ret; } EXPORT_SYMBOL_GPL(kvm_lapic_reg_write); @@ -2033,20 +2016,12 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, * 32/64/128 bits registers must be accessed thru 32 bits. * Refer SDM 8.4.1 */ - if (len != 4 || (offset & 0xf)) { - /* Don't shout loud, $infamous_os would cause only noise. 
*/ - apic_debug("apic write: bad size=%d %lx\n", len, (long)address); + if (len != 4 || (offset & 0xf)) return 0; - } val = *(u32*)data; - /* too common printing */ - if (offset != APIC_EOI) - apic_debug("%s: offset 0x%x with length 0x%x, and value is " - "0x%x\n", __func__, offset, len, val); - - kvm_lapic_reg_write(apic, offset, val); + kvm_lapic_reg_write(apic, offset & 0xff0, val); return 0; } @@ -2178,11 +2153,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) if ((value & MSR_IA32_APICBASE_ENABLE) && apic->base_address != APIC_DEFAULT_PHYS_BASE) pr_warn_once("APIC base relocation is unsupported by KVM"); - - /* with FSB delivery interrupt, we can restart APIC functionality */ - apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is " - "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address); - } void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) @@ -2193,8 +2163,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) if (!apic) return; - apic_debug("%s\n", __func__); - /* Stop the timer in case it's a reset to an active apic */ hrtimer_cancel(&apic->lapic_timer.timer); @@ -2247,11 +2215,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.apic_arb_prio = 0; vcpu->arch.apic_attention = 0; - - apic_debug("%s: vcpu=%p, id=0x%x, base_msr=" - "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, - vcpu, kvm_lapic_get_reg(apic, APIC_ID), - vcpu->arch.apic_base, apic->base_address); } /* @@ -2323,7 +2286,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) struct kvm_lapic *apic; ASSERT(vcpu != NULL); - apic_debug("apic_init %d\n", vcpu->vcpu_id); apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT); if (!apic) @@ -2340,7 +2302,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) apic->vcpu = vcpu; hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, - HRTIMER_MODE_ABS_PINNED); + HRTIMER_MODE_ABS); apic->lapic_timer.timer.function = apic_timer_fn; if (timer_advance_ns == -1) { apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; @@ -2397,13 +2359,7 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; if (atomic_read(&apic->lapic_timer.pending) > 0) { - kvm_apic_local_deliver(apic, APIC_LVTT); - if (apic_lvtt_tscdeadline(apic)) - apic->lapic_timer.tscdeadline = 0; - if (apic_lvtt_oneshot(apic)) { - apic->lapic_timer.tscdeadline = 0; - apic->lapic_timer.target_expiration = 0; - } + kvm_apic_inject_pending_timer_irqs(apic); atomic_set(&apic->lapic_timer.pending, 0); } } @@ -2525,12 +2481,13 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) { struct hrtimer *timer; - if (!lapic_in_kernel(vcpu)) + if (!lapic_in_kernel(vcpu) || + kvm_can_post_timer_interrupt(vcpu)) return; timer = &vcpu->arch.apic->lapic_timer.timer; if (hrtimer_cancel(timer)) - hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED); + hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } /* @@ -2678,11 +2635,8 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) return 1; - if (reg == APIC_DFR || reg == APIC_ICR2) { - apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n", - reg); + if (reg == APIC_DFR || reg == APIC_ICR2) return 1; - } if (kvm_lapic_reg_read(apic, reg, 4, &low)) return 1; @@ -2780,8 +2734,6 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) /* evaluate pending_events before reading the vector */ smp_rmb(); sipi_vector = apic->sipi_vector; - apic_debug("vcpu %d 
received sipi with vector # %x\n", - vcpu->vcpu_id, sipi_vector); kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; } diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 36747174e4a8..50053d2b8b7b 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -236,6 +236,7 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu); void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu); bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu); void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu); +bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu); static inline enum lapic_mode kvm_apic_mode(u64 apic_base) { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9a5814d8d194..24843cf49579 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3466,7 +3466,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, /* * Currently, fast page fault only works for direct mapping * since the gfn is not stable for indirect shadow page. See - * Documentation/virtual/kvm/locking.txt to get more detail. + * Documentation/virt/kvm/locking.txt to get more detail. */ fault_handled = fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte, @@ -4597,11 +4597,11 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, */ /* Faults from writes to non-writable pages */ - u8 wf = (pfec & PFERR_WRITE_MASK) ? ~w : 0; + u8 wf = (pfec & PFERR_WRITE_MASK) ? (u8)~w : 0; /* Faults from user mode accesses to supervisor pages */ - u8 uf = (pfec & PFERR_USER_MASK) ? ~u : 0; + u8 uf = (pfec & PFERR_USER_MASK) ? (u8)~u : 0; /* Faults from fetches of non-executable pages*/ - u8 ff = (pfec & PFERR_FETCH_MASK) ? ~x : 0; + u8 ff = (pfec & PFERR_FETCH_MASK) ? (u8)~x : 0; /* Faults from kernel mode fetches of user pages */ u8 smepf = 0; /* Faults from kernel mode accesses of user pages */ diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index aa5a2597305a..46875bbd0419 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -19,8 +19,8 @@ #include "lapic.h" #include "pmu.h" -/* This keeps the total size of the filter under 4k. */ -#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 63 +/* This is enough to filter the vast majority of currently defined events. */ +#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300 /* NOTE: * - Each perf counter is defined as "struct kvm_pmc"; @@ -131,8 +131,8 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, intr ? 
kvm_perf_overflow_intr : kvm_perf_overflow, pmc); if (IS_ERR(event)) { - printk_once("kvm_pmu: event creation failed %ld\n", - PTR_ERR(event)); + pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n", + PTR_ERR(event), pmc->idx); return; } @@ -206,12 +206,24 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx) { unsigned en_field = ctrl & 0x3; bool pmi = ctrl & 0x8; + struct kvm_pmu_event_filter *filter; + struct kvm *kvm = pmc->vcpu->kvm; pmc_stop_counter(pmc); if (!en_field || !pmc_is_enabled(pmc)) return; + filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu); + if (filter) { + if (filter->action == KVM_PMU_EVENT_DENY && + test_bit(idx, (ulong *)&filter->fixed_counter_bitmap)) + return; + if (filter->action == KVM_PMU_EVENT_ALLOW && + !test_bit(idx, (ulong *)&filter->fixed_counter_bitmap)) + return; + } + pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE, kvm_x86_ops->pmu_ops->find_fixed_event(idx), !(en_field & 0x2), /* exclude user */ @@ -385,6 +397,9 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp) tmp.action != KVM_PMU_EVENT_DENY) return -EINVAL; + if (tmp.flags != 0) + return -EINVAL; + if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS) return -E2BIG; @@ -406,8 +421,8 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp) mutex_unlock(&kvm->lock); synchronize_srcu_expedited(&kvm->srcu); - r = 0; + r = 0; cleanup: kfree(filter); - return r; + return r; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 583b9fa656f3..7eafc6907861 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2143,12 +2143,20 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) goto out; } + svm->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, + GFP_KERNEL_ACCOUNT); + if (!svm->vcpu.arch.user_fpu) { + printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n"); + err = -ENOMEM; + goto free_partial_svm; + } + svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL_ACCOUNT); if (!svm->vcpu.arch.guest_fpu) { printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); err = -ENOMEM; - goto free_partial_svm; + goto free_user_fpu; } err = kvm_vcpu_init(&svm->vcpu, kvm, id); @@ -2211,6 +2219,8 @@ uninit: kvm_vcpu_uninit(&svm->vcpu); free_svm: kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu); +free_user_fpu: + kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu); free_partial_svm: kmem_cache_free(kvm_vcpu_cache, svm); out: @@ -2241,6 +2251,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_page(virt_to_page(svm->nested.hsave)); __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); + kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu); kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu); kmem_cache_free(kvm_vcpu_cache, svm); } @@ -7128,13 +7139,41 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu, static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) { - bool is_user, smap; - - is_user = svm_get_cpl(vcpu) == 3; - smap = !kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); + unsigned long cr4 = kvm_read_cr4(vcpu); + bool smep = cr4 & X86_CR4_SMEP; + bool smap = cr4 & X86_CR4_SMAP; + bool is_user = svm_get_cpl(vcpu) == 3; /* - * Detect and workaround Errata 1096 Fam_17h_00_0Fh + * Detect and workaround Errata 1096 Fam_17h_00_0Fh. 
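Also in this stretch, reprogram_fixed_counter() starts honoring the event filter's new fixed_counter_bitmap: a DENY filter skips fixed counters whose bit is set, an ALLOW filter skips those whose bit is clear. A minimal sketch of the check, with the constants and struct as stand-ins for the uapi definitions:

	#include <stdbool.h>
	#include <stdio.h>

	#define PMU_EVENT_ALLOW 0
	#define PMU_EVENT_DENY  1

	struct event_filter {
		int action;
		unsigned int fixed_counter_bitmap;
	};

	/* true if fixed counter 'idx' may be programmed under 'f' */
	static bool fixed_counter_allowed(const struct event_filter *f, int idx)
	{
		bool set = f->fixed_counter_bitmap & (1u << idx);

		if (f->action == PMU_EVENT_DENY && set)
			return false;
		if (f->action == PMU_EVENT_ALLOW && !set)
			return false;
		return true;
	}

	int main(void)
	{
		struct event_filter f = { PMU_EVENT_ALLOW, 0x1 };	/* only counter 0 */

		printf("counter 0: %s\n", fixed_counter_allowed(&f, 0) ? "count" : "skip");
		printf("counter 1: %s\n", fixed_counter_allowed(&f, 1) ? "count" : "skip");
		return 0;
	}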
+ * + * Errata: + * When the CPU raises #NPF on guest data access and vCPU CR4.SMAP=1, it is + * possible that CPU microcode implementing DecodeAssist will fail + * to read bytes of instruction which caused #NPF. In this case, + * GuestIntrBytes field of the VMCB on a VMEXIT will incorrectly + * return 0 instead of the correct guest instruction bytes. + * + * This happens because CPU microcode reading instruction bytes + * uses a special opcode which attempts to read data using CPL=0 + * privileges. The microcode reads CS:RIP and if it hits a SMAP + * fault, it gives up and returns no instruction bytes. + * + * Detection: + * We reach here in case CPU supports DecodeAssist, raised #NPF and + * returned 0 in GuestIntrBytes field of the VMCB. + * First, errata can only be triggered in case vCPU CR4.SMAP=1. + * Second, if vCPU CR4.SMEP=1, errata could only be triggered + * in case vCPU CPL==3 (Because otherwise guest would have triggered + * a SMEP fault instead of #NPF). + * Otherwise, vCPU CR4.SMEP=0, errata could be triggered by any vCPU CPL. + * As most guests enable SMAP if they have also enabled SMEP, use the above + * logic in order to attempt to minimize false positives when detecting the + * errata, while still preserving semantic correctness in all cases. + * + * Workaround: + * To determine what instruction the guest was executing, the hypervisor + * will have to decode the instruction at the instruction pointer. * * In non SEV guest, hypervisor will be able to read the guest * memory to decode the instruction pointer when insn_len is zero @@ -7145,11 +7184,11 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) * instruction pointer so we will not be able to work around it. Let's * print the error and request to kill the guest. */ - if (is_user && smap) { + if (smap && (!smep || is_user)) { if (!sev_guest(vcpu->kvm)) return true; - pr_err_ratelimited("KVM: Guest triggered AMD Erratum 1096\n"); + pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n"); kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); } diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index bb509c254939..ced9fba32598 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -194,6 +194,7 @@ static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) { secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS); vmcs_write64(VMCS_LINK_POINTER, -1ull); + vmx->nested.need_vmcs12_to_shadow_sync = false; } static inline void nested_release_evmcs(struct kvm_vcpu *vcpu) @@ -219,6 +220,8 @@ static void free_nested(struct kvm_vcpu *vcpu) if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) return; + kvm_clear_request(KVM_REQ_GET_VMCS12_PAGES, vcpu); + vmx->nested.vmxon = false; vmx->nested.smm.vmxon = false; free_vpid(vmx->nested.vpid02); @@ -231,7 +234,9 @@ static void free_nested(struct kvm_vcpu *vcpu) vmx->vmcs01.shadow_vmcs = NULL; } kfree(vmx->nested.cached_vmcs12); + vmx->nested.cached_vmcs12 = NULL; kfree(vmx->nested.cached_shadow_vmcs12); + vmx->nested.cached_shadow_vmcs12 = NULL; /* Unpin physical memory we referred to in the vmcs02 */ if (vmx->nested.apic_access_page) { kvm_release_page_dirty(vmx->nested.apic_access_page); @@ -1341,6 +1346,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) unsigned long val; int i; + if (WARN_ON(!shadow_vmcs)) + return; + preempt_disable(); vmcs_load(shadow_vmcs); @@ -1373,6 +1381,9 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) unsigned long val; int i, q; + if (WARN_ON(!shadow_vmcs)) + return; + vmcs_load(shadow_vmcs); for (q
= 0; q < ARRAY_SIZE(fields); q++) { @@ -4194,7 +4205,10 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, * mode, e.g. a 32-bit address size can yield a 64-bit virtual * address when using FS/GS with a non-zero base. */ - *ret = s.base + off; + if (seg_reg == VCPU_SREG_FS || seg_reg == VCPU_SREG_GS) + *ret = s.base + off; + else + *ret = off; /* Long mode: #GP(0)/#SS(0) if the memory address is in a * non-canonical form. This is the only check on the memory @@ -4433,7 +4447,6 @@ static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu) /* copy to memory all shadowed fields in case they were modified */ copy_shadow_to_vmcs12(vmx); - vmx->nested.need_vmcs12_to_shadow_sync = false; vmx_disable_shadow_vmcs(vmx); } vmx->nested.posted_intr_nv = -1; diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 68d231d49c7a..4dea0e0e7e39 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -337,17 +337,22 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu) static void intel_pmu_reset(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + struct kvm_pmc *pmc = NULL; int i; for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) { - struct kvm_pmc *pmc = &pmu->gp_counters[i]; + pmc = &pmu->gp_counters[i]; pmc_stop_counter(pmc); pmc->counter = pmc->eventsel = 0; } - for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) - pmc_stop_counter(&pmu->fixed_counters[i]); + for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) { + pmc = &pmu->fixed_counters[i]; + + pmc_stop_counter(pmc); + pmc->counter = 0; + } pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = pmu->global_ovf_ctrl = 0; diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index d4cb1945b2e3..4010d519eb8c 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -54,9 +54,9 @@ ENTRY(vmx_vmenter) ret 3: cmpb $0, kvm_rebooting - jne 4f - call kvm_spurious_fault -4: ret + je 4f + ret +4: ud2 .pushsection .fixup, "ax" 5: jmp 3b diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 69536553446d..074385c86c09 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5829,6 +5829,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) } if (unlikely(vmx->fail)) { + dump_vmcs(); vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; vcpu->run->fail_entry.hardware_entry_failure_reason = vmcs_read32(VM_INSTRUCTION_ERROR); @@ -6597,6 +6598,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) free_loaded_vmcs(vmx->loaded_vmcs); kfree(vmx->guest_msrs); kvm_vcpu_uninit(vcpu); + kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu); kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); kmem_cache_free(kvm_vcpu_cache, vmx); } @@ -6612,12 +6614,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!vmx) return ERR_PTR(-ENOMEM); + vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, + GFP_KERNEL_ACCOUNT); + if (!vmx->vcpu.arch.user_fpu) { + printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n"); + err = -ENOMEM; + goto free_partial_vcpu; + } + vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL_ACCOUNT); if (!vmx->vcpu.arch.guest_fpu) { printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); err = -ENOMEM; - goto free_partial_vcpu; + goto free_user_fpu; } vmx->vpid = allocate_vpid(); @@ -6720,6 +6730,8 @@ uninit_vcpu: free_vcpu: free_vpid(vmx->vpid); kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); +free_user_fpu: + kmem_cache_free(x86_fpu_cache, 
vmx->vcpu.arch.user_fpu); free_partial_vcpu: kmem_cache_free(kvm_vcpu_cache, vmx); return ERR_PTR(err); @@ -7064,7 +7076,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer; - if (kvm_mwait_in_guest(vcpu->kvm)) + if (kvm_mwait_in_guest(vcpu->kvm) || + kvm_can_post_timer_interrupt(vcpu)) return -EOPNOTSUPP; vmx = to_vmx(vcpu); @@ -7453,7 +7466,7 @@ static int enable_smi_window(struct kvm_vcpu *vcpu) static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) { - return 0; + return false; } static __init int hardware_setup(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4a0b74ecd1de..c6d951cbd76c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -51,6 +51,7 @@ #include <linux/kvm_irqfd.h> #include <linux/irqbypass.h> #include <linux/sched/stat.h> +#include <linux/sched/isolation.h> #include <linux/mem_encrypt.h> #include <trace/events/kvm.h> @@ -153,6 +154,9 @@ EXPORT_SYMBOL_GPL(enable_vmware_backdoor); static bool __read_mostly force_emulation_prefix = false; module_param(force_emulation_prefix, bool, S_IRUGO); +int __read_mostly pi_inject_timer = -1; +module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR); + #define KVM_NR_SHARED_MSRS 16 struct kvm_shared_msrs_global { @@ -1456,12 +1460,8 @@ static void update_pvclock_gtod(struct timekeeper *tk) void kvm_set_pending_timer(struct kvm_vcpu *vcpu) { - /* - * Note: KVM_REQ_PENDING_TIMER is implicitly checked in - * vcpu_enter_guest. This function is only called from - * the physical CPU that is running vcpu. - */ kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); + kvm_vcpu_kick(vcpu); } static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) @@ -1540,9 +1540,6 @@ static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz, *pshift = shift; *pmultiplier = div_frac(scaled64, tps32); - - pr_debug("%s: base_hz %llu => %llu, shift %d, mul %u\n", - __func__, base_hz, scaled_hz, shift, *pmultiplier); } #ifdef CONFIG_X86_64 @@ -1785,12 +1782,10 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) { if (!kvm_check_tsc_unstable()) { offset = kvm->arch.cur_tsc_offset; - pr_debug("kvm: matched tsc offset for %llu\n", data); } else { u64 delta = nsec_to_cycles(vcpu, elapsed); data += delta; offset = kvm_compute_tsc_offset(vcpu, data); - pr_debug("kvm: adjusted tsc offset by %llu\n", delta); } matched = true; already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation); @@ -1809,8 +1804,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) kvm->arch.cur_tsc_write = data; kvm->arch.cur_tsc_offset = offset; matched = false; - pr_debug("kvm: new tsc generation %llu, clock %llu\n", - kvm->arch.cur_tsc_generation, data); } /* @@ -3313,6 +3306,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_x86_ops->vcpu_load(vcpu, cpu); + fpregs_assert_state_consistent(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_return(); + /* Apply any externally detected TSC adjustments (due to suspend) */ if (unlikely(vcpu->arch.tsc_offset_adjustment)) { adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); @@ -6911,7 +6908,6 @@ static void kvm_timer_init(void) cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); } - pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz); cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, 
"x86/kvm/clk:online", kvmclock_cpu_online, kvmclock_cpu_down_prep); @@ -7070,6 +7066,8 @@ int kvm_arch_init(void *opaque) host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); kvm_lapic_init(); + if (pi_inject_timer == -1) + pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER); #ifdef CONFIG_X86_64 pvclock_gtod_register_notifier(&pvclock_gtod_notifier); @@ -7208,7 +7206,7 @@ static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id) rcu_read_unlock(); - if (target) + if (target && READ_ONCE(target->ready)) kvm_vcpu_yield_to(target); } @@ -7248,6 +7246,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) break; case KVM_HC_KICK_CPU: kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); + kvm_sched_yield(vcpu->kvm, a1); ret = 0; break; #ifdef CONFIG_X86_64 @@ -7996,9 +7995,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) trace_kvm_entry(vcpu->vcpu_id); guest_enter_irqoff(); - fpregs_assert_state_consistent(); - if (test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_return(); + /* The preempt notifier should have taken care of the FPU already. */ + WARN_ON_ONCE(test_thread_flag(TIF_NEED_FPU_LOAD)); if (unlikely(vcpu->arch.switch_db_regs)) { set_debugreg(0, 7); @@ -8276,7 +8274,7 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { fpregs_lock(); - copy_fpregs_to_fpstate(¤t->thread.fpu); + copy_fpregs_to_fpstate(vcpu->arch.user_fpu); /* PKRU is separately restored in kvm_x86_ops->run. */ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, ~XFEATURE_MASK_PKRU); @@ -8293,7 +8291,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) fpregs_lock(); copy_fpregs_to_fpstate(vcpu->arch.guest_fpu); - copy_kernel_to_fpregs(¤t->thread.fpu.state); + copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state); fpregs_mark_activate(); fpregs_unlock(); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index e08a12892e8b..6594020c0691 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -301,6 +301,8 @@ extern unsigned int min_timer_period_us; extern bool enable_vmware_backdoor; +extern int pi_inject_timer; + extern struct static_key kvm_no_apic_vcpu; static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 378a1f70ae7d..4fe1601dbc5d 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -239,7 +239,7 @@ copy_user_handle_tail: ret _ASM_EXTABLE_UA(1b, 2b) -ENDPROC(copy_user_handle_tail) +END(copy_user_handle_tail) /* * copy_user_nocache - Uncached memory copy with exception handling diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 74fdff968ea3..304f958c27b2 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -115,29 +115,29 @@ ENDPROC(__get_user_8) EXPORT_SYMBOL(__get_user_8) +bad_get_user_clac: + ASM_CLAC bad_get_user: xor %edx,%edx mov $(-EFAULT),%_ASM_AX - ASM_CLAC ret -END(bad_get_user) #ifdef CONFIG_X86_32 +bad_get_user_8_clac: + ASM_CLAC bad_get_user_8: xor %edx,%edx xor %ecx,%ecx mov $(-EFAULT),%_ASM_AX - ASM_CLAC ret -END(bad_get_user_8) #endif - _ASM_EXTABLE_UA(1b, bad_get_user) - _ASM_EXTABLE_UA(2b, bad_get_user) - _ASM_EXTABLE_UA(3b, bad_get_user) + _ASM_EXTABLE_UA(1b, bad_get_user_clac) + _ASM_EXTABLE_UA(2b, bad_get_user_clac) + _ASM_EXTABLE_UA(3b, bad_get_user_clac) #ifdef CONFIG_X86_64 - _ASM_EXTABLE_UA(4b, bad_get_user) + _ASM_EXTABLE_UA(4b, bad_get_user_clac) #else - _ASM_EXTABLE_UA(4b, bad_get_user_8) - _ASM_EXTABLE_UA(5b, bad_get_user_8) + _ASM_EXTABLE_UA(4b, bad_get_user_8_clac) + _ASM_EXTABLE_UA(5b, bad_get_user_8_clac) #endif diff --git 
a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index d2e5c9c39601..14bf78341d3c 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -32,8 +32,6 @@ */ #define ENTER mov PER_CPU_VAR(current_task), %_ASM_BX -#define EXIT ASM_CLAC ; \ - ret .text ENTRY(__put_user_1) @@ -43,7 +41,8 @@ ENTRY(__put_user_1) ASM_STAC 1: movb %al,(%_ASM_CX) xor %eax,%eax - EXIT + ASM_CLAC + ret ENDPROC(__put_user_1) EXPORT_SYMBOL(__put_user_1) @@ -56,7 +55,8 @@ ENTRY(__put_user_2) ASM_STAC 2: movw %ax,(%_ASM_CX) xor %eax,%eax - EXIT + ASM_CLAC + ret ENDPROC(__put_user_2) EXPORT_SYMBOL(__put_user_2) @@ -69,7 +69,8 @@ ENTRY(__put_user_4) ASM_STAC 3: movl %eax,(%_ASM_CX) xor %eax,%eax - EXIT + ASM_CLAC + ret ENDPROC(__put_user_4) EXPORT_SYMBOL(__put_user_4) @@ -85,19 +86,21 @@ ENTRY(__put_user_8) 5: movl %edx,4(%_ASM_CX) #endif xor %eax,%eax - EXIT + ASM_CLAC + RET ENDPROC(__put_user_8) EXPORT_SYMBOL(__put_user_8) +bad_put_user_clac: + ASM_CLAC bad_put_user: movl $-EFAULT,%eax - EXIT -END(bad_put_user) + RET - _ASM_EXTABLE_UA(1b, bad_put_user) - _ASM_EXTABLE_UA(2b, bad_put_user) - _ASM_EXTABLE_UA(3b, bad_put_user) - _ASM_EXTABLE_UA(4b, bad_put_user) + _ASM_EXTABLE_UA(1b, bad_put_user_clac) + _ASM_EXTABLE_UA(2b, bad_put_user_clac) + _ASM_EXTABLE_UA(3b, bad_put_user_clac) + _ASM_EXTABLE_UA(4b, bad_put_user_clac) #ifdef CONFIG_X86_32 - _ASM_EXTABLE_UA(5b, bad_put_user) + _ASM_EXTABLE_UA(5b, bad_put_user_clac) #endif diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index e0e006f1624e..fff28c6f73a2 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -60,7 +60,7 @@ EXPORT_SYMBOL(clear_user); * but reuse __memcpy_mcsafe in case a new read error is encountered. * clac() is handled in _copy_to_iter_mcsafe(). */ -__visible unsigned long +__visible notrace unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len) { for (; len; --len, to++, from++) { diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h index a5a41ec58072..0c122226ca56 100644 --- a/arch/x86/math-emu/fpu_emu.h +++ b/arch/x86/math-emu/fpu_emu.h @@ -177,7 +177,7 @@ static inline void reg_copy(FPU_REG const *x, FPU_REG *y) #define setexponentpos(x,y) { (*(short *)&((x)->exp)) = \ ((y) + EXTENDED_Ebias) & 0x7fff; } #define exponent16(x) (*(short *)&((x)->exp)) -#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (y); } +#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (u16)(y); } #define addexponent(x,y) { (*(short *)&((x)->exp)) += (y); } #define stdexp(x) { (*(short *)&((x)->exp)) += EXTENDED_Ebias; } diff --git a/arch/x86/math-emu/reg_constant.c b/arch/x86/math-emu/reg_constant.c index 8dc9095bab22..742619e94bdf 100644 --- a/arch/x86/math-emu/reg_constant.c +++ b/arch/x86/math-emu/reg_constant.c @@ -18,7 +18,7 @@ #include "control_w.h" #define MAKE_REG(s, e, l, h) { l, h, \ - ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) } + (u16)((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) } FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000); #if 0 diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index d1634c59ed56..6c46095cd0d9 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1490,9 +1490,8 @@ good_area: NOKPROBE_SYMBOL(do_user_addr_fault); /* - * This routine handles page faults. It determines the address, - * and the problem, and then passes it off to one of the appropriate - * routines. + * Explicitly marked noinline such that the function tracer sees this as the + * page_fault entry point. 
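The fault.c changes below alter the calling convention: the faulting address is no longer read inside the C handler but is read from CR2 in the entry code, before any traceable code can run, and handed to do_page_fault() as a third argument. A minimal sketch of a caller under the new convention (the stub name here is hypothetical; the real caller is the idtentry assembly):

    /* hypothetical C-level entry stub: observe CR2 before any tracing */
    dotraplinkage void notrace page_fault_entry_stub(struct pt_regs *regs,
                                                     unsigned long error_code)
    {
            unsigned long address = read_cr2();     /* must come first */

            do_page_fault(regs, error_code, address);
    }
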
*/ static noinline void __do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, @@ -1511,33 +1510,26 @@ __do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, } NOKPROBE_SYMBOL(__do_page_fault); -static nokprobe_inline void -trace_page_fault_entries(unsigned long address, struct pt_regs *regs, - unsigned long error_code) +static __always_inline void +trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { + if (!trace_pagefault_enabled()) + return; + if (user_mode(regs)) trace_page_fault_user(address, regs, error_code); else trace_page_fault_kernel(address, regs, error_code); } -/* - * We must have this function blacklisted from kprobes, tagged with notrace - * and call read_cr2() before calling anything else. To avoid calling any - * kind of tracing machinery before we've observed the CR2 value. - * - * exception_{enter,exit}() contains all sorts of tracepoints. - */ -dotraplinkage void notrace -do_page_fault(struct pt_regs *regs, unsigned long error_code) +dotraplinkage void +do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) { - unsigned long address = read_cr2(); /* Get the faulting address */ enum ctx_state prev_state; prev_state = exception_enter(); - if (trace_pagefault_enabled()) - trace_page_fault_entries(address, regs, error_code); - + trace_page_fault_entries(regs, error_code, address); __do_page_fault(regs, error_code, address); exception_exit(prev_state); } diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index e0df96fdfe46..fece30ca8b0c 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -15,6 +15,10 @@ #include <linux/dma-direct.h> #include <linux/swiotlb.h> #include <linux/mem_encrypt.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/bitops.h> +#include <linux/dma-mapping.h> #include <asm/tlbflush.h> #include <asm/fixmap.h> @@ -41,7 +45,7 @@ EXPORT_SYMBOL_GPL(sev_enable_key); bool sev_enabled __section(.data); /* Buffer used for early in-place encryption by BSP, no locking needed */ -static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE); +static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE); /* * This routine does not change the underlying encryption setting of the @@ -348,6 +352,32 @@ bool sev_active(void) } EXPORT_SYMBOL(sev_active); +/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */ +bool force_dma_unencrypted(struct device *dev) +{ + /* + * For SEV, all DMA must be to unencrypted addresses. + */ + if (sev_active()) + return true; + + /* + * For SME, all DMA must be to unencrypted addresses if the + * device does not support DMA to addresses that include the + * encryption mask. 
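The mask comparison implemented just below is easier to see with concrete numbers. A worked instance, assuming the SME C-bit sits at bit position 47 (the position is CPU-specific; 47 is only an example):

    /* assumption: sme_me_mask == 1ULL << 47 */
    u64 dma_enc_mask = DMA_BIT_MASK(__ffs64(1ULL << 47)); /* DMA_BIT_MASK(47) */

    /*
     * A 32-bit-only device (coherent_dma_mask == DMA_BIT_MASK(32)) gives
     * dma_dev_mask <= dma_enc_mask, so it cannot address the C-bit and its
     * DMA must be forced to unencrypted bounce buffers. A full 64-bit
     * device fails the test and may DMA to encrypted addresses directly.
     */
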
+ */ + if (sme_active()) { + u64 dma_enc_mask = DMA_BIT_MASK(__ffs64(sme_me_mask)); + u64 dma_dev_mask = min_not_zero(dev->coherent_dma_mask, + dev->bus_dma_mask); + + if (dma_dev_mask <= dma_enc_mask) + return true; + } + + return false; +} + /* Architecture __weak replacement functions */ void __init mem_encrypt_free_decrypted_mem(void) { diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index bed6bb93c965..7ceb32821093 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -998,7 +998,8 @@ void __init xen_setup_vcpu_info_placement(void) __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); pv_ops.irq.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct); - pv_ops.mmu.read_cr2 = xen_read_cr2_direct; + pv_ops.mmu.read_cr2 = + __PV_IS_CALLEE_SAVE(xen_read_cr2_direct); } } diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index f6e5eeecfc69..26e8b326966d 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1307,16 +1307,6 @@ static void xen_write_cr2(unsigned long cr2) this_cpu_read(xen_vcpu)->arch.cr2 = cr2; } -static unsigned long xen_read_cr2(void) -{ - return this_cpu_read(xen_vcpu)->arch.cr2; -} - -unsigned long xen_read_cr2_direct(void) -{ - return this_cpu_read(xen_vcpu_info.arch.cr2); -} - static noinline void xen_flush_tlb(void) { struct mmuext_op *op; @@ -2397,7 +2387,7 @@ static void xen_leave_lazy_mmu(void) } static const struct pv_mmu_ops xen_mmu_ops __initconst = { - .read_cr2 = xen_read_cr2, + .read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2), .write_cr2 = xen_write_cr2, .read_cr3 = xen_read_cr3, diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 8019edd0125c..be104eef80be 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -10,6 +10,7 @@ #include <asm/percpu.h> #include <asm/processor-flags.h> #include <asm/frame.h> +#include <asm/asm.h> #include <linux/linkage.h> @@ -135,3 +136,18 @@ ENTRY(check_events) FRAME_END ret ENDPROC(check_events) + +ENTRY(xen_read_cr2) + FRAME_BEGIN + _ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX + _ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX + FRAME_END + ret + ENDPROC(xen_read_cr2); + +ENTRY(xen_read_cr2_direct) + FRAME_BEGIN + _ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX + FRAME_END + ret + ENDPROC(xen_read_cr2_direct); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 2f111f47ba98..45a441c33d6d 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -134,6 +134,9 @@ __visible void xen_irq_disable_direct(void); __visible unsigned long xen_save_fl_direct(void); __visible void xen_restore_fl_direct(unsigned long); +__visible unsigned long xen_read_cr2(void); +__visible unsigned long xen_read_cr2_direct(void); + /* These are not functions, and cannot be called normally */ __visible void xen_iret(void); __visible void xen_sysret32(void); diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 72860325245a..586fcfe227ea 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -3354,38 +3354,57 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq) * there is no active group, then the primary expectation for * this device is probably a high throughput. * - * We are now left only with explaining the additional - * compound condition that is checked below for deciding - * whether the scenario is asymmetric. 
To explain this - * compound condition, we need to add that the function + * We are now left only with explaining the two sub-conditions in the + * additional compound condition that is checked below for deciding + * whether the scenario is asymmetric. To explain the first + * sub-condition, we need to add that the function * bfq_asymmetric_scenario checks the weights of only - * non-weight-raised queues, for efficiency reasons (see - * comments on bfq_weights_tree_add()). Then the fact that - * bfqq is weight-raised is checked explicitly here. More - * precisely, the compound condition below takes into account - * also the fact that, even if bfqq is being weight-raised, - * the scenario is still symmetric if all queues with requests - * waiting for completion happen to be - * weight-raised. Actually, we should be even more precise - * here, and differentiate between interactive weight raising - * and soft real-time weight raising. + * non-weight-raised queues, for efficiency reasons (see comments on + * bfq_weights_tree_add()). Then the fact that bfqq is weight-raised + * is checked explicitly here. More precisely, the compound condition + * below takes into account also the fact that, even if bfqq is being + * weight-raised, the scenario is still symmetric if all queues with + * requests waiting for completion happen to be + * weight-raised. Actually, we should be even more precise here, and + * differentiate between interactive weight raising and soft real-time + * weight raising. + * + * The second sub-condition checked in the compound condition is + * whether there is a fair amount of already in-flight I/O not + * belonging to bfqq. If so, I/O dispatching is to be plugged, for the + * following reason. The drive may decide to serve in-flight + * non-bfqq's I/O requests before bfqq's ones, thereby delaying the + * arrival of new I/O requests for bfqq (recall that bfqq is sync). If + * I/O-dispatching is not plugged, then, while bfqq remains empty, a + * basically uncontrolled amount of I/O from other queues may be + * dispatched too, possibly causing the service of bfqq's I/O to be + * delayed even longer in the drive. This problem gets more and more + * serious as the speed and the queue depth of the drive grow, + * because, as these two quantities grow, the probability to find no + * queue busy but many requests in flight grows too. By contrast, + * plugging I/O dispatching minimizes the delay induced by already + * in-flight I/O, and enables bfqq to recover the bandwidth it may + * lose because of this delay. * * As a side note, it is worth considering that the above - * device-idling countermeasures may however fail in the - * following unlucky scenario: if idling is (correctly) - * disabled in a time period during which all symmetry - * sub-conditions hold, and hence the device is allowed to - * enqueue many requests, but at some later point in time some - * sub-condition stops to hold, then it may become impossible - * to let requests be served in the desired order until all - * the requests already queued in the device have been served. 
+ * device-idling countermeasures may however fail in the following + * unlucky scenario: if I/O-dispatch plugging is (correctly) disabled + * in a time period during which all symmetry sub-conditions hold, and + * therefore the device is allowed to enqueue many requests, but at + * some later point in time some sub-condition stops to hold, then it + * may become impossible to make requests be served in the desired + * order until all the requests already queued in the device have been + * served. The last sub-condition commented above somewhat mitigates + * this problem for weight-raised queues. */ static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd, struct bfq_queue *bfqq) { return (bfqq->wr_coeff > 1 && - bfqd->wr_busy_queues < - bfq_tot_busy_queues(bfqd)) || + (bfqd->wr_busy_queues < + bfq_tot_busy_queues(bfqd) || + bfqd->rq_in_driver >= + bfqq->dispatched + 4)) || bfq_asymmetric_scenario(bfqd, bfqq); } diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 24ed26957367..55a7dc227dfb 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -54,7 +54,7 @@ static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */ -static bool blkcg_debug_stats = false; +bool blkcg_debug_stats = false; static struct workqueue_struct *blkcg_punt_bio_wq; static bool blkcg_policy_enabled(struct request_queue *q, @@ -944,10 +944,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) dbytes, dios); } - if (!blkcg_debug_stats) - goto next; - - if (atomic_read(&blkg->use_delay)) { + if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) { has_stats = true; off += scnprintf(buf+off, size-off, " use_delay=%d delay_nsec=%llu", @@ -967,7 +964,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) has_stats = true; off += written; } -next: + if (has_stats) { if (off < size - 1) { off += scnprintf(buf+off, size-off, "\n"); diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index d973c38ee4fd..0fff7b56df0e 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -917,6 +917,9 @@ static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf, unsigned long long avg_lat; unsigned long long cur_win; + if (!blkcg_debug_stats) + return 0; + if (iolat->ssd) return iolatency_ssd_stat(iolat, buf, size); diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index cf22ab00fefb..126021fc3a11 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -61,15 +61,6 @@ static inline void blk_mq_sched_completed_request(struct request *rq, u64 now) e->type->ops.completed_request(rq, now); } -static inline void blk_mq_sched_started_request(struct request *rq) -{ - struct request_queue *q = rq->q; - struct elevator_queue *e = q->elevator; - - if (e && e->type->ops.started_request) - e->type->ops.started_request(rq); -} - static inline void blk_mq_sched_requeue_request(struct request *rq) { struct request_queue *q = rq->q; diff --git a/block/blk-mq.c b/block/blk-mq.c index b038ec680e84..f78d3287dd82 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -669,8 +669,6 @@ void blk_mq_start_request(struct request *rq) { struct request_queue *q = rq->q; - blk_mq_sched_started_request(rq); - trace_block_rq_issue(q, rq); if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { @@ -1960,9 +1958,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) rq = blk_mq_get_request(q, bio, &data); if (unlikely(!rq)) { rq_qos_cleanup(q, bio); - if (bio->bi_opf & REQ_NOWAIT) + + cookie = 
BLK_QC_T_NONE; + if (bio->bi_opf & REQ_NOWAIT_INLINE) + cookie = BLK_QC_T_EAGAIN; + else if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); - return BLK_QC_T_NONE; + return cookie; } trace_block_getrq(q, bio, bio->bi_opf); diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index 659ccb8b693f..3954c0dc1443 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -202,6 +202,7 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr, return -1; data->got_token = true; + smp_wmb(); list_del_init(&curr->entry); wake_up_process(data->task); return 1; @@ -244,7 +245,9 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, return; prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE); + has_sleeper = !wq_has_single_sleeper(&rqw->wait); do { + /* The memory barrier in set_task_state saves us here. */ if (data.got_token) break; if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) { @@ -255,12 +258,14 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, * which means we now have two. Put our local token * and wake anyone else potentially waiting for one. */ + smp_rmb(); if (data.got_token) cleanup_cb(rqw, private_data); break; } io_schedule(); - has_sleeper = false; + has_sleeper = true; + set_current_state(TASK_UNINTERRUPTIBLE); } while (1); finish_wait(&rqw->wait, &data.wq); } diff --git a/block/genhd.c b/block/genhd.c index 97887e59f3b2..54f1f0d381f4 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1969,7 +1969,7 @@ static const struct attribute *disk_events_attrs[] = { * The default polling interval can be specified by the kernel * parameter block.events_dfl_poll_msecs which defaults to 0 * (disable). This can also be modified runtime by writing to - * /sys/module/block/events_dfl_poll_msecs. + * /sys/module/block/parameters/events_dfl_poll_msecs. 
*/ static int disk_events_set_dfl_poll_msecs(const char *val, const struct kernel_param *kp) diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 72312ad2e142..3a36e76eca83 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -408,7 +408,6 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev, hpriv->mmio = devm_ioremap_resource(dev, platform_get_resource(pdev, IORESOURCE_MEM, 0)); if (IS_ERR(hpriv->mmio)) { - dev_err(dev, "no mmio space\n"); rc = PTR_ERR(hpriv->mmio); goto err_out; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 90ebfcae0ce6..2b3103c30857 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -5417,7 +5417,7 @@ static int drbd_do_auth(struct drbd_connection *connection) unsigned int key_len; char secret[SHARED_SECRET_MAX]; /* 64 byte */ unsigned int resp_size; - SHASH_DESC_ON_STACK(desc, connection->cram_hmac_tfm); + struct shash_desc *desc; struct packet_info pi; struct net_conf *nc; int err, rv; @@ -5430,6 +5430,13 @@ static int drbd_do_auth(struct drbd_connection *connection) memcpy(secret, nc->shared_secret, key_len); rcu_read_unlock(); + desc = kmalloc(sizeof(struct shash_desc) + + crypto_shash_descsize(connection->cram_hmac_tfm), + GFP_KERNEL); + if (!desc) { + rv = -1; + goto fail; + } desc->tfm = connection->cram_hmac_tfm; rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len); @@ -5571,7 +5578,10 @@ static int drbd_do_auth(struct drbd_connection *connection) kfree(peers_ch); kfree(response); kfree(right_response); - shash_desc_zero(desc); + if (desc) { + shash_desc_zero(desc); + kfree(desc); + } return rv; } diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index 23553ed6b548..2d22d6bf52f2 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -248,16 +248,12 @@ static int __maybe_unused cn_proc_show(struct seq_file *m, void *v) return 0; } -static struct cn_dev cdev = { - .input = cn_rx_skb, -}; - static int cn_init(void) { struct cn_dev *dev = &cdev; struct netlink_kernel_cfg cfg = { .groups = CN_NETLINK_USERS + 0xf, - .input = dev->input, + .input = cn_rx_skb, }; dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR, &cfg); diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c index 93f39a1d4c3d..c66f566a854c 100644 --- a/drivers/cpufreq/pasemi-cpufreq.c +++ b/drivers/cpufreq/pasemi-cpufreq.c @@ -131,10 +131,18 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy) int err = -ENODEV; cpu = of_get_cpu_node(policy->cpu, NULL); + if (!cpu) + goto out; + max_freqp = of_get_property(cpu, "clock-frequency", NULL); of_node_put(cpu); - if (!cpu) + if (!max_freqp) { + err = -EINVAL; goto out; + } + + /* we need the freq in kHz */ + max_freq = *max_freqp / 1000; dn = of_find_compatible_node(NULL, NULL, "1682m-sdc"); if (!dn) @@ -171,16 +179,6 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy) } pr_debug("init cpufreq on CPU %d\n", policy->cpu); - - max_freqp = of_get_property(cpu, "clock-frequency", NULL); - if (!max_freqp) { - err = -EINVAL; - goto out_unmap_sdcpwr; - } - - /* we need the freq in kHz */ - max_freq = *max_freqp / 1000; - pr_debug("max clock-frequency is at %u kHz\n", max_freq); pr_debug("initializing frequency table\n"); @@ -199,9 +197,6 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy) cpufreq_generic_init(policy, pas_freqs, 
get_gizmo_latency()); return 0; -out_unmap_sdcpwr: - iounmap(sdcpwr_mapbase); - out_unmap_sdcasr: iounmap(sdcasr_mapbase); out: diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 53446e39a32c..ba8d3d0ef32c 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -157,7 +157,7 @@ config DMI_SCAN_MACHINE_NON_EFI_FALLBACK config ISCSI_IBFT_FIND bool "iSCSI Boot Firmware Table Attributes" - depends on X86 && ACPI + depends on X86 && ISCSI_IBFT default n help This option enables the kernel to find the region of memory @@ -168,7 +168,8 @@ config ISCSI_IBFT_FIND config ISCSI_IBFT tristate "iSCSI Boot Firmware Table Attributes module" select ISCSI_BOOT_SYSFS - depends on ISCSI_IBFT_FIND && SCSI && SCSI_LOWLEVEL + select ISCSI_IBFT_FIND if X86 + depends on ACPI && SCSI && SCSI_LOWLEVEL default n help This option enables support for detection and exposing of iSCSI diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c index ab3aa3983833..7e12cbdf957c 100644 --- a/drivers/firmware/iscsi_ibft.c +++ b/drivers/firmware/iscsi_ibft.c @@ -84,6 +84,10 @@ MODULE_DESCRIPTION("sysfs interface to BIOS iBFT information"); MODULE_LICENSE("GPL"); MODULE_VERSION(IBFT_ISCSI_VERSION); +#ifndef CONFIG_ISCSI_IBFT_FIND +struct acpi_table_ibft *ibft_addr; +#endif + struct ibft_hdr { u8 id; u8 version; diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index e7dff5febe16..d42bc0883a32 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -852,7 +852,7 @@ static const u16 NCT6106_REG_TARGET[] = { 0x111, 0x121, 0x131 }; static const u16 NCT6106_REG_WEIGHT_TEMP_SEL[] = { 0x168, 0x178, 0x188 }; static const u16 NCT6106_REG_WEIGHT_TEMP_STEP[] = { 0x169, 0x179, 0x189 }; static const u16 NCT6106_REG_WEIGHT_TEMP_STEP_TOL[] = { 0x16a, 0x17a, 0x18a }; -static const u16 NCT6106_REG_WEIGHT_DUTY_STEP[] = { 0x16b, 0x17b, 0x17c }; +static const u16 NCT6106_REG_WEIGHT_DUTY_STEP[] = { 0x16b, 0x17b, 0x18b }; static const u16 NCT6106_REG_WEIGHT_TEMP_BASE[] = { 0x16c, 0x17c, 0x18c }; static const u16 NCT6106_REG_WEIGHT_DUTY_BASE[] = { 0x16d, 0x17d, 0x18d }; @@ -3764,6 +3764,7 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_TIME[0] = NCT6106_REG_FAN_STOP_TIME; data->REG_FAN_TIME[1] = NCT6106_REG_FAN_STEP_UP_TIME; data->REG_FAN_TIME[2] = NCT6106_REG_FAN_STEP_DOWN_TIME; + data->REG_TOLERANCE_H = NCT6106_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6106_REG_PWM; data->REG_PWM[1] = NCT6106_REG_FAN_START_OUTPUT; data->REG_PWM[2] = NCT6106_REG_FAN_STOP_OUTPUT; diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c index a7d2b16dd702..30e18eb60da7 100644 --- a/drivers/hwmon/occ/common.c +++ b/drivers/hwmon/occ/common.c @@ -408,8 +408,10 @@ static ssize_t occ_show_power_1(struct device *dev, static u64 occ_get_powr_avg(u64 *accum, u32 *samples) { - return div64_u64(get_unaligned_be64(accum) * 1000000ULL, - get_unaligned_be32(samples)); + u64 divisor = get_unaligned_be32(samples); + + return (divisor == 0) ? 
0 : + div64_u64(get_unaligned_be64(accum) * 1000000ULL, divisor); } static ssize_t occ_show_power_2(struct device *dev, diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index c7a3d75fb308..2e72fc5af157 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -611,6 +611,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, struct Scsi_Host *shost; struct iser_conn *iser_conn = NULL; struct ib_conn *ib_conn; + struct ib_device *ib_dev; u32 max_fr_sectors; shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0); @@ -641,16 +642,19 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, } ib_conn = &iser_conn->ib_conn; + ib_dev = ib_conn->device->ib_device; if (ib_conn->pi_support) { - u32 sig_caps = ib_conn->device->ib_device->attrs.sig_prot_cap; + u32 sig_caps = ib_dev->attrs.sig_prot_cap; scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps)); scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP | SHOST_DIX_GUARD_CRC); } - if (iscsi_host_add(shost, - ib_conn->device->ib_device->dev.parent)) { + if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) + shost->virt_boundary_mask = ~MASK_4K; + + if (iscsi_host_add(shost, ib_dev->dev.parent)) { mutex_unlock(&iser_conn->state_mutex); goto free_host; } @@ -956,30 +960,6 @@ static umode_t iser_attr_is_visible(int param_type, int param) return 0; } -static int iscsi_iser_slave_alloc(struct scsi_device *sdev) -{ - struct iscsi_session *session; - struct iser_conn *iser_conn; - struct ib_device *ib_dev; - - mutex_lock(&unbind_iser_conn_mutex); - - session = starget_to_session(scsi_target(sdev))->dd_data; - iser_conn = session->leadconn->dd_data; - if (!iser_conn) { - mutex_unlock(&unbind_iser_conn_mutex); - return -ENOTCONN; - } - ib_dev = iser_conn->ib_conn.device->ib_device; - - if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) - blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K); - - mutex_unlock(&unbind_iser_conn_mutex); - - return 0; -} - static struct scsi_host_template iscsi_iser_sht = { .module = THIS_MODULE, .name = "iSCSI Initiator over iSER", @@ -992,7 +972,6 @@ static struct scsi_host_template iscsi_iser_sht = { .eh_device_reset_handler= iscsi_eh_device_reset, .eh_target_reset_handler = iscsi_eh_recover_target, .target_alloc = iscsi_target_alloc, - .slave_alloc = iscsi_iser_slave_alloc, .proc_name = "iscsi_iser", .this_id = -1, .track_queue_depth = 1, diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index c7bd96edce80..b5960351bec0 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3046,20 +3046,6 @@ static int srp_target_alloc(struct scsi_target *starget) return 0; } -static int srp_slave_alloc(struct scsi_device *sdev) -{ - struct Scsi_Host *shost = sdev->host; - struct srp_target_port *target = host_to_target(shost); - struct srp_device *srp_dev = target->srp_host->srp_dev; - struct ib_device *ibdev = srp_dev->dev; - - if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) - blk_queue_virt_boundary(sdev->request_queue, - ~srp_dev->mr_page_mask); - - return 0; -} - static int srp_slave_configure(struct scsi_device *sdev) { struct Scsi_Host *shost = sdev->host; @@ -3262,7 +3248,6 @@ static struct scsi_host_template srp_template = { .name = "InfiniBand SRP initiator", .proc_name = DRV_NAME, .target_alloc = srp_target_alloc, - .slave_alloc = srp_slave_alloc, .slave_configure = srp_slave_configure, .info = srp_target_info, .queuecommand = 
srp_queuecommand, @@ -3806,6 +3791,9 @@ static ssize_t srp_create_target(struct device *dev, target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; target_host->max_segment_size = ib_dma_max_seg_size(ibdev); + if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) + target_host->virt_boundary_mask = ~srp_dev->mr_page_mask; + target = host_to_target(target_host); target->net = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); diff --git a/drivers/input/joystick/iforce/iforce-ff.c b/drivers/input/joystick/iforce/iforce-ff.c index 4cadebd8b9c4..95c0348843e6 100644 --- a/drivers/input/joystick/iforce/iforce-ff.c +++ b/drivers/input/joystick/iforce/iforce-ff.c @@ -6,9 +6,6 @@ * USB/RS232 I-Force joysticks and wheels. */ -/* - */ - #include "iforce.h" /* diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c index 9a5f90da06ec..b2a68bc9f0b4 100644 --- a/drivers/input/joystick/iforce/iforce-main.c +++ b/drivers/input/joystick/iforce/iforce-main.c @@ -6,9 +6,6 @@ * USB/RS232 I-Force joysticks and wheels. */ -/* - */ - #include <asm/unaligned.h> #include "iforce.h" diff --git a/drivers/input/joystick/iforce/iforce-packets.c b/drivers/input/joystick/iforce/iforce-packets.c index b313e38b2c3a..763642c8cee9 100644 --- a/drivers/input/joystick/iforce/iforce-packets.c +++ b/drivers/input/joystick/iforce/iforce-packets.c @@ -6,9 +6,6 @@ * USB/RS232 I-Force joysticks and wheels. */ -/* - */ - #include <asm/unaligned.h> #include "iforce.h" diff --git a/drivers/input/joystick/iforce/iforce-serio.c b/drivers/input/joystick/iforce/iforce-serio.c index bbe31e0b759f..f95a81b9fac7 100644 --- a/drivers/input/joystick/iforce/iforce-serio.c +++ b/drivers/input/joystick/iforce/iforce-serio.c @@ -6,9 +6,6 @@ * USB/RS232 I-Force joysticks and wheels. */ -/* - */ - #include <linux/serio.h> #include "iforce.h" diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c index ade376bfb79f..29abfeeef9a5 100644 --- a/drivers/input/joystick/iforce/iforce-usb.c +++ b/drivers/input/joystick/iforce/iforce-usb.c @@ -6,9 +6,6 @@ * USB/RS232 I-Force joysticks and wheels. */ -/* - */ - #include <linux/usb.h> #include "iforce.h" diff --git a/drivers/input/joystick/iforce/iforce.h b/drivers/input/joystick/iforce/iforce.h index 9cfa460466aa..6aa761ebbdf7 100644 --- a/drivers/input/joystick/iforce/iforce.h +++ b/drivers/input/joystick/iforce/iforce.h @@ -6,9 +6,6 @@ * USB/RS232 I-Force joysticks and wheels. */ -/* - */ - #include <linux/kernel.h> #include <linux/slab.h> #include <linux/input.h> diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 7c4f19dab34f..8e9c3ea9d5e7 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -71,6 +71,22 @@ config KEYBOARD_AMIGA config ATARI_KBD_CORE bool +config KEYBOARD_APPLESPI + tristate "Apple SPI keyboard and trackpad" + depends on ACPI && EFI + depends on SPI + depends on X86 || COMPILE_TEST + help + Say Y here if you are running Linux on any Apple MacBook8,1 or later, + or any MacBookPro13,* or MacBookPro14,*. + + You will also need to enable appropriate SPI master controllers: + spi_pxa2xx_platform and spi_pxa2xx_pci for MacBook8,1, and + spi_pxa2xx_platform and intel_lpss_pci for the rest. + + To compile this driver as a module, choose M here: the + module will be called applespi. 
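Following the help text above, a working setup for one of the newer models enables the driver plus the matching SPI master. As a sketch, the .config fragment might read as below (the assumption here is that spi_pxa2xx_platform is built from CONFIG_SPI_PXA2XX and intel_lpss_pci from CONFIG_MFD_INTEL_LPSS_PCI; double-check the symbol names against your tree):

    CONFIG_SPI=y
    CONFIG_KEYBOARD_APPLESPI=m
    CONFIG_SPI_PXA2XX=m
    CONFIG_MFD_INTEL_LPSS_PCI=m
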
+ config KEYBOARD_ATARI tristate "Atari keyboard" depends on ATARI diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile index f0291ca39f62..06a0af6efeae 100644 --- a/drivers/input/keyboard/Makefile +++ b/drivers/input/keyboard/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_KEYBOARD_ADP5520) += adp5520-keys.o obj-$(CONFIG_KEYBOARD_ADP5588) += adp5588-keys.o obj-$(CONFIG_KEYBOARD_ADP5589) += adp5589-keys.o obj-$(CONFIG_KEYBOARD_AMIGA) += amikbd.o +obj-$(CONFIG_KEYBOARD_APPLESPI) += applespi.o obj-$(CONFIG_KEYBOARD_ATARI) += atakbd.o obj-$(CONFIG_KEYBOARD_ATKBD) += atkbd.o obj-$(CONFIG_KEYBOARD_BCM) += bcm-keypad.o diff --git a/drivers/input/keyboard/adp5589-keys.c b/drivers/input/keyboard/adp5589-keys.c index 4c05c70a8cf3..4f96a4a99e5b 100644 --- a/drivers/input/keyboard/adp5589-keys.c +++ b/drivers/input/keyboard/adp5589-keys.c @@ -505,6 +505,7 @@ static int adp5589_gpio_add(struct adp5589_kpad *kpad) if (!gpio_data) return 0; + kpad->gc.parent = dev; kpad->gc.ngpio = adp5589_build_gpiomap(kpad, pdata); if (kpad->gc.ngpio == 0) { dev_info(dev, "No unused gpios left to export\n"); diff --git a/drivers/input/keyboard/applespi.c b/drivers/input/keyboard/applespi.c new file mode 100644 index 000000000000..548737e7aeda --- /dev/null +++ b/drivers/input/keyboard/applespi.c @@ -0,0 +1,1977 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * MacBook (Pro) SPI keyboard and touchpad driver + * + * Copyright (c) 2015-2018 Federico Lorenzi + * Copyright (c) 2017-2018 Ronald Tschalär + */ + +/* + * The keyboard and touchpad controller on the MacBookAir6, MacBookPro12, + * MacBook8 and newer can be driven either by USB or SPI. However the USB + * pins are only connected on the MacBookAir6 and 7 and the MacBookPro12. + * All others need this driver. The interface is selected using ACPI methods: + * + * * UIEN ("USB Interface Enable"): If invoked with argument 1, disables SPI + * and enables USB. If invoked with argument 0, disables USB. + * * UIST ("USB Interface Status"): Returns 1 if USB is enabled, 0 otherwise. + * * SIEN ("SPI Interface Enable"): If invoked with argument 1, disables USB + * and enables SPI. If invoked with argument 0, disables SPI. + * * SIST ("SPI Interface Status"): Returns 1 if SPI is enabled, 0 otherwise. + * * ISOL: Resets the four GPIO pins used for SPI. Intended to be invoked with + * argument 1, then once more with argument 0. + * + * UIEN and UIST are only provided on models where the USB pins are connected. + * + * SPI-based Protocol + * ------------------ + * + * The device and driver exchange messages (struct message); each message is + * encapsulated in one or more packets (struct spi_packet). There are two types + * of exchanges: reads, and writes. A read is signaled by a GPE, upon which one + * message can be read from the device. A write exchange consists of writing a + * command message, immediately reading a short status packet, and then, upon + * receiving a GPE, reading the response message. Write exchanges cannot be + * interleaved, i.e. a new write exchange must not be started till the previous + * write exchange is complete. Whether a received message is part of a read or + * write exchange is indicated in the encapsulating packet's flags field. + * + * A single message may be too large to fit in a single packet (which has a + * fixed, 256-byte size). In that case it will be split over multiple, + * consecutive packets. 
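The multi-packet rule in the comment above reduces to copying each packet's payload to its stated offset until nothing remains. A condensed sketch of that reassembly, reusing struct spi_packet, struct applespi_data and the constants defined further down (per-packet CRC checking and locking elided; this is not the driver's actual function):

    static int applespi_sketch_assemble(struct applespi_data *applespi,
                                        struct spi_packet *packet)
    {
            size_t off = le16_to_cpu(packet->offset);
            size_t len = le16_to_cpu(packet->length);

            /* bound check against the reassembly buffer */
            if (off + len > MAX_PKTS_PER_MSG * APPLESPI_PACKET_SIZE)
                    return -EINVAL;

            memcpy(applespi->msg_buf + off, packet->data, len);

            if (le16_to_cpu(packet->remaining))
                    return 0;       /* more packets of this message follow */

            return 1;               /* message complete, ready to dispatch */
    }
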
+ */ + +#include <linux/acpi.h> +#include <linux/crc16.h> +#include <linux/debugfs.h> +#include <linux/delay.h> +#include <linux/efi.h> +#include <linux/input.h> +#include <linux/input/mt.h> +#include <linux/leds.h> +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/spi/spi.h> +#include <linux/wait.h> +#include <linux/workqueue.h> + +#include <asm/barrier.h> +#include <asm/unaligned.h> + +#define CREATE_TRACE_POINTS +#include "applespi.h" +#include "applespi_trace.h" + +#define APPLESPI_PACKET_SIZE 256 +#define APPLESPI_STATUS_SIZE 4 + +#define PACKET_TYPE_READ 0x20 +#define PACKET_TYPE_WRITE 0x40 +#define PACKET_DEV_KEYB 0x01 +#define PACKET_DEV_TPAD 0x02 +#define PACKET_DEV_INFO 0xd0 + +#define MAX_ROLLOVER 6 + +#define MAX_FINGERS 11 +#define MAX_FINGER_ORIENTATION 16384 +#define MAX_PKTS_PER_MSG 2 + +#define KBD_BL_LEVEL_MIN 32U +#define KBD_BL_LEVEL_MAX 255U +#define KBD_BL_LEVEL_SCALE 1000000U +#define KBD_BL_LEVEL_ADJ \ + ((KBD_BL_LEVEL_MAX - KBD_BL_LEVEL_MIN) * KBD_BL_LEVEL_SCALE / 255U) + +#define EFI_BL_LEVEL_NAME L"KeyboardBacklightLevel" +#define EFI_BL_LEVEL_GUID EFI_GUID(0xa076d2af, 0x9678, 0x4386, 0x8b, 0x58, 0x1f, 0xc8, 0xef, 0x04, 0x16, 0x19) + +#define APPLE_FLAG_FKEY 0x01 + +#define SPI_RW_CHG_DELAY_US 100 /* from experimentation, in µs */ + +#define SYNAPTICS_VENDOR_ID 0x06cb + +static unsigned int fnmode = 1; +module_param(fnmode, uint, 0644); +MODULE_PARM_DESC(fnmode, "Mode of Fn key on Apple keyboards (0 = disabled, [1] = fkeyslast, 2 = fkeysfirst)"); + +static unsigned int fnremap; +module_param(fnremap, uint, 0644); +MODULE_PARM_DESC(fnremap, "Remap Fn key ([0] = no-remap; 1 = left-ctrl, 2 = left-shift, 3 = left-alt, 4 = left-meta, 6 = right-shift, 7 = right-alt, 8 = right-meta)"); + +static bool iso_layout; +module_param(iso_layout, bool, 0644); +MODULE_PARM_DESC(iso_layout, "Enable/Disable hardcoded ISO-layout of the keyboard. ([0] = disabled, 1 = enabled)"); + +static char touchpad_dimensions[40]; +module_param_string(touchpad_dimensions, touchpad_dimensions, + sizeof(touchpad_dimensions), 0444); +MODULE_PARM_DESC(touchpad_dimensions, "The pixel dimensions of the touchpad, as XxY+W+H ."); + +/** + * struct keyboard_protocol - keyboard message. + * message.type = 0x0110, message.length = 0x000a + * + * @unknown1: unknown + * @modifiers: bit-set of modifier/control keys pressed + * @unknown2: unknown + * @keys_pressed: the (non-modifier) keys currently pressed + * @fn_pressed: whether the fn key is currently pressed + * @crc16: crc over the whole message struct (message header + + * this struct) minus this @crc16 field + */ +struct keyboard_protocol { + u8 unknown1; + u8 modifiers; + u8 unknown2; + u8 keys_pressed[MAX_ROLLOVER]; + u8 fn_pressed; + __le16 crc16; +}; + +/** + * struct tp_finger - single trackpad finger structure, le16-aligned + * + * @origin: zero when switching track finger + * @abs_x: absolute x coordinate + * @abs_y: absolute y coordinate + * @rel_x: relative x coordinate + * @rel_y: relative y coordinate + * @tool_major: tool area, major axis + * @tool_minor: tool area, minor axis + * @orientation: 16384 when point, else 15 bit angle + * @touch_major: touch area, major axis + * @touch_minor: touch area, minor axis + * @unused: zeros + * @pressure: pressure on forcetouch touchpad + * @multi: one finger: varies, more fingers: constant + * @crc16: on last finger: crc over the whole message struct + * (i.e. message header + this struct) minus the last + * @crc16 field; unknown on all other fingers.
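To make the field layout documented above concrete, here is roughly how one finger's little-endian values could be handed to the input core (a sketch only; MT slot bookkeeping, scaling and axis setup are omitted, and the orientation conversion is an assumption based on MAX_FINGER_ORIENTATION):

    static void applespi_sketch_report_finger(struct input_dev *input,
                                              const struct tp_finger *f)
    {
            input_report_abs(input, ABS_MT_POSITION_X,
                             (s16)le16_to_cpu(f->abs_x));
            input_report_abs(input, ABS_MT_POSITION_Y,
                             (s16)le16_to_cpu(f->abs_y));
            input_report_abs(input, ABS_MT_TOUCH_MAJOR,
                             le16_to_cpu(f->touch_major));
            input_report_abs(input, ABS_MT_TOUCH_MINOR,
                             le16_to_cpu(f->touch_minor));
            input_report_abs(input, ABS_MT_PRESSURE,
                             le16_to_cpu(f->pressure));
            input_report_abs(input, ABS_MT_ORIENTATION,
                             MAX_FINGER_ORIENTATION -
                             le16_to_cpu(f->orientation));
    }
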
+ */ +struct tp_finger { + __le16 origin; + __le16 abs_x; + __le16 abs_y; + __le16 rel_x; + __le16 rel_y; + __le16 tool_major; + __le16 tool_minor; + __le16 orientation; + __le16 touch_major; + __le16 touch_minor; + __le16 unused[2]; + __le16 pressure; + __le16 multi; + __le16 crc16; +}; + +/** + * struct touchpad_protocol - touchpad message. + * message.type = 0x0210 + * + * @unknown1: unknown + * @clicked: 1 if a button-click was detected, 0 otherwise + * @unknown2: unknown + * @number_of_fingers: the number of fingers being reported in @fingers + * @clicked2: same as @clicked + * @unknown3: unknown + * @fingers: the data for each finger + */ +struct touchpad_protocol { + u8 unknown1[1]; + u8 clicked; + u8 unknown2[28]; + u8 number_of_fingers; + u8 clicked2; + u8 unknown3[16]; + struct tp_finger fingers[0]; +}; + +/** + * struct command_protocol_tp_info - get touchpad info. + * message.type = 0x1020, message.length = 0x0000 + * + * @crc16: crc over the whole message struct (message header + + * this struct) minus this @crc16 field + */ +struct command_protocol_tp_info { + __le16 crc16; +}; + +/** + * struct touchpad_info_protocol - touchpad info response. + * message.type = 0x1020, message.length = 0x006e + * + * @unknown1: unknown + * @model_flags: flags (vary by model number, but significance otherwise + * unknown) + * @model_no: the touchpad model number + * @unknown2: unknown + * @crc16: crc over the whole message struct (message header + + * this struct) minus this @crc16 field + */ +struct touchpad_info_protocol { + u8 unknown1[105]; + u8 model_flags; + u8 model_no; + u8 unknown2[3]; + __le16 crc16; +}; + +/** + * struct command_protocol_mt_init - initialize multitouch. + * message.type = 0x0252, message.length = 0x0002 + * + * @cmd: value: 0x0102 + * @crc16: crc over the whole message struct (message header + + * this struct) minus this @crc16 field + */ +struct command_protocol_mt_init { + __le16 cmd; + __le16 crc16; +}; + +/** + * struct command_protocol_capsl - toggle caps-lock led + * message.type = 0x0151, message.length = 0x0002 + * + * @unknown: value: 0x01 (length?) + * @led: 0 off, 2 on + * @crc16: crc over the whole message struct (message header + + * this struct) minus this @crc16 field + */ +struct command_protocol_capsl { + u8 unknown; + u8 led; + __le16 crc16; +}; + +/** + * struct command_protocol_bl - set keyboard backlight brightness + * message.type = 0xB051, message.length = 0x0006 + * + * @const1: value: 0x01B0 + * @level: the brightness level to set + * @const2: value: 0x0001 (backlight off), 0x01F4 (backlight on) + * @crc16: crc over the whole message struct (message header + + * this struct) minus this @crc16 field + */ +struct command_protocol_bl { + __le16 const1; + __le16 level; + __le16 const2; + __le16 crc16; +}; + +/** + * struct message - a complete spi message. + * + * Each message begins with a fixed header, followed by a message-type specific + * payload, and ends with a 16-bit crc. Because of the varying lengths of the + * payload, the crc is defined at the end of each payload struct, rather than + * in this struct. + * + * @type: the message type + * @zero: always 0 + * @counter: incremented on each message, rolls over after 255; there is a + * separate counter for each message type. + * @rsp_buf_len: response buffer length (the exact nature of this field is quite + * speculative). On a request/write this is often the same as + * @length, though in some cases it has been seen to be much larger + * (e.g.
0x400); on a response/read this is the same as on the + * request; for reads that are not responses it is 0. + * @length: length of the remainder of the data in the whole message + * structure (after re-assembly in case of being split over + * multiple spi-packets), minus the trailing crc. The total size + * of the message struct is therefore @length + 10. + */ +struct message { + __le16 type; + u8 zero; + u8 counter; + __le16 rsp_buf_len; + __le16 length; + union { + struct keyboard_protocol keyboard; + struct touchpad_protocol touchpad; + struct touchpad_info_protocol tp_info; + struct command_protocol_tp_info tp_info_command; + struct command_protocol_mt_init init_mt_command; + struct command_protocol_capsl capsl_command; + struct command_protocol_bl bl_command; + u8 data[0]; + }; +}; + +/* type + zero + counter + rsp_buf_len + length */ +#define MSG_HEADER_SIZE 8 + +/** + * struct spi_packet - a complete spi packet; always 256 bytes. This carries + * the (parts of the) message in the data. But note that this does not + * necessarily contain a complete message, as in some cases (e.g. many + * fingers pressed) the message is split over multiple packets (see the + * @offset, @remaining, and @length fields). In general the data parts in + * spi_packets are concatenated until @remaining is 0, and the result is a + * message. + * + * @flags: 0x40 = write (to device), 0x20 = read (from device); note that + * the response to a write still has 0x40. + * @device: 1 = keyboard, 2 = touchpad + * @offset: specifies the offset of this packet's data in the complete + * message; i.e. > 0 indicates this is a continuation packet (in + * the second packet for a message split over multiple packets + * this would then be the same as the @length in the first packet) + * @remaining: number of message bytes remaining in subsequent packets (in + * the first packet of a message split over two packets this would + * then be the same as the @length in the second packet) + * @length: length of the valid data in the @data in this packet + * @data: all or part of a message + * @crc16: crc over this whole structure minus this @crc16 field. This + * covers just this packet, even on multi-packet messages (in + * contrast to the crc in the message). + */ +struct spi_packet { + u8 flags; + u8 device; + __le16 offset; + __le16 remaining; + __le16 length; + u8 data[246]; + __le16 crc16; +}; + +struct spi_settings { + u64 spi_cs_delay; /* cs-to-clk delay in us */ + u64 reset_a2r_usec; /* active-to-receive delay? */ + u64 reset_rec_usec; /* ?
(cur val: 10) */ +}; + +/* this mimics struct drm_rect */ +struct applespi_tp_info { + int x_min; + int y_min; + int x_max; + int y_max; +}; + +struct applespi_data { + struct spi_device *spi; + struct spi_settings spi_settings; + struct input_dev *keyboard_input_dev; + struct input_dev *touchpad_input_dev; + + u8 *tx_buffer; + u8 *tx_status; + u8 *rx_buffer; + + u8 *msg_buf; + unsigned int saved_msg_len; + + struct applespi_tp_info tp_info; + + u8 last_keys_pressed[MAX_ROLLOVER]; + u8 last_keys_fn_pressed[MAX_ROLLOVER]; + u8 last_fn_pressed; + struct input_mt_pos pos[MAX_FINGERS]; + int slots[MAX_FINGERS]; + int gpe; + acpi_handle sien; + acpi_handle sist; + + struct spi_transfer dl_t; + struct spi_transfer rd_t; + struct spi_message rd_m; + + struct spi_transfer ww_t; + struct spi_transfer wd_t; + struct spi_transfer wr_t; + struct spi_transfer st_t; + struct spi_message wr_m; + + bool want_tp_info_cmd; + bool want_mt_init_cmd; + bool want_cl_led_on; + bool have_cl_led_on; + unsigned int want_bl_level; + unsigned int have_bl_level; + unsigned int cmd_msg_cntr; + /* lock to protect the above parameters and flags below */ + spinlock_t cmd_msg_lock; + bool cmd_msg_queued; + enum applespi_evt_type cmd_evt_type; + + struct led_classdev backlight_info; + + bool suspended; + bool drain; + wait_queue_head_t drain_complete; + bool read_active; + bool write_active; + + struct work_struct work; + struct touchpad_info_protocol rcvd_tp_info; + + struct dentry *debugfs_root; + bool debug_tp_dim; + char tp_dim_val[40]; + int tp_dim_min_x; + int tp_dim_max_x; + int tp_dim_min_y; + int tp_dim_max_y; +}; + +static const unsigned char applespi_scancodes[] = { + 0, 0, 0, 0, + KEY_A, KEY_B, KEY_C, KEY_D, KEY_E, KEY_F, KEY_G, KEY_H, KEY_I, KEY_J, + KEY_K, KEY_L, KEY_M, KEY_N, KEY_O, KEY_P, KEY_Q, KEY_R, KEY_S, KEY_T, + KEY_U, KEY_V, KEY_W, KEY_X, KEY_Y, KEY_Z, + KEY_1, KEY_2, KEY_3, KEY_4, KEY_5, KEY_6, KEY_7, KEY_8, KEY_9, KEY_0, + KEY_ENTER, KEY_ESC, KEY_BACKSPACE, KEY_TAB, KEY_SPACE, KEY_MINUS, + KEY_EQUAL, KEY_LEFTBRACE, KEY_RIGHTBRACE, KEY_BACKSLASH, 0, + KEY_SEMICOLON, KEY_APOSTROPHE, KEY_GRAVE, KEY_COMMA, KEY_DOT, KEY_SLASH, + KEY_CAPSLOCK, + KEY_F1, KEY_F2, KEY_F3, KEY_F4, KEY_F5, KEY_F6, KEY_F7, KEY_F8, KEY_F9, + KEY_F10, KEY_F11, KEY_F12, 0, 0, 0, 0, 0, 0, 0, 0, 0, + KEY_RIGHT, KEY_LEFT, KEY_DOWN, KEY_UP, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, KEY_102ND, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, KEY_RO, 0, KEY_YEN, 0, 0, 0, 0, 0, + 0, KEY_KATAKANAHIRAGANA, KEY_MUHENKAN +}; + +/* + * This must have exactly as many entries as there are bits in + * struct keyboard_protocol.modifiers . 
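Given that one-bit-per-entry correspondence, reporting modifier state is a walk over the bit-set; schematically (a sketch, not the driver's actual event path):

    static void applespi_sketch_report_modifiers(struct input_dev *input,
                                                 u8 modifiers)
    {
            int i;

            /* one bit of @modifiers per entry of applespi_controlcodes[] */
            for (i = 0; i < 8; i++)
                    if (applespi_controlcodes[i])
                            input_report_key(input, applespi_controlcodes[i],
                                             modifiers & BIT(i));
    }
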
+ */ +static const unsigned char applespi_controlcodes[] = { + KEY_LEFTCTRL, + KEY_LEFTSHIFT, + KEY_LEFTALT, + KEY_LEFTMETA, + 0, + KEY_RIGHTSHIFT, + KEY_RIGHTALT, + KEY_RIGHTMETA +}; + +struct applespi_key_translation { + u16 from; + u16 to; + u8 flags; +}; + +static const struct applespi_key_translation applespi_fn_codes[] = { + { KEY_BACKSPACE, KEY_DELETE }, + { KEY_ENTER, KEY_INSERT }, + { KEY_F1, KEY_BRIGHTNESSDOWN, APPLE_FLAG_FKEY }, + { KEY_F2, KEY_BRIGHTNESSUP, APPLE_FLAG_FKEY }, + { KEY_F3, KEY_SCALE, APPLE_FLAG_FKEY }, + { KEY_F4, KEY_DASHBOARD, APPLE_FLAG_FKEY }, + { KEY_F5, KEY_KBDILLUMDOWN, APPLE_FLAG_FKEY }, + { KEY_F6, KEY_KBDILLUMUP, APPLE_FLAG_FKEY }, + { KEY_F7, KEY_PREVIOUSSONG, APPLE_FLAG_FKEY }, + { KEY_F8, KEY_PLAYPAUSE, APPLE_FLAG_FKEY }, + { KEY_F9, KEY_NEXTSONG, APPLE_FLAG_FKEY }, + { KEY_F10, KEY_MUTE, APPLE_FLAG_FKEY }, + { KEY_F11, KEY_VOLUMEDOWN, APPLE_FLAG_FKEY }, + { KEY_F12, KEY_VOLUMEUP, APPLE_FLAG_FKEY }, + { KEY_RIGHT, KEY_END }, + { KEY_LEFT, KEY_HOME }, + { KEY_DOWN, KEY_PAGEDOWN }, + { KEY_UP, KEY_PAGEUP }, + { } +}; + +static const struct applespi_key_translation apple_iso_keyboard[] = { + { KEY_GRAVE, KEY_102ND }, + { KEY_102ND, KEY_GRAVE }, + { } +}; + +struct applespi_tp_model_info { + u16 model; + struct applespi_tp_info tp_info; +}; + +static const struct applespi_tp_model_info applespi_tp_models[] = { + { + .model = 0x04, /* MB8 MB9 MB10 */ + .tp_info = { -5087, -182, 5579, 6089 }, + }, + { + .model = 0x05, /* MBP13,1 MBP13,2 MBP14,1 MBP14,2 */ + .tp_info = { -6243, -170, 6749, 7685 }, + }, + { + .model = 0x06, /* MBP13,3 MBP14,3 */ + .tp_info = { -7456, -163, 7976, 9283 }, + }, + {} +}; + +typedef void (*applespi_trace_fun)(enum applespi_evt_type, + enum applespi_pkt_type, u8 *, size_t); + +static applespi_trace_fun applespi_get_trace_fun(enum applespi_evt_type type) +{ + switch (type) { + case ET_CMD_TP_INI: + return trace_applespi_tp_ini_cmd; + case ET_CMD_BL: + return trace_applespi_backlight_cmd; + case ET_CMD_CL: + return trace_applespi_caps_lock_cmd; + case ET_RD_KEYB: + return trace_applespi_keyboard_data; + case ET_RD_TPAD: + return trace_applespi_touchpad_data; + case ET_RD_UNKN: + return trace_applespi_unknown_data; + default: + WARN_ONCE(1, "Unknown msg type %d", type); + return trace_applespi_unknown_data; + } +} + +static void applespi_setup_read_txfrs(struct applespi_data *applespi) +{ + struct spi_message *msg = &applespi->rd_m; + struct spi_transfer *dl_t = &applespi->dl_t; + struct spi_transfer *rd_t = &applespi->rd_t; + + memset(dl_t, 0, sizeof(*dl_t)); + memset(rd_t, 0, sizeof(*rd_t)); + + dl_t->delay_usecs = applespi->spi_settings.spi_cs_delay; + + rd_t->rx_buf = applespi->rx_buffer; + rd_t->len = APPLESPI_PACKET_SIZE; + + spi_message_init(msg); + spi_message_add_tail(dl_t, msg); + spi_message_add_tail(rd_t, msg); +} + +static void applespi_setup_write_txfrs(struct applespi_data *applespi) +{ + struct spi_message *msg = &applespi->wr_m; + struct spi_transfer *wt_t = &applespi->ww_t; + struct spi_transfer *dl_t = &applespi->wd_t; + struct spi_transfer *wr_t = &applespi->wr_t; + struct spi_transfer *st_t = &applespi->st_t; + + memset(wt_t, 0, sizeof(*wt_t)); + memset(dl_t, 0, sizeof(*dl_t)); + memset(wr_t, 0, sizeof(*wr_t)); + memset(st_t, 0, sizeof(*st_t)); + + /* + * All we need here is a delay at the beginning of the message before + * asserting cs. But the current spi API doesn't support this, so we + * end up with an extra unnecessary (but harmless) cs assertion and + * deassertion. 
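Spelled out, the four transfers built below produce the following on-the-wire sequence (a plain reading of the code that follows, including the spurious chip-select toggle the comment above describes):

    /*
     * 1. ww_t: empty transfer, SPI_RW_CHG_DELAY_US delay, cs toggled
     *          (the workaround for the missing pre-message delay)
     * 2. dl_t: empty transfer providing the cs-to-clk delay (spi_cs_delay)
     * 3. wr_t: the 256-byte packet write, then another SPI_RW_CHG_DELAY_US
     * 4. st_t: the 4-byte status read
     */
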
+ */ + wt_t->delay_usecs = SPI_RW_CHG_DELAY_US; + wt_t->cs_change = 1; + + dl_t->delay_usecs = applespi->spi_settings.spi_cs_delay; + + wr_t->tx_buf = applespi->tx_buffer; + wr_t->len = APPLESPI_PACKET_SIZE; + wr_t->delay_usecs = SPI_RW_CHG_DELAY_US; + + st_t->rx_buf = applespi->tx_status; + st_t->len = APPLESPI_STATUS_SIZE; + + spi_message_init(msg); + spi_message_add_tail(wt_t, msg); + spi_message_add_tail(dl_t, msg); + spi_message_add_tail(wr_t, msg); + spi_message_add_tail(st_t, msg); +} + +static int applespi_async(struct applespi_data *applespi, + struct spi_message *message, void (*complete)(void *)) +{ + message->complete = complete; + message->context = applespi; + + return spi_async(applespi->spi, message); +} + +static inline bool applespi_check_write_status(struct applespi_data *applespi, + int sts) +{ + static u8 status_ok[] = { 0xac, 0x27, 0x68, 0xd5 }; + + if (sts < 0) { + dev_warn(&applespi->spi->dev, "Error writing to device: %d\n", + sts); + return false; + } + + if (memcmp(applespi->tx_status, status_ok, APPLESPI_STATUS_SIZE)) { + dev_warn(&applespi->spi->dev, "Error writing to device: %*ph\n", + APPLESPI_STATUS_SIZE, applespi->tx_status); + return false; + } + + return true; +} + +static int applespi_get_spi_settings(struct applespi_data *applespi) +{ + struct acpi_device *adev = ACPI_COMPANION(&applespi->spi->dev); + const union acpi_object *o; + struct spi_settings *settings = &applespi->spi_settings; + + if (!acpi_dev_get_property(adev, "spiCSDelay", ACPI_TYPE_BUFFER, &o)) + settings->spi_cs_delay = *(u64 *)o->buffer.pointer; + else + dev_warn(&applespi->spi->dev, + "Property spiCSDelay not found\n"); + + if (!acpi_dev_get_property(adev, "resetA2RUsec", ACPI_TYPE_BUFFER, &o)) + settings->reset_a2r_usec = *(u64 *)o->buffer.pointer; + else + dev_warn(&applespi->spi->dev, + "Property resetA2RUsec not found\n"); + + if (!acpi_dev_get_property(adev, "resetRecUsec", ACPI_TYPE_BUFFER, &o)) + settings->reset_rec_usec = *(u64 *)o->buffer.pointer; + else + dev_warn(&applespi->spi->dev, + "Property resetRecUsec not found\n"); + + dev_dbg(&applespi->spi->dev, + "SPI settings: spi_cs_delay=%llu reset_a2r_usec=%llu reset_rec_usec=%llu\n", + settings->spi_cs_delay, settings->reset_a2r_usec, + settings->reset_rec_usec); + + return 0; +} + +static int applespi_setup_spi(struct applespi_data *applespi) +{ + int sts; + + sts = applespi_get_spi_settings(applespi); + if (sts) + return sts; + + spin_lock_init(&applespi->cmd_msg_lock); + init_waitqueue_head(&applespi->drain_complete); + + return 0; +} + +static int applespi_enable_spi(struct applespi_data *applespi) +{ + acpi_status acpi_sts; + unsigned long long spi_status; + + /* check if SPI is already enabled, so we can skip the delay below */ + acpi_sts = acpi_evaluate_integer(applespi->sist, NULL, NULL, + &spi_status); + if (ACPI_SUCCESS(acpi_sts) && spi_status) + return 0; + + /* SIEN(1) will enable SPI communication */ + acpi_sts = acpi_execute_simple_method(applespi->sien, NULL, 1); + if (ACPI_FAILURE(acpi_sts)) { + dev_err(&applespi->spi->dev, "SIEN failed: %s\n", + acpi_format_exception(acpi_sts)); + return -ENODEV; + } + + /* + * Allow the SPI interface to come up before returning. Without this + * delay, the SPI commands to enable multitouch mode may not reach + * the trackpad controller, causing pointer movement to break upon + * resume from sleep. 
+ */ + msleep(50); + + return 0; +} + +static int applespi_send_cmd_msg(struct applespi_data *applespi); + +static void applespi_msg_complete(struct applespi_data *applespi, + bool is_write_msg, bool is_read_compl) +{ + unsigned long flags; + + spin_lock_irqsave(&applespi->cmd_msg_lock, flags); + + if (is_read_compl) + applespi->read_active = false; + if (is_write_msg) + applespi->write_active = false; + + if (applespi->drain && !applespi->write_active) + wake_up_all(&applespi->drain_complete); + + if (is_write_msg) { + applespi->cmd_msg_queued = false; + applespi_send_cmd_msg(applespi); + } + + spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags); +} + +static void applespi_async_write_complete(void *context) +{ + struct applespi_data *applespi = context; + enum applespi_evt_type evt_type = applespi->cmd_evt_type; + + applespi_get_trace_fun(evt_type)(evt_type, PT_WRITE, + applespi->tx_buffer, + APPLESPI_PACKET_SIZE); + applespi_get_trace_fun(evt_type)(evt_type, PT_STATUS, + applespi->tx_status, + APPLESPI_STATUS_SIZE); + + if (!applespi_check_write_status(applespi, applespi->wr_m.status)) { + /* + * If we got an error, we presumably won't get the expected + * response message either. + */ + applespi_msg_complete(applespi, true, false); + } +} + +static int applespi_send_cmd_msg(struct applespi_data *applespi) +{ + u16 crc; + int sts; + struct spi_packet *packet = (struct spi_packet *)applespi->tx_buffer; + struct message *message = (struct message *)packet->data; + u16 msg_len; + u8 device; + + /* check if draining */ + if (applespi->drain) + return 0; + + /* check whether send is in progress */ + if (applespi->cmd_msg_queued) + return 0; + + /* set up packet */ + memset(packet, 0, APPLESPI_PACKET_SIZE); + + /* are we processing init commands? */ + if (applespi->want_tp_info_cmd) { + applespi->want_tp_info_cmd = false; + applespi->want_mt_init_cmd = true; + applespi->cmd_evt_type = ET_CMD_TP_INI; + + /* build init command */ + device = PACKET_DEV_INFO; + + message->type = cpu_to_le16(0x1020); + msg_len = sizeof(message->tp_info_command); + + message->zero = 0x02; + message->rsp_buf_len = cpu_to_le16(0x0200); + + } else if (applespi->want_mt_init_cmd) { + applespi->want_mt_init_cmd = false; + applespi->cmd_evt_type = ET_CMD_TP_INI; + + /* build init command */ + device = PACKET_DEV_TPAD; + + message->type = cpu_to_le16(0x0252); + msg_len = sizeof(message->init_mt_command); + + message->init_mt_command.cmd = cpu_to_le16(0x0102); + + /* do we need caps-lock command? */ + } else if (applespi->want_cl_led_on != applespi->have_cl_led_on) { + applespi->have_cl_led_on = applespi->want_cl_led_on; + applespi->cmd_evt_type = ET_CMD_CL; + + /* build led command */ + device = PACKET_DEV_KEYB; + + message->type = cpu_to_le16(0x0151); + msg_len = sizeof(message->capsl_command); + + message->capsl_command.unknown = 0x01; + message->capsl_command.led = applespi->have_cl_led_on ? 2 : 0; + + /* do we need backlight command? 
*/ + } else if (applespi->want_bl_level != applespi->have_bl_level) { + applespi->have_bl_level = applespi->want_bl_level; + applespi->cmd_evt_type = ET_CMD_BL; + + /* build command buffer */ + device = PACKET_DEV_KEYB; + + message->type = cpu_to_le16(0xB051); + msg_len = sizeof(message->bl_command); + + message->bl_command.const1 = cpu_to_le16(0x01B0); + message->bl_command.level = + cpu_to_le16(applespi->have_bl_level); + + if (applespi->have_bl_level > 0) + message->bl_command.const2 = cpu_to_le16(0x01F4); + else + message->bl_command.const2 = cpu_to_le16(0x0001); + + /* everything's up-to-date */ + } else { + return 0; + } + + /* finalize packet */ + packet->flags = PACKET_TYPE_WRITE; + packet->device = device; + packet->length = cpu_to_le16(MSG_HEADER_SIZE + msg_len); + + message->counter = applespi->cmd_msg_cntr++ % (U8_MAX + 1); + + message->length = cpu_to_le16(msg_len - 2); + if (!message->rsp_buf_len) + message->rsp_buf_len = message->length; + + crc = crc16(0, (u8 *)message, le16_to_cpu(packet->length) - 2); + put_unaligned_le16(crc, &message->data[msg_len - 2]); + + crc = crc16(0, (u8 *)packet, sizeof(*packet) - 2); + packet->crc16 = cpu_to_le16(crc); + + /* send command */ + sts = applespi_async(applespi, &applespi->wr_m, + applespi_async_write_complete); + if (sts) { + dev_warn(&applespi->spi->dev, + "Error queueing async write to device: %d\n", sts); + return sts; + } + + applespi->cmd_msg_queued = true; + applespi->write_active = true; + + return 0; +} + +static void applespi_init(struct applespi_data *applespi, bool is_resume) +{ + unsigned long flags; + + spin_lock_irqsave(&applespi->cmd_msg_lock, flags); + + if (is_resume) + applespi->want_mt_init_cmd = true; + else + applespi->want_tp_info_cmd = true; + applespi_send_cmd_msg(applespi); + + spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags); +} + +static int applespi_set_capsl_led(struct applespi_data *applespi, + bool capslock_on) +{ + unsigned long flags; + int sts; + + spin_lock_irqsave(&applespi->cmd_msg_lock, flags); + + applespi->want_cl_led_on = capslock_on; + sts = applespi_send_cmd_msg(applespi); + + spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags); + + return sts; +} + +static void applespi_set_bl_level(struct led_classdev *led_cdev, + enum led_brightness value) +{ + struct applespi_data *applespi = + container_of(led_cdev, struct applespi_data, backlight_info); + unsigned long flags; + + spin_lock_irqsave(&applespi->cmd_msg_lock, flags); + + if (value == 0) { + applespi->want_bl_level = value; + } else { + /* + * The backlight does not turn on till level 32, so we scale + * the range here so that from a user's perspective it turns + * on at 1. 
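+ * (Worked example, assuming KBD_BL_LEVEL_MIN is the hardware's level 32 + * mentioned above and that KBD_BL_LEVEL_ADJ / KBD_BL_LEVEL_SCALE + * compress the input range accordingly: an input of 1 then lands just + * above 32, the first level at which the LEDs respond, while the + * maximum input still maps to the hardware's top level.)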
+ */ + applespi->want_bl_level = + ((value * KBD_BL_LEVEL_ADJ) / KBD_BL_LEVEL_SCALE + + KBD_BL_LEVEL_MIN); + } + + applespi_send_cmd_msg(applespi); + + spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags); +} + +static int applespi_event(struct input_dev *dev, unsigned int type, + unsigned int code, int value) +{ + struct applespi_data *applespi = input_get_drvdata(dev); + + switch (type) { + case EV_LED: + applespi_set_capsl_led(applespi, !!test_bit(LED_CAPSL, dev->led)); + return 0; + } + + return -EINVAL; +} + +/* lifted from the BCM5974 driver and renamed from raw2int */ +/* convert 16-bit little endian to signed integer */ +static inline int le16_to_int(__le16 x) +{ + return (signed short)le16_to_cpu(x); +} + +static void applespi_debug_update_dimensions(struct applespi_data *applespi, + const struct tp_finger *f) +{ + applespi->tp_dim_min_x = min_t(int, applespi->tp_dim_min_x, f->abs_x); + applespi->tp_dim_max_x = max_t(int, applespi->tp_dim_max_x, f->abs_x); + applespi->tp_dim_min_y = min_t(int, applespi->tp_dim_min_y, f->abs_y); + applespi->tp_dim_max_y = max_t(int, applespi->tp_dim_max_y, f->abs_y); +} + +static int applespi_tp_dim_open(struct inode *inode, struct file *file) +{ + struct applespi_data *applespi = inode->i_private; + + file->private_data = applespi; + + snprintf(applespi->tp_dim_val, sizeof(applespi->tp_dim_val), + "0x%.4x %dx%d+%u+%u\n", + applespi->touchpad_input_dev->id.product, + applespi->tp_dim_min_x, applespi->tp_dim_min_y, + applespi->tp_dim_max_x - applespi->tp_dim_min_x, + applespi->tp_dim_max_y - applespi->tp_dim_min_y); + + return nonseekable_open(inode, file); +} + +static ssize_t applespi_tp_dim_read(struct file *file, char __user *buf, + size_t len, loff_t *off) +{ + struct applespi_data *applespi = file->private_data; + + return simple_read_from_buffer(buf, len, off, applespi->tp_dim_val, + strlen(applespi->tp_dim_val)); +} + +static const struct file_operations applespi_tp_dim_fops = { + .owner = THIS_MODULE, + .open = applespi_tp_dim_open, + .read = applespi_tp_dim_read, + .llseek = no_llseek, +}; + +static void report_finger_data(struct input_dev *input, int slot, + const struct input_mt_pos *pos, + const struct tp_finger *f) +{ + input_mt_slot(input, slot); + input_mt_report_slot_state(input, MT_TOOL_FINGER, true); + + input_report_abs(input, ABS_MT_TOUCH_MAJOR, + le16_to_int(f->touch_major) << 1); + input_report_abs(input, ABS_MT_TOUCH_MINOR, + le16_to_int(f->touch_minor) << 1); + input_report_abs(input, ABS_MT_WIDTH_MAJOR, + le16_to_int(f->tool_major) << 1); + input_report_abs(input, ABS_MT_WIDTH_MINOR, + le16_to_int(f->tool_minor) << 1); + input_report_abs(input, ABS_MT_ORIENTATION, + MAX_FINGER_ORIENTATION - le16_to_int(f->orientation)); + input_report_abs(input, ABS_MT_POSITION_X, pos->x); + input_report_abs(input, ABS_MT_POSITION_Y, pos->y); +} + +static void report_tp_state(struct applespi_data *applespi, + struct touchpad_protocol *t) +{ + const struct tp_finger *f; + struct input_dev *input; + const struct applespi_tp_info *tp_info = &applespi->tp_info; + int i, n; + + /* touchpad_input_dev is set async in worker */ + input = smp_load_acquire(&applespi->touchpad_input_dev); + if (!input) + return; /* touchpad isn't initialized yet */ + + n = 0; + + for (i = 0; i < t->number_of_fingers; i++) { + f = &t->fingers[i]; + if (le16_to_int(f->touch_major) == 0) + continue; + applespi->pos[n].x = le16_to_int(f->abs_x); + applespi->pos[n].y = tp_info->y_min + tp_info->y_max - + le16_to_int(f->abs_y); + n++; + + if (applespi->debug_tp_dim) + 
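+ /* + * Dimension logging exists to calibrate unknown touchpads. A + * possible flow (sketch; paths assume debugfs is mounted at + * /sys/kernel/debug): write 1 to applespi/enable_tp_dim, sweep a + * finger along all edges of the pad, then read applespi/tp_dim to + * obtain an XxY+W+H geometry (the MB8 entry above would read + * -5087x-182+10666+6271) suitable for the touchpad_dimensions + * module parameter. + */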
applespi_debug_update_dimensions(applespi, f); + } + + input_mt_assign_slots(input, applespi->slots, applespi->pos, n, 0); + + for (i = 0; i < n; i++) + report_finger_data(input, applespi->slots[i], + &applespi->pos[i], &t->fingers[i]); + + input_mt_sync_frame(input); + input_report_key(input, BTN_LEFT, t->clicked); + + input_sync(input); +} + +static const struct applespi_key_translation * +applespi_find_translation(const struct applespi_key_translation *table, u16 key) +{ + const struct applespi_key_translation *trans; + + for (trans = table; trans->from; trans++) + if (trans->from == key) + return trans; + + return NULL; +} + +static unsigned int applespi_translate_fn_key(unsigned int key, int fn_pressed) +{ + const struct applespi_key_translation *trans; + int do_translate; + + trans = applespi_find_translation(applespi_fn_codes, key); + if (trans) { + if (trans->flags & APPLE_FLAG_FKEY) + do_translate = (fnmode == 2 && fn_pressed) || + (fnmode == 1 && !fn_pressed); + else + do_translate = fn_pressed; + + if (do_translate) + key = trans->to; + } + + return key; +} + +static unsigned int applespi_translate_iso_layout(unsigned int key) +{ + const struct applespi_key_translation *trans; + + trans = applespi_find_translation(apple_iso_keyboard, key); + if (trans) + key = trans->to; + + return key; +} + +static unsigned int applespi_code_to_key(u8 code, int fn_pressed) +{ + unsigned int key = applespi_scancodes[code]; + + if (fnmode) + key = applespi_translate_fn_key(key, fn_pressed); + if (iso_layout) + key = applespi_translate_iso_layout(key); + return key; +} + +static void +applespi_remap_fn_key(struct keyboard_protocol *keyboard_protocol) +{ + unsigned char tmp; + u8 bit = BIT((fnremap - 1) & 0x07); + + if (!fnremap || fnremap > ARRAY_SIZE(applespi_controlcodes) || + !applespi_controlcodes[fnremap - 1]) + return; + + tmp = keyboard_protocol->fn_pressed; + keyboard_protocol->fn_pressed = !!(keyboard_protocol->modifiers & bit); + if (tmp) + keyboard_protocol->modifiers |= bit; + else + keyboard_protocol->modifiers &= ~bit; +} + +static void +applespi_handle_keyboard_event(struct applespi_data *applespi, + struct keyboard_protocol *keyboard_protocol) +{ + unsigned int key; + int i; + + compiletime_assert(ARRAY_SIZE(applespi_controlcodes) == + sizeof_field(struct keyboard_protocol, modifiers) * 8, + "applespi_controlcodes has wrong number of entries"); + + /* check for rollover overflow, which is signalled by all keys == 1 */ + if (!memchr_inv(keyboard_protocol->keys_pressed, 1, MAX_ROLLOVER)) + return; + + /* remap fn key if desired */ + applespi_remap_fn_key(keyboard_protocol); + + /* check released keys */ + for (i = 0; i < MAX_ROLLOVER; i++) { + if (memchr(keyboard_protocol->keys_pressed, + applespi->last_keys_pressed[i], MAX_ROLLOVER)) + continue; /* key is still pressed */ + + key = applespi_code_to_key(applespi->last_keys_pressed[i], + applespi->last_keys_fn_pressed[i]); + input_report_key(applespi->keyboard_input_dev, key, 0); + applespi->last_keys_fn_pressed[i] = 0; + } + + /* check pressed keys */ + for (i = 0; i < MAX_ROLLOVER; i++) { + if (keyboard_protocol->keys_pressed[i] < + ARRAY_SIZE(applespi_scancodes) && + keyboard_protocol->keys_pressed[i] > 0) { + key = applespi_code_to_key( + keyboard_protocol->keys_pressed[i], + keyboard_protocol->fn_pressed); + input_report_key(applespi->keyboard_input_dev, key, 1); + applespi->last_keys_fn_pressed[i] = + keyboard_protocol->fn_pressed; + } + } + + /* check control keys */ + for (i = 0; i < ARRAY_SIZE(applespi_controlcodes); i++) { + 
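+ /* + * modifiers is a bitmap: bit i corresponds to + * applespi_controlcodes[i], as the compile-time assert above + * guarantees. + */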
if (keyboard_protocol->modifiers & BIT(i)) + input_report_key(applespi->keyboard_input_dev, + applespi_controlcodes[i], 1); + else + input_report_key(applespi->keyboard_input_dev, + applespi_controlcodes[i], 0); + } + + /* check function key */ + if (keyboard_protocol->fn_pressed && !applespi->last_fn_pressed) + input_report_key(applespi->keyboard_input_dev, KEY_FN, 1); + else if (!keyboard_protocol->fn_pressed && applespi->last_fn_pressed) + input_report_key(applespi->keyboard_input_dev, KEY_FN, 0); + applespi->last_fn_pressed = keyboard_protocol->fn_pressed; + + /* done */ + input_sync(applespi->keyboard_input_dev); + memcpy(&applespi->last_keys_pressed, keyboard_protocol->keys_pressed, + sizeof(applespi->last_keys_pressed)); +} + +static const struct applespi_tp_info *applespi_find_touchpad_info(u8 model) +{ + const struct applespi_tp_model_info *info; + + for (info = applespi_tp_models; info->model; info++) { + if (info->model == model) + return &info->tp_info; + } + + return NULL; +} + +static int +applespi_register_touchpad_device(struct applespi_data *applespi, + struct touchpad_info_protocol *rcvd_tp_info) +{ + const struct applespi_tp_info *tp_info; + struct input_dev *touchpad_input_dev; + int sts; + + /* set up touchpad dimensions */ + tp_info = applespi_find_touchpad_info(rcvd_tp_info->model_no); + if (!tp_info) { + dev_warn(&applespi->spi->dev, + "Unknown touchpad model %x - falling back to MB8 touchpad\n", + rcvd_tp_info->model_no); + tp_info = &applespi_tp_models[0].tp_info; + } + + applespi->tp_info = *tp_info; + + if (touchpad_dimensions[0]) { + int x, y, w, h; + + sts = sscanf(touchpad_dimensions, "%dx%d+%u+%u", &x, &y, &w, &h); + if (sts == 4) { + dev_info(&applespi->spi->dev, + "Overriding touchpad dimensions from module param\n"); + applespi->tp_info.x_min = x; + applespi->tp_info.y_min = y; + applespi->tp_info.x_max = x + w; + applespi->tp_info.y_max = y + h; + } else { + dev_warn(&applespi->spi->dev, + "Invalid touchpad dimensions '%s': must be in the form XxY+W+H\n", + touchpad_dimensions); + touchpad_dimensions[0] = '\0'; + } + } + if (!touchpad_dimensions[0]) { + snprintf(touchpad_dimensions, sizeof(touchpad_dimensions), + "%dx%d+%u+%u", + applespi->tp_info.x_min, + applespi->tp_info.y_min, + applespi->tp_info.x_max - applespi->tp_info.x_min, + applespi->tp_info.y_max - applespi->tp_info.y_min); + } + + /* create touchpad input device */ + touchpad_input_dev = devm_input_allocate_device(&applespi->spi->dev); + if (!touchpad_input_dev) { + dev_err(&applespi->spi->dev, + "Failed to allocate touchpad input device\n"); + return -ENOMEM; + } + + touchpad_input_dev->name = "Apple SPI Touchpad"; + touchpad_input_dev->phys = "applespi/input1"; + touchpad_input_dev->dev.parent = &applespi->spi->dev; + touchpad_input_dev->id.bustype = BUS_SPI; + touchpad_input_dev->id.vendor = SYNAPTICS_VENDOR_ID; + touchpad_input_dev->id.product = + rcvd_tp_info->model_no << 8 | rcvd_tp_info->model_flags; + + /* basic properties */ + input_set_capability(touchpad_input_dev, EV_REL, REL_X); + input_set_capability(touchpad_input_dev, EV_REL, REL_Y); + + __set_bit(INPUT_PROP_POINTER, touchpad_input_dev->propbit); + __set_bit(INPUT_PROP_BUTTONPAD, touchpad_input_dev->propbit); + + /* finger touch area */ + input_set_abs_params(touchpad_input_dev, ABS_MT_TOUCH_MAJOR, + 0, 5000, 0, 0); + input_set_abs_params(touchpad_input_dev, ABS_MT_TOUCH_MINOR, + 0, 5000, 0, 0); + + /* finger approach area */ + input_set_abs_params(touchpad_input_dev, ABS_MT_WIDTH_MAJOR, + 0, 5000, 0, 0); + 
input_set_abs_params(touchpad_input_dev, ABS_MT_WIDTH_MINOR, + 0, 5000, 0, 0); + + /* finger orientation */ + input_set_abs_params(touchpad_input_dev, ABS_MT_ORIENTATION, + -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION, + 0, 0); + + /* finger position */ + input_set_abs_params(touchpad_input_dev, ABS_MT_POSITION_X, + applespi->tp_info.x_min, applespi->tp_info.x_max, + 0, 0); + input_set_abs_params(touchpad_input_dev, ABS_MT_POSITION_Y, + applespi->tp_info.y_min, applespi->tp_info.y_max, + 0, 0); + + /* touchpad button */ + input_set_capability(touchpad_input_dev, EV_KEY, BTN_LEFT); + + /* multitouch */ + sts = input_mt_init_slots(touchpad_input_dev, MAX_FINGERS, + INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED | + INPUT_MT_TRACK); + if (sts) { + dev_err(&applespi->spi->dev, + "failed to initialize slots: %d", sts); + return sts; + } + + /* register input device */ + sts = input_register_device(touchpad_input_dev); + if (sts) { + dev_err(&applespi->spi->dev, + "Unable to register touchpad input device (%d)\n", sts); + return sts; + } + + /* touchpad_input_dev is read async in spi callback */ + smp_store_release(&applespi->touchpad_input_dev, touchpad_input_dev); + + return 0; +} + +static void applespi_worker(struct work_struct *work) +{ + struct applespi_data *applespi = + container_of(work, struct applespi_data, work); + + applespi_register_touchpad_device(applespi, &applespi->rcvd_tp_info); +} + +static void applespi_handle_cmd_response(struct applespi_data *applespi, + struct spi_packet *packet, + struct message *message) +{ + if (packet->device == PACKET_DEV_INFO && + le16_to_cpu(message->type) == 0x1020) { + /* + * We're not allowed to sleep here, but registering an input + * device can sleep. + */ + applespi->rcvd_tp_info = message->tp_info; + schedule_work(&applespi->work); + return; + } + + if (le16_to_cpu(message->length) != 0x0000) { + dev_warn_ratelimited(&applespi->spi->dev, + "Received unexpected write response: length=%x\n", + le16_to_cpu(message->length)); + return; + } + + if (packet->device == PACKET_DEV_TPAD && + le16_to_cpu(message->type) == 0x0252 && + le16_to_cpu(message->rsp_buf_len) == 0x0002) + dev_info(&applespi->spi->dev, "modeswitch done.\n"); +} + +static bool applespi_verify_crc(struct applespi_data *applespi, u8 *buffer, + size_t buflen) +{ + u16 crc; + + crc = crc16(0, buffer, buflen); + if (crc) { + dev_warn_ratelimited(&applespi->spi->dev, + "Received corrupted packet (crc mismatch)\n"); + trace_applespi_bad_crc(ET_RD_CRC, READ, buffer, buflen); + + return false; + } + + return true; +} + +static void applespi_debug_print_read_packet(struct applespi_data *applespi, + struct spi_packet *packet) +{ + unsigned int evt_type; + + if (packet->flags == PACKET_TYPE_READ && + packet->device == PACKET_DEV_KEYB) + evt_type = ET_RD_KEYB; + else if (packet->flags == PACKET_TYPE_READ && + packet->device == PACKET_DEV_TPAD) + evt_type = ET_RD_TPAD; + else if (packet->flags == PACKET_TYPE_WRITE) + evt_type = applespi->cmd_evt_type; + else + evt_type = ET_RD_UNKN; + + applespi_get_trace_fun(evt_type)(evt_type, PT_READ, applespi->rx_buffer, + APPLESPI_PACKET_SIZE); +} + +static void applespi_got_data(struct applespi_data *applespi) +{ + struct spi_packet *packet; + struct message *message; + unsigned int msg_len; + unsigned int off; + unsigned int rem; + unsigned int len; + + /* process packet header */ + if (!applespi_verify_crc(applespi, applespi->rx_buffer, + APPLESPI_PACKET_SIZE)) { + unsigned long flags; + + spin_lock_irqsave(&applespi->cmd_msg_lock, flags); + + if 
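+ /* + * Note that crc16 run over data with its trailing crc16 appended + * yields zero, which is what applespi_verify_crc() relies on. A + * corrupted packet still terminates the in-flight read (and any + * write awaiting its response), so wake up a pending drain: + */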
(applespi->drain) { + applespi->read_active = false; + applespi->write_active = false; + + wake_up_all(&applespi->drain_complete); + } + + spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags); + + return; + } + + packet = (struct spi_packet *)applespi->rx_buffer; + + applespi_debug_print_read_packet(applespi, packet); + + off = le16_to_cpu(packet->offset); + rem = le16_to_cpu(packet->remaining); + len = le16_to_cpu(packet->length); + + if (len > sizeof(packet->data)) { + dev_warn_ratelimited(&applespi->spi->dev, + "Received corrupted packet (invalid packet length %u)\n", + len); + goto msg_complete; + } + + /* handle multi-packet messages */ + if (rem > 0 || off > 0) { + if (off != applespi->saved_msg_len) { + dev_warn_ratelimited(&applespi->spi->dev, + "Received unexpected offset (got %u, expected %u)\n", + off, applespi->saved_msg_len); + goto msg_complete; + } + + if (off + rem > MAX_PKTS_PER_MSG * APPLESPI_PACKET_SIZE) { + dev_warn_ratelimited(&applespi->spi->dev, + "Received message too large (size %u)\n", + off + rem); + goto msg_complete; + } + + if (off + len > MAX_PKTS_PER_MSG * APPLESPI_PACKET_SIZE) { + dev_warn_ratelimited(&applespi->spi->dev, + "Received message too large (size %u)\n", + off + len); + goto msg_complete; + } + + memcpy(applespi->msg_buf + off, &packet->data, len); + applespi->saved_msg_len += len; + + if (rem > 0) + return; + + message = (struct message *)applespi->msg_buf; + msg_len = applespi->saved_msg_len; + } else { + message = (struct message *)&packet->data; + msg_len = len; + } + + /* got complete message - verify */ + if (!applespi_verify_crc(applespi, (u8 *)message, msg_len)) + goto msg_complete; + + if (le16_to_cpu(message->length) != msg_len - MSG_HEADER_SIZE - 2) { + dev_warn_ratelimited(&applespi->spi->dev, + "Received corrupted packet (invalid message length %u - expected %u)\n", + le16_to_cpu(message->length), + msg_len - MSG_HEADER_SIZE - 2); + goto msg_complete; + } + + /* handle message */ + if (packet->flags == PACKET_TYPE_READ && + packet->device == PACKET_DEV_KEYB) { + applespi_handle_keyboard_event(applespi, &message->keyboard); + + } else if (packet->flags == PACKET_TYPE_READ && + packet->device == PACKET_DEV_TPAD) { + struct touchpad_protocol *tp; + size_t tp_len; + + tp = &message->touchpad; + tp_len = sizeof(*tp) + + tp->number_of_fingers * sizeof(tp->fingers[0]); + + if (le16_to_cpu(message->length) + 2 != tp_len) { + dev_warn_ratelimited(&applespi->spi->dev, + "Received corrupted packet (invalid message length %u - num-fingers %u, tp-len %zu)\n", + le16_to_cpu(message->length), + tp->number_of_fingers, tp_len); + goto msg_complete; + } + + if (tp->number_of_fingers > MAX_FINGERS) { + dev_warn_ratelimited(&applespi->spi->dev, + "Number of reported fingers (%u) exceeds max (%u)\n", + tp->number_of_fingers, + MAX_FINGERS); + tp->number_of_fingers = MAX_FINGERS; + } + + report_tp_state(applespi, tp); + + } else if (packet->flags == PACKET_TYPE_WRITE) { + applespi_handle_cmd_response(applespi, packet, message); + } + +msg_complete: + applespi->saved_msg_len = 0; + + applespi_msg_complete(applespi, packet->flags == PACKET_TYPE_WRITE, + true); +} + +static void applespi_async_read_complete(void *context) +{ + struct applespi_data *applespi = context; + + if (applespi->rd_m.status < 0) { + dev_warn(&applespi->spi->dev, "Error reading from device: %d\n", + applespi->rd_m.status); + /* + * We don't actually know if this was a pure read, or a response + * to a write.
But this is a rare error condition that should + * never occur, so clearing both flags to avoid deadlock. + */ + applespi_msg_complete(applespi, true, true); + } else { + applespi_got_data(applespi); + } + + acpi_finish_gpe(NULL, applespi->gpe); +} + +static u32 applespi_notify(acpi_handle gpe_device, u32 gpe, void *context) +{ + struct applespi_data *applespi = context; + int sts; + unsigned long flags; + + trace_applespi_irq_received(ET_RD_IRQ, PT_READ); + + spin_lock_irqsave(&applespi->cmd_msg_lock, flags); + + if (!applespi->suspended) { + sts = applespi_async(applespi, &applespi->rd_m, + applespi_async_read_complete); + if (sts) + dev_warn(&applespi->spi->dev, + "Error queueing async read to device: %d\n", + sts); + else + applespi->read_active = true; + } + + spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags); + + return ACPI_INTERRUPT_HANDLED; +} + +static int applespi_get_saved_bl_level(struct applespi_data *applespi) +{ + struct efivar_entry *efivar_entry; + u16 efi_data = 0; + unsigned long efi_data_len; + int sts; + + efivar_entry = kmalloc(sizeof(*efivar_entry), GFP_KERNEL); + if (!efivar_entry) + return -ENOMEM; + + memcpy(efivar_entry->var.VariableName, EFI_BL_LEVEL_NAME, + sizeof(EFI_BL_LEVEL_NAME)); + efivar_entry->var.VendorGuid = EFI_BL_LEVEL_GUID; + efi_data_len = sizeof(efi_data); + + sts = efivar_entry_get(efivar_entry, NULL, &efi_data_len, &efi_data); + if (sts && sts != -ENOENT) + dev_warn(&applespi->spi->dev, + "Error getting backlight level from EFI vars: %d\n", + sts); + + kfree(efivar_entry); + + return sts ? sts : efi_data; +} + +static void applespi_save_bl_level(struct applespi_data *applespi, + unsigned int level) +{ + efi_guid_t efi_guid; + u32 efi_attr; + unsigned long efi_data_len; + u16 efi_data; + int sts; + + /* Save keyboard backlight level */ + efi_guid = EFI_BL_LEVEL_GUID; + efi_data = (u16)level; + efi_data_len = sizeof(efi_data); + efi_attr = EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS; + + sts = efivar_entry_set_safe(EFI_BL_LEVEL_NAME, efi_guid, efi_attr, true, + efi_data_len, &efi_data); + if (sts) + dev_warn(&applespi->spi->dev, + "Error saving backlight level to EFI vars: %d\n", sts); +} + +static int applespi_probe(struct spi_device *spi) +{ + struct applespi_data *applespi; + acpi_handle spi_handle = ACPI_HANDLE(&spi->dev); + acpi_status acpi_sts; + int sts, i; + unsigned long long gpe, usb_status; + + /* check if the USB interface is present and enabled already */ + acpi_sts = acpi_evaluate_integer(spi_handle, "UIST", NULL, &usb_status); + if (ACPI_SUCCESS(acpi_sts) && usb_status) { + /* let the USB driver take over instead */ + dev_info(&spi->dev, "USB interface already enabled\n"); + return -ENODEV; + } + + /* allocate driver data */ + applespi = devm_kzalloc(&spi->dev, sizeof(*applespi), GFP_KERNEL); + if (!applespi) + return -ENOMEM; + + applespi->spi = spi; + + INIT_WORK(&applespi->work, applespi_worker); + + /* store the driver data */ + spi_set_drvdata(spi, applespi); + + /* create our buffers */ + applespi->tx_buffer = devm_kmalloc(&spi->dev, APPLESPI_PACKET_SIZE, + GFP_KERNEL); + applespi->tx_status = devm_kmalloc(&spi->dev, APPLESPI_STATUS_SIZE, + GFP_KERNEL); + applespi->rx_buffer = devm_kmalloc(&spi->dev, APPLESPI_PACKET_SIZE, + GFP_KERNEL); + applespi->msg_buf = devm_kmalloc_array(&spi->dev, MAX_PKTS_PER_MSG, + APPLESPI_PACKET_SIZE, + GFP_KERNEL); + + if (!applespi->tx_buffer || !applespi->tx_status || + !applespi->rx_buffer || !applespi->msg_buf) + return -ENOMEM; + + /* set 
up our spi messages */ + applespi_setup_read_txfrs(applespi); + applespi_setup_write_txfrs(applespi); + + /* cache ACPI method handles */ + acpi_sts = acpi_get_handle(spi_handle, "SIEN", &applespi->sien); + if (ACPI_FAILURE(acpi_sts)) { + dev_err(&applespi->spi->dev, + "Failed to get SIEN ACPI method handle: %s\n", + acpi_format_exception(acpi_sts)); + return -ENODEV; + } + + acpi_sts = acpi_get_handle(spi_handle, "SIST", &applespi->sist); + if (ACPI_FAILURE(acpi_sts)) { + dev_err(&applespi->spi->dev, + "Failed to get SIST ACPI method handle: %s\n", + acpi_format_exception(acpi_sts)); + return -ENODEV; + } + + /* switch on the SPI interface */ + sts = applespi_setup_spi(applespi); + if (sts) + return sts; + + sts = applespi_enable_spi(applespi); + if (sts) + return sts; + + /* setup the keyboard input dev */ + applespi->keyboard_input_dev = devm_input_allocate_device(&spi->dev); + + if (!applespi->keyboard_input_dev) + return -ENOMEM; + + applespi->keyboard_input_dev->name = "Apple SPI Keyboard"; + applespi->keyboard_input_dev->phys = "applespi/input0"; + applespi->keyboard_input_dev->dev.parent = &spi->dev; + applespi->keyboard_input_dev->id.bustype = BUS_SPI; + + applespi->keyboard_input_dev->evbit[0] = + BIT_MASK(EV_KEY) | BIT_MASK(EV_LED) | BIT_MASK(EV_REP); + applespi->keyboard_input_dev->ledbit[0] = BIT_MASK(LED_CAPSL); + + input_set_drvdata(applespi->keyboard_input_dev, applespi); + applespi->keyboard_input_dev->event = applespi_event; + + for (i = 0; i < ARRAY_SIZE(applespi_scancodes); i++) + if (applespi_scancodes[i]) + input_set_capability(applespi->keyboard_input_dev, + EV_KEY, applespi_scancodes[i]); + + for (i = 0; i < ARRAY_SIZE(applespi_controlcodes); i++) + if (applespi_controlcodes[i]) + input_set_capability(applespi->keyboard_input_dev, + EV_KEY, applespi_controlcodes[i]); + + for (i = 0; i < ARRAY_SIZE(applespi_fn_codes); i++) + if (applespi_fn_codes[i].to) + input_set_capability(applespi->keyboard_input_dev, + EV_KEY, applespi_fn_codes[i].to); + + input_set_capability(applespi->keyboard_input_dev, EV_KEY, KEY_FN); + + sts = input_register_device(applespi->keyboard_input_dev); + if (sts) { + dev_err(&applespi->spi->dev, + "Unable to register keyboard input device (%d)\n", sts); + return -ENODEV; + } + + /* + * The applespi device doesn't send interrupts normally (as is described + * in its DSDT), but rather seems to use ACPI GPEs. + */ + acpi_sts = acpi_evaluate_integer(spi_handle, "_GPE", NULL, &gpe); + if (ACPI_FAILURE(acpi_sts)) { + dev_err(&applespi->spi->dev, + "Failed to obtain GPE for SPI slave device: %s\n", + acpi_format_exception(acpi_sts)); + return -ENODEV; + } + applespi->gpe = (int)gpe; + + acpi_sts = acpi_install_gpe_handler(NULL, applespi->gpe, + ACPI_GPE_LEVEL_TRIGGERED, + applespi_notify, applespi); + if (ACPI_FAILURE(acpi_sts)) { + dev_err(&applespi->spi->dev, + "Failed to install GPE handler for GPE %d: %s\n", + applespi->gpe, acpi_format_exception(acpi_sts)); + return -ENODEV; + } + + applespi->suspended = false; + + acpi_sts = acpi_enable_gpe(NULL, applespi->gpe); + if (ACPI_FAILURE(acpi_sts)) { + dev_err(&applespi->spi->dev, + "Failed to enable GPE handler for GPE %d: %s\n", + applespi->gpe, acpi_format_exception(acpi_sts)); + acpi_remove_gpe_handler(NULL, applespi->gpe, applespi_notify); + return -ENODEV; + } + + /* trigger touchpad setup */ + applespi_init(applespi, false); + + /* + * By default this device is not enabled for wakeup; but USB keyboards + * generally are, so the expectation is that by default the keyboard + * will wake the system. 
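+ * (That default can still be changed from user-space afterwards, + * typically through the device's power/wakeup sysfs attribute.)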
+ */ + device_wakeup_enable(&spi->dev); + + /* set up keyboard-backlight */ + sts = applespi_get_saved_bl_level(applespi); + if (sts >= 0) + applespi_set_bl_level(&applespi->backlight_info, sts); + + applespi->backlight_info.name = "spi::kbd_backlight"; + applespi->backlight_info.default_trigger = "kbd-backlight"; + applespi->backlight_info.brightness_set = applespi_set_bl_level; + + sts = devm_led_classdev_register(&spi->dev, &applespi->backlight_info); + if (sts) + dev_warn(&applespi->spi->dev, + "Unable to register keyboard backlight class dev (%d)\n", + sts); + + /* set up debugfs entries for touchpad dimensions logging */ + applespi->debugfs_root = debugfs_create_dir("applespi", NULL); + if (IS_ERR(applespi->debugfs_root)) { + if (PTR_ERR(applespi->debugfs_root) != -ENODEV) + dev_warn(&applespi->spi->dev, + "Error creating debugfs root entry (%ld)\n", + PTR_ERR(applespi->debugfs_root)); + } else { + struct dentry *ret; + + ret = debugfs_create_bool("enable_tp_dim", 0600, + applespi->debugfs_root, + &applespi->debug_tp_dim); + if (IS_ERR(ret)) + dev_dbg(&applespi->spi->dev, + "Error creating debugfs entry enable_tp_dim (%ld)\n", + PTR_ERR(ret)); + + ret = debugfs_create_file("tp_dim", 0400, + applespi->debugfs_root, applespi, + &applespi_tp_dim_fops); + if (IS_ERR(ret)) + dev_dbg(&applespi->spi->dev, + "Error creating debugfs entry tp_dim (%ld)\n", + PTR_ERR(ret)); + } + + return 0; +} + +static void applespi_drain_writes(struct applespi_data *applespi) +{ + unsigned long flags; + + spin_lock_irqsave(&applespi->cmd_msg_lock, flags); + + applespi->drain = true; + wait_event_lock_irq(applespi->drain_complete, !applespi->write_active, + applespi->cmd_msg_lock); + + spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags); +} + +static void applespi_drain_reads(struct applespi_data *applespi) +{ + unsigned long flags; + + spin_lock_irqsave(&applespi->cmd_msg_lock, flags); + + wait_event_lock_irq(applespi->drain_complete, !applespi->read_active, + applespi->cmd_msg_lock); + + applespi->suspended = true; + + spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags); +} + +static int applespi_remove(struct spi_device *spi) +{ + struct applespi_data *applespi = spi_get_drvdata(spi); + + applespi_drain_writes(applespi); + + acpi_disable_gpe(NULL, applespi->gpe); + acpi_remove_gpe_handler(NULL, applespi->gpe, applespi_notify); + device_wakeup_disable(&spi->dev); + + applespi_drain_reads(applespi); + + debugfs_remove_recursive(applespi->debugfs_root); + + return 0; +} + +static void applespi_shutdown(struct spi_device *spi) +{ + struct applespi_data *applespi = spi_get_drvdata(spi); + + applespi_save_bl_level(applespi, applespi->have_bl_level); +} + +static int applespi_poweroff_late(struct device *dev) +{ + struct spi_device *spi = to_spi_device(dev); + struct applespi_data *applespi = spi_get_drvdata(spi); + + applespi_save_bl_level(applespi, applespi->have_bl_level); + + return 0; +} + +static int __maybe_unused applespi_suspend(struct device *dev) +{ + struct spi_device *spi = to_spi_device(dev); + struct applespi_data *applespi = spi_get_drvdata(spi); + acpi_status acpi_sts; + int sts; + + /* turn off caps-lock - it'll stay on otherwise */ + sts = applespi_set_capsl_led(applespi, false); + if (sts) + dev_warn(&applespi->spi->dev, + "Failed to turn off caps-lock led (%d)\n", sts); + + applespi_drain_writes(applespi); + + /* disable the interrupt */ + acpi_sts = acpi_disable_gpe(NULL, applespi->gpe); + if (ACPI_FAILURE(acpi_sts)) + dev_err(&applespi->spi->dev, + "Failed to disable GPE handler for 
GPE %d: %s\n", + applespi->gpe, acpi_format_exception(acpi_sts)); + + applespi_drain_reads(applespi); + + return 0; +} + +static int __maybe_unused applespi_resume(struct device *dev) +{ + struct spi_device *spi = to_spi_device(dev); + struct applespi_data *applespi = spi_get_drvdata(spi); + acpi_status acpi_sts; + unsigned long flags; + + /* ensure our flags and state reflect a newly resumed device */ + spin_lock_irqsave(&applespi->cmd_msg_lock, flags); + + applespi->drain = false; + applespi->have_cl_led_on = false; + applespi->have_bl_level = 0; + applespi->cmd_msg_queued = false; + applespi->read_active = false; + applespi->write_active = false; + + applespi->suspended = false; + + spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags); + + /* switch on the SPI interface */ + applespi_enable_spi(applespi); + + /* re-enable the interrupt */ + acpi_sts = acpi_enable_gpe(NULL, applespi->gpe); + if (ACPI_FAILURE(acpi_sts)) + dev_err(&applespi->spi->dev, + "Failed to re-enable GPE handler for GPE %d: %s\n", + applespi->gpe, acpi_format_exception(acpi_sts)); + + /* switch the touchpad into multitouch mode */ + applespi_init(applespi, true); + + return 0; +} + +static const struct acpi_device_id applespi_acpi_match[] = { + { "APP000D", 0 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, applespi_acpi_match); + +const struct dev_pm_ops applespi_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(applespi_suspend, applespi_resume) + .poweroff_late = applespi_poweroff_late, +}; + +static struct spi_driver applespi_driver = { + .driver = { + .name = "applespi", + .acpi_match_table = applespi_acpi_match, + .pm = &applespi_pm_ops, + }, + .probe = applespi_probe, + .remove = applespi_remove, + .shutdown = applespi_shutdown, +}; + +module_spi_driver(applespi_driver) + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("MacBook(Pro) SPI Keyboard/Touchpad driver"); +MODULE_AUTHOR("Federico Lorenzi"); +MODULE_AUTHOR("Ronald Tschalär"); diff --git a/drivers/input/keyboard/applespi.h b/drivers/input/keyboard/applespi.h new file mode 100644 index 000000000000..7f5ab10c597a --- /dev/null +++ b/drivers/input/keyboard/applespi.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * MacBook (Pro) SPI keyboard and touchpad driver + * + * Copyright (c) 2015-2019 Federico Lorenzi + * Copyright (c) 2017-2019 Ronald Tschalär + */ + +#ifndef _APPLESPI_H_ +#define _APPLESPI_H_ + +enum applespi_evt_type { + ET_CMD_TP_INI = BIT(0), + ET_CMD_BL = BIT(1), + ET_CMD_CL = BIT(2), + ET_RD_KEYB = BIT(8), + ET_RD_TPAD = BIT(9), + ET_RD_UNKN = BIT(10), + ET_RD_IRQ = BIT(11), + ET_RD_CRC = BIT(12), +}; + +enum applespi_pkt_type { + PT_READ, + PT_WRITE, + PT_STATUS, +}; + +#endif /* _APPLESPI_H_ */ diff --git a/drivers/input/keyboard/applespi_trace.h b/drivers/input/keyboard/applespi_trace.h new file mode 100644 index 000000000000..0ad1a3d79f50 --- /dev/null +++ b/drivers/input/keyboard/applespi_trace.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * MacBook (Pro) SPI keyboard and touchpad driver + * + * Copyright (c) 2015-2019 Federico Lorenzi + * Copyright (c) 2017-2019 Ronald Tschalär + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM applespi + +#if !defined(_APPLESPI_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _APPLESPI_TRACE_H_ + +#include <linux/types.h> +#include <linux/tracepoint.h> + +#include "applespi.h" + +DECLARE_EVENT_CLASS(dump_message_template, + TP_PROTO(enum applespi_evt_type evt_type, + enum applespi_pkt_type pkt_type, + u8 *buf, + size_t len), + + TP_ARGS(evt_type, pkt_type, buf, len), + + 
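+ /* + * The raw packet bytes are copied into a per-event dynamic array, + * so every trace record carries its complete buffer. + */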
TP_STRUCT__entry( + __field(enum applespi_evt_type, evt_type) + __field(enum applespi_pkt_type, pkt_type) + __field(size_t, len) + __dynamic_array(u8, buf, len) + ), + + TP_fast_assign( + __entry->evt_type = evt_type; + __entry->pkt_type = pkt_type; + __entry->len = len; + memcpy(__get_dynamic_array(buf), buf, len); + ), + + TP_printk("%-6s: %s", + __print_symbolic(__entry->pkt_type, + { PT_READ, "read" }, + { PT_WRITE, "write" }, + { PT_STATUS, "status" } + ), + __print_hex(__get_dynamic_array(buf), __entry->len)) +); + +#define DEFINE_DUMP_MESSAGE_EVENT(name) \ +DEFINE_EVENT(dump_message_template, name, \ + TP_PROTO(enum applespi_evt_type evt_type, \ + enum applespi_pkt_type pkt_type, \ + u8 *buf, \ + size_t len), \ + TP_ARGS(evt_type, pkt_type, buf, len) \ +) + +DEFINE_DUMP_MESSAGE_EVENT(applespi_tp_ini_cmd); +DEFINE_DUMP_MESSAGE_EVENT(applespi_backlight_cmd); +DEFINE_DUMP_MESSAGE_EVENT(applespi_caps_lock_cmd); +DEFINE_DUMP_MESSAGE_EVENT(applespi_keyboard_data); +DEFINE_DUMP_MESSAGE_EVENT(applespi_touchpad_data); +DEFINE_DUMP_MESSAGE_EVENT(applespi_unknown_data); +DEFINE_DUMP_MESSAGE_EVENT(applespi_bad_crc); + +TRACE_EVENT(applespi_irq_received, + TP_PROTO(enum applespi_evt_type evt_type, + enum applespi_pkt_type pkt_type), + + TP_ARGS(evt_type, pkt_type), + + TP_STRUCT__entry( + __field(enum applespi_evt_type, evt_type) + __field(enum applespi_pkt_type, pkt_type) + ), + + TP_fast_assign( + __entry->evt_type = evt_type; + __entry->pkt_type = pkt_type; + ), + + "\n" +); + +#endif /* _APPLESPI_TRACE_H_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../drivers/input/keyboard +#define TRACE_INCLUDE_FILE applespi_trace +#include <trace/define_trace.h> diff --git a/drivers/input/keyboard/mtk-pmic-keys.c b/drivers/input/keyboard/mtk-pmic-keys.c index 746ff06eaf8d..62391d6c7da6 100644 --- a/drivers/input/keyboard/mtk-pmic-keys.c +++ b/drivers/input/keyboard/mtk-pmic-keys.c @@ -277,8 +277,10 @@ static int mtk_pmic_keys_probe(struct platform_device *pdev) keys->keys[index].regs = &mtk_pmic_regs->keys_regs[index]; keys->keys[index].irq = platform_get_irq(pdev, index); - if (keys->keys[index].irq < 0) + if (keys->keys[index].irq < 0) { + of_node_put(child); return keys->keys[index].irq; + } error = of_property_read_u32(child, "linux,keycodes", &keys->keys[index].keycode); @@ -286,6 +288,7 @@ static int mtk_pmic_keys_probe(struct platform_device *pdev) dev_err(keys->dev, "failed to read key:%d linux,keycode property: %d\n", index, error); + of_node_put(child); return error; } @@ -293,8 +296,10 @@ static int mtk_pmic_keys_probe(struct platform_device *pdev) keys->keys[index].wakeup = true; error = mtk_pmic_key_setup(keys, &keys->keys[index]); - if (error) + if (error) { + of_node_put(child); return error; + } index++; } diff --git a/drivers/input/keyboard/sun4i-lradc-keys.c b/drivers/input/keyboard/sun4i-lradc-keys.c index 6ffdc26b9c89..4a796bed48ac 100644 --- a/drivers/input/keyboard/sun4i-lradc-keys.c +++ b/drivers/input/keyboard/sun4i-lradc-keys.c @@ -198,18 +198,21 @@ static int sun4i_lradc_load_dt_keymap(struct device *dev, error = of_property_read_u32(pp, "channel", &channel); if (error || channel != 0) { dev_err(dev, "%pOFn: Inval channel prop\n", pp); + of_node_put(pp); return -EINVAL; } error = of_property_read_u32(pp, "voltage", &map->voltage); if (error) { dev_err(dev, "%pOFn: Inval voltage prop\n", pp); + of_node_put(pp); return -EINVAL; } error = of_property_read_u32(pp, "linux,code", &map->keycode); if (error) { dev_err(dev, 
"%pOFn: Inval linux,code prop\n", pp); + of_node_put(pp); return -EINVAL; } diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 8996323ce8d9..34700eda0429 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -21,6 +21,7 @@ #include "psmouse.h" #include "alps.h" +#include "trackpoint.h" /* * Definitions for ALPS version 3 and 4 command mode protocol @@ -2861,6 +2862,23 @@ static const struct alps_protocol_info *alps_match_table(unsigned char *e7, return NULL; } +static bool alps_is_cs19_trackpoint(struct psmouse *psmouse) +{ + u8 param[2] = { 0 }; + + if (ps2_command(&psmouse->ps2dev, + param, MAKE_PS2_CMD(0, 2, TP_READ_ID))) + return false; + + /* + * param[0] contains the trackpoint device variant_id while + * param[1] contains the firmware_id. So far all alps + * trackpoint-only devices have their variant_ids equal + * TP_VARIANT_ALPS and their firmware_ids are in 0x20~0x2f range. + */ + return param[0] == TP_VARIANT_ALPS && ((param[1] & 0xf0) == 0x20); +} + static int alps_identify(struct psmouse *psmouse, struct alps_data *priv) { const struct alps_protocol_info *protocol; @@ -3162,6 +3180,20 @@ int alps_detect(struct psmouse *psmouse, bool set_properties) return error; /* + * ALPS cs19 is a trackpoint-only device, and uses different + * protocol than DualPoint ones, so we return -EINVAL here and let + * trackpoint.c drive this device. If the trackpoint driver is not + * enabled, the device will fall back to a bare PS/2 mouse. + * If ps2_command() fails here, we depend on the immediately + * followed psmouse_reset() to reset the device to normal state. + */ + if (alps_is_cs19_trackpoint(psmouse)) { + psmouse_dbg(psmouse, + "ALPS CS19 trackpoint-only device detected, ignoring\n"); + return -EINVAL; + } + + /* * Reset the device to make sure it is fully operational: * on some laptops, like certain Dell Latitudes, we may * fail to properly detect presence of trackstick if device diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 1080c0c49815..b1956ed4c0dd 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -176,6 +176,7 @@ static const char * const smbus_pnp_ids[] = { "LEN0093", /* T480 */ "LEN0096", /* X280 */ "LEN0097", /* X280 -> ALPS trackpoint */ + "LEN009b", /* T580 */ "LEN200f", /* T450s */ "LEN2054", /* E480 */ "LEN2055", /* E580 */ @@ -705,7 +706,7 @@ static void synaptics_pt_create(struct psmouse *psmouse) serio->id.type = SERIO_PS_PSTHRU; strlcpy(serio->name, "Synaptics pass-through", sizeof(serio->name)); - strlcpy(serio->phys, "synaptics-pt/serio0", sizeof(serio->name)); + strlcpy(serio->phys, "synaptics-pt/serio0", sizeof(serio->phys)); serio->write = synaptics_pt_write; serio->start = synaptics_pt_start; serio->stop = synaptics_pt_stop; diff --git a/drivers/input/mouse/trackpoint.h b/drivers/input/mouse/trackpoint.h index 0afffe8d824f..77110f3ec21d 100644 --- a/drivers/input/mouse/trackpoint.h +++ b/drivers/input/mouse/trackpoint.h @@ -158,7 +158,8 @@ struct trackpoint_data { #ifdef CONFIG_MOUSE_PS2_TRACKPOINT int trackpoint_detect(struct psmouse *psmouse, bool set_properties); #else -inline int trackpoint_detect(struct psmouse *psmouse, bool set_properties) +static inline int trackpoint_detect(struct psmouse *psmouse, + bool set_properties) { return -ENOSYS; } diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c index 8e457e50f837..88ae7c2ac3c8 100644 --- a/drivers/input/serio/hyperv-keyboard.c +++ 
b/drivers/input/serio/hyperv-keyboard.c @@ -75,8 +75,8 @@ struct synth_kbd_keystroke { #define HK_MAXIMUM_MESSAGE_SIZE 256 -#define KBD_VSC_SEND_RING_BUFFER_SIZE (10 * PAGE_SIZE) -#define KBD_VSC_RECV_RING_BUFFER_SIZE (10 * PAGE_SIZE) +#define KBD_VSC_SEND_RING_BUFFER_SIZE (40 * 1024) +#define KBD_VSC_RECV_RING_BUFFER_SIZE (40 * 1024) #define XTKBD_EMUL0 0xe0 #define XTKBD_EMUL1 0xe1 diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c index 4b8b9d7aa75e..35031228a6d0 100644 --- a/drivers/input/tablet/gtco.c +++ b/drivers/input/tablet/gtco.c @@ -78,6 +78,7 @@ Scott Hill [email protected] /* Max size of a single report */ #define REPORT_MAX_SIZE 10 +#define MAX_COLLECTION_LEVELS 10 /* Bitmask whether pen is in range */ @@ -223,8 +224,7 @@ static void parse_hid_report_descriptor(struct gtco *device, char * report, char maintype = 'x'; char globtype[12]; int indent = 0; - char indentstr[10] = ""; - + char indentstr[MAX_COLLECTION_LEVELS + 1] = { 0 }; dev_dbg(ddev, "======>>>>>>PARSE<<<<<<======\n"); @@ -350,6 +350,13 @@ static void parse_hid_report_descriptor(struct gtco *device, char * report, case TAG_MAIN_COL_START: maintype = 'S'; + if (indent == MAX_COLLECTION_LEVELS) { + dev_err(ddev, "Collection level %d would exceed limit of %d\n", + indent + 1, + MAX_COLLECTION_LEVELS); + break; + } + if (data == 0) { dev_dbg(ddev, "======>>>>>> Physical\n"); strcpy(globtype, "Physical"); @@ -369,8 +376,15 @@ static void parse_hid_report_descriptor(struct gtco *device, char * report, break; case TAG_MAIN_COL_END: - dev_dbg(ddev, "<<<<<<======\n"); maintype = 'E'; + + if (indent == 0) { + dev_err(ddev, "Collection level already at zero\n"); + break; + } + + dev_dbg(ddev, "<<<<<<======\n"); + indent--; for (x = 0; x < indent; x++) indentstr[x] = '-'; diff --git a/drivers/input/touchscreen/auo-pixcir-ts.c b/drivers/input/touchscreen/auo-pixcir-ts.c index 8e48fbda487a..8e9f3b7b8180 100644 --- a/drivers/input/touchscreen/auo-pixcir-ts.c +++ b/drivers/input/touchscreen/auo-pixcir-ts.c @@ -602,9 +602,8 @@ static int auo_pixcir_probe(struct i2c_client *client, return error; } - error = devm_add_action(&client->dev, auo_pixcir_reset, ts); + error = devm_add_action_or_reset(&client->dev, auo_pixcir_reset, ts); if (error) { - auo_pixcir_reset(ts); dev_err(&client->dev, "failed to register reset action, %d\n", error); return error; diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 73740b969e62..b607a92791d3 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2533,7 +2533,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, npages = sg_num_pages(dev, sglist, nelems); address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask); - if (address == DMA_MAPPING_ERROR) + if (!address) goto out_err; prot = dir2prot(direction); diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index eb104c719629..4413aa67000e 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -23,6 +23,8 @@ #include <linux/mem_encrypt.h> #include <asm/pci-direct.h> #include <asm/iommu.h> +#include <asm/apic.h> +#include <asm/msidef.h> #include <asm/gart.h> #include <asm/x86_init.h> #include <asm/iommu_table.h> @@ -1920,6 +1922,90 @@ static int iommu_setup_msi(struct amd_iommu *iommu) return 0; } +#define XT_INT_DEST_MODE(x) (((x) & 0x1ULL) << 2) +#define XT_INT_DEST_LO(x) (((x) & 0xFFFFFFULL) << 8) +#define XT_INT_VEC(x) (((x) & 0xFFULL) << 32) +#define XT_INT_DEST_HI(x) ((((x) >> 24) & 0xFFULL) << 56) + +/** + * 
Set up the IntCapXT registers with interrupt routing information + * based on the PCI MSI capability block registers, accessed via + * MMIO MSI address low/hi and MSI data registers. + */ +static void iommu_update_intcapxt(struct amd_iommu *iommu) +{ + u64 val; + u32 addr_lo = readl(iommu->mmio_base + MMIO_MSI_ADDR_LO_OFFSET); + u32 addr_hi = readl(iommu->mmio_base + MMIO_MSI_ADDR_HI_OFFSET); + u32 data = readl(iommu->mmio_base + MMIO_MSI_DATA_OFFSET); + bool dm = (addr_lo >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1; + u32 dest = ((addr_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xFF); + + if (x2apic_enabled()) + dest |= MSI_ADDR_EXT_DEST_ID(addr_hi); + + val = XT_INT_VEC(data & 0xFF) | + XT_INT_DEST_MODE(dm) | + XT_INT_DEST_LO(dest) | + XT_INT_DEST_HI(dest); + + /* + * The current IOMMU implementation uses the same IRQ for all + * 3 IOMMU interrupts. + */ + writeq(val, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); + writeq(val, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); + writeq(val, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); +} + +static void _irq_notifier_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct amd_iommu *iommu; + + for_each_iommu(iommu) { + if (iommu->dev->irq == notify->irq) { + iommu_update_intcapxt(iommu); + break; + } + } +} + +static void _irq_notifier_release(struct kref *ref) +{ +} + +static int iommu_init_intcapxt(struct amd_iommu *iommu) +{ + int ret; + struct irq_affinity_notify *notify = &iommu->intcapxt_notify; + + /* + * IntCapXT requires XTSup=1, which can be inferred from + * amd_iommu_xt_mode. + */ + if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE) + return 0; + + /* + * Also, we need to set up a notifier to update the IntCapXT registers + * whenever the irq affinity is changed from user-space. + */ + notify->irq = iommu->dev->irq; + notify->notify = _irq_notifier_notify; + notify->release = _irq_notifier_release; + ret = irq_set_affinity_notifier(iommu->dev->irq, notify); + if (ret) { + pr_err("Failed to register irq affinity notifier (devid=%#x, irq %d)\n", + iommu->devid, iommu->dev->irq); + return ret; + } + + iommu_update_intcapxt(iommu); + iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN); + return ret; +} + static int iommu_init_msi(struct amd_iommu *iommu) { int ret; @@ -1936,6 +2022,10 @@ static int iommu_init_msi(struct amd_iommu *iommu) return ret; enable_faults: + ret = iommu_init_intcapxt(iommu); + if (ret) + return ret; + iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); if (iommu->ppr_log != NULL) diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 52c35d557fad..64edd5a9694c 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -60,6 +60,12 @@ #define MMIO_PPR_LOG_OFFSET 0x0038 #define MMIO_GA_LOG_BASE_OFFSET 0x00e0 #define MMIO_GA_LOG_TAIL_OFFSET 0x00e8 +#define MMIO_MSI_ADDR_LO_OFFSET 0x015C +#define MMIO_MSI_ADDR_HI_OFFSET 0x0160 +#define MMIO_MSI_DATA_OFFSET 0x0164 +#define MMIO_INTCAPXT_EVT_OFFSET 0x0170 +#define MMIO_INTCAPXT_PPR_OFFSET 0x0178 +#define MMIO_INTCAPXT_GALOG_OFFSET 0x0180 #define MMIO_CMD_HEAD_OFFSET 0x2000 #define MMIO_CMD_TAIL_OFFSET 0x2008 #define MMIO_EVT_HEAD_OFFSET 0x2010 @@ -150,6 +156,7 @@ #define CONTROL_GALOG_EN 0x1CULL #define CONTROL_GAINT_EN 0x1DULL #define CONTROL_XT_EN 0x32ULL +#define CONTROL_INTCAPXT_EN 0x33ULL #define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT) #define CTRL_INV_TO_NONE 0 @@ -592,6 +599,8 @@ struct amd_iommu { /* DebugFS Info */ struct dentry *debugfs; #endif + /* IRQ notifier for IntCapXT interrupt */ + struct
irq_affinity_notify intcapxt_notify; }; static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev) diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c index 73a552914455..2b25d9c59336 100644 --- a/drivers/iommu/intel-iommu-debugfs.c +++ b/drivers/iommu/intel-iommu-debugfs.c @@ -162,9 +162,9 @@ static inline void print_tbl_walk(struct seq_file *m) (u64)0, (u64)0, (u64)0); else seq_printf(m, "%-6d\t0x%016llx:0x%016llx:0x%016llx\n", - tbl_wlk->pasid, tbl_wlk->pasid_tbl_entry->val[0], + tbl_wlk->pasid, tbl_wlk->pasid_tbl_entry->val[2], tbl_wlk->pasid_tbl_entry->val[1], - tbl_wlk->pasid_tbl_entry->val[2]); + tbl_wlk->pasid_tbl_entry->val[0]); } static void pasid_tbl_walk(struct seq_file *m, struct pasid_entry *tbl_entry, diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index ac4172c02244..bdaed2da8a55 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -339,8 +339,6 @@ static void domain_exit(struct dmar_domain *domain); static void domain_remove_dev_info(struct dmar_domain *domain); static void dmar_remove_one_dev_info(struct device *dev); static void __dmar_remove_one_dev_info(struct device_domain_info *info); -static void domain_context_clear(struct intel_iommu *iommu, - struct device *dev); static int domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); static bool device_is_rmrr_locked(struct device *dev); @@ -1833,9 +1831,65 @@ static inline int guestwidth_to_adjustwidth(int gaw) return agaw; } +static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, + int guest_width) +{ + int adjust_width, agaw; + unsigned long sagaw; + int err; + + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); + + err = init_iova_flush_queue(&domain->iovad, + iommu_flush_iova, iova_entry_free); + if (err) + return err; + + domain_reserve_special_ranges(domain); + + /* calculate AGAW */ + if (guest_width > cap_mgaw(iommu->cap)) + guest_width = cap_mgaw(iommu->cap); + domain->gaw = guest_width; + adjust_width = guestwidth_to_adjustwidth(guest_width); + agaw = width_to_agaw(adjust_width); + sagaw = cap_sagaw(iommu->cap); + if (!test_bit(agaw, &sagaw)) { + /* hardware doesn't support it, choose a bigger one */ + pr_debug("Hardware doesn't support agaw %d\n", agaw); + agaw = find_next_bit(&sagaw, 5, agaw); + if (agaw >= 5) + return -ENODEV; + } + domain->agaw = agaw; + + if (ecap_coherent(iommu->ecap)) + domain->iommu_coherency = 1; + else + domain->iommu_coherency = 0; + + if (ecap_sc_support(iommu->ecap)) + domain->iommu_snooping = 1; + else + domain->iommu_snooping = 0; + + if (intel_iommu_superpage) + domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); + else + domain->iommu_superpage = 0; + + domain->nid = iommu->node; + + /* always allocate the top pgd */ + domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); + if (!domain->pgd) + return -ENOMEM; + __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE); + return 0; +} + static void domain_exit(struct dmar_domain *domain) { - struct page *freelist; /* Remove associated devices and clear attached or cached domains */ domain_remove_dev_info(domain); @@ -1843,9 +1897,12 @@ static void domain_exit(struct dmar_domain *domain) /* destroy iovas */ put_iova_domain(&domain->iovad); - freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); + if (domain->pgd) { + struct page *freelist; - dma_free_pagelist(freelist); + freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); + 
dma_free_pagelist(freelist); + } free_domain_mem(domain); } @@ -2048,26 +2105,9 @@ out_unlock: return ret; } -struct domain_context_mapping_data { - struct dmar_domain *domain; - struct intel_iommu *iommu; - struct pasid_table *table; -}; - -static int domain_context_mapping_cb(struct pci_dev *pdev, - u16 alias, void *opaque) -{ - struct domain_context_mapping_data *data = opaque; - - return domain_context_mapping_one(data->domain, data->iommu, - data->table, PCI_BUS_NUM(alias), - alias & 0xff); -} - static int domain_context_mapping(struct dmar_domain *domain, struct device *dev) { - struct domain_context_mapping_data data; struct pasid_table *table; struct intel_iommu *iommu; u8 bus, devfn; @@ -2077,17 +2117,7 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev) return -ENODEV; table = intel_pasid_get_table(dev); - - if (!dev_is_pci(dev)) - return domain_context_mapping_one(domain, iommu, table, - bus, devfn); - - data.domain = domain; - data.iommu = iommu; - data.table = table; - - return pci_for_each_dma_alias(to_pci_dev(dev), - &domain_context_mapping_cb, &data); + return domain_context_mapping_one(domain, iommu, table, bus, devfn); } static int domain_context_mapped_cb(struct pci_dev *pdev, @@ -2513,31 +2543,6 @@ static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque) return 0; } -static int domain_init(struct dmar_domain *domain, int guest_width) -{ - int adjust_width; - - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); - domain_reserve_special_ranges(domain); - - /* calculate AGAW */ - domain->gaw = guest_width; - adjust_width = guestwidth_to_adjustwidth(guest_width); - domain->agaw = width_to_agaw(adjust_width); - - domain->iommu_coherency = 0; - domain->iommu_snooping = 0; - domain->iommu_superpage = 0; - domain->max_addr = 0; - - /* always allocate the top pgd */ - domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); - if (!domain->pgd) - return -ENOMEM; - domain_flush_cache(domain, domain->pgd, PAGE_SIZE); - return 0; -} - static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw) { struct device_domain_info *info; @@ -2575,19 +2580,11 @@ static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw) domain = alloc_domain(0); if (!domain) return NULL; - - if (domain_init(domain, gaw)) { + if (domain_init(domain, iommu, gaw)) { domain_exit(domain); return NULL; } - if (init_iova_flush_queue(&domain->iovad, - iommu_flush_iova, - iova_entry_free)) { - pr_warn("iova flush queue initialization failed\n"); - intel_iommu_strict = 1; - } - out: return domain; } @@ -2692,6 +2689,8 @@ static int domain_prepare_identity_map(struct device *dev, return iommu_domain_identity_map(domain, start, end); } +static int md_domain_init(struct dmar_domain *domain, int guest_width); + static int __init si_domain_init(int hw) { struct dmar_rmrr_unit *rmrr; @@ -2702,7 +2701,7 @@ static int __init si_domain_init(int hw) if (!si_domain) return -EFAULT; - if (domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { domain_exit(si_domain); return -EFAULT; } @@ -3564,7 +3563,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) freelist = domain_unmap(domain, start_pfn, last_pfn); - if (intel_iommu_strict || (pdev && pdev->untrusted)) { + if (intel_iommu_strict || (pdev && pdev->untrusted) || + !has_iova_flush_queue(&domain->iovad)) { iommu_flush_iotlb_psi(iommu, domain, start_pfn, nrpages, !freelist, 0); /* free iova 
*/ @@ -4758,28 +4758,6 @@ out_free_dmar: return ret; } -static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque) -{ - struct intel_iommu *iommu = opaque; - - domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff); - return 0; -} - -/* - * NB - intel-iommu lacks any sort of reference counting for the users of - * dependent devices. If multiple endpoints have intersecting dependent - * devices, unbinding the driver from any one of them will possibly leave - * the others unable to operate. - */ -static void domain_context_clear(struct intel_iommu *iommu, struct device *dev) -{ - if (!iommu || !dev || !dev_is_pci(dev)) - return; - - pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu); -} - static void __dmar_remove_one_dev_info(struct device_domain_info *info) { struct dmar_domain *domain; @@ -4800,7 +4778,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) PASID_RID2PASID); iommu_disable_dev_iotlb(info); - domain_context_clear(iommu, info->dev); + domain_context_clear_one(iommu, info->bus, info->devfn); intel_pasid_free_table(info->dev); } @@ -4829,6 +4807,31 @@ static void dmar_remove_one_dev_info(struct device *dev) spin_unlock_irqrestore(&device_domain_lock, flags); } +static int md_domain_init(struct dmar_domain *domain, int guest_width) +{ + int adjust_width; + + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); + domain_reserve_special_ranges(domain); + + /* calculate AGAW */ + domain->gaw = guest_width; + adjust_width = guestwidth_to_adjustwidth(guest_width); + domain->agaw = width_to_agaw(adjust_width); + + domain->iommu_coherency = 0; + domain->iommu_snooping = 0; + domain->iommu_superpage = 0; + domain->max_addr = 0; + + /* always allocate the top pgd */ + domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); + if (!domain->pgd) + return -ENOMEM; + domain_flush_cache(domain, domain->pgd, PAGE_SIZE); + return 0; +} + static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) { struct dmar_domain *dmar_domain; @@ -4843,7 +4846,7 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) pr_err("Can't allocate dmar_domain\n"); return NULL; } - if (domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { pr_err("Domain initialization failed\n"); domain_exit(dmar_domain); return NULL; diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index d499b2621239..3e1a8a675572 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -54,9 +54,14 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, } EXPORT_SYMBOL_GPL(init_iova_domain); +bool has_iova_flush_queue(struct iova_domain *iovad) +{ + return !!iovad->fq; +} + static void free_iova_flush_queue(struct iova_domain *iovad) { - if (!iovad->fq) + if (!has_iova_flush_queue(iovad)) return; if (timer_pending(&iovad->fq_timer)) @@ -74,13 +79,14 @@ static void free_iova_flush_queue(struct iova_domain *iovad) int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor) { + struct iova_fq __percpu *queue; int cpu; atomic64_set(&iovad->fq_flush_start_cnt, 0); atomic64_set(&iovad->fq_flush_finish_cnt, 0); - iovad->fq = alloc_percpu(struct iova_fq); - if (!iovad->fq) + queue = alloc_percpu(struct iova_fq); + if (!queue) return -ENOMEM; iovad->flush_cb = flush_cb; @@ -89,13 +95,17 @@ int init_iova_flush_queue(struct iova_domain *iovad, for_each_possible_cpu(cpu) { struct iova_fq 
*fq; - fq = per_cpu_ptr(iovad->fq, cpu); + fq = per_cpu_ptr(queue, cpu); fq->head = 0; fq->tail = 0; spin_lock_init(&fq->lock); } + smp_wmb(); + + iovad->fq = queue; + timer_setup(&iovad->fq_timer, fq_flush_timeout, 0); atomic_set(&iovad->fq_timer_on, 0); @@ -127,8 +137,9 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) struct iova *cached_iova; cached_iova = rb_entry(iovad->cached32_node, struct iova, node); - if (free->pfn_hi < iovad->dma_32bit_pfn && - free->pfn_lo >= cached_iova->pfn_lo) { + if (free == cached_iova || + (free->pfn_hi < iovad->dma_32bit_pfn && + free->pfn_lo >= cached_iova->pfn_lo)) { iovad->cached32_node = rb_next(&free->node); iovad->max32_alloc_size = iovad->dma_32bit_pfn; } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 26e374fbf57c..20ed838e9413 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -931,6 +931,9 @@ int bch_cached_dev_run(struct cached_dev *dc) if (dc->io_disable) { pr_err("I/O disabled on cached dev %s", dc->backing_dev_name); + kfree(env[1]); + kfree(env[2]); + kfree(buf); return -EIO; } diff --git a/drivers/media/v4l2-core/v4l2-subdev.c b/drivers/media/v4l2-core/v4l2-subdev.c index 21fb90d66bfc..25c73c13cc7e 100644 --- a/drivers/media/v4l2-core/v4l2-subdev.c +++ b/drivers/media/v4l2-core/v4l2-subdev.c @@ -124,7 +124,7 @@ static inline int check_which(__u32 which) static inline int check_pad(struct v4l2_subdev *sd, __u32 pad) { #if defined(CONFIG_MEDIA_CONTROLLER) - if (sd->entity.graph_obj.mdev) { + if (sd->entity.num_pads) { if (pad >= sd->entity.num_pads) return -EINVAL; return 0; diff --git a/drivers/memory/.gitignore b/drivers/memory/.gitignore new file mode 100644 index 000000000000..cbca8b028437 --- /dev/null +++ b/drivers/memory/.gitignore @@ -0,0 +1 @@ +ti-emif-asm-offsets.h diff --git a/drivers/memory/Makefile b/drivers/memory/Makefile index 9d5c409a1591..27b493435e61 100644 --- a/drivers/memory/Makefile +++ b/drivers/memory/Makefile @@ -29,9 +29,10 @@ ti-emif-sram-objs := ti-emif-pm.o ti-emif-sram-pm.o AFLAGS_ti-emif-sram-pm.o :=-Wa,-march=armv7-a -drivers/memory/ti-emif-sram-pm.o: include/generated/ti-emif-asm-offsets.h +$(obj)/ti-emif-sram-pm.o: $(obj)/ti-emif-asm-offsets.h -include/generated/ti-emif-asm-offsets.h: drivers/memory/emif-asm-offsets.s FORCE +$(obj)/ti-emif-asm-offsets.h: $(obj)/emif-asm-offsets.s FORCE $(call filechk,offsets,__TI_EMIF_ASM_OFFSETS_H__) targets += emif-asm-offsets.s +clean-files += ti-emif-asm-offsets.h diff --git a/drivers/memory/ti-emif-sram-pm.S b/drivers/memory/ti-emif-sram-pm.S index d75ae18efa7d..d1c83bd5b98e 100644 --- a/drivers/memory/ti-emif-sram-pm.S +++ b/drivers/memory/ti-emif-sram-pm.S @@ -14,12 +14,12 @@ * GNU General Public License for more details. 
*/ -#include <generated/ti-emif-asm-offsets.h> #include <linux/linkage.h> #include <asm/assembler.h> #include <asm/memory.h> #include "emif.h" +#include "ti-emif-asm-offsets.h" #define EMIF_POWER_MGMT_WAIT_SELF_REFRESH_8192_CYCLES 0x00a0 #define EMIF_POWER_MGMT_SR_TIMER_MASK 0x00f0 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 656ed80647f0..e2be5a685130 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -285,6 +285,9 @@ int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata) hw_cons = le16_to_cpu(*txdata->tx_cons_sb); sw_cons = txdata->tx_pkt_cons; + /* Ensure subsequent loads occur after hw_cons */ + smp_rmb(); + while (sw_cons != hw_cons) { u16 pkt_cons; diff --git a/drivers/net/ethernet/chelsio/cxgb/my3126.c b/drivers/net/ethernet/chelsio/cxgb/my3126.c index 20c09cc4b323..60aa45b375b6 100644 --- a/drivers/net/ethernet/chelsio/cxgb/my3126.c +++ b/drivers/net/ethernet/chelsio/cxgb/my3126.c @@ -94,7 +94,7 @@ static int my3126_interrupt_handler(struct cphy *cphy) return cphy_cause_link_change; } -static void my3216_poll(struct work_struct *work) +static void my3126_poll(struct work_struct *work) { struct cphy *cphy = container_of(work, struct cphy, phy_update.work); @@ -177,7 +177,7 @@ static struct cphy *my3126_phy_create(struct net_device *dev, return NULL; cphy_init(cphy, dev, phy_addr, &my3126_ops, mdio_ops); - INIT_DELAYED_WORK(&cphy->phy_update, my3216_poll); + INIT_DELAYED_WORK(&cphy->phy_update, my3126_poll); cphy->bmsr = 0; return cphy; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 67202b6f352e..4311ad9c84b2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5561,7 +5561,6 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) char name[IFNAMSIZ]; u32 devcap2; u16 flags; - int pos; /* If we want to instantiate Virtual Functions, then our * parent bridge's PCI-E needs to support Alternative Routing @@ -5569,9 +5568,8 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) * and above. 
*/ pbridge = pdev->bus->self; - pos = pci_find_capability(pbridge, PCI_CAP_ID_EXP); - pci_read_config_word(pbridge, pos + PCI_EXP_FLAGS, &flags); - pci_read_config_dword(pbridge, pos + PCI_EXP_DEVCAP2, &devcap2); + pcie_capability_read_word(pbridge, PCI_EXP_FLAGS, &flags); + pcie_capability_read_dword(pbridge, PCI_EXP_DEVCAP2, &devcap2); if ((flags & PCI_EXP_FLAGS_VERS) < 2 || !(devcap2 & PCI_EXP_DEVCAP2_ARI)) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 312599c6b35a..e447976bdd3e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -67,7 +67,8 @@ static struct ch_tc_pedit_fields pedits[] = { static struct ch_tc_flower_entry *allocate_flower_entry(void) { struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL); - spin_lock_init(&new->lock); + if (new) + spin_lock_init(&new->lock); return new; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 9dd5ed9a2965..f7fc553356f2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -7309,7 +7309,6 @@ int t4_fixup_host_params(struct adapter *adap, unsigned int page_size, } else { unsigned int pack_align; unsigned int ingpad, ingpack; - unsigned int pcie_cap; /* T5 introduced the separation of the Free List Padding and * Packing Boundaries. Thus, we can select a smaller Padding @@ -7334,8 +7333,7 @@ int t4_fixup_host_params(struct adapter *adap, unsigned int page_size, * multiple of the Maximum Payload Size. */ pack_align = fl_align; - pcie_cap = pci_find_capability(adap->pdev, PCI_CAP_ID_EXP); - if (pcie_cap) { + if (pci_is_pcie(adap->pdev)) { unsigned int mps, mps_log; u16 devctl; @@ -7343,9 +7341,8 @@ int t4_fixup_host_params(struct adapter *adap, unsigned int page_size, * [bits 7:5] encodes sizes as powers of 2 starting at * 128 bytes. 
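* For example, a DEVCTL[7:5] field value of 2 gives mps_log = 2 + 7 = 9, i.e. an MPS of 1 << 9 = 512 bytes (an illustrative value, not one read from hardware).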
*/ - pci_read_config_word(adap->pdev, - pcie_cap + PCI_EXP_DEVCTL, - &devctl); + pcie_capability_read_word(adap->pdev, PCI_EXP_DEVCTL, + &devctl); mps_log = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5) + 7; mps = 1 << mps_log; if (mps > pack_align) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index b7a246b33599..2edb86ec9fe9 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4698,8 +4698,13 @@ int be_update_queues(struct be_adapter *adapter) int status; if (netif_running(netdev)) { + /* be_tx_timeout() must not run concurrently with this + * function; synchronize with an already-running dev_watchdog + */ + netif_tx_lock_bh(netdev); /* device cannot transmit now, avoid dev_watchdog timeouts */ netif_carrier_off(netdev); + netif_tx_unlock_bh(netdev); be_close(netdev); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index 8ad5292eebbe..75329ab775a6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -43,7 +43,7 @@ enum HCLGE_MBX_OPCODE { HCLGE_MBX_GET_QID_IN_PF, /* (VF -> PF) get queue id in pf */ HCLGE_MBX_LINK_STAT_MODE, /* (PF -> VF) link mode has changed */ HCLGE_MBX_GET_LINK_MODE, /* (VF -> PF) get the link mode of pf */ - HLCGE_MBX_PUSH_VLAN_INFO, /* (PF -> VF) push port base vlan */ + HCLGE_MBX_PUSH_VLAN_INFO, /* (PF -> VF) push port base vlan */ HCLGE_MBX_GET_MEDIA_TYPE, /* (VF -> PF) get media type */ HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf reset status */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index a38ac7cfe16b..690b9990215c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -304,7 +304,7 @@ int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid, memcpy(&msg_data[6], &vlan_tag, sizeof(u16)); return hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data), - HLCGE_MBX_PUSH_VLAN_INFO, vfid); + HCLGE_MBX_PUSH_VLAN_INFO, vfid); } static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index f60b80bd605e..6a96987bd8f0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -204,7 +204,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) case HCLGE_MBX_LINK_STAT_CHANGE: case HCLGE_MBX_ASSERTING_RESET: case HCLGE_MBX_LINK_STAT_MODE: - case HLCGE_MBX_PUSH_VLAN_INFO: + case HCLGE_MBX_PUSH_VLAN_INFO: /* set this mbx event as pending. This is required as we * might lose an interrupt event when mbx task is busy * handling.
This shall be cleared when mbx task just @@ -307,7 +307,7 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev) hclgevf_reset_task_schedule(hdev); break; - case HLCGE_MBX_PUSH_VLAN_INFO: + case HCLGE_MBX_PUSH_VLAN_INFO: state = le16_to_cpu(msg_q[1]); vlan_info = &msg_q[1]; hclgevf_update_port_base_vlan_info(hdev, state, diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 93f3b4e6185b..aa9323e55406 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -3912,13 +3912,11 @@ void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value) s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) { struct igc_adapter *adapter = hw->back; - u16 cap_offset; - cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP); - if (!cap_offset) + if (!pci_is_pcie(adapter->pdev)) return -IGC_ERR_CONFIG; - pci_read_config_word(adapter->pdev, cap_offset + reg, value); + pcie_capability_read_word(adapter->pdev, reg, value); return IGC_SUCCESS; } @@ -3926,13 +3924,11 @@ s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) { struct igc_adapter *adapter = hw->back; - u16 cap_offset; - cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP); - if (!cap_offset) + if (!pci_is_pcie(adapter->pdev)) return -IGC_ERR_CONFIG; - pci_write_config_word(adapter->pdev, cap_offset + reg, *value); + pcie_capability_write_word(adapter->pdev, reg, *value); return IGC_SUCCESS; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 7245d287633d..7f747cb1a4f4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -735,8 +735,7 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, list_add(&indr_priv->list, &rpriv->uplink_priv.tc_indr_block_priv_list); - block_cb = flow_block_cb_alloc(f->net, - mlx5e_rep_indr_setup_block_cb, + block_cb = flow_block_cb_alloc(mlx5e_rep_indr_setup_block_cb, indr_priv, indr_priv, mlx5e_rep_indr_tc_block_unbind); if (IS_ERR(block_cb)) { @@ -753,7 +752,7 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, if (!indr_priv) return -ENOENT; - block_cb = flow_block_cb_lookup(f, + block_cb = flow_block_cb_lookup(f->block, mlx5e_rep_indr_setup_block_cb, indr_priv); if (!block_cb) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 4d34d42b3b0e..650638152bbc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1604,14 +1604,14 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port, bool register_block = false; int err; - block_cb = flow_block_cb_lookup(f, mlxsw_sp_setup_tc_block_cb_flower, + block_cb = flow_block_cb_lookup(f->block, + mlxsw_sp_setup_tc_block_cb_flower, mlxsw_sp); if (!block_cb) { acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, f->net); if (!acl_block) return -ENOMEM; - block_cb = flow_block_cb_alloc(f->net, - mlxsw_sp_setup_tc_block_cb_flower, + block_cb = flow_block_cb_alloc(mlxsw_sp_setup_tc_block_cb_flower, mlxsw_sp, acl_block, mlxsw_sp_tc_block_flower_release); if (IS_ERR(block_cb)) { @@ -1657,7 +1657,8 @@ mlxsw_sp_setup_tc_block_flower_unbind(struct mlxsw_sp_port *mlxsw_sp_port, struct flow_block_cb *block_cb; int err; - block_cb = flow_block_cb_lookup(f, mlxsw_sp_setup_tc_block_cb_flower, + 
block_cb = flow_block_cb_lookup(f->block, + mlxsw_sp_setup_tc_block_cb_flower, mlxsw_sp); if (!block_cb) return; @@ -1680,7 +1681,7 @@ static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port, struct flow_block_offload *f) { struct flow_block_cb *block_cb; - tc_setup_cb_t *cb; + flow_setup_cb_t *cb; bool ingress; int err; @@ -1702,7 +1703,7 @@ static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port, &mlxsw_sp_block_cb_list)) return -EBUSY; - block_cb = flow_block_cb_alloc(f->net, cb, mlxsw_sp_port, + block_cb = flow_block_cb_alloc(cb, mlxsw_sp_port, mlxsw_sp_port, NULL); if (IS_ERR(block_cb)) return PTR_ERR(block_cb); @@ -1718,7 +1719,7 @@ static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port, case FLOW_BLOCK_UNBIND: mlxsw_sp_setup_tc_block_flower_unbind(mlxsw_sp_port, f, ingress); - block_cb = flow_block_cb_lookup(f, cb, mlxsw_sp_port); + block_cb = flow_block_cb_lookup(f->block, cb, mlxsw_sp_port); if (!block_cb) return -ENOENT; diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index 7aaddc09c185..59487d446a09 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -316,15 +316,14 @@ int ocelot_setup_tc_block_flower_bind(struct ocelot_port *port, if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) return -EOPNOTSUPP; - block_cb = flow_block_cb_lookup(f, ocelot_setup_tc_block_cb_flower, - port); + block_cb = flow_block_cb_lookup(f->block, + ocelot_setup_tc_block_cb_flower, port); if (!block_cb) { port_block = ocelot_port_block_create(port); if (!port_block) return -ENOMEM; - block_cb = flow_block_cb_alloc(f->net, - ocelot_setup_tc_block_cb_flower, + block_cb = flow_block_cb_alloc(ocelot_setup_tc_block_cb_flower, port, port_block, ocelot_tc_block_unbind); if (IS_ERR(block_cb)) { @@ -351,8 +350,8 @@ void ocelot_setup_tc_block_flower_unbind(struct ocelot_port *port, { struct flow_block_cb *block_cb; - block_cb = flow_block_cb_lookup(f, ocelot_setup_tc_block_cb_flower, - port); + block_cb = flow_block_cb_lookup(f->block, + ocelot_setup_tc_block_cb_flower, port); if (!block_cb) return; diff --git a/drivers/net/ethernet/mscc/ocelot_tc.c b/drivers/net/ethernet/mscc/ocelot_tc.c index 9e6464ffae5d..16a6db71ca5e 100644 --- a/drivers/net/ethernet/mscc/ocelot_tc.c +++ b/drivers/net/ethernet/mscc/ocelot_tc.c @@ -134,7 +134,7 @@ static int ocelot_setup_tc_block(struct ocelot_port *port, struct flow_block_offload *f) { struct flow_block_cb *block_cb; - tc_setup_cb_t *cb; + flow_setup_cb_t *cb; int err; netdev_dbg(port->dev, "tc_block command %d, binder_type %d\n", @@ -156,7 +156,7 @@ static int ocelot_setup_tc_block(struct ocelot_port *port, if (flow_block_cb_is_busy(cb, port, &ocelot_block_cb_list)) return -EBUSY; - block_cb = flow_block_cb_alloc(f->net, cb, port, port, NULL); + block_cb = flow_block_cb_alloc(cb, port, port, NULL); if (IS_ERR(block_cb)) return PTR_ERR(block_cb); @@ -169,7 +169,7 @@ static int ocelot_setup_tc_block(struct ocelot_port *port, list_add_tail(&block_cb->driver_list, f->driver_block_list); return 0; case FLOW_BLOCK_UNBIND: - block_cb = flow_block_cb_lookup(f, cb, port); + block_cb = flow_block_cb_lookup(f->block, cb, port); if (!block_cb) return -ENOENT; diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index faa8ba012a37..e209f150c5f2 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c 
@@ -1318,8 +1318,7 @@ static int nfp_flower_setup_tc_block(struct net_device *netdev, &nfp_block_cb_list)) return -EBUSY; - block_cb = flow_block_cb_alloc(f->net, - nfp_flower_setup_tc_block_cb, + block_cb = flow_block_cb_alloc(nfp_flower_setup_tc_block_cb, repr, repr, NULL); if (IS_ERR(block_cb)) return PTR_ERR(block_cb); @@ -1328,7 +1327,8 @@ static int nfp_flower_setup_tc_block(struct net_device *netdev, list_add_tail(&block_cb->driver_list, &nfp_block_cb_list); return 0; case FLOW_BLOCK_UNBIND: - block_cb = flow_block_cb_lookup(f, nfp_flower_setup_tc_block_cb, + block_cb = flow_block_cb_lookup(f->block, + nfp_flower_setup_tc_block_cb, repr); if (!block_cb) return -ENOENT; @@ -1424,8 +1424,7 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app, cb_priv->app = app; list_add(&cb_priv->list, &priv->indr_block_cb_priv); - block_cb = flow_block_cb_alloc(f->net, - nfp_flower_setup_indr_block_cb, + block_cb = flow_block_cb_alloc(nfp_flower_setup_indr_block_cb, cb_priv, cb_priv, nfp_flower_setup_indr_tc_release); if (IS_ERR(block_cb)) { @@ -1442,7 +1441,7 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app, if (!cb_priv) return -ENOENT; - block_cb = flow_block_cb_lookup(f, + block_cb = flow_block_cb_lookup(f->block, nfp_flower_setup_indr_block_cb, cb_priv); if (!block_cb) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index f900fde448db..17c64e43d6c3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -530,9 +530,8 @@ static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn, SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_LOCAL_INV_FENCE, 1); /* Check atomic operations support in PCI configuration space. 
*/ - pci_read_config_dword(cdev->pdev, - cdev->pdev->pcie_cap + PCI_EXP_DEVCTL2, - &pci_status_control); + pcie_capability_read_dword(cdev->pdev, PCI_EXP_DEVCTL2, + &pci_status_control); if (pci_status_control & PCI_EXP_DEVCTL2_LTR_EN) SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ATOMIC_OP, 1); diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 0637c6752a78..6272115b2848 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -3251,9 +3251,9 @@ static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp) ret = phy_read_paged(tp->phydev, 0x0a46, 0x13); if (ret & BIT(8)) - phy_modify_paged(tp->phydev, 0x0c41, 0x12, 0, BIT(1)); + phy_modify_paged(tp->phydev, 0x0c41, 0x15, 0, BIT(1)); else - phy_modify_paged(tp->phydev, 0x0c41, 0x12, BIT(1), 0); + phy_modify_paged(tp->phydev, 0x0c41, 0x15, BIT(1), 0); /* Enable PHY auto speed down */ phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2)); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index afdcc5664ea6..3544e1991579 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -836,7 +836,6 @@ int netvsc_recv_callback(struct net_device *net, if (unlikely(!skb)) { ++net_device_ctx->eth_stats.rx_no_memory; - rcu_read_unlock(); return NVSP_STAT_FAIL; } diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 2d816aadea79..e36c04c26866 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -517,7 +517,7 @@ static int sfp_hwmon_read_sensor(struct sfp *sfp, int reg, long *value) static void sfp_hwmon_to_rx_power(long *value) { - *value = DIV_ROUND_CLOSEST(*value, 100); + *value = DIV_ROUND_CLOSEST(*value, 10); } static void sfp_hwmon_calibrate(struct sfp *sfp, unsigned int slope, int offset, diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 54edf8956a25..6e84328bdd40 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -165,23 +165,29 @@ static int vrf_ip6_local_out(struct net *net, struct sock *sk, static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, struct net_device *dev) { - const struct ipv6hdr *iph = ipv6_hdr(skb); + const struct ipv6hdr *iph; struct net *net = dev_net(skb->dev); - struct flowi6 fl6 = { - /* needed to match OIF rule */ - .flowi6_oif = dev->ifindex, - .flowi6_iif = LOOPBACK_IFINDEX, - .daddr = iph->daddr, - .saddr = iph->saddr, - .flowlabel = ip6_flowinfo(iph), - .flowi6_mark = skb->mark, - .flowi6_proto = iph->nexthdr, - .flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF, - }; + struct flowi6 fl6; int ret = NET_XMIT_DROP; struct dst_entry *dst; struct dst_entry *dst_null = &net->ipv6.ip6_null_entry->dst; + if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct ipv6hdr))) + goto err; + + iph = ipv6_hdr(skb); + + memset(&fl6, 0, sizeof(fl6)); + /* needed to match OIF rule */ + fl6.flowi6_oif = dev->ifindex; + fl6.flowi6_iif = LOOPBACK_IFINDEX; + fl6.daddr = iph->daddr; + fl6.saddr = iph->saddr; + fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = iph->nexthdr; + fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; + dst = ip6_route_output(net, NULL, &fl6); if (dst == dst_null) goto err; @@ -237,21 +243,27 @@ static int vrf_ip_local_out(struct net *net, struct sock *sk, static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, struct net_device *vrf_dev) { - struct iphdr *ip4h = ip_hdr(skb); + struct iphdr *ip4h; int ret = NET_XMIT_DROP; - struct flowi4 fl4 = { - /* needed to match OIF rule */ - .flowi4_oif 
= vrf_dev->ifindex, - .flowi4_iif = LOOPBACK_IFINDEX, - .flowi4_tos = RT_TOS(ip4h->tos), - .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF, - .flowi4_proto = ip4h->protocol, - .daddr = ip4h->daddr, - .saddr = ip4h->saddr, - }; + struct flowi4 fl4; struct net *net = dev_net(vrf_dev); struct rtable *rt; + if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct iphdr))) + goto err; + + ip4h = ip_hdr(skb); + + memset(&fl4, 0, sizeof(fl4)); + /* needed to match OIF rule */ + fl4.flowi4_oif = vrf_dev->ifindex; + fl4.flowi4_iif = LOOPBACK_IFINDEX; + fl4.flowi4_tos = RT_TOS(ip4h->tos); + fl4.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF; + fl4.flowi4_proto = ip4h->protocol; + fl4.daddr = ip4h->daddr; + fl4.saddr = ip4h->saddr; + rt = ip_route_output_flow(net, &fl4, NULL); if (IS_ERR(rt)) goto err; diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index d436cc51dfd1..2fb4258941a5 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -177,6 +177,7 @@ static const struct wiphy_vendor_command wil_nl80211_vendor_commands[] = { .info.subcmd = QCA_NL80211_VENDOR_SUBCMD_DMG_RF_GET_SECTOR_CFG, .flags = WIPHY_VENDOR_CMD_NEED_WDEV | WIPHY_VENDOR_CMD_NEED_RUNNING, + .policy = wil_rf_sector_policy, .doit = wil_rf_sector_get_cfg }, { @@ -184,6 +185,7 @@ static const struct wiphy_vendor_command wil_nl80211_vendor_commands[] = { .info.subcmd = QCA_NL80211_VENDOR_SUBCMD_DMG_RF_SET_SECTOR_CFG, .flags = WIPHY_VENDOR_CMD_NEED_WDEV | WIPHY_VENDOR_CMD_NEED_RUNNING, + .policy = wil_rf_sector_policy, .doit = wil_rf_sector_set_cfg }, { @@ -192,6 +194,7 @@ static const struct wiphy_vendor_command wil_nl80211_vendor_commands[] = { QCA_NL80211_VENDOR_SUBCMD_DMG_RF_GET_SELECTED_SECTOR, .flags = WIPHY_VENDOR_CMD_NEED_WDEV | WIPHY_VENDOR_CMD_NEED_RUNNING, + .policy = wil_rf_sector_policy, .doit = wil_rf_sector_get_selected }, { @@ -200,6 +203,7 @@ static const struct wiphy_vendor_command wil_nl80211_vendor_commands[] = { QCA_NL80211_VENDOR_SUBCMD_DMG_RF_SET_SELECTED_SECTOR, .flags = WIPHY_VENDOR_CMD_NEED_WDEV | WIPHY_VENDOR_CMD_NEED_RUNNING, + .policy = wil_rf_sector_policy, .doit = wil_rf_sector_set_selected }, }; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/vendor.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/vendor.c index f6500899fc14..d07e7c7355d9 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/vendor.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/vendor.c @@ -112,6 +112,7 @@ const struct wiphy_vendor_command brcmf_vendor_cmds[] = { }, .flags = WIPHY_VENDOR_CMD_NEED_WDEV | WIPHY_VENDOR_CMD_NEED_NETDEV, + .policy = VENDOR_CMD_RAW_DATA, .doit = brcmf_cfg80211_vndr_cmds_dcmd_handler }, }; diff --git a/drivers/net/wireless/ti/wlcore/vendor_cmd.c b/drivers/net/wireless/ti/wlcore/vendor_cmd.c index 5cf0b32c413b..e1bd344c4ebc 100644 --- a/drivers/net/wireless/ti/wlcore/vendor_cmd.c +++ b/drivers/net/wireless/ti/wlcore/vendor_cmd.c @@ -163,6 +163,7 @@ static const struct wiphy_vendor_command wlcore_vendor_commands[] = { .flags = WIPHY_VENDOR_CMD_NEED_NETDEV | WIPHY_VENDOR_CMD_NEED_RUNNING, .doit = wlcore_vendor_cmd_smart_config_start, + .policy = wlcore_vendor_attr_policy, }, { .info = { @@ -172,6 +173,7 @@ static const struct wiphy_vendor_command wlcore_vendor_commands[] = { .flags = WIPHY_VENDOR_CMD_NEED_NETDEV | WIPHY_VENDOR_CMD_NEED_RUNNING, .doit = wlcore_vendor_cmd_smart_config_stop, + .policy = wlcore_vendor_attr_policy, }, { .info = { @@ -181,6 +183,7 
@@ static const struct wiphy_vendor_command wlcore_vendor_commands[] = { .flags = WIPHY_VENDOR_CMD_NEED_NETDEV | WIPHY_VENDOR_CMD_NEED_RUNNING, .doit = wlcore_vendor_cmd_smart_config_set_group_key, + .policy = wlcore_vendor_attr_policy, }, }; diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig index c99eed87382a..df16c755b4da 100644 --- a/drivers/ntb/Kconfig +++ b/drivers/ntb/Kconfig @@ -13,6 +13,17 @@ menuconfig NTB if NTB +config NTB_MSI + bool "MSI Interrupt Support" + depends on PCI_MSI + help + Support using MSI interrupt forwarding instead of (or in addition to) + hardware doorbells. MSI interrupts typically offer lower latency + than doorbells and more MSI interrupts can be made available to + clients. However this requires an extra memory window and support + in the hardware driver for creating the MSI interrupts. + + If unsure, say N. source "drivers/ntb/hw/Kconfig" source "drivers/ntb/test/Kconfig" diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile index 5c64438d5b3f..3a6fa181ff99 100644 --- a/drivers/ntb/Makefile +++ b/drivers/ntb/Makefile @@ -1,3 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_NTB) += ntb.o hw/ test/ obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o + +ntb-y := core.o +ntb-$(CONFIG_NTB_MSI) += msi.o diff --git a/drivers/ntb/ntb.c b/drivers/ntb/core.c index 2581ab724c34..2581ab724c34 100644 --- a/drivers/ntb/ntb.c +++ b/drivers/ntb/core.c diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c index efb214fc545a..2859cc99b73e 100644 --- a/drivers/ntb/hw/amd/ntb_hw_amd.c +++ b/drivers/ntb/hw/amd/ntb_hw_amd.c @@ -160,8 +160,8 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx, } /* set and verify setting the limit */ - write64(limit, mmio + limit_reg); - reg_val = read64(mmio + limit_reg); + write64(limit, peer_mmio + limit_reg); + reg_val = read64(peer_mmio + limit_reg); if (reg_val != limit) { write64(base_addr, mmio + limit_reg); write64(0, peer_mmio + xlat_reg); @@ -183,8 +183,8 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx, } /* set and verify setting the limit */ - writel(limit, mmio + limit_reg); - reg_val = readl(mmio + limit_reg); + writel(limit, peer_mmio + limit_reg); + reg_val = readl(peer_mmio + limit_reg); if (reg_val != limit) { writel(base_addr, mmio + limit_reg); writel(0, peer_mmio + xlat_reg); @@ -333,7 +333,7 @@ static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector) if (db_vector < 0 || db_vector > ndev->db_count) return 0; - return ntb_ndev(ntb)->db_valid_mask & (1 << db_vector); + return ntb_ndev(ntb)->db_valid_mask & (1ULL << db_vector); } static u64 amd_ntb_db_read(struct ntb_dev *ntb) diff --git a/drivers/ntb/hw/intel/ntb_hw_gen3.c b/drivers/ntb/hw/intel/ntb_hw_gen3.c index f475b56a3f49..c3397160db7f 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen3.c +++ b/drivers/ntb/hw/intel/ntb_hw_gen3.c @@ -532,9 +532,9 @@ static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx, return 0; } -int intel_ntb3_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr, - resource_size_t *db_size, - u64 *db_data, int db_bit) +static int intel_ntb3_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr, + resource_size_t *db_size, + u64 *db_data, int db_bit) { phys_addr_t db_addr_base; struct intel_ntb_dev *ndev = ntb_ndev(ntb); diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index db4967748e4d..f4959458d909 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ 
b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -86,7 +86,8 @@ struct switchtec_ntb { bool link_is_up; enum ntb_speed link_speed; enum ntb_width link_width; - struct work_struct link_reinit_work; + struct work_struct check_link_status_work; + bool link_force_down; }; static struct switchtec_ntb *ntb_sndev(struct ntb_dev *ntb) @@ -485,33 +486,11 @@ enum switchtec_msg { static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev); -static void link_reinit_work(struct work_struct *work) -{ - struct switchtec_ntb *sndev; - - sndev = container_of(work, struct switchtec_ntb, link_reinit_work); - - switchtec_ntb_reinit_peer(sndev); -} - -static void switchtec_ntb_check_link(struct switchtec_ntb *sndev, - enum switchtec_msg msg) +static void switchtec_ntb_link_status_update(struct switchtec_ntb *sndev) { int link_sta; int old = sndev->link_is_up; - if (msg == MSG_LINK_FORCE_DOWN) { - schedule_work(&sndev->link_reinit_work); - - if (sndev->link_is_up) { - sndev->link_is_up = 0; - ntb_link_event(&sndev->ntb); - dev_info(&sndev->stdev->dev, "ntb link forced down\n"); - } - - return; - } - link_sta = sndev->self_shared->link_sta; if (link_sta) { u64 peer = ioread64(&sndev->peer_shared->magic); @@ -536,6 +515,38 @@ static void switchtec_ntb_check_link(struct switchtec_ntb *sndev, } } +static void check_link_status_work(struct work_struct *work) +{ + struct switchtec_ntb *sndev; + + sndev = container_of(work, struct switchtec_ntb, + check_link_status_work); + + if (sndev->link_force_down) { + sndev->link_force_down = false; + switchtec_ntb_reinit_peer(sndev); + + if (sndev->link_is_up) { + sndev->link_is_up = 0; + ntb_link_event(&sndev->ntb); + dev_info(&sndev->stdev->dev, "ntb link forced down\n"); + } + + return; + } + + switchtec_ntb_link_status_update(sndev); +} + +static void switchtec_ntb_check_link(struct switchtec_ntb *sndev, + enum switchtec_msg msg) +{ + if (msg == MSG_LINK_FORCE_DOWN) + sndev->link_force_down = true; + + schedule_work(&sndev->check_link_status_work); +} + static void switchtec_ntb_link_notification(struct switchtec_dev *stdev) { struct switchtec_ntb *sndev = stdev->sndev; @@ -568,7 +579,7 @@ static int switchtec_ntb_link_enable(struct ntb_dev *ntb, sndev->self_shared->link_sta = 1; switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_UP); - switchtec_ntb_check_link(sndev, MSG_CHECK_LINK); + switchtec_ntb_link_status_update(sndev); return 0; } @@ -582,7 +593,7 @@ static int switchtec_ntb_link_disable(struct ntb_dev *ntb) sndev->self_shared->link_sta = 0; switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_DOWN); - switchtec_ntb_check_link(sndev, MSG_CHECK_LINK); + switchtec_ntb_link_status_update(sndev); return 0; } @@ -835,7 +846,8 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev) sndev->ntb.topo = NTB_TOPO_SWITCH; sndev->ntb.ops = &switchtec_ntb_ops; - INIT_WORK(&sndev->link_reinit_work, link_reinit_work); + INIT_WORK(&sndev->check_link_status_work, check_link_status_work); + sndev->link_force_down = false; sndev->self_partition = sndev->stdev->partition; @@ -872,7 +884,7 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev) } sndev->peer_partition = ffs(tpart_vec) - 1; - if (!(part_map & (1 << sndev->peer_partition))) { + if (!(part_map & (1ULL << sndev->peer_partition))) { dev_err(&sndev->stdev->dev, "ntb target partition is not NT partition\n"); return -ENODEV; @@ -1448,10 +1460,16 @@ static void switchtec_ntb_deinit_db_msg_irq(struct switchtec_ntb *sndev) static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev) { - 
dev_info(&sndev->stdev->dev, "peer reinitialized\n"); - switchtec_ntb_deinit_shared_mw(sndev); - switchtec_ntb_init_mw(sndev); - return switchtec_ntb_init_shared_mw(sndev); + int rc; + + if (crosslink_is_enabled(sndev)) + return 0; + + dev_info(&sndev->stdev->dev, "reinitialize shared memory window\n"); + rc = config_rsvd_lut_win(sndev, sndev->mmio_peer_ctrl, 0, + sndev->self_partition, + sndev->self_shared_dma); + return rc; } static int switchtec_ntb_add(struct device *dev, diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c new file mode 100644 index 000000000000..9dddf133658f --- /dev/null +++ b/drivers/ntb/msi.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) + +#include <linux/irq.h> +#include <linux/module.h> +#include <linux/ntb.h> +#include <linux/msi.h> +#include <linux/pci.h> + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION("0.1"); +MODULE_AUTHOR("Logan Gunthorpe <[email protected]>"); +MODULE_DESCRIPTION("NTB MSI Interrupt Library"); + +struct ntb_msi { + u64 base_addr; + u64 end_addr; + + void (*desc_changed)(void *ctx); + + u32 __iomem *peer_mws[]; +}; + +/** + * ntb_msi_init() - Initialize the MSI context + * @ntb: NTB device context + * @desc_changed: Callback invoked whenever an MSI descriptor changes (may be NULL) + * + * This function must be called before any other ntb_msi function. + * It initializes the context for MSI operations and maps + * the peer memory windows. + * + * This function reserves the last N outbound memory windows (where N + * is the number of peers). + * + * Return: Zero on success, otherwise a negative error number. + */ +int ntb_msi_init(struct ntb_dev *ntb, + void (*desc_changed)(void *ctx)) +{ + phys_addr_t mw_phys_addr; + resource_size_t mw_size; + size_t struct_size; + int peer_widx; + int peers; + int ret; + int i; + + peers = ntb_peer_port_count(ntb); + if (peers <= 0) + return -EINVAL; + + struct_size = sizeof(*ntb->msi) + sizeof(*ntb->msi->peer_mws) * peers; + + ntb->msi = devm_kzalloc(&ntb->dev, struct_size, GFP_KERNEL); + if (!ntb->msi) + return -ENOMEM; + + ntb->msi->desc_changed = desc_changed; + + for (i = 0; i < peers; i++) { + peer_widx = ntb_peer_mw_count(ntb) - 1 - i; + + ret = ntb_peer_mw_get_addr(ntb, peer_widx, &mw_phys_addr, + &mw_size); + if (ret) + goto unroll; + + ntb->msi->peer_mws[i] = devm_ioremap(&ntb->dev, mw_phys_addr, + mw_size); + if (!ntb->msi->peer_mws[i]) { + ret = -EFAULT; + goto unroll; + } + } + + return 0; + +unroll: + for (i = 0; i < peers; i++) + if (ntb->msi->peer_mws[i]) + devm_iounmap(&ntb->dev, ntb->msi->peer_mws[i]); + + devm_kfree(&ntb->dev, ntb->msi); + ntb->msi = NULL; + return ret; +} +EXPORT_SYMBOL(ntb_msi_init); + +/** + * ntb_msi_setup_mws() - Initialize the MSI inbound memory windows + * @ntb: NTB device context + * + * This function sets up the required inbound memory windows. It should be + * called from a work function after a link up event. + * + * Over the entire network, this function reserves the last N + * inbound memory windows for each peer (where N is the number of peers). + * + * ntb_msi_init() must be called before this function. + * + * Return: Zero on success, otherwise a negative error number.
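+ *
+ * A typical client flow (a sketch, not mandated by this patch) is to call
+ * ntb_msi_init() at probe time, call this function from a link-up work
+ * item, and call ntb_msi_clear_mws() during tear down.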
+ */ +int ntb_msi_setup_mws(struct ntb_dev *ntb) +{ + struct msi_desc *desc; + u64 addr; + int peer, peer_widx; + resource_size_t addr_align, size_align, size_max; + resource_size_t mw_size = SZ_32K; + resource_size_t mw_min_size = mw_size; + int i; + int ret; + + if (!ntb->msi) + return -EINVAL; + + desc = first_msi_entry(&ntb->pdev->dev); + addr = desc->msg.address_lo + ((uint64_t)desc->msg.address_hi << 32); + + for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) { + peer_widx = ntb_peer_highest_mw_idx(ntb, peer); + if (peer_widx < 0) + return peer_widx; + + ret = ntb_mw_get_align(ntb, peer, peer_widx, &addr_align, + NULL, NULL); + if (ret) + return ret; + + addr &= ~(addr_align - 1); + } + + for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) { + peer_widx = ntb_peer_highest_mw_idx(ntb, peer); + if (peer_widx < 0) { + ret = peer_widx; + goto error_out; + } + + ret = ntb_mw_get_align(ntb, peer, peer_widx, NULL, + &size_align, &size_max); + if (ret) + goto error_out; + + mw_size = round_up(mw_size, size_align); + mw_size = max(mw_size, size_max); + if (mw_size < mw_min_size) + mw_min_size = mw_size; + + ret = ntb_mw_set_trans(ntb, peer, peer_widx, + addr, mw_size); + if (ret) + goto error_out; + } + + ntb->msi->base_addr = addr; + ntb->msi->end_addr = addr + mw_min_size; + + return 0; + +error_out: + for (i = 0; i < peer; i++) { + peer_widx = ntb_peer_highest_mw_idx(ntb, i); + if (peer_widx < 0) + continue; + + ntb_mw_clear_trans(ntb, i, peer_widx); + } + + return ret; +} +EXPORT_SYMBOL(ntb_msi_setup_mws); + +/** + * ntb_msi_clear_mws() - Clear all inbound memory windows + * @ntb: NTB device context + * + * This function tears down the resources used by ntb_msi_setup_mws(). + */ +void ntb_msi_clear_mws(struct ntb_dev *ntb) +{ + int peer; + int peer_widx; + + for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) { + peer_widx = ntb_peer_highest_mw_idx(ntb, peer); + if (peer_widx < 0) + continue; + + ntb_mw_clear_trans(ntb, peer, peer_widx); + } +} +EXPORT_SYMBOL(ntb_msi_clear_mws); + +struct ntb_msi_devres { + struct ntb_dev *ntb; + struct msi_desc *entry; + struct ntb_msi_desc *msi_desc; +}; + +static int ntb_msi_set_desc(struct ntb_dev *ntb, struct msi_desc *entry, + struct ntb_msi_desc *msi_desc) +{ + u64 addr; + + addr = entry->msg.address_lo + + ((uint64_t)entry->msg.address_hi << 32); + + if (addr < ntb->msi->base_addr || addr >= ntb->msi->end_addr) { + dev_warn_once(&ntb->dev, + "IRQ %d: MSI Address not within the memory window (%llx, [%llx %llx])\n", + entry->irq, addr, ntb->msi->base_addr, + ntb->msi->end_addr); + return -EFAULT; + } + + msi_desc->addr_offset = addr - ntb->msi->base_addr; + msi_desc->data = entry->msg.data; + + return 0; +} + +static void ntb_msi_write_msg(struct msi_desc *entry, void *data) +{ + struct ntb_msi_devres *dr = data; + + WARN_ON(ntb_msi_set_desc(dr->ntb, entry, dr->msi_desc)); + + if (dr->ntb->msi->desc_changed) + dr->ntb->msi->desc_changed(dr->ntb->ctx); +} + +static void ntbm_msi_callback_release(struct device *dev, void *res) +{ + struct ntb_msi_devres *dr = res; + + dr->entry->write_msi_msg = NULL; + dr->entry->write_msi_msg_data = NULL; +} + +static int ntbm_msi_setup_callback(struct ntb_dev *ntb, struct msi_desc *entry, + struct ntb_msi_desc *msi_desc) +{ + struct ntb_msi_devres *dr; + + dr = devres_alloc(ntbm_msi_callback_release, + sizeof(struct ntb_msi_devres), GFP_KERNEL); + if (!dr) + return -ENOMEM; + + dr->ntb = ntb; + dr->entry = entry; + dr->msi_desc = msi_desc; + + devres_add(&ntb->dev, dr); + + dr->entry->write_msi_msg =
ntb_msi_write_msg; + dr->entry->write_msi_msg_data = dr; + + return 0; +} + +/** + * ntbm_msi_request_threaded_irq() - allocate an MSI interrupt + * @ntb: NTB device context + * @handler: Function to be called when the IRQ occurs + * @thread_fn: Function to be called in a threaded interrupt context. NULL + * for clients which handle everything in @handler + * @name: An ASCII name for the claiming device, dev_name(dev) if NULL + * @dev_id: A cookie passed back to the handler function + * @msi_desc: Filled in with the descriptor a peer can use to trigger the IRQ + * + * This function assigns an interrupt handler to an unused + * MSI interrupt and returns the descriptor used to trigger + * it. The descriptor can then be sent to a peer to trigger + * the interrupt. + * + * The interrupt resource is managed with devres so it will + * be automatically freed when the NTB device is torn down. + * + * If an IRQ allocated with this function needs to be freed + * separately, ntbm_msi_free_irq() must be used. + * + * Return: IRQ number assigned on success, otherwise a negative error number. + */ +int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler, + irq_handler_t thread_fn, + const char *name, void *dev_id, + struct ntb_msi_desc *msi_desc) +{ + struct msi_desc *entry; + struct irq_desc *desc; + int ret; + + if (!ntb->msi) + return -EINVAL; + + for_each_pci_msi_entry(entry, ntb->pdev) { + desc = irq_to_desc(entry->irq); + if (desc->action) + continue; + + ret = devm_request_threaded_irq(&ntb->dev, entry->irq, handler, + thread_fn, 0, name, dev_id); + if (ret) + continue; + + if (ntb_msi_set_desc(ntb, entry, msi_desc)) { + devm_free_irq(&ntb->dev, entry->irq, dev_id); + continue; + } + + ret = ntbm_msi_setup_callback(ntb, entry, msi_desc); + if (ret) { + devm_free_irq(&ntb->dev, entry->irq, dev_id); + return ret; + } + + return entry->irq; + } + + return -ENODEV; +} +EXPORT_SYMBOL(ntbm_msi_request_threaded_irq); + +static int ntbm_msi_callback_match(struct device *dev, void *res, void *data) +{ + struct ntb_dev *ntb = dev_ntb(dev); + struct ntb_msi_devres *dr = res; + + return dr->ntb == ntb && dr->entry == data; +} + +/** + * ntbm_msi_free_irq() - free an interrupt + * @ntb: NTB device context + * @irq: Interrupt line to free + * @dev_id: Device identity to free + * + * This function should be used to manually free IRQs allocated with + * ntbm_msi_request_[threaded_]irq(). + */ +void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id) +{ + struct msi_desc *entry = irq_get_msi_desc(irq); + + entry->write_msi_msg = NULL; + entry->write_msi_msg_data = NULL; + + WARN_ON(devres_destroy(&ntb->dev, ntbm_msi_callback_release, + ntbm_msi_callback_match, entry)); + + devm_free_irq(&ntb->dev, irq, dev_id); +} +EXPORT_SYMBOL(ntbm_msi_free_irq); + +/** + * ntb_msi_peer_trigger() - Trigger an interrupt handler on a peer + * @ntb: NTB device context + * @peer: Peer index + * @desc: MSI descriptor data which triggers the interrupt + * + * This function triggers an interrupt on a peer. It requires + * the descriptor structure to have been passed from that peer + * by some other means. + * + * Return: Zero on success, otherwise a negative error number.
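+ *
+ * For example, once the peer's descriptor has been exchanged (via
+ * scratchpads, as the ntb_transport changes in this patch do), an
+ * interrupt can be fired with:
+ *
+ *	ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc);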
+ */ +int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer, + struct ntb_msi_desc *desc) +{ + int idx; + + if (!ntb->msi) + return -EINVAL; + + idx = desc->addr_offset / sizeof(*ntb->msi->peer_mws[peer]); + + iowrite32(desc->data, &ntb->msi->peer_mws[peer][idx]); + + return 0; +} +EXPORT_SYMBOL(ntb_msi_peer_trigger); + +/** + * ntb_msi_peer_addr() - Get the DMA address to trigger a peer's MSI interrupt + * @ntb: NTB device context + * @peer: Peer index + * @desc: MSI descriptor data which triggers the interrupt + * @msi_addr: Physical address to trigger the interrupt + * + * This function allows using DMA engines to trigger an interrupt + * (for example, trigger an interrupt to process the data after + * sending it). To trigger the interrupt, write @desc.data to the address + * returned in @msi_addr + * + * Return: Zero on success, otherwise a negative error number. + */ +int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer, + struct ntb_msi_desc *desc, + phys_addr_t *msi_addr) +{ + int peer_widx = ntb_peer_mw_count(ntb) - 1 - peer; + phys_addr_t mw_phys_addr; + int ret; + + ret = ntb_peer_mw_get_addr(ntb, peer_widx, &mw_phys_addr, NULL); + if (ret) + return ret; + + if (msi_addr) + *msi_addr = mw_phys_addr + desc->addr_offset; + + return 0; +} +EXPORT_SYMBOL(ntb_msi_peer_addr); diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index d4f39ba1d976..40c90ca10729 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -93,6 +93,12 @@ static bool use_dma; module_param(use_dma, bool, 0644); MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy"); +static bool use_msi; +#ifdef CONFIG_NTB_MSI +module_param(use_msi, bool, 0644); +MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells"); +#endif + static struct dentry *nt_debugfs_dir; /* Only two-ports NTB devices are supported */ @@ -188,6 +194,11 @@ struct ntb_transport_qp { u64 tx_err_no_buf; u64 tx_memcpy; u64 tx_async; + + bool use_msi; + int msi_irq; + struct ntb_msi_desc msi_desc; + struct ntb_msi_desc peer_msi_desc; }; struct ntb_transport_mw { @@ -221,6 +232,10 @@ struct ntb_transport_ctx { u64 qp_bitmap; u64 qp_bitmap_free; + bool use_msi; + unsigned int msi_spad_offset; + u64 msi_db_mask; + bool link_is_up; struct delayed_work link_work; struct work_struct link_cleanup; @@ -667,6 +682,114 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, return 0; } +static irqreturn_t ntb_transport_isr(int irq, void *dev) +{ + struct ntb_transport_qp *qp = dev; + + tasklet_schedule(&qp->rxc_db_work); + + return IRQ_HANDLED; +} + +static void ntb_transport_setup_qp_peer_msi(struct ntb_transport_ctx *nt, + unsigned int qp_num) +{ + struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; + int spad = qp_num * 2 + nt->msi_spad_offset; + + if (!nt->use_msi) + return; + + if (spad >= ntb_spad_count(nt->ndev)) + return; + + qp->peer_msi_desc.addr_offset = + ntb_peer_spad_read(qp->ndev, PIDX, spad); + qp->peer_msi_desc.data = + ntb_peer_spad_read(qp->ndev, PIDX, spad + 1); + + dev_dbg(&qp->ndev->pdev->dev, "QP%d Peer MSI addr=%x data=%x\n", + qp_num, qp->peer_msi_desc.addr_offset, qp->peer_msi_desc.data); + + if (qp->peer_msi_desc.addr_offset) { + qp->use_msi = true; + dev_info(&qp->ndev->pdev->dev, + "Using MSI interrupts for QP%d\n", qp_num); + } +} + +static void ntb_transport_setup_qp_msi(struct ntb_transport_ctx *nt, + unsigned int qp_num) +{ + struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; + int spad = qp_num * 2 + nt->msi_spad_offset; + int rc; + + if (!nt->use_msi) 
+ return; + + if (spad >= ntb_spad_count(nt->ndev)) { + dev_warn_once(&qp->ndev->pdev->dev, + "Not enough SPADS to use MSI interrupts\n"); + return; + } + + ntb_spad_write(qp->ndev, spad, 0); + ntb_spad_write(qp->ndev, spad + 1, 0); + + if (!qp->msi_irq) { + qp->msi_irq = ntbm_msi_request_irq(qp->ndev, ntb_transport_isr, + KBUILD_MODNAME, qp, + &qp->msi_desc); + if (qp->msi_irq < 0) { + dev_warn(&qp->ndev->pdev->dev, + "Unable to allocate MSI interrupt for qp%d\n", + qp_num); + return; + } + } + + rc = ntb_spad_write(qp->ndev, spad, qp->msi_desc.addr_offset); + if (rc) + goto err_free_interrupt; + + rc = ntb_spad_write(qp->ndev, spad + 1, qp->msi_desc.data); + if (rc) + goto err_free_interrupt; + + dev_dbg(&qp->ndev->pdev->dev, "QP%d MSI %d addr=%x data=%x\n", + qp_num, qp->msi_irq, qp->msi_desc.addr_offset, + qp->msi_desc.data); + + return; + +err_free_interrupt: + devm_free_irq(&nt->ndev->dev, qp->msi_irq, qp); +} + +static void ntb_transport_msi_peer_desc_changed(struct ntb_transport_ctx *nt) +{ + int i; + + dev_dbg(&nt->ndev->pdev->dev, "Peer MSI descriptors changed"); + + for (i = 0; i < nt->qp_count; i++) + ntb_transport_setup_qp_peer_msi(nt, i); +} + +static void ntb_transport_msi_desc_changed(void *data) +{ + struct ntb_transport_ctx *nt = data; + int i; + + dev_dbg(&nt->ndev->pdev->dev, "MSI descriptors changed"); + + for (i = 0; i < nt->qp_count; i++) + ntb_transport_setup_qp_msi(nt, i); + + ntb_peer_db_set(nt->ndev, nt->msi_db_mask); +} + static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw) { struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; @@ -905,6 +1028,20 @@ static void ntb_transport_link_work(struct work_struct *work) int rc = 0, i, spad; /* send the local info, in the opposite order of the way we read it */ + + if (nt->use_msi) { + rc = ntb_msi_setup_mws(ndev); + if (rc) { + dev_warn(&pdev->dev, + "Failed to register MSI memory window: %d\n", + rc); + nt->use_msi = false; + } + } + + for (i = 0; i < nt->qp_count; i++) + ntb_transport_setup_qp_msi(nt, i); + for (i = 0; i < nt->mw_count; i++) { size = nt->mw_vec[i].phys_size; @@ -962,6 +1099,7 @@ static void ntb_transport_link_work(struct work_struct *work) struct ntb_transport_qp *qp = &nt->qp_vec[i]; ntb_transport_setup_qp_mw(nt, i); + ntb_transport_setup_qp_peer_msi(nt, i); if (qp->client_ready) schedule_delayed_work(&qp->link_work, 0); @@ -1135,6 +1273,19 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) return -ENOMEM; nt->ndev = ndev; + + /* + * If we are using MSI, and have at least one extra memory window, + * we will reserve the last MW for the MSI window. 
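+ * For example, with mw_count == 2, one window remains for the transport
+ * queues and the last one is claimed for MSIs by ntb_msi_init() below.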
+ */ + if (use_msi && mw_count > 1) { + rc = ntb_msi_init(ndev, ntb_transport_msi_desc_changed); + if (!rc) { + mw_count -= 1; + nt->use_msi = true; + } + } + spad_count = ntb_spad_count(ndev); /* Limit the MW's based on the availability of scratchpads */ @@ -1148,6 +1299,8 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) max_mw_count_for_spads = (spad_count - MW0_SZ_HIGH) / 2; nt->mw_count = min(mw_count, max_mw_count_for_spads); + nt->msi_spad_offset = nt->mw_count * 2 + MW0_SZ_HIGH; + nt->mw_vec = kcalloc_node(mw_count, sizeof(*nt->mw_vec), GFP_KERNEL, node); if (!nt->mw_vec) { @@ -1178,6 +1331,12 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) qp_bitmap = ntb_db_valid_mask(ndev); qp_count = ilog2(qp_bitmap); + if (nt->use_msi) { + qp_count -= 1; + nt->msi_db_mask = 1 << qp_count; + ntb_db_clear_mask(ndev, nt->msi_db_mask); + } + if (max_num_clients && max_num_clients < qp_count) qp_count = max_num_clients; else if (nt->mw_count < qp_count) @@ -1601,7 +1760,10 @@ static void ntb_tx_copy_callback(void *data, iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags); - ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num)); + if (qp->use_msi) + ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc); + else + ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num)); /* The entry length can only be zero if the packet is intended to be a * "link down" or similar. Since no payload is being sent in these @@ -1869,6 +2031,7 @@ ntb_transport_create_queue(void *data, struct device *client_dev, qp->rx_dma_chan = NULL; } + qp->tx_mw_dma_addr = 0; if (qp->tx_dma_chan) { qp->tx_mw_dma_addr = dma_map_resource(qp->tx_dma_chan->device->dev, @@ -2268,6 +2431,11 @@ static void ntb_transport_doorbell_callback(void *data, int vector) u64 db_bits; unsigned int qp_num; + if (ntb_db_read(nt->ndev) & nt->msi_db_mask) { + ntb_transport_msi_peer_desc_changed(nt); + ntb_db_clear(nt->ndev, nt->msi_db_mask); + } + db_bits = (nt->qp_bitmap & ~nt->qp_bitmap_free & ntb_db_vector_mask(nt->ndev, vector)); diff --git a/drivers/ntb/test/Kconfig b/drivers/ntb/test/Kconfig index a8db00a7e087..516b991f33b9 100644 --- a/drivers/ntb/test/Kconfig +++ b/drivers/ntb/test/Kconfig @@ -26,3 +26,12 @@ config NTB_PERF to and from the window without additional software interaction. If unsure, say N. + +config NTB_MSI_TEST + tristate "NTB MSI Test Client" + depends on NTB_MSI + help + This tool demonstrates the use of the NTB MSI library to + send MSI interrupts between peers. + + If unsure, say N. 
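Taken together, the library boils down to a small client-side pattern, which the new ntb_msi_test client exercises. A minimal sketch of such a client (hypothetical my_* names; assumes ntbm_msi_request_irq() is the non-threaded wrapper used elsewhere in this patch; the scratchpad exchange and error unwinding are omitted):

	#include <linux/interrupt.h>
	#include <linux/ntb.h>

	static struct ntb_msi_desc my_local_desc;	/* published to the peer */
	static struct ntb_msi_desc my_peer_desc;	/* received from the peer */

	static irqreturn_t my_isr(int irq, void *dev)
	{
		/* runs when the peer writes my_local_desc.data to its window */
		return IRQ_HANDLED;
	}

	static int my_client_setup(struct ntb_dev *ntb)
	{
		int irq, rc;

		rc = ntb_msi_init(ntb, NULL);	/* claims the last outbound MWs */
		if (rc)
			return rc;

		irq = ntbm_msi_request_irq(ntb, my_isr, "my_client", ntb,
					   &my_local_desc);
		if (irq < 0)
			return irq;

		/*
		 * Later, from a work item after link-up: set up the inbound
		 * windows, swap descriptors over scratchpads, then trigger:
		 *
		 *	ntb_msi_setup_mws(ntb);
		 *	ntb_msi_peer_trigger(ntb, 0, &my_peer_desc);
		 */
		return 0;
	}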
diff --git a/drivers/ntb/test/Makefile b/drivers/ntb/test/Makefile index cbfd67622ef7..19ed91d8a3b1 100644 --- a/drivers/ntb/test/Makefile +++ b/drivers/ntb/test/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o obj-$(CONFIG_NTB_TOOL) += ntb_tool.o obj-$(CONFIG_NTB_PERF) += ntb_perf.o +obj-$(CONFIG_NTB_MSI_TEST) += ntb_msi_test.o diff --git a/drivers/ntb/test/ntb_msi_test.c b/drivers/ntb/test/ntb_msi_test.c new file mode 100644 index 000000000000..99d826ed9c34 --- /dev/null +++ b/drivers/ntb/test/ntb_msi_test.c @@ -0,0 +1,433 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) + +#include <linux/module.h> +#include <linux/debugfs.h> +#include <linux/ntb.h> +#include <linux/pci.h> +#include <linux/radix-tree.h> +#include <linux/workqueue.h> + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION("0.1"); +MODULE_AUTHOR("Logan Gunthorpe <[email protected]>"); +MODULE_DESCRIPTION("Test for sending MSI interrupts over an NTB memory window"); + +static int num_irqs = 4; +module_param(num_irqs, int, 0644); +MODULE_PARM_DESC(num_irqs, "number of irqs to use"); + +struct ntb_msit_ctx { + struct ntb_dev *ntb; + struct dentry *dbgfs_dir; + struct work_struct setup_work; + + struct ntb_msit_isr_ctx { + int irq_idx; + int irq_num; + int occurrences; + struct ntb_msit_ctx *nm; + struct ntb_msi_desc desc; + } *isr_ctx; + + struct ntb_msit_peer { + struct ntb_msit_ctx *nm; + int pidx; + int num_irqs; + struct completion init_comp; + struct ntb_msi_desc *msi_desc; + } peers[]; +}; + +static struct dentry *ntb_msit_dbgfs_topdir; + +static irqreturn_t ntb_msit_isr(int irq, void *dev) +{ + struct ntb_msit_isr_ctx *isr_ctx = dev; + struct ntb_msit_ctx *nm = isr_ctx->nm; + + dev_dbg(&nm->ntb->dev, "Interrupt Occurred: %d", + isr_ctx->irq_idx); + + isr_ctx->occurrences++; + + return IRQ_HANDLED; +} + +static void ntb_msit_setup_work(struct work_struct *work) +{ + struct ntb_msit_ctx *nm = container_of(work, struct ntb_msit_ctx, + setup_work); + int irq_count = 0; + int irq; + int ret; + uintptr_t i; + + ret = ntb_msi_setup_mws(nm->ntb); + if (ret) { + dev_err(&nm->ntb->dev, "Unable to setup MSI windows: %d\n", + ret); + return; + } + + for (i = 0; i < num_irqs; i++) { + nm->isr_ctx[i].irq_idx = i; + nm->isr_ctx[i].nm = nm; + + if (!nm->isr_ctx[i].irq_num) { + irq = ntbm_msi_request_irq(nm->ntb, ntb_msit_isr, + KBUILD_MODNAME, + &nm->isr_ctx[i], + &nm->isr_ctx[i].desc); + if (irq < 0) + break; + + nm->isr_ctx[i].irq_num = irq; + } + + ret = ntb_spad_write(nm->ntb, 2 * i + 1, + nm->isr_ctx[i].desc.addr_offset); + if (ret) + break; + + ret = ntb_spad_write(nm->ntb, 2 * i + 2, + nm->isr_ctx[i].desc.data); + if (ret) + break; + + irq_count++; + } + + ntb_spad_write(nm->ntb, 0, irq_count); + ntb_peer_db_set(nm->ntb, BIT(ntb_port_number(nm->ntb))); +} + +static void ntb_msit_desc_changed(void *ctx) +{ + struct ntb_msit_ctx *nm = ctx; + int i; + + dev_dbg(&nm->ntb->dev, "MSI Descriptors Changed\n"); + + for (i = 0; i < num_irqs; i++) { + ntb_spad_write(nm->ntb, 2 * i + 1, + nm->isr_ctx[i].desc.addr_offset); + ntb_spad_write(nm->ntb, 2 * i + 2, + nm->isr_ctx[i].desc.data); + } + + ntb_peer_db_set(nm->ntb, BIT(ntb_port_number(nm->ntb))); +} + +static void ntb_msit_link_event(void *ctx) +{ + struct ntb_msit_ctx *nm = ctx; + + if (!ntb_link_is_up(nm->ntb, NULL, NULL)) + return; + + schedule_work(&nm->setup_work); +} + +static void ntb_msit_copy_peer_desc(struct ntb_msit_ctx *nm, int peer) +{ + int i; + struct ntb_msi_desc *desc = nm->peers[peer].msi_desc; + int irq_count = nm->peers[peer].num_irqs; + + 
for (i = 0; i < irq_count; i++) { + desc[i].addr_offset = ntb_peer_spad_read(nm->ntb, peer, + 2 * i + 1); + desc[i].data = ntb_peer_spad_read(nm->ntb, peer, 2 * i + 2); + } + + dev_info(&nm->ntb->dev, "Found %d interrupts on peer %d\n", + irq_count, peer); + + complete_all(&nm->peers[peer].init_comp); +} + +static void ntb_msit_db_event(void *ctx, int vec) +{ + struct ntb_msit_ctx *nm = ctx; + struct ntb_msi_desc *desc; + u64 peer_mask = ntb_db_read(nm->ntb); + u32 irq_count; + int peer; + + ntb_db_clear(nm->ntb, peer_mask); + + for (peer = 0; peer < sizeof(peer_mask) * 8; peer++) { + if (!(peer_mask & BIT(peer))) + continue; + + irq_count = ntb_peer_spad_read(nm->ntb, peer, 0); + if (irq_count == -1) + continue; + + desc = kcalloc(irq_count, sizeof(*desc), GFP_ATOMIC); + if (!desc) + continue; + + kfree(nm->peers[peer].msi_desc); + nm->peers[peer].msi_desc = desc; + nm->peers[peer].num_irqs = irq_count; + + ntb_msit_copy_peer_desc(nm, peer); + } +} + +static const struct ntb_ctx_ops ntb_msit_ops = { + .link_event = ntb_msit_link_event, + .db_event = ntb_msit_db_event, +}; + +static int ntb_msit_dbgfs_trigger(void *data, u64 idx) +{ + struct ntb_msit_peer *peer = data; + + if (idx >= peer->num_irqs) + return -EINVAL; + + dev_dbg(&peer->nm->ntb->dev, "trigger irq %llu on peer %u\n", + idx, peer->pidx); + + return ntb_msi_peer_trigger(peer->nm->ntb, peer->pidx, + &peer->msi_desc[idx]); +} + +DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_trigger_fops, NULL, + ntb_msit_dbgfs_trigger, "%llu\n"); + +static int ntb_msit_dbgfs_port_get(void *data, u64 *port) +{ + struct ntb_msit_peer *peer = data; + + *port = ntb_peer_port_number(peer->nm->ntb, peer->pidx); + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_port_fops, ntb_msit_dbgfs_port_get, + NULL, "%llu\n"); + +static int ntb_msit_dbgfs_count_get(void *data, u64 *count) +{ + struct ntb_msit_peer *peer = data; + + *count = peer->num_irqs; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_count_fops, ntb_msit_dbgfs_count_get, + NULL, "%llu\n"); + +static int ntb_msit_dbgfs_ready_get(void *data, u64 *ready) +{ + struct ntb_msit_peer *peer = data; + + *ready = try_wait_for_completion(&peer->init_comp); + + return 0; +} + +static int ntb_msit_dbgfs_ready_set(void *data, u64 ready) +{ + struct ntb_msit_peer *peer = data; + + return wait_for_completion_interruptible(&peer->init_comp); +} + +DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_ready_fops, ntb_msit_dbgfs_ready_get, + ntb_msit_dbgfs_ready_set, "%llu\n"); + +static int ntb_msit_dbgfs_occurrences_get(void *data, u64 *occurrences) +{ + struct ntb_msit_isr_ctx *isr_ctx = data; + + *occurrences = isr_ctx->occurrences; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_occurrences_fops, + ntb_msit_dbgfs_occurrences_get, + NULL, "%llu\n"); + +static int ntb_msit_dbgfs_local_port_get(void *data, u64 *port) +{ + struct ntb_msit_ctx *nm = data; + + *port = ntb_port_number(nm->ntb); + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_local_port_fops, + ntb_msit_dbgfs_local_port_get, + NULL, "%llu\n"); + +static void ntb_msit_create_dbgfs(struct ntb_msit_ctx *nm) +{ + struct pci_dev *pdev = nm->ntb->pdev; + char buf[32]; + int i; + struct dentry *peer_dir; + + nm->dbgfs_dir = debugfs_create_dir(pci_name(pdev), + ntb_msit_dbgfs_topdir); + debugfs_create_file("port", 0400, nm->dbgfs_dir, nm, + &ntb_msit_local_port_fops); + + for (i = 0; i < ntb_peer_port_count(nm->ntb); i++) { + nm->peers[i].pidx = i; + nm->peers[i].nm = nm; + init_completion(&nm->peers[i].init_comp); + + snprintf(buf, sizeof(buf), "peer%d", i); + 
peer_dir = debugfs_create_dir(buf, nm->dbgfs_dir); + + debugfs_create_file_unsafe("trigger", 0200, peer_dir, + &nm->peers[i], + &ntb_msit_trigger_fops); + + debugfs_create_file_unsafe("port", 0400, peer_dir, + &nm->peers[i], &ntb_msit_port_fops); + + debugfs_create_file_unsafe("count", 0400, peer_dir, + &nm->peers[i], + &ntb_msit_count_fops); + + debugfs_create_file_unsafe("ready", 0600, peer_dir, + &nm->peers[i], + &ntb_msit_ready_fops); + } + + for (i = 0; i < num_irqs; i++) { + snprintf(buf, sizeof(buf), "irq%d_occurrences", i); + debugfs_create_file_unsafe(buf, 0400, nm->dbgfs_dir, + &nm->isr_ctx[i], + &ntb_msit_occurrences_fops); + } +} + +static void ntb_msit_remove_dbgfs(struct ntb_msit_ctx *nm) +{ + debugfs_remove_recursive(nm->dbgfs_dir); +} + +static int ntb_msit_probe(struct ntb_client *client, struct ntb_dev *ntb) +{ + struct ntb_msit_ctx *nm; + size_t struct_size; + int peers; + int ret; + + peers = ntb_peer_port_count(ntb); + if (peers <= 0) + return -EINVAL; + + if (ntb_spad_is_unsafe(ntb) || ntb_spad_count(ntb) < 2 * num_irqs + 1) { + dev_err(&ntb->dev, "NTB MSI test requires at least %d spads for %d irqs\n", + 2 * num_irqs + 1, num_irqs); + return -EFAULT; + } + + ret = ntb_spad_write(ntb, 0, -1); + if (ret) { + dev_err(&ntb->dev, "Unable to write spads: %d\n", ret); + return ret; + } + + ret = ntb_db_clear_mask(ntb, GENMASK(peers - 1, 0)); + if (ret) { + dev_err(&ntb->dev, "Unable to clear doorbell mask: %d\n", ret); + return ret; + } + + ret = ntb_msi_init(ntb, ntb_msit_desc_changed); + if (ret) { + dev_err(&ntb->dev, "Unable to initialize MSI library: %d\n", + ret); + return ret; + } + + struct_size = sizeof(*nm) + sizeof(*nm->peers) * peers; + + nm = devm_kzalloc(&ntb->dev, struct_size, GFP_KERNEL); + if (!nm) + return -ENOMEM; + + nm->isr_ctx = devm_kcalloc(&ntb->dev, num_irqs, sizeof(*nm->isr_ctx), + GFP_KERNEL); + if (!nm->isr_ctx) + return -ENOMEM; + + INIT_WORK(&nm->setup_work, ntb_msit_setup_work); + nm->ntb = ntb; + + ntb_msit_create_dbgfs(nm); + + ret = ntb_set_ctx(ntb, nm, &ntb_msit_ops); + if (ret) + goto remove_dbgfs; + + if (!nm->isr_ctx) + goto remove_dbgfs; + + ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + + return 0; + +remove_dbgfs: + ntb_msit_remove_dbgfs(nm); + devm_kfree(&ntb->dev, nm->isr_ctx); + devm_kfree(&ntb->dev, nm); + return ret; +} + +static void ntb_msit_remove(struct ntb_client *client, struct ntb_dev *ntb) +{ + struct ntb_msit_ctx *nm = ntb->ctx; + int i; + + ntb_link_disable(ntb); + ntb_db_set_mask(ntb, ntb_db_valid_mask(ntb)); + ntb_msi_clear_mws(ntb); + + for (i = 0; i < ntb_peer_port_count(ntb); i++) + kfree(nm->peers[i].msi_desc); + + ntb_clear_ctx(ntb); + ntb_msit_remove_dbgfs(nm); +} + +static struct ntb_client ntb_msit_client = { + .ops = { + .probe = ntb_msit_probe, + .remove = ntb_msit_remove + } +}; + +static int __init ntb_msit_init(void) +{ + int ret; + + if (debugfs_initialized()) + ntb_msit_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, + NULL); + + ret = ntb_register_client(&ntb_msit_client); + if (ret) + debugfs_remove_recursive(ntb_msit_dbgfs_topdir); + + return ret; +} +module_init(ntb_msit_init); + +static void __exit ntb_msit_exit(void) +{ + ntb_unregister_client(&ntb_msit_client); + debugfs_remove_recursive(ntb_msit_dbgfs_topdir); +} +module_exit(ntb_msit_exit); diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 11a6cd374004..d028331558ea 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -100,7 +100,7 @@ MODULE_DESCRIPTION("PCIe NTB Performance 
Measurement Tool"); #define DMA_TRIES 100 #define DMA_MDELAY 10 -#define MSG_TRIES 500 +#define MSG_TRIES 1000 #define MSG_UDELAY_LOW 1000 #define MSG_UDELAY_HIGH 2000 @@ -734,8 +734,6 @@ static void perf_disable_service(struct perf_ctx *perf) { int pidx; - ntb_link_disable(perf->ntb); - if (perf->cmd_send == perf_msg_cmd_send) { u64 inbits; @@ -752,6 +750,16 @@ static void perf_disable_service(struct perf_ctx *perf) for (pidx = 0; pidx < perf->pcnt; pidx++) flush_work(&perf->peers[pidx].service); + + for (pidx = 0; pidx < perf->pcnt; pidx++) { + struct perf_peer *peer = &perf->peers[pidx]; + + ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx), 0); + } + + ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx)); + + ntb_link_disable(perf->ntb); } /*============================================================================== diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index cc09b81fc7f4..8f3fbe5ca937 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2311,17 +2311,15 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct memset(subsys->subnqn + off, 0, sizeof(subsys->subnqn) - off); } -static void __nvme_release_subsystem(struct nvme_subsystem *subsys) +static void nvme_release_subsystem(struct device *dev) { + struct nvme_subsystem *subsys = + container_of(dev, struct nvme_subsystem, dev); + ida_simple_remove(&nvme_subsystems_ida, subsys->instance); kfree(subsys); } -static void nvme_release_subsystem(struct device *dev) -{ - __nvme_release_subsystem(container_of(dev, struct nvme_subsystem, dev)); -} - static void nvme_destroy_subsystem(struct kref *ref) { struct nvme_subsystem *subsys = @@ -2477,7 +2475,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) mutex_lock(&nvme_subsystems_lock); found = __nvme_find_get_subsystem(subsys->subnqn); if (found) { - __nvme_release_subsystem(subsys); + put_device(&subsys->dev); subsys = found; if (!nvme_validate_cntlid(subsys, ctrl, id)) { diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index a9a927677970..4f0d0d12744e 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -12,11 +12,6 @@ module_param(multipath, bool, 0444); MODULE_PARM_DESC(multipath, "turn on native support for multiple controllers per subsystem"); -inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) -{ - return multipath && ctrl->subsys && (ctrl->subsys->cmic & (1 << 3)); -} - /* * If multipathing is enabled we need to always use the subsystem instance * number for numbering our devices to avoid conflicts between subsystems that @@ -622,7 +617,8 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) { int error; - if (!nvme_ctrl_use_ana(ctrl)) + /* check if multipath is enabled and we have the capability */ + if (!multipath || !ctrl->subsys || !(ctrl->subsys->cmic & (1 << 3))) return 0; ctrl->anacap = id->anacap; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 716a876119c8..26b563f9985b 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -485,7 +485,11 @@ extern const struct attribute_group *nvme_ns_id_attr_groups[]; extern const struct block_device_operations nvme_ns_head_ops; #ifdef CONFIG_NVME_MULTIPATH -bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl); +static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) +{ + return ctrl->ana_log_buf != NULL; +} + void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, struct nvme_ctrl *ctrl, int *flags); void 
nvme_failover_req(struct request *req); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index bb970ca82517..db160cee42ad 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2254,9 +2254,7 @@ static int nvme_dev_add(struct nvme_dev *dev) if (!dev->ctrl.tagset) { dev->tagset.ops = &nvme_mq_ops; dev->tagset.nr_hw_queues = dev->online_queues - 1; - dev->tagset.nr_maps = 1; /* default */ - if (dev->io_queues[HCTX_TYPE_READ]) - dev->tagset.nr_maps++; + dev->tagset.nr_maps = 2; /* default + read */ if (dev->io_queues[HCTX_TYPE_POLL]) dev->tagset.nr_maps++; dev->tagset.timeout = NVME_IO_TIMEOUT; @@ -3029,6 +3027,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */ .driver_data = NVME_QUIRK_LIGHTNVM, }, + { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */ + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 59a6d232f77a..0884bedcfc7a 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -192,6 +192,9 @@ static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) static void __iomem *pci_msix_desc_addr(struct msi_desc *desc) { + if (desc->msi_attrib.is_virtual) + return NULL; + return desc->mask_base + desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; } @@ -206,14 +209,19 @@ static void __iomem *pci_msix_desc_addr(struct msi_desc *desc) u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag) { u32 mask_bits = desc->masked; + void __iomem *desc_addr; if (pci_msi_ignore_mask) return 0; + desc_addr = pci_msix_desc_addr(desc); + if (!desc_addr) + return 0; mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; if (flag) mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT; - writel(mask_bits, pci_msix_desc_addr(desc) + PCI_MSIX_ENTRY_VECTOR_CTRL); + + writel(mask_bits, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL); return mask_bits; } @@ -273,6 +281,11 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) if (entry->msi_attrib.is_msix) { void __iomem *base = pci_msix_desc_addr(entry); + if (!base) { + WARN_ON(1); + return; + } + msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR); msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR); msg->data = readl(base + PCI_MSIX_ENTRY_DATA); @@ -303,6 +316,9 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) } else if (entry->msi_attrib.is_msix) { void __iomem *base = pci_msix_desc_addr(entry); + if (!base) + goto skip; + writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR); writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR); writel(msg->data, base + PCI_MSIX_ENTRY_DATA); @@ -327,7 +343,13 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) msg->data); } } + +skip: entry->msg = *msg; + + if (entry->write_msi_msg) + entry->write_msi_msg(entry, entry->write_msi_msg_data); + } void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) @@ -550,6 +572,7 @@ msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd) entry->msi_attrib.is_msix = 0; entry->msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT); + entry->msi_attrib.is_virtual = 0; entry->msi_attrib.entry_nr = 0; entry->msi_attrib.maskbit = !!(control & PCI_MSI_FLAGS_MASKBIT); entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ @@ -674,6 +697,7 @@ static int msix_setup_entries(struct 
pci_dev *dev, void __iomem *base, struct irq_affinity_desc *curmsk, *masks = NULL; struct msi_desc *entry; int ret, i; + int vec_count = pci_msix_vec_count(dev); if (affd) masks = irq_create_affinity_masks(nvec, affd); @@ -696,6 +720,10 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, entry->msi_attrib.entry_nr = entries[i].entry; else entry->msi_attrib.entry_nr = i; + + entry->msi_attrib.is_virtual = + entry->msi_attrib.entry_nr >= vec_count; + entry->msi_attrib.default_irq = dev->irq; entry->mask_base = base; @@ -714,12 +742,19 @@ static void msix_program_entries(struct pci_dev *dev, { struct msi_desc *entry; int i = 0; + void __iomem *desc_addr; for_each_pci_msi_entry(entry, dev) { if (entries) entries[i++].vector = entry->irq; - entry->masked = readl(pci_msix_desc_addr(entry) + - PCI_MSIX_ENTRY_VECTOR_CTRL); + + desc_addr = pci_msix_desc_addr(entry); + if (desc_addr) + entry->masked = readl(desc_addr + + PCI_MSIX_ENTRY_VECTOR_CTRL); + else + entry->masked = 0; + msix_mask_irq(entry, 1); } } @@ -932,7 +967,7 @@ int pci_msix_vec_count(struct pci_dev *dev) EXPORT_SYMBOL(pci_msix_vec_count); static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, - int nvec, struct irq_affinity *affd) + int nvec, struct irq_affinity *affd, int flags) { int nr_entries; int i, j; @@ -943,7 +978,7 @@ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, nr_entries = pci_msix_vec_count(dev); if (nr_entries < 0) return nr_entries; - if (nvec > nr_entries) + if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL)) return nr_entries; if (entries) { @@ -1079,7 +1114,8 @@ EXPORT_SYMBOL(pci_enable_msi); static int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, - int maxvec, struct irq_affinity *affd) + int maxvec, struct irq_affinity *affd, + int flags) { int rc, nvec = maxvec; @@ -1096,7 +1132,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev, return -ENOSPC; } - rc = __pci_enable_msix(dev, entries, nvec, affd); + rc = __pci_enable_msix(dev, entries, nvec, affd, flags); if (rc == 0) return nvec; @@ -1127,7 +1163,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev, int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec) { - return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL); + return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0); } EXPORT_SYMBOL(pci_enable_msix_range); @@ -1167,7 +1203,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, if (flags & PCI_IRQ_MSIX) { msix_vecs = __pci_enable_msix_range(dev, NULL, min_vecs, - max_vecs, affd); + max_vecs, affd, flags); if (msix_vecs > 0) return msix_vecs; } diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index bebbde4ebec0..8c94cd3fd1f2 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -30,6 +30,10 @@ module_param(use_dma_mrpc, bool, 0644); MODULE_PARM_DESC(use_dma_mrpc, "Enable the use of the DMA MRPC feature"); +static int nirqs = 32; +module_param(nirqs, int, 0644); +MODULE_PARM_DESC(nirqs, "number of interrupts to allocate (more may be useful for NTB applications)"); + static dev_t switchtec_devt; static DEFINE_IDA(switchtec_minor_ida); @@ -1263,8 +1267,12 @@ static int switchtec_init_isr(struct switchtec_dev *stdev) int dma_mrpc_irq; int rc; - nvecs = pci_alloc_irq_vectors(stdev->pdev, 1, 4, - PCI_IRQ_MSIX | PCI_IRQ_MSI); + if (nirqs < 4) + nirqs = 4; + + nvecs = 
pci_alloc_irq_vectors(stdev->pdev, 1, nirqs, + PCI_IRQ_MSIX | PCI_IRQ_MSI | + PCI_IRQ_VIRTUAL); if (nvecs < 0) return nvecs; diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 9fd6dd342169..6df481896b5f 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1454,7 +1454,7 @@ static void __exit rapl_exit(void) unregister_pm_notifier(&rapl_pm_notifier); } -module_init(rapl_init); +fs_initcall(rapl_init); module_exit(rapl_exit); MODULE_DESCRIPTION("Intel Runtime Average Power Limit (RAPL) common code"); diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index 540e8aafc990..f808c5fa9838 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -671,7 +671,7 @@ static int __init powercap_init(void) return class_register(&powercap_class); } -device_initcall(powercap_init); +fs_initcall(powercap_init); MODULE_DESCRIPTION("PowerCap sysfs Driver"); MODULE_AUTHOR("Srinivas Pandruvada <[email protected]>"); diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index e8fc28dba8df..96f0d34e9459 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/kthread.h> +#include <linux/bug.h> #include "zfcp_ext.h" #include "zfcp_reqlist.h" @@ -217,6 +218,12 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(enum zfcp_erp_act_type need, struct zfcp_erp_action *erp_action; struct zfcp_scsi_dev *zfcp_sdev; + if (WARN_ON_ONCE(need != ZFCP_ERP_ACTION_REOPEN_LUN && + need != ZFCP_ERP_ACTION_REOPEN_PORT && + need != ZFCP_ERP_ACTION_REOPEN_PORT_FORCED && + need != ZFCP_ERP_ACTION_REOPEN_ADAPTER)) + return NULL; + switch (need) { case ZFCP_ERP_ACTION_REOPEN_LUN: zfcp_sdev = sdev_to_zfcp(sdev); diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index d94496ee6883..296bbc3c4606 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/blktrace_api.h> +#include <linux/types.h> #include <linux/slab.h> #include <scsi/fc/fc_els.h> #include "zfcp_ext.h" @@ -741,6 +742,7 @@ static struct zfcp_fsf_req *zfcp_fsf_req_create(struct zfcp_qdio *qdio, static int zfcp_fsf_req_send(struct zfcp_fsf_req *req) { + const bool is_srb = zfcp_fsf_req_is_status_read_buffer(req); struct zfcp_adapter *adapter = req->adapter; struct zfcp_qdio *qdio = adapter->qdio; int req_id = req->req_id; @@ -757,8 +759,20 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req) return -EIO; } + /* + * NOTE: DO NOT TOUCH ASYNC req PAST THIS POINT. + * ONLY TOUCH SYNC req AGAIN ON req->completion. + * + * The request might complete and be freed concurrently at any point + * now. This is not protected by the QDIO-lock (req_q_lock). So any + * uncontrolled access after this might result in a use-after-free bug. + * Only if the request doesn't have ZFCP_STATUS_FSFREQ_CLEANUP set, and + * when it is completed via req->completion, is it safe to use req + * again. + */ + /* Don't increase for unsolicited status */ - if (!zfcp_fsf_req_is_status_read_buffer(req)) + if (!is_srb) adapter->fsf_req_seq_no++; adapter->req_no++; @@ -805,6 +819,7 @@ int zfcp_fsf_status_read(struct zfcp_qdio *qdio) retval = zfcp_fsf_req_send(req); if (retval) goto failed_req_send; + /* NOTE: DO NOT TOUCH req PAST THIS POINT! 
*/ goto out; @@ -914,8 +929,10 @@ struct zfcp_fsf_req *zfcp_fsf_abort_fcp_cmnd(struct scsi_cmnd *scmnd) req->qtcb->bottom.support.req_handle = (u64) old_req_id; zfcp_fsf_start_timer(req, ZFCP_FSF_SCSI_ER_TIMEOUT); - if (!zfcp_fsf_req_send(req)) + if (!zfcp_fsf_req_send(req)) { + /* NOTE: DO NOT TOUCH req, UNTIL IT COMPLETES! */ goto out; + } out_error_free: zfcp_fsf_req_free(req); @@ -1098,6 +1115,7 @@ int zfcp_fsf_send_ct(struct zfcp_fc_wka_port *wka_port, ret = zfcp_fsf_req_send(req); if (ret) goto failed_send; + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ goto out; @@ -1198,6 +1216,7 @@ int zfcp_fsf_send_els(struct zfcp_adapter *adapter, u32 d_id, ret = zfcp_fsf_req_send(req); if (ret) goto failed_send; + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ goto out; @@ -1243,6 +1262,7 @@ int zfcp_fsf_exchange_config_data(struct zfcp_erp_action *erp_action) zfcp_fsf_req_free(req); erp_action->fsf_req_id = 0; } + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ out: spin_unlock_irq(&qdio->req_q_lock); return retval; @@ -1279,8 +1299,10 @@ int zfcp_fsf_exchange_config_data_sync(struct zfcp_qdio *qdio, zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT); retval = zfcp_fsf_req_send(req); spin_unlock_irq(&qdio->req_q_lock); - if (!retval) + if (!retval) { + /* NOTE: ONLY TOUCH SYNC req AGAIN ON req->completion. */ wait_for_completion(&req->completion); + } zfcp_fsf_req_free(req); return retval; @@ -1330,6 +1352,7 @@ int zfcp_fsf_exchange_port_data(struct zfcp_erp_action *erp_action) zfcp_fsf_req_free(req); erp_action->fsf_req_id = 0; } + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ out: spin_unlock_irq(&qdio->req_q_lock); return retval; @@ -1372,8 +1395,10 @@ int zfcp_fsf_exchange_port_data_sync(struct zfcp_qdio *qdio, retval = zfcp_fsf_req_send(req); spin_unlock_irq(&qdio->req_q_lock); - if (!retval) + if (!retval) { + /* NOTE: ONLY TOUCH SYNC req AGAIN ON req->completion. */ wait_for_completion(&req->completion); + } zfcp_fsf_req_free(req); @@ -1493,6 +1518,7 @@ int zfcp_fsf_open_port(struct zfcp_erp_action *erp_action) erp_action->fsf_req_id = 0; put_device(&port->dev); } + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ out: spin_unlock_irq(&qdio->req_q_lock); return retval; @@ -1557,6 +1583,7 @@ int zfcp_fsf_close_port(struct zfcp_erp_action *erp_action) zfcp_fsf_req_free(req); erp_action->fsf_req_id = 0; } + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ out: spin_unlock_irq(&qdio->req_q_lock); return retval; @@ -1600,6 +1627,7 @@ int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port) { struct zfcp_qdio *qdio = wka_port->adapter->qdio; struct zfcp_fsf_req *req; + unsigned long req_id = 0; int retval = -EIO; spin_lock_irq(&qdio->req_q_lock); @@ -1622,14 +1650,17 @@ int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port) hton24(req->qtcb->bottom.support.d_id, wka_port->d_id); req->data = wka_port; + req_id = req->req_id; + zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT); retval = zfcp_fsf_req_send(req); if (retval) zfcp_fsf_req_free(req); + /* NOTE: DO NOT TOUCH req PAST THIS POINT! 
*/ out: spin_unlock_irq(&qdio->req_q_lock); if (!retval) - zfcp_dbf_rec_run_wka("fsowp_1", wka_port, req->req_id); + zfcp_dbf_rec_run_wka("fsowp_1", wka_port, req_id); return retval; } @@ -1655,6 +1686,7 @@ int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port) { struct zfcp_qdio *qdio = wka_port->adapter->qdio; struct zfcp_fsf_req *req; + unsigned long req_id = 0; int retval = -EIO; spin_lock_irq(&qdio->req_q_lock); @@ -1677,14 +1709,17 @@ int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port) req->data = wka_port; req->qtcb->header.port_handle = wka_port->handle; + req_id = req->req_id; + zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT); retval = zfcp_fsf_req_send(req); if (retval) zfcp_fsf_req_free(req); + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ out: spin_unlock_irq(&qdio->req_q_lock); if (!retval) - zfcp_dbf_rec_run_wka("fscwp_1", wka_port, req->req_id); + zfcp_dbf_rec_run_wka("fscwp_1", wka_port, req_id); return retval; } @@ -1776,6 +1811,7 @@ int zfcp_fsf_close_physical_port(struct zfcp_erp_action *erp_action) zfcp_fsf_req_free(req); erp_action->fsf_req_id = 0; } + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ out: spin_unlock_irq(&qdio->req_q_lock); return retval; @@ -1899,6 +1935,7 @@ int zfcp_fsf_open_lun(struct zfcp_erp_action *erp_action) zfcp_fsf_req_free(req); erp_action->fsf_req_id = 0; } + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ out: spin_unlock_irq(&qdio->req_q_lock); return retval; @@ -1987,6 +2024,7 @@ int zfcp_fsf_close_lun(struct zfcp_erp_action *erp_action) zfcp_fsf_req_free(req); erp_action->fsf_req_id = 0; } + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ out: spin_unlock_irq(&qdio->req_q_lock); return retval; @@ -2299,6 +2337,7 @@ int zfcp_fsf_fcp_cmnd(struct scsi_cmnd *scsi_cmnd) retval = zfcp_fsf_req_send(req); if (unlikely(retval)) goto failed_scsi_cmnd; + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ goto out; @@ -2373,8 +2412,10 @@ struct zfcp_fsf_req *zfcp_fsf_fcp_task_mgmt(struct scsi_device *sdev, zfcp_fc_fcp_tm(fcp_cmnd, sdev, tm_flags); zfcp_fsf_start_timer(req, ZFCP_FSF_SCSI_ER_TIMEOUT); - if (!zfcp_fsf_req_send(req)) + if (!zfcp_fsf_req_send(req)) { + /* NOTE: DO NOT TOUCH req, UNTIL IT COMPLETES! */ goto out; + } zfcp_fsf_req_free(req); req = NULL; diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index aeda53901064..c00e3dd57990 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -185,7 +185,7 @@ zalon7xx-objs := zalon.o ncr53c8xx.o # Files generated that shall be removed upon make clean clean-files := 53c700_d.h 53c700_u.h scsi_devinfo_tbl.c -$(obj)/53c700.o $(MODVERDIR)/$(obj)/53c700.ver: $(obj)/53c700_d.h +$(obj)/53c700.o: $(obj)/53c700_d.h $(obj)/scsi_sysfs.o: $(obj)/scsi_devinfo_tbl.c diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index ff0d8c6a8d0c..55522b7162d3 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -462,6 +462,9 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) else shost->dma_boundary = 0xffffffff; + if (sht->virt_boundary_mask) + shost->virt_boundary_mask = sht->virt_boundary_mask; + device_initialize(&shost->shost_gendev); dev_set_name(&shost->shost_gendev, "host%d", shost->host_no); shost->shost_gendev.bus = &scsi_bus_type; diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 8e1053bdd843..52e866659853 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -2591,7 +2591,7 @@ void fc_exch_recv(struct fc_lport *lport, struct fc_frame *fp) /* lport lock ? 
*/ if (!lport || lport->state == LPORT_ST_DISABLED) { - FC_LPORT_DBG(lport, "Receiving frames for an lport that " + FC_LIBFC_DBG("Receiving frames for an lport that " "has not been initialized correctly\n"); fc_frame_free(fp); return; diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 4f339f939a51..bec83eb8ab87 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -414,7 +414,6 @@ static void sas_wait_eh(struct domain_device *dev) goto retry; } } -EXPORT_SYMBOL(sas_wait_eh); static int sas_queue_reset(struct domain_device *dev, int reset_type, u64 lun, int wait) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h index 2322ddb085c0..34070874616d 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.h +++ b/drivers/scsi/lpfc/lpfc_debugfs.h @@ -330,7 +330,7 @@ enum { * This function dumps an entry indexed by @idx from a queue specified by the * queue descriptor @q. **/ -static inline void +static void lpfc_debug_dump_qe(struct lpfc_queue *q, uint32_t idx) { char line_buf[LPFC_LBUF_SZ]; diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index ca724fe91b8d..a14e8344822b 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -21,8 +21,8 @@ /* * MegaRAID SAS Driver meta data */ -#define MEGASAS_VERSION "07.710.06.00-rc1" -#define MEGASAS_RELDATE "June 18, 2019" +#define MEGASAS_VERSION "07.710.50.00-rc1" +#define MEGASAS_RELDATE "June 28, 2019" /* * Device IDs diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 80ab9700f1de..b2339d04a700 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -105,6 +105,10 @@ MODULE_PARM_DESC(perf_mode, "Performance mode (only for Aero adapters), options: "default mode is 'balanced'" ); +int event_log_level = MFI_EVT_CLASS_CRITICAL; +module_param(event_log_level, int, 0644); +MODULE_PARM_DESC(event_log_level, "Asynchronous event logging level- range is: -2(CLASS_DEBUG) to 4(CLASS_DEAD), Default: 2(CLASS_CRITICAL)"); + MODULE_LICENSE("GPL"); MODULE_VERSION(MEGASAS_VERSION); MODULE_AUTHOR("[email protected]"); @@ -280,7 +284,7 @@ void megasas_set_dma_settings(struct megasas_instance *instance, } } -void +static void megasas_issue_dcmd(struct megasas_instance *instance, struct megasas_cmd *cmd) { instance->instancet->fire_cmd(instance, @@ -404,7 +408,13 @@ megasas_decode_evt(struct megasas_instance *instance) union megasas_evt_class_locale class_locale; class_locale.word = le32_to_cpu(evt_detail->cl.word); - if (class_locale.members.class >= MFI_EVT_CLASS_CRITICAL) + if ((event_log_level < MFI_EVT_CLASS_DEBUG) || + (event_log_level > MFI_EVT_CLASS_DEAD)) { + printk(KERN_WARNING "megaraid_sas: provided event log level is out of range, setting it to default 2(CLASS_CRITICAL), permissible range is: -2 to 4\n"); + event_log_level = MFI_EVT_CLASS_CRITICAL; + } + + if (class_locale.members.class >= event_log_level) dev_info(&instance->pdev->dev, "%d (%s/0x%04x/%s) - %s\n", le32_to_cpu(evt_detail->seq_num), format_timestamp(le32_to_cpu(evt_detail->time_stamp)), @@ -2237,7 +2247,7 @@ megasas_internal_reset_defer_cmds(struct megasas_instance *instance); static void process_fw_state_change_wq(struct work_struct *work); -void megasas_do_ocr(struct megasas_instance *instance) +static void megasas_do_ocr(struct megasas_instance *instance) { if ((instance->pdev->device == PCI_DEVICE_ID_LSI_SAS1064R) 
|| (instance->pdev->device == PCI_DEVICE_ID_DELL_PERC5) || @@ -3303,7 +3313,7 @@ static DEVICE_ATTR_RO(fw_cmds_outstanding); static DEVICE_ATTR_RO(dump_system_regs); static DEVICE_ATTR_RO(raid_map_id); -struct device_attribute *megaraid_host_attrs[] = { +static struct device_attribute *megaraid_host_attrs[] = { &dev_attr_fw_crash_buffer_size, &dev_attr_fw_crash_buffer, &dev_attr_fw_crash_state, @@ -3334,6 +3344,7 @@ static struct scsi_host_template megasas_template = { .shost_attrs = megaraid_host_attrs, .bios_param = megasas_bios_param, .change_queue_depth = scsi_change_queue_depth, + .max_segment_size = 0xffffffff, .no_write_same = 1, }; @@ -5933,7 +5944,8 @@ static int megasas_init_fw(struct megasas_instance *instance) instance->is_rdpq = (scratch_pad_1 & MR_RDPQ_MODE_OFFSET) ? 1 : 0; - if (!instance->msix_combined) { + if (instance->adapter_type >= INVADER_SERIES && + !instance->msix_combined) { instance->msix_load_balance = true; instance->smp_affinity_enable = false; } @@ -6546,7 +6558,8 @@ megasas_get_target_prop(struct megasas_instance *instance, int ret; struct megasas_cmd *cmd; struct megasas_dcmd_frame *dcmd; - u16 targetId = (sdev->channel % 2) + sdev->id; + u16 targetId = ((sdev->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) + + sdev->id; cmd = megasas_get_cmd(instance); @@ -8748,6 +8761,12 @@ static int __init megasas_init(void) goto err_pcidrv; } + if ((event_log_level < MFI_EVT_CLASS_DEBUG) || + (event_log_level > MFI_EVT_CLASS_DEAD)) { + printk(KERN_WARNING "megaraid_sas: provided event log level is out of range, setting it to default 2(CLASS_CRITICAL), permissible range is: -2 to 4\n"); + event_log_level = MFI_EVT_CLASS_CRITICAL; + } + rval = driver_create_file(&megasas_pci_driver.driver, &driver_attr_version); if (rval) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 27c731a3fb49..717ba0845a2a 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -10238,6 +10238,7 @@ static struct scsi_host_template mpt3sas_driver_template = { .this_id = -1, .sg_tablesize = MPT3SAS_SG_DEPTH, .max_sectors = 32767, + .max_segment_size = 0xffffffff, .cmd_per_lun = 7, .shost_attrs = mpt3sas_host_attrs, .sdev_attrs = mpt3sas_dev_attrs, diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index dd38c356a1a4..9453705f643a 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -888,6 +888,8 @@ static void pm8001_dev_gone_notify(struct domain_device *dev) spin_unlock_irqrestore(&pm8001_ha->lock, flags); pm8001_exec_internal_task_abort(pm8001_ha, pm8001_dev , dev, 1, 0); + while (pm8001_dev->running_req) + msleep(20); spin_lock_irqsave(&pm8001_ha->lock, flags); } PM8001_CHIP_DISP->dereg_dev_req(pm8001_ha, device_id); @@ -1256,8 +1258,10 @@ int pm8001_abort_task(struct sas_task *task) PM8001_MSG_DBG(pm8001_ha, pm8001_printk("Waiting for Port reset\n")); wait_for_completion(&completion_reset); - if (phy->port_reset_status) + if (phy->port_reset_status) { + pm8001_dev_gone_notify(dev); goto out; + } /* * 4. 
SATA Abort ALL diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 1128d86d241a..73261902d75d 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -604,7 +604,7 @@ static void update_main_config_table(struct pm8001_hba_info *pm8001_ha) pm8001_ha->main_cfg_tbl.pm80xx_tbl.port_recovery_timer &= 0x0000ffff; pm8001_ha->main_cfg_tbl.pm80xx_tbl.port_recovery_timer |= - 0x140000; + CHIP_8006_PORT_RECOVERY_TIMEOUT; } pm8001_mw32(address, MAIN_PORT_RECOVERY_TIMER, pm8001_ha->main_cfg_tbl.pm80xx_tbl.port_recovery_timer); diff --git a/drivers/scsi/pm8001/pm80xx_hwi.h b/drivers/scsi/pm8001/pm80xx_hwi.h index 84d7426441bf..dc9ab7689060 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.h +++ b/drivers/scsi/pm8001/pm80xx_hwi.h @@ -230,6 +230,8 @@ #define SAS_MAX_AIP 0x200000 #define IT_NEXUS_TIMEOUT 0x7D0 #define PORT_RECOVERY_TIMEOUT ((IT_NEXUS_TIMEOUT/100) + 30) +/* Port recovery timeout, 10000 ms for PM8006 controller */ +#define CHIP_8006_PORT_RECOVERY_TIMEOUT 0x640000 #ifdef __LITTLE_ENDIAN_BITFIELD struct sas_identify_frame_local { diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index a08ff3bd6310..df14597752ec 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -239,6 +239,8 @@ static struct { {"LSI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"ENGENIO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"LENOVO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, + {"SanDisk", "Cruzer Blade", NULL, BLIST_TRY_VPD_PAGES | + BLIST_INQUIRY_36}, {"SMSC", "USB 2 HS-CF", NULL, BLIST_SPARSELUN | BLIST_INQUIRY_36}, {"SONY", "CD-ROM CDU-8001", NULL, BLIST_BORKEN}, {"SONY", "TSL", NULL, BLIST_FORCELUN}, /* DDS3 & DDS4 autoloaders */ diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e1da8c70a266..9381171c2fc0 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -84,11 +84,11 @@ int scsi_init_sense_cache(struct Scsi_Host *shost) struct kmem_cache *cache; int ret = 0; + mutex_lock(&scsi_sense_cache_mutex); cache = scsi_select_sense_cache(shost->unchecked_isa_dma); if (cache) - return 0; + goto exit; - mutex_lock(&scsi_sense_cache_mutex); if (shost->unchecked_isa_dma) { scsi_sense_isadma_cache = kmem_cache_create("scsi_sense_cache(DMA)", @@ -104,7 +104,7 @@ int scsi_init_sense_cache(struct Scsi_Host *shost) if (!scsi_sense_cache) ret = -ENOMEM; } - + exit: mutex_unlock(&scsi_sense_cache_mutex); return ret; } @@ -1452,7 +1452,7 @@ static void scsi_softirq_done(struct request *rq) disposition = scsi_decide_disposition(cmd); if (disposition != SUCCESS && time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) { - sdev_printk(KERN_ERR, cmd->device, + scmd_printk(KERN_ERR, cmd, "timing out command, waited %lus\n", wait_for/HZ); disposition = SUCCESS; @@ -1784,6 +1784,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize); } + shost->max_sectors = min_t(unsigned int, shost->max_sectors, + dma_max_mapping_size(dev) << SECTOR_SHIFT); blk_queue_max_hw_sectors(q, shost->max_sectors); if (shost->unchecked_isa_dma) blk_queue_bounce_limit(q, BLK_BOUNCE_ISA); @@ -1791,7 +1793,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) dma_set_seg_boundary(dev, shost->dma_boundary); blk_queue_max_segment_size(q, shost->max_segment_size); - dma_set_max_seg_size(dev, shost->max_segment_size); + blk_queue_virt_boundary(q, shost->virt_boundary_mask); + dma_set_max_seg_size(dev, 
queue_max_segment_size(q)); /* * Set a reasonable default alignment: The larger of 32-byte (dword), diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index db16c19e05c4..5d6ff3931632 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -461,7 +461,7 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) { struct gendisk *disk = sdkp->disk; unsigned int nr_zones; - u32 zone_blocks; + u32 zone_blocks = 0; int ret; if (!sd_is_zoned(sdkp)) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index c2b6a0ca6933..ed8b9ac805e6 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1423,9 +1423,6 @@ static int storvsc_device_configure(struct scsi_device *sdevice) { blk_queue_rq_timeout(sdevice->request_queue, (storvsc_timeout * HZ)); - /* Ensure there are no gaps in presented sgls */ - blk_queue_virt_boundary(sdevice->request_queue, PAGE_SIZE - 1); - sdevice->no_write_same = 1; /* @@ -1698,6 +1695,8 @@ static struct scsi_host_template scsi_driver = { .this_id = -1, /* Make sure we dont get a sg segment crosses a page boundary */ .dma_boundary = PAGE_SIZE-1, + /* Ensure there are no gaps in presented sgls */ + .virt_boundary_mask = PAGE_SIZE-1, .no_write_same = 1, .track_queue_depth = 1, .change_queue_depth = storvsc_change_queue_depth, diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 04d3686511c8..e274053109d0 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4587,8 +4587,6 @@ static int ufshcd_slave_configure(struct scsi_device *sdev) struct request_queue *q = sdev->request_queue; blk_queue_update_dma_pad(q, PRDT_DATA_BYTE_COUNT_PAD - 1); - blk_queue_max_segment_size(q, PRDT_DATA_BYTE_COUNT_MAX); - return 0; } @@ -7022,6 +7020,7 @@ static struct scsi_host_template ufshcd_driver_template = { .sg_tablesize = SG_ALL, .cmd_per_lun = UFSHCD_CMD_PER_LUN, .can_queue = UFSHCD_CAN_QUEUE, + .max_segment_size = PRDT_DATA_BYTE_COUNT_MAX, .max_host_blocked = 1, .track_queue_depth = 1, .sdev_groups = ufshcd_driver_groups, diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c index 213ab3cc6b80..d3446acf9bbd 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c @@ -487,6 +487,7 @@ static int proc_thermal_rapl_add(struct pci_dev *pdev, rapl_mmio_cpu_online, rapl_mmio_cpu_down_prep); if (ret < 0) { powercap_unregister_control_type(rapl_mmio_priv.control_type); + rapl_mmio_priv.control_type = NULL; return ret; } rapl_mmio_priv.pcap_rapl_online = ret; @@ -496,6 +497,9 @@ static int proc_thermal_rapl_add(struct pci_dev *pdev, static void proc_thermal_rapl_remove(void) { + if (IS_ERR_OR_NULL(rapl_mmio_priv.control_type)) + return; + cpuhp_remove_state(rapl_mmio_priv.pcap_rapl_online); powercap_unregister_control_type(rapl_mmio_priv.control_type); } diff --git a/fs/block_dev.c b/fs/block_dev.c index 4707dfff991b..c2a85b587922 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -345,15 +345,24 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) struct bio *bio; bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0; bool is_read = (iov_iter_rw(iter) == READ), is_sync; + bool nowait = (iocb->ki_flags & IOCB_NOWAIT) != 0; loff_t pos = iocb->ki_pos; blk_qc_t qc = BLK_QC_T_NONE; - int ret = 0; + gfp_t gfp; + ssize_t ret; if ((pos | iov_iter_alignment(iter)) & (bdev_logical_block_size(bdev) - 1)) 
return -EINVAL; - bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool); + if (nowait) + gfp = GFP_NOWAIT; + else + gfp = GFP_KERNEL; + + bio = bio_alloc_bioset(gfp, nr_pages, &blkdev_dio_pool); + if (!bio) + return -EAGAIN; dio = container_of(bio, struct blkdev_dio, bio); dio->is_sync = is_sync = is_sync_kiocb(iocb); @@ -375,7 +384,10 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) if (!is_poll) blk_start_plug(&plug); + ret = 0; for (;;) { + int err; + bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = pos >> 9; bio->bi_write_hint = iocb->ki_hint; @@ -383,8 +395,10 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) bio->bi_end_io = blkdev_bio_end_io; bio->bi_ioprio = iocb->ki_ioprio; - ret = bio_iov_iter_get_pages(bio, iter); - if (unlikely(ret)) { + err = bio_iov_iter_get_pages(bio, iter); + if (unlikely(err)) { + if (!ret) + ret = err; bio->bi_status = BLK_STS_IOERR; bio_endio(bio); break; @@ -399,6 +413,14 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) task_io_account_write(bio->bi_iter.bi_size); } + /* + * Tell underlying layer to not block for resource shortage. + * And if we would have blocked, return error inline instead + * of through the bio->bi_end_io() callback. + */ + if (nowait) + bio->bi_opf |= (REQ_NOWAIT | REQ_NOWAIT_INLINE); + dio->size += bio->bi_iter.bi_size; pos += bio->bi_iter.bi_size; @@ -412,6 +434,11 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) } qc = submit_bio(bio); + if (qc == BLK_QC_T_EAGAIN) { + if (!ret) + ret = -EAGAIN; + goto error; + } if (polled) WRITE_ONCE(iocb->ki_cookie, qc); @@ -432,8 +459,20 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) atomic_inc(&dio->ref); } - submit_bio(bio); - bio = bio_alloc(GFP_KERNEL, nr_pages); + qc = submit_bio(bio); + if (qc == BLK_QC_T_EAGAIN) { + if (!ret) + ret = -EAGAIN; + goto error; + } + ret += bio->bi_iter.bi_size; + + bio = bio_alloc(gfp, nr_pages); + if (!bio) { + if (!ret) + ret = -EAGAIN; + goto error; + } } if (!is_poll) @@ -453,13 +492,16 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) } __set_current_state(TASK_RUNNING); +out: if (!ret) ret = blk_status_to_errno(dio->bio.bi_status); - if (likely(!ret)) - ret = dio->size; bio_put(&dio->bio); return ret; +error: + if (!is_poll) + blk_finish_plug(&plug); + goto out; } static ssize_t diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 212b4a854f2c..38651fae7f21 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -4,6 +4,7 @@ config BTRFS_FS tristate "Btrfs filesystem support" select CRYPTO select CRYPTO_CRC32C + select LIBCRC32C select ZLIB_INFLATE select ZLIB_DEFLATE select LZO_COMPRESS diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 41a2bd2e0c56..5f7ee70b3d1a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4106,6 +4106,7 @@ void close_ctree(struct btrfs_fs_info *fs_info) percpu_counter_destroy(&fs_info->dev_replace.bio_counter); cleanup_srcu_struct(&fs_info->subvol_srcu); + btrfs_free_csum_hash(fs_info); btrfs_free_stripe_hash_table(fs_info); btrfs_free_ref_cache(fs_info); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1af069a9a0c7..ee582a36653d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -395,10 +395,31 @@ static noinline int add_async_extent(struct async_chunk *cow, return 0; } +/* + * Check if the inode has flags compatible with compression + */ +static inline bool inode_can_compress(struct inode *inode) +{ + if 
(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW || + BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) + return false; + return true; +} + +/* + * Check if the inode needs to be submitted to compression, based on mount + * options, defragmentation, properties or heuristics. + */ static inline int inode_need_compress(struct inode *inode, u64 start, u64 end) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + if (!inode_can_compress(inode)) { + WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), + KERN_ERR "BTRFS: unexpected compression for ino %llu\n", + btrfs_ino(BTRFS_I(inode))); + return 0; + } /* force compress */ if (btrfs_test_opt(fs_info, FORCE_COMPRESS)) return 1; @@ -1631,7 +1652,8 @@ int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page, } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) { ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 0, nr_written); - } else if (!inode_need_compress(inode, start, end)) { + } else if (!inode_can_compress(inode) || + !inode_need_compress(inode, start, end)) { ret = cow_file_range(inode, locked_page, start, end, end, page_started, nr_written, 1, NULL); } else { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a13ddba1ebc3..d74b74ca07af 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5941,6 +5941,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 stripe_len; u64 raid56_full_stripe_start = (u64)-1; int data_stripes; + int ret = 0; ASSERT(op != BTRFS_MAP_DISCARD); @@ -5961,8 +5962,8 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, btrfs_crit(fs_info, "stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu", stripe_offset, offset, em->start, logical, stripe_len); - free_extent_map(em); - return -EINVAL; + ret = -EINVAL; + goto out; } /* stripe_offset is the offset of this block in its stripe */ @@ -6009,7 +6010,10 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, io_geom->stripe_offset = stripe_offset; io_geom->raid56_stripe_offset = raid56_full_stripe_start; - return 0; +out: + /* once for us */ + free_extent_map(em); + return ret; } static int __btrfs_map_block(struct btrfs_fs_info *fs_info, diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 270d3c58fb3b..3289b566463f 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1104,6 +1104,10 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, goto out; } + rc = -EOPNOTSUPP; + if (!target_tcon->ses->server->ops->copychunk_range) + goto out; + /* * Note: cifs case is easier than btrfs since server responsible for * checks for proper open modes and file type and if it wants @@ -1115,11 +1119,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, /* should we flush first and last page first */ truncate_inode_pages(&target_inode->i_data, 0); - if (target_tcon->ses->server->ops->copychunk_range) + rc = file_modified(dst_file); + if (!rc) rc = target_tcon->ses->server->ops->copychunk_range(xid, smb_file_src, smb_file_target, off, len, destoff); - else - rc = -EOPNOTSUPP; + + file_accessed(src_file); /* force revalidate of size and timestamps of target file now * that target is updated on the server diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index aea005703785..4b21a90015a9 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -152,5 +152,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations 
cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.20" +#define CIFS_VERSION "2.21" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 1bffe029fb66..56ca4b8ccaba 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -2406,6 +2406,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) struct inode *inode = d_inode(direntry); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsInodeInfo *cifsInode = CIFS_I(inode); + struct cifsFileInfo *wfile; + struct cifs_tcon *tcon; char *full_path = NULL; int rc = -EACCES; __u32 dosattr = 0; @@ -2452,6 +2454,20 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) mapping_set_error(inode->i_mapping, rc); rc = 0; + if (attrs->ia_valid & ATTR_MTIME) { + rc = cifs_get_writable_file(cifsInode, false, &wfile); + if (!rc) { + tcon = tlink_tcon(wfile->tlink); + rc = tcon->ses->server->ops->flush(xid, tcon, &wfile->fid); + cifsFileInfo_put(wfile); + if (rc) + return rc; + } else if (rc != -EBADF) + return rc; + else + rc = 0; + } + if (attrs->ia_valid & ATTR_SIZE) { rc = cifs_set_file_size(inode, attrs, xid, full_path); if (rc != 0) diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 54bffb2a1786..e6a1fc72018f 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -88,14 +88,20 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, } if (buf) { - /* open response does not have IndexNumber field - get it */ - rc = SMB2_get_srv_num(xid, oparms->tcon, fid->persistent_fid, + /* if open response does not have IndexNumber field - get it */ + if (smb2_data->IndexNumber == 0) { + rc = SMB2_get_srv_num(xid, oparms->tcon, + fid->persistent_fid, fid->volatile_fid, &smb2_data->IndexNumber); - if (rc) { - /* let get_inode_info disable server inode numbers */ - smb2_data->IndexNumber = 0; - rc = 0; + if (rc) { + /* + * let get_inode_info disable server inode + * numbers + */ + smb2_data->IndexNumber = 0; + rc = 0; + } } move_smb2_info_to_cifs(buf, smb2_data); } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 0cdc4e47ca87..a5bc1b671c12 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -694,8 +694,51 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) smb2_set_related(&rqst[1]); + /* + * We do not hold the lock for the open because in case + * SMB2_open needs to reconnect, it will end up calling + * cifs_mark_open_files_invalid() which takes the lock again + * thus causing a deadlock + */ + + mutex_unlock(&tcon->crfid.fid_mutex); rc = compound_send_recv(xid, ses, flags, 2, rqst, resp_buftype, rsp_iov); + mutex_lock(&tcon->crfid.fid_mutex); + + /* + * Now we need to check again as the cached root might have + * been successfully re-opened from a concurrent process + */ + + if (tcon->crfid.is_valid) { + /* work was already done */ + + /* stash fids for close() later */ + struct cifs_fid fid = { + .persistent_fid = pfid->persistent_fid, + .volatile_fid = pfid->volatile_fid, + }; + + /* + * caller expects this func to set pfid to a valid + * cached root, so we copy the existing one and get a + * reference. 
+ */ + memcpy(pfid, tcon->crfid.fid, sizeof(*pfid)); + kref_get(&tcon->crfid.refcount); + + mutex_unlock(&tcon->crfid.fid_mutex); + + if (rc == 0) { + /* close extra handle outside of crit sec */ + SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); + } + goto oshr_free; + } + + /* Cached root is still invalid, continue normally */ + if (rc) goto oshr_exit; @@ -711,11 +754,12 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) tcon->crfid.is_valid = true; kref_init(&tcon->crfid.refcount); + /* BB TBD check to see if oplock level check can be removed below */ if (o_rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) { kref_get(&tcon->crfid.refcount); - oplock = smb2_parse_lease_state(server, o_rsp, - &oparms.fid->epoch, - oparms.fid->lease_key); + smb2_parse_contexts(server, o_rsp, + &oparms.fid->epoch, + oparms.fid->lease_key, &oplock, NULL); } else goto oshr_exit; @@ -729,8 +773,9 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) (char *)&tcon->crfid.file_all_info)) tcon->crfid.file_all_info_is_valid = 1; - oshr_exit: +oshr_exit: mutex_unlock(&tcon->crfid.fid_mutex); +oshr_free: SMB2_open_free(&rqst[0]); SMB2_query_info_free(&rqst[1]); free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index f58e4dc3987b..c8cd7b6cdda2 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1873,10 +1873,21 @@ create_reconnect_durable_buf(struct cifs_fid *fid) return buf; } -__u8 -smb2_parse_lease_state(struct TCP_Server_Info *server, +static void +parse_query_id_ctxt(struct create_context *cc, struct smb2_file_all_info *buf) +{ + struct create_on_disk_id *pdisk_id = (struct create_on_disk_id *)cc; + + cifs_dbg(FYI, "parse query id context 0x%llx 0x%llx\n", + pdisk_id->DiskFileId, pdisk_id->VolumeId); + buf->IndexNumber = pdisk_id->DiskFileId; +} + +void +smb2_parse_contexts(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp, - unsigned int *epoch, char *lease_key) + unsigned int *epoch, char *lease_key, __u8 *oplock, + struct smb2_file_all_info *buf) { char *data_offset; struct create_context *cc; @@ -1884,15 +1895,24 @@ smb2_parse_lease_state(struct TCP_Server_Info *server, unsigned int remaining; char *name; + *oplock = 0; data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset); remaining = le32_to_cpu(rsp->CreateContextsLength); cc = (struct create_context *)data_offset; + + /* Initialize inode number to 0 in case no valid data in qfid context */ + if (buf) + buf->IndexNumber = 0; + while (remaining >= sizeof(struct create_context)) { name = le16_to_cpu(cc->NameOffset) + (char *)cc; if (le16_to_cpu(cc->NameLength) == 4 && - strncmp(name, "RqLs", 4) == 0) - return server->ops->parse_lease_buf(cc, epoch, - lease_key); + strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4) == 0) + *oplock = server->ops->parse_lease_buf(cc, epoch, + lease_key); + else if (buf && (le16_to_cpu(cc->NameLength) == 4) && + strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4) == 0) + parse_query_id_ctxt(cc, buf); next = le32_to_cpu(cc->Next); if (!next) @@ -1901,7 +1921,10 @@ smb2_parse_lease_state(struct TCP_Server_Info *server, cc = (struct create_context *)((char *)cc + next); } - return 0; + if (rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) + *oplock = rsp->OplockLevel; + + return; } static int @@ -2588,12 +2611,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, buf->DeletePending = 0; } - if (rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) - *oplock = 
smb2_parse_lease_state(server, rsp, - &oparms->fid->epoch, - oparms->fid->lease_key); - else - *oplock = rsp->OplockLevel; + + smb2_parse_contexts(server, rsp, &oparms->fid->epoch, + oparms->fid->lease_key, oplock, buf); creat_exit: SMB2_open_free(&rqst); free_rsp_buf(resp_buftype, rsp); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 7e2e782f8edd..747de9317659 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -818,7 +818,9 @@ struct durable_reconnect_context_v2 { } __packed; /* See MS-SMB2 2.2.14.2.9 */ -struct on_disk_id { +struct create_on_disk_id { + struct create_context ccontext; + __u8 Name[8]; __le64 DiskFileId; __le64 VolumeId; __u32 Reserved[4]; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 52df125e9189..07ca72486cfa 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -228,9 +228,10 @@ extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *); extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *, enum securityEnum); -extern __u8 smb2_parse_lease_state(struct TCP_Server_Info *server, - struct smb2_create_rsp *rsp, - unsigned int *epoch, char *lease_key); +extern void smb2_parse_contexts(struct TCP_Server_Info *server, + struct smb2_create_rsp *rsp, + unsigned int *epoch, char *lease_key, + __u8 *oplock, struct smb2_file_all_info *buf); extern int smb3_encryption_required(const struct cifs_tcon *tcon); extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length, struct kvec *iov, unsigned int min_buf_size); diff --git a/fs/io_uring.c b/fs/io_uring.c index e2a66e12fbc6..012bc0efb9d3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -202,7 +202,7 @@ struct async_list { struct file *file; off_t io_end; - size_t io_pages; + size_t io_len; }; struct io_ring_ctx { @@ -333,7 +333,8 @@ struct io_kiocb { #define REQ_F_IO_DRAIN 16 /* drain existing IO first */ #define REQ_F_IO_DRAINED 32 /* drain done */ #define REQ_F_LINK 64 /* linked sqes */ -#define REQ_F_FAIL_LINK 128 /* fail rest of links */ +#define REQ_F_LINK_DONE 128 /* linked sqes done */ +#define REQ_F_FAIL_LINK 256 /* fail rest of links */ u64 user_data; u32 result; u32 sequence; @@ -429,7 +430,7 @@ static inline bool io_sequence_defer(struct io_ring_ctx *ctx, if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN) return false; - return req->sequence > ctx->cached_cq_tail + ctx->sq_ring->dropped; + return req->sequence != ctx->cached_cq_tail + ctx->sq_ring->dropped; } static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx) @@ -632,6 +633,7 @@ static void io_req_link_next(struct io_kiocb *req) nxt->flags |= REQ_F_LINK; } + nxt->flags |= REQ_F_LINK_DONE; INIT_WORK(&nxt->work, io_sq_wq_submit_work); queue_work(req->ctx->sqo_wq, &nxt->work); } @@ -1064,8 +1066,44 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw, */ offset = buf_addr - imu->ubuf; iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); - if (offset) - iov_iter_advance(iter, offset); + + if (offset) { + /* + * Don't use iov_iter_advance() here, as it's really slow for + * using the latter parts of a big fixed buffer - it iterates + * over each segment manually. We can cheat a bit here, because + * we know that: + * + * 1) it's a BVEC iter, we set it up + * 2) all bvecs are PAGE_SIZE in size, except potentially the + * first and last bvec + * + * So just find our index, and adjust the iterator afterwards. + * If the offset is within the first bvec (or the whole first + * bvec), just use iov_iter_advance().
This makes it easier + since we can just skip the first segment, which may not + be PAGE_SIZE aligned. + */ + const struct bio_vec *bvec = imu->bvec; + + if (offset <= bvec->bv_len) { + iov_iter_advance(iter, offset); + } else { + unsigned long seg_skip; + + /* skip first vec */ + offset -= bvec->bv_len; + seg_skip = 1 + (offset >> PAGE_SHIFT); + + iter->bvec = bvec + seg_skip; + iter->nr_segs -= seg_skip; + iter->count -= (seg_skip << PAGE_SHIFT); + iter->iov_offset = offset & ~PAGE_MASK; + if (iter->iov_offset) + iter->count -= iter->iov_offset; + } + } + return 0; } @@ -1120,28 +1158,26 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len) off_t io_end = kiocb->ki_pos + len; if (filp == async_list->file && kiocb->ki_pos == async_list->io_end) { - unsigned long max_pages; + unsigned long max_bytes; /* Use 8x RA size as a decent limiter for both reads/writes */ - max_pages = filp->f_ra.ra_pages; - if (!max_pages) - max_pages = VM_READAHEAD_PAGES; - max_pages *= 8; - - /* If max pages are exceeded, reset the state */ - len >>= PAGE_SHIFT; - if (async_list->io_pages + len <= max_pages) { + max_bytes = filp->f_ra.ra_pages << (PAGE_SHIFT + 3); + if (!max_bytes) + max_bytes = VM_READAHEAD_PAGES << (PAGE_SHIFT + 3); + + /* If max len is exceeded, reset the state */ + if (async_list->io_len + len <= max_bytes) { req->flags |= REQ_F_SEQ_PREV; - async_list->io_pages += len; + async_list->io_len += len; } else { io_end = 0; - async_list->io_pages = 0; + async_list->io_len = 0; } } /* New file? Reset state. */ if (async_list->file != filp) { - async_list->io_pages = 0; + async_list->io_len = 0; async_list->file = filp; } async_list->io_end = io_end; @@ -1630,6 +1666,8 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) INIT_LIST_HEAD(&poll->wait.entry); init_waitqueue_func_entry(&poll->wait, io_poll_wake); + INIT_LIST_HEAD(&req->list); + mask = vfs_poll(poll->file, &ipt.pt) & poll->events; spin_lock_irq(&ctx->completion_lock); @@ -1844,6 +1882,10 @@ restart: /* async context always uses a copy of the sqe */ kfree(sqe); + /* req from defer and link list needn't decrease async cnt */ + if (req->flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE)) + goto out; + if (!async_list) break; if (!list_empty(&req_list)) { @@ -1891,6 +1933,7 @@ restart: } } +out: if (cur_mm) { set_fs(old_fs); unuse_mm(cur_mm); @@ -1917,6 +1960,10 @@ static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req) ret = true; spin_lock(&list->lock); list_add_tail(&req->list, &list->list); + /* + * Ensure we see a simultaneous modification from io_sq_wq_submit_work() + */ + smp_mb(); if (!atomic_read(&list->cnt)) { list_del_init(&req->list); ret = false; diff --git a/fs/open.c b/fs/open.c index b5b80469b93d..a59abe3c669a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -374,6 +374,25 @@ long do_faccessat(int dfd, const char __user *filename, int mode) override_cred->cap_permitted; } + /* + * The new set of credentials can *only* be used in + * task-synchronous circumstances, and does not need + * RCU freeing, unless somebody then takes a separate + * reference to it. + * + * NOTE! This is _only_ true because this credential + * is used purely for override_creds() that installs + * it as the subjective cred. Other threads will be + * accessing ->real_cred, not the subjective cred. + * + * If somebody _does_ make a copy of this (using the + * 'get_current_cred()' function), that will clear the + * non_rcu field, because now that other user may be + * expecting RCU freeing.
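As a worked example of the segment-skip arithmetic in the io_import_fixed() hunk above (a standalone user-space sketch that assumes a page-aligned first bvec of PAGE_SIZE bytes; the numbers are illustrative):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
        unsigned long offset = 9500;  /* byte offset into the fixed buffer */
        unsigned long first_len = PAGE_SIZE;
        unsigned long seg_skip, iov_offset;

        offset -= first_len;                   /* skip first vec */
        seg_skip = 1 + (offset >> PAGE_SHIFT); /* first vec + whole pages */
        iov_offset = offset & ~PAGE_MASK;      /* remainder within a page */

        /* prints seg_skip=2 iov_offset=1308: two 4 KiB segments plus
         * 1308 bytes account for the original 9500-byte offset */
        printf("seg_skip=%lu iov_offset=%lu\n", seg_skip, iov_offset);
        return 0;
}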
But normal thread-synchronous + * cred accesses will keep things non-RCU. + */ + override_cred->non_rcu = 1; + old_cred = override_creds(override_cred); retry: res = user_path_at(dfd, filename, lookup_flags, &path); diff --git a/include/Kbuild b/include/Kbuild index 7e9f1acb9dd5..c38f0d46b267 100644 --- a/include/Kbuild +++ b/include/Kbuild @@ -31,7 +31,7 @@ header-test- += acpi/platform/acintel.h header-test- += acpi/platform/aclinux.h header-test- += acpi/platform/aclinuxex.h header-test- += acpi/processor.h -header-test- += clocksource/hyperv_timer.h +header-test-$(CONFIG_X86) += clocksource/hyperv_timer.h header-test- += clocksource/timer-sp804.h header-test- += crypto/cast_common.h header-test- += crypto/internal/cryptouser.h @@ -246,6 +246,7 @@ header-test- += linux/intel-pti.h header-test- += linux/intel-svm.h header-test- += linux/interconnect-provider.h header-test- += linux/ioc3.h +header-test-$(CONFIG_BLOCK) += linux/iomap.h header-test- += linux/ipack.h header-test- += linux/irq_cpustat.h header-test- += linux/irq_poll.h @@ -454,9 +455,6 @@ header-test- += linux/phy/omap_control_phy.h header-test- += linux/phy/tegra/xusb.h header-test- += linux/phy/ulpi_phy.h header-test- += linux/phy_fixed.h -header-test- += linux/pinctrl/pinconf-generic.h -header-test- += linux/pinctrl/pinconf.h -header-test- += linux/pinctrl/pinctrl.h header-test- += linux/pipe_fs_i.h header-test- += linux/pktcdvd.h header-test- += linux/pl320-ipc.h @@ -905,10 +903,11 @@ header-test- += net/netfilter/nf_nat_redirect.h header-test- += net/netfilter/nf_queue.h header-test- += net/netfilter/nf_reject.h header-test- += net/netfilter/nf_synproxy.h -header-test- += net/netfilter/nf_tables.h -header-test- += net/netfilter/nf_tables_core.h -header-test- += net/netfilter/nf_tables_ipv4.h +header-test-$(CONFIG_NF_TABLES) += net/netfilter/nf_tables.h +header-test-$(CONFIG_NF_TABLES) += net/netfilter/nf_tables_core.h +header-test-$(CONFIG_NF_TABLES) += net/netfilter/nf_tables_ipv4.h header-test- += net/netfilter/nf_tables_ipv6.h +header-test-$(CONFIG_NF_TABLES) += net/netfilter/nf_tables_offload.h header-test- += net/netfilter/nft_fib.h header-test- += net/netfilter/nft_meta.h header-test- += net/netfilter/nft_reject.h @@ -949,7 +948,6 @@ header-test- += pcmcia/ds.h header-test- += rdma/ib.h header-test- += rdma/iw_portmap.h header-test- += rdma/opa_port_info.h -header-test- += rdma/rdma_counter.h header-test- += rdma/rdmavt_cq.h header-test- += rdma/restrack.h header-test- += rdma/signature.h diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 689a58231288..12811091fd50 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -181,6 +181,7 @@ struct blkcg_policy { extern struct blkcg blkcg_root; extern struct cgroup_subsys_state * const blkcg_root_css; +extern bool blkcg_debug_stats; struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, struct request_queue *q, bool update_hint); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index feff3fe4467e..1b1fa1557e68 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -311,6 +311,7 @@ enum req_flag_bits { __REQ_RAHEAD, /* read ahead, can fail anytime */ __REQ_BACKGROUND, /* background IO */ __REQ_NOWAIT, /* Don't wait if request will block */ + __REQ_NOWAIT_INLINE, /* Return would-block error inline */ /* * When a shared kthread needs to issue a bio for a cgroup, doing * so synchronously can lead to priority inversions as the kthread @@ -345,6 +346,7 @@ enum req_flag_bits { #define
REQ_RAHEAD (1ULL << __REQ_RAHEAD) #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) +#define REQ_NOWAIT_INLINE (1ULL << __REQ_NOWAIT_INLINE) #define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) @@ -418,12 +420,13 @@ static inline int op_stat_group(unsigned int op) typedef unsigned int blk_qc_t; #define BLK_QC_T_NONE -1U +#define BLK_QC_T_EAGAIN -2U #define BLK_QC_T_SHIFT 16 #define BLK_QC_T_INTERNAL (1U << 31) static inline bool blk_qc_t_valid(blk_qc_t cookie) { - return cookie != BLK_QC_T_NONE; + return cookie != BLK_QC_T_NONE && cookie != BLK_QC_T_EAGAIN; } static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index e8579412ad21..d7ee4c6bad48 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -170,3 +170,5 @@ #else #define __diag_GCC_8(s) #endif + +#define __no_fgcse __attribute__((optimize("-fno-gcse"))) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 8aaf7cd026b0..f0fd5636fddb 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -116,9 +116,14 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, ".pushsection .discard.unreachable\n\t" \ ".long 999b - .\n\t" \ ".popsection\n\t" + +/* Annotate a C jump table to allow objtool to follow the code flow */ +#define __annotate_jump_table __section(".rodata..c_jump_table") + #else #define annotate_reachable() #define annotate_unreachable() +#define __annotate_jump_table #endif #ifndef ASM_UNREACHABLE diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 095d55c3834d..599c27b56c29 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -189,6 +189,10 @@ struct ftrace_likely_data { #define asm_volatile_goto(x...) asm goto(x) #endif +#ifndef __no_fgcse +# define __no_fgcse +#endif + /* Are two types/vars the same type (ignoring qualifiers)? */ #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) diff --git a/include/linux/connector.h b/include/linux/connector.h index 6b6c7396a584..cb732643471b 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -50,7 +50,6 @@ struct cn_dev { u32 seq, groups; struct sock *nls; - void (*input) (struct sk_buff *skb); struct cn_queue_dev *cbdev; }; diff --git a/include/linux/cred.h b/include/linux/cred.h index 7eb43a038330..f7a30e0099be 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -145,7 +145,11 @@ struct cred { struct user_struct *user; /* real user ID subscription */ struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */ struct group_info *group_info; /* supplementary groups for euid/fsgid */ - struct rcu_head rcu; /* RCU deletion hook */ + /* RCU deletion */ + union { + int non_rcu; /* Can we skip RCU deletion? 
*/ + struct rcu_head rcu; /* RCU deletion hook */ + }; } __randomize_layout; extern void __put_cred(struct cred *); @@ -246,6 +250,7 @@ static inline const struct cred *get_cred(const struct cred *cred) if (!cred) return cred; validate_creds(cred); + nonconst_cred->non_rcu = 0; return get_new_cred(nonconst_cred); } @@ -257,6 +262,7 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) if (!atomic_inc_not_zero(&nonconst_cred->usage)) return NULL; validate_creds(cred); + nonconst_cred->non_rcu = 0; return cred; } diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index b7338702592a..adf993a3bd58 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -32,6 +32,15 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) } #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ +#ifdef CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED +bool force_dma_unencrypted(struct device *dev); +#else +static inline bool force_dma_unencrypted(struct device *dev) +{ + return false; +} +#endif /* CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED */ + /* * If memory encryption is supported, phys_to_dma will set the memory encryption * bit in the DMA address, and dma_to_phys will clear it. The raw __phys_to_dma diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 8d13e28a8e07..f7d1eea32c78 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -679,6 +679,20 @@ static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask) return dma_set_mask_and_coherent(dev, mask); } +/** + * dma_addressing_limited - return whether the device is addressing limited + * @dev: device to check + * + * Return %true if the device's DMA mask is too small to address all memory in + * the system, else %false. Lack of addressing bits is the prime reason for + * bounce buffering, but might not be the only one.
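A minimal sketch of how a caller might act on the helper documented here (hypothetical foo_* driver names, not part of this patch):

/* Hypothetical driver init: fall back to a driver-private bounce
 * pool when the device cannot address all of system memory. */
struct foo_priv {
        bool use_bounce;
};

static int foo_alloc_bounce_pool(struct foo_priv *priv); /* hypothetical */

static int foo_init_buffers(struct device *dev, struct foo_priv *priv)
{
        priv->use_bounce = dma_addressing_limited(dev);
        if (priv->use_bounce)
                return foo_alloc_bounce_pool(priv);
        return 0;
}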
+ */ +static inline bool dma_addressing_limited(struct device *dev) +{ + return min_not_zero(dma_get_mask(dev), dev->bus_dma_mask) < + dma_get_required_mask(dev); +} + #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent); diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 17cd0078377c..1dd014c9c87b 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -45,7 +45,6 @@ struct elevator_mq_ops { struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); bool (*has_work)(struct blk_mq_hw_ctx *); void (*completed_request)(struct request *, u64); - void (*started_request)(struct request *); void (*requeue_request)(struct request *); struct request *(*former_request)(struct request_queue *, struct request *); struct request *(*next_request)(struct request_queue *, struct request *); diff --git a/include/linux/iova.h b/include/linux/iova.h index 781b96ac706f..a0637abffee8 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -155,6 +155,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn); +bool has_iova_flush_queue(struct iova_domain *iovad); int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); @@ -235,6 +236,11 @@ static inline void init_iova_domain(struct iova_domain *iovad, { } +static inline bool has_iova_flush_queue(struct iova_domain *iovad) +{ + return false; +} + static inline int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c5da875f19e3..5c5b5867024c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -318,6 +318,7 @@ struct kvm_vcpu { } spin_loop; #endif bool preempted; + bool ready; struct kvm_vcpu_arch arch; struct dentry *debugfs_dentry; }; diff --git a/include/linux/msi.h b/include/linux/msi.h index d48e919d55ae..8ad679e9d9c0 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -64,6 +64,10 @@ struct ti_sci_inta_msi_desc { * @msg: The last set MSI message cached for reuse * @affinity: Optional pointer to a cpu affinity mask for this descriptor * + * @write_msi_msg: Callback that may be called when the MSI message + * address or data changes + * @write_msi_msg_data: Data parameter for the callback. 
+ * * @masked: [PCI MSI/X] Mask bits * @is_msix: [PCI MSI/X] True if MSI-X * @multiple: [PCI MSI/X] log2 num of messages allocated @@ -90,6 +94,9 @@ struct msi_desc { const void *iommu_cookie; #endif + void (*write_msi_msg)(struct msi_desc *entry, void *data); + void *write_msi_msg_data; + union { /* PCI MSI/X specific data */ struct { @@ -100,6 +107,7 @@ struct msi_desc { u8 multi_cap : 3; u8 maskbit : 1; u8 is_64 : 1; + u8 is_virtual : 1; u16 entry_nr; unsigned default_irq; } msi_attrib; diff --git a/include/linux/netfilter/nf_conntrack_h323_asn1.h b/include/linux/netfilter/nf_conntrack_h323_asn1.h index 91d6275292a5..19df78341fb3 100644 --- a/include/linux/netfilter/nf_conntrack_h323_asn1.h +++ b/include/linux/netfilter/nf_conntrack_h323_asn1.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /**************************************************************************** - * ip_conntrack_h323_asn1.h - BER and PER decoding library for H.323 - * conntrack/NAT module. + * BER and PER decoding library for H.323 conntrack/NAT module. * * Copyright (c) 2006 by Jing Min Zhao <[email protected]> * diff --git a/include/linux/ntb.h b/include/linux/ntb.h index 56a92e3ae3ae..8c13538aeffe 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -58,9 +58,11 @@ #include <linux/completion.h> #include <linux/device.h> +#include <linux/interrupt.h> struct ntb_client; struct ntb_dev; +struct ntb_msi; struct pci_dev; /** @@ -205,7 +207,7 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) } /** - * struct ntb_ctx_ops - ntb device operations + * struct ntb_dev_ops - ntb device operations * @port_number: See ntb_port_number(). * @peer_port_count: See ntb_peer_port_count(). * @peer_port_number: See ntb_peer_port_number(). @@ -404,7 +406,7 @@ struct ntb_client { #define drv_ntb_client(__drv) container_of((__drv), struct ntb_client, drv) /** - * struct ntb_device - ntb device + * struct ntb_dev - ntb device * @dev: Linux device object. * @pdev: PCI device entry of the ntb. * @topo: Detected topology of the ntb. @@ -426,6 +428,10 @@ struct ntb_dev { spinlock_t ctx_lock; /* block unregister until device is fully released */ struct completion released; + +#ifdef CONFIG_NTB_MSI + struct ntb_msi *msi; +#endif }; #define dev_ntb(__dev) container_of((__dev), struct ntb_dev, dev) @@ -616,7 +622,6 @@ static inline int ntb_port_number(struct ntb_dev *ntb) return ntb->ops->port_number(ntb); } - /** * ntb_peer_port_count() - get the number of peer device ports * @ntb: NTB device context. @@ -654,6 +659,58 @@ static inline int ntb_peer_port_number(struct ntb_dev *ntb, int pidx) } /** + * ntb_logical_port_number() - get the logical port number of the local port + * @ntb: NTB device context. + * + * The Logical Port Number is defined to be a unique number for each + * port starting from zero through to the number of ports minus one. + * This is in contrast to the Port Number where each port can be assigned + * any unique physical number by the hardware. + * + * The logical port number is useful for calculating the resource indexes + * used by peers. 
+ * + * Return: the logical port number or negative value indicating an error + */ +static inline int ntb_logical_port_number(struct ntb_dev *ntb) +{ + int lport = ntb_port_number(ntb); + int pidx; + + if (lport < 0) + return lport; + + for (pidx = 0; pidx < ntb_peer_port_count(ntb); pidx++) + if (lport <= ntb_peer_port_number(ntb, pidx)) + return pidx; + + return pidx; +} + +/** + * ntb_peer_logical_port_number() - get the logical peer port by given index + * @ntb: NTB device context. + * @pidx: Peer port index. + * + * The Logical Port Number is defined to be a unique number for each + * port starting from zero through to the number of ports minus one. + * This is in contrast to the Port Number where each port can be assigned + * any unique physical number by the hardware. + * + * The logical port number is useful for calculating the resource indexes + * used by peers. + * + * Return: the peer's logical port number or negative value indicating an error + */ +static inline int ntb_peer_logical_port_number(struct ntb_dev *ntb, int pidx) +{ + if (ntb_peer_port_number(ntb, pidx) < ntb_port_number(ntb)) + return pidx; + else + return pidx + 1; +} + +/** * ntb_peer_port_idx() - get the peer device port index by given port number * @ntb: NTB device context. * @port: Peer port number. @@ -1506,4 +1563,141 @@ static inline int ntb_peer_msg_write(struct ntb_dev *ntb, int pidx, int midx, return ntb->ops->peer_msg_write(ntb, pidx, midx, msg); } +/** + * ntb_peer_resource_idx() - get a resource index for a given peer idx + * @ntb: NTB device context. + * @pidx: Peer port index. + * + * When constructing a graph of peers, each remote peer must use a different + * resource index (mw, doorbell, etc) to communicate with each other + * peer. + * + * In a two peer system, this function should always return 0 such that + * resource 0 points to the remote peer on both ports. + * + * In a 5 peer system, this function will return the following matrix + * + * pidx \ port 0 1 2 3 4 + * 0 0 0 1 2 3 + * 1 0 1 1 2 3 + * 2 0 1 2 2 3 + * 3 0 1 2 3 3 + * + * For example, if this function is used to program peer's memory + * windows, port 0 will program MW 0 on all its peers to point to itself. + * Port 1 will program MW 0 in port 0 to point to itself and MW 1 on all + * other ports, etc. + * + * For the legacy two host case, ntb_port_number() and ntb_peer_port_number() + * both return zero and therefore this function will always return zero. + * So MW 0 on each host would be programmed to point to the other host. + * + * Return: the resource index to use for that peer. + */ +static inline int ntb_peer_resource_idx(struct ntb_dev *ntb, int pidx) +{ + int local_port, peer_port; + + if (pidx >= ntb_peer_port_count(ntb)) + return -EINVAL; + + local_port = ntb_logical_port_number(ntb); + peer_port = ntb_peer_logical_port_number(ntb, pidx); + + if (peer_port < local_port) + return local_port - 1; + else + return local_port; } + +/** + * ntb_peer_highest_mw_idx() - get a memory window index for a given peer idx + * using the highest index memory windows first + * + * @ntb: NTB device context. + * @pidx: Peer port index. + * + * Like ntb_peer_resource_idx(), except it returns indexes starting with + * last memory window index. + * + * Return: the resource index to use for that peer.
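The matrix above follows from the two rules in ntb_peer_resource_idx(): a peer whose logical port is below the local port maps to local_port - 1, and any other peer maps to local_port. A standalone sketch that reproduces the matrix (plain user-space C, not kernel code):

#include <stdio.h>

/* peer_port < local_port maps to local_port - 1, else local_port */
static int resource_idx(int local_port, int peer_port)
{
        return peer_port < local_port ? local_port - 1 : local_port;
}

int main(void)
{
        /* rows are pidx 0..3, columns are ports 0..4, as in the comment */
        for (int pidx = 0; pidx < 4; pidx++) {
                for (int port = 0; port < 5; port++) {
                        int peer = pidx < port ? pidx : pidx + 1; /* skip self */
                        printf("%d ", resource_idx(port, peer));
                }
                printf("\n");
        }
        return 0;
}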
+ */ +static inline int ntb_peer_highest_mw_idx(struct ntb_dev *ntb, int pidx) +{ + int ret; + + ret = ntb_peer_resource_idx(ntb, pidx); + if (ret < 0) + return ret; + + return ntb_mw_count(ntb, pidx) - ret - 1; +} + +struct ntb_msi_desc { + u32 addr_offset; + u32 data; +}; + +#ifdef CONFIG_NTB_MSI + +int ntb_msi_init(struct ntb_dev *ntb, void (*desc_changed)(void *ctx)); +int ntb_msi_setup_mws(struct ntb_dev *ntb); +void ntb_msi_clear_mws(struct ntb_dev *ntb); +int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler, + irq_handler_t thread_fn, + const char *name, void *dev_id, + struct ntb_msi_desc *msi_desc); +void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id); +int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer, + struct ntb_msi_desc *desc); +int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer, + struct ntb_msi_desc *desc, + phys_addr_t *msi_addr); + +#else /* not CONFIG_NTB_MSI */ + +static inline int ntb_msi_init(struct ntb_dev *ntb, + void (*desc_changed)(void *ctx)) +{ + return -EOPNOTSUPP; +} +static inline int ntb_msi_setup_mws(struct ntb_dev *ntb) +{ + return -EOPNOTSUPP; +} +static inline void ntb_msi_clear_mws(struct ntb_dev *ntb) {} +static inline int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, + irq_handler_t handler, + irq_handler_t thread_fn, + const char *name, void *dev_id, + struct ntb_msi_desc *msi_desc) +{ + return -EOPNOTSUPP; +} +static inline void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, + void *dev_id) {} +static inline int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer, + struct ntb_msi_desc *desc) +{ + return -EOPNOTSUPP; +} +static inline int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer, + struct ntb_msi_desc *desc, + phys_addr_t *msi_addr) +{ + return -EOPNOTSUPP; + +} + +#endif /* CONFIG_NTB_MSI */ + +static inline int ntbm_msi_request_irq(struct ntb_dev *ntb, + irq_handler_t handler, + const char *name, void *dev_id, + struct ntb_msi_desc *msi_desc) +{ + return ntbm_msi_request_threaded_irq(ntb, handler, NULL, name, + dev_id, msi_desc); +} + #endif diff --git a/include/linux/pci.h b/include/linux/pci.h index 2972793e3028..9e700d9f9f28 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1412,6 +1412,15 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode, #define PCI_IRQ_MSI (1 << 1) /* Allow MSI interrupts */ #define PCI_IRQ_MSIX (1 << 2) /* Allow MSI-X interrupts */ #define PCI_IRQ_AFFINITY (1 << 3) /* Auto-assign affinity */ + +/* + * Virtual interrupts allow for more interrupts to be allocated + * than the device has interrupts for. These are not programmed + * into the device's MSI-X table and must be handled by some + * other driver means. 
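A sketch of a call site for the flag defined below (hypothetical driver code; pci_alloc_irq_vectors() is the existing vector-allocation entry point):

/* Hypothetical: the device exposes fewer MSI-X table entries than the
 * driver wants interrupts; the surplus vectors are "virtual" and are
 * never programmed into the MSI-X table, so the driver must trigger
 * them by other means (e.g. the NTB MSI library above). */
static int foo_setup_irqs(struct pci_dev *pdev, unsigned int nvec_total)
{
        int nvec = pci_alloc_irq_vectors(pdev, 1, nvec_total,
                                         PCI_IRQ_MSIX | PCI_IRQ_VIRTUAL);

        return nvec < 0 ? nvec : 0;
}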
+ */ +#define PCI_IRQ_VIRTUAL (1 << 4) + #define PCI_IRQ_ALL_TYPES \ (PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX) diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index b0fb1446fe04..6c8512d3be88 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -19,6 +19,7 @@ enum hk_flags { DECLARE_STATIC_KEY_FALSE(housekeeping_overridden); extern int housekeeping_any_cpu(enum hk_flags flags); extern const struct cpumask *housekeeping_cpumask(enum hk_flags flags); +extern bool housekeeping_enabled(enum hk_flags flags); extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags); extern bool housekeeping_test_cpu(int cpu, enum hk_flags flags); extern void __init housekeeping_init(void); @@ -35,6 +36,11 @@ static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags) return cpu_possible_mask; } +static inline bool housekeeping_enabled(enum hk_flags flags) +{ + return false; +} + static inline void housekeeping_affine(struct task_struct *t, enum hk_flags flags) { } static inline void housekeeping_init(void) { } diff --git a/include/linux/wait.h b/include/linux/wait.h index b6f77cf60dd7..30c515520fb2 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -127,6 +127,19 @@ static inline int waitqueue_active(struct wait_queue_head *wq_head) } /** + * wq_has_single_sleeper - check if there is only one sleeper + * @wq_head: wait queue head + * + * Returns true if wq_head has only one sleeper on the list. + * + * Please refer to the comment for waitqueue_active. + */ +static inline bool wq_has_single_sleeper(struct wait_queue_head *wq_head) +{ + return list_is_singular(&wq_head->head); +} + +/** * wq_has_sleeper - check if there are any waiting processes * @wq_head: wait queue head * diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 88c27153a4bc..45850a8391d9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4170,7 +4170,7 @@ struct sta_opmode_info { u8 rx_nss; }; -#define VENDOR_CMD_RAW_DATA ((const struct nla_policy *)ERR_PTR(-ENODATA)) +#define VENDOR_CMD_RAW_DATA ((const struct nla_policy *)(long)(-ENODATA)) /** * struct wiphy_vendor_command - vendor command definition diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index db337299e81e..b16d21636d69 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -2,8 +2,8 @@ #define _NET_FLOW_OFFLOAD_H #include <linux/kernel.h> +#include <linux/list.h> #include <net/flow_dissector.h> -#include <net/sch_generic.h> struct flow_match { struct flow_dissector *dissector; @@ -249,6 +249,10 @@ enum flow_block_binder_type { FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS, }; +struct flow_block { + struct list_head cb_list; +}; + struct netlink_ext_ack; struct flow_block_offload { @@ -256,29 +260,33 @@ struct flow_block_offload { enum flow_block_binder_type binder_type; bool block_shared; struct net *net; + struct flow_block *block; struct list_head cb_list; struct list_head *driver_block_list; struct netlink_ext_ack *extack; }; +enum tc_setup_type; +typedef int flow_setup_cb_t(enum tc_setup_type type, void *type_data, + void *cb_priv); + struct flow_block_cb { struct list_head driver_list; struct list_head list; - struct net *net; - tc_setup_cb_t *cb; + flow_setup_cb_t *cb; void *cb_ident; void *cb_priv; void (*release)(void *cb_priv); unsigned int refcnt; }; -struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb, +struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb, void
*cb_ident, void *cb_priv, void (*release)(void *cb_priv)); void flow_block_cb_free(struct flow_block_cb *block_cb); -struct flow_block_cb *flow_block_cb_lookup(struct flow_block_offload *offload, - tc_setup_cb_t *cb, void *cb_ident); +struct flow_block_cb *flow_block_cb_lookup(struct flow_block *block, + flow_setup_cb_t *cb, void *cb_ident); void *flow_block_cb_priv(struct flow_block_cb *block_cb); void flow_block_cb_incref(struct flow_block_cb *block_cb); @@ -296,11 +304,12 @@ static inline void flow_block_cb_remove(struct flow_block_cb *block_cb, list_move(&block_cb->list, &offload->cb_list); } -bool flow_block_cb_is_busy(tc_setup_cb_t *cb, void *cb_ident, +bool flow_block_cb_is_busy(flow_setup_cb_t *cb, void *cb_ident, struct list_head *driver_block_list); int flow_block_cb_setup_simple(struct flow_block_offload *f, - struct list_head *driver_list, tc_setup_cb_t *cb, + struct list_head *driver_list, + flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, bool ingress_only); enum flow_cls_command { @@ -333,4 +342,9 @@ flow_cls_offload_flow_rule(struct flow_cls_offload *flow_cmd) return flow_cmd->rule; } +static inline void flow_block_init(struct flow_block *flow_block) +{ + INIT_LIST_HEAD(&flow_block->cb_list); +} + #endif /* _NET_FLOW_OFFLOAD_H */ diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 93ce6b0daaba..573429be4d59 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -76,6 +76,11 @@ struct nf_conntrack_expect_policy { #define NF_CT_EXPECT_CLASS_DEFAULT 0 #define NF_CT_EXPECT_MAX_CNT 255 +/* Allow to reuse expectations with the same tuples from different master + * conntracks. + */ +#define NF_CT_EXP_F_SKIP_MASTER 0x1 + int nf_conntrack_expect_pernet_init(struct net *net); void nf_conntrack_expect_pernet_fini(struct net *net); @@ -122,10 +127,11 @@ void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, u_int8_t, u_int8_t, const __be16 *, const __be16 *); void nf_ct_expect_put(struct nf_conntrack_expect *exp); int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, - u32 portid, int report); -static inline int nf_ct_expect_related(struct nf_conntrack_expect *expect) + u32 portid, int report, unsigned int flags); +static inline int nf_ct_expect_related(struct nf_conntrack_expect *expect, + unsigned int flags) { - return nf_ct_expect_related_report(expect, 0, 0); + return nf_ct_expect_related_report(expect, 0, 0, flags); } #endif /*_NF_CONNTRACK_EXPECT_H*/ diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h index 8f00125b06f4..44513b93bd55 100644 --- a/include/net/netfilter/nf_conntrack_synproxy.h +++ b/include/net/netfilter/nf_conntrack_synproxy.h @@ -68,6 +68,7 @@ struct synproxy_options { u8 options; u8 wscale; u16 mss; + u16 mss_encode; u32 tsval; u32 tsecr; }; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 35dfdd9f69b3..9b624566b82d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -11,6 +11,7 @@ #include <linux/rhashtable.h> #include <net/netfilter/nf_flow_table.h> #include <net/netlink.h> +#include <net/flow_offload.h> struct module; @@ -951,7 +952,7 @@ struct nft_stats { * @stats: per-cpu chain stats * @chain: the chain * @dev_name: device name that this base chain is attached to (if any) - * @cb_list: list of flow block callbacks (for hardware offload) + * @flow_block: flow block (for hardware offload) 
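With the flow_block abstraction introduced in this series, a driver's block bind/unbind handler takes roughly this shape (hypothetical foo_* driver mirroring the dsa conversion further down; a sketch, not a definitive implementation):

static LIST_HEAD(foo_block_cb_list);

/* flow_setup_cb_t; hypothetical driver callback */
static int foo_setup_cb(enum tc_setup_type type, void *type_data,
                        void *cb_priv);

static int foo_setup_block(struct net_device *dev,
                           struct flow_block_offload *f)
{
        struct flow_block_cb *block_cb;

        switch (f->command) {
        case FLOW_BLOCK_BIND:
                if (flow_block_cb_is_busy(foo_setup_cb, dev,
                                          &foo_block_cb_list))
                        return -EBUSY;
                block_cb = flow_block_cb_alloc(foo_setup_cb, dev, dev, NULL);
                if (IS_ERR(block_cb))
                        return PTR_ERR(block_cb);
                flow_block_cb_add(block_cb, f);
                list_add_tail(&block_cb->driver_list, &foo_block_cb_list);
                return 0;
        case FLOW_BLOCK_UNBIND:
                /* note the lookup is now keyed on f->block, not f->net */
                block_cb = flow_block_cb_lookup(f->block, foo_setup_cb, dev);
                if (!block_cb)
                        return -ENOENT;
                flow_block_cb_remove(block_cb, f);
                list_del(&block_cb->driver_list);
                return 0;
        default:
                return -EOPNOTSUPP;
        }
}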
*/ struct nft_base_chain { struct nf_hook_ops ops; @@ -961,7 +962,7 @@ struct nft_base_chain { struct nft_stats __percpu *stats; struct nft_chain chain; char dev_name[IFNAMSIZ]; - struct list_head cb_list; + struct flow_block flow_block; }; static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 841faadceb6e..e429809ca90d 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -6,7 +6,6 @@ #include <linux/workqueue.h> #include <net/sch_generic.h> #include <net/act_api.h> -#include <net/flow_offload.h> #include <net/net_namespace.h> /* TC action not accessible from user space */ @@ -126,14 +125,14 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block) } static inline -int tc_setup_cb_block_register(struct tcf_block *block, tc_setup_cb_t *cb, +int tc_setup_cb_block_register(struct tcf_block *block, flow_setup_cb_t *cb, void *cb_priv) { return 0; } static inline -void tc_setup_cb_block_unregister(struct tcf_block *block, tc_setup_cb_t *cb, +void tc_setup_cb_block_unregister(struct tcf_block *block, flow_setup_cb_t *cb, void *cb_priv) { } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 855167bbc372..6b6b01234dd9 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -15,6 +15,7 @@ #include <linux/mutex.h> #include <net/gen_stats.h> #include <net/rtnetlink.h> +#include <net/flow_offload.h> struct Qdisc_ops; struct qdisc_walker; @@ -22,9 +23,6 @@ struct tcf_walker; struct module; struct bpf_flow_keys; -typedef int tc_setup_cb_t(enum tc_setup_type type, - void *type_data, void *cb_priv); - typedef int tc_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv, enum tc_setup_type type, void *type_data); @@ -313,7 +311,7 @@ struct tcf_proto_ops { void (*walk)(struct tcf_proto *tp, struct tcf_walker *arg, bool rtnl_held); int (*reoffload)(struct tcf_proto *tp, bool add, - tc_setup_cb_t *cb, void *cb_priv, + flow_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack); void (*bind_class)(void *, u32, unsigned long); void * (*tmplt_create)(struct net *net, @@ -401,7 +399,7 @@ struct tcf_block { refcount_t refcnt; struct net *net; struct Qdisc *q; - struct list_head cb_list; + struct flow_block flow_block; struct list_head owner_list; bool keep_dst; unsigned int offloadcnt; /* Number of offloaded filters */ diff --git a/include/net/tcp.h b/include/net/tcp.h index f42d300f0cfa..e5cf514ba118 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1709,6 +1709,11 @@ static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk) return skb_rb_first(&sk->tcp_rtx_queue); } +static inline struct sk_buff *tcp_rtx_queue_tail(const struct sock *sk) +{ + return skb_rb_last(&sk->tcp_rtx_queue); +} + static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk) { return skb_peek(&sk->sk_write_queue); } diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index a5fcdad4a03e..cc139dbd71e5 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -369,6 +369,8 @@ struct scsi_host_template { */ unsigned long dma_boundary; + unsigned long virt_boundary_mask; + /* * This specifies "machine infinity" for host templates which don't * limit the transfer size. Note this limit represents an absolute
Note this limit represents an absolute @@ -587,6 +589,7 @@ struct Scsi_Host { unsigned int max_sectors; unsigned int max_segment_size; unsigned long dma_boundary; + unsigned long virt_boundary_mask; /* * In scsi-mq mode, the number of hardware queues supported by the LLD. * diff --git a/include/sound/compress_driver.h b/include/sound/compress_driver.h index c5188ff724d1..bc88d6f964da 100644 --- a/include/sound/compress_driver.h +++ b/include/sound/compress_driver.h @@ -173,10 +173,7 @@ static inline void snd_compr_drain_notify(struct snd_compr_stream *stream) if (snd_BUG_ON(!stream)) return; - if (stream->direction == SND_COMPRESS_PLAYBACK) - stream->runtime->state = SNDRV_PCM_STATE_SETUP; - else - stream->runtime->state = SNDRV_PCM_STATE_PREPARED; + stream->runtime->state = SNDRV_PCM_STATE_SETUP; wake_up(&stream->runtime->sleep); } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a7c19540ce21..5e3f12d5359e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -116,7 +116,7 @@ struct kvm_irq_level { * ACPI gsi notion of irq. * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. - * For ARM: See Documentation/virtual/kvm/api.txt + * For ARM: See Documentation/virt/kvm/api.txt */ union { __u32 irq; @@ -1086,7 +1086,7 @@ struct kvm_xen_hvm_config { * * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies * the irqfd to operate in resampling mode for level triggered interrupt - * emulation. See Documentation/virtual/kvm/api.txt. + * emulation. See Documentation/virt/kvm/api.txt. */ #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 75758ec26c8b..beb9a9d0c00a 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2863,7 +2863,7 @@ enum nl80211_attrs { #define NL80211_HT_CAPABILITY_LEN 26 #define NL80211_VHT_CAPABILITY_LEN 12 #define NL80211_HE_MIN_CAPABILITY_LEN 16 -#define NL80211_HE_MAX_CAPABILITY_LEN 51 +#define NL80211_HE_MAX_CAPABILITY_LEN 54 #define NL80211_MAX_NR_CIPHER_SUITES 5 #define NL80211_MAX_NR_AKM_SUITES 2 diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 9d9705ceda76..2427bc4d8eba 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -518,7 +518,13 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_RGBX444 v4l2_fourcc('R', 'X', '1', '2') /* 16 rrrrgggg bbbbxxxx */ #define V4L2_PIX_FMT_ABGR444 v4l2_fourcc('A', 'B', '1', '2') /* 16 aaaabbbb ggggrrrr */ #define V4L2_PIX_FMT_XBGR444 v4l2_fourcc('X', 'B', '1', '2') /* 16 xxxxbbbb ggggrrrr */ -#define V4L2_PIX_FMT_BGRA444 v4l2_fourcc('B', 'A', '1', '2') /* 16 bbbbgggg rrrraaaa */ + +/* + * Originally this had 'BA12' as fourcc, but this clashed with the older + * V4L2_PIX_FMT_SGRBG12 which inexplicably used that same fourcc. + * So use 'GA12' instead for V4L2_PIX_FMT_BGRA444. 
+ */ +#define V4L2_PIX_FMT_BGRA444 v4l2_fourcc('G', 'A', '1', '2') /* 16 bbbbgggg rrrraaaa */ #define V4L2_PIX_FMT_BGRX444 v4l2_fourcc('B', 'X', '1', '2') /* 16 bbbbgggg rrrrxxxx */ #define V4L2_PIX_FMT_RGB555 v4l2_fourcc('R', 'G', 'B', 'O') /* 16 RGB-5-5-5 */ #define V4L2_PIX_FMT_ARGB555 v4l2_fourcc('A', 'R', '1', '5') /* 16 ARGB-1-5-5-5 */ diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index dc0b682ec2d9..deff97217496 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -38,7 +38,7 @@ config PREEMPT_VOLUNTARY config PREEMPT bool "Preemptible Kernel (Low-Latency Desktop)" depends on !ARCH_NO_PREEMPT - select PREEMPT_COUNT + select PREEMPTION select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK help This option reduces the latency of the kernel by making @@ -55,7 +55,28 @@ config PREEMPT embedded system with latency requirements in the milliseconds range. +config PREEMPT_RT + bool "Fully Preemptible Kernel (Real-Time)" + depends on EXPERT && ARCH_SUPPORTS_RT + select PREEMPTION + help + This option turns the kernel into a real-time kernel by replacing + various locking primitives (spinlocks, rwlocks, etc.) with + preemptible priority-inheritance aware variants, enforcing + interrupt threading and introducing mechanisms to break up long + non-preemptible sections. This makes the kernel, except for very + low level and critical code paths (entry code, scheduler, low + level interrupt handling) fully preemptible and brings most + execution contexts under scheduler control. + + Select this if you are building a kernel for systems which + require real-time guarantees. + endchoice config PREEMPT_COUNT bool + +config PREEMPTION + bool + select PREEMPT_COUNT diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 16079550db6d..8191a7db2777 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1295,11 +1295,11 @@ bool bpf_opcode_in_insntable(u8 code) * * Decode and execute eBPF instructions. */ -static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) +static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) { #define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z - static const void *jumptable[256] = { + static const void * const jumptable[256] __annotate_jump_table = { [0 ... 255] = &&default_label, /* Now overwrite non-defaults ... */ BPF_INSN_MAP(BPF_INSN_2_LBL, BPF_INSN_3_LBL), @@ -1558,7 +1558,6 @@ out: BUG_ON(1); return 0; } -STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */ #define PROG_NAME(stack_size) __bpf_prog_run##stack_size #define DEFINE_BPF_PROG_RUN(stack_size) \ diff --git a/kernel/cred.c b/kernel/cred.c index f9a0ce66c9c3..c0a4c12d38b2 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -144,7 +144,10 @@ void __put_cred(struct cred *cred) BUG_ON(cred == current->cred); BUG_ON(cred == current->real_cred); - call_rcu(&cred->rcu, put_cred_rcu); + if (cred->non_rcu) + put_cred_rcu(&cred->rcu); + else + call_rcu(&cred->rcu, put_cred_rcu); } EXPORT_SYMBOL(__put_cred); @@ -261,6 +264,7 @@ struct cred *prepare_creds(void) old = task->cred; memcpy(new, old, sizeof(struct cred)); + new->non_rcu = 0; atomic_set(&new->usage, 1); set_cred_subscribers(new, 0); get_group_info(new->group_info); @@ -544,7 +548,19 @@ const struct cred *override_creds(const struct cred *new) validate_creds(old); validate_creds(new); - get_cred(new); + + /* + * NOTE! This uses 'get_new_cred()' rather than 'get_cred()'.
+ * + * That means that we do not clear the 'non_rcu' flag, since + * we are only installing the cred into the thread-synchronous + * '->cred' pointer, not the '->real_cred' pointer that is + * visible to other threads under RCU. + * + * Also note that we did validate_creds() manually, not depending + * on the validation in 'get_cred()'. + */ + get_new_cred((struct cred *)new); alter_cred_subscribers(new, 1); rcu_assign_pointer(current->cred, new); alter_cred_subscribers(old, -1); @@ -681,6 +697,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) validate_creds(old); *new = *old; + new->non_rcu = 0; atomic_set(&new->usage, 1); set_cred_subscribers(new, 0); get_uid(new->user); diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 70f8f8d9200e..9decbba255fc 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -48,6 +48,9 @@ config ARCH_HAS_DMA_COHERENT_TO_PFN config ARCH_HAS_DMA_MMAP_PGPROT bool +config ARCH_HAS_FORCE_DMA_UNENCRYPTED + bool + config DMA_NONCOHERENT_CACHE_SYNC bool diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index b90e1aede743..59bdceea3737 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -23,14 +23,6 @@ #define ARCH_ZONE_DMA_BITS 24 #endif -/* - * For AMD SEV all DMA must be to unencrypted addresses. - */ -static inline bool force_dma_unencrypted(void) -{ - return sev_active(); -} - static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size) { if (!dev->dma_mask) { @@ -46,7 +38,7 @@ static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size) static inline dma_addr_t phys_to_dma_direct(struct device *dev, phys_addr_t phys) { - if (force_dma_unencrypted()) + if (force_dma_unencrypted(dev)) return __phys_to_dma(dev, phys); return phys_to_dma(dev, phys); } @@ -67,7 +59,7 @@ static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, if (dev->bus_dma_mask && dev->bus_dma_mask < dma_mask) dma_mask = dev->bus_dma_mask; - if (force_dma_unencrypted()) + if (force_dma_unencrypted(dev)) *phys_mask = __dma_to_phys(dev, dma_mask); else *phys_mask = dma_to_phys(dev, dma_mask); @@ -159,7 +151,7 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, } ret = page_address(page); - if (force_dma_unencrypted()) { + if (force_dma_unencrypted(dev)) { set_memory_decrypted((unsigned long)ret, 1 << get_order(size)); *dma_handle = __phys_to_dma(dev, page_to_phys(page)); } else { @@ -192,7 +184,7 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, return; } - if (force_dma_unencrypted()) + if (force_dma_unencrypted(dev)) set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) && @@ -242,12 +234,14 @@ void dma_direct_sync_sg_for_device(struct device *dev, int i; for_each_sg(sgl, sg, nents, i) { - if (unlikely(is_swiotlb_buffer(sg_phys(sg)))) - swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length, + phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg)); + + if (unlikely(is_swiotlb_buffer(paddr))) + swiotlb_tbl_sync_single(dev, paddr, sg->length, dir, SYNC_FOR_DEVICE); if (!dev_is_dma_coherent(dev)) - arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, + arch_sync_dma_for_device(dev, paddr, sg->length, dir); } } @@ -279,11 +273,13 @@ void dma_direct_sync_sg_for_cpu(struct device *dev, int i; for_each_sg(sgl, sg, nents, i) { + phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg)); + if (!dev_is_dma_coherent(dev)) - arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); - - if 
(unlikely(is_swiotlb_buffer(sg_phys(sg)))) - swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length, dir, + arch_sync_dma_for_cpu(dev, paddr, sg->length, dir); + + if (unlikely(is_swiotlb_buffer(paddr))) + swiotlb_tbl_sync_single(dev, paddr, sg->length, dir, SYNC_FOR_CPU); } @@ -407,11 +403,9 @@ int dma_direct_supported(struct device *dev, u64 mask) size_t dma_direct_max_mapping_size(struct device *dev) { - size_t size = SIZE_MAX; - /* If SWIOTLB is active, use its maximum mapping size */ - if (is_swiotlb_active()) - size = swiotlb_max_mapping_size(dev); - - return size; + if (is_swiotlb_active() && + (dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE)) + return swiotlb_max_mapping_size(dev); + return SIZE_MAX; } diff --git a/kernel/exit.c b/kernel/exit.c index a75b6a7f458a..4436158a6d30 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -720,6 +720,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead) if (group_dead) kill_orphaned_pgrp(tsk->group_leader, NULL); + tsk->exit_state = EXIT_ZOMBIE; if (unlikely(tsk->ptrace)) { int sig = thread_group_leader(tsk) && thread_group_empty(tsk) && diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 123ea07a3f3b..ccb28085b114 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -14,6 +14,12 @@ EXPORT_SYMBOL_GPL(housekeeping_overridden); static cpumask_var_t housekeeping_mask; static unsigned int housekeeping_flags; +bool housekeeping_enabled(enum hk_flags flags) +{ + return !!(housekeeping_flags & flags); +} +EXPORT_SYMBOL_GPL(housekeeping_enabled); + int housekeeping_any_cpu(enum hk_flags flags) { if (static_branch_unlikely(&housekeeping_overridden)) diff --git a/kernel/smp.c b/kernel/smp.c index 616d4d114847..7dbcb402c2fc 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -291,6 +291,14 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() && !oops_in_progress); + /* + * When @wait we can deadlock when we interrupt between llist_add() and + * arch_send_call_function_ipi*(); when !@wait we can deadlock due to + * csd_lock() because the interrupt context uses the same csd + * storage. + */ + WARN_ON_ONCE(!in_task()); + csd = &csd_stack; if (!wait) { csd = this_cpu_ptr(&csd_data); @@ -416,6 +424,14 @@ void smp_call_function_many(const struct cpumask *mask, WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() && !oops_in_progress && !early_boot_irqs_disabled); + /* + * When @wait we can deadlock when we interrupt between llist_add() and + * arch_send_call_function_ipi*(); when !@wait we can deadlock due to + * csd_lock() because the interrupt context uses the same csd + * storage. + */ + WARN_ON_ONCE(!in_task()); + /* Try to fastpath. So, what's a CPU they want? Ignoring this one.
*/ cpu = cpumask_first_and(mask, cpu_online_mask); if (cpu == this_cpu) diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index e6a02b274b73..f5440abb7532 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -226,12 +226,17 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size) .store = store, .size = size, }; + mm_segment_t fs; /* Trace user stack if not a kernel thread */ if (current->flags & PF_KTHREAD) return 0; + fs = get_fs(); + set_fs(USER_DS); arch_stack_walk_user(consume_entry, &c, task_pt_regs(current)); + set_fs(fs); + return c.len; } #endif diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index bc6673ab3a08..5960e2980a8a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -353,23 +353,13 @@ config DEBUG_SECTION_MISMATCH which results in the code/data being placed in specific sections. The section mismatch analysis is always performed after a full kernel build, and enabling this option causes the following - additional steps to occur: + additional step to occur: - Add the option -fno-inline-functions-called-once to gcc commands. When inlining a function annotated with __init in a non-init function, we would lose the section information and thus the analysis would not catch the illegal reference. This option tells gcc to inline less (but it does result in a larger kernel). - - Run the section mismatch analysis for each module/built-in.a file. - When we run the section mismatch analysis on vmlinux.o, we - lose valuable information about where the mismatch was - introduced. - Running the analysis for each module/built-in.a file - tells where the mismatch happens much closer to the - source. The drawback is that the same mismatch is - reported at least twice. - - Enable verbose reporting from modpost in order to help resolve - the section mismatches that are reported. config SECTION_MISMATCH_WARN_ONLY bool "Make section mismatch errors non-fatal" diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 154fa558bb90..5040fe43f4b4 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -6,7 +6,7 @@ menuconfig NF_TABLES_BRIDGE depends on BRIDGE && NETFILTER && NF_TABLES select NETFILTER_FAMILY_BRIDGE - bool "Ethernet Bridge nf_tables support" + tristate "Ethernet Bridge nf_tables support" if NF_TABLES_BRIDGE @@ -25,6 +25,8 @@ config NF_LOG_BRIDGE tristate "Bridge packet logging" select NF_LOG_COMMON +endif # NF_TABLES_BRIDGE + config NF_CONNTRACK_BRIDGE tristate "IPv4/IPV6 bridge connection tracking support" depends on NF_CONNTRACK @@ -39,8 +41,6 @@ config NF_CONNTRACK_BRIDGE To compile it as a module, choose M here. If unsure, say N. 
-endif # NF_TABLES_BRIDGE - menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" depends on BRIDGE && NETFILTER && NETFILTER_XTABLES diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 76f8db3841d7..d63b970784dc 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -165,7 +165,7 @@ void flow_rule_match_enc_opts(const struct flow_rule *rule, } EXPORT_SYMBOL(flow_rule_match_enc_opts); -struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb, +struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv)) { @@ -175,7 +175,6 @@ struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb, if (!block_cb) return ERR_PTR(-ENOMEM); - block_cb->net = net; block_cb->cb = cb; block_cb->cb_ident = cb_ident; block_cb->cb_priv = cb_priv; @@ -194,14 +193,13 @@ void flow_block_cb_free(struct flow_block_cb *block_cb) } EXPORT_SYMBOL(flow_block_cb_free); -struct flow_block_cb *flow_block_cb_lookup(struct flow_block_offload *f, - tc_setup_cb_t *cb, void *cb_ident) +struct flow_block_cb *flow_block_cb_lookup(struct flow_block *block, + flow_setup_cb_t *cb, void *cb_ident) { struct flow_block_cb *block_cb; - list_for_each_entry(block_cb, f->driver_block_list, driver_list) { - if (block_cb->net == f->net && - block_cb->cb == cb && + list_for_each_entry(block_cb, &block->cb_list, list) { + if (block_cb->cb == cb && block_cb->cb_ident == cb_ident) return block_cb; } @@ -228,7 +226,7 @@ unsigned int flow_block_cb_decref(struct flow_block_cb *block_cb) } EXPORT_SYMBOL(flow_block_cb_decref); -bool flow_block_cb_is_busy(tc_setup_cb_t *cb, void *cb_ident, +bool flow_block_cb_is_busy(flow_setup_cb_t *cb, void *cb_ident, struct list_head *driver_block_list) { struct flow_block_cb *block_cb; @@ -245,7 +243,8 @@ EXPORT_SYMBOL(flow_block_cb_is_busy); int flow_block_cb_setup_simple(struct flow_block_offload *f, struct list_head *driver_block_list, - tc_setup_cb_t *cb, void *cb_ident, void *cb_priv, + flow_setup_cb_t *cb, + void *cb_ident, void *cb_priv, bool ingress_only) { struct flow_block_cb *block_cb; @@ -261,8 +260,7 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f, if (flow_block_cb_is_busy(cb, cb_ident, driver_block_list)) return -EBUSY; - block_cb = flow_block_cb_alloc(f->net, cb, cb_ident, - cb_priv, NULL); + block_cb = flow_block_cb_alloc(cb, cb_ident, cb_priv, NULL); if (IS_ERR(block_cb)) return PTR_ERR(block_cb); @@ -270,7 +268,7 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f, list_add_tail(&block_cb->driver_list, driver_block_list); return 0; case FLOW_BLOCK_UNBIND: - block_cb = flow_block_cb_lookup(f, cb, cb_ident); + block_cb = flow_block_cb_lookup(f->block, cb, cb_ident); if (!block_cb) return -ENOENT; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 614c38ece104..33f41178afcc 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -951,7 +951,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev, struct flow_block_offload *f) { struct flow_block_cb *block_cb; - tc_setup_cb_t *cb; + flow_setup_cb_t *cb; if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) cb = dsa_slave_setup_tc_block_cb_ig; @@ -967,7 +967,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev, if (flow_block_cb_is_busy(cb, dev, &dsa_slave_block_cb_list)) return -EBUSY; - block_cb = flow_block_cb_alloc(f->net, cb, dev, dev, NULL); + block_cb = flow_block_cb_alloc(cb, dev, dev, NULL); if (IS_ERR(block_cb)) return 
PTR_ERR(block_cb); @@ -975,7 +975,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev, list_add_tail(&block_cb->driver_list, &dsa_slave_block_cb_list); return 0; case FLOW_BLOCK_UNBIND: - block_cb = flow_block_cb_lookup(f, cb, dev); + block_cb = flow_block_cb_lookup(f->block, cb, dev); if (!block_cb) return -ENOENT; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 4d6bf7ac0792..6bdb1ab8af61 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -416,8 +416,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) ctinfo == IP_CT_RELATED_REPLY)) return XT_CONTINUE; - /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, - * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here + /* nf_conntrack_proto_icmp guarantees us that we only have ICMP_ECHO, + * TIMESTAMP, INFO_REQUEST or ICMP_ADDRESS type icmp packets from here * on, which all have an ID field [relevant for hashing]. */ hash = clusterip_hashfn(skb, cipinfo->config); diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 8e7f84ec783d..0e70f3f65f6f 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -36,6 +36,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) opts.options |= XT_SYNPROXY_OPT_ECN; opts.options &= info->options; + opts.mss_encode = opts.mss; + opts.mss = info->mss; if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_init_timestamp_cookie(info, &opts); else diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 59031670b16a..cc23f1ce239c 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -78,6 +78,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? 
skb->mark : 0; flow.flowi4_tos = RT_TOS(iph->tos); flow.flowi4_scope = RT_SCOPE_UNIVERSE; + flow.flowi4_oif = l3mdev_master_ifindex_rcu(xt_in(par)); return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert; } diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 87b711fd5a44..3e2685c120c7 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -221,11 +221,11 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, int ret; rtp_exp->tuple.dst.u.udp.port = htons(nated_port); - ret = nf_ct_expect_related(rtp_exp); + ret = nf_ct_expect_related(rtp_exp, 0); if (ret == 0) { rtcp_exp->tuple.dst.u.udp.port = htons(nated_port + 1); - ret = nf_ct_expect_related(rtcp_exp); + ret = nf_ct_expect_related(rtcp_exp, 0); if (ret == 0) break; else if (ret == -EBUSY) { @@ -296,7 +296,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, int ret; exp->tuple.dst.u.tcp.port = htons(nated_port); - ret = nf_ct_expect_related(exp); + ret = nf_ct_expect_related(exp, 0); if (ret == 0) break; else if (ret != -EBUSY) { @@ -352,7 +352,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, int ret; exp->tuple.dst.u.tcp.port = htons(nated_port); - ret = nf_ct_expect_related(exp); + ret = nf_ct_expect_related(exp, 0); if (ret == 0) break; else if (ret != -EBUSY) { @@ -444,7 +444,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, int ret; exp->tuple.dst.u.tcp.port = htons(nated_port); - ret = nf_ct_expect_related(exp); + ret = nf_ct_expect_related(exp, 0); if (ret == 0) break; else if (ret != -EBUSY) { @@ -537,7 +537,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, int ret; exp->tuple.dst.u.tcp.port = htons(nated_port); - ret = nf_ct_expect_related(exp); + ret = nf_ct_expect_related(exp, 0); if (ret == 0) break; else if (ret != -EBUSY) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4af1f5dae9d3..6e4afc48d7bb 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1288,6 +1288,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; int nsize, old_factor; + long limit; int nlen; u8 flags; @@ -1298,8 +1299,16 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, if (nsize < 0) nsize = 0; - if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf && - tcp_queue != TCP_FRAG_IN_WRITE_QUEUE)) { + /* tcp_sendmsg() can overshoot sk_wmem_queued by one full size skb. + * We need some allowance to not penalize applications setting small + * SO_SNDBUF values. + * Also allow first and last skb in retransmit queue to be split. 
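 * (An aside, not part of the patch — rough arithmetic assuming a
 * typical x86_64 build: GSO_MAX_SIZE is 64 KiB, and SKB_TRUESIZE()
 * adds the cache-aligned struct sk_buff and skb_shared_info overhead
 * on top, giving roughly 66 kB per allowance unit. The check below
 * therefore refuses a split only once sk_wmem_queued exceeds about
 * 2 * sk_sndbuf + 264 kB.)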
+ */ + limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE); + if (unlikely((sk->sk_wmem_queued >> 1) > limit && + tcp_queue != TCP_FRAG_IN_WRITE_QUEUE && + skb != tcp_rtx_queue_head(sk) && + skb != tcp_rtx_queue_tail(sk))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG); return -ENOMEM; } diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index e77ea1ed5edd..5cdb4a69d277 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -36,6 +36,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) opts.options |= XT_SYNPROXY_OPT_ECN; opts.options &= info->options; + opts.mss_encode = opts.mss; + opts.mss = info->mss; if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_init_timestamp_cookie(info, &opts); else diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 6bcaf7357183..d800801a5dd2 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -55,7 +55,9 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, if (rpfilter_addr_linklocal(&iph->saddr)) { lookup_flags |= RT6_LOOKUP_F_IFACE; fl6.flowi6_oif = dev->ifindex; - } else if ((flags & XT_RPFILTER_LOOSE) == 0) + /* Set flowi6_oif for vrf devices to lookup route in l3mdev domain. */ + } else if (netif_is_l3_master(dev) || netif_is_l3_slave(dev) || + (flags & XT_RPFILTER_LOOSE) == 0) fl6.flowi6_oif = dev->ifindex; rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags); @@ -70,7 +72,9 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, goto out; } - if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE)) + if (rt->rt6i_idev->dev == dev || + l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == dev->ifindex || + (flags & XT_RPFILTER_LOOSE)) ret = true; out: ip6_rt_put(rt); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 76cc9e967fa6..4d458067d80d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -936,8 +936,10 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, err = ieee80211_set_probe_resp(sdata, params->probe_resp, params->probe_resp_len, csa); - if (err < 0) + if (err < 0) { + kfree(new); return err; + } if (err == 0) changed |= BSS_CHANGED_AP_PROBE_RESP; @@ -949,8 +951,10 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, params->civicloc, params->civicloc_len); - if (err < 0) + if (err < 0) { + kfree(new); return err; + } changed |= BSS_CHANGED_FTM_RESPONDER; } diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index acd4afb4944b..c9a8a2433e8a 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -187,11 +187,16 @@ int drv_conf_tx(struct ieee80211_local *local, if (!check_sdata_in_driver(sdata)) return -EIO; - if (WARN_ONCE(params->cw_min == 0 || - params->cw_min > params->cw_max, - "%s: invalid CW_min/CW_max: %d/%d\n", - sdata->name, params->cw_min, params->cw_max)) + if (params->cw_min == 0 || params->cw_min > params->cw_max) { + /* + * If we can't configure hardware anyway, don't warn. We may + * never have initialized the CW parameters. 
+ */ + WARN_ONCE(local->ops->conf_tx, + "%s: invalid CW_min/CW_max: %d/%d\n", + sdata->name, params->cw_min, params->cw_max); return -EINVAL; + } trace_drv_conf_tx(local, sdata, ac, params); if (local->ops->conf_tx) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 32a45c03786e..0d65f4d39494 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -223,8 +223,6 @@ config NF_CONNTRACK_FTP of Network Address Translation on them. This is FTP support on Layer 3 independent connection tracking. - Layer 3 independent connection tracking is experimental scheme - which generalize ip_conntrack to support other layer 3 protocols. To compile it as a module, choose M here. If unsure, say N. @@ -338,7 +336,7 @@ config NF_CONNTRACK_SIP help SIP is an application-layer control protocol that can establish, modify, and terminate multimedia sessions (conferences) such as - Internet telephony calls. With the ip_conntrack_sip and + Internet telephony calls. With the nf_conntrack_sip and the nf_nat_sip modules you can support the protocol on a connection tracking/NATing firewall. @@ -1313,7 +1311,7 @@ config NETFILTER_XT_MATCH_HELPER depends on NETFILTER_ADVANCED help Helper matching allows you to match packets in dynamic connections - tracked by a conntrack-helper, ie. ip_conntrack_ftp + tracked by a conntrack-helper, ie. nf_conntrack_ftp To compile it as a module, choose M here. If unsure, say Y. diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index 403541996952..08adcb222986 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -231,7 +231,7 @@ void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, IP_VS_DBG_BUF(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n", __func__, ct, ARG_TUPLE(&exp->tuple)); - nf_ct_expect_related(exp); + nf_ct_expect_related(exp, 0); nf_ct_expect_put(exp); } EXPORT_SYMBOL(ip_vs_nfct_expect_related); diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index 42ee659d0d1e..d011d2eb0848 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -159,7 +159,7 @@ static int amanda_help(struct sk_buff *skb, if (nf_nat_amanda && ct->status & IPS_NAT_MASK) ret = nf_nat_amanda(skb, ctinfo, protoff, off - dataoff, len, exp); - else if (nf_ct_expect_related(exp) != 0) { + else if (nf_ct_expect_related(exp, 0) != 0) { nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; } diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c index 921a7b95be68..1ba6becc3079 100644 --- a/net/netfilter/nf_conntrack_broadcast.c +++ b/net/netfilter/nf_conntrack_broadcast.c @@ -68,7 +68,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, exp->class = NF_CT_EXPECT_CLASS_DEFAULT; exp->helper = NULL; - nf_ct_expect_related(exp); + nf_ct_expect_related(exp, 0); nf_ct_expect_put(exp); nf_ct_refresh(ct, skb, timeout * HZ); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index bdfeacee0817..a542761e90d1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1817,9 +1817,7 @@ EXPORT_SYMBOL_GPL(nf_ct_kill_acct); #include <linux/netfilter/nfnetlink_conntrack.h> #include <linux/mutex.h> -/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be - * in ip_conntrack_core, since we don't want the protocols to autoload - * or depend on ctnetlink */ +/* Generic function for tcp/udp/sctp/dccp and alike. 
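 * ("and alike" covers any port-based l4 protocol: the helper simply
 * emits the CTA_PROTO_SRC_PORT/CTA_PROTO_DST_PORT attributes taken
 * from the tuple, so nothing in it is tcp-specific.)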
*/ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index ffd1f4906c4f..65364de915d1 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -249,13 +249,22 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, static inline int expect_matches(const struct nf_conntrack_expect *a, const struct nf_conntrack_expect *b) { - return a->master == b->master && - nf_ct_tuple_equal(&a->tuple, &b->tuple) && + return nf_ct_tuple_equal(&a->tuple, &b->tuple) && nf_ct_tuple_mask_equal(&a->mask, &b->mask) && net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) && nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master)); } +static bool master_matches(const struct nf_conntrack_expect *a, + const struct nf_conntrack_expect *b, + unsigned int flags) +{ + if (flags & NF_CT_EXP_F_SKIP_MASTER) + return true; + + return a->master == b->master; +} + /* Generally a bad idea to call this: could have matched already. */ void nf_ct_unexpect_related(struct nf_conntrack_expect *exp) { @@ -399,7 +408,8 @@ static void evict_oldest_expect(struct nf_conn *master, nf_ct_remove_expect(last); } -static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) +static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect, + unsigned int flags) { const struct nf_conntrack_expect_policy *p; struct nf_conntrack_expect *i; @@ -417,8 +427,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) } h = nf_ct_expect_dst_hash(net, &expect->tuple); hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) { - if (expect_matches(i, expect)) { - if (i->class != expect->class) + if (master_matches(i, expect, flags) && + expect_matches(i, expect)) { + if (i->class != expect->class || + i->master != expect->master) return -EALREADY; if (nf_ct_remove_expect(i)) @@ -453,12 +465,12 @@ out: } int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, - u32 portid, int report) + u32 portid, int report, unsigned int flags) { int ret; spin_lock_bh(&nf_conntrack_expect_lock); - ret = __nf_ct_expect_check(expect); + ret = __nf_ct_expect_check(expect, flags); if (ret < 0) goto out; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 8c6c11bab5b6..0ecb3e289ef2 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -525,7 +525,7 @@ skip_nl_seq: protoff, matchoff, matchlen, exp); else { /* Can't expect this? Best to drop packet now. */ - if (nf_ct_expect_related(exp) != 0) { + if (nf_ct_expect_related(exp, 0) != 0) { nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; } else diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 8f6ba8162f0b..573cb4481481 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -1,11 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * ip_conntrack_helper_h323_asn1.c - BER and PER decoding library for H.323 - * conntrack/NAT module. + * BER and PER decoding library for H.323 conntrack/NAT module. * * Copyright (c) 2006 by Jing Min Zhao <[email protected]> * - * See ip_conntrack_helper_h323_asn1.h for details. + * See nf_conntrack_helper_h323_asn1.h for details. 
*/ #ifdef __KERNEL__ diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 6497e5fc0871..8ba037b76ad3 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -305,8 +305,8 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, ret = nat_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff, taddr, port, rtp_port, rtp_exp, rtcp_exp); } else { /* Conntrack only */ - if (nf_ct_expect_related(rtp_exp) == 0) { - if (nf_ct_expect_related(rtcp_exp) == 0) { + if (nf_ct_expect_related(rtp_exp, 0) == 0) { + if (nf_ct_expect_related(rtcp_exp, 0) == 0) { pr_debug("nf_ct_h323: expect RTP "); nf_ct_dump_tuple(&rtp_exp->tuple); pr_debug("nf_ct_h323: expect RTCP "); @@ -364,7 +364,7 @@ static int expect_t120(struct sk_buff *skb, ret = nat_t120(skb, ct, ctinfo, protoff, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ - if (nf_ct_expect_related(exp) == 0) { + if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_h323: expect T.120 "); nf_ct_dump_tuple(&exp->tuple); } else @@ -701,7 +701,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, ret = nat_h245(skb, ct, ctinfo, protoff, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ - if (nf_ct_expect_related(exp) == 0) { + if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_q931: expect H.245 "); nf_ct_dump_tuple(&exp->tuple); } else @@ -825,7 +825,7 @@ static int expect_callforwarding(struct sk_buff *skb, protoff, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ - if (nf_ct_expect_related(exp) == 0) { + if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_q931: expect Call Forwarding "); nf_ct_dump_tuple(&exp->tuple); } else @@ -1284,7 +1284,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, ret = nat_q931(skb, ct, ctinfo, protoff, data, taddr, i, port, exp); } else { /* Conntrack only */ - if (nf_ct_expect_related(exp) == 0) { + if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); nf_ct_dump_tuple(&exp->tuple); @@ -1349,7 +1349,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, IPPROTO_UDP, NULL, &port); exp->helper = nf_conntrack_helper_ras; - if (nf_ct_expect_related(exp) == 0) { + if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect RAS "); nf_ct_dump_tuple(&exp->tuple); } else @@ -1561,7 +1561,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, exp->flags = NF_CT_EXPECT_PERMANENT; exp->helper = nf_conntrack_helper_q931; - if (nf_ct_expect_related(exp) == 0) { + if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); nf_ct_dump_tuple(&exp->tuple); } else @@ -1615,7 +1615,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, exp->flags = NF_CT_EXPECT_PERMANENT; exp->helper = nf_conntrack_helper_q931; - if (nf_ct_expect_related(exp) == 0) { + if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); nf_ct_dump_tuple(&exp->tuple); } else diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 7ac156f1f3bc..e40988a2f22f 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -213,7 +213,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, addr_beg_p - ib_ptr, addr_end_p - addr_beg_p, exp); - else if (nf_ct_expect_related(exp) != 0) { + else if (nf_ct_expect_related(exp, 0) != 0) { nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; diff --git 
a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 1b77444d5b52..6aa01eb6fe99 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2616,7 +2616,7 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, if (IS_ERR(exp)) return PTR_ERR(exp); - err = nf_ct_expect_related_report(exp, portid, report); + err = nf_ct_expect_related_report(exp, portid, report, 0); nf_ct_expect_put(exp); return err; } @@ -3367,7 +3367,7 @@ ctnetlink_create_expect(struct net *net, goto err_rcu; } - err = nf_ct_expect_related_report(exp, portid, report); + err = nf_ct_expect_related_report(exp, portid, report, 0); nf_ct_expect_put(exp); err_rcu: rcu_read_unlock(); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index b22042ad0fca..a971183f11af 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -234,9 +234,9 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid) nf_nat_pptp_exp_gre = rcu_dereference(nf_nat_pptp_hook_exp_gre); if (nf_nat_pptp_exp_gre && ct->status & IPS_NAT_MASK) nf_nat_pptp_exp_gre(exp_orig, exp_reply); - if (nf_ct_expect_related(exp_orig) != 0) + if (nf_ct_expect_related(exp_orig, 0) != 0) goto out_put_both; - if (nf_ct_expect_related(exp_reply) != 0) + if (nf_ct_expect_related(exp_reply, 0) != 0) goto out_unexpect_orig; /* Add GRE keymap entries */ diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index c2eb365f1723..5b05487a60d2 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * ip_conntrack_proto_gre.c - Version 3.0 - * * Connection tracking protocol helper module for GRE. 
* * GRE is a generic encapsulation protocol, which is generally not very diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c index dd53e2b20f6b..097deba7441a 100644 --- a/net/netfilter/nf_conntrack_proto_icmp.c +++ b/net/netfilter/nf_conntrack_proto_icmp.c @@ -215,7 +215,7 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl, return -NF_ACCEPT; } - /* See ip_conntrack_proto_tcp.c */ + /* See nf_conntrack_proto_tcp.c */ if (state->net->ct.sysctl_checksum && state->hook == NF_INET_PRE_ROUTING && nf_ip_checksum(skb, state->hook, dataoff, IPPROTO_ICMP)) { diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index d5fdfa00d683..85c1f8c213b0 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -472,6 +472,7 @@ static bool tcp_in_window(const struct nf_conn *ct, struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; + u16 win_raw; s32 receiver_offset; bool res, in_recv_win; @@ -480,7 +481,8 @@ static bool tcp_in_window(const struct nf_conn *ct, */ seq = ntohl(tcph->seq); ack = sack = ntohl(tcph->ack_seq); - win = ntohs(tcph->window); + win_raw = ntohs(tcph->window); + win = win_raw; end = segment_seq_plus_len(seq, skb->len, dataoff, tcph); if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) @@ -655,14 +657,14 @@ static bool tcp_in_window(const struct nf_conn *ct, && state->last_seq == seq && state->last_ack == ack && state->last_end == end - && state->last_win == win) + && state->last_win == win_raw) state->retrans++; else { state->last_dir = dir; state->last_seq = seq; state->last_ack = ack; state->last_end = end; - state->last_win = win; + state->last_win = win_raw; state->retrans = 0; } } diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 81448c3db661..1aebd6569d4e 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -153,7 +153,7 @@ static int help(struct sk_buff *skb, nf_ct_dump_tuple(&exp->tuple); /* Can't expect this? Best to drop packet now. */ - if (nf_ct_expect_related(exp) != 0) { + if (nf_ct_expect_related(exp, 0) != 0) { nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 107251731809..b83dc9bf0a5d 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -977,11 +977,15 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, /* -EALREADY handling works around end-points that send * SDP messages with identical port but different media type, * we pretend expectation was set up. + * It also works in the case that SDP messages are sent with + * identical expect tuples but for different master conntracks. 
*/ - int errp = nf_ct_expect_related(rtp_exp); + int errp = nf_ct_expect_related(rtp_exp, + NF_CT_EXP_F_SKIP_MASTER); if (errp == 0 || errp == -EALREADY) { - int errcp = nf_ct_expect_related(rtcp_exp); + int errcp = nf_ct_expect_related(rtcp_exp, + NF_CT_EXP_F_SKIP_MASTER); if (errcp == 0 || errcp == -EALREADY) ret = NF_ACCEPT; @@ -1296,7 +1300,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, ret = hooks->expect(skb, protoff, dataoff, dptr, datalen, exp, matchoff, matchlen); else { - if (nf_ct_expect_related(exp) != 0) { + if (nf_ct_expect_related(exp, 0) != 0) { nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; } else diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index df6d6d61bd58..80ee53f29f68 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -78,7 +78,7 @@ static int tftp_help(struct sk_buff *skb, nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook); if (nf_nat_tftp && ct->status & IPS_NAT_MASK) ret = nf_nat_tftp(skb, ctinfo, exp); - else if (nf_ct_expect_related(exp) != 0) { + else if (nf_ct_expect_related(exp, 0) != 0) { nf_ct_helper_log(skb, ct, "cannot add expectation"); ret = NF_DROP; } diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c index a352604d6186..3bc7e0854efe 100644 --- a/net/netfilter/nf_nat_amanda.c +++ b/net/netfilter/nf_nat_amanda.c @@ -48,7 +48,7 @@ static unsigned int help(struct sk_buff *skb, int res; exp->tuple.dst.u.tcp.port = htons(port); - res = nf_ct_expect_related(exp); + res = nf_ct_expect_related(exp, 0); if (res == 0) break; else if (res != -EBUSY) { diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 9ab410455992..3f6023ed4966 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -519,7 +519,7 @@ another_round: * and NF_INET_LOCAL_OUT, we change the destination to map into the * range. It might not be possible to get a unique tuple, but we try. * At worst (or if we race), we will end up with a final duplicate in - * __ip_conntrack_confirm and drop the packet. */ + * __nf_conntrack_confirm and drop the packet. 
*/ static void get_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig_tuple, diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c index d48484a9d52d..aace6768a64e 100644 --- a/net/netfilter/nf_nat_ftp.c +++ b/net/netfilter/nf_nat_ftp.c @@ -91,7 +91,7 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, int ret; exp->tuple.dst.u.tcp.port = htons(port); - ret = nf_ct_expect_related(exp); + ret = nf_ct_expect_related(exp, 0); if (ret == 0) break; else if (ret != -EBUSY) { diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c index dfb7ef8845bd..c691ab8d234c 100644 --- a/net/netfilter/nf_nat_irc.c +++ b/net/netfilter/nf_nat_irc.c @@ -53,7 +53,7 @@ static unsigned int help(struct sk_buff *skb, int ret; exp->tuple.dst.u.tcp.port = htons(port); - ret = nf_ct_expect_related(exp); + ret = nf_ct_expect_related(exp, 0); if (ret == 0) break; else if (ret != -EBUSY) { diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index e338d91980d8..f0a735e86851 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -414,7 +414,7 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, int ret; exp->tuple.dst.u.udp.port = htons(port); - ret = nf_ct_expect_related(exp); + ret = nf_ct_expect_related(exp, NF_CT_EXP_F_SKIP_MASTER); if (ret == 0) break; else if (ret != -EBUSY) { @@ -607,7 +607,8 @@ static unsigned int nf_nat_sdp_media(struct sk_buff *skb, unsigned int protoff, int ret; rtp_exp->tuple.dst.u.udp.port = htons(port); - ret = nf_ct_expect_related(rtp_exp); + ret = nf_ct_expect_related(rtp_exp, + NF_CT_EXP_F_SKIP_MASTER); if (ret == -EBUSY) continue; else if (ret < 0) { @@ -615,7 +616,8 @@ static unsigned int nf_nat_sdp_media(struct sk_buff *skb, unsigned int protoff, break; } rtcp_exp->tuple.dst.u.udp.port = htons(port + 1); - ret = nf_ct_expect_related(rtcp_exp); + ret = nf_ct_expect_related(rtcp_exp, + NF_CT_EXP_F_SKIP_MASTER); if (ret == 0) break; else if (ret == -EBUSY) { diff --git a/net/netfilter/nf_nat_tftp.c b/net/netfilter/nf_nat_tftp.c index 833a11f68031..1a591132d6eb 100644 --- a/net/netfilter/nf_nat_tftp.c +++ b/net/netfilter/nf_nat_tftp.c @@ -30,7 +30,7 @@ static unsigned int help(struct sk_buff *skb, = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; exp->dir = IP_CT_DIR_REPLY; exp->expectfn = nf_nat_follow_master; - if (nf_ct_expect_related(exp) != 0) { + if (nf_ct_expect_related(exp, 0) != 0) { nf_ct_helper_log(skb, exp->master, "cannot add expectation"); return NF_DROP; } diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index b101f187eda8..c769462a839e 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -470,7 +470,7 @@ synproxy_send_client_synack(struct net *net, struct iphdr *iph, *niph; struct tcphdr *nth; unsigned int tcp_hdr_size; - u16 mss = opts->mss; + u16 mss = opts->mss_encode; iph = ip_hdr(skb); @@ -687,7 +687,7 @@ ipv4_synproxy_hook(void *priv, struct sk_buff *skb, state = &ct->proto.tcp; switch (state->state) { case TCP_CONNTRACK_CLOSE: - if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { + if (th->rst && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq) + 1); break; @@ -884,7 +884,7 @@ synproxy_send_client_synack_ipv6(struct net *net, struct ipv6hdr *iph, *niph; struct tcphdr *nth; unsigned int tcp_hdr_size; - u16 mss = opts->mss; + u16 mss = opts->mss_encode; iph = ipv6_hdr(skb); @@ -1111,7 +1111,7 @@ 
ipv6_synproxy_hook(void *priv, struct sk_buff *skb, state = &ct->proto.tcp; switch (state->state) { case TCP_CONNTRACK_CLOSE: - if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { + if (th->rst && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq) + 1); break; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ed17a7c29b86..605a7cfe7ca7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1662,7 +1662,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, chain->flags |= NFT_BASE_CHAIN | flags; basechain->policy = NF_ACCEPT; - INIT_LIST_HEAD(&basechain->cb_list); + flow_block_init(&basechain->flow_block); } else { chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (chain == NULL) @@ -1900,6 +1900,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, if (nla[NFTA_CHAIN_FLAGS]) flags = ntohl(nla_get_be32(nla[NFTA_CHAIN_FLAGS])); + else if (chain) + flags = chain->flags; nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 2c3302845f67..64f5fd5f240e 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -116,7 +116,7 @@ static int nft_setup_cb_call(struct nft_base_chain *basechain, struct flow_block_cb *block_cb; int err; - list_for_each_entry(block_cb, &basechain->cb_list, list) { + list_for_each_entry(block_cb, &basechain->flow_block.cb_list, list) { err = block_cb->cb(type, type_data, block_cb->cb_priv); if (err < 0) return err; @@ -154,7 +154,7 @@ static int nft_flow_offload_rule(struct nft_trans *trans, static int nft_flow_offload_bind(struct flow_block_offload *bo, struct nft_base_chain *basechain) { - list_splice(&bo->cb_list, &basechain->cb_list); + list_splice(&bo->cb_list, &basechain->flow_block.cb_list); return 0; } @@ -198,6 +198,7 @@ static int nft_flow_offload_chain(struct nft_trans *trans, return -EOPNOTSUPP; bo.command = cmd; + bo.block = &basechain->flow_block; bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; bo.extack = &extack; INIT_LIST_HEAD(&bo.cb_list); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 92077d459109..4abbb452cf6c 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -578,7 +578,7 @@ static int nfnetlink_bind(struct net *net, int group) ss = nfnetlink_get_subsys(type << 8); rcu_read_unlock(); if (!ss) - request_module("nfnetlink-subsys-%d", type); + request_module_nowait("nfnetlink-subsys-%d", type); return 0; } #endif diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 3fd540b2c6ba..b5d5d071d765 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -193,7 +193,7 @@ static inline void nft_chain_filter_inet_init(void) {} static inline void nft_chain_filter_inet_fini(void) {} #endif /* CONFIG_NF_TABLES_IPV6 */ -#ifdef CONFIG_NF_TABLES_BRIDGE +#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE) static unsigned int nft_do_chain_bridge(void *priv, struct sk_buff *skb, diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c index 2f89bde3c61c..ff9ac8ae0031 100644 --- a/net/netfilter/nft_chain_nat.c +++ b/net/netfilter/nft_chain_nat.c @@ -142,3 +142,6 @@ MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat"); #ifdef CONFIG_NF_TABLES_IPV6 MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat"); #endif +#ifdef CONFIG_NF_TABLES_INET +MODULE_ALIAS_NFT_CHAIN(1, "nat"); /* NFPROTO_INET */ 
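/*
 * (Aside on the bare "1": NFPROTO_INET is an enumerator in
 * <uapi/linux/netfilter.h>, not a #define, and the preprocessor
 * cannot expand enumerators, so __stringify() inside
 * MODULE_ALIAS_NFT_CHAIN() would emit "nft-chain-NFPROTO_INET-nat"
 * rather than the "nft-chain-1-nat" alias that the nf_tables core
 * requests via request_module("nft-chain-%u-%s", ...).)
 */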
+#endif diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 827ab6196df9..46ca8bcca1bd 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1252,7 +1252,7 @@ static void nft_ct_expect_obj_eval(struct nft_object *obj, priv->l4proto, NULL, &priv->dport); exp->timeout.expires = jiffies + priv->timeout * HZ; - if (nf_ct_expect_related(exp) != 0) + if (nf_ct_expect_related(exp, 0) != 0) regs->verdict.code = NF_DROP; } diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index fe93e731dc7f..b836d550b919 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -129,7 +129,7 @@ static int nft_symhash_init(const struct nft_ctx *ctx, priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); - if (priv->modulus <= 1) + if (priv->modulus < 1) return -ERANGE; if (priv->offset + priv->modulus - 1 < priv->offset) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 76866f77e343..f1b1d948c07b 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -546,7 +546,7 @@ nft_meta_select_ops(const struct nft_ctx *ctx, if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG]) return ERR_PTR(-EINVAL); -#ifdef CONFIG_NF_TABLES_BRIDGE +#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE) && IS_MODULE(CONFIG_NFT_BRIDGE_META) if (ctx->family == NFPROTO_BRIDGE) return ERR_PTR(-EAGAIN); #endif diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 8487eeff5c0e..43eeb1f609f1 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -291,4 +291,4 @@ module_exit(nft_redir_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez <[email protected]>"); -MODULE_ALIAS_NFT_EXPR("nat"); +MODULE_ALIAS_NFT_EXPR("redir"); diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index 80060ade8a5b..928e661d1517 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -31,6 +31,8 @@ static void nft_synproxy_tcp_options(struct synproxy_options *opts, opts->options |= NF_SYNPROXY_OPT_ECN; opts->options &= priv->info.options; + opts->mss_encode = opts->mss; + opts->mss = info->mss; if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP) synproxy_init_timestamp_cookie(info, opts); else diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index dca3b1e2acf0..bc89e16e0505 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -59,7 +59,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies) void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, const struct sk_buff *skb) { - struct flow_stats *stats; + struct sw_flow_stats *stats; unsigned int cpu = smp_processor_id(); int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); @@ -87,7 +87,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, if (likely(flow->stats_last_writer != -1) && likely(!rcu_access_pointer(flow->stats[cpu]))) { /* Try to allocate CPU-specific stats. 
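 * (Aside: the flow_stats -> sw_flow_stats rename across these
 * openvswitch hunks looks purely mechanical, presumably to keep the
 * name from colliding with the struct flow_stats now exported by
 * <net/flow_offload.h>; no behavioural change appears intended.)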
*/ - struct flow_stats *new_stats; + struct sw_flow_stats *new_stats; new_stats = kmem_cache_alloc_node(flow_stats_cache, @@ -134,7 +134,7 @@ void ovs_flow_stats_get(const struct sw_flow *flow, /* We open code this to make sure cpu 0 is always considered */ for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) { - struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]); + struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]); if (stats) { /* Local CPU may write on non-local stats, so we must @@ -158,7 +158,7 @@ void ovs_flow_stats_clear(struct sw_flow *flow) /* We open code this to make sure cpu 0 is always considered */ for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) { - struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]); + struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]); if (stats) { spin_lock_bh(&stats->lock); diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 3e2cc2202d66..a5506e2d4b7a 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -194,7 +194,7 @@ struct sw_flow_actions { struct nlattr actions[]; }; -struct flow_stats { +struct sw_flow_stats { u64 packet_count; /* Number of packets matched. */ u64 byte_count; /* Number of bytes matched. */ unsigned long used; /* Last used time (in jiffies). */ @@ -216,7 +216,7 @@ struct sw_flow { struct cpumask cpu_used_mask; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; - struct flow_stats __rcu *stats[]; /* One for each CPU. First one + struct sw_flow_stats __rcu *stats[]; /* One for each CPU. First one * is allocated at flow creation time, * the rest are allocated on demand * while holding the 'stats[0].lock'. diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 988fd8a94e43..cf3582c5ed70 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -66,7 +66,7 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, struct sw_flow *ovs_flow_alloc(void) { struct sw_flow *flow; - struct flow_stats *stats; + struct sw_flow_stats *stats; flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL); if (!flow) @@ -110,7 +110,7 @@ static void flow_free(struct sw_flow *flow) for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) if (flow->stats[cpu]) kmem_cache_free(flow_stats_cache, - (struct flow_stats __force *)flow->stats[cpu]); + (struct sw_flow_stats __force *)flow->stats[cpu]); kmem_cache_free(flow_cache, flow); } @@ -712,13 +712,13 @@ int ovs_flow_init(void) flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow) + (nr_cpu_ids - * sizeof(struct flow_stats *)), + * sizeof(struct sw_flow_stats *)), 0, 0, NULL); if (flow_cache == NULL) return -ENOMEM; flow_stats_cache - = kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats), + = kmem_cache_create("sw_flow_stats", sizeof(struct sw_flow_stats), 0, SLAB_HWCACHE_ALIGN, NULL); if (flow_stats_cache == NULL) { kmem_cache_destroy(flow_cache); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index d144233423c5..efd3cfb80a2a 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -691,6 +691,8 @@ static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev, if (!indr_dev->block) return; + bo.block = &indr_dev->block->flow_block; + indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, &bo); tcf_block_setup(indr_dev->block, &bo); @@ -775,6 +777,7 @@ static void tc_indr_block_call(struct tcf_block *block, struct 
net_device *dev, .command = command, .binder_type = ei->binder_type, .net = dev_net(dev), + .block = &block->flow_block, .block_shared = tcf_block_shared(block), .extack = extack, }; @@ -810,6 +813,7 @@ static int tcf_block_offload_cmd(struct tcf_block *block, bo.net = dev_net(dev); bo.command = command; bo.binder_type = ei->binder_type; + bo.block = &block->flow_block; bo.block_shared = tcf_block_shared(block); bo.extack = extack; INIT_LIST_HEAD(&bo.cb_list); @@ -987,8 +991,8 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, return ERR_PTR(-ENOMEM); } mutex_init(&block->lock); + flow_block_init(&block->flow_block); INIT_LIST_HEAD(&block->chain_list); - INIT_LIST_HEAD(&block->cb_list); INIT_LIST_HEAD(&block->owner_list); INIT_LIST_HEAD(&block->chain0.filter_chain_list); @@ -1514,7 +1518,7 @@ void tcf_block_put(struct tcf_block *block) EXPORT_SYMBOL(tcf_block_put); static int -tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb, +tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb, void *cb_priv, bool add, bool offload_in_use, struct netlink_ext_ack *extack) { @@ -1570,7 +1574,7 @@ static int tcf_block_bind(struct tcf_block *block, i++; } - list_splice(&bo->cb_list, &block->cb_list); + list_splice(&bo->cb_list, &block->flow_block.cb_list); return 0; @@ -2152,7 +2156,9 @@ replay: tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_NEWTFILTER, false, rtnl_held); tfilter_put(tp, fh); - q->flags &= ~TCQ_F_CAN_BYPASS; + /* q pointer is NULL for shared blocks */ + if (q) + q->flags &= ~TCQ_F_CAN_BYPASS; } errout: @@ -3156,7 +3162,7 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, if (block->nooffloaddevcnt && err_stop) return -EOPNOTSUPP; - list_for_each_entry(block_cb, &block->cb_list, list) { + list_for_each_entry(block_cb, &block->flow_block.cb_list, list) { err = block_cb->cb(type, type_data, block_cb->cb_priv); if (err) { if (err_stop) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 691f71830134..3f7a9c02b70c 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -651,7 +651,7 @@ skip: } } -static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, +static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 38d6e85693fc..054123742e32 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1800,7 +1800,7 @@ fl_get_next_hw_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool add) return NULL; } -static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, +static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack) { struct tcf_block *block = tp->chain->block; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index a30d2f8feb32..455ea2793f9b 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -282,7 +282,7 @@ skip: arg->count++; } -static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, +static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack) { struct cls_mall_head *head = rtnl_dereference(tp->root); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index be9e46c77e8b..8614088edd1b 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c 
@@ -1152,7 +1152,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg, } static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, - bool add, tc_setup_cb_t *cb, void *cb_priv, + bool add, flow_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack) { struct tc_cls_u32_offload cls_u32 = {}; @@ -1172,7 +1172,7 @@ static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, } static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n, - bool add, tc_setup_cb_t *cb, void *cb_priv, + bool add, flow_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack) { struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); @@ -1213,7 +1213,7 @@ static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n, return 0; } -static int u32_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, +static int u32_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index f345662890a6..ca8ac96d22a9 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -476,7 +476,7 @@ static void tipc_topsrv_accept(struct work_struct *work) } } -/* tipc_toprsv_listener_data_ready - interrupt callback with connection request +/* tipc_topsrv_listener_data_ready - interrupt callback with connection request * The queued job is launched into tipc_topsrv_accept() */ static void tipc_topsrv_listener_data_ready(struct sock *sk) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 73e80b917f12..77c742fa4fb1 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -125,11 +125,6 @@ CC_OPTION_CFLAGS = $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS)) cc-option = $(call __cc-option, $(CC),\ $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS),$(1),$(2)) -# hostcc-option -# Usage: cflags-y += $(call hostcc-option,-march=winchip-c6,-march=i586) -hostcc-option = $(call __cc-option, $(HOSTCC),\ - $(KBUILD_HOSTCFLAGS) $(HOST_EXTRACFLAGS),$(1),$(2)) - # cc-option-yn # Usage: flag := $(call cc-option-yn,-march=winchip-c6) cc-option-yn = $(call try-run,\ diff --git a/scripts/Makefile.build b/scripts/Makefile.build index be38198d98b2..0d434d0afc0b 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -63,14 +63,14 @@ ifneq ($(strip $(real-obj-y) $(need-builtin)),) builtin-target := $(obj)/built-in.a endif -ifdef CONFIG_MODULES +ifeq ($(CONFIG_MODULES)$(need-modorder),y1) modorder-target := $(obj)/modules.order endif -# We keep a list of all modules in $(MODVERDIR) +mod-targets := $(patsubst %.o, %.mod, $(obj-m)) __build: $(if $(KBUILD_BUILTIN),$(builtin-target) $(lib-target) $(extra-y)) \ - $(if $(KBUILD_MODULES),$(obj-m) $(modorder-target)) \ + $(if $(KBUILD_MODULES),$(obj-m) $(mod-targets) $(modorder-target)) \ $(subdir-ym) $(always) @: @@ -87,11 +87,6 @@ ifneq ($(KBUILD_ENABLE_EXTRA_GCC_CHECKS),) cmd_checkdoc = $(srctree)/scripts/kernel-doc -none $< endif -# Do section mismatch analysis for each module/built-in.a -ifdef CONFIG_DEBUG_SECTION_MISMATCH - cmd_secanalysis = ; scripts/mod/modpost $@ -endif - # Compile C sources (.c) # --------------------------------------------------------------------------- @@ -268,7 +263,7 @@ endef # List module undefined symbols (or empty line if not enabled) ifdef CONFIG_TRIM_UNUSED_KSYMS -cmd_undef_syms = $(NM) $@ | sed -n 's/^ *U //p' | xargs echo +cmd_undef_syms = $(NM) $< | sed -n 's/^ *U //p' | xargs echo else cmd_undef_syms = echo endif @@ 
-278,13 +273,15 @@ $(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_dep) FORCE $(call cmd,force_checksrc) $(call if_changed_rule,cc_o_c) -# Single-part modules are special since we need to mark them in $(MODVERDIR) +cmd_mod = { \ + echo $(if $($*-objs)$($*-y)$($*-m), $(addprefix $(obj)/, $($*-objs) $($*-y) $($*-m)), $(@:.mod=.o)); \ + $(cmd_undef_syms); \ + } > $@ -$(single-used-m): $(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_dep) FORCE - $(call cmd,force_checksrc) - $(call if_changed_rule,cc_o_c) - @{ echo $(@:.o=.ko); echo $@; \ - $(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod) +$(obj)/%.mod: $(obj)/%.o FORCE + $(call if_changed,mod) + +targets += $(mod-targets) quiet_cmd_cc_lst_c = MKLST $@ cmd_cc_lst_c = $(CC) $(c_flags) -g -c -o $*.o $< && \ @@ -294,7 +291,7 @@ quiet_cmd_cc_lst_c = MKLST $@ $(obj)/%.lst: $(src)/%.c FORCE $(call if_changed_dep,cc_lst_c) -# header test (header-test-y target) +# header test (header-test-y, header-test-m target) # --------------------------------------------------------------------------- quiet_cmd_cc_s_h = CC $@ @@ -423,13 +420,10 @@ endif # builtin-target # # Create commands to either record .ko file or cat modules.order from # a subdirectory -modorder-cmds = \ - $(foreach m, $(modorder), \ - $(if $(filter %/modules.order, $m), \ - cat $m;, echo kernel/$m;)) - $(modorder-target): $(subdir-ym) FORCE - $(Q)(cat /dev/null; $(modorder-cmds)) > $@ + $(Q){ $(foreach m, $(modorder), \ + $(if $(filter %/modules.order, $m), cat $m, echo $m);) :; } \ + | $(AWK) '!x[$$0]++' - > $@ # # Rule to compile a set of .o files into one .a file (with symbol table) @@ -464,12 +458,10 @@ endif # module is turned into a multi object module, $^ will contain header file # dependencies recorded in the .*.cmd file. quiet_cmd_link_multi-m = LD [M] $@ -cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^) $(cmd_secanalysis) + cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^) $(multi-used-m): FORCE $(call if_changed,link_multi-m) - @{ echo $(@:.o=.ko); echo $(filter %.o,$^); \ - $(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod) $(call multi_depend, $(multi-used-m), .o, -objs -y -m) targets += $(multi-used-m) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 6cb3aa5cbc79..5241d0751eb0 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -78,7 +78,7 @@ header-test-y += $(filter-out $(header-test-), \ $(wildcard $(addprefix $(srctree)/$(src)/, \ $(header-test-pattern-y))))) -extra-$(CONFIG_HEADER_TEST) += $(addsuffix .s, $(header-test-y)) +extra-$(CONFIG_HEADER_TEST) += $(addsuffix .s, $(header-test-y) $(header-test-m)) # Add subdir path diff --git a/scripts/Makefile.modbuiltin b/scripts/Makefile.modbuiltin index 50a9990760f3..7d4711b88656 100644 --- a/scripts/Makefile.modbuiltin +++ b/scripts/Makefile.modbuiltin @@ -40,7 +40,7 @@ __modbuiltin: $(modbuiltin-target) $(subdir-ym) @: $(modbuiltin-target): $(subdir-ym) FORCE - $(Q)(for m in $(modbuiltin-mods); do echo kernel/$$m; done; \ + $(Q)(for m in $(modbuiltin-mods); do echo $$m; done; \ cat /dev/null $(modbuiltin-subdirs)) > $@ PHONY += FORCE diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index 0dae402661f3..5a4579e76485 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -8,10 +8,7 @@ __modinst: include scripts/Kbuild.include -# - -__modules := $(sort $(shell grep -h '\.ko$$' /dev/null $(wildcard $(MODVERDIR)/*.mod))) -modules := $(patsubst %.o,%.ko,$(wildcard $(__modules:.ko=.o))) +modules := $(sort $(shell cat $(if 
$(KBUILD_EXTMOD),$(KBUILD_EXTMOD)/)modules.order)) PHONY += $(modules) __modinst: $(modules) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index fec6ec2ffa47..6b19c1a4eae5 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -6,11 +6,12 @@ # Stage one of module building created the following: # a) The individual .o files used for the module # b) A <module>.o file which is the .o files above linked together -# c) A <module>.mod file in $(MODVERDIR)/, listing the name of the -# the preliminary <module>.o file, plus all .o files +# c) A <module>.mod file, listing the name of the preliminary <module>.o file, +# plus all .o files +# d) modules.order, which lists all the modules # Stage 2 is handled by this file and does the following -# 1) Find all modules from the files listed in $(MODVERDIR)/ +# 1) Find all modules listed in modules.order # 2) modpost is then used to # 3) create one <module>.mod.c file pr. module # 4) create one Module.symvers file with CRC for all exported symbols @@ -60,10 +61,12 @@ include scripts/Makefile.lib kernelsymfile := $(objtree)/Module.symvers modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers -# Step 1), find all modules listed in $(MODVERDIR)/ -MODLISTCMD := find $(MODVERDIR) -name '*.mod' | xargs -r grep -h '\.ko$$' | sort -u -__modules := $(shell $(MODLISTCMD)) -modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o))) +modorder := $(if $(KBUILD_EXTMOD),$(KBUILD_EXTMOD)/)modules.order + +# Step 1), find all modules listed in modules.order +ifdef CONFIG_MODULES +modules := $(sort $(shell cat $(modorder))) +endif # Stop after building .o files if NOFINAL is set. Makes compile tests quicker _modpost: $(if $(KBUILD_MODPOST_NOFINAL), $(modules:.ko:.o),$(modules)) @@ -84,7 +87,7 @@ MODPOST_OPT=$(subst -i,-n,$(filter -i,$(MAKEFLAGS))) # We can go over command line length here, so be careful. quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules - cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/.o/' | $(modpost) $(MODPOST_OPT) -s -T - + cmd_modpost = sed 's/ko$$/o/' $(modorder) | $(modpost) $(MODPOST_OPT) -s -T - PHONY += __modpost __modpost: $(modules:.ko=.o) FORCE diff --git a/scripts/Makefile.modsign b/scripts/Makefile.modsign index da56aa78d245..d7325cefe709 100644 --- a/scripts/Makefile.modsign +++ b/scripts/Makefile.modsign @@ -8,8 +8,7 @@ __modsign: include scripts/Kbuild.include -__modules := $(sort $(shell grep -h '\.ko$$' /dev/null $(wildcard $(MODVERDIR)/*.mod))) -modules := $(patsubst %.o,%.ko,$(wildcard $(__modules:.ko=.o))) +modules := $(sort $(shell cat modules.order)) PHONY += $(modules) __modsign: $(modules) diff --git a/scripts/adjust_autoksyms.sh b/scripts/adjust_autoksyms.sh index aab4e299d7a2..a904bf1f5e67 100755 --- a/scripts/adjust_autoksyms.sh +++ b/scripts/adjust_autoksyms.sh @@ -8,8 +8,7 @@ # # Create/update the include/generated/autoksyms.h file from the list -# of all module's needed symbols as recorded on the third line of -# .tmp_versions/*.mod files. +# of all module's needed symbols as recorded on the second line of *.mod files. 
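# (Sketch of the new layout for a hypothetical two-object module "foo":
#
#   drivers/foo/foo-main.o drivers/foo/foo-ops.o
#   __kmalloc mutex_lock init_net
#
# line 1 lists the constituent objects, per the cmd_mod rule added to
# Makefile.build above; line 2 lists the undefined symbols, and is an
# empty line when CONFIG_TRIM_UNUSED_KSYMS=n, since cmd_undef_syms is
# then a bare "echo".)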
# # For each symbol being added or removed, the corresponding dependency # file's timestamp is updated to force a rebuild of the affected source @@ -47,13 +46,10 @@ cat > "$new_ksyms_file" << EOT */ EOT -[ "$(ls -A "$MODVERDIR")" ] && -for mod in "$MODVERDIR"/*.mod; do - sed -n -e '3{s/ /\n/g;/^$/!p;}' "$mod" -done | sort -u | -while read sym; do - echo "#define __KSYM_${sym} 1" -done >> "$new_ksyms_file" +sed 's/ko$/mod/' modules.order | +xargs -n1 sed -n -e '2{s/ /\n/g;/^$/!p;}' -- | +sort -u | +sed -e 's/\(.*\)/#define __KSYM_\1 1/' >> "$new_ksyms_file" # Special case for modversions (see modpost.c) if [ -n "$CONFIG_MODVERSIONS" ]; then diff --git a/scripts/coccinelle/api/devm_platform_ioremap_resource.cocci b/scripts/coccinelle/api/devm_platform_ioremap_resource.cocci new file mode 100644 index 000000000000..56a2e261d61d --- /dev/null +++ b/scripts/coccinelle/api/devm_platform_ioremap_resource.cocci @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/// Use devm_platform_ioremap_resource helper which wraps +/// platform_get_resource() and devm_ioremap_resource() together. +/// +// Confidence: High +// Copyright: (C) 2019 Himanshu Jha GPLv2. +// Copyright: (C) 2019 Julia Lawall, Inria/LIP6. GPLv2. +// Keywords: platform_get_resource, devm_ioremap_resource, +// Keywords: devm_platform_ioremap_resource + +virtual patch +virtual report + +@r depends on patch && !report@ +expression e1, e2, arg1, arg2, arg3; +identifier id; +@@ + +( +- id = platform_get_resource(arg1, IORESOURCE_MEM, arg2); +| +- struct resource *id = platform_get_resource(arg1, IORESOURCE_MEM, arg2); +) + ... when != id +- e1 = devm_ioremap_resource(arg3, id); ++ e1 = devm_platform_ioremap_resource(arg1, arg2); + ... when != id +? id = e2 + +@r1 depends on patch && !report@ +identifier r.id; +type T; +@@ + +- T *id; + ...when != id + +@r2 depends on report && !patch@ +identifier id; +expression e1, e2, arg1, arg2, arg3; +position j0; +@@ + +( + id = platform_get_resource(arg1, IORESOURCE_MEM, arg2); +| + struct resource *id = platform_get_resource(arg1, IORESOURCE_MEM, arg2); +) + ... when != id + e1@j0 = devm_ioremap_resource(arg3, id); + ... when != id +? 
id = e2 + +@script:python depends on report && !patch@ +e1 << r2.e1; +j0 << r2.j0; +@@ + +msg = "WARNING: Use devm_platform_ioremap_resource for %s" % (e1) +coccilib.report.print_report(j0[0], msg) diff --git a/scripts/export_report.pl b/scripts/export_report.pl index 0f604f62f067..7d3030d03a25 100755 --- a/scripts/export_report.pl +++ b/scripts/export_report.pl @@ -52,13 +52,12 @@ sub usage { sub collectcfiles { my @file; - while (<.tmp_versions/*.mod>) { - open my $fh, '<', $_ or die "cannot open $_: $!\n"; - push (@file, - grep s/\.ko/.mod.c/, # change the suffix - grep m/.+\.ko/, # find the .ko path - <$fh>); # lines in opened file + open my $fh, '< modules.order' or die "cannot open modules.order: $!\n"; + while (<$fh>) { + s/\.ko$/.mod.c/; + push (@file, $_) } + close($fh); chomp @file; return @file; } diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index ab30fe724c43..7656e1137b6b 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -94,7 +94,7 @@ configfiles=$(wildcard $(srctree)/kernel/configs/$@ $(srctree)/arch/$(SRCARCH)/c %.config: $(obj)/conf $(if $(call configfiles),, $(error No configuration exists for this target on this architecture)) $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m .config $(configfiles) - +$(Q)yes "" | $(MAKE) -f $(srctree)/Makefile oldconfig + $(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig PHONY += kvmconfig kvmconfig: kvm_guest.config diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 501fdcc5e999..1134892599da 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -895,7 +895,8 @@ int conf_write(const char *name) "# %s\n" "#\n", str); need_newline = false; - } else if (!(sym->flags & SYMBOL_CHOICE)) { + } else if (!(sym->flags & SYMBOL_CHOICE) && + !(sym->flags & SYMBOL_WRITTEN)) { sym_calc_value(sym); if (!(sym->flags & SYMBOL_WRITE)) goto next; @@ -903,7 +904,7 @@ int conf_write(const char *name) fprintf(out, "\n"); need_newline = false; } - sym->flags &= ~SYMBOL_WRITE; + sym->flags |= SYMBOL_WRITTEN; conf_write_symbol(out, sym, &kconfig_printer_cb, NULL); } @@ -1063,8 +1064,6 @@ int conf_write_autoconf(int overwrite) if (!overwrite && is_present(autoconf_name)) return 0; - sym_clear_all_valid(); - conf_write_dep("include/config/auto.conf.cmd"); if (conf_touch_deps()) diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h index 8dde65bc3165..017843c9a4f4 100644 --- a/scripts/kconfig/expr.h +++ b/scripts/kconfig/expr.h @@ -141,6 +141,7 @@ struct symbol { #define SYMBOL_OPTIONAL 0x0100 /* choice is optional - values can be 'n' */ #define SYMBOL_WRITE 0x0200 /* write symbol to file (KCONFIG_CONFIG) */ #define SYMBOL_CHANGED 0x0400 /* ? 
*/ +#define SYMBOL_WRITTEN 0x0800 /* track info to avoid double-write to .config */ #define SYMBOL_NO_WRITE 0x1000 /* Symbol for internal use only; it will not be written */ #define SYMBOL_CHECKED 0x2000 /* used during dependency checking */ #define SYMBOL_WARNED 0x8000 /* warning has been issued */ diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c index 0f6dcb4011a8..63062024ce0e 100644 --- a/scripts/mod/sumversion.c +++ b/scripts/mod/sumversion.c @@ -396,34 +396,19 @@ void get_src_version(const char *modname, char sum[], unsigned sumlen) unsigned long len; struct md4_ctx md; char *sources, *end, *fname; - const char *basename; char filelist[PATH_MAX + 1]; - char *modverdir = getenv("MODVERDIR"); - if (!modverdir) - modverdir = "."; - - /* Source files for module are in .tmp_versions/modname.mod, - after the first line. */ - if (strrchr(modname, '/')) - basename = strrchr(modname, '/') + 1; - else - basename = modname; - snprintf(filelist, sizeof(filelist), "%s/%.*s.mod", modverdir, - (int) strlen(basename) - 2, basename); + /* objects for a module are listed in the first line of *.mod file. */ + snprintf(filelist, sizeof(filelist), "%.*smod", + (int)strlen(modname) - 1, modname); file = grab_file(filelist, &len); if (!file) /* not a module or .mod file missing - ignore */ return; - sources = strchr(file, '\n'); - if (!sources) { - warn("malformed versions file for %s\n", modname); - goto release; - } + sources = file; - sources++; end = strchr(sources, '\n'); if (!end) { warn("bad ending versions file for %s\n", modname); diff --git a/scripts/modules-check.sh b/scripts/modules-check.sh index 39e8cb36ba19..f51f446707b8 100755 --- a/scripts/modules-check.sh +++ b/scripts/modules-check.sh @@ -9,7 +9,7 @@ check_same_name_modules() for m in $(sed 's:.*/::' modules.order | sort | uniq -d) do echo "warning: same module names found:" >&2 - sed -n "/\/$m/s:^kernel/: :p" modules.order >&2 + sed -n "/\/$m/s:^: :p" modules.order >&2 done } diff --git a/scripts/package/builddeb b/scripts/package/builddeb index e8ca6dc97e96..c4c580f547ef 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -132,6 +132,11 @@ fi if [ "$ARCH" != "um" ]; then $MAKE -f $srctree/Makefile headers $MAKE -f $srctree/Makefile headers_install INSTALL_HDR_PATH="$libc_headers_dir/usr" + # move asm headers to /usr/include/<libc-machine>/asm to match the structure + # used by Debian-based distros (to support multi-arch) + host_arch=$(dpkg-architecture -a$(cat debian/arch) -qDEB_HOST_MULTIARCH) + mkdir $libc_headers_dir/usr/include/$host_arch + mv $libc_headers_dir/usr/include/asm $libc_headers_dir/usr/include/$host_arch/ fi # Install the maintainer scripts diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index 8351584cb24e..e0750b70453f 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -197,6 +197,7 @@ Architecture: $debarch Description: Linux support headers for userspace development This package provides userspaces headers from the Linux kernel. These headers are used by the installed headers for GNU glibc and other system libraries. 
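(Aside: "Multi-Arch: same" pairs with the builddeb change above that
moves the asm/ headers under the multiarch triplet, so linux-libc-dev
packages built for different architectures can be co-installed. On
amd64, for example:

    $ dpkg-architecture -aamd64 -qDEB_HOST_MULTIARCH
    x86_64-linux-gnu

and the headers land in /usr/include/x86_64-linux-gnu/asm/ rather than
/usr/include/asm/.)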
+Multi-Arch: same Package: $dbg_packagename Section: debug diff --git a/scripts/package/mkspec b/scripts/package/mkspec index 2d29df4a0a53..8640c278f1aa 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -29,7 +29,7 @@ fi PROVIDES="$PROVIDES kernel-$KERNELRELEASE" __KERNELRELEASE=$(echo $KERNELRELEASE | sed -e "s/-/_/g") -EXCLUDES="$RCS_TAR_IGNORE --exclude=.tmp_versions --exclude=*vmlinux* \ +EXCLUDES="$RCS_TAR_IGNORE --exclude=*vmlinux* --exclude=*.mod \ --exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation \ --exclude=.config.old --exclude=.missing-syscalls.d --exclude=*.s" diff --git a/sound/ac97/bus.c b/sound/ac97/bus.c index 7b977b753a03..7985dd8198b6 100644 --- a/sound/ac97/bus.c +++ b/sound/ac97/bus.c @@ -122,17 +122,12 @@ static int ac97_codec_add(struct ac97_controller *ac97_ctrl, int idx, vendor_id); ret = device_add(&codec->dev); - if (ret) - goto err_free_codec; + if (ret) { + put_device(&codec->dev); + return ret; + } return 0; -err_free_codec: - of_node_put(codec->dev.of_node); - put_device(&codec->dev); - kfree(codec); - ac97_ctrl->codecs[idx] = NULL; - - return ret; } unsigned int snd_ac97_bus_scan_one(struct ac97_controller *adrv, diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index 99b882158705..41905afada63 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -574,10 +574,7 @@ snd_compr_set_params(struct snd_compr_stream *stream, unsigned long arg) stream->metadata_set = false; stream->next_track = false; - if (stream->direction == SND_COMPRESS_PLAYBACK) - stream->runtime->state = SNDRV_PCM_STATE_SETUP; - else - stream->runtime->state = SNDRV_PCM_STATE_PREPARED; + stream->runtime->state = SNDRV_PCM_STATE_SETUP; } else { return -EPERM; } @@ -693,8 +690,17 @@ static int snd_compr_start(struct snd_compr_stream *stream) { int retval; - if (stream->runtime->state != SNDRV_PCM_STATE_PREPARED) + switch (stream->runtime->state) { + case SNDRV_PCM_STATE_SETUP: + if (stream->direction != SND_COMPRESS_CAPTURE) + return -EPERM; + break; + case SNDRV_PCM_STATE_PREPARED: + break; + default: return -EPERM; + } + retval = stream->ops->trigger(stream, SNDRV_PCM_TRIGGER_START); if (!retval) stream->runtime->state = SNDRV_PCM_STATE_RUNNING; @@ -705,9 +711,15 @@ static int snd_compr_stop(struct snd_compr_stream *stream) { int retval; - if (stream->runtime->state == SNDRV_PCM_STATE_PREPARED || - stream->runtime->state == SNDRV_PCM_STATE_SETUP) + switch (stream->runtime->state) { + case SNDRV_PCM_STATE_OPEN: + case SNDRV_PCM_STATE_SETUP: + case SNDRV_PCM_STATE_PREPARED: return -EPERM; + default: + break; + } + retval = stream->ops->trigger(stream, SNDRV_PCM_TRIGGER_STOP); if (!retval) { snd_compr_drain_notify(stream); @@ -795,9 +807,17 @@ static int snd_compr_drain(struct snd_compr_stream *stream) { int retval; - if (stream->runtime->state == SNDRV_PCM_STATE_PREPARED || - stream->runtime->state == SNDRV_PCM_STATE_SETUP) + switch (stream->runtime->state) { + case SNDRV_PCM_STATE_OPEN: + case SNDRV_PCM_STATE_SETUP: + case SNDRV_PCM_STATE_PREPARED: + case SNDRV_PCM_STATE_PAUSED: return -EPERM; + case SNDRV_PCM_STATE_XRUN: + return -EPIPE; + default: + break; + } retval = stream->ops->trigger(stream, SND_COMPR_TRIGGER_DRAIN); if (retval) { @@ -817,6 +837,10 @@ static int snd_compr_next_track(struct snd_compr_stream *stream) if (stream->runtime->state != SNDRV_PCM_STATE_RUNNING) return -EPERM; + /* next track doesn't have any meaning for capture streams */ + if (stream->direction == SND_COMPRESS_CAPTURE) + 
return -EPERM; + /* you can signal next track if this is intended to be a gapless stream * and current track metadata is set */ @@ -834,9 +858,23 @@ static int snd_compr_next_track(struct snd_compr_stream *stream) static int snd_compr_partial_drain(struct snd_compr_stream *stream) { int retval; - if (stream->runtime->state == SNDRV_PCM_STATE_PREPARED || - stream->runtime->state == SNDRV_PCM_STATE_SETUP) + + switch (stream->runtime->state) { + case SNDRV_PCM_STATE_OPEN: + case SNDRV_PCM_STATE_SETUP: + case SNDRV_PCM_STATE_PREPARED: + case SNDRV_PCM_STATE_PAUSED: + return -EPERM; + case SNDRV_PCM_STATE_XRUN: + return -EPIPE; + default: + break; + } + + /* partial drain doesn't have any meaning for capture streams */ + if (stream->direction == SND_COMPRESS_CAPTURE) return -EPERM; + /* stream can be drained only when next track has been signalled */ if (stream->next_track == false) return -EPERM; diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 860543a4c840..12dd9b318db1 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -77,7 +77,7 @@ void snd_pcm_group_init(struct snd_pcm_group *group) spin_lock_init(&group->lock); mutex_init(&group->mutex); INIT_LIST_HEAD(&group->substreams); - refcount_set(&group->refs, 0); + refcount_set(&group->refs, 1); } /* define group lock helpers */ @@ -1096,8 +1096,7 @@ static void snd_pcm_group_unref(struct snd_pcm_group *group, if (!group) return; - do_free = refcount_dec_and_test(&group->refs) && - list_empty(&group->substreams); + do_free = refcount_dec_and_test(&group->refs); snd_pcm_group_unlock(group, substream->pcm->nonatomic); if (do_free) kfree(group); @@ -2020,6 +2019,7 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd) snd_pcm_group_lock_irq(target_group, nonatomic); snd_pcm_stream_lock(substream1); snd_pcm_group_assign(substream1, target_group); + refcount_inc(&target_group->refs); snd_pcm_stream_unlock(substream1); snd_pcm_group_unlock_irq(target_group, nonatomic); _end: @@ -2056,13 +2056,14 @@ static int snd_pcm_unlink(struct snd_pcm_substream *substream) snd_pcm_group_lock_irq(group, nonatomic); relink_to_local(substream); + refcount_dec(&group->refs); /* detach the last stream, too */ if (list_is_singular(&group->substreams)) { relink_to_local(list_first_entry(&group->substreams, struct snd_pcm_substream, link_list)); - do_free = !refcount_read(&group->refs); + do_free = refcount_dec_and_test(&group->refs); } snd_pcm_group_unlock_irq(group, nonatomic); diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index e30e86ca6b72..51f10ed9bc43 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2942,7 +2942,7 @@ static int hda_codec_runtime_resume(struct device *dev) static int hda_codec_force_resume(struct device *dev) { struct hda_codec *codec = dev_to_hda_codec(dev); - bool forced_resume = !codec->relaxed_resume; + bool forced_resume = !codec->relaxed_resume && codec->jacktbl.used; int ret; /* The get/put pair below enforces the runtime resume even if the diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index cb8b0945547c..1e14d7270adf 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -313,11 +313,10 @@ enum { #define AZX_DCAPS_INTEL_SKYLAKE \ (AZX_DCAPS_INTEL_PCH_BASE | AZX_DCAPS_PM_RUNTIME |\ + AZX_DCAPS_SYNC_WRITE |\ AZX_DCAPS_SEPARATE_STREAM_TAG | AZX_DCAPS_I915_COMPONENT) -#define AZX_DCAPS_INTEL_BROXTON \ - (AZX_DCAPS_INTEL_PCH_BASE | AZX_DCAPS_PM_RUNTIME |\ - AZX_DCAPS_SEPARATE_STREAM_TAG | 
AZX_DCAPS_I915_COMPONENT) +#define AZX_DCAPS_INTEL_BROXTON AZX_DCAPS_INTEL_SKYLAKE /* quirks for ATI SB / AMD Hudson */ #define AZX_DCAPS_PRESET_ATI_SB \ diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 4f8d0845ee1e..f299f137eaea 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -1083,6 +1083,7 @@ static int patch_conexant_auto(struct hda_codec *codec) */ static const struct hda_device_id snd_hda_id_conexant[] = { + HDA_CODEC_ENTRY(0x14f11f86, "CX8070", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f12008, "CX8200", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f15045, "CX20549 (Venice)", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f15047, "CX20551 (Waikiki)", patch_conexant_auto), diff --git a/sound/usb/line6/podhd.c b/sound/usb/line6/podhd.c index f0662bd4e50f..27bf61c177c0 100644 --- a/sound/usb/line6/podhd.c +++ b/sound/usb/line6/podhd.c @@ -368,7 +368,7 @@ static const struct line6_properties podhd_properties_table[] = { .name = "POD HD500", .capabilities = LINE6_CAP_PCM | LINE6_CAP_HWMON, - .altsetting = 1, + .altsetting = 0, .ep_ctrl_r = 0x81, .ep_ctrl_w = 0x01, .ep_audio_r = 0x86, diff --git a/sound/usb/line6/variax.c b/sound/usb/line6/variax.c index 0d24c72c155f..ed158f04de80 100644 --- a/sound/usb/line6/variax.c +++ b/sound/usb/line6/variax.c @@ -244,5 +244,5 @@ static struct usb_driver variax_driver = { module_usb_driver(variax_driver); -MODULE_DESCRIPTION("Vairax Workbench USB driver"); +MODULE_DESCRIPTION("Variax Workbench USB driver"); MODULE_LICENSE("GPL"); diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index c2152f3dd02d..e7c67be7c15f 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -116,7 +116,7 @@ struct kvm_irq_level { * ACPI gsi notion of irq. * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. - * For ARM: See Documentation/virtual/kvm/api.txt + * For ARM: See Documentation/virt/kvm/api.txt */ union { __u32 irq; @@ -1085,7 +1085,7 @@ struct kvm_xen_hvm_config { * * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies * the irqfd to operate in resampling mode for level triggered interrupt - * emulation. See Documentation/virtual/kvm/api.txt. + * emulation. See Documentation/virt/kvm/api.txt. 
*/ #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1) diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h index 580e344db3dd..ced3765c4f44 100644 --- a/tools/objtool/arch.h +++ b/tools/objtool/arch.h @@ -11,22 +11,24 @@ #include "elf.h" #include "cfi.h" -#define INSN_JUMP_CONDITIONAL 1 -#define INSN_JUMP_UNCONDITIONAL 2 -#define INSN_JUMP_DYNAMIC 3 -#define INSN_CALL 4 -#define INSN_CALL_DYNAMIC 5 -#define INSN_RETURN 6 -#define INSN_CONTEXT_SWITCH 7 -#define INSN_STACK 8 -#define INSN_BUG 9 -#define INSN_NOP 10 -#define INSN_STAC 11 -#define INSN_CLAC 12 -#define INSN_STD 13 -#define INSN_CLD 14 -#define INSN_OTHER 15 -#define INSN_LAST INSN_OTHER +enum insn_type { + INSN_JUMP_CONDITIONAL, + INSN_JUMP_UNCONDITIONAL, + INSN_JUMP_DYNAMIC, + INSN_JUMP_DYNAMIC_CONDITIONAL, + INSN_CALL, + INSN_CALL_DYNAMIC, + INSN_RETURN, + INSN_CONTEXT_SWITCH, + INSN_STACK, + INSN_BUG, + INSN_NOP, + INSN_STAC, + INSN_CLAC, + INSN_STD, + INSN_CLD, + INSN_OTHER, +}; enum op_dest_type { OP_DEST_REG, @@ -68,7 +70,7 @@ void arch_initial_func_cfi_state(struct cfi_state *state); int arch_decode_instruction(struct elf *elf, struct section *sec, unsigned long offset, unsigned int maxlen, - unsigned int *len, unsigned char *type, + unsigned int *len, enum insn_type *type, unsigned long *immediate, struct stack_op *op); bool arch_callee_saved_reg(unsigned char reg); diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 584568f27a83..0567c47a91b1 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -68,7 +68,7 @@ bool arch_callee_saved_reg(unsigned char reg) int arch_decode_instruction(struct elf *elf, struct section *sec, unsigned long offset, unsigned int maxlen, - unsigned int *len, unsigned char *type, + unsigned int *len, enum insn_type *type, unsigned long *immediate, struct stack_op *op) { struct insn insn; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 172f99195726..5f26620f13f5 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -18,6 +18,8 @@ #define FAKE_JUMP_OFFSET -1 +#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table" + struct alternative { struct list_head list; struct instruction *insn; @@ -95,6 +97,20 @@ static struct instruction *next_insn_same_func(struct objtool_file *file, for (insn = next_insn_same_sec(file, insn); insn; \ insn = next_insn_same_sec(file, insn)) +static bool is_sibling_call(struct instruction *insn) +{ + /* An indirect jump is either a sibling call or a jump to a table. */ + if (insn->type == INSN_JUMP_DYNAMIC) + return list_empty(&insn->alts); + + if (insn->type != INSN_JUMP_CONDITIONAL && + insn->type != INSN_JUMP_UNCONDITIONAL) + return false; + + /* add_jump_destinations() sets insn->call_dest for sibling calls. */ + return !!insn->call_dest; +} + /* * This checks to see if the given function is a "noreturn" function. * @@ -103,14 +119,9 @@ static struct instruction *next_insn_same_func(struct objtool_file *file, * * For local functions, we have to detect them manually by simply looking for * the lack of a return instruction. 
- * - * Returns: - * -1: error - * 0: no dead end - * 1: dead end */ -static int __dead_end_function(struct objtool_file *file, struct symbol *func, - int recursion) +static bool __dead_end_function(struct objtool_file *file, struct symbol *func, + int recursion) { int i; struct instruction *insn; @@ -136,30 +147,33 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, "rewind_stack_do_exit", }; + if (!func) + return false; + if (func->bind == STB_WEAK) - return 0; + return false; if (func->bind == STB_GLOBAL) for (i = 0; i < ARRAY_SIZE(global_noreturns); i++) if (!strcmp(func->name, global_noreturns[i])) - return 1; + return true; if (!func->len) - return 0; + return false; insn = find_insn(file, func->sec, func->offset); if (!insn->func) - return 0; + return false; func_for_each_insn_all(file, func, insn) { empty = false; if (insn->type == INSN_RETURN) - return 0; + return false; } if (empty) - return 0; + return false; /* * A function can have a sibling call instead of a return. In that @@ -167,40 +181,31 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, * of the sibling call returns. */ func_for_each_insn_all(file, func, insn) { - if (insn->type == INSN_JUMP_UNCONDITIONAL) { + if (is_sibling_call(insn)) { struct instruction *dest = insn->jump_dest; if (!dest) /* sibling call to another file */ - return 0; - - if (dest->func && dest->func->pfunc != insn->func->pfunc) { + return false; - /* local sibling call */ - if (recursion == 5) { - /* - * Infinite recursion: two functions - * have sibling calls to each other. - * This is a very rare case. It means - * they aren't dead ends. - */ - return 0; - } - - return __dead_end_function(file, dest->func, - recursion + 1); + /* local sibling call */ + if (recursion == 5) { + /* + * Infinite recursion: two functions have + * sibling calls to each other. This is a very + * rare case. It means they aren't dead ends. 
+ */ + return false; } - } - if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts)) - /* sibling call */ - return 0; + return __dead_end_function(file, dest->func, recursion+1); + } } - return 1; + return true; } -static int dead_end_function(struct objtool_file *file, struct symbol *func) +static bool dead_end_function(struct objtool_file *file, struct symbol *func) { return __dead_end_function(file, func, 0); } @@ -262,19 +267,12 @@ static int decode_instructions(struct objtool_file *file) if (ret) goto err; - if (!insn->type || insn->type > INSN_LAST) { - WARN_FUNC("invalid instruction type %d", - insn->sec, insn->offset, insn->type); - ret = -1; - goto err; - } - hash_add(file->insn_hash, &insn->hash, insn->offset); list_add_tail(&insn->list, &file->insn_list); } list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC) + if (func->type != STT_FUNC || func->alias != func) continue; if (!find_insn(file, sec, func->offset)) { @@ -284,8 +282,7 @@ static int decode_instructions(struct objtool_file *file) } func_for_each_insn(file, func, insn) - if (!insn->func) - insn->func = func; + insn->func = func; } } @@ -488,6 +485,7 @@ static const char *uaccess_safe_builtin[] = { /* misc */ "csum_partial_copy_generic", "__memcpy_mcsafe", + "mcsafe_handle_tail", "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */ NULL }; @@ -505,7 +503,7 @@ static void add_uaccess_safe(struct objtool_file *file) if (!func) continue; - func->alias->uaccess_safe = true; + func->uaccess_safe = true; } } @@ -577,13 +575,16 @@ static int add_jump_destinations(struct objtool_file *file) * Retpoline jumps are really dynamic jumps in * disguise, so convert them accordingly. */ - insn->type = INSN_JUMP_DYNAMIC; + if (insn->type == INSN_JUMP_UNCONDITIONAL) + insn->type = INSN_JUMP_DYNAMIC; + else + insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL; + insn->retpoline_safe = true; continue; } else { - /* sibling call */ + /* external sibling call */ insn->call_dest = rela->sym; - insn->jump_dest = NULL; continue; } @@ -623,7 +624,7 @@ static int add_jump_destinations(struct objtool_file *file) * However this code can't completely replace the * read_symbols() code because this doesn't detect the * case where the parent function's only reference to a - * subfunction is through a switch table. + * subfunction is through a jump table. */ if (!strstr(insn->func->name, ".cold.") && strstr(insn->jump_dest->func->name, ".cold.")) { @@ -633,9 +634,8 @@ static int add_jump_destinations(struct objtool_file *file) } else if (insn->jump_dest->func->pfunc != insn->func->pfunc && insn->jump_dest->offset == insn->jump_dest->func->offset) { - /* sibling class */ + /* internal sibling call */ insn->call_dest = insn->jump_dest->func; - insn->jump_dest = NULL; } } } @@ -896,20 +896,26 @@ out: return ret; } -static int add_switch_table(struct objtool_file *file, struct instruction *insn, - struct rela *table, struct rela *next_table) +static int add_jump_table(struct objtool_file *file, struct instruction *insn, + struct rela *table) { struct rela *rela = table; - struct instruction *alt_insn; + struct instruction *dest_insn; struct alternative *alt; struct symbol *pfunc = insn->func->pfunc; unsigned int prev_offset = 0; - list_for_each_entry_from(rela, &table->rela_sec->rela_list, list) { - if (rela == next_table) + /* + * Each @rela is a switch table relocation which points to the target + * instruction. 
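+ * The walk below stops at the start of the next jump table (marked by + * rela->jump_table_start), at a non-consecutive entry, or at an entry + * whose destination lies outside the current function.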
+ */ + list_for_each_entry_from(rela, &table->sec->rela_list, list) { + + /* Check for the end of the table: */ + if (rela != table && rela->jump_table_start) break; - /* Make sure the switch table entries are consecutive: */ + /* Make sure the table entries are consecutive: */ if (prev_offset && rela->offset != prev_offset + 8) break; @@ -918,12 +924,12 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn, rela->addend == pfunc->offset) break; - alt_insn = find_insn(file, rela->sym->sec, rela->addend); - if (!alt_insn) + dest_insn = find_insn(file, rela->sym->sec, rela->addend); + if (!dest_insn) break; - /* Make sure the jmp dest is in the function or subfunction: */ - if (alt_insn->func->pfunc != pfunc) + /* Make sure the destination is in the same function: */ + if (!dest_insn->func || dest_insn->func->pfunc != pfunc) break; alt = malloc(sizeof(*alt)); @@ -932,7 +938,7 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn, return -1; } - alt->insn = alt_insn; + alt->insn = dest_insn; list_add_tail(&alt->list, &insn->alts); prev_offset = rela->offset; } @@ -947,7 +953,7 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn, } /* - * find_switch_table() - Given a dynamic jump, find the switch jump table in + * find_jump_table() - Given a dynamic jump, find the switch jump table in * .rodata associated with it. * * There are 3 basic patterns: @@ -989,13 +995,13 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn, * * NOTE: RETPOLINE made it harder still to decode dynamic jumps. */ -static struct rela *find_switch_table(struct objtool_file *file, +static struct rela *find_jump_table(struct objtool_file *file, struct symbol *func, struct instruction *insn) { - struct rela *text_rela, *rodata_rela; + struct rela *text_rela, *table_rela; struct instruction *orig_insn = insn; - struct section *rodata_sec; + struct section *table_sec; unsigned long table_offset; /* @@ -1028,42 +1034,52 @@ static struct rela *find_switch_table(struct objtool_file *file, continue; table_offset = text_rela->addend; - rodata_sec = text_rela->sym->sec; + table_sec = text_rela->sym->sec; if (text_rela->type == R_X86_64_PC32) table_offset += 4; /* * Make sure the .rodata address isn't associated with a - * symbol. gcc jump tables are anonymous data. + * symbol. GCC jump tables are anonymous data. + * + * Also support C jump tables which are in the same format as + * switch jump tables. For objtool to recognize them, they + * need to be placed in the C_JUMP_TABLE_SECTION section. They + * have symbols associated with them. */ - if (find_symbol_containing(rodata_sec, table_offset)) + if (find_symbol_containing(table_sec, table_offset) && + strcmp(table_sec->name, C_JUMP_TABLE_SECTION)) continue; - rodata_rela = find_rela_by_dest(rodata_sec, table_offset); - if (rodata_rela) { - /* - * Use of RIP-relative switch jumps is quite rare, and - * indicates a rare GCC quirk/bug which can leave dead - * code behind. - */ - if (text_rela->type == R_X86_64_PC32) - file->ignore_unreachables = true; + /* Each table entry has a rela associated with it. */ + table_rela = find_rela_by_dest(table_sec, table_offset); + if (!table_rela) + continue; - return rodata_rela; - } + /* + * Use of RIP-relative switch jumps is quite rare, and + * indicates a rare GCC quirk/bug which can leave dead code + * behind. 
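+ * Setting ignore_unreachables below keeps that dead code from being + * reported later as unreachable instructions.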
+ */ + if (text_rela->type == R_X86_64_PC32) + file->ignore_unreachables = true; + + return table_rela; } return NULL; } - -static int add_func_switch_tables(struct objtool_file *file, - struct symbol *func) +/* + * First pass: Mark the head of each jump table so that in the next pass, + * we know when a given jump table ends and the next one starts. + */ +static void mark_func_jump_tables(struct objtool_file *file, + struct symbol *func) { - struct instruction *insn, *last = NULL, *prev_jump = NULL; - struct rela *rela, *prev_rela = NULL; - int ret; + struct instruction *insn, *last = NULL; + struct rela *rela; func_for_each_insn_all(file, func, insn) { if (!last) @@ -1071,7 +1087,7 @@ static int add_func_switch_tables(struct objtool_file *file, /* * Store back-pointers for unconditional forward jumps such - * that find_switch_table() can back-track using those and + * that find_jump_table() can back-track using those and * avoid some potentially confusing code. */ if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest && @@ -1086,27 +1102,25 @@ static int add_func_switch_tables(struct objtool_file *file, if (insn->type != INSN_JUMP_DYNAMIC) continue; - rela = find_switch_table(file, func, insn); - if (!rela) - continue; - - /* - * We found a switch table, but we don't know yet how big it - * is. Don't add it until we reach the end of the function or - * the beginning of another switch table in the same function. - */ - if (prev_jump) { - ret = add_switch_table(file, prev_jump, prev_rela, rela); - if (ret) - return ret; + rela = find_jump_table(file, func, insn); + if (rela) { + rela->jump_table_start = true; + insn->jump_table = rela; } - - prev_jump = insn; - prev_rela = rela; } +} + +static int add_func_jump_tables(struct objtool_file *file, + struct symbol *func) +{ + struct instruction *insn; + int ret; - if (prev_jump) { - ret = add_switch_table(file, prev_jump, prev_rela, NULL); + func_for_each_insn_all(file, func, insn) { + if (!insn->jump_table) + continue; + + ret = add_jump_table(file, insn, insn->jump_table); if (ret) return ret; } @@ -1119,7 +1133,7 @@ static int add_func_switch_tables(struct objtool_file *file, * section which contains a list of addresses within the function to jump to. * This finds these jump tables and adds them to the insn->alts lists. */ -static int add_switch_table_alts(struct objtool_file *file) +static int add_jump_table_alts(struct objtool_file *file) { struct section *sec; struct symbol *func; @@ -1133,7 +1147,8 @@ static int add_switch_table_alts(struct objtool_file *file) if (func->type != STT_FUNC) continue; - ret = add_func_switch_tables(file, func); + mark_func_jump_tables(file, func); + ret = add_func_jump_tables(file, func); if (ret) return ret; } @@ -1277,13 +1292,18 @@ static void mark_rodata(struct objtool_file *file) bool found = false; /* - * This searches for the .rodata section or multiple .rodata.func_name - * sections if -fdata-sections is being used. The .str.1.1 and .str.1.8 - * rodata sections are ignored as they don't contain jump tables. + * Search for the following rodata sections, each of which can + * potentially contain jump tables: + * + * - .rodata: can contain GCC switch tables + * - .rodata.<func>: same, if -fdata-sections is being used + * - .rodata..c_jump_table: contains C annotated jump tables + * + * .rodata.str1.* sections are ignored; they don't contain jump tables. 
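+ * + * As an illustrative sketch (hypothetical identifiers, relying on the + * GCC computed-goto extension), a C jump table placed in this section + * could be declared as: + * + * static const void *dispatch[] + * __attribute__((section(".rodata..c_jump_table"))) = { + * &&op_add, &&op_ret, + * }; + * goto *dispatch[opcode]; + *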
*/ for_each_sec(file, sec) { - if (!strncmp(sec->name, ".rodata", 7) && - !strstr(sec->name, ".str1.")) { + if ((!strncmp(sec->name, ".rodata", 7) && !strstr(sec->name, ".str1.")) || + !strcmp(sec->name, C_JUMP_TABLE_SECTION)) { sec->rodata = true; found = true; } @@ -1325,7 +1345,7 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = add_switch_table_alts(file); + ret = add_jump_table_alts(file); if (ret) return ret; @@ -1873,12 +1893,12 @@ static bool insn_state_match(struct instruction *insn, struct insn_state *state) static inline bool func_uaccess_safe(struct symbol *func) { if (func) - return func->alias->uaccess_safe; + return func->uaccess_safe; return false; } -static inline const char *insn_dest_name(struct instruction *insn) +static inline const char *call_dest_name(struct instruction *insn) { if (insn->call_dest) return insn->call_dest->name; @@ -1890,13 +1910,13 @@ static int validate_call(struct instruction *insn, struct insn_state *state) { if (state->uaccess && !func_uaccess_safe(insn->call_dest)) { WARN_FUNC("call to %s() with UACCESS enabled", - insn->sec, insn->offset, insn_dest_name(insn)); + insn->sec, insn->offset, call_dest_name(insn)); return 1; } if (state->df) { WARN_FUNC("call to %s() with DF set", - insn->sec, insn->offset, insn_dest_name(insn)); + insn->sec, insn->offset, call_dest_name(insn)); return 1; } @@ -1920,13 +1940,12 @@ static int validate_sibling_call(struct instruction *insn, struct insn_state *st * each instruction and validate all the rules described in * tools/objtool/Documentation/stack-validation.txt. */ -static int validate_branch(struct objtool_file *file, struct instruction *first, - struct insn_state state) +static int validate_branch(struct objtool_file *file, struct symbol *func, + struct instruction *first, struct insn_state state) { struct alternative *alt; struct instruction *insn, *next_insn; struct section *sec; - struct symbol *func = NULL; int ret; insn = first; @@ -1947,9 +1966,6 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, return 1; } - if (insn->func) - func = insn->func->pfunc; - if (func && insn->ignore) { WARN_FUNC("BUG: why am I validating an ignored function?", sec, insn->offset); @@ -1971,7 +1987,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, i = insn; save_insn = NULL; - func_for_each_insn_continue_reverse(file, insn->func, i) { + func_for_each_insn_continue_reverse(file, func, i) { if (i->save) { save_insn = i; break; @@ -2017,7 +2033,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, if (alt->skip_orig) skip_orig = true; - ret = validate_branch(file, alt->insn, state); + ret = validate_branch(file, func, alt->insn, state); if (ret) { if (backtrace) BT_FUNC("(alt)", insn); @@ -2055,7 +2071,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, if (state.bp_scratch) { WARN("%s uses BP as a scratch register", - insn->func->name); + func->name); return 1; } @@ -2067,36 +2083,28 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, if (ret) return ret; - if (insn->type == INSN_CALL) { - if (is_fentry_call(insn)) - break; - - ret = dead_end_function(file, insn->call_dest); - if (ret == 1) - return 0; - if (ret == -1) - return 1; - } - - if (!no_fp && func && !has_valid_stack_frame(&state)) { + if (!no_fp && func && !is_fentry_call(insn) && + !has_valid_stack_frame(&state)) { WARN_FUNC("call without frame pointer 
save/setup", sec, insn->offset); return 1; } + + if (dead_end_function(file, insn->call_dest)) + return 0; + break; case INSN_JUMP_CONDITIONAL: case INSN_JUMP_UNCONDITIONAL: - if (func && !insn->jump_dest) { + if (func && is_sibling_call(insn)) { ret = validate_sibling_call(insn, &state); if (ret) return ret; - } else if (insn->jump_dest && - (!func || !insn->jump_dest->func || - insn->jump_dest->func->pfunc == func)) { - ret = validate_branch(file, insn->jump_dest, - state); + } else if (insn->jump_dest) { + ret = validate_branch(file, func, + insn->jump_dest, state); if (ret) { if (backtrace) BT_FUNC("(branch)", insn); @@ -2110,13 +2118,17 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, break; case INSN_JUMP_DYNAMIC: - if (func && list_empty(&insn->alts)) { + case INSN_JUMP_DYNAMIC_CONDITIONAL: + if (func && is_sibling_call(insn)) { ret = validate_sibling_call(insn, &state); if (ret) return ret; } - return 0; + if (insn->type == INSN_JUMP_DYNAMIC) + return 0; + + break; case INSN_CONTEXT_SWITCH: if (func && (!next_insn || !next_insn->hint)) { @@ -2162,7 +2174,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, break; case INSN_CLAC: - if (!state.uaccess && insn->func) { + if (!state.uaccess && func) { WARN_FUNC("redundant UACCESS disable", sec, insn->offset); return 1; } @@ -2183,7 +2195,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, break; case INSN_CLD: - if (!state.df && insn->func) + if (!state.df && func) WARN_FUNC("redundant CLD", sec, insn->offset); state.df = false; @@ -2222,7 +2234,7 @@ static int validate_unwind_hints(struct objtool_file *file) for_each_insn(file, insn) { if (insn->hint && !insn->visited) { - ret = validate_branch(file, insn, state); + ret = validate_branch(file, insn->func, insn, state); if (ret && backtrace) BT_FUNC("<=== (hint)", insn); warnings += ret; @@ -2345,16 +2357,25 @@ static int validate_functions(struct objtool_file *file) for_each_sec(file, sec) { list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC || func->pfunc != func) + if (func->type != STT_FUNC) + continue; + + if (!func->len) { + WARN("%s() is missing an ELF size annotation", + func->name); + warnings++; + } + + if (func->pfunc != func || func->alias != func) continue; insn = find_insn(file, sec, func->offset); - if (!insn || insn->ignore) + if (!insn || insn->ignore || insn->visited) continue; - state.uaccess = func->alias->uaccess_safe; + state.uaccess = func->uaccess_safe; - ret = validate_branch(file, insn, state); + ret = validate_branch(file, func, insn, state); if (ret && backtrace) BT_FUNC("<=== (func)", insn); warnings += ret; @@ -2407,7 +2428,7 @@ int check(const char *_objname, bool orc) objname = _objname; - file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY); + file.elf = elf_read(objname, orc ? 
O_RDWR : O_RDONLY); if (!file.elf) return 1; diff --git a/tools/objtool/check.h b/tools/objtool/check.h index cb60b9acf5cf..b881fafcf55d 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -31,13 +31,14 @@ struct instruction { struct section *sec; unsigned long offset; unsigned int len; - unsigned char type; + enum insn_type type; unsigned long immediate; bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts; bool retpoline_safe; struct symbol *call_dest; struct instruction *jump_dest; struct instruction *first_jump_src; + struct rela *jump_table; struct list_head alts; struct symbol *func; struct stack_op stack_op; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index e99e1be19ad9..edba4745f25a 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -278,7 +278,7 @@ static int read_symbols(struct elf *elf) } if (sym->offset == s->offset) { - if (sym->len == s->len && alias == sym) + if (sym->len && sym->len == s->len && alias == sym) alias = s; if (sym->len >= s->len) { @@ -385,7 +385,7 @@ static int read_relas(struct elf *elf) rela->offset = rela->rela.r_offset; symndx = GELF_R_SYM(rela->rela.r_info); rela->sym = find_symbol_by_index(elf, symndx); - rela->rela_sec = sec; + rela->sec = sec; if (!rela->sym) { WARN("can't find rela entry symbol %d for %s", symndx, sec->name); @@ -401,7 +401,7 @@ static int read_relas(struct elf *elf) return 0; } -struct elf *elf_open(const char *name, int flags) +struct elf *elf_read(const char *name, int flags) { struct elf *elf; Elf_Cmd cmd; @@ -463,7 +463,7 @@ struct section *elf_create_section(struct elf *elf, const char *name, { struct section *sec, *shstrtab; size_t size = entsize * nr; - struct Elf_Scn *s; + Elf_Scn *s; Elf_Data *data; sec = malloc(sizeof(*sec)); diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index e44ca5d51871..44150204db4d 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -57,11 +57,12 @@ struct rela { struct list_head list; struct hlist_node hash; GElf_Rela rela; - struct section *rela_sec; + struct section *sec; struct symbol *sym; unsigned int type; unsigned long offset; int addend; + bool jump_table_start; }; struct elf { @@ -74,7 +75,7 @@ struct elf { }; -struct elf *elf_open(const char *name, int flags); +struct elf *elf_read(const char *name, int flags); struct section *find_section_by_name(struct elf *elf, const char *name); struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); struct symbol *find_symbol_by_name(struct elf *elf, const char *name); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 79367087bd18..8f24865596af 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2289,6 +2289,12 @@ static int process_switch_event(struct perf_tool *tool, if (perf_event__process_switch(tool, event, sample, machine) < 0) return -1; + if (scripting_ops && scripting_ops->process_switch) + scripting_ops->process_switch(event, sample, machine); + + if (!script->show_switch_events) + return 0; + thread = machine__findnew_thread(machine, sample->pid, sample->tid); if (thread == NULL) { @@ -2467,7 +2473,7 @@ static int __cmd_script(struct perf_script *script) script->tool.mmap = process_mmap_event; script->tool.mmap2 = process_mmap2_event; } - if (script->show_switch_events) + if (script->show_switch_events || (scripting_ops && scripting_ops->process_switch)) script->tool.context_switch = process_switch_event; if (script->show_namespace_events) script->tool.namespaces = 
process_namespaces_event; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 1aa2ed096f65..4f0bbffee05f 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -19,6 +19,7 @@ #include <api/fs/tracing_path.h> #include <bpf/bpf.h> #include "util/bpf_map.h" +#include "util/rlimit.h" #include "builtin.h" #include "util/cgroup.h" #include "util/color.h" @@ -3864,6 +3865,15 @@ int cmd_trace(int argc, const char **argv) goto out; } + /* + * Parsing .perfconfig may entail creating a BPF event, that may need + * to create BPF maps, so bump RLIM_MEMLOCK as the default 64K setting + * is too small. This affects just this process, not touching the + * global setting. If it fails we'll get something in 'perf trace -v' + * to help diagnose the problem. + */ + rlimit__bump_memlock(); + err = perf_config(trace__config, &trace); if (err) goto out; diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c index f470144d1a70..bf114ca9ca87 100644 --- a/tools/perf/builtin-version.c +++ b/tools/perf/builtin-version.c @@ -19,6 +19,7 @@ static struct version version; static struct option version_options[] = { OPT_BOOLEAN(0, "build-options", &version.build_options, "display the build options"), + OPT_END(), }; static const char * const version_usage[] = { diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json b/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json new file mode 100644 index 000000000000..17fb5241928b --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json @@ -0,0 +1,58 @@ +[ + { + "Unit": "CPU-M-CF", + "EventCode": "0", + "EventName": "CPU_CYCLES", + "BriefDescription": "CPU Cycles", + "PublicDescription": "Cycle Count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "1", + "EventName": "INSTRUCTIONS", + "BriefDescription": "Instructions", + "PublicDescription": "Instruction Count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "2", + "EventName": "L1I_DIR_WRITES", + "BriefDescription": "L1I Directory Writes", + "PublicDescription": "Level-1 I-Cache Directory Write Count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "3", + "EventName": "L1I_PENALTY_CYCLES", + "BriefDescription": "L1I Penalty Cycles", + "PublicDescription": "Level-1 I-Cache Penalty Cycle Count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "4", + "EventName": "L1D_DIR_WRITES", + "BriefDescription": "L1D Directory Writes", + "PublicDescription": "Level-1 D-Cache Directory Write Count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "5", + "EventName": "L1D_PENALTY_CYCLES", + "BriefDescription": "L1D Penalty Cycles", + "PublicDescription": "Level-1 D-Cache Penalty Cycle Count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "32", + "EventName": "PROBLEM_STATE_CPU_CYCLES", + "BriefDescription": "Problem-State CPU Cycles", + "PublicDescription": "Problem-State Cycle Count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "33", + "EventName": "PROBLEM_STATE_INSTRUCTIONS", + "BriefDescription": "Problem-State Instructions", + "PublicDescription": "Problem-State Instruction Count" + }, +] diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json b/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json new file mode 100644 index 000000000000..db286f19e7b6 --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json @@ -0,0 +1,114 @@ +[ + { + "Unit": "CPU-M-CF", + "EventCode": "64", + "EventName": "PRNG_FUNCTIONS", + "BriefDescription": "PRNG Functions", + "PublicDescription": "Total number of the PRNG functions issued by the CPU" + }, + { + "Unit": 
"CPU-M-CF", + "EventCode": "65", + "EventName": "PRNG_CYCLES", + "BriefDescription": "PRNG Cycles", + "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "66", + "EventName": "PRNG_BLOCKED_FUNCTIONS", + "BriefDescription": "PRNG Blocked Functions", + "PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "67", + "EventName": "PRNG_BLOCKED_CYCLES", + "BriefDescription": "PRNG Blocked Cycles", + "PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "68", + "EventName": "SHA_FUNCTIONS", + "BriefDescription": "SHA Functions", + "PublicDescription": "Total number of SHA functions issued by the CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "69", + "EventName": "SHA_CYCLES", + "BriefDescription": "SHA Cycles", + "PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "70", + "EventName": "SHA_BLOCKED_FUNCTIONS", + "BriefDescription": "SHA Blocked Functions", + "PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "71", + "EventName": "SHA_BLOCKED_CYCLES", + "BriefDescription": "SHA Bloced Cycles", + "PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "72", + "EventName": "DEA_FUNCTIONS", + "BriefDescription": "DEA Functions", + "PublicDescription": "Total number of the DEA functions issued by the CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "73", + "EventName": "DEA_CYCLES", + "BriefDescription": "DEA Cycles", + "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "74", + "EventName": "DEA_BLOCKED_FUNCTIONS", + "BriefDescription": "DEA Blocked Functions", + "PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "75", + "EventName": "DEA_BLOCKED_CYCLES", + "BriefDescription": "DEA Blocked Cycles", + "PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "76", + "EventName": "AES_FUNCTIONS", + "BriefDescription": "AES Functions", + "PublicDescription": "Total number of AES functions issued by the CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "77", + "EventName": "AES_CYCLES", + "BriefDescription": "AES Cycles", + "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "78", 
+ "EventName": "AES_BLOCKED_FUNCTIONS", + "BriefDescription": "AES Blocked Functions", + "PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "79", + "EventName": "AES_BLOCKED_CYCLES", + "BriefDescription": "AES Blocked Cycles", + "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" + }, +] diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json new file mode 100644 index 000000000000..5e36bc2468d0 --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json @@ -0,0 +1,30 @@ +[ + { + "Unit": "CPU-M-CF", + "EventCode": "80", + "EventName": "ECC_FUNCTION_COUNT", + "BriefDescription": "ECC Function Count", + "PublicDescription": "Long ECC function Count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "81", + "EventName": "ECC_CYCLES_COUNT", + "BriefDescription": "ECC Cycles Count", + "PublicDescription": "Long ECC Function cycles count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "82", + "EventName": "ECC_BLOCKED_FUNCTION_COUNT", + "BriefDescription": "Ecc Blocked Function Count", + "PublicDescription": "Long ECC blocked function count" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "83", + "EventName": "ECC_BLOCKED_CYCLES_COUNT", + "BriefDescription": "ECC Blocked Cycles Count", + "PublicDescription": "Long ECC blocked cycles count" + }, +] diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json b/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json new file mode 100644 index 000000000000..89e070727e1b --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json @@ -0,0 +1,373 @@ +[ + { + "Unit": "CPU-M-CF", + "EventCode": "128", + "EventName": "L1D_RO_EXCL_WRITES", + "BriefDescription": "L1D Read-only Exclusive Writes", + "PublicDescription": "A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "129", + "EventName": "DTLB2_WRITES", + "BriefDescription": "DTLB2 Writes", + "PublicDescription": "A translation has been written into The Translation Lookaside Buffer 2 (TLB2) and the request was made by the data cache" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "130", + "EventName": "DTLB2_MISSES", + "BriefDescription": "DTLB2 Misses", + "PublicDescription": "A TLB2 miss is in progress for a request made by the data cache. 
Incremented by one for every TLB2 miss in progress for the Level-1 Data cache on this cycle" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "131", + "EventName": "DTLB2_HPAGE_WRITES", + "BriefDescription": "DTLB2 One-Megabyte Page Writes", + "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page or a Last Host Translation was done" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "132", + "EventName": "DTLB2_GPAGE_WRITES", + "BriefDescription": "DTLB2 Two-Gigabyte Page Writes", + "PublicDescription": "A translation entry for a two-gigabyte page was written into the Level-2 TLB" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "133", + "EventName": "L1D_L2D_SOURCED_WRITES", + "BriefDescription": "L1D L2D Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Data cache" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "134", + "EventName": "ITLB2_WRITES", + "BriefDescription": "ITLB2 Writes", + "PublicDescription": "A translation entry has been written into the Translation Lookaside Buffer 2 (TLB2) and the request was made by the instruction cache" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "135", + "EventName": "ITLB2_MISSES", + "BriefDescription": "ITLB2 Misses", + "PublicDescription": "A TLB2 miss is in progress for a request made by the instruction cache. Incremented by one for every TLB2 miss in progress for the Level-1 Instruction cache in a cycle" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "136", + "EventName": "L1I_L2I_SOURCED_WRITES", + "BriefDescription": "L1I L2I Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the Level-2 Instruction cache" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "137", + "EventName": "TLB2_PTE_WRITES", + "BriefDescription": "TLB2 PTE Writes", + "PublicDescription": "A translation entry was written into the Page Table Entry array in the Level-2 TLB" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "138", + "EventName": "TLB2_CRSTE_WRITES", + "BriefDescription": "TLB2 CRSTE Writes", + "PublicDescription": "Translation entries were written into the Combined Region and Segment Table Entry array and the Page Table Entry array in the Level-2 TLB" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "139", + "EventName": "TLB2_ENGINES_BUSY", + "BriefDescription": "TLB2 Engines Busy", + "PublicDescription": "The number of Level-2 TLB translation engines busy in a cycle" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "140", + "EventName": "TX_C_TEND", + "BriefDescription": "Completed TEND instructions in constrained TX mode", + "PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "141", + "EventName": "TX_NC_TEND", + "BriefDescription": "Completed TEND instructions in non-constrained TX mode", + "PublicDescription": "A TEND instruction has completed in a non-constrained transactional-execution mode" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "143", + "EventName": "L1C_TLB2_MISSES", + "BriefDescription": "L1C TLB2 Misses", + "PublicDescription": "Increments by one for any cycle where a level-1 cache or level-2 TLB miss is in progress" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "144", + "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES", + "BriefDescription": "L1D On-Chip L3 Sourced Writes", + 
"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "145", + "EventName": "L1D_ONCHIP_MEMORY_SOURCED_WRITES", + "BriefDescription": "L1D On-Chip Memory Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip memory" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "146", + "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_IV", + "BriefDescription": "L1D On-Chip L3 Sourced Writes with Intervention", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "147", + "EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES", + "BriefDescription": "L1D On-Cluster L3 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Cluster Level-3 cache withountervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "148", + "EventName": "L1D_ONCLUSTER_MEMORY_SOURCED_WRITES", + "BriefDescription": "L1D On-Cluster Memory Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Cluster memory" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "149", + "EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES_IV", + "BriefDescription": "L1D On-Cluster L3 Sourced Writes with Intervention", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache with intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "150", + "EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES", + "BriefDescription": "L1D Off-Cluster L3 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "151", + "EventName": "L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES", + "BriefDescription": "L1D Off-Cluster Memory Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Cluster memory" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "152", + "EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV", + "BriefDescription": "L1D Off-Cluster L3 Sourced Writes with Intervention", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "153", + "EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES", + "BriefDescription": "L1D Off-Drawer L3 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "154", + "EventName": "L1D_OFFDRAWER_MEMORY_SOURCED_WRITES", + "BriefDescription": "L1D Off-Drawer Memory Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer memory" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "155", + "EventName": 
"L1D_OFFDRAWER_L3_SOURCED_WRITES_IV", + "BriefDescription": "L1D Off-Drawer L3 Sourced Writes with Intervention", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "156", + "EventName": "L1D_ONDRAWER_L4_SOURCED_WRITES", + "BriefDescription": "L1D On-Drawer L4 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer Level-4 cache" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "157", + "EventName": "L1D_OFFDRAWER_L4_SOURCED_WRITES", + "BriefDescription": "L1D Off-Drawer L4 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "158", + "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_RO", + "BriefDescription": "L1D On-Chip L3 Sourced Writes read-only", + "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip L3 but a read-only invalidate was done to remove other copies of the cache line" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "162", + "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES", + "BriefDescription": "L1I On-Chip L3 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache ine was sourced from an On-Chip Level-3 cache without intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "163", + "EventName": "L1I_ONCHIP_MEMORY_SOURCED_WRITES", + "BriefDescription": "L1I On-Chip Memory Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache ine was sourced from On-Chip memory" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "164", + "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES_IV", + "BriefDescription": "L1I On-Chip L3 Sourced Writes with Intervention", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache ine was sourced from an On-Chip Level-3 cache with intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "165", + "EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES", + "BriefDescription": "L1I On-Cluster L3 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache without intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "166", + "EventName": "L1I_ONCLUSTER_MEMORY_SOURCED_WRITES", + "BriefDescription": "L1I On-Cluster Memory Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Cluster memory" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "167", + "EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES_IV", + "BriefDescription": "L1I On-Cluster L3 Sourced Writes with Intervention", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Cluster Level-3 cache with intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "168", + "EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES", + "BriefDescription": "L1I Off-Cluster L3 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory 
where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "169", + "EventName": "L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES", + "BriefDescription": "L1I Off-Cluster Memory Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Cluster memory" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "170", + "EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV", + "BriefDescription": "L1I Off-Cluster L3 Sourced Writes with Intervention", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "171", + "EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES", + "BriefDescription": "L1I Off-Drawer L3 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "172", + "EventName": "L1I_OFFDRAWER_MEMORY_SOURCED_WRITES", + "BriefDescription": "L1I Off-Drawer Memory Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer memory" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "173", + "EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES_IV", + "BriefDescription": "L1I Off-Drawer L3 Sourced Writes with Intervention", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "174", + "EventName": "L1I_ONDRAWER_L4_SOURCED_WRITES", + "BriefDescription": "L1I On-Drawer L4 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Drawer Level-4 cache" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "175", + "EventName": "L1I_OFFDRAWER_L4_SOURCED_WRITES", + "BriefDescription": "L1I Off-Drawer L4 Sourced Writes", + "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "224", + "EventName": "BCD_DFP_EXECUTION_SLOTS", + "BriefDescription": "BCD DFP Execution Slots", + "PublicDescription": "Count of floating point execution slots used for finished Binary Coded Decimal to Decimal Floating Point conversions. Instructions: CDZT, CXZT, CZDT, CZXT" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "225", + "EventName": "VX_BCD_EXECUTION_SLOTS", + "BriefDescription": "VX BCD Execution Slots", + "PublicDescription": "Count of floating point execution slots used for finished vector arithmetic Binary Coded Decimal instructions. Instructions: VAP, VSP, VMP, VMSP, VDP, VSDP, VRP, VLIP, VSRP, VPSOP, VCP, VTP, VPKZ, VUPKZ, VCVB, VCVBG, VCVD, VCVDG" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "226", + "EventName": "DECIMAL_INSTRUCTIONS", + "BriefDescription": "Decimal Instructions", + "PublicDescription": "Decimal instructions dispatched. 
Instructions: CVB, CVD, AP, CP, DP, ED, EDMK, MP, SRP, SP, ZAP" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "232", + "EventName": "LAST_HOST_TRANSLATIONS", + "BriefDescription": "Last host translation done", + "PublicDescription": "Last Host Translation done" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "243", + "EventName": "TX_NC_TABORT", + "BriefDescription": "Aborted transactions in non-constrained TX mode", + "PublicDescription": "A transaction abort has occurred in a non-constrained transactional-execution mode" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "244", + "EventName": "TX_C_TABORT_NO_SPECIAL", + "BriefDescription": "Aborted transactions in constrained TX mode not using special completion logic", + "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "245", + "EventName": "TX_C_TABORT_SPECIAL", + "BriefDescription": "Aborted transactions in constrained TX mode using special completion logic", + "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "448", + "EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE", + "BriefDescription": "Cycle count with one thread active", + "PublicDescription": "Cycle count with one thread active" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "449", + "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE", + "BriefDescription": "Cycle count with two threads active", + "PublicDescription": "Cycle count with two threads active" + }, +] diff --git a/tools/perf/pmu-events/arch/s390/mapfile.csv b/tools/perf/pmu-events/arch/s390/mapfile.csv index 78bcf7f8e206..bd3fc577139c 100644 --- a/tools/perf/pmu-events/arch/s390/mapfile.csv +++ b/tools/perf/pmu-events/arch/s390/mapfile.csv @@ -4,3 +4,4 @@ Family-model,Version,Filename,EventType ^IBM.282[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_zec12,core ^IBM.296[45].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z13,core ^IBM.390[67].*[13]\.[1-5].[[:xdigit:]]+$,3,cf_z14,core +^IBM.856[12].*3\.6.[[:xdigit:]]+$,3,cf_m8561,core diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 92713d93e956..7bd73a904b4e 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -353,7 +353,10 @@ do_query(query, 'CREATE TABLE threads (' 'tid integer)') do_query(query, 'CREATE TABLE comms (' 'id bigint NOT NULL,' - 'comm varchar(16))') + 'comm varchar(16),' + 'c_thread_id bigint,' + 'c_time bigint,' + 'exec_flag boolean)') do_query(query, 'CREATE TABLE comm_threads (' 'id bigint NOT NULL,' 'comm_id bigint,' @@ -479,6 +482,17 @@ do_query(query, 'CREATE TABLE pwrx (' 'last_cstate integer,' 'wake_reason integer)') +do_query(query, 'CREATE TABLE context_switches (' + 'id bigint NOT NULL,' + 'machine_id bigint,' + 'time bigint,' + 'cpu integer,' + 'thread_out_id bigint,' + 'comm_out_id bigint,' + 'thread_in_id bigint,' + 'comm_in_id bigint,' + 'flags integer)') + do_query(query, 'CREATE VIEW machines_view AS ' 'SELECT ' 'id,' @@ -692,6 +706,29 @@ do_query(query, 'CREATE VIEW power_events_view AS ' ' INNER JOIN selected_events ON selected_events.id = samples.evsel_id' ' ORDER BY samples.id') +do_query(query, 'CREATE VIEW context_switches_view AS ' + 'SELECT ' + 'context_switches.id,' + 
'context_switches.machine_id,' + 'context_switches.time,' + 'context_switches.cpu,' + 'th_out.pid AS pid_out,' + 'th_out.tid AS tid_out,' + 'comm_out.comm AS comm_out,' + 'th_in.pid AS pid_in,' + 'th_in.tid AS tid_in,' + 'comm_in.comm AS comm_in,' + 'CASE WHEN context_switches.flags = 0 THEN \'in\'' + ' WHEN context_switches.flags = 1 THEN \'out\'' + ' WHEN context_switches.flags = 3 THEN \'out preempt\'' + ' ELSE CAST ( context_switches.flags AS VARCHAR(11) )' + 'END AS flags' + ' FROM context_switches' + ' INNER JOIN threads AS th_out ON th_out.id = context_switches.thread_out_id' + ' INNER JOIN threads AS th_in ON th_in.id = context_switches.thread_in_id' + ' INNER JOIN comms AS comm_out ON comm_out.id = context_switches.comm_out_id' + ' INNER JOIN comms AS comm_in ON comm_in.id = context_switches.comm_in_id') + file_header = struct.pack("!11sii", b"PGCOPY\n\377\r\n\0", 0, 0) file_trailer = b"\377\377" @@ -756,6 +793,7 @@ mwait_file = open_output_file("mwait_table.bin") pwre_file = open_output_file("pwre_table.bin") exstop_file = open_output_file("exstop_table.bin") pwrx_file = open_output_file("pwrx_table.bin") +context_switches_file = open_output_file("context_switches_table.bin") def trace_begin(): printdate("Writing to intermediate files...") @@ -763,7 +801,7 @@ def trace_begin(): evsel_table(0, "unknown") machine_table(0, 0, "unknown") thread_table(0, 0, 0, -1, -1) - comm_table(0, "unknown") + comm_table(0, "unknown", 0, 0, 0) dso_table(0, 0, "unknown", "unknown", "") symbol_table(0, 0, 0, 0, 0, "unknown") sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) @@ -804,6 +842,7 @@ def trace_end(): copy_output_file(pwre_file, "pwre") copy_output_file(exstop_file, "exstop") copy_output_file(pwrx_file, "pwrx") + copy_output_file(context_switches_file, "context_switches") printdate("Removing intermediate files...") remove_output_file(evsel_file) @@ -825,6 +864,7 @@ def trace_end(): remove_output_file(pwre_file) remove_output_file(exstop_file) remove_output_file(pwrx_file) + remove_output_file(context_switches_file) os.rmdir(output_dir_name) printdate("Adding primary keys") do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)') @@ -846,11 +886,14 @@ def trace_end(): do_query(query, 'ALTER TABLE pwre ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE exstop ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE pwrx ADD PRIMARY KEY (id)') + do_query(query, 'ALTER TABLE context_switches ADD PRIMARY KEY (id)') printdate("Adding foreign keys") do_query(query, 'ALTER TABLE threads ' 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),' 'ADD CONSTRAINT processfk FOREIGN KEY (process_id) REFERENCES threads (id)') + do_query(query, 'ALTER TABLE comms ' + 'ADD CONSTRAINT threadfk FOREIGN KEY (c_thread_id) REFERENCES threads (id)') do_query(query, 'ALTER TABLE comm_threads ' 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),' 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id)') @@ -881,6 +924,8 @@ def trace_end(): 'ADD CONSTRAINT parent_call_pathfk FOREIGN KEY (parent_call_path_id) REFERENCES call_paths (id)') do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)') do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)') + do_query(query, 'ALTER TABLE comms ADD has_calls boolean') + do_query(query, 'UPDATE comms SET has_calls = TRUE WHERE comms.id IN (SELECT DISTINCT comm_id FROM calls)') do_query(query, 'ALTER TABLE ptwrite ' 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES 
samples (id)') do_query(query, 'ALTER TABLE cbr ' @@ -893,6 +938,12 @@ def trace_end(): 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)') do_query(query, 'ALTER TABLE pwrx ' 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)') + do_query(query, 'ALTER TABLE context_switches ' + 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),' + 'ADD CONSTRAINT toutfk FOREIGN KEY (thread_out_id) REFERENCES threads (id),' + 'ADD CONSTRAINT tinfk FOREIGN KEY (thread_in_id) REFERENCES threads (id),' + 'ADD CONSTRAINT coutfk FOREIGN KEY (comm_out_id) REFERENCES comms (id),' + 'ADD CONSTRAINT cinfk FOREIGN KEY (comm_in_id) REFERENCES comms (id)') printdate("Dropping unused tables") if is_table_empty("ptwrite"): @@ -905,6 +956,8 @@ def trace_end(): drop("pwrx") if is_table_empty("cbr"): drop("cbr") + if is_table_empty("context_switches"): + drop("context_switches") if (unhandled_count): printdate("Warning: ", unhandled_count, " unhandled events") @@ -935,11 +988,11 @@ def thread_table(thread_id, machine_id, process_id, pid, tid, *x): value = struct.pack("!hiqiqiqiiii", 5, 8, thread_id, 8, machine_id, 8, process_id, 4, pid, 4, tid) thread_file.write(value) -def comm_table(comm_id, comm_str, *x): +def comm_table(comm_id, comm_str, thread_id, time, exec_flag, *x): comm_str = toserverstr(comm_str) n = len(comm_str) - fmt = "!hiqi" + str(n) + "s" - value = struct.pack(fmt, 2, 8, comm_id, n, comm_str) + fmt = "!hiqi" + str(n) + "s" + "iqiqiB" + value = struct.pack(fmt, 5, 8, comm_id, n, comm_str, 8, thread_id, 8, time, 1, exec_flag) comm_file.write(value) def comm_thread_table(comm_thread_id, comm_id, thread_id, *x): @@ -1051,3 +1104,8 @@ def synth_data(id, config, raw_buf, *x): pwrx(id, raw_buf) elif config == 5: cbr(id, raw_buf) + +def context_switch_table(id, machine_id, time, cpu, thread_out_id, comm_out_id, thread_in_id, comm_in_id, flags, *x): + fmt = "!hiqiqiqiiiqiqiqiqii" + value = struct.pack(fmt, 9, 8, id, 8, machine_id, 8, time, 4, cpu, 8, thread_out_id, 8, comm_out_id, 8, thread_in_id, 8, comm_in_id, 4, flags) + context_switches_file.write(value) diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index 021326c46285..8043a7272a56 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -177,7 +177,10 @@ do_query(query, 'CREATE TABLE threads (' 'tid integer)') do_query(query, 'CREATE TABLE comms (' 'id integer NOT NULL PRIMARY KEY,' - 'comm varchar(16))') + 'comm varchar(16),' + 'c_thread_id bigint,' + 'c_time bigint,' + 'exec_flag boolean)') do_query(query, 'CREATE TABLE comm_threads (' 'id integer NOT NULL PRIMARY KEY,' 'comm_id bigint,' @@ -303,6 +306,17 @@ do_query(query, 'CREATE TABLE pwrx (' 'last_cstate integer,' 'wake_reason integer)') +do_query(query, 'CREATE TABLE context_switches (' + 'id integer NOT NULL PRIMARY KEY,' + 'machine_id bigint,' + 'time bigint,' + 'cpu integer,' + 'thread_out_id bigint,' + 'comm_out_id bigint,' + 'thread_in_id bigint,' + 'comm_in_id bigint,' + 'flags integer)') + # printf was added to sqlite in version 3.8.3 sqlite_has_printf = False try: @@ -527,6 +541,29 @@ do_query(query, 'CREATE VIEW power_events_view AS ' ' INNER JOIN selected_events ON selected_events.id = evsel_id' ' WHERE selected_events.name IN (\'cbr\',\'mwait\',\'exstop\',\'pwre\',\'pwrx\')') +do_query(query, 'CREATE VIEW context_switches_view AS ' + 'SELECT ' + 'context_switches.id,' + 'context_switches.machine_id,' + 'context_switches.time,' 
+ 'context_switches.cpu,' + 'th_out.pid AS pid_out,' + 'th_out.tid AS tid_out,' + 'comm_out.comm AS comm_out,' + 'th_in.pid AS pid_in,' + 'th_in.tid AS tid_in,' + 'comm_in.comm AS comm_in,' + 'CASE WHEN context_switches.flags = 0 THEN \'in\'' + ' WHEN context_switches.flags = 1 THEN \'out\'' + ' WHEN context_switches.flags = 3 THEN \'out preempt\'' + ' ELSE context_switches.flags ' + 'END AS flags' + ' FROM context_switches' + ' INNER JOIN threads AS th_out ON th_out.id = context_switches.thread_out_id' + ' INNER JOIN threads AS th_in ON th_in.id = context_switches.thread_in_id' + ' INNER JOIN comms AS comm_out ON comm_out.id = context_switches.comm_out_id' + ' INNER JOIN comms AS comm_in ON comm_in.id = context_switches.comm_in_id') + do_query(query, 'END TRANSACTION') evsel_query = QSqlQuery(db) @@ -536,7 +573,7 @@ machine_query.prepare("INSERT INTO machines VALUES (?, ?, ?)") thread_query = QSqlQuery(db) thread_query.prepare("INSERT INTO threads VALUES (?, ?, ?, ?, ?)") comm_query = QSqlQuery(db) -comm_query.prepare("INSERT INTO comms VALUES (?, ?)") +comm_query.prepare("INSERT INTO comms VALUES (?, ?, ?, ?, ?)") comm_thread_query = QSqlQuery(db) comm_thread_query.prepare("INSERT INTO comm_threads VALUES (?, ?, ?)") dso_query = QSqlQuery(db) @@ -568,6 +605,8 @@ exstop_query = QSqlQuery(db) exstop_query.prepare("INSERT INTO exstop VALUES (?, ?)") pwrx_query = QSqlQuery(db) pwrx_query.prepare("INSERT INTO pwrx VALUES (?, ?, ?, ?)") +context_switch_query = QSqlQuery(db) +context_switch_query.prepare("INSERT INTO context_switches VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)") def trace_begin(): printdate("Writing records...") @@ -576,7 +615,7 @@ def trace_begin(): evsel_table(0, "unknown") machine_table(0, 0, "unknown") thread_table(0, 0, 0, -1, -1) - comm_table(0, "unknown") + comm_table(0, "unknown", 0, 0, 0) dso_table(0, 0, "unknown", "unknown", "") symbol_table(0, 0, 0, 0, 0, "unknown") sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) @@ -603,6 +642,8 @@ def trace_end(): if perf_db_export_calls: do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)') do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)') + do_query(query, 'ALTER TABLE comms ADD has_calls boolean') + do_query(query, 'UPDATE comms SET has_calls = 1 WHERE comms.id IN (SELECT DISTINCT comm_id FROM calls)') printdate("Dropping unused tables") if is_table_empty("ptwrite"): @@ -615,6 +656,8 @@ def trace_end(): drop("pwrx") if is_table_empty("cbr"): drop("cbr") + if is_table_empty("context_switches"): + drop("context_switches") if (unhandled_count): printdate("Warning: ", unhandled_count, " unhandled events") @@ -642,7 +685,7 @@ def thread_table(*x): bind_exec(thread_query, 5, x) def comm_table(*x): - bind_exec(comm_query, 2, x) + bind_exec(comm_query, 5, x) def comm_thread_table(*x): bind_exec(comm_thread_query, 3, x) @@ -748,3 +791,6 @@ def synth_data(id, config, raw_buf, *x): pwrx(id, raw_buf) elif config == 5: cbr(id, raw_buf) + +def context_switch_table(*x): + bind_exec(context_switch_query, 9, x) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 6e7934f2ac9a..61b3911d91e6 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -392,7 +392,7 @@ class FindBar(): self.hbox.addWidget(self.close_button) self.bar = QWidget() - self.bar.setLayout(self.hbox); + self.bar.setLayout(self.hbox) self.bar.hide() def Widget(self): @@ -470,7 +470,7 @@ class 
CallGraphLevelItemBase(object): self.params = params self.row = row self.parent_item = parent_item - self.query_done = False; + self.query_done = False self.child_count = 0 self.child_items = [] if parent_item: @@ -517,7 +517,7 @@ class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): self.time = time def Select(self): - self.query_done = True; + self.query_done = True query = QSqlQuery(self.glb.db) if self.params.have_ipc: ipc_str = ", SUM(insn_count), SUM(cyc_count)" @@ -604,7 +604,7 @@ class CallGraphLevelOneItem(CallGraphLevelItemBase): self.dbid = comm_id def Select(self): - self.query_done = True; + self.query_done = True query = QSqlQuery(self.glb.db) QueryExec(query, "SELECT thread_id, pid, tid" " FROM comm_threads" @@ -622,9 +622,12 @@ class CallGraphRootItem(CallGraphLevelItemBase): def __init__(self, glb, params): super(CallGraphRootItem, self).__init__(glb, params, 0, None) self.dbid = 0 - self.query_done = True; + self.query_done = True + if_has_calls = "" + if IsSelectable(glb.db, "comms", columns = "has_calls"): + if_has_calls = " WHERE has_calls = TRUE" query = QSqlQuery(glb.db) - QueryExec(query, "SELECT id, comm FROM comms") + QueryExec(query, "SELECT id, comm FROM comms" + if_has_calls) while query.next(): if not query.value(0): continue @@ -793,7 +796,7 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): self.time = time def Select(self): - self.query_done = True; + self.query_done = True if self.calls_id == 0: comm_thread = " AND comm_id = " + str(self.comm_id) + " AND thread_id = " + str(self.thread_id) else: @@ -881,7 +884,7 @@ class CallTreeLevelOneItem(CallGraphLevelItemBase): self.dbid = comm_id def Select(self): - self.query_done = True; + self.query_done = True query = QSqlQuery(self.glb.db) QueryExec(query, "SELECT thread_id, pid, tid" " FROM comm_threads" @@ -899,9 +902,12 @@ class CallTreeRootItem(CallGraphLevelItemBase): def __init__(self, glb, params): super(CallTreeRootItem, self).__init__(glb, params, 0, None) self.dbid = 0 - self.query_done = True; + self.query_done = True + if_has_calls = "" + if IsSelectable(glb.db, "comms", columns = "has_calls"): + if_has_calls = " WHERE has_calls = TRUE" query = QSqlQuery(glb.db) - QueryExec(query, "SELECT id, comm FROM comms") + QueryExec(query, "SELECT id, comm FROM comms" + if_has_calls) while query.next(): if not query.value(0): continue @@ -971,7 +977,7 @@ class VBox(): def __init__(self, w1, w2, w3=None): self.vbox = QWidget() - self.vbox.setLayout(QVBoxLayout()); + self.vbox.setLayout(QVBoxLayout()) self.vbox.layout().setContentsMargins(0, 0, 0, 0) @@ -1391,7 +1397,7 @@ class FetchMoreRecordsBar(): self.hbox.addWidget(self.close_button) self.bar = QWidget() - self.bar.setLayout(self.hbox); + self.bar.setLayout(self.hbox) self.bar.show() self.in_progress = False @@ -2206,7 +2212,7 @@ class ReportDialogBase(QDialog): self.vbox.addLayout(self.grid) self.vbox.addLayout(self.hbox) - self.setLayout(self.vbox); + self.setLayout(self.vbox) def Ok(self): vars = self.report_vars @@ -3139,7 +3145,7 @@ class AboutDialog(QDialog): self.vbox = QVBoxLayout() self.vbox.addWidget(self.text) - self.setLayout(self.vbox); + self.setLayout(self.vbox) # Font resize diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 66a82badc1d1..c3bec9d2c201 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -21,6 +21,7 @@ #include <subcmd/parse-options.h> #include "string2.h" #include "symbol.h" +#include "util/rlimit.h" #include <linux/kernel.h> #include 
<linux/string.h> #include <subcmd/exec-cmd.h> @@ -727,6 +728,11 @@ int cmd_test(int argc, const char **argv) if (skip != NULL) skiplist = intlist__new(skip); + /* + * Tests that create BPF maps, for instance, need more than the 64K + * default: + */ + rlimit__bump_memlock(); return __cmd_test(argc, argv, skiplist); } diff --git a/tools/perf/util/Build b/tools/perf/util/Build index d7e3b008a613..14f812bb07a7 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -20,6 +20,7 @@ perf-y += parse-events.o perf-y += perf_regs.o perf-y += path.o perf-y += print_binary.o +perf-y += rlimit.o perf-y += argv_split.o perf-y += rbtree.o perf-y += libstring.o diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 67b88b599a53..3d1c34fc4d68 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2460,7 +2460,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event, /* Something went wrong, no need to continue */ if (!inode) { - err = PTR_ERR(inode); + err = -ENOMEM; goto err_free_metadata; } @@ -2517,8 +2517,10 @@ int cs_etm__process_auxtrace_info(union perf_event *event, session->auxtrace = &etm->auxtrace; etm->unknown_thread = thread__new(999999999, 999999999); - if (!etm->unknown_thread) + if (!etm->unknown_thread) { + err = -ENOMEM; goto err_free_queues; + } /* * Initialize list node so that at thread__zput() we can avoid @@ -2530,8 +2532,10 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (err) goto err_delete_thread; - if (thread__init_map_groups(etm->unknown_thread, etm->machine)) + if (thread__init_map_groups(etm->unknown_thread, etm->machine)) { + err = -ENOMEM; goto err_delete_thread; + } if (dump_trace) { cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); @@ -2575,5 +2579,5 @@ err_free_traceid_list: err_free_hdr: zfree(&hdr); - return -EINVAL; + return err; } diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 2394c7506abe..ffbb3e7d3288 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -20,70 +20,14 @@ #include "db-export.h" #include <linux/zalloc.h> -struct deferred_export { - struct list_head node; - struct comm *comm; -}; - -static int db_export__deferred(struct db_export *dbe) -{ - struct deferred_export *de; - int err; - - while (!list_empty(&dbe->deferred)) { - de = list_entry(dbe->deferred.next, struct deferred_export, - node); - err = dbe->export_comm(dbe, de->comm); - list_del_init(&de->node); - free(de); - if (err) - return err; - } - - return 0; -} - -static void db_export__free_deferred(struct db_export *dbe) -{ - struct deferred_export *de; - - while (!list_empty(&dbe->deferred)) { - de = list_entry(dbe->deferred.next, struct deferred_export, - node); - list_del_init(&de->node); - free(de); - } -} - -static int db_export__defer_comm(struct db_export *dbe, struct comm *comm) -{ - struct deferred_export *de; - - de = zalloc(sizeof(struct deferred_export)); - if (!de) - return -ENOMEM; - - de->comm = comm; - list_add_tail(&de->node, &dbe->deferred); - - return 0; -} - int db_export__init(struct db_export *dbe) { memset(dbe, 0, sizeof(struct db_export)); - INIT_LIST_HEAD(&dbe->deferred); return 0; } -int db_export__flush(struct db_export *dbe) -{ - return db_export__deferred(dbe); -} - void db_export__exit(struct db_export *dbe) { - db_export__free_deferred(dbe); call_return_processor__free(dbe->crp); dbe->crp = NULL; } @@ -115,71 +59,73 @@ int db_export__machine(struct db_export *dbe, struct machine *machine) } int db_export__thread(struct db_export *dbe, 
struct thread *thread, - struct machine *machine, struct comm *comm) + struct machine *machine, struct thread *main_thread) { - struct thread *main_thread; u64 main_thread_db_id = 0; - int err; if (thread->db_id) return 0; thread->db_id = ++dbe->thread_last_db_id; - if (thread->pid_ != -1) { - if (thread->pid_ == thread->tid) { - main_thread = thread; - } else { - main_thread = machine__findnew_thread(machine, - thread->pid_, - thread->pid_); - if (!main_thread) - return -ENOMEM; - err = db_export__thread(dbe, main_thread, machine, - comm); - if (err) - goto out_put; - if (comm) { - err = db_export__comm_thread(dbe, comm, thread); - if (err) - goto out_put; - } - } + if (main_thread) main_thread_db_id = main_thread->db_id; - if (main_thread != thread) - thread__put(main_thread); - } if (dbe->export_thread) return dbe->export_thread(dbe, thread, main_thread_db_id, machine); return 0; +} -out_put: - thread__put(main_thread); - return err; +static int __db_export__comm(struct db_export *dbe, struct comm *comm, + struct thread *thread) +{ + comm->db_id = ++dbe->comm_last_db_id; + + if (dbe->export_comm) + return dbe->export_comm(dbe, comm, thread); + + return 0; } int db_export__comm(struct db_export *dbe, struct comm *comm, - struct thread *main_thread) + struct thread *thread) +{ + if (comm->db_id) + return 0; + + return __db_export__comm(dbe, comm, thread); +} + +/* + * Export the "exec" comm. The "exec" comm is the program / application command + * name at the time it first executes. It is used to group threads for the same + * program. Note that the main thread pid (or thread group id tgid) cannot be + * used because it does not change when a new program is exec'ed. + */ +int db_export__exec_comm(struct db_export *dbe, struct comm *comm, + struct thread *main_thread) { int err; if (comm->db_id) return 0; - comm->db_id = ++dbe->comm_last_db_id; - - if (dbe->export_comm) { - if (main_thread->comm_set) - err = dbe->export_comm(dbe, comm); - else - err = db_export__defer_comm(dbe, comm); - if (err) - return err; - } + err = __db_export__comm(dbe, comm, main_thread); + if (err) + return err; + /* + * Record the main thread for this comm. Note that the main thread can + * have many "exec" comms because there will be a new one every time it + * exec's. An "exec" comm however will only ever have 1 main thread. + * That is different to any other threads for that same program because + * exec() will effectively kill them, so the relationship between the + * "exec" comm and non-main threads is 1-to-1. That is why + * db_export__comm_thread() is called here for the main thread, but it + * is called for non-main threads when they are exported. + */ return db_export__comm_thread(dbe, comm, main_thread); } @@ -340,11 +286,65 @@ int db_export__branch_type(struct db_export *dbe, u32 branch_type, return 0; } +static int db_export__threads(struct db_export *dbe, struct thread *thread, + struct thread *main_thread, + struct machine *machine, struct comm **comm_ptr) +{ + struct comm *comm = NULL; + struct comm *curr_comm; + int err; + + if (main_thread) { + /* + * A thread has a reference to the main thread, so export the + * main thread first. + */ + err = db_export__thread(dbe, main_thread, machine, main_thread); + if (err) + return err; + /* + * Export comm before exporting the non-main thread because + * db_export__comm_thread() can be called further below. 
+ */ + comm = machine__thread_exec_comm(machine, main_thread); + if (comm) { + err = db_export__exec_comm(dbe, comm, main_thread); + if (err) + return err; + *comm_ptr = comm; + } + } + + if (thread != main_thread) { + /* + * For a non-main thread, db_export__comm_thread() must be + * called only if thread has not previously been exported. + */ + bool export_comm_thread = comm && !thread->db_id; + + err = db_export__thread(dbe, thread, machine, main_thread); + if (err) + return err; + + if (export_comm_thread) { + err = db_export__comm_thread(dbe, comm, thread); + if (err) + return err; + } + } + + curr_comm = thread__comm(thread); + if (curr_comm) + return db_export__comm(dbe, curr_comm, thread); + + return 0; +} + int db_export__sample(struct db_export *dbe, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al) { - struct thread* thread = al->thread; + struct thread *thread = al->thread; struct export_sample es = { .event = event, .sample = sample, @@ -364,19 +364,13 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, return err; main_thread = thread__main_thread(al->machine, thread); - if (main_thread) - comm = machine__thread_exec_comm(al->machine, main_thread); - err = db_export__thread(dbe, thread, al->machine, comm); + err = db_export__threads(dbe, thread, main_thread, al->machine, &comm); if (err) goto out_put; - if (comm) { - err = db_export__comm(dbe, comm, main_thread); - if (err) - goto out_put; + if (comm) es.comm_db_id = comm->db_id; - } es.db_id = ++dbe->sample_last_db_id; @@ -525,3 +519,92 @@ int db_export__call_return(struct db_export *dbe, struct call_return *cr, return 0; } + +static int db_export__pid_tid(struct db_export *dbe, struct machine *machine, + pid_t pid, pid_t tid, u64 *db_id, + struct comm **comm_ptr, bool *is_idle) +{ + struct thread *thread = machine__find_thread(machine, pid, tid); + struct thread *main_thread; + int err = 0; + + if (!thread || !thread->comm_set) + goto out_put; + + *is_idle = !thread->pid_ && !thread->tid; + + main_thread = thread__main_thread(machine, thread); + + err = db_export__threads(dbe, thread, main_thread, machine, comm_ptr); + + *db_id = thread->db_id; + + thread__put(main_thread); +out_put: + thread__put(thread); + + return err; +} + +int db_export__switch(struct db_export *dbe, union perf_event *event, + struct perf_sample *sample, struct machine *machine) +{ + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; + bool out_preempt = out && + (event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT); + int flags = out | (out_preempt << 1); + bool is_idle_a = false, is_idle_b = false; + u64 th_a_id = 0, th_b_id = 0; + u64 comm_out_id, comm_in_id; + struct comm *comm_a = NULL; + struct comm *comm_b = NULL; + u64 th_out_id, th_in_id; + u64 db_id; + int err; + + err = db_export__machine(dbe, machine); + if (err) + return err; + + err = db_export__pid_tid(dbe, machine, sample->pid, sample->tid, + &th_a_id, &comm_a, &is_idle_a); + if (err) + return err; + + if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) { + pid_t pid = event->context_switch.next_prev_pid; + pid_t tid = event->context_switch.next_prev_tid; + + err = db_export__pid_tid(dbe, machine, pid, tid, &th_b_id, + &comm_b, &is_idle_b); + if (err) + return err; + } + + /* + * Do not export if both threads are unknown (i.e. not being traced), + * or one is unknown and the other is the idle task. 
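+ * + * As a reference for consumers: since flags is built above as out | (out_preempt << 1) and out_preempt implies out, the only values that can reach the exporters are: + * + *	flags == 0 -> "in" + *	flags == 1 -> "out" + *	flags == 3 -> "out preempt" + * + * which is exactly the mapping the CASE expressions in the context_switches_view definitions of the export scripts decode.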
+ */ + if ((!th_a_id || is_idle_a) && (!th_b_id || is_idle_b)) + return 0; + + db_id = ++dbe->context_switch_last_db_id; + + if (out) { + th_out_id = th_a_id; + th_in_id = th_b_id; + comm_out_id = comm_a ? comm_a->db_id : 0; + comm_in_id = comm_b ? comm_b->db_id : 0; + } else { + th_out_id = th_b_id; + th_in_id = th_a_id; + comm_out_id = comm_b ? comm_b->db_id : 0; + comm_in_id = comm_a ? comm_a->db_id : 0; + } + + if (dbe->export_context_switch) + return dbe->export_context_switch(dbe, db_id, machine, sample, + th_out_id, comm_out_id, + th_in_id, comm_in_id, flags); + return 0; +} diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index e8a64028a386..ba1f62a5fe10 100644 --- a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -43,7 +43,8 @@ struct db_export { int (*export_machine)(struct db_export *dbe, struct machine *machine); int (*export_thread)(struct db_export *dbe, struct thread *thread, u64 main_thread_db_id, struct machine *machine); - int (*export_comm)(struct db_export *dbe, struct comm *comm); + int (*export_comm)(struct db_export *dbe, struct comm *comm, + struct thread *thread); int (*export_comm_thread)(struct db_export *dbe, u64 db_id, struct comm *comm, struct thread *thread); int (*export_dso)(struct db_export *dbe, struct dso *dso, @@ -56,6 +57,11 @@ struct db_export { int (*export_call_path)(struct db_export *dbe, struct call_path *cp); int (*export_call_return)(struct db_export *dbe, struct call_return *cr); + int (*export_context_switch)(struct db_export *dbe, u64 db_id, + struct machine *machine, + struct perf_sample *sample, + u64 th_out_id, u64 comm_out_id, + u64 th_in_id, u64 comm_in_id, int flags); struct call_return_processor *crp; struct call_path_root *cpr; u64 evsel_last_db_id; @@ -68,18 +74,19 @@ struct db_export { u64 sample_last_db_id; u64 call_path_last_db_id; u64 call_return_last_db_id; - struct list_head deferred; + u64 context_switch_last_db_id; }; int db_export__init(struct db_export *dbe); -int db_export__flush(struct db_export *dbe); void db_export__exit(struct db_export *dbe); int db_export__evsel(struct db_export *dbe, struct perf_evsel *evsel); int db_export__machine(struct db_export *dbe, struct machine *machine); int db_export__thread(struct db_export *dbe, struct thread *thread, - struct machine *machine, struct comm *comm); + struct machine *machine, struct thread *main_thread); int db_export__comm(struct db_export *dbe, struct comm *comm, - struct thread *main_thread); + struct thread *thread); +int db_export__exec_comm(struct db_export *dbe, struct comm *comm, + struct thread *main_thread); int db_export__comm_thread(struct db_export *dbe, struct comm *comm, struct thread *thread); int db_export__dso(struct db_export *dbe, struct dso *dso, @@ -97,5 +104,7 @@ int db_export__branch_types(struct db_export *dbe); int db_export__call_path(struct db_export *dbe, struct call_path *cp); int db_export__call_return(struct db_export *dbe, struct call_return *cr, u64 *parent_db_id); +int db_export__switch(struct db_export *dbe, union perf_event *event, + struct perf_sample *sample, struct machine *machine); #endif diff --git a/tools/perf/util/rlimit.c b/tools/perf/util/rlimit.c new file mode 100644 index 000000000000..13521d392a22 --- /dev/null +++ b/tools/perf/util/rlimit.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ + +#include "util/debug.h" +#include "util/rlimit.h" +#include <sys/time.h> +#include <sys/resource.h> + +/* + * Bump the memlock so that we can get bpf maps of a reasonable size, + * like 
the ones used with 'perf trace' and with 'perf test bpf'; + * improve this to a more specific request if needed. + */ +void rlimit__bump_memlock(void) +{ + struct rlimit rlim; + + if (getrlimit(RLIMIT_MEMLOCK, &rlim) == 0) { + rlim.rlim_cur *= 4; + rlim.rlim_max *= 4; + + if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0) { + rlim.rlim_cur /= 2; + rlim.rlim_max /= 2; + + if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0) + pr_debug("Couldn't bump rlimit(MEMLOCK); failures may take place when creating BPF maps, etc\n"); + } + } +} diff --git a/tools/perf/util/rlimit.h b/tools/perf/util/rlimit.h new file mode 100644 index 000000000000..9f59d8e710a3 --- /dev/null +++ b/tools/perf/util/rlimit.h @@ -0,0 +1,6 @@ +#ifndef __PERF_RLIMIT_H_ +#define __PERF_RLIMIT_H_ +/* SPDX-License-Identifier: LGPL-2.1 */ + +void rlimit__bump_memlock(void); +#endif // __PERF_RLIMIT_H_ diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 112bed65232f..25dc1d765553 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -113,6 +113,7 @@ struct tables { PyObject *call_path_handler; PyObject *call_return_handler; PyObject *synth_handler; + PyObject *context_switch_handler; bool db_export_mode; }; @@ -1011,15 +1012,19 @@ static int python_export_thread(struct db_export *dbe, struct thread *thread, return 0; } -static int python_export_comm(struct db_export *dbe, struct comm *comm) +static int python_export_comm(struct db_export *dbe, struct comm *comm, + struct thread *thread) { struct tables *tables = container_of(dbe, struct tables, dbe); PyObject *t; - t = tuple_new(2); + t = tuple_new(5); tuple_set_u64(t, 0, comm->db_id); tuple_set_string(t, 1, comm__str(comm)); + tuple_set_u64(t, 2, thread->db_id); + tuple_set_u64(t, 3, comm->start); + tuple_set_s32(t, 4, comm->exec); call_object(tables->comm_handler, t, "comm_table"); @@ -1233,6 +1238,34 @@ static int python_export_call_return(struct db_export *dbe, return 0; } +static int python_export_context_switch(struct db_export *dbe, u64 db_id, + struct machine *machine, + struct perf_sample *sample, + u64 th_out_id, u64 comm_out_id, + u64 th_in_id, u64 comm_in_id, int flags) +{ + struct tables *tables = container_of(dbe, struct tables, dbe); + PyObject *t; + + t = tuple_new(9); + + tuple_set_u64(t, 0, db_id); + tuple_set_u64(t, 1, machine->db_id); + tuple_set_u64(t, 2, sample->time); + tuple_set_s32(t, 3, sample->cpu); + tuple_set_u64(t, 4, th_out_id); + tuple_set_u64(t, 5, comm_out_id); + tuple_set_u64(t, 6, th_in_id); + tuple_set_u64(t, 7, comm_in_id); + tuple_set_s32(t, 8, flags); + + call_object(tables->context_switch_handler, t, "context_switch"); + + Py_DECREF(t); + + return 0; +} + static int python_process_call_return(struct call_return *cr, u64 *parent_db_id, void *data) { @@ -1296,6 +1329,16 @@ static void python_process_event(union perf_event *event, } } +static void python_process_switch(union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct tables *tables = &tables_global; + + if (tables->db_export_mode) + db_export__switch(&tables->dbe, event, sample, machine); +} + static void get_handler_name(char *str, size_t size, struct perf_evsel *evsel) { @@ -1511,6 +1554,7 @@ static void set_table_handlers(struct tables *tables) SET_TABLE_HANDLER(sample); SET_TABLE_HANDLER(call_path); SET_TABLE_HANDLER(call_return); + SET_TABLE_HANDLER(context_switch); /* * Synthesized events are samples but with
architecture-specific data @@ -1620,9 +1664,7 @@ error: static int python_flush_script(void) { - struct tables *tables = &tables_global; - - return db_export__flush(&tables->dbe); + return 0; } /* @@ -1831,6 +1873,7 @@ struct scripting_ops python_scripting_ops = { .flush_script = python_flush_script, .stop_script = python_stop_script, .process_event = python_process_event, + .process_switch = python_process_switch, .process_stat = python_process_stat, .process_stat_interval = python_process_stat_interval, .generate_script = python_generate_script, diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index d9b0a942090a..c7002fe11673 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -81,6 +81,9 @@ struct scripting_ops { struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al); + void (*process_switch)(union perf_event *event, + struct perf_sample *sample, + struct machine *machine); void (*process_stat)(struct perf_stat_config *config, struct perf_evsel *evsel, u64 tstamp); void (*process_stat_interval)(u64 tstamp); diff --git a/tools/power/cpupower/debug/kernel/Makefile b/tools/power/cpupower/debug/kernel/Makefile index c23e5a6ceb7e..7b5c43684be1 100644 --- a/tools/power/cpupower/debug/kernel/Makefile +++ b/tools/power/cpupower/debug/kernel/Makefile @@ -12,8 +12,8 @@ default: $(MAKE) -C $(KDIR) M=$(CURDIR) clean: - - rm -rf *.o *.ko .tmp-versions .*.cmd .*.mod.* *.mod.c - - rm -rf .tmp_versions* Module.symvers modules.order + - rm -rf *.o *.ko .*.cmd .*.mod.* *.mod.c + - rm -rf Module.symvers modules.order install: default install -d $(KMISC) diff --git a/tools/testing/ktest/config-bisect.pl b/tools/testing/ktest/config-bisect.pl index 72525426654b..6fd864935319 100755 --- a/tools/testing/ktest/config-bisect.pl +++ b/tools/testing/ktest/config-bisect.pl @@ -663,7 +663,7 @@ while ($#ARGV >= 0) { } else { - die "Unknow option $opt\n"; + die "Unknown option $opt\n"; } } @@ -732,7 +732,7 @@ if ($start) { } } run_command "cp $good_start $good" or die "failed to copy to $good\n"; - run_command "cp $bad_start $bad" or die "faield to copy to $bad\n"; + run_command "cp $bad_start $bad" or die "failed to copy to $bad\n"; } else { if ( ! 
-f $good ) { die "Can not find file $good\n"; diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 62afd0b43074..ba7849751989 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -10,11 +10,11 @@ UNAME_M := $(shell uname -m) LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebit.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c LIBKVM_aarch64 = lib/aarch64/processor.c +LIBKVM_s390x = lib/s390x/processor.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid -TEST_GEN_PROGS_x86_64 += x86_64/kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test @@ -26,9 +26,14 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += clear_dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_test +TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_aarch64 += clear_dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_test +TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus + +TEST_GEN_PROGS_s390x += s390x/sync_regs_test +TEST_GEN_PROGS_s390x += kvm_create_max_vcpus TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) LIBKVM += $(LIBKVM_$(UNAME_M)) @@ -43,7 +48,12 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) -LDFLAGS += -pthread $(no-pie-option) +# On s390, build the testcases KVM-enabled +pgste-option = $(call try-run, echo 'int main() { return 0; }' | \ + $(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste) + + +LDFLAGS += -pthread $(no-pie-option) $(pgste-option) # After inclusion, $(OUTPUT) is defined and # $(TEST_GEN_PROGS) starts with $(OUTPUT)/ diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 00235f5932f0..e0e66b115ef2 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -41,6 +41,12 @@ enum vm_guest_mode { NUM_VM_MODES, }; +#ifdef __aarch64__ +#define VM_MODE_DEFAULT VM_MODE_P40V48_4K +#else +#define VM_MODE_DEFAULT VM_MODE_P52V48_4K +#endif + #define vm_guest_mode_string(m) vm_guest_mode_string[m] extern const char * const vm_guest_mode_string[]; @@ -111,10 +117,12 @@ void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs); int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs); +#ifdef __KVM_HAVE_VCPU_EVENTS void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_events *events); void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_events *events); +#endif #ifdef __x86_64__ void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_nested_state *state); diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390x/processor.h new file mode 100644 index 000000000000..e0e96a5f608c --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390x/processor.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * s390x processor specific defines + */ +#ifndef SELFTEST_KVM_PROCESSOR_H +#define SELFTEST_KVM_PROCESSOR_H 
+ +/* Bits in the region/segment table entry */ +#define REGION_ENTRY_ORIGIN ~0xfffUL /* region/segment table origin */ +#define REGION_ENTRY_PROTECT 0x200 /* region protection bit */ +#define REGION_ENTRY_NOEXEC 0x100 /* region no-execute bit */ +#define REGION_ENTRY_OFFSET 0xc0 /* region table offset */ +#define REGION_ENTRY_INVALID 0x20 /* invalid region table entry */ +#define REGION_ENTRY_TYPE 0x0c /* region/segment table type mask */ +#define REGION_ENTRY_LENGTH 0x03 /* region third length */ + +/* Bits in the page table entry */ +#define PAGE_INVALID 0x400 /* HW invalid bit */ +#define PAGE_PROTECT 0x200 /* HW read-only bit */ +#define PAGE_NOEXEC 0x100 /* HW no-execute bit */ + +#endif diff --git a/tools/testing/selftests/kvm/x86_64/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index 429226bc6a92..231d79e57774 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -27,7 +27,7 @@ void test_vcpu_creation(int first_vcpu_id, int num_vcpus) printf("Testing creating %d vCPUs, with IDs %d...%d.\n", num_vcpus, first_vcpu_id, first_vcpu_id + num_vcpus - 1); - vm = vm_create(VM_MODE_P52V48_4K, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); for (i = 0; i < num_vcpus; i++) { int vcpu_id = first_vcpu_id + i; diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index af2023d818a5..486400a97374 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -227,7 +227,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2; struct kvm_vm *vm; - vm = vm_create(VM_MODE_P40V48_4K, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); vm_vcpu_add_default(vm, vcpuid, guest_code); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 221e3fa46680..6e49bb039376 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -556,6 +556,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, int ret; struct userspace_mem_region *region; size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size; + size_t alignment; TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " "address not on a page boundary.\n" @@ -605,9 +606,20 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, TEST_ASSERT(region != NULL, "Insufficient Memory"); region->mmap_size = npages * vm->page_size; - /* Enough memory to align up to a huge page. 
*/ +#ifdef __s390x__ + /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ + alignment = 0x100000; +#else + alignment = 1; +#endif + if (src_type == VM_MEM_SRC_ANONYMOUS_THP) - region->mmap_size += huge_page_size; + alignment = max(huge_page_size, alignment); + + /* Add enough memory to align up if necessary */ + if (alignment > 1) + region->mmap_size += alignment; + region->mmap_start = mmap(NULL, region->mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS @@ -617,9 +629,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, "test_malloc failed, mmap_start: %p errno: %i", region->mmap_start, errno); - /* Align THP allocation up to start of a huge page. */ - region->host_mem = align(region->mmap_start, - src_type == VM_MEM_SRC_ANONYMOUS_THP ? huge_page_size : 1); + /* Align host address */ + region->host_mem = align(region->mmap_start, alignment); /* As needed perform madvise */ if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) { @@ -1218,6 +1229,7 @@ void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs) ret, errno); } +#ifdef __KVM_HAVE_VCPU_EVENTS void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_events *events) { @@ -1243,6 +1255,7 @@ void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i", ret, errno); } +#endif #ifdef __x86_64__ void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid, diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c new file mode 100644 index 000000000000..32a02360b1eb --- /dev/null +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KVM selftest s390x library code - CPU-related functions (page tables...) + * + * Copyright (C) 2019, Red Hat, Inc. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include "processor.h" +#include "kvm_util.h" +#include "../kvm_util_internal.h" + +#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 + +#define PAGES_PER_REGION 4 + +void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot) +{ + vm_paddr_t paddr; + + TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + vm->page_size); + + if (vm->pgd_created) + return; + + paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot); + memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size); + + vm->pgd = paddr; + vm->pgd_created = true; +} + +/* + * Allocate 4 pages for a region/segment table (ri < 4), or one page for + * a page table (ri == 4). Returns a suitable region/segment table entry + * which points to the freshly allocated pages. + */ +static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot) +{ + uint64_t taddr; + + taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot); + memset(addr_gpa2hva(vm, taddr), 0xff, (ri < 4 ? PAGES_PER_REGION : 1) * vm->page_size); + + return (taddr & REGION_ENTRY_ORIGIN) + | (((4 - ri) << 2) & REGION_ENTRY_TYPE) + | ((ri < 4 ?
(PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH); +} + +/* + * VM Virtual Page Map + * + * Input Args: + * vm - Virtual Machine + * gva - VM Virtual Address + * gpa - VM Physical Address + * memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: None + * + * Within the VM given by vm, creates a virtual translation for the page + * starting at gva to the page starting at gpa. + */ +void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa, + uint32_t memslot) +{ + int ri, idx; + uint64_t *entry; + + TEST_ASSERT((gva % vm->page_size) == 0, + "Virtual address not on page boundary,\n" + " vaddr: 0x%lx vm->page_size: 0x%x", + gva, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, + (gva >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", + gva); + TEST_ASSERT((gpa % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx vm->page_size: 0x%x", + gpa, vm->page_size); + TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + gpa, vm->max_gfn, vm->page_size); + + /* Walk through region and segment tables */ + entry = addr_gpa2hva(vm, vm->pgd); + for (ri = 1; ri <= 4; ri++) { + idx = (gva >> (64 - 11 * ri)) & 0x7ffu; + if (entry[idx] & REGION_ENTRY_INVALID) + entry[idx] = virt_alloc_region(vm, ri, memslot); + entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN); + } + + /* Fill in page table entry */ + idx = (gva >> 12) & 0x0ffu; /* page index */ + if (!(entry[idx] & PAGE_INVALID)) + fprintf(stderr, + "WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa); + entry[idx] = gpa; +} + +/* + * Address Guest Virtual to Guest Physical + * + * Input Args: + * vm - Virtual Machine + * gva - VM virtual address + * + * Output Args: None + * + * Return: + * Equivalent VM physical address + * + * Translates the VM virtual address given by gva to a VM physical + * address, within the VM given by vm, and returns that physical + * address. + * A TEST_ASSERT failure occurs if no region mapping exists for the + * VM virtual address.
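+ * + * For reference: the walk below, like the one in virt_pg_map() above, slices the 64-bit guest virtual address into four 11-bit region/segment table indices plus an 8-bit page table index, which together with the 12 offset bits covers the whole address (4 * 11 + 8 + 12 = 64): + * + *	idx = (gva >> (64 - 11 * ri)) & 0x7ffu; for ri = 1..4 + *	idx = (gva >> 12) & 0x0ffu; (page table index)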
+ */ +vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + int ri, idx; + uint64_t *entry; + + TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + vm->page_size); + + entry = addr_gpa2hva(vm, vm->pgd); + for (ri = 1; ri <= 4; ri++) { + idx = (gva >> (64 - 11 * ri)) & 0x7ffu; + TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID), + "No region mapping for vm virtual address 0x%lx", + gva); + entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN); + } + + idx = (gva >> 12) & 0x0ffu; /* page index */ + + TEST_ASSERT(!(entry[idx] & PAGE_INVALID), + "No page mapping for vm virtual address 0x%lx", gva); + + return (entry[idx] & ~0xffful) + (gva & 0xffful); +} + +static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent, + uint64_t ptea_start) +{ + uint64_t *pte, ptea; + + for (ptea = ptea_start; ptea < ptea_start + 0x100 * 8; ptea += 8) { + pte = addr_gpa2hva(vm, ptea); + if (*pte & PAGE_INVALID) + continue; + fprintf(stream, "%*spte @ 0x%lx: 0x%016lx\n", + indent, "", ptea, *pte); + } +} + +static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent, + uint64_t reg_tab_addr) +{ + uint64_t addr, *entry; + + for (addr = reg_tab_addr; addr < reg_tab_addr + 0x400 * 8; addr += 8) { + entry = addr_gpa2hva(vm, addr); + if (*entry & REGION_ENTRY_INVALID) + continue; + fprintf(stream, "%*srt%lde @ 0x%lx: 0x%016lx\n", + indent, "", 4 - ((*entry & REGION_ENTRY_TYPE) >> 2), + addr, *entry); + if (*entry & REGION_ENTRY_TYPE) { + virt_dump_region(stream, vm, indent + 2, + *entry & REGION_ENTRY_ORIGIN); + } else { + virt_dump_ptes(stream, vm, indent + 2, + *entry & REGION_ENTRY_ORIGIN); + } + } +} + +void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + if (!vm->pgd_created) + return; + + virt_dump_region(stream, vm, indent, vm->pgd); +} + +/* + * Create a VM with reasonable defaults + * + * Input Args: + * vcpuid - The id of the single VCPU to add to the VM. + * extra_mem_pages - The number of extra memory pages to add (this will + * decide how much extra space we will need to + * set up the page tables using mem slot 0) + * guest_code - The vCPU's entry point + * + * Output Args: None + * + * Return: + * Pointer to opaque structure that describes the created VM. + */ +struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, + void *guest_code) +{ + /* + * The additional number of pages required for the page tables is: + * 1 * n / 256 + 4 * (n / 256) / 2048 + 4 * (n / 256) / 2048^2 + ... + * which is definitely smaller than (n / 256) * 2. + */ + uint64_t extra_pg_pages = extra_mem_pages / 256 * 2; + struct kvm_vm *vm; + + vm = vm_create(VM_MODE_DEFAULT, + DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + vm_vcpu_add_default(vm, vcpuid, guest_code); + + return vm; +} + +/* + * Adds a vCPU with reasonable defaults (i.e. a stack and initial PSW) + * + * Input Args: + * vcpuid - The id of the VCPU to add to the VM.
+ * guest_code - The vCPU's entry point + */ +void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) +{ + size_t stack_size = DEFAULT_STACK_PGS * getpagesize(); + uint64_t stack_vaddr; + struct kvm_regs regs; + struct kvm_sregs sregs; + struct kvm_run *run; + + TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + vm->page_size); + + stack_vaddr = vm_vaddr_alloc(vm, stack_size, + DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0); + + vm_vcpu_add(vm, vcpuid); + + /* Set up guest registers */ + vcpu_regs_get(vm, vcpuid, &regs); + regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160; + vcpu_regs_set(vm, vcpuid, &regs); + + vcpu_sregs_get(vm, vcpuid, &sregs); + sregs.crs[0] |= 0x00040000; /* Enable floating point regs */ + sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */ + vcpu_sregs_set(vm, vcpuid, &sregs); + + run = vcpu_state(vm, vcpuid); + run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */ + run->psw_addr = (uintptr_t)guest_code; +} + +void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) +{ + struct vcpu *vcpu = vm->vcpu_head; + + fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n", + indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr); +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index b430f962e323..6cb34a0fa200 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -821,7 +821,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; /* Create VM */ - vm = vm_create(VM_MODE_P52V48_4K, + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index fe56d159d65f..204f847bd065 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -5,8 +5,6 @@ * Copyright (C) 2018, Google LLC. */ -#define _GNU_SOURCE /* for program_invocation_name */ - #include "test_util.h" #include "kvm_util.h" #include "processor.h" diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c new file mode 100644 index 000000000000..e85ff0d69548 --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test for s390x KVM_CAP_SYNC_REGS + * + * Based on the same test for x86: + * Copyright (C) 2018, Google LLC. + * + * Adaptations for s390x: + * Copyright (C) 2019, Red Hat, Inc. + * + * Test expected behavior of the KVM_CAP_SYNC_REGS functionality.
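+ * + * Background, paraphrased from the KVM API documentation: with + * KVM_CAP_SYNC_REGS, userspace reads and writes guest registers directly + * in the shared kvm_run area (run->s.regs) instead of going through the + * KVM_GET_REGS/KVM_SET_REGS ioctls; run->kvm_valid_regs selects the + * register sets the kernel copies out on exit, and run->kvm_dirty_regs + * the sets it loads back in on the next KVM_RUN.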
+ */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" + +#define VCPU_ID 5 + +static void guest_code(void) +{ + for (;;) { + asm volatile ("diag 0,0,0x501"); + asm volatile ("ahi 11,1"); + } +} + +#define REG_COMPARE(reg) \ + TEST_ASSERT(left->reg == right->reg, \ + "Register " #reg \ + " values did not match: 0x%llx, 0x%llx\n", \ + left->reg, right->reg) + +static void compare_regs(struct kvm_regs *left, struct kvm_sync_regs *right) +{ + int i; + + for (i = 0; i < 16; i++) + REG_COMPARE(gprs[i]); } + +static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right) +{ + int i; + + for (i = 0; i < 16; i++) + REG_COMPARE(acrs[i]); + + for (i = 0; i < 16; i++) + REG_COMPARE(crs[i]); } + +#undef REG_COMPARE + +#define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS) +#define INVALID_SYNC_FIELD 0x80000000 + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_run *run; + struct kvm_regs regs; + struct kvm_sregs sregs; + int rv, cap; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + cap = kvm_check_cap(KVM_CAP_SYNC_REGS); + if (!cap) { + fprintf(stderr, "CAP_SYNC_REGS not supported, skipping test\n"); + exit(KSFT_SKIP); + } + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + + run = vcpu_state(vm, VCPU_ID); + + /* Request and verify all valid register sets. */ + run->kvm_valid_regs = TEST_SYNC_FIELDS; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); + TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, + "Unexpected exit reason: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->s390_sieic.icptcode == 4 && + (run->s390_sieic.ipa >> 8) == 0x83 && + (run->s390_sieic.ipb >> 16) == 0x501, + "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x\n", + run->s390_sieic.icptcode, run->s390_sieic.ipa, + run->s390_sieic.ipb); + + vcpu_regs_get(vm, VCPU_ID, &regs); + compare_regs(&regs, &run->s.regs); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + compare_sregs(&sregs, &run->s.regs); + + /* Set and verify various register values */ + run->s.regs.gprs[11] = 0xBAD1DEA; + run->s.regs.acrs[0] = 1 << 11; + + run->kvm_valid_regs = TEST_SYNC_FIELDS; + run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); + TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, + "Unexpected exit reason: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1, + "r11 sync regs value incorrect 0x%llx.", + run->s.regs.gprs[11]); + TEST_ASSERT(run->s.regs.acrs[0] == 1 << 11, + "acr0 sync regs value incorrect 0x%llx.", + run->s.regs.acrs[0]); + + vcpu_regs_get(vm, VCPU_ID, &regs); + compare_regs(&regs, &run->s.regs); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + compare_sregs(&sregs, &run->s.regs); + + /* Clear kvm_dirty_regs bits, verify new s.regs values are + * overwritten with existing guest values.
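+ * In other words, with kvm_dirty_regs cleared the kernel ignores the + * userspace edit of gprs[11] below and refills s.regs from the real + * guest state on exit, so the value read back must not be 0xDEADBEEF.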
+	 */
+	run->kvm_valid_regs = TEST_SYNC_FIELDS;
+	run->kvm_dirty_regs = 0;
+	run->s.regs.gprs[11] = 0xDEADBEEF;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
+		    "Unexpected exit reason: %u (%s)\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
+		    "r11 sync regs value incorrect 0x%llx.",
+		    run->s.regs.gprs[11]);
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index 8a20e03d4cb7..9c60337317c6 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -78,10 +78,10 @@ set -e
 
 function _modprobe()
 {
-	modprobe "$@"
+	modprobe "$@" || return 1
 
 	if [[ "$REMOTE_HOST" != "" ]]; then
-		ssh "$REMOTE_HOST" modprobe "$@"
+		ssh "$REMOTE_HOST" modprobe "$@" || return 1
 	fi
 }
 
@@ -442,6 +442,30 @@ function pingpong_test()
 	echo "  Passed"
 }
 
+function msi_test()
+{
+	LOC=$1
+	REM=$2
+
+	write_file 1 $LOC/ready
+
+	echo "Running MSI interrupt tests on: $(subdirname $LOC) / $(subdirname $REM)"
+
+	CNT=$(read_file "$LOC/count")
+	for ((i = 0; i < $CNT; i++)); do
+		START=$(read_file $REM/../irq${i}_occurrences)
+		write_file $i $LOC/trigger
+		END=$(read_file $REM/../irq${i}_occurrences)
+
+		if [[ $(($END - $START)) != 1 ]]; then
+			echo "MSI did not trigger the interrupt on the remote side!" >&2
+			exit 1
+		fi
+	done
+
+	echo "  Passed"
+}
+
 function perf_test()
 {
 	USE_DMA=$1
@@ -520,6 +544,29 @@ function ntb_pingpong_tests()
 	_modprobe -r ntb_pingpong
 }
 
+function ntb_msi_tests()
+{
+	LOCAL_MSI="$DEBUGFS/ntb_msi_test/$LOCAL_DEV"
+	REMOTE_MSI="$REMOTE_HOST:$DEBUGFS/ntb_msi_test/$REMOTE_DEV"
+
+	echo "Starting ntb_msi_test tests..."
+
+	if ! _modprobe ntb_msi_test 2> /dev/null; then
+		echo "  Not doing MSI tests seeing the module is not available."
+		return
+	fi
+
+	port_test $LOCAL_MSI $REMOTE_MSI
+
+	LOCAL_PEER="$LOCAL_MSI/peer$LOCAL_PIDX"
+	REMOTE_PEER="$REMOTE_MSI/peer$REMOTE_PIDX"
+
+	msi_test $LOCAL_PEER $REMOTE_PEER
+	msi_test $REMOTE_PEER $LOCAL_PEER
+
+	_modprobe -r ntb_msi_test
+}
+
 function ntb_perf_tests()
 {
 	LOCAL_PERF="$DEBUGFS/ntb_perf/$LOCAL_DEV"
@@ -541,6 +588,7 @@ function cleanup()
 	_modprobe -r ntb_perf 2> /dev/null
 	_modprobe -r ntb_pingpong 2> /dev/null
 	_modprobe -r ntb_transport 2> /dev/null
+	_modprobe -r ntb_msi_test 2> /dev/null
 	set -e
 }
 
@@ -577,5 +625,7 @@ ntb_tool_tests
 echo
 ntb_pingpong_tests
 echo
+ntb_msi_tests
+echo
 ntb_perf_tests
 echo
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 5ab4c60c100e..15a329da59fa 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -489,25 +489,11 @@ static void test_ptrace_write_gsbase(void)
 	 * selector value is changed or not by the GSBASE write in
 	 * a ptracer.
 	 */
-	if (gs != *shared_scratch) {
-		nerrs++;
-		printf("[FAIL]\tGS changed to %lx\n", gs);
-
-		/*
-		 * On older kernels, poking a nonzero value into the
-		 * base would zero the selector. On newer kernels,
-		 * this behavior has changed -- poking the base
-		 * changes only the base and, if FSGSBASE is not
-		 * available, this may have no effect.
-		 */
-		if (gs == 0)
-			printf("\tNote: this is expected behavior on older kernels.\n");
-	} else if (have_fsgsbase && (base != 0xFF)) {
-		nerrs++;
-		printf("[FAIL]\tGSBASE changed to %lx\n", base);
+	if (gs == 0 && base == 0xFF) {
+		printf("[OK]\tGS was reset as expected\n");
 	} else {
-		printf("[OK]\tGS remained 0x%hx%s", *shared_scratch, have_fsgsbase ? " and GSBASE changed to 0xFF" : "");
-		printf("\n");
+		nerrs++;
+		printf("[FAIL]\tGS=0x%lx, GSBASE=0x%lx (should be 0, 0xFF)\n",
+		       gs, base);
 	}
 }
diff --git a/usr/include/Makefile b/usr/include/Makefile
index cd8daa20d487..aa316d99e035 100644
--- a/usr/include/Makefile
+++ b/usr/include/Makefile
@@ -30,8 +30,6 @@ header-test-$(CONFIG_CPU_BIG_ENDIAN) += linux/byteorder/big_endian.h
 header-test-$(CONFIG_CPU_LITTLE_ENDIAN) += linux/byteorder/little_endian.h
 header-test- += linux/coda.h
 header-test- += linux/coda_psdev.h
-header-test- += linux/dvb/audio.h
-header-test- += linux/dvb/osd.h
 header-test- += linux/elfcore.h
 header-test- += linux/errqueue.h
 header-test- += linux/fsmap.h
@@ -44,7 +42,6 @@ header-test- += linux/netfilter_bridge/ebtables.h
 header-test- += linux/netfilter_ipv4/ipt_LOG.h
 header-test- += linux/netfilter_ipv6/ip6t_LOG.h
 header-test- += linux/nfc.h
-header-test- += linux/nilfs2_ondisk.h
 header-test- += linux/omap3isp.h
 header-test- += linux/omapfb.h
 header-test- += linux/patchkey.h
@@ -59,9 +56,6 @@ header-test- += linux/v4l2-mediabus.h
 header-test- += linux/v4l2-subdev.h
 header-test- += linux/videodev2.h
 header-test- += linux/vm_sockets.h
-header-test- += misc/ocxl.h
-header-test- += mtd/mtd-abi.h
-header-test- += mtd/mtd-user.h
 header-test- += scsi/scsi_bsg_fc.h
 header-test- += scsi/scsi_netlink.h
 header-test- += scsi/scsi_netlink_fc.h
@@ -108,7 +102,6 @@ header-test- += linux/bpf_perf_event.h
 endif
 
 ifeq ($(SRCARCH),s390)
-header-test- += asm/runtime_instr.h
 header-test- += asm/zcrypt.h
 endif
 
@@ -116,7 +109,6 @@ ifeq ($(SRCARCH),sparc)
 header-test- += asm/stat.h
 header-test- += asm/uctx.h
 header-test- += asm/fbio.h
-header-test- += asm/openpromio.h
 endif
 
 # asm-generic/*.h is used by asm/*.h, and should not be included directly
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index f645c0fbf7ec..acc43242a310 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -727,7 +727,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 * Ensure we set mode to IN_GUEST_MODE after we disable
 		 * interrupts and before the final VCPU requests check.
 		 * See the comment in kvm_vcpu_exiting_guest_mode() and
-		 * Documentation/virtual/kvm/vcpu-requests.rst
+		 * Documentation/virt/kvm/vcpu-requests.rst
 		 */
 		smp_store_mb(vcpu->mode, IN_GUEST_MODE);
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 936962abc38d..c45e2d7e942f 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -250,7 +250,7 @@ static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
 	 * pending state of interrupt is latched in pending_latch variable.
 	 * Userspace will save and restore pending state and line_level
 	 * separately.
-	 * Refer to Documentation/virtual/kvm/devices/arm-vgic-v3.txt
+	 * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.txt
 	 * for handling of ISPENDR and ICPENDR.
 	 */
 	for (i = 0; i < len * 8; i++) {
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 57205beaa981..3b7525deec80 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -42,7 +42,7 @@
 			     VGIC_AFFINITY_LEVEL(val, 3))
 
 /*
- * As per Documentation/virtual/kvm/devices/arm-vgic-v3.txt,
+ * As per Documentation/virt/kvm/devices/arm-vgic-v3.txt,
  * below macros are defined for CPUREG encoding.
  */
 #define KVM_REG_ARM_VGIC_SYSREG_OP0_MASK 0x000000000000c000
@@ -63,7 +63,7 @@
 			 KVM_REG_ARM_VGIC_SYSREG_OP2_MASK)
 
 /*
- * As per Documentation/virtual/kvm/devices/arm-vgic-its.txt,
+ * As per Documentation/virt/kvm/devices/arm-vgic-its.txt,
  * below macros are defined for ITS table entry encoding.
  */
 #define KVM_ITS_CTE_VALID_SHIFT		63
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b4ab59dd6846..887f3b0c2b60 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -314,6 +314,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	kvm_vcpu_set_in_spin_loop(vcpu, false);
 	kvm_vcpu_set_dy_eligible(vcpu, false);
 	vcpu->preempted = false;
+	vcpu->ready = false;
 
 	r = kvm_arch_vcpu_init(vcpu);
 	if (r < 0)
@@ -2387,6 +2388,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
 	wqp = kvm_arch_vcpu_wq(vcpu);
 	if (swq_has_sleeper(wqp)) {
 		swake_up_one(wqp);
+		WRITE_ONCE(vcpu->ready, true);
 		++vcpu->stat.halt_wakeup;
 		return true;
 	}
@@ -2500,7 +2502,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 				continue;
 			} else if (pass && i > last_boosted_vcpu)
 				break;
-			if (!READ_ONCE(vcpu->preempted))
+			if (!READ_ONCE(vcpu->ready))
 				continue;
 			if (vcpu == me)
 				continue;
@@ -4203,8 +4205,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
 {
 	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
 
-	if (vcpu->preempted)
-		vcpu->preempted = false;
+	vcpu->preempted = false;
+	WRITE_ONCE(vcpu->ready, false);
 
 	kvm_arch_sched_in(vcpu, cpu);
 
@@ -4216,8 +4218,10 @@ static void kvm_sched_out(struct preempt_notifier *pn,
 {
 	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
 
-	if (current->state == TASK_RUNNING)
+	if (current->state == TASK_RUNNING) {
 		vcpu->preempted = true;
+		WRITE_ONCE(vcpu->ready, true);
+	}
 
 	kvm_arch_vcpu_put(vcpu);
 }
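
The kvm_main.c hunks above change which vCPUs qualify for directed yield: kvm_vcpu_on_spin() now tests a new "ready" flag instead of "preempted", and kvm_vcpu_wake_up() sets that flag, so a vCPU that was just woken from halt becomes a boost target even though it was never preempted. The following stand-alone sketch restates that candidate filter with simplified stand-in types; it is an illustration only, not the kernel's structures, and it omits the vCPU iteration, locking, and dy-eligibility checks that the real function performs:

/*
 * Illustration only: a simplified stand-in for the directed-yield
 * candidate filter in kvm_vcpu_on_spin(); the real flags live in
 * struct kvm_vcpu and are accessed with READ_ONCE/WRITE_ONCE.
 */
#include <stdbool.h>

struct vcpu_flags {
	bool preempted;		/* scheduled out while still TASK_RUNNING */
	bool ready;		/* preempted, or just woken from halt */
};

/* Old behavior: only a preempted vCPU was a yield candidate. */
static bool old_yield_candidate(const struct vcpu_flags *v)
{
	return v->preempted;
}

/*
 * New behavior: a wake-up also sets ->ready, so a halted-then-woken
 * vCPU qualifies; kvm_sched_in() clears both flags once it runs again.
 */
static bool new_yield_candidate(const struct vcpu_flags *v)
{
	return v->ready;
}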