diff options
520 files changed, 32593 insertions, 8314 deletions
diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy index d0d0c578324c..0a378a88217a 100644 --- a/Documentation/ABI/testing/ima_policy +++ b/Documentation/ABI/testing/ima_policy @@ -20,17 +20,19 @@ Description: action: measure | dont_measure | appraise | dont_appraise | audit condition:= base | lsm [option] base: [[func=] [mask=] [fsmagic=] [fsuuid=] [uid=] - [fowner]] + [euid=] [fowner=]] lsm: [[subj_user=] [subj_role=] [subj_type=] [obj_user=] [obj_role=] [obj_type=]] option: [[appraise_type=]] [permit_directio] base: func:= [BPRM_CHECK][MMAP_CHECK][FILE_CHECK][MODULE_CHECK] [FIRMWARE_CHECK] - mask:= [MAY_READ] [MAY_WRITE] [MAY_APPEND] [MAY_EXEC] + mask:= [[^]MAY_READ] [[^]MAY_WRITE] [[^]MAY_APPEND] + [[^]MAY_EXEC] fsmagic:= hex value fsuuid:= file system UUID (e.g 8bcbe394-4f13-4144-be8e-5aa9ea2ce2f6) uid:= decimal value + euid:= decimal value fowner:=decimal value lsm: are LSM specific option: appraise_type:= [imasig] @@ -49,11 +51,25 @@ Description: dont_measure fsmagic=0x01021994 dont_appraise fsmagic=0x01021994 # RAMFS_MAGIC - dont_measure fsmagic=0x858458f6 dont_appraise fsmagic=0x858458f6 + # DEVPTS_SUPER_MAGIC + dont_measure fsmagic=0x1cd1 + dont_appraise fsmagic=0x1cd1 + # BINFMTFS_MAGIC + dont_measure fsmagic=0x42494e4d + dont_appraise fsmagic=0x42494e4d # SECURITYFS_MAGIC dont_measure fsmagic=0x73636673 dont_appraise fsmagic=0x73636673 + # SELINUX_MAGIC + dont_measure fsmagic=0xf97cff8c + dont_appraise fsmagic=0xf97cff8c + # CGROUP_SUPER_MAGIC + dont_measure fsmagic=0x27e0eb + dont_appraise fsmagic=0x27e0eb + # NSFS_MAGIC + dont_measure fsmagic=0x6e736673 + dont_appraise fsmagic=0x6e736673 measure func=BPRM_CHECK measure func=FILE_MMAP mask=MAY_EXEC @@ -70,10 +86,6 @@ Description: Examples of LSM specific definitions: SELinux: - # SELINUX_MAGIC - dont_measure fsmagic=0xf97cff8c - dont_appraise fsmagic=0xf97cff8c - dont_measure obj_type=var_log_t dont_appraise obj_type=var_log_t dont_measure obj_type=auditd_log_t diff --git a/Documentation/devicetree/bindings/clock/ingenic,cgu.txt b/Documentation/devicetree/bindings/clock/ingenic,cgu.txt new file mode 100644 index 000000000000..f8d4134ae409 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/ingenic,cgu.txt @@ -0,0 +1,53 @@ +Ingenic SoC CGU binding + +The CGU in an Ingenic SoC provides all the clocks generated on-chip. It +typically includes a variety of PLLs, multiplexers, dividers & gates in order +to provide many different clock signals derived from only 2 external source +clocks. + +Required properties: +- compatible : Should be "ingenic,<soctype>-cgu". + For example "ingenic,jz4740-cgu" or "ingenic,jz4780-cgu". +- reg : The address & length of the CGU registers. +- clocks : List of phandle & clock specifiers for clocks external to the CGU. + Two such external clocks should be specified - first the external crystal + "ext" and second the RTC clock source "rtc". +- clock-names : List of name strings for the external clocks. +- #clock-cells: Should be 1. + Clock consumers specify this argument to identify a clock. The valid values + may be found in <dt-bindings/clock/<soctype>-cgu.h>. + +Example SoC include file: + +/ { + cgu: jz4740-cgu { + compatible = "ingenic,jz4740-cgu"; + reg = <0x10000000 0x100>; + #clock-cells = <1>; + }; + + uart0: serial@10030000 { + clocks = <&cgu JZ4740_CLK_UART0>; + }; +}; + +Example board file: + +/ { + ext: clock@0 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <12000000>; + }; + + rtc: clock@1 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + }; + + &cgu { + clocks = <&ext> <&rtc>; + clock-names: "ext", "rtc"; + }; +}; diff --git a/Documentation/devicetree/bindings/clock/qca,ath79-pll.txt b/Documentation/devicetree/bindings/clock/qca,ath79-pll.txt new file mode 100644 index 000000000000..e0fc2c11dd00 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qca,ath79-pll.txt @@ -0,0 +1,33 @@ +Binding for Qualcomm Atheros AR7xxx/AR9XXX PLL controller + +The PPL controller provides the 3 main clocks of the SoC: CPU, DDR and AHB. + +Required Properties: +- compatible: has to be "qca,<soctype>-cpu-intc" and one of the following + fallbacks: + - "qca,ar7100-pll" + - "qca,ar7240-pll" + - "qca,ar9130-pll" + - "qca,ar9330-pll" + - "qca,ar9340-pll" + - "qca,qca9550-pll" +- reg: Base address and size of the controllers memory area +- clock-names: Name of the input clock, has to be "ref" +- clocks: phandle of the external reference clock +- #clock-cells: has to be one + +Optional properties: +- clock-output-names: should be "cpu", "ddr", "ahb" + +Example: + + memory-controller@18050000 { + compatible = "qca,ar9132-ppl", "qca,ar9130-pll"; + reg = <0x18050000 0x20>; + + clock-names = "ref"; + clocks = <&extosc>; + + #clock-cells = <1>; + clock-output-names = "cpu", "ddr", "ahb"; + }; diff --git a/Documentation/devicetree/bindings/dma/dma.txt b/Documentation/devicetree/bindings/dma/dma.txt index 82104271e754..6312fb00ce8d 100644 --- a/Documentation/devicetree/bindings/dma/dma.txt +++ b/Documentation/devicetree/bindings/dma/dma.txt @@ -31,6 +31,34 @@ Example: dma-requests = <127>; }; +* DMA router + +DMA routers are transparent IP blocks used to route DMA request lines from +devices to the DMA controller. Some SoCs (like TI DRA7x) have more peripherals +integrated with DMA requests than what the DMA controller can handle directly. + +Required property: +- dma-masters: phandle of the DMA controller or list of phandles for + the DMA controllers the router can direct the signal to. +- #dma-cells: Must be at least 1. Used to provide DMA router specific + information. See DMA client binding below for more + details. + +Optional properties: +- dma-requests: Number of incoming request lines the router can handle. +- In the node pointed by the dma-masters: + - dma-requests: The router driver might need to look for this in order + to configure the routing. + +Example: + sdma_xbar: dma-router@4a002b78 { + compatible = "ti,dra7-dma-crossbar"; + reg = <0x4a002b78 0xfc>; + #dma-cells = <1>; + dma-requests = <205>; + ti,dma-safe-map = <0>; + dma-masters = <&sdma>; + }; * DMA client diff --git a/Documentation/devicetree/bindings/dma/mv-xor.txt b/Documentation/devicetree/bindings/dma/mv-xor.txt index 7c6cb7fcecd2..cc29c35266e2 100644 --- a/Documentation/devicetree/bindings/dma/mv-xor.txt +++ b/Documentation/devicetree/bindings/dma/mv-xor.txt @@ -1,7 +1,7 @@ * Marvell XOR engines Required properties: -- compatible: Should be "marvell,orion-xor" +- compatible: Should be "marvell,orion-xor" or "marvell,armada-380-xor" - reg: Should contain registers location and length (two sets) the first set is the low registers, the second set the high registers for the XOR engine. diff --git a/Documentation/devicetree/bindings/dma/sirfsoc-dma.txt b/Documentation/devicetree/bindings/dma/sirfsoc-dma.txt index ecbc96ad36f8..ccd52d6a231a 100644 --- a/Documentation/devicetree/bindings/dma/sirfsoc-dma.txt +++ b/Documentation/devicetree/bindings/dma/sirfsoc-dma.txt @@ -3,7 +3,8 @@ See dma.txt first Required properties: -- compatible: Should be "sirf,prima2-dmac" or "sirf,marco-dmac" +- compatible: Should be "sirf,prima2-dmac", "sirf,atlas7-dmac" or + "sirf,atlas7-dmac-v2" - reg: Should contain DMA registers location and length. - interrupts: Should contain one interrupt shared by all channel - #dma-cells: must be <1>. used to represent the number of integer diff --git a/Documentation/devicetree/bindings/dma/sun6i-dma.txt b/Documentation/devicetree/bindings/dma/sun6i-dma.txt index 9cdcba24d7c3..d13c136cef8c 100644 --- a/Documentation/devicetree/bindings/dma/sun6i-dma.txt +++ b/Documentation/devicetree/bindings/dma/sun6i-dma.txt @@ -4,7 +4,10 @@ This driver follows the generic DMA bindings defined in dma.txt. Required properties: -- compatible: Must be "allwinner,sun6i-a31-dma" or "allwinner,sun8i-a23-dma" +- compatible: Must be one of + "allwinner,sun6i-a31-dma" + "allwinner,sun8i-a23-dma" + "allwinner,sun8i-h3-dma" - reg: Should contain the registers base address and length - interrupts: Should contain a reference to the interrupt used by this device - clocks: Should contain a reference to the parent AHB clock diff --git a/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt b/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt new file mode 100644 index 000000000000..63a48928f3a8 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt @@ -0,0 +1,52 @@ +Texas Instruments DMA Crossbar (DMA request router) + +Required properties: +- compatible: "ti,dra7-dma-crossbar" for DRA7xx DMA crossbar +- reg: Memory map for accessing module +- #dma-cells: Should be set to <1>. + Clients should use the crossbar request number (input) +- dma-requests: Number of DMA requests the crossbar can receive +- dma-masters: phandle pointing to the DMA controller + +The DMA controller node need to have the following poroperties: +- dma-requests: Number of DMA requests the controller can handle + +Optional properties: +- ti,dma-safe-map: Safe routing value for unused request lines + +Example: + +/* DMA controller */ +sdma: dma-controller@4a056000 { + compatible = "ti,omap4430-sdma"; + reg = <0x4a056000 0x1000>; + interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; + #dma-cells = <1>; + dma-channels = <32>; + dma-requests = <127>; +}; + +/* DMA crossbar */ +sdma_xbar: dma-router@4a002b78 { + compatible = "ti,dra7-dma-crossbar"; + reg = <0x4a002b78 0xfc>; + #dma-cells = <1>; + dma-requests = <205>; + ti,dma-safe-map = <0>; + dma-masters = <&sdma>; +}; + +/* DMA client */ +uart1: serial@4806a000 { + compatible = "ti,omap4-uart"; + reg = <0x4806a000 0x100>; + interrupts-extended = <&gic GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>; + ti,hwmods = "uart1"; + clock-frequency = <48000000>; + status = "disabled"; + dmas = <&sdma_xbar 49>, <&sdma_xbar 50>; + dma-names = "tx", "rx"; +}; diff --git a/Documentation/devicetree/bindings/gpio/gpio-ath79.txt b/Documentation/devicetree/bindings/gpio/gpio-ath79.txt new file mode 100644 index 000000000000..c522851017ae --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/gpio-ath79.txt @@ -0,0 +1,38 @@ +Binding for Qualcomm Atheros AR7xxx/AR9xxx GPIO controller + +Required properties: +- compatible: has to be "qca,<soctype>-gpio" and one of the following + fallbacks: + - "qca,ar7100-gpio" + - "qca,ar9340-gpio" +- reg: Base address and size of the controllers memory area +- gpio-controller : Marks the device node as a GPIO controller. +- #gpio-cells : Should be two. The first cell is the pin number and the + second cell is used to specify optional parameters. +- ngpios: Should be set to the number of GPIOs available on the SoC. + +Optional properties: +- interrupt-parent: phandle of the parent interrupt controller. +- interrupts: Interrupt specifier for the controllers interrupt. +- interrupt-controller : Identifies the node as an interrupt controller +- #interrupt-cells : Specifies the number of cells needed to encode interrupt + source, should be 2 + +Please refer to interrupts.txt in this directory for details of the common +Interrupt Controllers bindings used by client devices. + +Example: + + gpio@18040000 { + compatible = "qca,ar9132-gpio", "qca,ar7100-gpio"; + reg = <0x18040000 0x30>; + interrupts = <2>; + + ngpios = <22>; + + gpio-controller; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + }; diff --git a/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.txt b/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.txt new file mode 100644 index 000000000000..5f89fb635a1b --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.txt @@ -0,0 +1,28 @@ +Ingenic SoC Interrupt Controller + +Required properties: + +- compatible : should be "ingenic,<socname>-intc". Valid strings are: + ingenic,jz4740-intc + ingenic,jz4770-intc + ingenic,jz4775-intc + ingenic,jz4780-intc +- reg : Specifies base physical address and size of the registers. +- interrupt-controller : Identifies the node as an interrupt controller +- #interrupt-cells : Specifies the number of cells needed to encode an + interrupt source. The value shall be 1. +- interrupt-parent : phandle of the CPU interrupt controller. +- interrupts : Specifies the CPU interrupt the controller is connected to. + +Example: + +intc: interrupt-controller@10001000 { + compatible = "ingenic,jz4740-intc"; + reg = <0x10001000 0x14>; + + interrupt-controller; + #interrupt-cells = <1>; + + interrupt-parent = <&cpuintc>; + interrupts = <2>; +}; diff --git a/Documentation/devicetree/bindings/interrupt-controller/qca,ath79-cpu-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/qca,ath79-cpu-intc.txt new file mode 100644 index 000000000000..aabce7810d29 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/qca,ath79-cpu-intc.txt @@ -0,0 +1,44 @@ +Binding for Qualcomm Atheros AR7xxx/AR9XXX CPU interrupt controller + +On most SoC the IRQ controller need to flush the DDR FIFO before running +the interrupt handler of some devices. This is configured using the +qca,ddr-wb-channels and qca,ddr-wb-channel-interrupts properties. + +Required Properties: + +- compatible: has to be "qca,<soctype>-cpu-intc", "qca,ar7100-cpu-intc" + as fallback +- interrupt-controller : Identifies the node as an interrupt controller +- #interrupt-cells : Specifies the number of cells needed to encode interrupt + source, should be 1 for intc + +Please refer to interrupts.txt in this directory for details of the common +Interrupt Controllers bindings used by client devices. + +Optional Properties: + +- qca,ddr-wb-channel-interrupts: List of the interrupts needing a write + buffer flush +- qca,ddr-wb-channels: List of phandles to the write buffer channels for + each interrupt. If qca,ddr-wb-channel-interrupts is not present the interrupt + default to the entry's index. + +Example: + + interrupt-controller { + compatible = "qca,ar9132-cpu-intc", "qca,ar7100-cpu-intc"; + + interrupt-controller; + #interrupt-cells = <1>; + + qca,ddr-wb-channel-interrupts = <2>, <3>, <4>, <5>; + qca,ddr-wb-channels = <&ddr_ctrl 3>, <&ddr_ctrl 2>, + <&ddr_ctrl 0>, <&ddr_ctrl 1>; + }; + + ... + + ddr_ctrl: memory-controller@18000000 { + ... + #qca,ddr-wb-channel-cells = <1>; + }; diff --git a/Documentation/devicetree/bindings/interrupt-controller/qca,ath79-misc-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/qca,ath79-misc-intc.txt new file mode 100644 index 000000000000..391717a68f3b --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/qca,ath79-misc-intc.txt @@ -0,0 +1,30 @@ +Binding for Qualcomm Atheros AR7xxx/AR9XXX MISC interrupt controller + +The MISC interrupt controller is a secondary controller for lower priority +interrupt. + +Required Properties: +- compatible: has to be "qca,<soctype>-cpu-intc", "qca,ar7100-misc-intc" + as fallback +- reg: Base address and size of the controllers memory area +- interrupt-parent: phandle of the parent interrupt controller. +- interrupts: Interrupt specifier for the controllers interrupt. +- interrupt-controller : Identifies the node as an interrupt controller +- #interrupt-cells : Specifies the number of cells needed to encode interrupt + source, should be 1 + +Please refer to interrupts.txt in this directory for details of the common +Interrupt Controllers bindings used by client devices. + +Example: + + interrupt-controller@18060010 { + compatible = "qca,ar9132-misc-intc", qca,ar7100-misc-intc"; + reg = <0x18060010 0x4>; + + interrupt-parent = <&cpuintc>; + interrupts = <6>; + + interrupt-controller; + #interrupt-cells = <1>; + }; diff --git a/Documentation/devicetree/bindings/memory-controllers/ath79-ddr-controller.txt b/Documentation/devicetree/bindings/memory-controllers/ath79-ddr-controller.txt new file mode 100644 index 000000000000..efe35a065714 --- /dev/null +++ b/Documentation/devicetree/bindings/memory-controllers/ath79-ddr-controller.txt @@ -0,0 +1,35 @@ +Binding for Qualcomm Atheros AR7xxx/AR9xxx DDR controller + +The DDR controller of the ARxxx and AR9xxx families provides an interface +to flush the FIFO between various devices and the DDR. This is mainly used +by the IRQ controller to flush the FIFO before running the interrupt handler +of such devices. + +Required properties: + +- compatible: has to be "qca,<soc-type>-ddr-controller", + "qca,[ar7100|ar7240]-ddr-controller" as fallback. + On SoC with PCI support "qca,ar7100-ddr-controller" should be used as + fallback, otherwise "qca,ar7240-ddr-controller" should be used. +- reg: Base address and size of the controllers memory area +- #qca,ddr-wb-channel-cells: has to be 1, the index of the write buffer + channel + +Example: + + ddr_ctrl: memory-controller@18000000 { + compatible = "qca,ar9132-ddr-controller", + "qca,ar7240-ddr-controller"; + reg = <0x18000000 0x100>; + + #qca,ddr-wb-channel-cells = <1>; + }; + + ... + + interrupt-controller { + ... + qca,ddr-wb-channel-interrupts = <2>, <3>, <4>, <5>; + qca,ddr-wb-channels = <&ddr_ctrl 3>, <&ddr_ctrl 2>, + <&ddr_ctrl 0>, <&ddr_ctrl 1>; + }; diff --git a/Documentation/devicetree/bindings/mips/ath79-soc.txt b/Documentation/devicetree/bindings/mips/ath79-soc.txt new file mode 100644 index 000000000000..88a12a43e44e --- /dev/null +++ b/Documentation/devicetree/bindings/mips/ath79-soc.txt @@ -0,0 +1,21 @@ +Binding for Qualcomm Atheros AR7xxx/AR9XXX SoC + +Each device tree must specify a compatible value for the AR SoC +it uses in the compatible property of the root node. The compatible +value must be one of the following values: + +- qca,ar7130 +- qca,ar7141 +- qca,ar7161 +- qca,ar7240 +- qca,ar7241 +- qca,ar7242 +- qca,ar9130 +- qca,ar9132 +- qca,ar9330 +- qca,ar9331 +- qca,ar9341 +- qca,ar9342 +- qca,ar9344 +- qca,qca9556 +- qca,qca9558 diff --git a/Documentation/devicetree/bindings/phy/pistachio-usb-phy.txt b/Documentation/devicetree/bindings/phy/pistachio-usb-phy.txt new file mode 100644 index 000000000000..afbc7e24a3de --- /dev/null +++ b/Documentation/devicetree/bindings/phy/pistachio-usb-phy.txt @@ -0,0 +1,29 @@ +IMG Pistachio USB PHY +===================== + +Required properties: +-------------------- + - compatible: Must be "img,pistachio-usb-phy". + - #phy-cells: Must be 0. See ./phy-bindings.txt for details. + - clocks: Must contain an entry for each entry in clock-names. + See ../clock/clock-bindings.txt for details. + - clock-names: Must include "usb_phy". + - img,cr-top: Must constain a phandle to the CR_TOP syscon node. + - img,refclk: Indicates the reference clock source for the USB PHY. + See <dt-bindings/phy/phy-pistachio-usb.h> for a list of valid values. + +Optional properties: +-------------------- + - phy-supply: USB VBUS supply. Must supply 5.0V. + +Example: +-------- +usb_phy: usb-phy { + compatible = "img,pistachio-usb-phy"; + clocks = <&clk_core CLK_USB_PHY>; + clock-names = "usb_phy"; + phy-supply = <&usb_vbus>; + img,refclk = <REFCLK_CLK_CORE>; + img,cr-top = <&cr_top>; + #phy-cells = <0>; +}; diff --git a/Documentation/devicetree/bindings/serial/ingenic,uart.txt b/Documentation/devicetree/bindings/serial/ingenic,uart.txt new file mode 100644 index 000000000000..c2d3b3abe7d9 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/ingenic,uart.txt @@ -0,0 +1,22 @@ +* Ingenic SoC UART + +Required properties: +- compatible : "ingenic,jz4740-uart" or "ingenic,jz4780-uart" +- reg : offset and length of the register set for the device. +- interrupts : should contain uart interrupt. +- clocks : phandles to the module & baud clocks. +- clock-names: tuple listing input clock names. + Required elements: "baud", "module" + +Example: + +uart0: serial@10030000 { + compatible = "ingenic,jz4740-uart"; + reg = <0x10030000 0x100>; + + interrupt-parent = <&intc>; + interrupts = <9>; + + clocks = <&ext>, <&cgu JZ4740_CLK_UART0>; + clock-names = "baud", "module"; +}; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 8e8f4bc6fcf1..7b607761b774 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -106,6 +106,7 @@ ibm International Business Machines (IBM) idt Integrated Device Technologies, Inc. iom Iomega Corporation img Imagination Technologies Ltd. +ingenic Ingenic Semiconductor innolux Innolux Corporation intel Intel Corporation intercontrol Inter Control Group @@ -161,6 +162,7 @@ powervr PowerVR (deprecated, use img) qca Qualcomm Atheros, Inc. qcom Qualcomm Technologies, Inc qemu QEMU, a generic and open source machine emulator and virtualizer +qi Qi Hardware qnap QNAP Systems, Inc. radxa Radxa raidsonic RaidSonic Technology GmbH @@ -206,6 +208,7 @@ tlm Trusted Logic Mobility toradex Toradex AG toshiba Toshiba Corporation toumaz Toumaz +tplink TP-LINK Technologies Co., Ltd. truly Truly Semiconductors Limited usi Universal Scientific Industrial Co., Ltd. v3 V3 Semiconductor diff --git a/Documentation/dmaengine/provider.txt b/Documentation/dmaengine/provider.txt index 05d2280190f1..ca67b0f04c6e 100644 --- a/Documentation/dmaengine/provider.txt +++ b/Documentation/dmaengine/provider.txt @@ -345,11 +345,12 @@ where to put them) that abstracts it away. * DMA_CTRL_ACK - - Undocumented feature - - No one really has an idea of what it's about, besides being - related to reusing the DMA transaction descriptors or having - additional transactions added to it in the async-tx API - - Useless in the case of the slave API + - If set, the transfer can be reused after being completed. + - There is a guarantee the transfer won't be freed until it is acked + by async_tx_ack(). + - As a consequence, if a device driver wants to skip the dma_map_sg() and + dma_unmap_sg() in between 2 transfers, because the DMA'd data wasn't used, + it can resubmit the transfer right after its completion. General Design Notes -------------------- diff --git a/Documentation/dmaengine/pxa_dma.txt b/Documentation/dmaengine/pxa_dma.txt new file mode 100644 index 000000000000..413ef9cfaa4d --- /dev/null +++ b/Documentation/dmaengine/pxa_dma.txt @@ -0,0 +1,153 @@ +PXA/MMP - DMA Slave controller +============================== + +Constraints +----------- + a) Transfers hot queuing + A driver submitting a transfer and issuing it should be granted the transfer + is queued even on a running DMA channel. + This implies that the queuing doesn't wait for the previous transfer end, + and that the descriptor chaining is not only done in the irq/tasklet code + triggered by the end of the transfer. + A transfer which is submitted and issued on a phy doesn't wait for a phy to + stop and restart, but is submitted on a "running channel". The other + drivers, especially mmp_pdma waited for the phy to stop before relaunching + a new transfer. + + b) All transfers having asked for confirmation should be signaled + Any issued transfer with DMA_PREP_INTERRUPT should trigger a callback call. + This implies that even if an irq/tasklet is triggered by end of tx1, but + at the time of irq/dma tx2 is already finished, tx1->complete() and + tx2->complete() should be called. + + c) Channel running state + A driver should be able to query if a channel is running or not. For the + multimedia case, such as video capture, if a transfer is submitted and then + a check of the DMA channel reports a "stopped channel", the transfer should + not be issued until the next "start of frame interrupt", hence the need to + know if a channel is in running or stopped state. + + d) Bandwidth guarantee + The PXA architecture has 4 levels of DMAs priorities : high, normal, low. + The high prorities get twice as much bandwidth as the normal, which get twice + as much as the low priorities. + A driver should be able to request a priority, especially the real-time + ones such as pxa_camera with (big) throughputs. + +Design +------ + a) Virtual channels + Same concept as in sa11x0 driver, ie. a driver was assigned a "virtual + channel" linked to the requestor line, and the physical DMA channel is + assigned on the fly when the transfer is issued. + + b) Transfer anatomy for a scatter-gather transfer + +------------+-----+---------------+----------------+-----------------+ + | desc-sg[0] | ... | desc-sg[last] | status updater | finisher/linker | + +------------+-----+---------------+----------------+-----------------+ + + This structure is pointed by dma->sg_cpu. + The descriptors are used as follows : + - desc-sg[i]: i-th descriptor, transferring the i-th sg + element to the video buffer scatter gather + - status updater + Transfers a single u32 to a well known dma coherent memory to leave + a trace that this transfer is done. The "well known" is unique per + physical channel, meaning that a read of this value will tell which + is the last finished transfer at that point in time. + - finisher: has ddadr=DADDR_STOP, dcmd=ENDIRQEN + - linker: has ddadr= desc-sg[0] of next transfer, dcmd=0 + + c) Transfers hot-chaining + Suppose the running chain is : + Buffer 1 Buffer 2 + +---------+----+---+ +----+----+----+---+ + | d0 | .. | dN | l | | d0 | .. | dN | f | + +---------+----+-|-+ ^----+----+----+---+ + | | + +----+ + + After a call to dmaengine_submit(b3), the chain will look like : + Buffer 1 Buffer 2 Buffer 3 + +---------+----+---+ +----+----+----+---+ +----+----+----+---+ + | d0 | .. | dN | l | | d0 | .. | dN | l | | d0 | .. | dN | f | + +---------+----+-|-+ ^----+----+----+-|-+ ^----+----+----+---+ + | | | | + +----+ +----+ + new_link + + If while new_link was created the DMA channel stopped, it is _not_ + restarted. Hot-chaining doesn't break the assumption that + dma_async_issue_pending() is to be used to ensure the transfer is actually started. + + One exception to this rule : + - if Buffer1 and Buffer2 had all their addresses 8 bytes aligned + - and if Buffer3 has at least one address not 4 bytes aligned + - then hot-chaining cannot happen, as the channel must be stopped, the + "align bit" must be set, and the channel restarted As a consequence, + such a transfer tx_submit() will be queued on the submitted queue, and + this specific case if the DMA is already running in aligned mode. + + d) Transfers completion updater + Each time a transfer is completed on a channel, an interrupt might be + generated or not, up to the client's request. But in each case, the last + descriptor of a transfer, the "status updater", will write the latest + transfer being completed into the physical channel's completion mark. + + This will speed up residue calculation, for large transfers such as video + buffers which hold around 6k descriptors or more. This also allows without + any lock to find out what is the latest completed transfer in a running + DMA chain. + + e) Transfers completion, irq and tasklet + When a transfer flagged as "DMA_PREP_INTERRUPT" is finished, the dma irq + is raised. Upon this interrupt, a tasklet is scheduled for the physical + channel. + The tasklet is responsible for : + - reading the physical channel last updater mark + - calling all the transfer callbacks of finished transfers, based on + that mark, and each transfer flags. + If a transfer is completed while this handling is done, a dma irq will + be raised, and the tasklet will be scheduled once again, having a new + updater mark. + + f) Residue + Residue granularity will be descriptor based. The issued but not completed + transfers will be scanned for all of their descriptors against the + currently running descriptor. + + g) Most complicated case of driver's tx queues + The most tricky situation is when : + - there are not "acked" transfers (tx0) + - a driver submitted an aligned tx1, not chained + - a driver submitted an aligned tx2 => tx2 is cold chained to tx1 + - a driver issued tx1+tx2 => channel is running in aligned mode + - a driver submitted an aligned tx3 => tx3 is hot-chained + - a driver submitted an unaligned tx4 => tx4 is put in submitted queue, + not chained + - a driver issued tx4 => tx4 is put in issued queue, not chained + - a driver submitted an aligned tx5 => tx5 is put in submitted queue, not + chained + - a driver submitted an aligned tx6 => tx6 is put in submitted queue, + cold chained to tx5 + + This translates into (after tx4 is issued) : + - issued queue + +-----+ +-----+ +-----+ +-----+ + | tx1 | | tx2 | | tx3 | | tx4 | + +---|-+ ^---|-+ ^-----+ +-----+ + | | | | + +---+ +---+ + - submitted queue + +-----+ +-----+ + | tx5 | | tx6 | + +---|-+ ^-----+ + | | + +---+ + - completed queue : empty + - allocated queue : tx0 + + It should be noted that after tx3 is completed, the channel is stopped, and + restarted in "unaligned mode" to handle tx4. + +Author: Robert Jarzmik <[email protected]> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8bb54c95cece..afe7e2bbbc23 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1413,7 +1413,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. The list of supported hash algorithms is defined in crypto/hash_info.h. - ima_tcb [IMA] + ima_policy= [IMA] + The builtin measurement policy to load during IMA + setup. Specyfing "tcb" as the value, measures all + programs exec'd, files mmap'd for exec, and all files + opened with the read mode bit set by either the + effective uid (euid=0) or uid=0. + Format: "tcb" + + ima_tcb [IMA] Deprecated. Use ima_policy= instead. Load a policy which meets the needs of the Trusted Computing Base. This means IMA will measure all programs exec'd, files mmap'd for exec, and all files @@ -1421,7 +1429,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ima_template= [IMA] Select one of defined IMA measurements template formats. - Formats: { "ima" | "ima-ng" } + Formats: { "ima" | "ima-ng" | "ima-sig" } Default: "ima-ng" ima_template_fmt= diff --git a/Documentation/nvdimm/btt.txt b/Documentation/nvdimm/btt.txt new file mode 100644 index 000000000000..b91443f577dc --- /dev/null +++ b/Documentation/nvdimm/btt.txt @@ -0,0 +1,283 @@ +BTT - Block Translation Table +============================= + + +1. Introduction +--------------- + +Persistent memory based storage is able to perform IO at byte (or more +accurately, cache line) granularity. However, we often want to expose such +storage as traditional block devices. The block drivers for persistent memory +will do exactly this. However, they do not provide any atomicity guarantees. +Traditional SSDs typically provide protection against torn sectors in hardware, +using stored energy in capacitors to complete in-flight block writes, or perhaps +in firmware. We don't have this luxury with persistent memory - if a write is in +progress, and we experience a power failure, the block will contain a mix of old +and new data. Applications may not be prepared to handle such a scenario. + +The Block Translation Table (BTT) provides atomic sector update semantics for +persistent memory devices, so that applications that rely on sector writes not +being torn can continue to do so. The BTT manifests itself as a stacked block +device, and reserves a portion of the underlying storage for its metadata. At +the heart of it, is an indirection table that re-maps all the blocks on the +volume. It can be thought of as an extremely simple file system that only +provides atomic sector updates. + + +2. Static Layout +---------------- + +The underlying storage on which a BTT can be laid out is not limited in any way. +The BTT, however, splits the available space into chunks of up to 512 GiB, +called "Arenas". + +Each arena follows the same layout for its metadata, and all references in an +arena are internal to it (with the exception of one field that points to the +next arena). The following depicts the "On-disk" metadata layout: + + + Backing Store +-------> Arena ++---------------+ | +------------------+ +| | | | Arena info block | +| Arena 0 +---+ | 4K | +| 512G | +------------------+ +| | | | ++---------------+ | | +| | | | +| Arena 1 | | Data Blocks | +| 512G | | | +| | | | ++---------------+ | | +| . | | | +| . | | | +| . | | | +| | | | +| | | | ++---------------+ +------------------+ + | | + | BTT Map | + | | + | | + +------------------+ + | | + | BTT Flog | + | | + +------------------+ + | Info block copy | + | 4K | + +------------------+ + + +3. Theory of Operation +---------------------- + + +a. The BTT Map +-------------- + +The map is a simple lookup/indirection table that maps an LBA to an internal +block. Each map entry is 32 bits. The two most significant bits are special +flags, and the remaining form the internal block number. + +Bit Description +31 - 30 : Error and Zero flags - Used in the following way: + Bit Description + 31 30 + ----------------------------------------------------------------------- + 00 Initial state. Reads return zeroes; Premap = Postmap + 01 Zero state: Reads return zeroes + 10 Error state: Reads fail; Writes clear 'E' bit + 11 Normal Block – has valid postmap + + +29 - 0 : Mappings to internal 'postmap' blocks + + +Some of the terminology that will be subsequently used: + +External LBA : LBA as made visible to upper layers. +ABA : Arena Block Address - Block offset/number within an arena +Premap ABA : The block offset into an arena, which was decided upon by range + checking the External LBA +Postmap ABA : The block number in the "Data Blocks" area obtained after + indirection from the map +nfree : The number of free blocks that are maintained at any given time. + This is the number of concurrent writes that can happen to the + arena. + + +For example, after adding a BTT, we surface a disk of 1024G. We get a read for +the external LBA at 768G. This falls into the second arena, and of the 512G +worth of blocks that this arena contributes, this block is at 256G. Thus, the +premap ABA is 256G. We now refer to the map, and find out the mapping for block +'X' (256G) points to block 'Y', say '64'. Thus the postmap ABA is 64. + + +b. The BTT Flog +--------------- + +The BTT provides sector atomicity by making every write an "allocating write", +i.e. Every write goes to a "free" block. A running list of free blocks is +maintained in the form of the BTT flog. 'Flog' is a combination of the words +"free list" and "log". The flog contains 'nfree' entries, and an entry contains: + +lba : The premap ABA that is being written to +old_map : The old postmap ABA - after 'this' write completes, this will be a + free block. +new_map : The new postmap ABA. The map will up updated to reflect this + lba->postmap_aba mapping, but we log it here in case we have to + recover. +seq : Sequence number to mark which of the 2 sections of this flog entry is + valid/newest. It cycles between 01->10->11->01 (binary) under normal + operation, with 00 indicating an uninitialized state. +lba' : alternate lba entry +old_map': alternate old postmap entry +new_map': alternate new postmap entry +seq' : alternate sequence number. + +Each of the above fields is 32-bit, making one entry 32 bytes. Entries are also +padded to 64 bytes to avoid cache line sharing or aliasing. Flog updates are +done such that for any entry being written, it: +a. overwrites the 'old' section in the entry based on sequence numbers +b. writes the 'new' section such that the sequence number is written last. + + +c. The concept of lanes +----------------------- + +While 'nfree' describes the number of concurrent IOs an arena can process +concurrently, 'nlanes' is the number of IOs the BTT device as a whole can +process. + nlanes = min(nfree, num_cpus) +A lane number is obtained at the start of any IO, and is used for indexing into +all the on-disk and in-memory data structures for the duration of the IO. If +there are more CPUs than the max number of available lanes, than lanes are +protected by spinlocks. + + +d. In-memory data structure: Read Tracking Table (RTT) +------------------------------------------------------ + +Consider a case where we have two threads, one doing reads and the other, +writes. We can hit a condition where the writer thread grabs a free block to do +a new IO, but the (slow) reader thread is still reading from it. In other words, +the reader consulted a map entry, and started reading the corresponding block. A +writer started writing to the same external LBA, and finished the write updating +the map for that external LBA to point to its new postmap ABA. At this point the +internal, postmap block that the reader is (still) reading has been inserted +into the list of free blocks. If another write comes in for the same LBA, it can +grab this free block, and start writing to it, causing the reader to read +incorrect data. To prevent this, we introduce the RTT. + +The RTT is a simple, per arena table with 'nfree' entries. Every reader inserts +into rtt[lane_number], the postmap ABA it is reading, and clears it after the +read is complete. Every writer thread, after grabbing a free block, checks the +RTT for its presence. If the postmap free block is in the RTT, it waits till the +reader clears the RTT entry, and only then starts writing to it. + + +e. In-memory data structure: map locks +-------------------------------------- + +Consider a case where two writer threads are writing to the same LBA. There can +be a race in the following sequence of steps: + +free[lane] = map[premap_aba] +map[premap_aba] = postmap_aba + +Both threads can update their respective free[lane] with the same old, freed +postmap_aba. This has made the layout inconsistent by losing a free entry, and +at the same time, duplicating another free entry for two lanes. + +To solve this, we could have a single map lock (per arena) that has to be taken +before performing the above sequence, but we feel that could be too contentious. +Instead we use an array of (nfree) map_locks that is indexed by +(premap_aba modulo nfree). + + +f. Reconstruction from the Flog +------------------------------- + +On startup, we analyze the BTT flog to create our list of free blocks. We walk +through all the entries, and for each lane, of the set of two possible +'sections', we always look at the most recent one only (based on the sequence +number). The reconstruction rules/steps are simple: +- Read map[log_entry.lba]. +- If log_entry.new matches the map entry, then log_entry.old is free. +- If log_entry.new does not match the map entry, then log_entry.new is free. + (This case can only be caused by power-fails/unsafe shutdowns) + + +g. Summarizing - Read and Write flows +------------------------------------- + +Read: + +1. Convert external LBA to arena number + pre-map ABA +2. Get a lane (and take lane_lock) +3. Read map to get the entry for this pre-map ABA +4. Enter post-map ABA into RTT[lane] +5. If TRIM flag set in map, return zeroes, and end IO (go to step 8) +6. If ERROR flag set in map, end IO with EIO (go to step 8) +7. Read data from this block +8. Remove post-map ABA entry from RTT[lane] +9. Release lane (and lane_lock) + +Write: + +1. Convert external LBA to Arena number + pre-map ABA +2. Get a lane (and take lane_lock) +3. Use lane to index into in-memory free list and obtain a new block, next flog + index, next sequence number +4. Scan the RTT to check if free block is present, and spin/wait if it is. +5. Write data to this free block +6. Read map to get the existing post-map ABA entry for this pre-map ABA +7. Write flog entry: [premap_aba / old postmap_aba / new postmap_aba / seq_num] +8. Write new post-map ABA into map. +9. Write old post-map entry into the free list +10. Calculate next sequence number and write into the free list entry +11. Release lane (and lane_lock) + + +4. Error Handling +================= + +An arena would be in an error state if any of the metadata is corrupted +irrecoverably, either due to a bug or a media error. The following conditions +indicate an error: +- Info block checksum does not match (and recovering from the copy also fails) +- All internal available blocks are not uniquely and entirely addressed by the + sum of mapped blocks and free blocks (from the BTT flog). +- Rebuilding free list from the flog reveals missing/duplicate/impossible + entries +- A map entry is out of bounds + +If any of these error conditions are encountered, the arena is put into a read +only state using a flag in the info block. + + +5. In-kernel usage +================== + +Any block driver that supports byte granularity IO to the storage may register +with the BTT. It will have to provide the rw_bytes interface in its +block_device_operations struct: + + int (*rw_bytes)(struct gendisk *, void *, size_t, off_t, int rw); + +It may register with the BTT after it adds its own gendisk, using btt_init: + + struct btt *btt_init(struct gendisk *disk, unsigned long long rawsize, + u32 lbasize, u8 uuid[], int maxlane); + +note that maxlane is the maximum amount of concurrency the driver wishes to +allow the BTT to use. + +The BTT 'disk' appears as a stacked block device that grabs the underlying block +device in the O_EXCL mode. + +When the driver wishes to remove the backing disk, it should similarly call +btt_fini using the same struct btt* handle that was provided to it by btt_init. + + void btt_fini(struct btt *btt); + diff --git a/Documentation/nvdimm/nvdimm.txt b/Documentation/nvdimm/nvdimm.txt new file mode 100644 index 000000000000..197a0b6b0582 --- /dev/null +++ b/Documentation/nvdimm/nvdimm.txt @@ -0,0 +1,808 @@ + LIBNVDIMM: Non-Volatile Devices + libnvdimm - kernel / libndctl - userspace helper library + v13 + + + Glossary + Overview + Supporting Documents + Git Trees + LIBNVDIMM PMEM and BLK + Why BLK? + PMEM vs BLK + BLK-REGIONs, PMEM-REGIONs, Atomic Sectors, and DAX + Example NVDIMM Platform + LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API + LIBNDCTL: Context + libndctl: instantiate a new library context example + LIBNVDIMM/LIBNDCTL: Bus + libnvdimm: control class device in /sys/class + libnvdimm: bus + libndctl: bus enumeration example + LIBNVDIMM/LIBNDCTL: DIMM (NMEM) + libnvdimm: DIMM (NMEM) + libndctl: DIMM enumeration example + LIBNVDIMM/LIBNDCTL: Region + libnvdimm: region + libndctl: region enumeration example + Why Not Encode the Region Type into the Region Name? + How Do I Determine the Major Type of a Region? + LIBNVDIMM/LIBNDCTL: Namespace + libnvdimm: namespace + libndctl: namespace enumeration example + libndctl: namespace creation example + Why the Term "namespace"? + LIBNVDIMM/LIBNDCTL: Block Translation Table "btt" + libnvdimm: btt layout + libndctl: btt creation example + Summary LIBNDCTL Diagram + + +Glossary +-------- + +PMEM: A system-physical-address range where writes are persistent. A +block device composed of PMEM is capable of DAX. A PMEM address range +may span an interleave of several DIMMs. + +BLK: A set of one or more programmable memory mapped apertures provided +by a DIMM to access its media. This indirection precludes the +performance benefit of interleaving, but enables DIMM-bounded failure +modes. + +DPA: DIMM Physical Address, is a DIMM-relative offset. With one DIMM in +the system there would be a 1:1 system-physical-address:DPA association. +Once more DIMMs are added a memory controller interleave must be +decoded to determine the DPA associated with a given +system-physical-address. BLK capacity always has a 1:1 relationship +with a single-DIMM's DPA range. + +DAX: File system extensions to bypass the page cache and block layer to +mmap persistent memory, from a PMEM block device, directly into a +process address space. + +BTT: Block Translation Table: Persistent memory is byte addressable. +Existing software may have an expectation that the power-fail-atomicity +of writes is at least one sector, 512 bytes. The BTT is an indirection +table with atomic update semantics to front a PMEM/BLK block device +driver and present arbitrary atomic sector sizes. + +LABEL: Metadata stored on a DIMM device that partitions and identifies +(persistently names) storage between PMEM and BLK. It also partitions +BLK storage to host BTTs with different parameters per BLK-partition. +Note that traditional partition tables, GPT/MBR, are layered on top of a +BLK or PMEM device. + + +Overview +-------- + +The LIBNVDIMM subsystem provides support for three types of NVDIMMs, namely, +PMEM, BLK, and NVDIMM devices that can simultaneously support both PMEM +and BLK mode access. These three modes of operation are described by +the "NVDIMM Firmware Interface Table" (NFIT) in ACPI 6. While the LIBNVDIMM +implementation is generic and supports pre-NFIT platforms, it was guided +by the superset of capabilities need to support this ACPI 6 definition +for NVDIMM resources. The bulk of the kernel implementation is in place +to handle the case where DPA accessible via PMEM is aliased with DPA +accessible via BLK. When that occurs a LABEL is needed to reserve DPA +for exclusive access via one mode a time. + +Supporting Documents +ACPI 6: http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf +NVDIMM Namespace: http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf +DSM Interface Example: http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf +Driver Writer's Guide: http://pmem.io/documents/NVDIMM_Driver_Writers_Guide.pdf + +Git Trees +LIBNVDIMM: https://git.kernel.org/cgit/linux/kernel/git/djbw/nvdimm.git +LIBNDCTL: https://github.com/pmem/ndctl.git +PMEM: https://github.com/01org/prd + + +LIBNVDIMM PMEM and BLK +------------------ + +Prior to the arrival of the NFIT, non-volatile memory was described to a +system in various ad-hoc ways. Usually only the bare minimum was +provided, namely, a single system-physical-address range where writes +are expected to be durable after a system power loss. Now, the NFIT +specification standardizes not only the description of PMEM, but also +BLK and platform message-passing entry points for control and +configuration. + +For each NVDIMM access method (PMEM, BLK), LIBNVDIMM provides a block +device driver: + + 1. PMEM (nd_pmem.ko): Drives a system-physical-address range. This + range is contiguous in system memory and may be interleaved (hardware + memory controller striped) across multiple DIMMs. When interleaved the + platform may optionally provide details of which DIMMs are participating + in the interleave. + + Note that while LIBNVDIMM describes system-physical-address ranges that may + alias with BLK access as ND_NAMESPACE_PMEM ranges and those without + alias as ND_NAMESPACE_IO ranges, to the nd_pmem driver there is no + distinction. The different device-types are an implementation detail + that userspace can exploit to implement policies like "only interface + with address ranges from certain DIMMs". It is worth noting that when + aliasing is present and a DIMM lacks a label, then no block device can + be created by default as userspace needs to do at least one allocation + of DPA to the PMEM range. In contrast ND_NAMESPACE_IO ranges, once + registered, can be immediately attached to nd_pmem. + + 2. BLK (nd_blk.ko): This driver performs I/O using a set of platform + defined apertures. A set of apertures will all access just one DIMM. + Multiple windows allow multiple concurrent accesses, much like + tagged-command-queuing, and would likely be used by different threads or + different CPUs. + + The NFIT specification defines a standard format for a BLK-aperture, but + the spec also allows for vendor specific layouts, and non-NFIT BLK + implementations may other designs for BLK I/O. For this reason "nd_blk" + calls back into platform-specific code to perform the I/O. One such + implementation is defined in the "Driver Writer's Guide" and "DSM + Interface Example". + + +Why BLK? +-------- + +While PMEM provides direct byte-addressable CPU-load/store access to +NVDIMM storage, it does not provide the best system RAS (recovery, +availability, and serviceability) model. An access to a corrupted +system-physical-address address causes a cpu exception while an access +to a corrupted address through an BLK-aperture causes that block window +to raise an error status in a register. The latter is more aligned with +the standard error model that host-bus-adapter attached disks present. +Also, if an administrator ever wants to replace a memory it is easier to +service a system at DIMM module boundaries. Compare this to PMEM where +data could be interleaved in an opaque hardware specific manner across +several DIMMs. + +PMEM vs BLK +BLK-apertures solve this RAS problem, but their presence is also the +major contributing factor to the complexity of the ND subsystem. They +complicate the implementation because PMEM and BLK alias in DPA space. +Any given DIMM's DPA-range may contribute to one or more +system-physical-address sets of interleaved DIMMs, *and* may also be +accessed in its entirety through its BLK-aperture. Accessing a DPA +through a system-physical-address while simultaneously accessing the +same DPA through a BLK-aperture has undefined results. For this reason, +DIMMs with this dual interface configuration include a DSM function to +store/retrieve a LABEL. The LABEL effectively partitions the DPA-space +into exclusive system-physical-address and BLK-aperture accessible +regions. For simplicity a DIMM is allowed a PMEM "region" per each +interleave set in which it is a member. The remaining DPA space can be +carved into an arbitrary number of BLK devices with discontiguous +extents. + +BLK-REGIONs, PMEM-REGIONs, Atomic Sectors, and DAX +-------------------------------------------------- + +One of the few +reasons to allow multiple BLK namespaces per REGION is so that each +BLK-namespace can be configured with a BTT with unique atomic sector +sizes. While a PMEM device can host a BTT the LABEL specification does +not provide for a sector size to be specified for a PMEM namespace. +This is due to the expectation that the primary usage model for PMEM is +via DAX, and the BTT is incompatible with DAX. However, for the cases +where an application or filesystem still needs atomic sector update +guarantees it can register a BTT on a PMEM device or partition. See +LIBNVDIMM/NDCTL: Block Translation Table "btt" + + +Example NVDIMM Platform +----------------------- + +For the remainder of this document the following diagram will be +referenced for any example sysfs layouts. + + + (a) (b) DIMM BLK-REGION + +-------------------+--------+--------+--------+ ++------+ | pm0.0 | blk2.0 | pm1.0 | blk2.1 | 0 region2 +| imc0 +--+- - - region0- - - +--------+ +--------+ ++--+---+ | pm0.0 | blk3.0 | pm1.0 | blk3.1 | 1 region3 + | +-------------------+--------v v--------+ ++--+---+ | | +| cpu0 | region1 ++--+---+ | | + | +----------------------------^ ^--------+ ++--+---+ | blk4.0 | pm1.0 | blk4.0 | 2 region4 +| imc1 +--+----------------------------| +--------+ ++------+ | blk5.0 | pm1.0 | blk5.0 | 3 region5 + +----------------------------+--------+--------+ + +In this platform we have four DIMMs and two memory controllers in one +socket. Each unique interface (BLK or PMEM) to DPA space is identified +by a region device with a dynamically assigned id (REGION0 - REGION5). + + 1. The first portion of DIMM0 and DIMM1 are interleaved as REGION0. A + single PMEM namespace is created in the REGION0-SPA-range that spans + DIMM0 and DIMM1 with a user-specified name of "pm0.0". Some of that + interleaved system-physical-address range is reclaimed as BLK-aperture + accessed space starting at DPA-offset (a) into each DIMM. In that + reclaimed space we create two BLK-aperture "namespaces" from REGION2 and + REGION3 where "blk2.0" and "blk3.0" are just human readable names that + could be set to any user-desired name in the LABEL. + + 2. In the last portion of DIMM0 and DIMM1 we have an interleaved + system-physical-address range, REGION1, that spans those two DIMMs as + well as DIMM2 and DIMM3. Some of REGION1 allocated to a PMEM namespace + named "pm1.0" the rest is reclaimed in 4 BLK-aperture namespaces (for + each DIMM in the interleave set), "blk2.1", "blk3.1", "blk4.0", and + "blk5.0". + + 3. The portion of DIMM2 and DIMM3 that do not participate in the REGION1 + interleaved system-physical-address range (i.e. the DPA address below + offset (b) are also included in the "blk4.0" and "blk5.0" namespaces. + Note, that this example shows that BLK-aperture namespaces don't need to + be contiguous in DPA-space. + + This bus is provided by the kernel under the device + /sys/devices/platform/nfit_test.0 when CONFIG_NFIT_TEST is enabled and + the nfit_test.ko module is loaded. This not only test LIBNVDIMM but the + acpi_nfit.ko driver as well. + + +LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API +---------------------------------------------------- + +What follows is a description of the LIBNVDIMM sysfs layout and a +corresponding object hierarchy diagram as viewed through the LIBNDCTL +api. The example sysfs paths and diagrams are relative to the Example +NVDIMM Platform which is also the LIBNVDIMM bus used in the LIBNDCTL unit +test. + +LIBNDCTL: Context +Every api call in the LIBNDCTL library requires a context that holds the +logging parameters and other library instance state. The library is +based on the libabc template: +https://git.kernel.org/cgit/linux/kernel/git/kay/libabc.git/ + +LIBNDCTL: instantiate a new library context example + + struct ndctl_ctx *ctx; + + if (ndctl_new(&ctx) == 0) + return ctx; + else + return NULL; + +LIBNVDIMM/LIBNDCTL: Bus +------------------- + +A bus has a 1:1 relationship with an NFIT. The current expectation for +ACPI based systems is that there is only ever one platform-global NFIT. +That said, it is trivial to register multiple NFITs, the specification +does not preclude it. The infrastructure supports multiple busses and +we we use this capability to test multiple NFIT configurations in the +unit test. + +LIBNVDIMM: control class device in /sys/class + +This character device accepts DSM messages to be passed to DIMM +identified by its NFIT handle. + + /sys/class/nd/ndctl0 + |-- dev + |-- device -> ../../../ndbus0 + |-- subsystem -> ../../../../../../../class/nd + + + +LIBNVDIMM: bus + + struct nvdimm_bus *nvdimm_bus_register(struct device *parent, + struct nvdimm_bus_descriptor *nfit_desc); + + /sys/devices/platform/nfit_test.0/ndbus0 + |-- commands + |-- nd + |-- nfit + |-- nmem0 + |-- nmem1 + |-- nmem2 + |-- nmem3 + |-- power + |-- provider + |-- region0 + |-- region1 + |-- region2 + |-- region3 + |-- region4 + |-- region5 + |-- uevent + `-- wait_probe + +LIBNDCTL: bus enumeration example +Find the bus handle that describes the bus from Example NVDIMM Platform + + static struct ndctl_bus *get_bus_by_provider(struct ndctl_ctx *ctx, + const char *provider) + { + struct ndctl_bus *bus; + + ndctl_bus_foreach(ctx, bus) + if (strcmp(provider, ndctl_bus_get_provider(bus)) == 0) + return bus; + + return NULL; + } + + bus = get_bus_by_provider(ctx, "nfit_test.0"); + + +LIBNVDIMM/LIBNDCTL: DIMM (NMEM) +--------------------------- + +The DIMM device provides a character device for sending commands to +hardware, and it is a container for LABELs. If the DIMM is defined by +NFIT then an optional 'nfit' attribute sub-directory is available to add +NFIT-specifics. + +Note that the kernel device name for "DIMMs" is "nmemX". The NFIT +describes these devices via "Memory Device to System Physical Address +Range Mapping Structure", and there is no requirement that they actually +be physical DIMMs, so we use a more generic name. + +LIBNVDIMM: DIMM (NMEM) + + struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, + const struct attribute_group **groups, unsigned long flags, + unsigned long *dsm_mask); + + /sys/devices/platform/nfit_test.0/ndbus0 + |-- nmem0 + | |-- available_slots + | |-- commands + | |-- dev + | |-- devtype + | |-- driver -> ../../../../../bus/nd/drivers/nvdimm + | |-- modalias + | |-- nfit + | | |-- device + | | |-- format + | | |-- handle + | | |-- phys_id + | | |-- rev_id + | | |-- serial + | | `-- vendor + | |-- state + | |-- subsystem -> ../../../../../bus/nd + | `-- uevent + |-- nmem1 + [..] + + +LIBNDCTL: DIMM enumeration example + +Note, in this example we are assuming NFIT-defined DIMMs which are +identified by an "nfit_handle" a 32-bit value where: +Bit 3:0 DIMM number within the memory channel +Bit 7:4 memory channel number +Bit 11:8 memory controller ID +Bit 15:12 socket ID (within scope of a Node controller if node controller is present) +Bit 27:16 Node Controller ID +Bit 31:28 Reserved + + static struct ndctl_dimm *get_dimm_by_handle(struct ndctl_bus *bus, + unsigned int handle) + { + struct ndctl_dimm *dimm; + + ndctl_dimm_foreach(bus, dimm) + if (ndctl_dimm_get_handle(dimm) == handle) + return dimm; + + return NULL; + } + + #define DIMM_HANDLE(n, s, i, c, d) \ + (((n & 0xfff) << 16) | ((s & 0xf) << 12) | ((i & 0xf) << 8) \ + | ((c & 0xf) << 4) | (d & 0xf)) + + dimm = get_dimm_by_handle(bus, DIMM_HANDLE(0, 0, 0, 0, 0)); + +LIBNVDIMM/LIBNDCTL: Region +---------------------- + +A generic REGION device is registered for each PMEM range orBLK-aperture +set. Per the example there are 6 regions: 2 PMEM and 4 BLK-aperture +sets on the "nfit_test.0" bus. The primary role of regions are to be a +container of "mappings". A mapping is a tuple of <DIMM, +DPA-start-offset, length>. + +LIBNVDIMM provides a built-in driver for these REGION devices. This driver +is responsible for reconciling the aliased DPA mappings across all +regions, parsing the LABEL, if present, and then emitting NAMESPACE +devices with the resolved/exclusive DPA-boundaries for the nd_pmem or +nd_blk device driver to consume. + +In addition to the generic attributes of "mapping"s, "interleave_ways" +and "size" the REGION device also exports some convenience attributes. +"nstype" indicates the integer type of namespace-device this region +emits, "devtype" duplicates the DEVTYPE variable stored by udev at the +'add' event, "modalias" duplicates the MODALIAS variable stored by udev +at the 'add' event, and finally, the optional "spa_index" is provided in +the case where the region is defined by a SPA. + +LIBNVDIMM: region + + struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc); + struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc); + + /sys/devices/platform/nfit_test.0/ndbus0 + |-- region0 + | |-- available_size + | |-- btt0 + | |-- btt_seed + | |-- devtype + | |-- driver -> ../../../../../bus/nd/drivers/nd_region + | |-- init_namespaces + | |-- mapping0 + | |-- mapping1 + | |-- mappings + | |-- modalias + | |-- namespace0.0 + | |-- namespace_seed + | |-- numa_node + | |-- nfit + | | `-- spa_index + | |-- nstype + | |-- set_cookie + | |-- size + | |-- subsystem -> ../../../../../bus/nd + | `-- uevent + |-- region1 + [..] + +LIBNDCTL: region enumeration example + +Sample region retrieval routines based on NFIT-unique data like +"spa_index" (interleave set id) for PMEM and "nfit_handle" (dimm id) for +BLK. + + static struct ndctl_region *get_pmem_region_by_spa_index(struct ndctl_bus *bus, + unsigned int spa_index) + { + struct ndctl_region *region; + + ndctl_region_foreach(bus, region) { + if (ndctl_region_get_type(region) != ND_DEVICE_REGION_PMEM) + continue; + if (ndctl_region_get_spa_index(region) == spa_index) + return region; + } + return NULL; + } + + static struct ndctl_region *get_blk_region_by_dimm_handle(struct ndctl_bus *bus, + unsigned int handle) + { + struct ndctl_region *region; + + ndctl_region_foreach(bus, region) { + struct ndctl_mapping *map; + + if (ndctl_region_get_type(region) != ND_DEVICE_REGION_BLOCK) + continue; + ndctl_mapping_foreach(region, map) { + struct ndctl_dimm *dimm = ndctl_mapping_get_dimm(map); + + if (ndctl_dimm_get_handle(dimm) == handle) + return region; + } + } + return NULL; + } + + +Why Not Encode the Region Type into the Region Name? +---------------------------------------------------- + +At first glance it seems since NFIT defines just PMEM and BLK interface +types that we should simply name REGION devices with something derived +from those type names. However, the ND subsystem explicitly keeps the +REGION name generic and expects userspace to always consider the +region-attributes for 4 reasons: + + 1. There are already more than two REGION and "namespace" types. For + PMEM there are two subtypes. As mentioned previously we have PMEM where + the constituent DIMM devices are known and anonymous PMEM. For BLK + regions the NFIT specification already anticipates vendor specific + implementations. The exact distinction of what a region contains is in + the region-attributes not the region-name or the region-devtype. + + 2. A region with zero child-namespaces is a possible configuration. For + example, the NFIT allows for a DCR to be published without a + corresponding BLK-aperture. This equates to a DIMM that can only accept + control/configuration messages, but no i/o through a descendant block + device. Again, this "type" is advertised in the attributes ('mappings' + == 0) and the name does not tell you much. + + 3. What if a third major interface type arises in the future? Outside + of vendor specific implementations, it's not difficult to envision a + third class of interface type beyond BLK and PMEM. With a generic name + for the REGION level of the device-hierarchy old userspace + implementations can still make sense of new kernel advertised + region-types. Userspace can always rely on the generic region + attributes like "mappings", "size", etc and the expected child devices + named "namespace". This generic format of the device-model hierarchy + allows the LIBNVDIMM and LIBNDCTL implementations to be more uniform and + future-proof. + + 4. There are more robust mechanisms for determining the major type of a + region than a device name. See the next section, How Do I Determine the + Major Type of a Region? + +How Do I Determine the Major Type of a Region? +---------------------------------------------- + +Outside of the blanket recommendation of "use libndctl", or simply +looking at the kernel header (/usr/include/linux/ndctl.h) to decode the +"nstype" integer attribute, here are some other options. + + 1. module alias lookup: + + The whole point of region/namespace device type differentiation is to + decide which block-device driver will attach to a given LIBNVDIMM namespace. + One can simply use the modalias to lookup the resulting module. It's + important to note that this method is robust in the presence of a + vendor-specific driver down the road. If a vendor-specific + implementation wants to supplant the standard nd_blk driver it can with + minimal impact to the rest of LIBNVDIMM. + + In fact, a vendor may also want to have a vendor-specific region-driver + (outside of nd_region). For example, if a vendor defined its own LABEL + format it would need its own region driver to parse that LABEL and emit + the resulting namespaces. The output from module resolution is more + accurate than a region-name or region-devtype. + + 2. udev: + + The kernel "devtype" is registered in the udev database + # udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region0 + P: /devices/platform/nfit_test.0/ndbus0/region0 + E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region0 + E: DEVTYPE=nd_pmem + E: MODALIAS=nd:t2 + E: SUBSYSTEM=nd + + # udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region4 + P: /devices/platform/nfit_test.0/ndbus0/region4 + E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region4 + E: DEVTYPE=nd_blk + E: MODALIAS=nd:t3 + E: SUBSYSTEM=nd + + ...and is available as a region attribute, but keep in mind that the + "devtype" does not indicate sub-type variations and scripts should + really be understanding the other attributes. + + 3. type specific attributes: + + As it currently stands a BLK-aperture region will never have a + "nfit/spa_index" attribute, but neither will a non-NFIT PMEM region. A + BLK region with a "mappings" value of 0 is, as mentioned above, a DIMM + that does not allow I/O. A PMEM region with a "mappings" value of zero + is a simple system-physical-address range. + + +LIBNVDIMM/LIBNDCTL: Namespace +------------------------- + +A REGION, after resolving DPA aliasing and LABEL specified boundaries, +surfaces one or more "namespace" devices. The arrival of a "namespace" +device currently triggers either the nd_blk or nd_pmem driver to load +and register a disk/block device. + +LIBNVDIMM: namespace +Here is a sample layout from the three major types of NAMESPACE where +namespace0.0 represents DIMM-info-backed PMEM (note that it has a 'uuid' +attribute), namespace2.0 represents a BLK namespace (note it has a +'sector_size' attribute) that, and namespace6.0 represents an anonymous +PMEM namespace (note that has no 'uuid' attribute due to not support a +LABEL). + + /sys/devices/platform/nfit_test.0/ndbus0/region0/namespace0.0 + |-- alt_name + |-- devtype + |-- dpa_extents + |-- force_raw + |-- modalias + |-- numa_node + |-- resource + |-- size + |-- subsystem -> ../../../../../../bus/nd + |-- type + |-- uevent + `-- uuid + /sys/devices/platform/nfit_test.0/ndbus0/region2/namespace2.0 + |-- alt_name + |-- devtype + |-- dpa_extents + |-- force_raw + |-- modalias + |-- numa_node + |-- sector_size + |-- size + |-- subsystem -> ../../../../../../bus/nd + |-- type + |-- uevent + `-- uuid + /sys/devices/platform/nfit_test.1/ndbus1/region6/namespace6.0 + |-- block + | `-- pmem0 + |-- devtype + |-- driver -> ../../../../../../bus/nd/drivers/pmem + |-- force_raw + |-- modalias + |-- numa_node + |-- resource + |-- size + |-- subsystem -> ../../../../../../bus/nd + |-- type + `-- uevent + +LIBNDCTL: namespace enumeration example +Namespaces are indexed relative to their parent region, example below. +These indexes are mostly static from boot to boot, but subsystem makes +no guarantees in this regard. For a static namespace identifier use its +'uuid' attribute. + +static struct ndctl_namespace *get_namespace_by_id(struct ndctl_region *region, + unsigned int id) +{ + struct ndctl_namespace *ndns; + + ndctl_namespace_foreach(region, ndns) + if (ndctl_namespace_get_id(ndns) == id) + return ndns; + + return NULL; +} + +LIBNDCTL: namespace creation example +Idle namespaces are automatically created by the kernel if a given +region has enough available capacity to create a new namespace. +Namespace instantiation involves finding an idle namespace and +configuring it. For the most part the setting of namespace attributes +can occur in any order, the only constraint is that 'uuid' must be set +before 'size'. This enables the kernel to track DPA allocations +internally with a static identifier. + +static int configure_namespace(struct ndctl_region *region, + struct ndctl_namespace *ndns, + struct namespace_parameters *parameters) +{ + char devname[50]; + + snprintf(devname, sizeof(devname), "namespace%d.%d", + ndctl_region_get_id(region), paramaters->id); + + ndctl_namespace_set_alt_name(ndns, devname); + /* 'uuid' must be set prior to setting size! */ + ndctl_namespace_set_uuid(ndns, paramaters->uuid); + ndctl_namespace_set_size(ndns, paramaters->size); + /* unlike pmem namespaces, blk namespaces have a sector size */ + if (parameters->lbasize) + ndctl_namespace_set_sector_size(ndns, parameters->lbasize); + ndctl_namespace_enable(ndns); +} + + +Why the Term "namespace"? + + 1. Why not "volume" for instance? "volume" ran the risk of confusing ND + as a volume manager like device-mapper. + + 2. The term originated to describe the sub-devices that can be created + within a NVME controller (see the nvme specification: + http://www.nvmexpress.org/specifications/), and NFIT namespaces are + meant to parallel the capabilities and configurability of + NVME-namespaces. + + +LIBNVDIMM/LIBNDCTL: Block Translation Table "btt" +--------------------------------------------- + +A BTT (design document: http://pmem.io/2014/09/23/btt.html) is a stacked +block device driver that fronts either the whole block device or a +partition of a block device emitted by either a PMEM or BLK NAMESPACE. + +LIBNVDIMM: btt layout +Every region will start out with at least one BTT device which is the +seed device. To activate it set the "namespace", "uuid", and +"sector_size" attributes and then bind the device to the nd_pmem or +nd_blk driver depending on the region type. + + /sys/devices/platform/nfit_test.1/ndbus0/region0/btt0/ + |-- namespace + |-- delete + |-- devtype + |-- modalias + |-- numa_node + |-- sector_size + |-- subsystem -> ../../../../../bus/nd + |-- uevent + `-- uuid + +LIBNDCTL: btt creation example +Similar to namespaces an idle BTT device is automatically created per +region. Each time this "seed" btt device is configured and enabled a new +seed is created. Creating a BTT configuration involves two steps of +finding and idle BTT and assigning it to consume a PMEM or BLK namespace. + + static struct ndctl_btt *get_idle_btt(struct ndctl_region *region) + { + struct ndctl_btt *btt; + + ndctl_btt_foreach(region, btt) + if (!ndctl_btt_is_enabled(btt) + && !ndctl_btt_is_configured(btt)) + return btt; + + return NULL; + } + + static int configure_btt(struct ndctl_region *region, + struct btt_parameters *parameters) + { + btt = get_idle_btt(region); + + ndctl_btt_set_uuid(btt, parameters->uuid); + ndctl_btt_set_sector_size(btt, parameters->sector_size); + ndctl_btt_set_namespace(btt, parameters->ndns); + /* turn off raw mode device */ + ndctl_namespace_disable(parameters->ndns); + /* turn on btt access */ + ndctl_btt_enable(btt); + } + +Once instantiated a new inactive btt seed device will appear underneath +the region. + +Once a "namespace" is removed from a BTT that instance of the BTT device +will be deleted or otherwise reset to default values. This deletion is +only at the device model level. In order to destroy a BTT the "info +block" needs to be destroyed. Note, that to destroy a BTT the media +needs to be written in raw mode. By default, the kernel will autodetect +the presence of a BTT and disable raw mode. This autodetect behavior +can be suppressed by enabling raw mode for the namespace via the +ndctl_namespace_set_raw_mode() api. + + +Summary LIBNDCTL Diagram +------------------------ + +For the given example above, here is the view of the objects as seen by the LIBNDCTL api: + +---+ + |CTX| +---------+ +--------------+ +---------------+ + +-+-+ +-> REGION0 +---> NAMESPACE0.0 +--> PMEM8 "pm0.0" | + | | +---------+ +--------------+ +---------------+ ++-------+ | | +---------+ +--------------+ +---------------+ +| DIMM0 <-+ | +-> REGION1 +---> NAMESPACE1.0 +--> PMEM6 "pm1.0" | ++-------+ | | | +---------+ +--------------+ +---------------+ +| DIMM1 <-+ +-v--+ | +---------+ +--------------+ +---------------+ ++-------+ +-+BUS0+---> REGION2 +-+-> NAMESPACE2.0 +--> ND6 "blk2.0" | +| DIMM2 <-+ +----+ | +---------+ | +--------------+ +----------------------+ ++-------+ | | +-> NAMESPACE2.1 +--> ND5 "blk2.1" | BTT2 | +| DIMM3 <-+ | +--------------+ +----------------------+ ++-------+ | +---------+ +--------------+ +---------------+ + +-> REGION3 +-+-> NAMESPACE3.0 +--> ND4 "blk3.0" | + | +---------+ | +--------------+ +----------------------+ + | +-> NAMESPACE3.1 +--> ND3 "blk3.1" | BTT1 | + | +--------------+ +----------------------+ + | +---------+ +--------------+ +---------------+ + +-> REGION4 +---> NAMESPACE4.0 +--> ND2 "blk4.0" | + | +---------+ +--------------+ +---------------+ + | +---------+ +--------------+ +----------------------+ + +-> REGION5 +---> NAMESPACE5.0 +--> ND1 "blk5.0" | BTT0 | + +---------+ +--------------+ +---------------+------+ + + diff --git a/Documentation/security/Smack.txt b/Documentation/security/Smack.txt index abc82f85215b..de5e1aeca7fb 100644 --- a/Documentation/security/Smack.txt +++ b/Documentation/security/Smack.txt @@ -206,11 +206,11 @@ netlabel label. The format accepted on write is: "%d.%d.%d.%d label" or "%d.%d.%d.%d/%d label". onlycap - This contains the label processes must have for CAP_MAC_ADMIN + This contains labels processes must have for CAP_MAC_ADMIN and CAP_MAC_OVERRIDE to be effective. If this file is empty these capabilities are effective at for processes with any - label. The value is set by writing the desired label to the - file or cleared by writing "-" to the file. + label. The values are set by writing the desired labels, separated + by spaces, to the file or cleared by writing "-" to the file. ptrace This is used to define the current ptrace policy 0 - default: this is the policy that relies on Smack access rules. diff --git a/MAINTAINERS b/MAINTAINERS index c54a67434048..0e6b09150aad 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2229,6 +2229,14 @@ F: arch/mips/bcm3384/* F: arch/mips/include/asm/mach-bcm3384/* F: arch/mips/kernel/*bmips* +BROADCOM BCM47XX MIPS ARCHITECTURE +M: Hauke Mehrtens <[email protected]> +M: RafaÅ‚ MiÅ‚ecki <[email protected]> +S: Maintained +F: arch/mips/bcm47xx/* +F: arch/mips/include/asm/mach-bcm47xx/* + BROADCOM BCM5301X ARM ARCHITECTURE M: Hauke Mehrtens <[email protected]> @@ -2333,6 +2341,12 @@ S: Supported F: drivers/gpio/gpio-bcm-kona.c F: Documentation/devicetree/bindings/gpio/gpio-bcm-kona.txt +BROADCOM NVRAM DRIVER +M: RafaÅ‚ MiÅ‚ecki <[email protected]> +S: Maintained +F: drivers/firmware/broadcom/* + BROADCOM STB NAND FLASH DRIVER M: Brian Norris <[email protected]> @@ -6088,6 +6102,39 @@ M: Sasha Levin <[email protected]> S: Maintained F: tools/lib/lockdep/ +LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM +M: Dan Williams <[email protected]> +Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ +S: Supported +F: drivers/nvdimm/* +F: include/linux/nd.h +F: include/linux/libnvdimm.h +F: include/uapi/linux/ndctl.h + +LIBNVDIMM BLK: MMIO-APERTURE DRIVER +M: Ross Zwisler <[email protected]> +Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ +S: Supported +F: drivers/nvdimm/blk.c +F: drivers/nvdimm/region_devs.c +F: drivers/acpi/nfit* + +LIBNVDIMM BTT: BLOCK TRANSLATION TABLE +M: Vishal Verma <[email protected]> +Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ +S: Supported +F: drivers/nvdimm/btt* + +LIBNVDIMM PMEM: PERSISTENT MEMORY DRIVER +M: Ross Zwisler <[email protected]> +Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ +S: Supported +F: drivers/nvdimm/pmem.c + LINUX FOR IBM pSERIES (RS/6000) M: Paul Mackerras <[email protected]> W: http://www.ibm.com/linux/ltc/projects/ppc @@ -8160,6 +8207,7 @@ T: git git://github.com/hzhuang1/linux.git T: git git://github.com/rjarzmik/linux.git S: Maintained F: arch/arm/mach-pxa/ +F: drivers/dma/pxa* F: drivers/pcmcia/pxa2xx* F: drivers/spi/spi-pxa2xx* F: drivers/usb/gadget/udc/pxa2* @@ -8348,12 +8396,6 @@ S: Maintained F: Documentation/blockdev/ramdisk.txt F: drivers/block/brd.c -PERSISTENT MEMORY DRIVER -M: Ross Zwisler <[email protected]> -S: Supported -F: drivers/block/pmem.c - RANDOM NUMBER DRIVER M: "Theodore Ts'o" <[email protected]> S: Maintained @@ -8972,6 +9014,7 @@ S: Supported F: kernel/seccomp.c F: include/uapi/linux/seccomp.h F: include/linux/seccomp.h +F: tools/testing/selftests/seccomp/* K: \bsecure_computing K: \bTIF_SECCOMP\b @@ -10375,11 +10418,15 @@ S: Maintained F: Documentation/filesystems/ubifs.txt F: fs/ubifs/ -UCLINUX (AND M68KNOMMU) +UCLINUX (M68KNOMMU AND COLDFIRE) M: Greg Ungerer <[email protected]> W: http://www.uclinux.org/ L: [email protected] (subscribers-only) +T: git git://git.kernel.org/pub/scm/linux/kernel/git/gerg/m68knommu.git S: Maintained +F: arch/m68k/coldfire/ +F: arch/m68k/68*/ F: arch/m68k/*/*_no.* F: arch/m68k/include/asm/*_no.* @@ -10772,6 +10819,12 @@ F: drivers/vfio/ F: include/linux/vfio.h F: include/uapi/linux/vfio.h +VFIO PLATFORM DRIVER +M: Baptiste Reynal <[email protected]> +S: Maintained +F: drivers/vfio/platform/ + VIDEOBUF2 FRAMEWORK M: Pawel Osciak <[email protected]> M: Marek Szyprowski <[email protected]> diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index ab21e0d58278..9d4aa18f2a82 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -158,6 +158,7 @@ static __init int is_reserve_region(efi_memory_desc_t *md) case EFI_BOOT_SERVICES_CODE: case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: + case EFI_PERSISTENT_MEMORY: return 0; default: break; diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 47e962f7ed5a..caae3f4e4341 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -1222,6 +1222,10 @@ efi_initialize_iomem_resources(struct resource *code_resource, flags |= IORESOURCE_DISABLED; break; + case EFI_PERSISTENT_MEMORY: + name = "Persistent Memory"; + break; + case EFI_RESERVED_TYPE: case EFI_RUNTIME_SERVICES_CODE: case EFI_RUNTIME_SERVICES_DATA: diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 5b7791dd3965..096731049538 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -96,3 +96,6 @@ EXPORT_SYMBOL(ia64_ivt); /* mcount is defined in assembly */ EXPORT_SYMBOL(_mcount); #endif + +#include <asm/cacheflush.h> +EXPORT_SYMBOL_GPL(flush_icache_range); diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index dd5801eb4c69..2889412e03eb 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -2117,8 +2117,7 @@ ia64_mca_late_init(void) register_hotcpu_notifier(&mca_cpu_notifier); /* Setup the CMCI/P vector and handler */ - init_timer(&cmc_poll_timer); - cmc_poll_timer.function = ia64_mca_cmc_poll; + setup_timer(&cmc_poll_timer, ia64_mca_cmc_poll, 0UL); /* Unmask/enable the vector */ cmc_polling_enabled = 0; @@ -2129,8 +2128,7 @@ ia64_mca_late_init(void) #ifdef CONFIG_ACPI /* Setup the CPEI/P vector and handler */ cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI); - init_timer(&cpe_poll_timer); - cpe_poll_timer.function = ia64_mca_cpe_poll; + setup_timer(&cpe_poll_timer, ia64_mca_cpe_poll, 0UL); { unsigned int irq; diff --git a/arch/m68k/68000/m68EZ328.c b/arch/m68k/68000/m68EZ328.c index 21952906e9e2..e6ab321f93f8 100644 --- a/arch/m68k/68000/m68EZ328.c +++ b/arch/m68k/68000/m68EZ328.c @@ -62,8 +62,7 @@ void __init config_BSP(char *command, int len) #ifdef CONFIG_UCSIMM printk(KERN_INFO "uCsimm serial string [%s]\n",getserialnum()); p = cs8900a_hwaddr = gethwaddr(0); - printk(KERN_INFO "uCsimm hwaddr %.2x:%.2x:%.2x:%.2x:%.2x:%.2x\n", - p[0], p[1], p[2], p[3], p[4], p[5]); + printk(KERN_INFO "uCsimm hwaddr %pM\n", p); p = getbenv("APPEND"); if (p) strcpy(p,command); diff --git a/arch/m68k/68000/m68VZ328.c b/arch/m68k/68000/m68VZ328.c index 0e5e5a10a021..1154bdb220a0 100644 --- a/arch/m68k/68000/m68VZ328.c +++ b/arch/m68k/68000/m68VZ328.c @@ -152,8 +152,7 @@ static void __init init_hardware(char *command, int size) printk(KERN_INFO "uCdimm serial string [%s]\n", getserialnum()); p = cs8900a_hwaddr = gethwaddr(0); - printk(KERN_INFO "uCdimm hwaddr %.2x:%.2x:%.2x:%.2x:%.2x:%.2x\n", - p[0], p[1], p[2], p[3], p[4], p[5]); + printk(KERN_INFO "uCdimm hwaddr %pM\n", p); p = getbenv("APPEND"); if (p) strcpy(p, command); diff --git a/arch/m68k/68360/config.c b/arch/m68k/68360/config.c index fd1f948c7129..b65fe4eed38e 100644 --- a/arch/m68k/68360/config.c +++ b/arch/m68k/68360/config.c @@ -154,8 +154,7 @@ void __init config_BSP(char *command, int len) #if defined(CONFIG_UCQUICC) && 0 printk(KERN_INFO "uCquicc serial string [%s]\n",getserialnum()); p = scc1_hwaddr = gethwaddr(0); - printk(KERN_INFO "uCquicc hwaddr %.2x:%.2x:%.2x:%.2x:%.2x:%.2x\n", - p[0], p[1], p[2], p[3], p[4], p[5]); + printk(KERN_INFO "uCquicc hwaddr %pM\n", p); p = getbenv("APPEND"); if (p) diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms index 39cf40da5f14..a424e46b50af 100644 --- a/arch/mips/Kbuild.platforms +++ b/arch/mips/Kbuild.platforms @@ -15,8 +15,8 @@ platforms += jazz platforms += jz4740 platforms += lantiq platforms += lasat -platforms += loongson -platforms += loongson1 +platforms += loongson32 +platforms += loongson64 platforms += mti-malta platforms += mti-sead3 platforms += netlogic diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index b65edf514b40..2a14585c90d2 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -21,11 +21,12 @@ config MIPS select HAVE_FUNCTION_GRAPH_TRACER select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_SYSCALL_TRACEPOINTS select HAVE_DEBUG_KMEMLEAK select HAVE_SYSCALL_TRACEPOINTS select ARCH_HAS_ELF_RANDOMIZE select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES && 64BIT - select RTC_LIB if !MACH_LOONGSON + select RTC_LIB if !MACH_LOONGSON64 select GENERIC_ATOMIC64 if !64BIT select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select HAVE_DMA_ATTRS @@ -70,7 +71,7 @@ config MIPS_ALCHEMY select ARCH_PHYS_ADDR_T_64BIT select CEVT_R4K select CSRC_R4K - select IRQ_CPU + select IRQ_MIPS_CPU select DMA_MAYBE_COHERENT # Au1000,1500,1100 aren't, rest is select SYS_HAS_CPU_MIPS32_R1 select SYS_SUPPORTS_32BIT_KERNEL @@ -85,7 +86,7 @@ config AR7 select DMA_NONCOHERENT select CEVT_R4K select CSRC_R4K - select IRQ_CPU + select IRQ_MIPS_CPU select NO_EXCEPT_FILL select SWAP_IO_SPACE select SYS_HAS_CPU_MIPS32_R1 @@ -106,7 +107,7 @@ config ATH25 select CEVT_R4K select CSRC_R4K select DMA_NONCOHERENT - select IRQ_CPU + select IRQ_MIPS_CPU select IRQ_DOMAIN select SYS_HAS_CPU_MIPS32_R1 select SYS_SUPPORTS_BIG_ENDIAN @@ -123,14 +124,17 @@ config ATH79 select CSRC_R4K select DMA_NONCOHERENT select HAVE_CLK + select COMMON_CLK select CLKDEV_LOOKUP - select IRQ_CPU + select IRQ_MIPS_CPU select MIPS_MACHINE select SYS_HAS_CPU_MIPS32_R2 select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_MIPS16 + select SYS_SUPPORTS_ZBOOT + select USE_OF help Support for the Atheros AR71XX/AR724X/AR913X SoCs. @@ -146,7 +150,7 @@ config BMIPS_GENERIC select BCM7038_L1_IRQ select BCM7120_L2_IRQ select BRCMSTB_L2_IRQ - select IRQ_CPU + select IRQ_MIPS_CPU select RAW_IRQ_ACCESSORS select DMA_NONCOHERENT select SYS_SUPPORTS_32BIT_KERNEL @@ -176,7 +180,7 @@ config BCM47XX select CSRC_R4K select DMA_NONCOHERENT select HW_HAS_PCI - select IRQ_CPU + select IRQ_MIPS_CPU select SYS_HAS_CPU_MIPS32_R1 select NO_EXCEPT_FILL select SYS_SUPPORTS_32BIT_KERNEL @@ -186,6 +190,7 @@ config BCM47XX select USE_GENERIC_EARLY_PRINTK_8250 select GPIOLIB select LEDS_GPIO_REGISTER + select BCM47XX_NVRAM help Support for BCM47XX based boards @@ -196,7 +201,7 @@ config BCM63XX select CSRC_R4K select SYNC_R4K select DMA_NONCOHERENT - select IRQ_CPU + select IRQ_MIPS_CPU select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN select SYS_HAS_EARLY_PRINTK @@ -216,7 +221,7 @@ config MIPS_COBALT select HW_HAS_PCI select I8253 select I8259 - select IRQ_CPU + select IRQ_MIPS_CPU select IRQ_GT641XX select PCI_GT64XXX_PCI0 select PCI @@ -239,7 +244,7 @@ config MACH_DECSTATION select CPU_R4400_WORKAROUNDS if 64BIT select DMA_NONCOHERENT select NO_IOPORT_MAP - select IRQ_CPU + select IRQ_MIPS_CPU select SYS_HAS_CPU_R3000 select SYS_HAS_CPU_R4X00 select SYS_SUPPORTS_32BIT_KERNEL @@ -274,7 +279,7 @@ config MACH_JAZZ select DEFAULT_SGI_PARTITION if CPU_BIG_ENDIAN select GENERIC_ISA_DMA select HAVE_PCSPKR_PLATFORM - select IRQ_CPU + select IRQ_MIPS_CPU select I8253 select I8259 select ISA @@ -288,23 +293,24 @@ config MACH_JAZZ Members include the Acer PICA, MIPS Magnum 4000, MIPS Millennium and Olivetti M700-10 workstations. -config MACH_JZ4740 - bool "Ingenic JZ4740 based machines" - select SYS_HAS_CPU_MIPS32_R1 +config MACH_INGENIC + bool "Ingenic SoC based machines" select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_LITTLE_ENDIAN select SYS_SUPPORTS_ZBOOT_UART16550 select DMA_NONCOHERENT - select IRQ_CPU + select IRQ_MIPS_CPU select ARCH_REQUIRE_GPIOLIB - select SYS_HAS_EARLY_PRINTK - select HAVE_CLK + select COMMON_CLK select GENERIC_IRQ_CHIP + select BUILTIN_DTB + select USE_OF + select LIBFDT config LANTIQ bool "Lantiq based platforms" select DMA_NONCOHERENT - select IRQ_CPU + select IRQ_MIPS_CPU select CEVT_R4K select CSRC_R4K select SYS_HAS_CPU_MIPS32_R1 @@ -333,7 +339,7 @@ config LASAT select DMA_NONCOHERENT select SYS_HAS_EARLY_PRINTK select HW_HAS_PCI - select IRQ_CPU + select IRQ_MIPS_CPU select PCI_GT64XXX_PCI0 select MIPS_NILE4 select R5000_CPU_SCACHE @@ -342,26 +348,28 @@ config LASAT select SYS_SUPPORTS_64BIT_KERNEL if BROKEN select SYS_SUPPORTS_LITTLE_ENDIAN -config MACH_LOONGSON - bool "Loongson family of machines" +config MACH_LOONGSON32 + bool "Loongson-1 family of machines" select SYS_SUPPORTS_ZBOOT help - This enables the support of Loongson family of machines. + This enables support for the Loongson-1 family of machines. - Loongson is a family of general-purpose MIPS-compatible CPUs. - developed at Institute of Computing Technology (ICT), - Chinese Academy of Sciences (CAS) in the People's Republic - of China. The chief architect is Professor Weiwu Hu. + Loongson-1 is a family of 32-bit MIPS-compatible SoCs developed by + the Institute of Computing Technology (ICT), Chinese Academy of + Sciences (CAS). -config MACH_LOONGSON1 - bool "Loongson 1 family of machines" +config MACH_LOONGSON64 + bool "Loongson-2/3 family of machines" select SYS_SUPPORTS_ZBOOT help - This enables support for the Loongson 1 based machines. + This enables the support of Loongson-2/3 family of machines. - Loongson 1 is a family of 32-bit MIPS-compatible SoCs developed by - the ICT (Institute of Computing Technology) and the Chinese Academy - of Sciences. + Loongson-2 is a family of single-core CPUs and Loongson-3 is a + family of multi-core CPUs. They are both 64-bit general-purpose + MIPS-compatible CPUs. Loongson-2/3 are developed by the Institute + of Computing Technology (ICT), Chinese Academy of Sciences (CAS) + in the People's Republic of China. The chief architect is Professor + Weiwu Hu. config MACH_PISTACHIO bool "IMG Pistachio SoC based boards" @@ -373,7 +381,7 @@ config MACH_PISTACHIO select COMMON_CLK select CSRC_R4K select DMA_MAYBE_COHERENT - select IRQ_CPU + select IRQ_MIPS_CPU select LIBFDT select MFD_SYSCON select MIPS_CPU_SCACHE @@ -386,6 +394,8 @@ config MACH_PISTACHIO select SYS_SUPPORTS_MIPS_CPS select SYS_SUPPORTS_MULTITHREADING select SYS_SUPPORTS_ZBOOT + select SYS_HAS_EARLY_PRINTK + select USE_GENERIC_EARLY_PRINTK_8250 select USE_OF help This enables support for the IMG Pistachio SoC platform. @@ -395,13 +405,14 @@ config MIPS_MALTA select ARCH_MAY_HAVE_PC_FDC select BOOT_ELF32 select BOOT_RAW + select BUILTIN_DTB select CEVT_R4K select CSRC_R4K select CLKSRC_MIPS_GIC select DMA_MAYBE_COHERENT select GENERIC_ISA_DMA select HAVE_PCSPKR_PLATFORM - select IRQ_CPU + select IRQ_MIPS_CPU select MIPS_GIC select HW_HAS_PCI select I8253 @@ -434,6 +445,8 @@ config MIPS_MALTA select SYS_SUPPORTS_MULTITHREADING select SYS_SUPPORTS_SMARTMIPS select SYS_SUPPORTS_ZBOOT + select USE_OF + select ZONE_DMA32 if 64BIT help This enables support for the MIPS Technologies Malta evaluation board. @@ -449,7 +462,7 @@ config MIPS_SEAD3 select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_IRQ_EI select DMA_NONCOHERENT - select IRQ_CPU + select IRQ_MIPS_CPU select MIPS_GIC select LIBFDT select MIPS_MSC @@ -512,7 +525,7 @@ config PMC_MSP select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_MIPS16 - select IRQ_CPU + select IRQ_MIPS_CPU select SERIAL_8250 select SERIAL_8250_CONSOLE select USB_EHCI_BIG_ENDIAN_MMIO @@ -529,7 +542,7 @@ config RALINK select CSRC_R4K select BOOT_RAW select DMA_NONCOHERENT - select IRQ_CPU + select IRQ_MIPS_CPU select USE_OF select SYS_HAS_CPU_MIPS32_R1 select SYS_HAS_CPU_MIPS32_R2 @@ -555,7 +568,7 @@ config SGI_IP22 select I8253 select I8259 select IP22_CPU_SCACHE - select IRQ_CPU + select IRQ_MIPS_CPU select GENERIC_ISA_DMA_SUPPORT_BROKEN select SGI_HAS_I8042 select SGI_HAS_INDYDOG @@ -614,7 +627,7 @@ config SGI_IP28 select DEFAULT_SGI_PARTITION select DMA_NONCOHERENT select GENERIC_ISA_DMA_SUPPORT_BROKEN - select IRQ_CPU + select IRQ_MIPS_CPU select HW_HAS_EISA select I8253 select I8259 @@ -650,7 +663,7 @@ config SGI_IP32 select CSRC_R4K select DMA_NONCOHERENT select HW_HAS_PCI - select IRQ_CPU + select IRQ_MIPS_CPU select R5000_CPU_SCACHE select RM7000_CPU_SCACHE select SYS_HAS_CPU_R5000 @@ -766,7 +779,7 @@ config SNI_RM select HAVE_PCSPKR_PLATFORM select HW_HAS_EISA select HW_HAS_PCI - select IRQ_CPU + select IRQ_MIPS_CPU select I8253 select I8259 select ISA @@ -799,7 +812,7 @@ config MIKROTIK_RB532 select CSRC_R4K select DMA_NONCOHERENT select HW_HAS_PCI - select IRQ_CPU + select IRQ_MIPS_CPU select SYS_HAS_CPU_MIPS32_R1 select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_LITTLE_ENDIAN @@ -866,7 +879,7 @@ config NLM_XLR_BOARD select NR_CPUS_DEFAULT_32 select CEVT_R4K select CSRC_R4K - select IRQ_CPU + select IRQ_MIPS_CPU select ZONE_DMA32 if 64BIT select SYNC_R4K select SYS_HAS_EARLY_PRINTK @@ -893,7 +906,7 @@ config NLM_XLP_BOARD select NR_CPUS_DEFAULT_32 select CEVT_R4K select CSRC_R4K - select IRQ_CPU + select IRQ_MIPS_CPU select ZONE_DMA32 if 64BIT select SYNC_R4K select SYS_HAS_EARLY_PRINTK @@ -942,8 +955,8 @@ source "arch/mips/sibyte/Kconfig" source "arch/mips/txx9/Kconfig" source "arch/mips/vr41xx/Kconfig" source "arch/mips/cavium-octeon/Kconfig" -source "arch/mips/loongson/Kconfig" -source "arch/mips/loongson1/Kconfig" +source "arch/mips/loongson32/Kconfig" +source "arch/mips/loongson64/Kconfig" source "arch/mips/netlogic/Kconfig" source "arch/mips/paravirt/Kconfig" @@ -1142,10 +1155,6 @@ config SYS_SUPPORTS_HUGETLBFS config MIPS_HUGE_TLB_SUPPORT def_bool HUGETLB_PAGE || TRANSPARENT_HUGEPAGE -config IRQ_CPU - bool - select IRQ_DOMAIN - config IRQ_CPU_RM7K bool @@ -1172,7 +1181,7 @@ config SOC_EMMA2RH select CEVT_R4K select CSRC_R4K select DMA_NONCOHERENT - select IRQ_CPU + select IRQ_MIPS_CPU select SWAP_IO_SPACE select SYS_HAS_CPU_R5500 select SYS_SUPPORTS_32BIT_KERNEL @@ -1183,7 +1192,7 @@ config SOC_PNX833X bool select CEVT_R4K select CSRC_R4K - select IRQ_CPU + select IRQ_MIPS_CPU select DMA_NONCOHERENT select SYS_HAS_CPU_MIPS32_R2 select SYS_SUPPORTS_32BIT_KERNEL @@ -1569,7 +1578,8 @@ config CPU_CAVIUM_OCTEON select WEAK_ORDERING select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_HUGEPAGES - select USB_EHCI_BIG_ENDIAN_MMIO + select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN + select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN select MIPS_L1_CACHE_SHIFT_7 help The Cavium Octeon processor is a highly integrated chip containing @@ -1587,7 +1597,7 @@ config CPU_BMIPS select CPU_BMIPS5000 if SYS_HAS_CPU_BMIPS5000 select CPU_SUPPORTS_32BIT_KERNEL select DMA_NONCOHERENT - select IRQ_CPU + select IRQ_MIPS_CPU select SWAP_IO_SPACE select WEAK_ORDERING select CPU_SUPPORTS_HIGHMEM @@ -2672,6 +2682,51 @@ config USE_OF config BUILTIN_DTB bool +choice + prompt "Kernel appended dtb support" if OF + default MIPS_NO_APPENDED_DTB + + config MIPS_NO_APPENDED_DTB + bool "None" + help + Do not enable appended dtb support. + + config MIPS_RAW_APPENDED_DTB + bool "vmlinux.bin" + help + With this option, the boot code will look for a device tree binary + DTB) appended to raw vmlinux.bin (without decompressor). + (e.g. cat vmlinux.bin <filename>.dtb > vmlinux_w_dtb). + + This is meant as a backward compatibility convenience for those + systems with a bootloader that can't be upgraded to accommodate + the documented boot protocol using a device tree. + + Beware that there is very little in terms of protection against + this option being confused by leftover garbage in memory that might + look like a DTB header after a reboot if no actual DTB is appended + to vmlinux.bin. Do not leave this option active in a production kernel + if you don't intend to always append a DTB. + + config MIPS_ZBOOT_APPENDED_DTB + bool "vmlinuz.bin" + depends on SYS_SUPPORTS_ZBOOT + help + With this option, the boot code will look for a device tree binary + DTB) appended to raw vmlinuz.bin (with decompressor). + (e.g. cat vmlinuz.bin <filename>.dtb > vmlinuz_w_dtb). + + This is meant as a backward compatibility convenience for those + systems with a bootloader that can't be upgraded to accommodate + the documented boot protocol using a device tree. + + Beware that there is very little in terms of protection against + this option being confused by leftover garbage in memory that might + look like a DTB header after a reboot if no actual DTB is appended + to vmlinuz.bin. Do not leave this option active in a production kernel + if you don't intend to always append a DTB. +endchoice + endmenu config LOCKDEP_SUPPORT diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c index 6a98d2cb402c..6e46abe0dac6 100644 --- a/arch/mips/alchemy/common/clock.c +++ b/arch/mips/alchemy/common/clock.c @@ -752,12 +752,12 @@ static int __init alchemy_clk_init_fgens(int ctype) switch (ctype) { case ALCHEMY_CPU_AU1000...ALCHEMY_CPU_AU1200: id.ops = &alchemy_clkops_fgenv1; - id.parent_names = (const char **)alchemy_clk_fgv1_parents; + id.parent_names = alchemy_clk_fgv1_parents; id.num_parents = 2; break; case ALCHEMY_CPU_AU1300: id.ops = &alchemy_clkops_fgenv2; - id.parent_names = (const char **)alchemy_clk_fgv2_parents; + id.parent_names = alchemy_clk_fgv2_parents; id.num_parents = 3; break; default: @@ -961,7 +961,7 @@ static int __init alchemy_clk_setup_imux(int ctype) struct clk *c; id.ops = &alchemy_clkops_csrc; - id.parent_names = (const char **)alchemy_clk_csrc_parents; + id.parent_names = alchemy_clk_csrc_parents; id.num_parents = 7; id.flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE; diff --git a/arch/mips/ath25/ar2315.c b/arch/mips/ath25/ar2315.c index 2befa7d766a6..8742e1cee492 100644 --- a/arch/mips/ath25/ar2315.c +++ b/arch/mips/ath25/ar2315.c @@ -76,7 +76,7 @@ static void ar2315_misc_irq_handler(unsigned irq, struct irq_desc *desc) unsigned nr, misc_irq = 0; if (pending) { - struct irq_domain *domain = irq_get_handler_data(irq); + struct irq_domain *domain = irq_desc_get_handler_data(desc); nr = __ffs(pending); misc_irq = irq_find_mapping(domain, nr); diff --git a/arch/mips/ath25/ar5312.c b/arch/mips/ath25/ar5312.c index b6887f75144c..094b938fd603 100644 --- a/arch/mips/ath25/ar5312.c +++ b/arch/mips/ath25/ar5312.c @@ -80,7 +80,7 @@ static void ar5312_misc_irq_handler(unsigned irq, struct irq_desc *desc) unsigned nr, misc_irq = 0; if (pending) { - struct irq_domain *domain = irq_get_handler_data(irq); + struct irq_domain *domain = irq_desc_get_handler_data(desc); nr = __ffs(pending); misc_irq = irq_find_mapping(domain, nr); diff --git a/arch/mips/ath25/board.c b/arch/mips/ath25/board.c index b8bb78282d6a..9ab48ff80c1c 100644 --- a/arch/mips/ath25/board.c +++ b/arch/mips/ath25/board.c @@ -216,7 +216,7 @@ void __init plat_time_init(void) ar2315_plat_time_init(); } -unsigned int __cpuinit get_c0_compare_int(void) +unsigned int get_c0_compare_int(void) { return CP0_LEGACY_COMPARE_IRQ; } diff --git a/arch/mips/ath79/Kconfig b/arch/mips/ath79/Kconfig index dfc60209dc63..13c04cf54afa 100644 --- a/arch/mips/ath79/Kconfig +++ b/arch/mips/ath79/Kconfig @@ -71,6 +71,18 @@ config ATH79_MACH_UBNT_XM Say 'Y' here if you want your kernel to support the Ubiquiti Networks XM (rev 1.0) board. +choice + prompt "Build a DTB in the kernel" + optional + help + Select a devicetree that should be built into the kernel. + + config DTB_TL_WR1043ND_V1 + bool "TL-WR1043ND Version 1" + select BUILTIN_DTB + select SOC_AR913X +endchoice + endmenu config SOC_AR71XX diff --git a/arch/mips/ath79/clock.c b/arch/mips/ath79/clock.c index 26479f437675..eb5117ced95a 100644 --- a/arch/mips/ath79/clock.c +++ b/arch/mips/ath79/clock.c @@ -17,6 +17,7 @@ #include <linux/err.h> #include <linux/clk.h> #include <linux/clkdev.h> +#include <linux/clk-provider.h> #include <asm/div64.h> @@ -28,24 +29,27 @@ #define AR724X_BASE_FREQ 5000000 #define AR913X_BASE_FREQ 5000000 -struct clk { - unsigned long rate; +static struct clk *clks[3]; +static struct clk_onecell_data clk_data = { + .clks = clks, + .clk_num = ARRAY_SIZE(clks), }; -static void __init ath79_add_sys_clkdev(const char *id, unsigned long rate) +static struct clk *__init ath79_add_sys_clkdev( + const char *id, unsigned long rate) { struct clk *clk; int err; - clk = kzalloc(sizeof(*clk), GFP_KERNEL); + clk = clk_register_fixed_rate(NULL, id, NULL, CLK_IS_ROOT, rate); if (!clk) panic("failed to allocate %s clock structure", id); - clk->rate = rate; - err = clk_register_clkdev(clk, id, NULL); if (err) panic("unable to register %s clock device", id); + + return clk; } static void __init ar71xx_clocks_init(void) @@ -62,7 +66,7 @@ static void __init ar71xx_clocks_init(void) pll = ath79_pll_rr(AR71XX_PLL_REG_CPU_CONFIG); - div = ((pll >> AR71XX_PLL_DIV_SHIFT) & AR71XX_PLL_DIV_MASK) + 1; + div = ((pll >> AR71XX_PLL_FB_SHIFT) & AR71XX_PLL_FB_MASK) + 1; freq = div * ref_rate; div = ((pll >> AR71XX_CPU_DIV_SHIFT) & AR71XX_CPU_DIV_MASK) + 1; @@ -75,9 +79,9 @@ static void __init ar71xx_clocks_init(void) ahb_rate = cpu_rate / div; ath79_add_sys_clkdev("ref", ref_rate); - ath79_add_sys_clkdev("cpu", cpu_rate); - ath79_add_sys_clkdev("ddr", ddr_rate); - ath79_add_sys_clkdev("ahb", ahb_rate); + clks[0] = ath79_add_sys_clkdev("cpu", cpu_rate); + clks[1] = ath79_add_sys_clkdev("ddr", ddr_rate); + clks[2] = ath79_add_sys_clkdev("ahb", ahb_rate); clk_add_alias("wdt", NULL, "ahb", NULL); clk_add_alias("uart", NULL, "ahb", NULL); @@ -96,7 +100,7 @@ static void __init ar724x_clocks_init(void) ref_rate = AR724X_BASE_FREQ; pll = ath79_pll_rr(AR724X_PLL_REG_CPU_CONFIG); - div = ((pll >> AR724X_PLL_DIV_SHIFT) & AR724X_PLL_DIV_MASK); + div = ((pll >> AR724X_PLL_FB_SHIFT) & AR724X_PLL_FB_MASK); freq = div * ref_rate; div = ((pll >> AR724X_PLL_REF_DIV_SHIFT) & AR724X_PLL_REF_DIV_MASK); @@ -111,9 +115,9 @@ static void __init ar724x_clocks_init(void) ahb_rate = cpu_rate / div; ath79_add_sys_clkdev("ref", ref_rate); - ath79_add_sys_clkdev("cpu", cpu_rate); - ath79_add_sys_clkdev("ddr", ddr_rate); - ath79_add_sys_clkdev("ahb", ahb_rate); + clks[0] = ath79_add_sys_clkdev("cpu", cpu_rate); + clks[1] = ath79_add_sys_clkdev("ddr", ddr_rate); + clks[2] = ath79_add_sys_clkdev("ahb", ahb_rate); clk_add_alias("wdt", NULL, "ahb", NULL); clk_add_alias("uart", NULL, "ahb", NULL); @@ -132,7 +136,7 @@ static void __init ar913x_clocks_init(void) ref_rate = AR913X_BASE_FREQ; pll = ath79_pll_rr(AR913X_PLL_REG_CPU_CONFIG); - div = ((pll >> AR913X_PLL_DIV_SHIFT) & AR913X_PLL_DIV_MASK); + div = ((pll >> AR913X_PLL_FB_SHIFT) & AR913X_PLL_FB_MASK); freq = div * ref_rate; cpu_rate = freq; @@ -144,9 +148,9 @@ static void __init ar913x_clocks_init(void) ahb_rate = cpu_rate / div; ath79_add_sys_clkdev("ref", ref_rate); - ath79_add_sys_clkdev("cpu", cpu_rate); - ath79_add_sys_clkdev("ddr", ddr_rate); - ath79_add_sys_clkdev("ahb", ahb_rate); + clks[0] = ath79_add_sys_clkdev("cpu", cpu_rate); + clks[1] = ath79_add_sys_clkdev("ddr", ddr_rate); + clks[2] = ath79_add_sys_clkdev("ahb", ahb_rate); clk_add_alias("wdt", NULL, "ahb", NULL); clk_add_alias("uart", NULL, "ahb", NULL); @@ -206,9 +210,9 @@ static void __init ar933x_clocks_init(void) } ath79_add_sys_clkdev("ref", ref_rate); - ath79_add_sys_clkdev("cpu", cpu_rate); - ath79_add_sys_clkdev("ddr", ddr_rate); - ath79_add_sys_clkdev("ahb", ahb_rate); + clks[0] = ath79_add_sys_clkdev("cpu", cpu_rate); + clks[1] = ath79_add_sys_clkdev("ddr", ddr_rate); + clks[2] = ath79_add_sys_clkdev("ahb", ahb_rate); clk_add_alias("wdt", NULL, "ahb", NULL); clk_add_alias("uart", NULL, "ref", NULL); @@ -340,9 +344,9 @@ static void __init ar934x_clocks_init(void) ahb_rate = cpu_pll / (postdiv + 1); ath79_add_sys_clkdev("ref", ref_rate); - ath79_add_sys_clkdev("cpu", cpu_rate); - ath79_add_sys_clkdev("ddr", ddr_rate); - ath79_add_sys_clkdev("ahb", ahb_rate); + clks[0] = ath79_add_sys_clkdev("cpu", cpu_rate); + clks[1] = ath79_add_sys_clkdev("ddr", ddr_rate); + clks[2] = ath79_add_sys_clkdev("ahb", ahb_rate); clk_add_alias("wdt", NULL, "ref", NULL); clk_add_alias("uart", NULL, "ref", NULL); @@ -427,9 +431,9 @@ static void __init qca955x_clocks_init(void) ahb_rate = cpu_pll / (postdiv + 1); ath79_add_sys_clkdev("ref", ref_rate); - ath79_add_sys_clkdev("cpu", cpu_rate); - ath79_add_sys_clkdev("ddr", ddr_rate); - ath79_add_sys_clkdev("ahb", ahb_rate); + clks[0] = ath79_add_sys_clkdev("cpu", cpu_rate); + clks[1] = ath79_add_sys_clkdev("ddr", ddr_rate); + clks[2] = ath79_add_sys_clkdev("ahb", ahb_rate); clk_add_alias("wdt", NULL, "ref", NULL); clk_add_alias("uart", NULL, "ref", NULL); @@ -451,6 +455,8 @@ void __init ath79_clocks_init(void) qca955x_clocks_init(); else BUG(); + + of_clk_init(NULL); } unsigned long __init @@ -469,22 +475,16 @@ ath79_get_sys_clk_rate(const char *id) return rate; } -/* - * Linux clock API - */ -int clk_enable(struct clk *clk) +#ifdef CONFIG_OF +static void __init ath79_clocks_init_dt(struct device_node *np) { - return 0; + of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data); } -EXPORT_SYMBOL(clk_enable); -void clk_disable(struct clk *clk) -{ -} -EXPORT_SYMBOL(clk_disable); - -unsigned long clk_get_rate(struct clk *clk) -{ - return clk->rate; -} -EXPORT_SYMBOL(clk_get_rate); +CLK_OF_DECLARE(ar7100, "qca,ar7100-pll", ath79_clocks_init_dt); +CLK_OF_DECLARE(ar7240, "qca,ar7240-pll", ath79_clocks_init_dt); +CLK_OF_DECLARE(ar9130, "qca,ar9130-pll", ath79_clocks_init_dt); +CLK_OF_DECLARE(ar9330, "qca,ar9330-pll", ath79_clocks_init_dt); +CLK_OF_DECLARE(ar9340, "qca,ar9340-pll", ath79_clocks_init_dt); +CLK_OF_DECLARE(ar9550, "qca,qca9550-pll", ath79_clocks_init_dt); +#endif diff --git a/arch/mips/ath79/common.c b/arch/mips/ath79/common.c index eb3966cd8cfc..3cedd1f95e0f 100644 --- a/arch/mips/ath79/common.c +++ b/arch/mips/ath79/common.c @@ -38,11 +38,27 @@ unsigned int ath79_soc_rev; void __iomem *ath79_pll_base; void __iomem *ath79_reset_base; EXPORT_SYMBOL_GPL(ath79_reset_base); -void __iomem *ath79_ddr_base; +static void __iomem *ath79_ddr_base; +static void __iomem *ath79_ddr_wb_flush_base; +static void __iomem *ath79_ddr_pci_win_base; + +void ath79_ddr_ctrl_init(void) +{ + ath79_ddr_base = ioremap_nocache(AR71XX_DDR_CTRL_BASE, + AR71XX_DDR_CTRL_SIZE); + if (soc_is_ar71xx() || soc_is_ar934x()) { + ath79_ddr_wb_flush_base = ath79_ddr_base + 0x9c; + ath79_ddr_pci_win_base = ath79_ddr_base + 0x7c; + } else { + ath79_ddr_wb_flush_base = ath79_ddr_base + 0x7c; + ath79_ddr_pci_win_base = 0; + } +} +EXPORT_SYMBOL_GPL(ath79_ddr_ctrl_init); void ath79_ddr_wb_flush(u32 reg) { - void __iomem *flush_reg = ath79_ddr_base + reg; + void __iomem *flush_reg = ath79_ddr_wb_flush_base + reg; /* Flush the DDR write buffer. */ __raw_writel(0x1, flush_reg); @@ -56,6 +72,21 @@ void ath79_ddr_wb_flush(u32 reg) } EXPORT_SYMBOL_GPL(ath79_ddr_wb_flush); +void ath79_ddr_set_pci_windows(void) +{ + BUG_ON(!ath79_ddr_pci_win_base); + + __raw_writel(AR71XX_PCI_WIN0_OFFS, ath79_ddr_pci_win_base + 0); + __raw_writel(AR71XX_PCI_WIN1_OFFS, ath79_ddr_pci_win_base + 1); + __raw_writel(AR71XX_PCI_WIN2_OFFS, ath79_ddr_pci_win_base + 2); + __raw_writel(AR71XX_PCI_WIN3_OFFS, ath79_ddr_pci_win_base + 3); + __raw_writel(AR71XX_PCI_WIN4_OFFS, ath79_ddr_pci_win_base + 4); + __raw_writel(AR71XX_PCI_WIN5_OFFS, ath79_ddr_pci_win_base + 5); + __raw_writel(AR71XX_PCI_WIN6_OFFS, ath79_ddr_pci_win_base + 6); + __raw_writel(AR71XX_PCI_WIN7_OFFS, ath79_ddr_pci_win_base + 7); +} +EXPORT_SYMBOL_GPL(ath79_ddr_set_pci_windows); + void ath79_device_reset_set(u32 mask) { unsigned long flags; diff --git a/arch/mips/ath79/common.h b/arch/mips/ath79/common.h index c39de61f9b36..e5ea71277f0c 100644 --- a/arch/mips/ath79/common.h +++ b/arch/mips/ath79/common.h @@ -22,6 +22,7 @@ void ath79_clocks_init(void); unsigned long ath79_get_sys_clk_rate(const char *id); +void ath79_ddr_ctrl_init(void); void ath79_ddr_wb_flush(unsigned int reg); void ath79_gpio_function_enable(u32 mask); diff --git a/arch/mips/ath79/dev-common.c b/arch/mips/ath79/dev-common.c index 516225d207ee..9d0172a4dc69 100644 --- a/arch/mips/ath79/dev-common.c +++ b/arch/mips/ath79/dev-common.c @@ -14,6 +14,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/platform_device.h> +#include <linux/platform_data/gpio-ath79.h> #include <linux/serial_8250.h> #include <linux/clk.h> #include <linux/err.h> @@ -106,3 +107,53 @@ void __init ath79_register_wdt(void) platform_device_register_simple("ath79-wdt", -1, &res, 1); } + +static struct ath79_gpio_platform_data ath79_gpio_pdata; + +static struct resource ath79_gpio_resources[] = { + { + .flags = IORESOURCE_MEM, + .start = AR71XX_GPIO_BASE, + .end = AR71XX_GPIO_BASE + AR71XX_GPIO_SIZE - 1, + }, + { + .start = ATH79_MISC_IRQ(2), + .end = ATH79_MISC_IRQ(2), + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device ath79_gpio_device = { + .name = "ath79-gpio", + .id = -1, + .resource = ath79_gpio_resources, + .num_resources = ARRAY_SIZE(ath79_gpio_resources), + .dev = { + .platform_data = &ath79_gpio_pdata + }, +}; + +void __init ath79_gpio_init(void) +{ + if (soc_is_ar71xx()) { + ath79_gpio_pdata.ngpios = AR71XX_GPIO_COUNT; + } else if (soc_is_ar7240()) { + ath79_gpio_pdata.ngpios = AR7240_GPIO_COUNT; + } else if (soc_is_ar7241() || soc_is_ar7242()) { + ath79_gpio_pdata.ngpios = AR7241_GPIO_COUNT; + } else if (soc_is_ar913x()) { + ath79_gpio_pdata.ngpios = AR913X_GPIO_COUNT; + } else if (soc_is_ar933x()) { + ath79_gpio_pdata.ngpios = AR933X_GPIO_COUNT; + } else if (soc_is_ar934x()) { + ath79_gpio_pdata.ngpios = AR934X_GPIO_COUNT; + ath79_gpio_pdata.oe_inverted = 1; + } else if (soc_is_qca955x()) { + ath79_gpio_pdata.ngpios = QCA955X_GPIO_COUNT; + ath79_gpio_pdata.oe_inverted = 1; + } else { + BUG(); + } + + platform_device_register(&ath79_gpio_device); +} diff --git a/arch/mips/ath79/gpio.c b/arch/mips/ath79/gpio.c index 8d025b028bb1..f59ccb26520a 100644 --- a/arch/mips/ath79/gpio.c +++ b/arch/mips/ath79/gpio.c @@ -20,13 +20,15 @@ #include <linux/io.h> #include <linux/ioport.h> #include <linux/gpio.h> +#include <linux/platform_data/gpio-ath79.h> +#include <linux/of_device.h> #include <asm/mach-ath79/ar71xx_regs.h> #include <asm/mach-ath79/ath79.h> #include "common.h" static void __iomem *ath79_gpio_base; -static unsigned long ath79_gpio_count; +static u32 ath79_gpio_count; static DEFINE_SPINLOCK(ath79_gpio_lock); static void __ath79_gpio_set_value(unsigned gpio, int value) @@ -178,39 +180,72 @@ void ath79_gpio_function_disable(u32 mask) ath79_gpio_function_setup(0, mask); } -void __init ath79_gpio_init(void) +static const struct of_device_id ath79_gpio_of_match[] = { + { .compatible = "qca,ar7100-gpio" }, + { .compatible = "qca,ar9340-gpio" }, + {}, +}; + +static int ath79_gpio_probe(struct platform_device *pdev) { + struct ath79_gpio_platform_data *pdata = pdev->dev.platform_data; + struct device_node *np = pdev->dev.of_node; + struct resource *res; + bool oe_inverted; int err; - if (soc_is_ar71xx()) - ath79_gpio_count = AR71XX_GPIO_COUNT; - else if (soc_is_ar7240()) - ath79_gpio_count = AR7240_GPIO_COUNT; - else if (soc_is_ar7241() || soc_is_ar7242()) - ath79_gpio_count = AR7241_GPIO_COUNT; - else if (soc_is_ar913x()) - ath79_gpio_count = AR913X_GPIO_COUNT; - else if (soc_is_ar933x()) - ath79_gpio_count = AR933X_GPIO_COUNT; - else if (soc_is_ar934x()) - ath79_gpio_count = AR934X_GPIO_COUNT; - else if (soc_is_qca955x()) - ath79_gpio_count = QCA955X_GPIO_COUNT; - else - BUG(); + if (np) { + err = of_property_read_u32(np, "ngpios", &ath79_gpio_count); + if (err) { + dev_err(&pdev->dev, "ngpios property is not valid\n"); + return err; + } + if (ath79_gpio_count >= 32) { + dev_err(&pdev->dev, "ngpios must be less than 32\n"); + return -EINVAL; + } + oe_inverted = of_device_is_compatible(np, "qca,ar9340-gpio"); + } else if (pdata) { + ath79_gpio_count = pdata->ngpios; + oe_inverted = pdata->oe_inverted; + } else { + dev_err(&pdev->dev, "No DT node or platform data found\n"); + return -EINVAL; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ath79_gpio_base = devm_ioremap_nocache( + &pdev->dev, res->start, resource_size(res)); + if (!ath79_gpio_base) + return -ENOMEM; - ath79_gpio_base = ioremap_nocache(AR71XX_GPIO_BASE, AR71XX_GPIO_SIZE); + ath79_gpio_chip.dev = &pdev->dev; ath79_gpio_chip.ngpio = ath79_gpio_count; - if (soc_is_ar934x() || soc_is_qca955x()) { + if (oe_inverted) { ath79_gpio_chip.direction_input = ar934x_gpio_direction_input; ath79_gpio_chip.direction_output = ar934x_gpio_direction_output; } err = gpiochip_add(&ath79_gpio_chip); - if (err) - panic("cannot add AR71xx GPIO chip, error=%d", err); + if (err) { + dev_err(&pdev->dev, + "cannot add AR71xx GPIO chip, error=%d", err); + return err; + } + + return 0; } +static struct platform_driver ath79_gpio_driver = { + .driver = { + .name = "ath79-gpio", + .of_match_table = ath79_gpio_of_match, + }, + .probe = ath79_gpio_probe, +}; + +module_platform_driver(ath79_gpio_driver); + int gpio_get_value(unsigned gpio) { if (gpio < ath79_gpio_count) diff --git a/arch/mips/ath79/irq.c b/arch/mips/ath79/irq.c index 6adae366f11a..afb009603f7f 100644 --- a/arch/mips/ath79/irq.c +++ b/arch/mips/ath79/irq.c @@ -15,7 +15,9 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/interrupt.h> -#include <linux/irq.h> +#include <linux/irqchip.h> +#include <linux/of_irq.h> +#include "../../../drivers/irqchip/irqchip.h" #include <asm/irq_cpu.h> #include <asm/mipsregs.h> @@ -23,9 +25,7 @@ #include <asm/mach-ath79/ath79.h> #include <asm/mach-ath79/ar71xx_regs.h> #include "common.h" - -static void (*ath79_ip2_handler)(void); -static void (*ath79_ip3_handler)(void); +#include "machtypes.h" static void ath79_misc_irq_handler(unsigned int irq, struct irq_desc *desc) { @@ -129,10 +129,10 @@ static void ar934x_ip2_irq_dispatch(unsigned int irq, struct irq_desc *desc) status = ath79_reset_rr(AR934X_RESET_REG_PCIE_WMAC_INT_STATUS); if (status & AR934X_PCIE_WMAC_INT_PCIE_ALL) { - ath79_ddr_wb_flush(AR934X_DDR_REG_FLUSH_PCIE); + ath79_ddr_wb_flush(3); generic_handle_irq(ATH79_IP2_IRQ(0)); } else if (status & AR934X_PCIE_WMAC_INT_WMAC_ALL) { - ath79_ddr_wb_flush(AR934X_DDR_REG_FLUSH_WMAC); + ath79_ddr_wb_flush(4); generic_handle_irq(ATH79_IP2_IRQ(1)); } else { spurious_interrupt(); @@ -235,128 +235,132 @@ static void qca955x_irq_init(void) irq_set_chained_handler(ATH79_CPU_IRQ(3), qca955x_ip3_irq_dispatch); } -asmlinkage void plat_irq_dispatch(void) -{ - unsigned long pending; - - pending = read_c0_status() & read_c0_cause() & ST0_IM; - - if (pending & STATUSF_IP7) - do_IRQ(ATH79_CPU_IRQ(7)); - - else if (pending & STATUSF_IP2) - ath79_ip2_handler(); - - else if (pending & STATUSF_IP4) - do_IRQ(ATH79_CPU_IRQ(4)); - - else if (pending & STATUSF_IP5) - do_IRQ(ATH79_CPU_IRQ(5)); - - else if (pending & STATUSF_IP3) - ath79_ip3_handler(); - - else if (pending & STATUSF_IP6) - do_IRQ(ATH79_CPU_IRQ(6)); - - else - spurious_interrupt(); -} - /* * The IP2/IP3 lines are tied to a PCI/WMAC/USB device. Drivers for * these devices typically allocate coherent DMA memory, however the * DMA controller may still have some unsynchronized data in the FIFO. * Issue a flush in the handlers to ensure that the driver sees * the update. + * + * This array map the interrupt lines to the DDR write buffer channels. */ -static void ath79_default_ip2_handler(void) -{ - do_IRQ(ATH79_CPU_IRQ(2)); -} +static unsigned irq_wb_chan[8] = { + -1, -1, -1, -1, -1, -1, -1, -1, +}; -static void ath79_default_ip3_handler(void) +asmlinkage void plat_irq_dispatch(void) { - do_IRQ(ATH79_CPU_IRQ(3)); -} + unsigned long pending; + int irq; -static void ar71xx_ip2_handler(void) -{ - ath79_ddr_wb_flush(AR71XX_DDR_REG_FLUSH_PCI); - do_IRQ(ATH79_CPU_IRQ(2)); -} + pending = read_c0_status() & read_c0_cause() & ST0_IM; -static void ar724x_ip2_handler(void) -{ - ath79_ddr_wb_flush(AR724X_DDR_REG_FLUSH_PCIE); - do_IRQ(ATH79_CPU_IRQ(2)); -} + if (!pending) { + spurious_interrupt(); + return; + } -static void ar913x_ip2_handler(void) -{ - ath79_ddr_wb_flush(AR913X_DDR_REG_FLUSH_WMAC); - do_IRQ(ATH79_CPU_IRQ(2)); + pending >>= CAUSEB_IP; + while (pending) { + irq = fls(pending) - 1; + if (irq < ARRAY_SIZE(irq_wb_chan) && irq_wb_chan[irq] != -1) + ath79_ddr_wb_flush(irq_wb_chan[irq]); + do_IRQ(MIPS_CPU_IRQ_BASE + irq); + pending &= ~BIT(irq); + } } -static void ar933x_ip2_handler(void) +#ifdef CONFIG_IRQCHIP +static int misc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { - ath79_ddr_wb_flush(AR933X_DDR_REG_FLUSH_WMAC); - do_IRQ(ATH79_CPU_IRQ(2)); + irq_set_chip_and_handler(irq, &ath79_misc_irq_chip, handle_level_irq); + return 0; } -static void ar71xx_ip3_handler(void) -{ - ath79_ddr_wb_flush(AR71XX_DDR_REG_FLUSH_USB); - do_IRQ(ATH79_CPU_IRQ(3)); -} +static const struct irq_domain_ops misc_irq_domain_ops = { + .xlate = irq_domain_xlate_onecell, + .map = misc_map, +}; -static void ar724x_ip3_handler(void) +static int __init ath79_misc_intc_of_init( + struct device_node *node, struct device_node *parent) { - ath79_ddr_wb_flush(AR724X_DDR_REG_FLUSH_USB); - do_IRQ(ATH79_CPU_IRQ(3)); -} + void __iomem *base = ath79_reset_base; + struct irq_domain *domain; + int irq; -static void ar913x_ip3_handler(void) -{ - ath79_ddr_wb_flush(AR913X_DDR_REG_FLUSH_USB); - do_IRQ(ATH79_CPU_IRQ(3)); -} + irq = irq_of_parse_and_map(node, 0); + if (!irq) + panic("Failed to get MISC IRQ"); -static void ar933x_ip3_handler(void) -{ - ath79_ddr_wb_flush(AR933X_DDR_REG_FLUSH_USB); - do_IRQ(ATH79_CPU_IRQ(3)); + domain = irq_domain_add_legacy(node, ATH79_MISC_IRQ_COUNT, + ATH79_MISC_IRQ_BASE, 0, &misc_irq_domain_ops, NULL); + if (!domain) + panic("Failed to add MISC irqdomain"); + + /* Disable and clear all interrupts */ + __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE); + __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS); + + + irq_set_chained_handler(irq, ath79_misc_irq_handler); + + return 0; } +IRQCHIP_DECLARE(ath79_misc_intc, "qca,ar7100-misc-intc", + ath79_misc_intc_of_init); -static void ar934x_ip3_handler(void) +static int __init ar79_cpu_intc_of_init( + struct device_node *node, struct device_node *parent) { - ath79_ddr_wb_flush(AR934X_DDR_REG_FLUSH_USB); - do_IRQ(ATH79_CPU_IRQ(3)); + int err, i, count; + + /* Fill the irq_wb_chan table */ + count = of_count_phandle_with_args( + node, "qca,ddr-wb-channels", "#qca,ddr-wb-channel-cells"); + + for (i = 0; i < count; i++) { + struct of_phandle_args args; + u32 irq = i; + + of_property_read_u32_index( + node, "qca,ddr-wb-channel-interrupts", i, &irq); + if (irq >= ARRAY_SIZE(irq_wb_chan)) + continue; + + err = of_parse_phandle_with_args( + node, "qca,ddr-wb-channels", + "#qca,ddr-wb-channel-cells", + i, &args); + if (err) + return err; + + irq_wb_chan[irq] = args.args[0]; + pr_info("IRQ: Set flush channel of IRQ%d to %d\n", + irq, args.args[0]); + } + + return mips_cpu_irq_of_init(node, parent); } +IRQCHIP_DECLARE(ar79_cpu_intc, "qca,ar7100-cpu-intc", + ar79_cpu_intc_of_init); + +#endif void __init arch_init_irq(void) { - if (soc_is_ar71xx()) { - ath79_ip2_handler = ar71xx_ip2_handler; - ath79_ip3_handler = ar71xx_ip3_handler; - } else if (soc_is_ar724x()) { - ath79_ip2_handler = ar724x_ip2_handler; - ath79_ip3_handler = ar724x_ip3_handler; - } else if (soc_is_ar913x()) { - ath79_ip2_handler = ar913x_ip2_handler; - ath79_ip3_handler = ar913x_ip3_handler; - } else if (soc_is_ar933x()) { - ath79_ip2_handler = ar933x_ip2_handler; - ath79_ip3_handler = ar933x_ip3_handler; + if (mips_machtype == ATH79_MACH_GENERIC_OF) { + irqchip_init(); + return; + } + + if (soc_is_ar71xx() || soc_is_ar724x() || + soc_is_ar913x() || soc_is_ar933x()) { + irq_wb_chan[2] = 3; + irq_wb_chan[3] = 2; } else if (soc_is_ar934x()) { - ath79_ip2_handler = ath79_default_ip2_handler; - ath79_ip3_handler = ar934x_ip3_handler; - } else if (soc_is_qca955x()) { - ath79_ip2_handler = ath79_default_ip2_handler; - ath79_ip3_handler = ath79_default_ip3_handler; - } else { - BUG(); + irq_wb_chan[3] = 2; } mips_cpu_irq_init(); diff --git a/arch/mips/ath79/machtypes.h b/arch/mips/ath79/machtypes.h index 26254058c545..a13db3d15c8f 100644 --- a/arch/mips/ath79/machtypes.h +++ b/arch/mips/ath79/machtypes.h @@ -15,6 +15,7 @@ #include <asm/mips_machine.h> enum ath79_mach_type { + ATH79_MACH_GENERIC_OF = -1, /* Device tree board */ ATH79_MACH_GENERIC = 0, ATH79_MACH_AP121, /* Atheros AP121 reference board */ ATH79_MACH_AP136_010, /* Atheros AP136-010 reference board */ diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 7fc8397d16f2..01a644f174dd 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -17,12 +17,16 @@ #include <linux/bootmem.h> #include <linux/err.h> #include <linux/clk.h> +#include <linux/of_platform.h> +#include <linux/of_fdt.h> #include <asm/bootinfo.h> #include <asm/idle.h> #include <asm/time.h> /* for mips_hpt_frequency */ #include <asm/reboot.h> /* for _machine_{restart,halt} */ #include <asm/mips_machine.h> +#include <asm/prom.h> +#include <asm/fw/fw.h> #include <asm/mach-ath79/ath79.h> #include <asm/mach-ath79/ar71xx_regs.h> @@ -194,17 +198,28 @@ unsigned int get_c0_compare_int(void) void __init plat_mem_setup(void) { + unsigned long fdt_start; + set_io_port_base(KSEG1); + /* Get the position of the FDT passed by the bootloader */ + fdt_start = fw_getenvl("fdt_start"); + if (fdt_start) + __dt_setup_arch((void *)KSEG0ADDR(fdt_start)); +#ifdef CONFIG_BUILTIN_DTB + else + __dt_setup_arch(__dtb_start); +#endif + ath79_reset_base = ioremap_nocache(AR71XX_RESET_BASE, AR71XX_RESET_SIZE); ath79_pll_base = ioremap_nocache(AR71XX_PLL_BASE, AR71XX_PLL_SIZE); - ath79_ddr_base = ioremap_nocache(AR71XX_DDR_CTRL_BASE, - AR71XX_DDR_CTRL_SIZE); + ath79_ddr_ctrl_init(); ath79_detect_sys_type(); - detect_memory_region(0, ATH79_MEM_SIZE_MIN, ATH79_MEM_SIZE_MAX); + if (mips_machtype != ATH79_MACH_GENERIC_OF) + detect_memory_region(0, ATH79_MEM_SIZE_MIN, ATH79_MEM_SIZE_MAX); _machine_restart = ath79_restart; _machine_halt = ath79_halt; @@ -236,6 +251,10 @@ void __init plat_time_init(void) static int __init ath79_setup(void) { + of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); + if (mips_machtype == ATH79_MACH_GENERIC_OF) + return 0; + ath79_gpio_init(); ath79_register_uart(); ath79_register_wdt(); @@ -247,6 +266,11 @@ static int __init ath79_setup(void) arch_initcall(ath79_setup); +void __init device_tree_init(void) +{ + unflatten_and_copy_device_tree(); +} + static void __init ath79_generic_init(void) { /* Nothing to do */ diff --git a/arch/mips/bcm47xx/Kconfig b/arch/mips/bcm47xx/Kconfig index fc21d3659fa0..51ed599cc894 100644 --- a/arch/mips/bcm47xx/Kconfig +++ b/arch/mips/bcm47xx/Kconfig @@ -25,7 +25,6 @@ config BCM47XX_BCMA select BCMA select BCMA_HOST_SOC select BCMA_DRIVER_MIPS - select BCMA_HOST_PCI if PCI select BCMA_DRIVER_PCI_HOSTMODE if PCI select BCMA_DRIVER_GPIO default y diff --git a/arch/mips/bcm47xx/Makefile b/arch/mips/bcm47xx/Makefile index d58c51b5e501..66bea4ecf449 100644 --- a/arch/mips/bcm47xx/Makefile +++ b/arch/mips/bcm47xx/Makefile @@ -3,5 +3,5 @@ # under Linux. # -obj-y += irq.o nvram.o prom.o serial.o setup.o time.o sprom.o +obj-y += irq.o prom.o serial.o setup.o time.o sprom.o obj-y += board.o buttons.o leds.o workarounds.o diff --git a/arch/mips/bcm47xx/board.c b/arch/mips/bcm47xx/board.c index bd56415f2f3b..a88975a55c4d 100644 --- a/arch/mips/bcm47xx/board.c +++ b/arch/mips/bcm47xx/board.c @@ -149,6 +149,7 @@ struct bcm47xx_board_type_list2 bcm47xx_board_list_boot_hw[] __initconst = { /* board_id */ static const struct bcm47xx_board_type_list1 bcm47xx_board_list_board_id[] __initconst = { + {{BCM47XX_BOARD_LUXUL_XWR_1750_V1, "Luxul XWR-1750 V1"}, "luxul_xwr1750_v1"}, {{BCM47XX_BOARD_NETGEAR_WGR614V8, "Netgear WGR614 V8"}, "U12H072T00_NETGEAR"}, {{BCM47XX_BOARD_NETGEAR_WGR614V9, "Netgear WGR614 V9"}, "U12H094T00_NETGEAR"}, {{BCM47XX_BOARD_NETGEAR_WGR614_V10, "Netgear WGR614 V10"}, "U12H139T01_NETGEAR"}, diff --git a/arch/mips/bcm47xx/buttons.c b/arch/mips/bcm47xx/buttons.c index 276276a8c6d7..08a4abf09a33 100644 --- a/arch/mips/bcm47xx/buttons.c +++ b/arch/mips/bcm47xx/buttons.c @@ -299,6 +299,13 @@ bcm47xx_buttons_linksys_wrtsl54gs[] __initconst = { BCM47XX_GPIO_KEY(6, KEY_RESTART), }; +/* Luxul */ + +static const struct gpio_keys_button +bcm47xx_buttons_luxul_xwr_1750_v1[] = { + BCM47XX_GPIO_KEY(14, BTN_TASK), +}; + /* Microsoft */ static const struct gpio_keys_button @@ -555,6 +562,10 @@ int __init bcm47xx_buttons_register(void) err = bcm47xx_copy_bdata(bcm47xx_buttons_linksys_wrtsl54gs); break; + case BCM47XX_BOARD_LUXUL_XWR_1750_V1: + err = bcm47xx_copy_bdata(bcm47xx_buttons_luxul_xwr_1750_v1); + break; + case BCM47XX_BOARD_MICROSOFT_MN700: err = bcm47xx_copy_bdata(bcm47xx_buttons_microsoft_nm700); break; diff --git a/arch/mips/bcm47xx/leds.c b/arch/mips/bcm47xx/leds.c index 0e4ade342333..d20ae63eb3c2 100644 --- a/arch/mips/bcm47xx/leds.c +++ b/arch/mips/bcm47xx/leds.c @@ -370,6 +370,16 @@ bcm47xx_leds_linksys_wrtsl54gs[] __initconst = { BCM47XX_GPIO_LED(7, "orange", "wps", 1, LEDS_GPIO_DEFSTATE_OFF), }; +/* Luxul */ + +static const struct gpio_led +bcm47xx_leds_luxul_xwr_1750_v1[] __initconst = { + BCM47XX_GPIO_LED(5, "green", "5ghz", 0, LEDS_GPIO_DEFSTATE_OFF), + BCM47XX_GPIO_LED(12, "green", "usb", 0, LEDS_GPIO_DEFSTATE_OFF), + BCM47XX_GPIO_LED_TRIGGER(13, "green", "status", 0, "timer"), + BCM47XX_GPIO_LED(15, "green", "wps", 0, LEDS_GPIO_DEFSTATE_OFF), +}; + /* Microsoft */ static const struct gpio_led @@ -623,6 +633,10 @@ void __init bcm47xx_leds_register(void) bcm47xx_set_pdata(bcm47xx_leds_linksys_wrtsl54gs); break; + case BCM47XX_BOARD_LUXUL_XWR_1750_V1: + bcm47xx_set_pdata(bcm47xx_leds_luxul_xwr_1750_v1); + break; + case BCM47XX_BOARD_MICROSOFT_MN700: bcm47xx_set_pdata(bcm47xx_leds_microsoft_nm700); break; diff --git a/arch/mips/bcm47xx/prom.c b/arch/mips/bcm47xx/prom.c index ab698bad6d62..135a5407f015 100644 --- a/arch/mips/bcm47xx/prom.c +++ b/arch/mips/bcm47xx/prom.c @@ -126,7 +126,7 @@ void __init prom_free_prom_memory(void) /* Stripped version of tlb_init, with the call to build_tlb_refill_handler * dropped. Calling it at this stage causes a hang. */ -void __cpuinit early_tlb_init(void) +void early_tlb_init(void) { write_c0_pagemask(PM_DEFAULT_MASK); write_c0_wired(0); diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index 82ff9fd2ab6e..98c075f81795 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -206,9 +206,6 @@ void __init bcm47xx_bus_setup(void) err = bcma_host_soc_init(&bcm47xx_bus.bcma); if (err) panic("Failed to initialize BCMA bus (err %d)", err); - - bcm47xx_fill_bcma_boardinfo(&bcm47xx_bus.bcma.bus.boardinfo, - NULL); } #endif diff --git a/arch/mips/bcm47xx/sprom.c b/arch/mips/bcm47xx/sprom.c index 68ebf2322f8b..2d5c7a7f24bb 100644 --- a/arch/mips/bcm47xx/sprom.c +++ b/arch/mips/bcm47xx/sprom.c @@ -200,7 +200,13 @@ static void bcm47xx_sprom_fill_auto(struct ssb_sprom *sprom, const char *pre = prefix; bool fb = fallback; + /* Broadcom extracts it for rev 8+ but it was found on 2 and 4 too */ + ENTRY(0xfffffffe, u16, pre, "devid", dev_id, 0, fallback); + ENTRY(0xfffffffe, u16, pre, "boardrev", board_rev, 0, true); + ENTRY(0xfffffffe, u32, pre, "boardflags", boardflags, 0, fb); + ENTRY(0xfffffff0, u32, pre, "boardflags2", boardflags2, 0, fb); + ENTRY(0xfffff800, u32, pre, "boardflags3", boardflags3, 0, fb); ENTRY(0x00000002, u16, pre, "boardflags", boardflags_lo, 0, fb); ENTRY(0xfffffffc, u16, pre, "boardtype", board_type, 0, true); ENTRY(0xfffffffe, u16, pre, "boardnum", board_num, 0, fb); @@ -409,27 +415,6 @@ static void bcm47xx_sprom_fill_auto(struct ssb_sprom *sprom, } #undef ENTRY /* It's specififc, uses local variable, don't use it (again). */ -static void bcm47xx_fill_sprom_r1234589(struct ssb_sprom *sprom, - const char *prefix, bool fallback) -{ - nvram_read_u16(prefix, NULL, "devid", &sprom->dev_id, 0, fallback); - nvram_read_alpha2(prefix, "ccode", sprom->alpha2, fallback); -} - -static void bcm47xx_fill_sprom_r3(struct ssb_sprom *sprom, const char *prefix, - bool fallback) -{ - nvram_read_leddc(prefix, "leddc", &sprom->leddc_on_time, - &sprom->leddc_off_time, fallback); -} - -static void bcm47xx_fill_sprom_r4589(struct ssb_sprom *sprom, - const char *prefix, bool fallback) -{ - nvram_read_leddc(prefix, "leddc", &sprom->leddc_on_time, - &sprom->leddc_off_time, fallback); -} - static void bcm47xx_fill_sprom_path_r4589(struct ssb_sprom *sprom, const char *prefix, bool fallback) { @@ -528,6 +513,8 @@ static int mac_addr_used = 2; static void bcm47xx_fill_sprom_ethernet(struct ssb_sprom *sprom, const char *prefix, bool fallback) { + bool fb = fallback; + nvram_read_macaddr(prefix, "et0macaddr", sprom->et0mac, fallback); nvram_read_u8(prefix, NULL, "et0mdcport", &sprom->et0mdcport, 0, fallback); @@ -540,6 +527,10 @@ static void bcm47xx_fill_sprom_ethernet(struct ssb_sprom *sprom, nvram_read_u8(prefix, NULL, "et1phyaddr", &sprom->et1phyaddr, 0, fallback); + nvram_read_macaddr(prefix, "et2macaddr", sprom->et2mac, fb); + nvram_read_u8(prefix, NULL, "et2mdcport", &sprom->et2mdcport, 0, fb); + nvram_read_u8(prefix, NULL, "et2phyaddr", &sprom->et2phyaddr, 0, fb); + nvram_read_macaddr(prefix, "macaddr", sprom->il0mac, fallback); nvram_read_macaddr(prefix, "il0macaddr", sprom->il0mac, fallback); @@ -580,39 +571,22 @@ void bcm47xx_fill_sprom(struct ssb_sprom *sprom, const char *prefix, nvram_read_u8(prefix, NULL, "sromrev", &sprom->revision, 0, fallback); + /* Entries requiring custom functions */ + nvram_read_alpha2(prefix, "ccode", sprom->alpha2, fallback); + if (sprom->revision >= 3) + nvram_read_leddc(prefix, "leddc", &sprom->leddc_on_time, + &sprom->leddc_off_time, fallback); + switch (sprom->revision) { - case 1: - bcm47xx_fill_sprom_r1234589(sprom, prefix, fallback); - break; - case 2: - bcm47xx_fill_sprom_r1234589(sprom, prefix, fallback); - break; - case 3: - bcm47xx_fill_sprom_r1234589(sprom, prefix, fallback); - bcm47xx_fill_sprom_r3(sprom, prefix, fallback); - break; case 4: case 5: - bcm47xx_fill_sprom_r1234589(sprom, prefix, fallback); - bcm47xx_fill_sprom_r4589(sprom, prefix, fallback); bcm47xx_fill_sprom_path_r4589(sprom, prefix, fallback); bcm47xx_fill_sprom_path_r45(sprom, prefix, fallback); break; case 8: - bcm47xx_fill_sprom_r1234589(sprom, prefix, fallback); - bcm47xx_fill_sprom_r4589(sprom, prefix, fallback); - bcm47xx_fill_sprom_path_r4589(sprom, prefix, fallback); - break; case 9: - bcm47xx_fill_sprom_r1234589(sprom, prefix, fallback); - bcm47xx_fill_sprom_r4589(sprom, prefix, fallback); bcm47xx_fill_sprom_path_r4589(sprom, prefix, fallback); break; - default: - pr_warn("Unsupported SPROM revision %d detected. Will extract v1\n", - sprom->revision); - sprom->revision = 1; - bcm47xx_fill_sprom_r1234589(sprom, prefix, fallback); } bcm47xx_sprom_fill_auto(sprom, prefix, fallback); @@ -631,19 +605,6 @@ void bcm47xx_fill_ssb_boardinfo(struct ssb_boardinfo *boardinfo, } #endif -#ifdef CONFIG_BCM47XX_BCMA -void bcm47xx_fill_bcma_boardinfo(struct bcma_boardinfo *boardinfo, - const char *prefix) -{ - nvram_read_u16(prefix, NULL, "boardvendor", &boardinfo->vendor, 0, - true); - if (!boardinfo->vendor) - boardinfo->vendor = SSB_BOARDVENDOR_BCM; - - nvram_read_u16(prefix, NULL, "boardtype", &boardinfo->type, 0, true); -} -#endif - #if defined(CONFIG_BCM47XX_SSB) static int bcm47xx_get_sprom_ssb(struct ssb_bus *bus, struct ssb_sprom *out) { @@ -698,33 +659,46 @@ static void bcm47xx_sprom_apply_prefix_alias(char *prefix, size_t prefix_size) static int bcm47xx_get_sprom_bcma(struct bcma_bus *bus, struct ssb_sprom *out) { - char prefix[10]; + struct bcma_boardinfo *binfo = &bus->boardinfo; struct bcma_device *core; + char buf[10]; + char *prefix; + bool fallback = false; switch (bus->hosttype) { case BCMA_HOSTTYPE_PCI: memset(out, 0, sizeof(struct ssb_sprom)); - snprintf(prefix, sizeof(prefix), "pci/%u/%u/", + snprintf(buf, sizeof(buf), "pci/%u/%u/", bus->host_pci->bus->number + 1, PCI_SLOT(bus->host_pci->devfn)); - bcm47xx_sprom_apply_prefix_alias(prefix, sizeof(prefix)); - bcm47xx_fill_sprom(out, prefix, false); - return 0; + bcm47xx_sprom_apply_prefix_alias(buf, sizeof(buf)); + prefix = buf; + break; case BCMA_HOSTTYPE_SOC: memset(out, 0, sizeof(struct ssb_sprom)); core = bcma_find_core(bus, BCMA_CORE_80211); if (core) { - snprintf(prefix, sizeof(prefix), "sb/%u/", + snprintf(buf, sizeof(buf), "sb/%u/", core->core_index); - bcm47xx_fill_sprom(out, prefix, true); + prefix = buf; + fallback = true; } else { - bcm47xx_fill_sprom(out, NULL, false); + prefix = NULL; } - return 0; + break; default: pr_warn("Unable to fill SPROM for given bustype.\n"); return -EINVAL; } + + nvram_read_u16(prefix, NULL, "boardvendor", &binfo->vendor, 0, true); + if (!binfo->vendor) + binfo->vendor = SSB_BOARDVENDOR_BCM; + nvram_read_u16(prefix, NULL, "boardtype", &binfo->type, 0, true); + + bcm47xx_fill_sprom(out, prefix, fallback); + + return 0; } #endif diff --git a/arch/mips/bmips/Kconfig b/arch/mips/bmips/Kconfig index f35c84c019df..e2c4fd682c74 100644 --- a/arch/mips/bmips/Kconfig +++ b/arch/mips/bmips/Kconfig @@ -57,6 +57,10 @@ config DT_BCM97425SVMB bool "BCM97425SVMB" select BUILTIN_DTB +config DT_BCM97435SVMB + bool "BCM97435SVMB" + select BUILTIN_DTB + endchoice endif diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c index fae800e8b1e1..526ec2789bb9 100644 --- a/arch/mips/bmips/setup.c +++ b/arch/mips/bmips/setup.c @@ -149,6 +149,8 @@ void __init plat_mem_setup(void) /* intended to somewhat resemble ARM; see Documentation/arm/Booting */ if (fw_arg0 == 0 && fw_arg1 == 0xffffffff) dtb = phys_to_virt(fw_arg2); + else if (fw_arg0 == -2) /* UHI interface */ + dtb = (void *)fw_arg1; else if (__dtb_start != __dtb_end) dtb = (void *)__dtb_start; else diff --git a/arch/mips/boot/compressed/head.S b/arch/mips/boot/compressed/head.S index 409cb483a9ff..c580e853b9fb 100644 --- a/arch/mips/boot/compressed/head.S +++ b/arch/mips/boot/compressed/head.S @@ -25,6 +25,22 @@ start: move s2, a2 move s3, a3 +#ifdef CONFIG_MIPS_ZBOOT_APPENDED_DTB + PTR_LA t0, __appended_dtb +#ifdef CONFIG_CPU_BIG_ENDIAN + li t1, 0xd00dfeed +#else + li t1, 0xedfe0dd0 +#endif + lw t2, (t0) + bne t1, t2, not_found + nop + + move s1, t0 + PTR_LI s0, -2 +not_found: +#endif + /* Clear BSS */ PTR_LA a0, _edata PTR_LA a2, _end diff --git a/arch/mips/boot/compressed/ld.script b/arch/mips/boot/compressed/ld.script index 5a33409c7f63..2ed08fbef8e7 100644 --- a/arch/mips/boot/compressed/ld.script +++ b/arch/mips/boot/compressed/ld.script @@ -29,8 +29,12 @@ SECTIONS *(.image) __image_end = .; CONSTRUCTORS + . = ALIGN(16); } - . = ALIGN(16); + __appended_dtb = .; + /* leave space for appended DTB */ + . += 0x100000; + _edata = .; /* End of data section */ diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c index 237494b7a21a..408799a839b4 100644 --- a/arch/mips/boot/compressed/uart-16550.c +++ b/arch/mips/boot/compressed/uart-16550.c @@ -7,7 +7,7 @@ #include <asm/addrspace.h> -#if defined(CONFIG_MACH_LOONGSON) || defined(CONFIG_MIPS_MALTA) +#if defined(CONFIG_MACH_LOONGSON64) || defined(CONFIG_MIPS_MALTA) #define UART_BASE 0x1fd003f8 #define PORT(offset) (CKSEG1ADDR(UART_BASE) + (offset)) #endif diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile index 5d95e4bd709a..778a34028c1b 100644 --- a/arch/mips/boot/dts/Makefile +++ b/arch/mips/boot/dts/Makefile @@ -1,8 +1,10 @@ dts-dirs += brcm dts-dirs += cavium-octeon +dts-dirs += ingenic dts-dirs += lantiq dts-dirs += mti dts-dirs += netlogic +dts-dirs += qca dts-dirs += ralink obj-y := $(addsuffix /, $(dts-dirs)) diff --git a/arch/mips/boot/dts/brcm/Makefile b/arch/mips/boot/dts/brcm/Makefile index 1c8353bfe003..eabeb603e805 100644 --- a/arch/mips/boot/dts/brcm/Makefile +++ b/arch/mips/boot/dts/brcm/Makefile @@ -9,6 +9,20 @@ dtb-$(CONFIG_DT_BCM97360SVMB) += bcm97360svmb.dtb dtb-$(CONFIG_DT_BCM97362SVMB) += bcm97362svmb.dtb dtb-$(CONFIG_DT_BCM97420C) += bcm97420c.dtb dtb-$(CONFIG_DT_BCM97425SVMB) += bcm97425svmb.dtb +dtb-$(CONFIG_DT_BCM97435SVMB) += bcm97435svmb.dtb + +dtb-$(CONFIG_DT_NONE) += \ + bcm93384wvg.dtb \ + bcm93384wvg_viper.dtb \ + bcm96368mvwg.dtb \ + bcm9ejtagprb.dtb \ + bcm97125cbmb.dtb \ + bcm97346dbsmb.dtb \ + bcm97358svmb.dtb \ + bcm97360svmb.dtb \ + bcm97362svmb.dtb \ + bcm97420c.dtb \ + bcm97425svmb.dtb obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) diff --git a/arch/mips/boot/dts/brcm/bcm7346.dtsi b/arch/mips/boot/dts/brcm/bcm7346.dtsi index 1f30728a3177..d817bb46b934 100644 --- a/arch/mips/boot/dts/brcm/bcm7346.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7346.dtsi @@ -24,6 +24,8 @@ aliases { uart0 = &uart0; + uart1 = &uart1; + uart2 = &uart2; }; cpu_intc: cpu_intc { @@ -118,6 +120,30 @@ status = "disabled"; }; + uart1: serial@406940 { + compatible = "ns16550a"; + reg = <0x406940 0x20>; + reg-io-width = <0x4>; + reg-shift = <0x2>; + native-endian; + interrupt-parent = <&periph_intc>; + interrupts = <65>; + clocks = <&uart_clk>; + status = "disabled"; + }; + + uart2: serial@406980 { + compatible = "ns16550a"; + reg = <0x406980 0x20>; + reg-io-width = <0x4>; + reg-shift = <0x2>; + native-endian; + interrupt-parent = <&periph_intc>; + interrupts = <66>; + clocks = <&uart_clk>; + status = "disabled"; + }; + enet0: ethernet@430000 { phy-mode = "internal"; phy-handle = <&phy1>; diff --git a/arch/mips/boot/dts/brcm/bcm7358.dtsi b/arch/mips/boot/dts/brcm/bcm7358.dtsi index 2c2aa9368f76..277a90adc1a7 100644 --- a/arch/mips/boot/dts/brcm/bcm7358.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7358.dtsi @@ -18,6 +18,8 @@ aliases { uart0 = &uart0; + uart1 = &uart1; + uart2 = &uart2; }; cpu_intc: cpu_intc { @@ -112,6 +114,30 @@ status = "disabled"; }; + uart1: serial@406840 { + compatible = "ns16550a"; + reg = <0x406840 0x20>; + reg-io-width = <0x4>; + reg-shift = <0x2>; + native-endian; + interrupt-parent = <&periph_intc>; + interrupts = <62>; + clocks = <&uart_clk>; + status = "disabled"; + }; + + uart2: serial@406880 { + compatible = "ns16550a"; + reg = <0x406880 0x20>; + reg-io-width = <0x4>; + reg-shift = <0x2>; + native-endian; + interrupt-parent = <&periph_intc>; + interrupts = <63>; + clocks = <&uart_clk>; + status = "disabled"; + }; + enet0: ethernet@430000 { phy-mode = "internal"; phy-handle = <&phy1>; diff --git a/arch/mips/boot/dts/brcm/bcm7360.dtsi b/arch/mips/boot/dts/brcm/bcm7360.dtsi index f23b0aed276f..9e1e571ba346 100644 --- a/arch/mips/boot/dts/brcm/bcm7360.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7360.dtsi @@ -18,6 +18,8 @@ aliases { uart0 = &uart0; + uart1 = &uart1; + uart2 = &uart2; }; cpu_intc: cpu_intc { @@ -112,6 +114,30 @@ status = "disabled"; }; + uart1: serial@406840 { + compatible = "ns16550a"; + reg = <0x406840 0x20>; + reg-io-width = <0x4>; + reg-shift = <0x2>; + native-endian; + interrupt-parent = <&periph_intc>; + interrupts = <62>; + clocks = <&uart_clk>; + status = "disabled"; + }; + + uart2: serial@406880 { + compatible = "ns16550a"; + reg = <0x406880 0x20>; + reg-io-width = <0x4>; + reg-shift = <0x2>; + native-endian; + interrupt-parent = <&periph_intc>; + interrupts = <63>; + clocks = <&uart_clk>; + status = "disabled"; + }; + enet0: ethernet@430000 { phy-mode = "internal"; phy-handle = <&phy1>; diff --git a/arch/mips/boot/dts/brcm/bcm7362.dtsi b/arch/mips/boot/dts/brcm/bcm7362.dtsi index da99db665bbc..6e65db86fc61 100644 --- a/arch/mips/boot/dts/brcm/bcm7362.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7362.dtsi @@ -24,6 +24,8 @@ aliases { uart0 = &uart0; + uart1 = &uart1; + uart2 = &uart2; }; cpu_intc: cpu_intc { @@ -118,6 +120,30 @@ status = "disabled"; }; + uart1: serial@406840 { + compatible = "ns16550a"; + reg = <0x406840 0x20>; + reg-io-width = <0x4>; + reg-shift = <0x2>; + native-endian; + interrupt-parent = <&periph_intc>; + interrupts = <62>; + clocks = <&uart_clk>; + status = "disabled"; + }; + + uart2: serial@406880 { + compatible = "ns16550a"; + reg = <0x406880 0x20>; + reg-io-width = <0x4>; + reg-shift = <0x2>; + native-endian; + interrupt-parent = <&periph_intc>; + interrupts = <63>; + clocks = <&uart_clk>; + status = "disabled"; + }; + enet0: ethernet@430000 { phy-mode = "internal"; phy-handle = <&phy1>; diff --git a/arch/mips/boot/dts/brcm/bcm7435.dtsi b/arch/mips/boot/dts/brcm/bcm7435.dtsi new file mode 100644 index 000000000000..8b9432cc062b --- /dev/null +++ b/arch/mips/boot/dts/brcm/bcm7435.dtsi @@ -0,0 +1,239 @@ +/ { + #address-cells = <1>; + #size-cells = <1>; + compatible = "brcm,bcm7435"; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + mips-hpt-frequency = <163125000>; + + cpu@0 { + compatible = "brcm,bmips5200"; + device_type = "cpu"; + reg = <0>; + }; + + cpu@1 { + compatible = "brcm,bmips5200"; + device_type = "cpu"; + reg = <1>; + }; + + cpu@2 { + compatible = "brcm,bmips5200"; + device_type = "cpu"; + reg = <2>; + }; + + cpu@3 { + compatible = "brcm,bmips5200"; + device_type = "cpu"; + reg = <3>; + }; + }; + + aliases { + uart0 = &uart0; + }; + + cpu_intc: cpu_intc { + #address-cells = <0>; + compatible = "mti,cpu-interrupt-controller"; + + interrupt-controller; + #interrupt-cells = <1>; + }; + + clocks { + uart_clk: uart_clk { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <81000000>; + }; + }; + + rdb { + #address-cells = <1>; + #size-cells = <1>; + + compatible = "simple-bus"; + ranges = <0 0x10000000 0x01000000>; + + periph_intc: periph_intc@41b500 { + compatible = "brcm,bcm7038-l1-intc"; + reg = <0x41b500 0x40>, <0x41b600 0x40>; + + interrupt-controller; + #interrupt-cells = <1>; + + interrupt-parent = <&cpu_intc>; + interrupts = <2>, <3>; + }; + + sun_l2_intc: sun_l2_intc@403000 { + compatible = "brcm,l2-intc"; + reg = <0x403000 0x30>; + interrupt-controller; + #interrupt-cells = <1>; + interrupt-parent = <&periph_intc>; + interrupts = <52>; + }; + + gisb-arb@400000 { + compatible = "brcm,bcm7400-gisb-arb"; + reg = <0x400000 0xdc>; + native-endian; + interrupt-parent = <&sun_l2_intc>; + interrupts = <0>, <2>; + brcm,gisb-arb-master-mask = <0xf77f>; + brcm,gisb-arb-master-names = "ssp_0", "cpu_0", "webcpu_0", + "pcie_0", "bsp_0", + "rdc_0", "raaga_0", + "avd_1", "jtag_0", + "svd_0", "vice_0", + "vice_1", "raaga_1", + "scpu"; + }; + + upg_irq0_intc: upg_irq0_intc@406780 { + compatible = "brcm,bcm7120-l2-intc"; + reg = <0x406780 0x8>; + + brcm,int-map-mask = <0x44>; + brcm,int-fwd-mask = <0x70000>; + + interrupt-controller; + #interrupt-cells = <1>; + + interrupt-parent = <&periph_intc>; + interrupts = <60>; + }; + + sun_top_ctrl: syscon@404000 { + compatible = "brcm,bcm7425-sun-top-ctrl", "syscon"; + reg = <0x404000 0x51c>; + little-endian; + }; + + reboot { + compatible = "brcm,brcmstb-reboot"; + syscon = <&sun_top_ctrl 0x304 0x308>; + }; + + uart0: serial@406b00 { + compatible = "ns16550a"; + reg = <0x406b00 0x20>; + reg-io-width = <0x4>; + reg-shift = <0x2>; + interrupt-parent = <&periph_intc>; + interrupts = <66>; + clocks = <&uart_clk>; + status = "disabled"; + }; + + enet0: ethernet@b80000 { + phy-mode = "internal"; + phy-handle = <&phy1>; + mac-address = [ 00 10 18 36 23 1a ]; + compatible = "brcm,genet-v3"; + #address-cells = <0x1>; + #size-cells = <0x1>; + reg = <0xb80000 0x11c88>; + interrupts = <17>, <18>; + interrupt-parent = <&periph_intc>; + status = "disabled"; + + mdio@e14 { + compatible = "brcm,genet-mdio-v3"; + #address-cells = <0x1>; + #size-cells = <0x0>; + reg = <0xe14 0x8>; + + phy1: ethernet-phy@1 { + max-speed = <100>; + reg = <0x1>; + compatible = "brcm,40nm-ephy", + "ethernet-phy-ieee802.3-c22"; + }; + }; + }; + + ehci0: usb@480300 { + compatible = "brcm,bcm7435-ehci", "generic-ehci"; + reg = <0x480300 0x100>; + native-endian; + interrupt-parent = <&periph_intc>; + interrupts = <70>; + status = "disabled"; + }; + + ohci0: usb@480400 { + compatible = "brcm,bcm7435-ohci", "generic-ohci"; + reg = <0x480400 0x100>; + native-endian; + no-big-frame-no; + interrupt-parent = <&periph_intc>; + interrupts = <72>; + status = "disabled"; + }; + + ehci1: usb@480500 { + compatible = "brcm,bcm7435-ehci", "generic-ehci"; + reg = <0x480500 0x100>; + native-endian; + interrupt-parent = <&periph_intc>; + interrupts = <71>; + status = "disabled"; + }; + + ohci1: usb@480600 { + compatible = "brcm,bcm7435-ohci", "generic-ohci"; + reg = <0x480600 0x100>; + native-endian; + no-big-frame-no; + interrupt-parent = <&periph_intc>; + interrupts = <73>; + status = "disabled"; + }; + + ehci2: usb@490300 { + compatible = "brcm,bcm7435-ehci", "generic-ehci"; + reg = <0x490300 0x100>; + native-endian; + interrupt-parent = <&periph_intc>; + interrupts = <75>; + status = "disabled"; + }; + + ohci2: usb@490400 { + compatible = "brcm,bcm7435-ohci", "generic-ohci"; + reg = <0x490400 0x100>; + native-endian; + no-big-frame-no; + interrupt-parent = <&periph_intc>; + interrupts = <77>; + status = "disabled"; + }; + + ehci3: usb@490500 { + compatible = "brcm,bcm7435-ehci", "generic-ehci"; + reg = <0x490500 0x100>; + native-endian; + interrupt-parent = <&periph_intc>; + interrupts = <76>; + status = "disabled"; + }; + + ohci3: usb@490600 { + compatible = "brcm,bcm7435-ohci", "generic-ohci"; + reg = <0x490600 0x100>; + native-endian; + no-big-frame-no; + interrupt-parent = <&periph_intc>; + interrupts = <78>; + status = "disabled"; + }; + }; +}; diff --git a/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts b/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts index 70f196d89d26..3fe0445b9d37 100644 --- a/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts +++ b/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts @@ -21,6 +21,14 @@ status = "okay"; }; +&uart1 { + status = "okay"; +}; + +&uart2 { + status = "okay"; +}; + &enet0 { status = "okay"; }; diff --git a/arch/mips/boot/dts/brcm/bcm97358svmb.dts b/arch/mips/boot/dts/brcm/bcm97358svmb.dts index d18e6d947739..a8dc01e30313 100644 --- a/arch/mips/boot/dts/brcm/bcm97358svmb.dts +++ b/arch/mips/boot/dts/brcm/bcm97358svmb.dts @@ -21,6 +21,14 @@ status = "okay"; }; +&uart1 { + status = "okay"; +}; + +&uart2 { + status = "okay"; +}; + &enet0 { status = "okay"; }; diff --git a/arch/mips/boot/dts/brcm/bcm97360svmb.dts b/arch/mips/boot/dts/brcm/bcm97360svmb.dts index 4fe515500102..eee8b0e32681 100644 --- a/arch/mips/boot/dts/brcm/bcm97360svmb.dts +++ b/arch/mips/boot/dts/brcm/bcm97360svmb.dts @@ -21,6 +21,14 @@ status = "okay"; }; +&uart1 { + status = "okay"; +}; + +&uart2 { + status = "okay"; +}; + &enet0 { status = "okay"; }; diff --git a/arch/mips/boot/dts/brcm/bcm97362svmb.dts b/arch/mips/boot/dts/brcm/bcm97362svmb.dts index b7b88e5dc9e7..739c2ef5663b 100644 --- a/arch/mips/boot/dts/brcm/bcm97362svmb.dts +++ b/arch/mips/boot/dts/brcm/bcm97362svmb.dts @@ -21,6 +21,14 @@ status = "okay"; }; +&uart1 { + status = "okay"; +}; + +&uart2 { + status = "okay"; +}; + &enet0 { status = "okay"; }; diff --git a/arch/mips/boot/dts/brcm/bcm97435svmb.dts b/arch/mips/boot/dts/brcm/bcm97435svmb.dts new file mode 100644 index 000000000000..1df088183523 --- /dev/null +++ b/arch/mips/boot/dts/brcm/bcm97435svmb.dts @@ -0,0 +1,60 @@ +/dts-v1/; + +/include/ "bcm7435.dtsi" + +/ { + compatible = "brcm,bcm97435svmb", "brcm,bcm7435"; + model = "Broadcom BCM97435SVMB"; + + memory@0 { + device_type = "memory"; + reg = <0x00000000 0x10000000>, + <0x20000000 0x30000000>, + <0x90000000 0x40000000>; + }; + + chosen { + bootargs = "console=ttyS0,115200 maxcpus=1"; + stdout-path = &uart0; + }; +}; + +&uart0 { + status = "okay"; +}; + +&enet0 { + status = "okay"; +}; + +&ehci0 { + status = "okay"; +}; + +&ohci0 { + status = "okay"; +}; + +&ehci1 { + status = "okay"; +}; + +&ohci1 { + status = "okay"; +}; + +&ehci2 { + status = "okay"; +}; + +&ohci2 { + status = "okay"; +}; + +&ehci3 { + status = "okay"; +}; + +&ohci3 { + status = "okay"; +}; diff --git a/arch/mips/boot/dts/ingenic/Makefile b/arch/mips/boot/dts/ingenic/Makefile new file mode 100644 index 000000000000..f2b864f07850 --- /dev/null +++ b/arch/mips/boot/dts/ingenic/Makefile @@ -0,0 +1,10 @@ +dtb-$(CONFIG_JZ4740_QI_LB60) += qi_lb60.dtb +dtb-$(CONFIG_JZ4780_CI20) += ci20.dtb + +obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) + +# Force kbuild to make empty built-in.o if necessary +obj- += dummy.o + +always := $(dtb-y) +clean-files := *.dtb *.dtb.S diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts new file mode 100644 index 000000000000..9fcb9e7d1f57 --- /dev/null +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -0,0 +1,44 @@ +/dts-v1/; + +#include "jz4780.dtsi" + +/ { + compatible = "img,ci20", "ingenic,jz4780"; + + aliases { + serial0 = &uart0; + serial1 = &uart1; + serial3 = &uart3; + serial4 = &uart4; + }; + + chosen { + stdout-path = &uart4; + }; + + memory { + device_type = "memory"; + reg = <0x0 0x10000000 + 0x30000000 0x30000000>; + }; +}; + +&ext { + clock-frequency = <48000000>; +}; + +&uart0 { + status = "okay"; +}; + +&uart1 { + status = "okay"; +}; + +&uart3 { + status = "okay"; +}; + +&uart4 { + status = "okay"; +}; diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi new file mode 100644 index 000000000000..8b2437cd019f --- /dev/null +++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi @@ -0,0 +1,68 @@ +#include <dt-bindings/clock/jz4740-cgu.h> + +/ { + #address-cells = <1>; + #size-cells = <1>; + compatible = "ingenic,jz4740"; + + cpuintc: interrupt-controller@0 { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + compatible = "mti,cpu-interrupt-controller"; + }; + + intc: interrupt-controller@10001000 { + compatible = "ingenic,jz4740-intc"; + reg = <0x10001000 0x14>; + + interrupt-controller; + #interrupt-cells = <1>; + + interrupt-parent = <&cpuintc>; + interrupts = <2>; + }; + + ext: ext { + compatible = "fixed-clock"; + #clock-cells = <0>; + }; + + rtc: rtc { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + }; + + cgu: jz4740-cgu@10000000 { + compatible = "ingenic,jz4740-cgu"; + reg = <0x10000000 0x100>; + + clocks = <&ext>, <&rtc>; + clock-names = "ext", "rtc"; + + #clock-cells = <1>; + }; + + uart0: serial@10030000 { + compatible = "ingenic,jz4740-uart"; + reg = <0x10030000 0x100>; + + interrupt-parent = <&intc>; + interrupts = <9>; + + clocks = <&ext>, <&cgu JZ4740_CLK_UART0>; + clock-names = "baud", "module"; + }; + + uart1: serial@10031000 { + compatible = "ingenic,jz4740-uart"; + reg = <0x10031000 0x100>; + + interrupt-parent = <&intc>; + interrupts = <8>; + + clocks = <&ext>, <&cgu JZ4740_CLK_UART1>; + clock-names = "baud", "module"; + }; +}; diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi new file mode 100644 index 000000000000..65389f602733 --- /dev/null +++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi @@ -0,0 +1,111 @@ +#include <dt-bindings/clock/jz4780-cgu.h> + +/ { + #address-cells = <1>; + #size-cells = <1>; + compatible = "ingenic,jz4780"; + + cpuintc: interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + compatible = "mti,cpu-interrupt-controller"; + }; + + intc: interrupt-controller@10001000 { + compatible = "ingenic,jz4780-intc"; + reg = <0x10001000 0x50>; + + interrupt-controller; + #interrupt-cells = <1>; + + interrupt-parent = <&cpuintc>; + interrupts = <2>; + }; + + ext: ext { + compatible = "fixed-clock"; + #clock-cells = <0>; + }; + + rtc: rtc { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + }; + + cgu: jz4780-cgu@10000000 { + compatible = "ingenic,jz4780-cgu"; + reg = <0x10000000 0x100>; + + clocks = <&ext>, <&rtc>; + clock-names = "ext", "rtc"; + + #clock-cells = <1>; + }; + + uart0: serial@10030000 { + compatible = "ingenic,jz4780-uart"; + reg = <0x10030000 0x100>; + + interrupt-parent = <&intc>; + interrupts = <51>; + + clocks = <&ext>, <&cgu JZ4780_CLK_UART0>; + clock-names = "baud", "module"; + + status = "disabled"; + }; + + uart1: serial@10031000 { + compatible = "ingenic,jz4780-uart"; + reg = <0x10031000 0x100>; + + interrupt-parent = <&intc>; + interrupts = <50>; + + clocks = <&ext>, <&cgu JZ4780_CLK_UART1>; + clock-names = "baud", "module"; + + status = "disabled"; + }; + + uart2: serial@10032000 { + compatible = "ingenic,jz4780-uart"; + reg = <0x10032000 0x100>; + + interrupt-parent = <&intc>; + interrupts = <49>; + + clocks = <&ext>, <&cgu JZ4780_CLK_UART2>; + clock-names = "baud", "module"; + + status = "disabled"; + }; + + uart3: serial@10033000 { + compatible = "ingenic,jz4780-uart"; + reg = <0x10033000 0x100>; + + interrupt-parent = <&intc>; + interrupts = <48>; + + clocks = <&ext>, <&cgu JZ4780_CLK_UART3>; + clock-names = "baud", "module"; + + status = "disabled"; + }; + + uart4: serial@10034000 { + compatible = "ingenic,jz4780-uart"; + reg = <0x10034000 0x100>; + + interrupt-parent = <&intc>; + interrupts = <34>; + + clocks = <&ext>, <&cgu JZ4780_CLK_UART4>; + clock-names = "baud", "module"; + + status = "disabled"; + }; +}; diff --git a/arch/mips/boot/dts/ingenic/qi_lb60.dts b/arch/mips/boot/dts/ingenic/qi_lb60.dts new file mode 100644 index 000000000000..2414d63ae818 --- /dev/null +++ b/arch/mips/boot/dts/ingenic/qi_lb60.dts @@ -0,0 +1,15 @@ +/dts-v1/; + +#include "jz4740.dtsi" + +/ { + compatible = "qi,lb60", "ingenic,jz4740"; + + chosen { + stdout-path = &uart0; + }; +}; + +&ext { + clock-frequency = <12000000>; +}; diff --git a/arch/mips/boot/dts/mti/Makefile b/arch/mips/boot/dts/mti/Makefile index ef1f3dbed033..144d776cc9f2 100644 --- a/arch/mips/boot/dts/mti/Makefile +++ b/arch/mips/boot/dts/mti/Makefile @@ -1,3 +1,4 @@ +dtb-$(CONFIG_MIPS_MALTA) += malta.dtb dtb-$(CONFIG_MIPS_SEAD3) += sead3.dtb obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) diff --git a/arch/mips/boot/dts/mti/malta.dts b/arch/mips/boot/dts/mti/malta.dts new file mode 100644 index 000000000000..c678115f5b7f --- /dev/null +++ b/arch/mips/boot/dts/mti/malta.dts @@ -0,0 +1,7 @@ +/dts-v1/; + +/ { + #address-cells = <1>; + #size-cells = <1>; + compatible = "mti,malta"; +}; diff --git a/arch/mips/boot/dts/qca/Makefile b/arch/mips/boot/dts/qca/Makefile new file mode 100644 index 000000000000..2d61455d585d --- /dev/null +++ b/arch/mips/boot/dts/qca/Makefile @@ -0,0 +1,11 @@ +# All DTBs +dtb-$(CONFIG_ATH79) += ar9132_tl_wr1043nd_v1.dtb + +# Select a DTB to build in the kernel +obj-$(CONFIG_DTB_TL_WR1043ND_V1) += ar9132_tl_wr1043nd_v1.dtb.o + +# Force kbuild to make empty built-in.o if necessary +obj- += dummy.o + +always := $(dtb-y) +clean-files := *.dtb *.dtb.S diff --git a/arch/mips/boot/dts/qca/ar9132.dtsi b/arch/mips/boot/dts/qca/ar9132.dtsi new file mode 100644 index 000000000000..4759cff814d1 --- /dev/null +++ b/arch/mips/boot/dts/qca/ar9132.dtsi @@ -0,0 +1,133 @@ +/ { + compatible = "qca,ar9132"; + + #address-cells = <1>; + #size-cells = <1>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "mips,mips24Kc"; + reg = <0>; + }; + }; + + cpuintc: interrupt-controller { + compatible = "qca,ar9132-cpu-intc", "qca,ar7100-cpu-intc"; + + interrupt-controller; + #interrupt-cells = <1>; + + qca,ddr-wb-channel-interrupts = <2>, <3>, <4>, <5>; + qca,ddr-wb-channels = <&ddr_ctrl 3>, <&ddr_ctrl 2>, + <&ddr_ctrl 0>, <&ddr_ctrl 1>; + }; + + ahb { + compatible = "simple-bus"; + ranges; + + #address-cells = <1>; + #size-cells = <1>; + + interrupt-parent = <&cpuintc>; + + apb { + compatible = "simple-bus"; + ranges; + + #address-cells = <1>; + #size-cells = <1>; + + interrupt-parent = <&miscintc>; + + ddr_ctrl: memory-controller@18000000 { + compatible = "qca,ar9132-ddr-controller", + "qca,ar7240-ddr-controller"; + reg = <0x18000000 0x100>; + + #qca,ddr-wb-channel-cells = <1>; + }; + + uart@18020000 { + compatible = "ns8250"; + reg = <0x18020000 0x20>; + interrupts = <3>; + + clocks = <&pll 2>; + clock-names = "uart"; + + reg-io-width = <4>; + reg-shift = <2>; + no-loopback-test; + + status = "disabled"; + }; + + gpio: gpio@18040000 { + compatible = "qca,ar9132-gpio", + "qca,ar7100-gpio"; + reg = <0x18040000 0x30>; + interrupts = <2>; + + ngpios = <22>; + + gpio-controller; + #gpio-cells = <2>; + + interrupt-controller; + #interrupt-cells = <2>; + }; + + pll: pll-controller@18050000 { + compatible = "qca,ar9132-ppl", + "qca,ar9130-pll"; + reg = <0x18050000 0x20>; + + clock-names = "ref"; + /* The board must provides the ref clock */ + + #clock-cells = <1>; + clock-output-names = "cpu", "ddr", "ahb"; + }; + + wdt@18060008 { + compatible = "qca,ar7130-wdt"; + reg = <0x18060008 0x8>; + + interrupts = <4>; + + clocks = <&pll 2>; + clock-names = "wdt"; + }; + + miscintc: interrupt-controller@18060010 { + compatible = "qca,ar9132-misc-intc", + "qca,ar7100-misc-intc"; + reg = <0x18060010 0x4>; + + interrupt-parent = <&cpuintc>; + interrupts = <6>; + + interrupt-controller; + #interrupt-cells = <1>; + }; + }; + + spi@1f000000 { + compatible = "qca,ar9132-spi", "qca,ar7100-spi"; + reg = <0x1f000000 0x10>; + + clocks = <&pll 2>; + clock-names = "ahb"; + + status = "disabled"; + + #address-cells = <1>; + #size-cells = <0>; + }; + }; +}; diff --git a/arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts b/arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts new file mode 100644 index 000000000000..003015ab34e7 --- /dev/null +++ b/arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts @@ -0,0 +1,112 @@ +/dts-v1/; + +#include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/input/input.h> + +#include "ar9132.dtsi" + +/ { + compatible = "tplink,tl-wr1043nd-v1", "qca,ar9132"; + model = "TP-Link TL-WR1043ND Version 1"; + + alias { + serial0 = "/ahb/apb/uart@18020000"; + }; + + memory@0 { + device_type = "memory"; + reg = <0x0 0x2000000>; + }; + + extosc: oscillator { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <40000000>; + }; + + ahb { + apb { + uart@18020000 { + status = "okay"; + }; + + pll-controller@18050000 { + clocks = <&extosc>; + }; + }; + + spi@1f000000 { + status = "okay"; + num-cs = <1>; + + flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "s25sl064a"; + reg = <0>; + spi-max-frequency = <25000000>; + + partition@0 { + label = "u-boot"; + reg = <0x000000 0x020000>; + }; + + partition@1 { + label = "firmware"; + reg = <0x020000 0x7D0000>; + }; + + partition@2 { + label = "art"; + reg = <0x7F0000 0x010000>; + read-only; + }; + }; + }; + }; + + gpio-keys { + compatible = "gpio-keys-polled"; + #address-cells = <1>; + #size-cells = <0>; + + poll-interval = <20>; + button@0 { + label = "reset"; + linux,code = <KEY_RESTART>; + gpios = <&gpio 3 GPIO_ACTIVE_LOW>; + debounce-interval = <60>; + }; + + button@1 { + label = "qss"; + linux,code = <KEY_WPS_BUTTON>; + gpios = <&gpio 7 GPIO_ACTIVE_LOW>; + debounce-interval = <60>; + }; + }; + + leds { + compatible = "gpio-leds"; + led@0 { + label = "tp-link:green:usb"; + gpios = <&gpio 1 GPIO_ACTIVE_LOW>; + }; + + led@1 { + label = "tp-link:green:system"; + gpios = <&gpio 2 GPIO_ACTIVE_LOW>; + linux,default-trigger = "heartbeat"; + }; + + led@2 { + label = "tp-link:green:qss"; + gpios = <&gpio 5 GPIO_ACTIVE_HIGH>; + }; + + led@3 { + label = "tp-link:green:wlan"; + gpios = <&gpio 9 GPIO_ACTIVE_LOW>; + }; + }; +}; diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 10f762557b92..d8124a3c5a85 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -698,7 +698,9 @@ static void octeon_irq_ciu_gpio_ack(struct irq_data *data) static void octeon_irq_handle_trigger(unsigned int irq, struct irq_desc *desc) { - if (irq_get_trigger_type(irq) & IRQ_TYPE_EDGE_BOTH) + struct irq_data *data = irq_desc_get_irq_data(desc); + + if (irqd_get_trigger_type(data) & IRQ_TYPE_EDGE_BOTH) handle_edge_irq(irq, desc); else handle_level_irq(irq, desc); diff --git a/arch/mips/cobalt/mtd.c b/arch/mips/cobalt/mtd.c index 8db7b5d81560..83e1b1093d5f 100644 --- a/arch/mips/cobalt/mtd.c +++ b/arch/mips/cobalt/mtd.c @@ -57,5 +57,4 @@ static int __init cobalt_mtd_init(void) return 0; } - -module_init(cobalt_mtd_init); +device_initcall(cobalt_mtd_init); diff --git a/arch/mips/configs/ci20_defconfig b/arch/mips/configs/ci20_defconfig new file mode 100644 index 000000000000..4e36b6e1869c --- /dev/null +++ b/arch/mips/configs/ci20_defconfig @@ -0,0 +1,162 @@ +CONFIG_MACH_INGENIC=y +CONFIG_JZ4780_CI20=y +CONFIG_HIGHMEM=y +# CONFIG_COMPACTION is not set +CONFIG_CMA=y +CONFIG_HZ_100=y +CONFIG_PREEMPT=y +# CONFIG_SECCOMP is not set +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_KERNEL_XZ=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_CGROUPS=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_MEMCG=y +CONFIG_MEMCG_KMEM=y +CONFIG_CGROUP_SCHED=y +CONFIG_NAMESPACES=y +CONFIG_USER_NS=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_SLAB=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_SUSPEND is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +# CONFIG_INET_DIAG is not set +# CONFIG_IPV6 is not set +# CONFIG_WIRELESS is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +# CONFIG_FW_LOADER is not set +# CONFIG_ALLOW_DEV_COREDUMP is not set +CONFIG_DMA_CMA=y +CONFIG_CMA_SIZE_MBYTES=32 +CONFIG_NETDEVICES=y +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_CADENCE is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +CONFIG_DM9000=y +CONFIG_DM9000_FORCE_SIMPLE_PHY_POLL=y +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_QUALCOMM is not set +# CONFIG_NET_VENDOR_ROCKER is not set +# CONFIG_NET_VENDOR_SAMSUNG is not set +# CONFIG_NET_VENDOR_SEEQ is not set +# CONFIG_NET_VENDOR_SMSC is not set +# CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_WLAN is not set +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +CONFIG_VT_HW_CONSOLE_BINDING=y +CONFIG_LEGACY_PTY_COUNT=2 +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=5 +CONFIG_SERIAL_8250_RUNTIME_UARTS=5 +CONFIG_SERIAL_8250_INGENIC=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_HW_RANDOM is not set +CONFIG_I2C=y +CONFIG_I2C_JZ4780=y +CONFIG_GPIO_SYSFS=y +# CONFIG_HWMON is not set +CONFIG_REGULATOR=y +CONFIG_REGULATOR_DEBUG=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y +# CONFIG_VGA_CONSOLE is not set +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_MMC=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_MEMORY=y +# CONFIG_DNOTIFY is not set +CONFIG_PROC_KCORE=y +# CONFIG_PROC_PAGE_MONITOR is not set +CONFIG_TMPFS=y +CONFIG_CONFIGFS_FS=y +# CONFIG_MISC_FILESYSTEMS is not set +# CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_NLS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_737=y +CONFIG_NLS_CODEPAGE_775=y +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_CODEPAGE_852=y +CONFIG_NLS_CODEPAGE_855=y +CONFIG_NLS_CODEPAGE_857=y +CONFIG_NLS_CODEPAGE_860=y +CONFIG_NLS_CODEPAGE_861=y +CONFIG_NLS_CODEPAGE_862=y +CONFIG_NLS_CODEPAGE_863=y +CONFIG_NLS_CODEPAGE_864=y +CONFIG_NLS_CODEPAGE_865=y +CONFIG_NLS_CODEPAGE_866=y +CONFIG_NLS_CODEPAGE_869=y +CONFIG_NLS_CODEPAGE_936=y +CONFIG_NLS_CODEPAGE_950=y +CONFIG_NLS_CODEPAGE_932=y +CONFIG_NLS_CODEPAGE_949=y +CONFIG_NLS_CODEPAGE_874=y +CONFIG_NLS_ISO8859_8=y +CONFIG_NLS_CODEPAGE_1250=y +CONFIG_NLS_CODEPAGE_1251=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_ISO8859_2=y +CONFIG_NLS_ISO8859_3=y +CONFIG_NLS_ISO8859_4=y +CONFIG_NLS_ISO8859_5=y +CONFIG_NLS_ISO8859_6=y +CONFIG_NLS_ISO8859_7=y +CONFIG_NLS_ISO8859_9=y +CONFIG_NLS_ISO8859_13=y +CONFIG_NLS_ISO8859_14=y +CONFIG_NLS_ISO8859_15=y +CONFIG_NLS_KOI8_R=y +CONFIG_NLS_KOI8_U=y +CONFIG_NLS_UTF8=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +CONFIG_STRIP_ASM_SYMS=y +CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PANIC_TIMEOUT=10 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_DEBUG_PREEMPT is not set +CONFIG_STACKTRACE=y +# CONFIG_FTRACE is not set +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="earlycon console=ttyS4,115200 clk_ignore_unused" diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index b2a577ebce0b..a75c65da08b4 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -1,4 +1,4 @@ -CONFIG_MACH_LOONGSON=y +CONFIG_MACH_LOONGSON64=y CONFIG_64BIT=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index 0cbc9863c7c8..54cc3853d259 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -1,4 +1,4 @@ -CONFIG_MACH_LOONGSON=y +CONFIG_MACH_LOONGSON64=y CONFIG_LEMOTE_MACH2F=y CONFIG_CS5536_MFGPT=y CONFIG_64BIT=y diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index c8442997477b..f8bf915c6d6b 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -1,4 +1,4 @@ -CONFIG_MACH_LOONGSON=y +CONFIG_MACH_LOONGSON64=y CONFIG_SWIOTLB=y CONFIG_LOONGSON_MACH3X=y CONFIG_CPU_LOONGSON3=y diff --git a/arch/mips/configs/ls1b_defconfig b/arch/mips/configs/ls1b_defconfig index 7eb75543ca1a..1b2cc1fb26a1 100644 --- a/arch/mips/configs/ls1b_defconfig +++ b/arch/mips/configs/ls1b_defconfig @@ -1,4 +1,4 @@ -CONFIG_MACH_LOONGSON1=y +CONFIG_MACH_LOONGSON32=y CONFIG_PREEMPT=y # CONFIG_SECCOMP is not set CONFIG_EXPERIMENTAL=y diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig index f8a32315bb38..ac0eb4daf101 100644 --- a/arch/mips/configs/maltasmvp_defconfig +++ b/arch/mips/configs/maltasmvp_defconfig @@ -84,15 +84,12 @@ CONFIG_NET_CLS_IND=y CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_CRYPTOLOOP=m -CONFIG_IDE=y -# CONFIG_IDE_PROC_FS is not set -# CONFIG_IDEPCI_PCIBUS_ORDER is not set -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_PIIX=y -CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y # CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y +# CONFIG_SATA_PMP is not set +CONFIG_ATA_PIIX=y CONFIG_NETDEVICES=y # CONFIG_NET_VENDOR_3COM is not set # CONFIG_NET_VENDOR_ADAPTEC is not set @@ -138,7 +135,6 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_HW_RANDOM=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_MATROX=y @@ -152,7 +148,6 @@ CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_LEDS_TRIGGER_IDE_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_LEDS_TRIGGER_BACKLIGHT=y CONFIG_LEDS_TRIGGER_DEFAULT_ON=y @@ -160,7 +155,11 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/pistachio_defconfig b/arch/mips/configs/pistachio_defconfig index f22e92ee7709..1646cce032c3 100644 --- a/arch/mips/configs/pistachio_defconfig +++ b/arch/mips/configs/pistachio_defconfig @@ -272,6 +272,7 @@ CONFIG_IIO=y CONFIG_CC10001_ADC=y CONFIG_PWM=y CONFIG_PWM_IMG=y +CONFIG_PHY_PISTACHIO_USB=y CONFIG_ANDROID=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y diff --git a/arch/mips/configs/qi_lb60_defconfig b/arch/mips/configs/qi_lb60_defconfig index 2b965470c35b..d7bb8cce1068 100644 --- a/arch/mips/configs/qi_lb60_defconfig +++ b/arch/mips/configs/qi_lb60_defconfig @@ -1,4 +1,4 @@ -CONFIG_MACH_JZ4740=y +CONFIG_MACH_INGENIC=y # CONFIG_COMPACTION is not set # CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_HZ_100=y @@ -66,6 +66,7 @@ CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_SERIAL_8250_DMA is not set CONFIG_SERIAL_8250_NR_UARTS=2 CONFIG_SERIAL_8250_RUNTIME_UARTS=2 +CONFIG_SERIAL_8250_INGENIC=y # CONFIG_HW_RANDOM is not set CONFIG_SPI=y CONFIG_SPI_GPIO=y diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h index 6156ac8c4cfb..76317a70200d 100644 --- a/arch/mips/include/asm/asmmacro.h +++ b/arch/mips/include/asm/asmmacro.h @@ -211,9 +211,13 @@ .endm #ifdef TOOLCHAIN_SUPPORTS_MSA +/* preprocessor replaces the fp in ".set fp=64" with $30 otherwise */ +#undef fp + .macro _cfcmsa rd, cs .set push .set mips32r2 + .set fp=64 .set msa cfcmsa \rd, $\cs .set pop @@ -222,6 +226,7 @@ .macro _ctcmsa cd, rs .set push .set mips32r2 + .set fp=64 .set msa ctcmsa $\cd, \rs .set pop @@ -230,6 +235,7 @@ .macro ld_d wd, off, base .set push .set mips32r2 + .set fp=64 .set msa ld.d $w\wd, \off(\base) .set pop @@ -238,6 +244,7 @@ .macro st_d wd, off, base .set push .set mips32r2 + .set fp=64 .set msa st.d $w\wd, \off(\base) .set pop @@ -246,6 +253,7 @@ .macro copy_u_w ws, n .set push .set mips32r2 + .set fp=64 .set msa copy_u.w $1, $w\ws[\n] .set pop @@ -254,6 +262,7 @@ .macro copy_u_d ws, n .set push .set mips64r2 + .set fp=64 .set msa copy_u.d $1, $w\ws[\n] .set pop @@ -262,6 +271,7 @@ .macro insert_w wd, n .set push .set mips32r2 + .set fp=64 .set msa insert.w $w\wd[\n], $1 .set pop @@ -270,6 +280,7 @@ .macro insert_d wd, n .set push .set mips64r2 + .set fp=64 .set msa insert.d $w\wd[\n], $1 .set pop diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index 0cf29bd5dc5c..ce9666cf1499 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -469,7 +469,7 @@ static inline int test_and_change_bit(unsigned long nr, */ static inline void __clear_bit_unlock(unsigned long nr, volatile unsigned long *addr) { - smp_mb(); + smp_mb__before_llsc(); __clear_bit(nr, addr); } diff --git a/arch/mips/include/asm/bmips-spaces.h b/arch/mips/include/asm/bmips-spaces.h new file mode 100644 index 000000000000..eb96541ae67e --- /dev/null +++ b/arch/mips/include/asm/bmips-spaces.h @@ -0,0 +1,7 @@ +#ifndef __ASM_BMIPS_SPACES_H +#define __ASM_BMIPS_SPACES_H + +/* Avoid collisions with system base register (SBR) region on BMIPS3300 */ +#define FIXADDR_TOP ((unsigned long)(long)(int)0xff000000) + +#endif /* __ASM_BMIPS_SPACES_H */ diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index 5aeaf19c26b0..f25de771f7ed 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -108,6 +108,9 @@ #ifndef cpu_has_llsc #define cpu_has_llsc (cpu_data[0].options & MIPS_CPU_LLSC) #endif +#ifndef cpu_has_bp_ghist +#define cpu_has_bp_ghist (cpu_data[0].options & MIPS_CPU_BP_GHIST) +#endif #ifndef kernel_uses_llsc #define kernel_uses_llsc cpu_has_llsc #endif diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h index 33f3cab9e689..d41e8e284825 100644 --- a/arch/mips/include/asm/cpu-type.h +++ b/arch/mips/include/asm/cpu-type.h @@ -32,12 +32,12 @@ static inline int __pure __get_cpu_type(const int cpu_type) case CPU_4KC: case CPU_ALCHEMY: case CPU_PR4450: - case CPU_JZRISC: #endif #if defined(CONFIG_SYS_HAS_CPU_MIPS32_R1) || \ defined(CONFIG_SYS_HAS_CPU_MIPS32_R2) case CPU_4KEC: + case CPU_JZRISC: #endif #ifdef CONFIG_SYS_HAS_CPU_MIPS32_R2 diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index e3adca1d0b99..e46e40602af3 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -42,7 +42,9 @@ #define PRID_COMP_LEXRA 0x0b0000 #define PRID_COMP_NETLOGIC 0x0c0000 #define PRID_COMP_CAVIUM 0x0d0000 -#define PRID_COMP_INGENIC 0xd00000 +#define PRID_COMP_INGENIC_D0 0xd00000 /* JZ4740, JZ4750 */ +#define PRID_COMP_INGENIC_D1 0xd10000 /* JZ4770, JZ4775 */ +#define PRID_COMP_INGENIC_E1 0xe10000 /* JZ4780 */ /* * Assigned Processor ID (implementation) values for bits 15:8 of the PRId @@ -168,7 +170,7 @@ #define PRID_IMP_CAVIUM_CN70XX 0x9600 /* - * These are the PRID's for when 23:16 == PRID_COMP_INGENIC + * These are the PRID's for when 23:16 == PRID_COMP_INGENIC_* */ #define PRID_IMP_JZRISC 0x0200 @@ -379,6 +381,7 @@ enum cpu_type_enum { #define MIPS_CPU_RW_LLB 0x1000000000ull /* LLADDR/LLB writes are allowed */ #define MIPS_CPU_XPA 0x2000000000ull /* CPU supports Extended Physical Addressing */ #define MIPS_CPU_CDMM 0x4000000000ull /* CPU has Common Device Memory Map */ +#define MIPS_CPU_BP_GHIST 0x8000000000ull /* R12K+ Branch Prediction Global History */ /* * CPU ASE encodings diff --git a/arch/mips/include/asm/hazards.h b/arch/mips/include/asm/hazards.h index 4087b47ad1cb..7b99efd31074 100644 --- a/arch/mips/include/asm/hazards.h +++ b/arch/mips/include/asm/hazards.h @@ -31,9 +31,15 @@ #define __mtc0_tlbw_hazard \ ___ehb +#define __mtc0_tlbr_hazard \ + ___ehb + #define __tlbw_use_hazard \ ___ehb +#define __tlb_read_hazard \ + ___ehb + #define __tlb_probe_hazard \ ___ehb @@ -80,12 +86,23 @@ do { \ ___ssnop; \ ___ehb +#define __mtc0_tlbr_hazard \ + ___ssnop; \ + ___ssnop; \ + ___ehb + #define __tlbw_use_hazard \ ___ssnop; \ ___ssnop; \ ___ssnop; \ ___ehb +#define __tlb_read_hazard \ + ___ssnop; \ + ___ssnop; \ + ___ssnop; \ + ___ehb + #define __tlb_probe_hazard \ ___ssnop; \ ___ssnop; \ @@ -147,8 +164,12 @@ do { \ #define __mtc0_tlbw_hazard +#define __mtc0_tlbr_hazard + #define __tlbw_use_hazard +#define __tlb_read_hazard + #define __tlb_probe_hazard #define __irq_enable_hazard @@ -166,8 +187,12 @@ do { \ */ #define __mtc0_tlbw_hazard +#define __mtc0_tlbr_hazard + #define __tlbw_use_hazard +#define __tlb_read_hazard + #define __tlb_probe_hazard #define __irq_enable_hazard @@ -196,11 +221,20 @@ do { \ nop; \ nop +#define __mtc0_tlbr_hazard \ + nop; \ + nop + #define __tlbw_use_hazard \ nop; \ nop; \ nop +#define __tlb_read_hazard \ + nop; \ + nop; \ + nop + #define __tlb_probe_hazard \ nop; \ nop; \ @@ -267,7 +301,9 @@ do { \ #define _ssnop ___ssnop #define _ehb ___ehb #define mtc0_tlbw_hazard __mtc0_tlbw_hazard +#define mtc0_tlbr_hazard __mtc0_tlbr_hazard #define tlbw_use_hazard __tlbw_use_hazard +#define tlb_read_hazard __tlb_read_hazard #define tlb_probe_hazard __tlb_probe_hazard #define irq_enable_hazard __irq_enable_hazard #define irq_disable_hazard __irq_disable_hazard @@ -300,6 +336,14 @@ do { \ } while (0) +#define mtc0_tlbr_hazard() \ +do { \ + __asm__ __volatile__( \ + __stringify(__mtc0_tlbr_hazard) \ + ); \ +} while (0) + + #define tlbw_use_hazard() \ do { \ __asm__ __volatile__( \ @@ -308,6 +352,14 @@ do { \ } while (0) +#define tlb_read_hazard() \ +do { \ + __asm__ __volatile__( \ + __stringify(__tlb_read_hazard) \ + ); \ +} while (0) + + #define tlb_probe_hazard() \ do { \ __asm__ __volatile__( \ diff --git a/arch/mips/include/asm/i8259.h b/arch/mips/include/asm/i8259.h index c7e278447c0a..a7fbcd6ed13c 100644 --- a/arch/mips/include/asm/i8259.h +++ b/arch/mips/include/asm/i8259.h @@ -41,6 +41,7 @@ extern int i8259A_irq_pending(unsigned int irq); extern void make_8259A_irq(unsigned int irq); extern void init_i8259_irqs(void); +extern int i8259_of_init(struct device_node *node, struct device_node *parent); /* * Do the traditional i8259 interrupt polling thing. This is for the few diff --git a/arch/mips/include/asm/irqflags.h b/arch/mips/include/asm/irqflags.h index d60cc68fa31e..e7b138b4b3d3 100644 --- a/arch/mips/include/asm/irqflags.h +++ b/arch/mips/include/asm/irqflags.h @@ -60,7 +60,7 @@ static inline void arch_local_irq_restore(unsigned long flags) " .set push \n" " .set noreorder \n" " .set noat \n" -#if defined(CONFIG_IRQ_CPU) +#if defined(CONFIG_IRQ_MIPS_CPU) /* * Slow, but doesn't suffer from a relatively unlikely race * condition we're having since days 1. @@ -90,7 +90,7 @@ static inline void __arch_local_irq_restore(unsigned long flags) " .set push \n" " .set noreorder \n" " .set noat \n" -#if defined(CONFIG_IRQ_CPU) +#if defined(CONFIG_IRQ_MIPS_CPU) /* * Slow, but doesn't suffer from a relatively unlikely race * condition we're having since days 1. diff --git a/arch/mips/include/asm/kgdb.h b/arch/mips/include/asm/kgdb.h index e6c0b0e14ccb..69dc0df94a96 100644 --- a/arch/mips/include/asm/kgdb.h +++ b/arch/mips/include/asm/kgdb.h @@ -33,7 +33,6 @@ #define CACHE_FLUSH_IS_SAFE 0 extern void arch_kgdb_breakpoint(void); -extern int kgdb_early_setup; extern void *saved_vectors[32]; extern void handle_exception(struct pt_regs *regs); extern void breakinst(void); diff --git a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h index cd41e93bc1d8..aa3800c82332 100644 --- a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h +++ b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h @@ -157,8 +157,8 @@ #define AR71XX_PLL_REG_ETH0_INT_CLOCK 0x10 #define AR71XX_PLL_REG_ETH1_INT_CLOCK 0x14 -#define AR71XX_PLL_DIV_SHIFT 3 -#define AR71XX_PLL_DIV_MASK 0x1f +#define AR71XX_PLL_FB_SHIFT 3 +#define AR71XX_PLL_FB_MASK 0x1f #define AR71XX_CPU_DIV_SHIFT 16 #define AR71XX_CPU_DIV_MASK 0x3 #define AR71XX_DDR_DIV_SHIFT 18 @@ -169,8 +169,8 @@ #define AR724X_PLL_REG_CPU_CONFIG 0x00 #define AR724X_PLL_REG_PCIE_CONFIG 0x18 -#define AR724X_PLL_DIV_SHIFT 0 -#define AR724X_PLL_DIV_MASK 0x3ff +#define AR724X_PLL_FB_SHIFT 0 +#define AR724X_PLL_FB_MASK 0x3ff #define AR724X_PLL_REF_DIV_SHIFT 10 #define AR724X_PLL_REF_DIV_MASK 0xf #define AR724X_AHB_DIV_SHIFT 19 @@ -183,8 +183,8 @@ #define AR913X_PLL_REG_ETH0_INT_CLOCK 0x14 #define AR913X_PLL_REG_ETH1_INT_CLOCK 0x18 -#define AR913X_PLL_DIV_SHIFT 0 -#define AR913X_PLL_DIV_MASK 0x3ff +#define AR913X_PLL_FB_SHIFT 0 +#define AR913X_PLL_FB_MASK 0x3ff #define AR913X_DDR_DIV_SHIFT 22 #define AR913X_DDR_DIV_MASK 0x3 #define AR913X_AHB_DIV_SHIFT 19 diff --git a/arch/mips/include/asm/mach-ath79/ath79.h b/arch/mips/include/asm/mach-ath79/ath79.h index 1557934aaca9..4eee221b0cf0 100644 --- a/arch/mips/include/asm/mach-ath79/ath79.h +++ b/arch/mips/include/asm/mach-ath79/ath79.h @@ -115,7 +115,8 @@ static inline int soc_is_qca955x(void) return soc_is_qca9556() || soc_is_qca9558(); } -extern void __iomem *ath79_ddr_base; +void ath79_ddr_set_pci_windows(void); + extern void __iomem *ath79_pll_base; extern void __iomem *ath79_reset_base; diff --git a/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h b/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h index 8ed77f618940..1461c10c1c4c 100644 --- a/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h +++ b/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h @@ -52,10 +52,6 @@ void bcm47xx_fill_sprom(struct ssb_sprom *sprom, const char *prefix, void bcm47xx_fill_ssb_boardinfo(struct ssb_boardinfo *boardinfo, const char *prefix); #endif -#ifdef CONFIG_BCM47XX_BCMA -void bcm47xx_fill_bcma_boardinfo(struct bcma_boardinfo *boardinfo, - const char *prefix); -#endif void bcm47xx_set_system_type(u16 chip_id); diff --git a/arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h b/arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h index c41d1dce1062..2afb84072ad0 100644 --- a/arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h +++ b/arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h @@ -80,6 +80,8 @@ enum bcm47xx_board { BCM47XX_BOARD_LINKSYS_WRT610NV2, BCM47XX_BOARD_LINKSYS_WRTSL54GS, + BCM47XX_BOARD_LUXUL_XWR_1750_V1, + BCM47XX_BOARD_MICROSOFT_MN700, BCM47XX_BOARD_MOTOROLA_WE800G, diff --git a/arch/mips/include/asm/mach-bcm63xx/spaces.h b/arch/mips/include/asm/mach-bcm63xx/spaces.h index 61e750fb4653..1410ed0da4df 100644 --- a/arch/mips/include/asm/mach-bcm63xx/spaces.h +++ b/arch/mips/include/asm/mach-bcm63xx/spaces.h @@ -10,7 +10,7 @@ #ifndef _ASM_BCM63XX_SPACES_H #define _ASM_BCM63XX_SPACES_H -#define FIXADDR_TOP ((unsigned long)(long)(int)0xff000000) +#include <asm/bmips-spaces.h> #include <asm/mach-generic/spaces.h> diff --git a/arch/mips/include/asm/mach-bmips/spaces.h b/arch/mips/include/asm/mach-bmips/spaces.h index 1b05bddc8ec5..c59b28fd9e1d 100644 --- a/arch/mips/include/asm/mach-bmips/spaces.h +++ b/arch/mips/include/asm/mach-bmips/spaces.h @@ -11,7 +11,7 @@ #define _ASM_BMIPS_SPACES_H /* Avoid collisions with system base register (SBR) region on BMIPS3300 */ -#define FIXADDR_TOP ((unsigned long)(long)(int)0xff000000) +#include <asm/bmips-spaces.h> #include <asm/mach-generic/spaces.h> diff --git a/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h b/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h index bdf045fb00c8..21eae03d752a 100644 --- a/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-dec/cpu-feature-overrides.h @@ -14,6 +14,13 @@ /* Generic ones first. */ #define cpu_has_tlb 1 +#define cpu_has_tlbinv 0 +#define cpu_has_segments 0 +#define cpu_has_eva 0 +#define cpu_has_htw 0 +#define cpu_has_rixiex 0 +#define cpu_has_maar 0 +#define cpu_has_rw_llb 0 #define cpu_has_tx39_cache 0 #define cpu_has_divec 0 #define cpu_has_prefetch 0 @@ -24,6 +31,7 @@ #define cpu_has_mips3d 0 #define cpu_has_smartmips 0 #define cpu_has_rixi 0 +#define cpu_has_xpa 0 #define cpu_has_vtag_icache 0 #define cpu_has_ic_fills_f_dc 0 #define cpu_has_pindexed_dcache 0 @@ -36,11 +44,18 @@ #define cpu_has_mips64r1 0 #define cpu_has_mips64r2 0 #define cpu_has_dsp 0 +#define cpu_has_dsp2 0 #define cpu_has_mipsmt 0 #define cpu_has_userlocal 0 +#define cpu_hwrena_impl_bits 0 +#define cpu_has_perf_cntr_intr_bit 0 +#define cpu_has_vz 0 +#define cpu_has_fre 0 +#define cpu_has_cdmm 0 /* R3k-specific ones. */ #ifdef CONFIG_CPU_R3000 +#define cpu_has_3kex 1 #define cpu_has_4kex 0 #define cpu_has_3k_cache 1 #define cpu_has_4k_cache 0 @@ -63,6 +78,7 @@ /* R4k-specific ones. */ #ifdef CONFIG_CPU_R4X00 +#define cpu_has_3kex 0 #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 diff --git a/arch/mips/include/asm/mach-generic/irq.h b/arch/mips/include/asm/mach-generic/irq.h index 050e18bb1a04..be546a0f65fa 100644 --- a/arch/mips/include/asm/mach-generic/irq.h +++ b/arch/mips/include/asm/mach-generic/irq.h @@ -18,7 +18,7 @@ #endif #endif -#ifdef CONFIG_IRQ_CPU +#ifdef CONFIG_IRQ_MIPS_CPU #ifndef MIPS_CPU_IRQ_BASE #ifdef CONFIG_I8259 @@ -34,7 +34,7 @@ #endif #endif -#endif /* CONFIG_IRQ_CPU */ +#endif /* CONFIG_IRQ_MIPS_CPU */ #ifdef CONFIG_MIPS_GIC #ifndef MIPS_GIC_IRQ_BASE diff --git a/arch/mips/include/asm/mach-generic/spaces.h b/arch/mips/include/asm/mach-generic/spaces.h index 9488fa5f8866..afc96ecb9004 100644 --- a/arch/mips/include/asm/mach-generic/spaces.h +++ b/arch/mips/include/asm/mach-generic/spaces.h @@ -94,7 +94,11 @@ #endif #ifndef FIXADDR_TOP +#ifdef CONFIG_KVM_GUEST +#define FIXADDR_TOP ((unsigned long)(long)(int)0x7ffe0000) +#else #define FIXADDR_TOP ((unsigned long)(long)(int)0xfffe0000) #endif +#endif #endif /* __ASM_MACH_GENERIC_SPACES_H */ diff --git a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h index d6111aa2e886..7449794eade6 100644 --- a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h @@ -11,47 +11,69 @@ #include <asm/cpu.h> /* - * IP27 only comes with R10000 family processors all using the same config + * IP27 only comes with R1x000 family processors, all using the same config */ -#define cpu_has_watch 1 -#define cpu_has_mips16 0 -#define cpu_has_divec 0 -#define cpu_has_vce 0 -#define cpu_has_cache_cdex_p 0 -#define cpu_has_cache_cdex_s 0 -#define cpu_has_prefetch 1 -#define cpu_has_mcheck 0 -#define cpu_has_ejtag 0 +#define cpu_has_tlb 1 +#define cpu_has_tlbinv 0 +#define cpu_has_segments 0 +#define cpu_has_eva 0 +#define cpu_has_htw 0 +#define cpu_has_rixiex 0 +#define cpu_has_maar 0 +#define cpu_has_rw_llb 0 +#define cpu_has_3kex 0 +#define cpu_has_4kex 1 +#define cpu_has_3k_cache 0 +#define cpu_has_4k_cache 1 +#define cpu_has_6k_cache 0 +#define cpu_has_8k_cache 0 +#define cpu_has_tx39_cache 0 +#define cpu_has_fpu 1 +#define cpu_has_nofpuex 0 +#define cpu_has_32fpr 1 +#define cpu_has_counter 1 +#define cpu_has_watch 1 +#define cpu_has_64bits 1 +#define cpu_has_divec 0 +#define cpu_has_vce 0 +#define cpu_has_cache_cdex_p 0 +#define cpu_has_cache_cdex_s 0 +#define cpu_has_prefetch 1 +#define cpu_has_mcheck 0 +#define cpu_has_ejtag 0 +#define cpu_has_llsc 1 +#define cpu_has_mips16 0 +#define cpu_has_mdmx 0 +#define cpu_has_mips3d 0 +#define cpu_has_smartmips 0 +#define cpu_has_rixi 0 +#define cpu_has_xpa 0 +#define cpu_has_vtag_icache 0 +#define cpu_has_dc_aliases 0 +#define cpu_has_ic_fills_f_dc 0 -#define cpu_has_llsc 1 -#define cpu_has_vtag_icache 0 -#define cpu_has_dc_aliases 0 -#define cpu_has_ic_fills_f_dc 0 -#define cpu_has_dsp 0 -#define cpu_has_dsp2 0 #define cpu_icache_snoops_remote_store 1 -#define cpu_has_mipsmt 0 -#define cpu_has_userlocal 0 -#define cpu_has_nofpuex 0 -#define cpu_has_64bits 1 - -#define cpu_has_4kex 1 -#define cpu_has_3k_cache 0 -#define cpu_has_6k_cache 0 -#define cpu_has_4k_cache 1 -#define cpu_has_8k_cache 0 -#define cpu_has_tx39_cache 0 +#define cpu_has_mips32r1 0 +#define cpu_has_mips32r2 0 +#define cpu_has_mips64r1 0 +#define cpu_has_mips64r2 0 +#define cpu_has_mips32r6 0 +#define cpu_has_mips64r6 0 +#define cpu_has_dsp 0 +#define cpu_has_dsp2 0 +#define cpu_has_mipsmt 0 +#define cpu_has_userlocal 0 #define cpu_has_inclusive_pcaches 1 +#define cpu_hwrena_impl_bits 0 +#define cpu_has_perf_cntr_intr_bit 0 +#define cpu_has_vz 0 +#define cpu_has_fre 0 +#define cpu_has_cdmm 0 -#define cpu_dcache_line_size() 32 -#define cpu_icache_line_size() 64 -#define cpu_scache_line_size() 128 - -#define cpu_has_mips32r1 0 -#define cpu_has_mips32r2 0 -#define cpu_has_mips64r1 0 -#define cpu_has_mips64r2 0 +#define cpu_dcache_line_size() 32 +#define cpu_icache_line_size() 64 +#define cpu_scache_line_size() 128 #endif /* __ASM_MACH_IP27_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mach-jz4740/clock.h b/arch/mips/include/asm/mach-jz4740/clock.h index 16659cd76d4e..104d2dfe1e36 100644 --- a/arch/mips/include/asm/mach-jz4740/clock.h +++ b/arch/mips/include/asm/mach-jz4740/clock.h @@ -22,6 +22,9 @@ enum jz4740_wait_mode { void jz4740_clock_set_wait_mode(enum jz4740_wait_mode mode); +void jz4740_clock_suspend(void); +void jz4740_clock_resume(void); + void jz4740_clock_udc_enable_auto_suspend(void); void jz4740_clock_udc_disable_auto_suspend(void); diff --git a/arch/mips/include/asm/mach-jz4740/cpu-feature-overrides.h b/arch/mips/include/asm/mach-jz4740/cpu-feature-overrides.h index a225baaa215d..0933f94a1e69 100644 --- a/arch/mips/include/asm/mach-jz4740/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-jz4740/cpu-feature-overrides.h @@ -12,8 +12,6 @@ #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 #define cpu_has_tx39_cache 0 -#define cpu_has_fpu 0 -#define cpu_has_32fpr 0 #define cpu_has_counter 0 #define cpu_has_watch 1 #define cpu_has_divec 1 @@ -34,7 +32,6 @@ #define cpu_has_ic_fills_f_dc 0 #define cpu_has_pindexed_dcache 0 #define cpu_has_mips32r1 1 -#define cpu_has_mips32r2 0 #define cpu_has_mips64r1 0 #define cpu_has_mips64r2 0 #define cpu_has_dsp 0 diff --git a/arch/mips/include/asm/mach-jz4740/irq.h b/arch/mips/include/asm/mach-jz4740/irq.h index df50736749c1..9b439fc218bd 100644 --- a/arch/mips/include/asm/mach-jz4740/irq.h +++ b/arch/mips/include/asm/mach-jz4740/irq.h @@ -19,6 +19,12 @@ #define MIPS_CPU_IRQ_BASE 0 #define JZ4740_IRQ_BASE 8 +#ifdef CONFIG_MACH_JZ4740 +# define NR_INTC_IRQS 32 +#else +# define NR_INTC_IRQS 64 +#endif + /* 1st-level interrupts */ #define JZ4740_IRQ(x) (JZ4740_IRQ_BASE + (x)) #define JZ4740_IRQ_I2C JZ4740_IRQ(1) @@ -44,13 +50,15 @@ #define JZ4740_IRQ_IPU JZ4740_IRQ(29) #define JZ4740_IRQ_LCD JZ4740_IRQ(30) +#define JZ4780_IRQ_TCU2 JZ4740_IRQ(25) + /* 2nd-level interrupts */ -#define JZ4740_IRQ_DMA(x) (JZ4740_IRQ(32) + (x)) +#define JZ4740_IRQ_DMA(x) (JZ4740_IRQ(NR_INTC_IRQS) + (x)) #define JZ4740_IRQ_INTC_GPIO(x) (JZ4740_IRQ_GPIO0 - (x)) -#define JZ4740_IRQ_GPIO(x) (JZ4740_IRQ(48) + (x)) +#define JZ4740_IRQ_GPIO(x) (JZ4740_IRQ(NR_INTC_IRQS + 16) + (x)) -#define JZ4740_IRQ_ADC_BASE JZ4740_IRQ(176) +#define JZ4740_IRQ_ADC_BASE JZ4740_IRQ(NR_INTC_IRQS + 144) #define NR_IRQS (JZ4740_IRQ_ADC_BASE + 6) diff --git a/arch/mips/include/asm/mach-jz4740/platform.h b/arch/mips/include/asm/mach-jz4740/platform.h index 069b43a9da6f..32cfbe6a191b 100644 --- a/arch/mips/include/asm/mach-jz4740/platform.h +++ b/arch/mips/include/asm/mach-jz4740/platform.h @@ -35,6 +35,4 @@ extern struct platform_device jz4740_wdt_device; extern struct platform_device jz4740_pwm_device; extern struct platform_device jz4740_dma_device; -void jz4740_serial_device_register(void); - #endif diff --git a/arch/mips/include/asm/mach-loongson/workarounds.h b/arch/mips/include/asm/mach-loongson/workarounds.h deleted file mode 100644 index e180c1422eae..000000000000 --- a/arch/mips/include/asm/mach-loongson/workarounds.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef __ASM_MACH_LOONGSON_WORKAROUNDS_H_ -#define __ASM_MACH_LOONGSON_WORKAROUNDS_H_ - -#define WORKAROUND_CPUFREQ 0x00000001 -#define WORKAROUND_CPUHOTPLUG 0x00000002 - -#endif diff --git a/arch/mips/include/asm/mach-loongson1/cpufreq.h b/arch/mips/include/asm/mach-loongson32/cpufreq.h index e7765ce30bcf..6843fa1a608d 100644 --- a/arch/mips/include/asm/mach-loongson1/cpufreq.h +++ b/arch/mips/include/asm/mach-loongson32/cpufreq.h @@ -10,8 +10,8 @@ */ -#ifndef __ASM_MACH_LOONGSON1_CPUFREQ_H -#define __ASM_MACH_LOONGSON1_CPUFREQ_H +#ifndef __ASM_MACH_LOONGSON32_CPUFREQ_H +#define __ASM_MACH_LOONGSON32_CPUFREQ_H struct plat_ls1x_cpufreq { const char *clk_name; /* CPU clk */ @@ -20,4 +20,4 @@ struct plat_ls1x_cpufreq { unsigned int min_freq; /* in kHz */ }; -#endif /* __ASM_MACH_LOONGSON1_CPUFREQ_H */ +#endif /* __ASM_MACH_LOONGSON32_CPUFREQ_H */ diff --git a/arch/mips/include/asm/mach-loongson1/irq.h b/arch/mips/include/asm/mach-loongson32/irq.h index 96bfb1c1c73d..0d35b994e8d2 100644 --- a/arch/mips/include/asm/mach-loongson1/irq.h +++ b/arch/mips/include/asm/mach-loongson32/irq.h @@ -10,8 +10,8 @@ */ -#ifndef __ASM_MACH_LOONGSON1_IRQ_H -#define __ASM_MACH_LOONGSON1_IRQ_H +#ifndef __ASM_MACH_LOONGSON32_IRQ_H +#define __ASM_MACH_LOONGSON32_IRQ_H /* * CPU core Interrupt Numbers @@ -70,4 +70,4 @@ #define NR_IRQS (MIPS_CPU_IRQS + LS1X_IRQS) -#endif /* __ASM_MACH_LOONGSON1_IRQ_H */ +#endif /* __ASM_MACH_LOONGSON32_IRQ_H */ diff --git a/arch/mips/include/asm/mach-loongson1/loongson1.h b/arch/mips/include/asm/mach-loongson32/loongson1.h index 20e0c2b155dd..12aa129aad80 100644 --- a/arch/mips/include/asm/mach-loongson1/loongson1.h +++ b/arch/mips/include/asm/mach-loongson32/loongson1.h @@ -10,8 +10,8 @@ */ -#ifndef __ASM_MACH_LOONGSON1_LOONGSON1_H -#define __ASM_MACH_LOONGSON1_LOONGSON1_H +#ifndef __ASM_MACH_LOONGSON32_LOONGSON1_H +#define __ASM_MACH_LOONGSON32_LOONGSON1_H #define DEFAULT_MEMSIZE 256 /* If no memsize provided */ @@ -47,4 +47,4 @@ #include <regs-pwm.h> #include <regs-wdt.h> -#endif /* __ASM_MACH_LOONGSON1_LOONGSON1_H */ +#endif /* __ASM_MACH_LOONGSON32_LOONGSON1_H */ diff --git a/arch/mips/include/asm/mach-loongson1/platform.h b/arch/mips/include/asm/mach-loongson32/platform.h index 47de55e0c835..c32f03f3f72c 100644 --- a/arch/mips/include/asm/mach-loongson1/platform.h +++ b/arch/mips/include/asm/mach-loongson32/platform.h @@ -8,8 +8,8 @@ */ -#ifndef __ASM_MACH_LOONGSON1_PLATFORM_H -#define __ASM_MACH_LOONGSON1_PLATFORM_H +#ifndef __ASM_MACH_LOONGSON32_PLATFORM_H +#define __ASM_MACH_LOONGSON32_PLATFORM_H #include <linux/platform_device.h> @@ -23,4 +23,4 @@ extern struct platform_device ls1x_rtc_pdev; extern void __init ls1x_clk_init(void); extern void __init ls1x_serial_setup(struct platform_device *pdev); -#endif /* __ASM_MACH_LOONGSON1_PLATFORM_H */ +#endif /* __ASM_MACH_LOONGSON32_PLATFORM_H */ diff --git a/arch/mips/include/asm/mach-loongson1/prom.h b/arch/mips/include/asm/mach-loongson32/prom.h index 34859a4d4ac4..a08503c0ba20 100644 --- a/arch/mips/include/asm/mach-loongson1/prom.h +++ b/arch/mips/include/asm/mach-loongson32/prom.h @@ -7,8 +7,8 @@ * option) any later version. */ -#ifndef __ASM_MACH_LOONGSON1_PROM_H -#define __ASM_MACH_LOONGSON1_PROM_H +#ifndef __ASM_MACH_LOONGSON32_PROM_H +#define __ASM_MACH_LOONGSON32_PROM_H #include <linux/io.h> #include <linux/init.h> @@ -21,4 +21,4 @@ extern unsigned long memsize, highmemsize; extern char *prom_getenv(char *name); extern void __init prom_init_cmdline(void); -#endif /* __ASM_MACH_LOONGSON1_PROM_H */ +#endif /* __ASM_MACH_LOONGSON32_PROM_H */ diff --git a/arch/mips/include/asm/mach-loongson1/regs-clk.h b/arch/mips/include/asm/mach-loongson32/regs-clk.h index ee2445b10fc3..1f5a715ac841 100644 --- a/arch/mips/include/asm/mach-loongson1/regs-clk.h +++ b/arch/mips/include/asm/mach-loongson32/regs-clk.h @@ -9,8 +9,8 @@ * option) any later version. */ -#ifndef __ASM_MACH_LOONGSON1_REGS_CLK_H -#define __ASM_MACH_LOONGSON1_REGS_CLK_H +#ifndef __ASM_MACH_LOONGSON32_REGS_CLK_H +#define __ASM_MACH_LOONGSON32_REGS_CLK_H #define LS1X_CLK_REG(x) \ ((void __iomem *)KSEG1ADDR(LS1X_CLK_BASE + (x))) @@ -48,4 +48,4 @@ #define BYPASS_DDR_WIDTH 1 #define BYPASS_CPU_WIDTH 1 -#endif /* __ASM_MACH_LOONGSON1_REGS_CLK_H */ +#endif /* __ASM_MACH_LOONGSON32_REGS_CLK_H */ diff --git a/arch/mips/include/asm/mach-loongson1/regs-mux.h b/arch/mips/include/asm/mach-loongson32/regs-mux.h index fb1e36efaa19..8302d92f2da2 100644 --- a/arch/mips/include/asm/mach-loongson1/regs-mux.h +++ b/arch/mips/include/asm/mach-loongson32/regs-mux.h @@ -9,8 +9,8 @@ * option) any later version. */ -#ifndef __ASM_MACH_LOONGSON1_REGS_MUX_H -#define __ASM_MACH_LOONGSON1_REGS_MUX_H +#ifndef __ASM_MACH_LOONGSON32_REGS_MUX_H +#define __ASM_MACH_LOONGSON32_REGS_MUX_H #define LS1X_MUX_REG(x) \ ((void __iomem *)KSEG1ADDR(LS1X_MUX_BASE + (x))) @@ -64,4 +64,4 @@ #define GMAC1_USE_PWM23 (0x1 << 1) #define GMAC0_USE_PWM01 0x1 -#endif /* __ASM_MACH_LOONGSON1_REGS_MUX_H */ +#endif /* __ASM_MACH_LOONGSON32_REGS_MUX_H */ diff --git a/arch/mips/include/asm/mach-loongson1/regs-pwm.h b/arch/mips/include/asm/mach-loongson32/regs-pwm.h index 99f2bcc586f0..69f174ed13a4 100644 --- a/arch/mips/include/asm/mach-loongson1/regs-pwm.h +++ b/arch/mips/include/asm/mach-loongson32/regs-pwm.h @@ -9,8 +9,8 @@ * option) any later version. */ -#ifndef __ASM_MACH_LOONGSON1_REGS_PWM_H -#define __ASM_MACH_LOONGSON1_REGS_PWM_H +#ifndef __ASM_MACH_LOONGSON32_REGS_PWM_H +#define __ASM_MACH_LOONGSON32_REGS_PWM_H /* Loongson 1 PWM Timer Register Definitions */ #define PWM_CNT 0x0 @@ -26,4 +26,4 @@ #define PWM_OE (0x1 << 3) #define CNT_EN 0x1 -#endif /* __ASM_MACH_LOONGSON1_REGS_PWM_H */ +#endif /* __ASM_MACH_LOONGSON32_REGS_PWM_H */ diff --git a/arch/mips/include/asm/mach-loongson1/regs-wdt.h b/arch/mips/include/asm/mach-loongson32/regs-wdt.h index c39ee982ad3b..6644ab6d3391 100644 --- a/arch/mips/include/asm/mach-loongson1/regs-wdt.h +++ b/arch/mips/include/asm/mach-loongson32/regs-wdt.h @@ -9,11 +9,11 @@ * option) any later version. */ -#ifndef __ASM_MACH_LOONGSON1_REGS_WDT_H -#define __ASM_MACH_LOONGSON1_REGS_WDT_H +#ifndef __ASM_MACH_LOONGSON32_REGS_WDT_H +#define __ASM_MACH_LOONGSON32_REGS_WDT_H #define WDT_EN 0x0 #define WDT_TIMER 0x4 #define WDT_SET 0x8 -#endif /* __ASM_MACH_LOONGSON1_REGS_WDT_H */ +#endif /* __ASM_MACH_LOONGSON32_REGS_WDT_H */ diff --git a/arch/mips/include/asm/mach-loongson/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h index fa802926523f..d3f3258b7cd4 100644 --- a/arch/mips/include/asm/mach-loongson/boot_param.h +++ b/arch/mips/include/asm/mach-loongson64/boot_param.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_LOONGSON_BOOT_PARAM_H_ -#define __ASM_MACH_LOONGSON_BOOT_PARAM_H_ +#ifndef __ASM_MACH_LOONGSON64_BOOT_PARAM_H_ +#define __ASM_MACH_LOONGSON64_BOOT_PARAM_H_ #define SYSTEM_RAM_LOW 1 #define SYSTEM_RAM_HIGH 2 diff --git a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h index acc376897e46..98963c2c7be4 100644 --- a/arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h @@ -13,8 +13,8 @@ * loongson2f user manual. */ -#ifndef __ASM_MACH_LOONGSON_CPU_FEATURE_OVERRIDES_H -#define __ASM_MACH_LOONGSON_CPU_FEATURE_OVERRIDES_H +#ifndef __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H +#define __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H #define cpu_dcache_line_size() 32 #define cpu_icache_line_size() 32 @@ -58,4 +58,4 @@ #define cpu_has_wsbh IS_ENABLED(CONFIG_CPU_LOONGSON3) -#endif /* __ASM_MACH_LOONGSON_CPU_FEATURE_OVERRIDES_H */ +#endif /* __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mach-loongson/cs5536/cs5536.h b/arch/mips/include/asm/mach-loongson64/cs5536/cs5536.h index a0ee0cb775ad..a0ee0cb775ad 100644 --- a/arch/mips/include/asm/mach-loongson/cs5536/cs5536.h +++ b/arch/mips/include/asm/mach-loongson64/cs5536/cs5536.h diff --git a/arch/mips/include/asm/mach-loongson/cs5536/cs5536_mfgpt.h b/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_mfgpt.h index 021d0172dad6..021d0172dad6 100644 --- a/arch/mips/include/asm/mach-loongson/cs5536/cs5536_mfgpt.h +++ b/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_mfgpt.h diff --git a/arch/mips/include/asm/mach-loongson/cs5536/cs5536_pci.h b/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_pci.h index 8a7ecb4d5c64..8a7ecb4d5c64 100644 --- a/arch/mips/include/asm/mach-loongson/cs5536/cs5536_pci.h +++ b/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_pci.h diff --git a/arch/mips/include/asm/mach-loongson/cs5536/cs5536_vsm.h b/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_vsm.h index 1f17c1815ee5..1f17c1815ee5 100644 --- a/arch/mips/include/asm/mach-loongson/cs5536/cs5536_vsm.h +++ b/arch/mips/include/asm/mach-loongson64/cs5536/cs5536_vsm.h diff --git a/arch/mips/include/asm/mach-loongson/dma-coherence.h b/arch/mips/include/asm/mach-loongson64/dma-coherence.h index 4bf4e19f72e8..1602a9e9e8c2 100644 --- a/arch/mips/include/asm/mach-loongson/dma-coherence.h +++ b/arch/mips/include/asm/mach-loongson64/dma-coherence.h @@ -8,8 +8,8 @@ * Author: Fuxin Zhang, [email protected] * */ -#ifndef __ASM_MACH_LOONGSON_DMA_COHERENCE_H -#define __ASM_MACH_LOONGSON_DMA_COHERENCE_H +#ifndef __ASM_MACH_LOONGSON64_DMA_COHERENCE_H +#define __ASM_MACH_LOONGSON64_DMA_COHERENCE_H #ifdef CONFIG_SWIOTLB #include <linux/swiotlb.h> @@ -82,4 +82,4 @@ static inline void plat_post_dma_flush(struct device *dev) { } -#endif /* __ASM_MACH_LOONGSON_DMA_COHERENCE_H */ +#endif /* __ASM_MACH_LOONGSON64_DMA_COHERENCE_H */ diff --git a/arch/mips/include/asm/mach-loongson/gpio.h b/arch/mips/include/asm/mach-loongson64/gpio.h index b3b216904a9a..b3b216904a9a 100644 --- a/arch/mips/include/asm/mach-loongson/gpio.h +++ b/arch/mips/include/asm/mach-loongson64/gpio.h diff --git a/arch/mips/include/asm/mach-loongson/irq.h b/arch/mips/include/asm/mach-loongson64/irq.h index a281cca5f2fb..d18c45c7c394 100644 --- a/arch/mips/include/asm/mach-loongson/irq.h +++ b/arch/mips/include/asm/mach-loongson64/irq.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_LOONGSON_IRQ_H_ -#define __ASM_MACH_LOONGSON_IRQ_H_ +#ifndef __ASM_MACH_LOONGSON64_IRQ_H_ +#define __ASM_MACH_LOONGSON64_IRQ_H_ #include <boot_param.h> @@ -40,4 +40,4 @@ extern void fixup_irqs(void); extern void loongson3_ipi_interrupt(struct pt_regs *regs); #include_next <irq.h> -#endif /* __ASM_MACH_LOONGSON_IRQ_H_ */ +#endif /* __ASM_MACH_LOONGSON64_IRQ_H_ */ diff --git a/arch/mips/include/asm/mach-loongson/kernel-entry-init.h b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h index df5fca8eeb80..3f2f84f6c401 100644 --- a/arch/mips/include/asm/mach-loongson/kernel-entry-init.h +++ b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h @@ -8,8 +8,8 @@ * Copyright (C) 2009 Jiajie Chen ([email protected]) * Copyright (C) 2012 Huacai Chen ([email protected]) */ -#ifndef __ASM_MACH_LOONGSON_KERNEL_ENTRY_H -#define __ASM_MACH_LOONGSON_KERNEL_ENTRY_H +#ifndef __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H +#define __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H /* * Override macros used in arch/mips/kernel/head.S. @@ -49,4 +49,4 @@ #endif .endm -#endif /* __ASM_MACH_LOONGSON_KERNEL_ENTRY_H */ +#endif /* __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H */ diff --git a/arch/mips/include/asm/mach-loongson/loongson.h b/arch/mips/include/asm/mach-loongson64/loongson.h index 9783103fd6f6..d1ff774ac4b6 100644 --- a/arch/mips/include/asm/mach-loongson/loongson.h +++ b/arch/mips/include/asm/mach-loongson64/loongson.h @@ -8,8 +8,8 @@ * option) any later version. */ -#ifndef __ASM_MACH_LOONGSON_LOONGSON_H -#define __ASM_MACH_LOONGSON_LOONGSON_H +#ifndef __ASM_MACH_LOONGSON64_LOONGSON_H +#define __ASM_MACH_LOONGSON64_LOONGSON_H #include <linux/io.h> #include <linux/init.h> @@ -357,4 +357,4 @@ extern unsigned long _loongson_addrwincfg_base; #endif /* ! CONFIG_CPU_SUPPORTS_ADDRWINCFG */ -#endif /* __ASM_MACH_LOONGSON_LOONGSON_H */ +#endif /* __ASM_MACH_LOONGSON64_LOONGSON_H */ diff --git a/arch/mips/include/asm/mach-loongson/loongson_hwmon.h b/arch/mips/include/asm/mach-loongson64/loongson_hwmon.h index 4431fc54a36c..4431fc54a36c 100644 --- a/arch/mips/include/asm/mach-loongson/loongson_hwmon.h +++ b/arch/mips/include/asm/mach-loongson64/loongson_hwmon.h diff --git a/arch/mips/include/asm/mach-loongson/machine.h b/arch/mips/include/asm/mach-loongson64/machine.h index cb2b60249cd2..c52549bb4e56 100644 --- a/arch/mips/include/asm/mach-loongson/machine.h +++ b/arch/mips/include/asm/mach-loongson64/machine.h @@ -8,8 +8,8 @@ * option) any later version. */ -#ifndef __ASM_MACH_LOONGSON_MACHINE_H -#define __ASM_MACH_LOONGSON_MACHINE_H +#ifndef __ASM_MACH_LOONGSON64_MACHINE_H +#define __ASM_MACH_LOONGSON64_MACHINE_H #ifdef CONFIG_LEMOTE_FULOONG2E @@ -30,4 +30,4 @@ #endif /* CONFIG_LOONGSON_MACH3X */ -#endif /* __ASM_MACH_LOONGSON_MACHINE_H */ +#endif /* __ASM_MACH_LOONGSON64_MACHINE_H */ diff --git a/arch/mips/include/asm/mach-loongson/mc146818rtc.h b/arch/mips/include/asm/mach-loongson64/mc146818rtc.h index ed7fe978335a..ebdccfee50be 100644 --- a/arch/mips/include/asm/mach-loongson/mc146818rtc.h +++ b/arch/mips/include/asm/mach-loongson64/mc146818rtc.h @@ -7,8 +7,8 @@ * * RTC routines for PC style attached Dallas chip. */ -#ifndef __ASM_MACH_LOONGSON_MC146818RTC_H -#define __ASM_MACH_LOONGSON_MC146818RTC_H +#ifndef __ASM_MACH_LOONGSON64_MC146818RTC_H +#define __ASM_MACH_LOONGSON64_MC146818RTC_H #include <linux/io.h> @@ -33,4 +33,4 @@ static inline void CMOS_WRITE(unsigned char data, unsigned long addr) #define mc146818_decode_year(year) ((year) < 70 ? (year) + 2000 : (year) + 1970) #endif -#endif /* __ASM_MACH_LOONGSON_MC146818RTC_H */ +#endif /* __ASM_MACH_LOONGSON64_MC146818RTC_H */ diff --git a/arch/mips/include/asm/mach-loongson/mem.h b/arch/mips/include/asm/mach-loongson64/mem.h index f4a36d7dbfab..75c16bead536 100644 --- a/arch/mips/include/asm/mach-loongson/mem.h +++ b/arch/mips/include/asm/mach-loongson64/mem.h @@ -8,8 +8,8 @@ * option) any later version. */ -#ifndef __ASM_MACH_LOONGSON_MEM_H -#define __ASM_MACH_LOONGSON_MEM_H +#ifndef __ASM_MACH_LOONGSON64_MEM_H +#define __ASM_MACH_LOONGSON64_MEM_H /* * high memory space @@ -38,4 +38,4 @@ #define LOONGSON_MMIO_MEM_END 0x80000000 #endif -#endif /* __ASM_MACH_LOONGSON_MEM_H */ +#endif /* __ASM_MACH_LOONGSON64_MEM_H */ diff --git a/arch/mips/include/asm/mach-loongson/mmzone.h b/arch/mips/include/asm/mach-loongson64/mmzone.h index 37c08a27b4f0..37c08a27b4f0 100644 --- a/arch/mips/include/asm/mach-loongson/mmzone.h +++ b/arch/mips/include/asm/mach-loongson64/mmzone.h diff --git a/arch/mips/include/asm/mach-loongson/pci.h b/arch/mips/include/asm/mach-loongson64/pci.h index 1212774f66ef..3401f557434a 100644 --- a/arch/mips/include/asm/mach-loongson/pci.h +++ b/arch/mips/include/asm/mach-loongson64/pci.h @@ -9,8 +9,8 @@ * option) any later version. */ -#ifndef __ASM_MACH_LOONGSON_PCI_H_ -#define __ASM_MACH_LOONGSON_PCI_H_ +#ifndef __ASM_MACH_LOONGSON64_PCI_H_ +#define __ASM_MACH_LOONGSON64_PCI_H_ extern struct pci_ops loongson_pci_ops; @@ -52,4 +52,4 @@ extern struct pci_ops loongson_pci_ops; #endif /* !CONFIG_CPU_SUPPORTS_ADDRWINCFG */ -#endif /* !__ASM_MACH_LOONGSON_PCI_H_ */ +#endif /* !__ASM_MACH_LOONGSON64_PCI_H_ */ diff --git a/arch/mips/include/asm/mach-loongson/spaces.h b/arch/mips/include/asm/mach-loongson64/spaces.h index e2506ee90044..c6040b9fcf94 100644 --- a/arch/mips/include/asm/mach-loongson/spaces.h +++ b/arch/mips/include/asm/mach-loongson64/spaces.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_LOONGSON_SPACES_H_ -#define __ASM_MACH_LOONGSON_SPACES_H_ +#ifndef __ASM_MACH_LOONGSON64_SPACES_H_ +#define __ASM_MACH_LOONGSON64_SPACES_H_ #if defined(CONFIG_64BIT) #define CAC_BASE _AC(0x9800000000000000, UL) diff --git a/arch/mips/include/asm/mach-loongson/topology.h b/arch/mips/include/asm/mach-loongson64/topology.h index 0d8f3b55bdbc..0d8f3b55bdbc 100644 --- a/arch/mips/include/asm/mach-loongson/topology.h +++ b/arch/mips/include/asm/mach-loongson64/topology.h diff --git a/arch/mips/include/asm/mach-loongson64/workarounds.h b/arch/mips/include/asm/mach-loongson64/workarounds.h new file mode 100644 index 000000000000..e659f041e116 --- /dev/null +++ b/arch/mips/include/asm/mach-loongson64/workarounds.h @@ -0,0 +1,7 @@ +#ifndef __ASM_MACH_LOONGSON64_WORKAROUNDS_H_ +#define __ASM_MACH_LOONGSON64_WORKAROUNDS_H_ + +#define WORKAROUND_CPUFREQ 0x00000001 +#define WORKAROUND_CPUHOTPLUG 0x00000002 + +#endif diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h index 59c0901bdd84..edc7ee95269e 100644 --- a/arch/mips/include/asm/mips-cm.h +++ b/arch/mips/include/asm/mips-cm.h @@ -11,6 +11,7 @@ #ifndef __MIPS_ASM_MIPS_CM_H__ #define __MIPS_ASM_MIPS_CM_H__ +#include <linux/errno.h> #include <linux/io.h> #include <linux/types.h> @@ -216,6 +217,10 @@ BUILD_CM_Cx_R_(tcid_8_priority, 0x80) #define CM_GCR_CPC_BASE_CPCEN_SHF 0 #define CM_GCR_CPC_BASE_CPCEN_MSK (_ULCAST_(0x1) << 0) +/* GCR_GIC_STATUS register fields */ +#define CM_GCR_GIC_STATUS_GICEX_SHF 0 +#define CM_GCR_GIC_STATUS_GICEX_MSK (_ULCAST_(0x1) << 0) + /* GCR_REGn_BASE register fields */ #define CM_GCR_REGn_BASE_BASEADDR_SHF 16 #define CM_GCR_REGn_BASE_BASEADDR_MSK (_ULCAST_(0xffff) << 16) diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 764e2756b54d..c5b0956a8530 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -589,6 +589,28 @@ /* EntryHI bit definition */ #define MIPS_ENTRYHI_EHINV (_ULCAST_(1) << 10) +/* R3000 EntryLo bit definitions */ +#define R3K_ENTRYLO_G (_ULCAST_(1) << 8) +#define R3K_ENTRYLO_V (_ULCAST_(1) << 9) +#define R3K_ENTRYLO_D (_ULCAST_(1) << 10) +#define R3K_ENTRYLO_N (_ULCAST_(1) << 11) + +/* R4000 compatible EntryLo bit definitions */ +#define MIPS_ENTRYLO_G (_ULCAST_(1) << 0) +#define MIPS_ENTRYLO_V (_ULCAST_(1) << 1) +#define MIPS_ENTRYLO_D (_ULCAST_(1) << 2) +#define MIPS_ENTRYLO_C_SHIFT 3 +#define MIPS_ENTRYLO_C (_ULCAST_(7) << MIPS_ENTRYLO_C_SHIFT) +#ifdef CONFIG_64BIT +/* as read by dmfc0 */ +#define MIPS_ENTRYLO_XI (_ULCAST_(1) << 62) +#define MIPS_ENTRYLO_RI (_ULCAST_(1) << 63) +#else +/* as read by mfc0 */ +#define MIPS_ENTRYLO_XI (_ULCAST_(1) << 30) +#define MIPS_ENTRYLO_RI (_ULCAST_(1) << 31) +#endif + /* CMGCRBase bit definitions */ #define MIPS_CMGCRB_BASE 11 #define MIPS_CMGCRF_BASE (~_ULCAST_((1 << MIPS_CMGCRB_BASE) - 1)) @@ -685,6 +707,15 @@ #define TX39_CONF_DRSIZE_SHIFT 0 #define TX39_CONF_DRSIZE_MASK 0x00000003 +/* + * Interesting Bits in the R10K CP0 Branch Diagnostic Register + */ +/* Disable Branch Target Address Cache */ +#define R10K_DIAG_D_BTAC (_ULCAST_(1) << 27) +/* Enable Branch Prediction Global History */ +#define R10K_DIAG_E_GHIST (_ULCAST_(1) << 26) +/* Disable Branch Return Cache */ +#define R10K_DIAG_D_BRC (_ULCAST_(1) << 22) /* * Coprocessor 1 (FPU) register names @@ -1247,6 +1278,10 @@ do { \ #define read_c0_diag() __read_32bit_c0_register($22, 0) #define write_c0_diag(val) __write_32bit_c0_register($22, 0, val) +/* R10K CP0 Branch Diagnostic register is 64bits wide */ +#define read_c0_r10k_diag() __read_64bit_c0_register($22, 0) +#define write_c0_r10k_diag(val) __write_64bit_c0_register($22, 0, val) + #define read_c0_diag1() __read_32bit_c0_register($22, 1) #define write_c0_diag1(val) __write_32bit_c0_register($22, 1, val) diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index 7d56686c0e62..832e2167d00f 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -18,7 +18,7 @@ #include <asm-generic/pgtable-nopmd.h> -extern int temp_tlb_entry __cpuinitdata; +extern int temp_tlb_entry; /* * - add_temporary_entry() add a temporary TLB entry. We use TLB entries diff --git a/arch/mips/include/asm/prom.h b/arch/mips/include/asm/prom.h index 8ebc2aa5f3e1..0b4b668925f6 100644 --- a/arch/mips/include/asm/prom.h +++ b/arch/mips/include/asm/prom.h @@ -11,7 +11,7 @@ #ifndef __ASM_PROM_H #define __ASM_PROM_H -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF #include <linux/bug.h> #include <linux/io.h> #include <linux/types.h> diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index 1fca2e0793dc..9de4ba43dcd1 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -109,7 +109,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) " subu %[ticket], %[my_ticket], %[ticket] \n" "2: \n" " .subsection 2 \n" - "4: andi %[ticket], %[ticket], 0x1fff \n" + "4: andi %[ticket], %[ticket], 0xffff \n" " sll %[ticket], 5 \n" " \n" "6: bnez %[ticket], 6b \n" @@ -317,7 +317,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) static inline void arch_write_unlock(arch_rwlock_t *rw) { - smp_mb(); + smp_mb__before_llsc(); __asm__ __volatile__( " # arch_write_unlock \n" diff --git a/arch/mips/include/asm/txx9irq.h b/arch/mips/include/asm/txx9irq.h index 5620879be37f..68a6650a4025 100644 --- a/arch/mips/include/asm/txx9irq.h +++ b/arch/mips/include/asm/txx9irq.h @@ -11,7 +11,7 @@ #include <irq.h> -#ifdef CONFIG_IRQ_CPU +#ifdef CONFIG_IRQ_MIPS_CPU #define TXX9_IRQ_BASE (MIPS_CPU_IRQ_BASE + 8) #else #ifdef CONFIG_I8259 diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index 9722357d2854..5305d694ffe5 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -78,6 +78,21 @@ extern u64 __ua_limit; #define segment_eq(a, b) ((a).seg == (b).seg) +/* + * eva_kernel_access() - determine whether kernel memory access on an EVA system + * + * Determines whether memory accesses should be performed to kernel memory + * on a system using Extended Virtual Addressing (EVA). + * + * Return: true if a kernel memory access on an EVA system, else false. + */ +static inline bool eva_kernel_access(void) +{ + if (!config_enabled(CONFIG_EVA)) + return false; + + return segment_eq(get_fs(), get_ds()); +} /* * Is a address valid? This does a straighforward calculation rather @@ -286,7 +301,7 @@ do { \ ({ \ int __gu_err; \ \ - if (segment_eq(get_fs(), get_ds())) { \ + if (eva_kernel_access()) { \ __get_kernel_common((x), size, ptr); \ } else { \ __chk_user_ptr(ptr); \ @@ -302,7 +317,7 @@ do { \ \ might_fault(); \ if (likely(access_ok(VERIFY_READ, __gu_ptr, size))) { \ - if (segment_eq(get_fs(), get_ds())) \ + if (eva_kernel_access()) \ __get_kernel_common((x), size, __gu_ptr); \ else \ __get_user_common((x), size, __gu_ptr); \ @@ -427,7 +442,7 @@ do { \ int __pu_err = 0; \ \ __pu_val = (x); \ - if (segment_eq(get_fs(), get_ds())) { \ + if (eva_kernel_access()) { \ __put_kernel_common(ptr, size); \ } else { \ __chk_user_ptr(ptr); \ @@ -444,7 +459,7 @@ do { \ \ might_fault(); \ if (likely(access_ok(VERIFY_WRITE, __pu_addr, size))) { \ - if (segment_eq(get_fs(), get_ds())) \ + if (eva_kernel_access()) \ __put_kernel_common(__pu_addr, size); \ else \ __put_user_common(__pu_addr, size); \ @@ -843,7 +858,7 @@ extern size_t __copy_user(void *__to, const void *__from, size_t __n); __cu_from = (from); \ __cu_len = (n); \ might_fault(); \ - if (segment_eq(get_fs(), get_ds())) \ + if (eva_kernel_access()) \ __cu_len = __invoke_copy_to_kernel(__cu_to, __cu_from, \ __cu_len); \ else \ @@ -863,7 +878,7 @@ extern size_t __copy_user_inatomic(void *__to, const void *__from, size_t __n); __cu_to = (to); \ __cu_from = (from); \ __cu_len = (n); \ - if (segment_eq(get_fs(), get_ds())) \ + if (eva_kernel_access()) \ __cu_len = __invoke_copy_to_kernel(__cu_to, __cu_from, \ __cu_len); \ else \ @@ -881,7 +896,7 @@ extern size_t __copy_user_inatomic(void *__to, const void *__from, size_t __n); __cu_to = (to); \ __cu_from = (from); \ __cu_len = (n); \ - if (segment_eq(get_fs(), get_ds())) \ + if (eva_kernel_access()) \ __cu_len = __invoke_copy_from_kernel_inatomic(__cu_to, \ __cu_from,\ __cu_len);\ @@ -915,7 +930,7 @@ extern size_t __copy_user_inatomic(void *__to, const void *__from, size_t __n); __cu_to = (to); \ __cu_from = (from); \ __cu_len = (n); \ - if (segment_eq(get_fs(), get_ds())) { \ + if (eva_kernel_access()) { \ __cu_len = __invoke_copy_to_kernel(__cu_to, \ __cu_from, \ __cu_len); \ @@ -1139,7 +1154,7 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n); __cu_to = (to); \ __cu_from = (from); \ __cu_len = (n); \ - if (segment_eq(get_fs(), get_ds())) { \ + if (eva_kernel_access()) { \ __cu_len = __invoke_copy_from_kernel(__cu_to, \ __cu_from, \ __cu_len); \ @@ -1163,7 +1178,7 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n); __cu_to = (to); \ __cu_from = (from); \ __cu_len = (n); \ - if (segment_eq(get_fs(), get_ds())) { \ + if (eva_kernel_access()) { \ __cu_len = ___invoke_copy_in_kernel(__cu_to, __cu_from, \ __cu_len); \ } else { \ @@ -1183,7 +1198,7 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n); __cu_to = (to); \ __cu_from = (from); \ __cu_len = (n); \ - if (segment_eq(get_fs(), get_ds())) { \ + if (eva_kernel_access()) { \ __cu_len = ___invoke_copy_in_kernel(__cu_to,__cu_from, \ __cu_len); \ } else { \ @@ -1263,7 +1278,7 @@ __strncpy_from_user(char *__to, const char __user *__from, long __len) { long res; - if (segment_eq(get_fs(), get_ds())) { + if (eva_kernel_access()) { __asm__ __volatile__( "move\t$4, %1\n\t" "move\t$5, %2\n\t" @@ -1312,7 +1327,7 @@ strncpy_from_user(char *__to, const char __user *__from, long __len) { long res; - if (segment_eq(get_fs(), get_ds())) { + if (eva_kernel_access()) { __asm__ __volatile__( "move\t$4, %1\n\t" "move\t$5, %2\n\t" @@ -1357,7 +1372,7 @@ static inline long strlen_user(const char __user *s) { long res; - if (segment_eq(get_fs(), get_ds())) { + if (eva_kernel_access()) { __asm__ __volatile__( "move\t$4, %1\n\t" __MODULE_JAL(__strlen_kernel_asm) @@ -1384,7 +1399,7 @@ static inline long __strnlen_user(const char __user *s, long n) { long res; - if (segment_eq(get_fs(), get_ds())) { + if (eva_kernel_access()) { __asm__ __volatile__( "move\t$4, %1\n\t" "move\t$5, %2\n\t" @@ -1426,7 +1441,7 @@ static inline long strnlen_user(const char __user *s, long n) long res; might_fault(); - if (segment_eq(get_fs(), get_ds())) { + if (eva_kernel_access()) { __asm__ __volatile__( "move\t$4, %1\n\t" "move\t$5, %2\n\t" diff --git a/arch/mips/include/asm/xtalk/xwidget.h b/arch/mips/include/asm/xtalk/xwidget.h index 32e4e884f9b9..24f121da6a1d 100644 --- a/arch/mips/include/asm/xtalk/xwidget.h +++ b/arch/mips/include/asm/xtalk/xwidget.h @@ -84,6 +84,118 @@ #define WIDGET_LLP_MAXBURST 0x000003ff #define WIDGET_LLP_MAXBURST_SHFT 0 +/* Xtalk Widget Device Mfgr Nums */ +#define WIDGET_XBOW_MFGR_NUM 0x0 /* IP30 XBow Chip */ +#define WIDGET_XXBOW_MFGR_NUM 0x0 /* IP35 Xbow + XBridge Chip */ +#define WIDGET_ODYS_MFGR_NUM 0x023 /* Odyssey / VPro GFX */ +#define WIDGET_TPU_MFGR_NUM 0x024 /* Tensor Processor Unit */ +#define WIDGET_XBRDG_MFGR_NUM 0x024 /* IP35 XBridge Chip */ +#define WIDGET_HEART_MFGR_NUM 0x036 /* IP30 HEART Chip */ +#define WIDGET_BRIDG_MFGR_NUM 0x036 /* PCI Bridge */ +#define WIDGET_HUB_MFGR_NUM 0x036 /* IP27 Hub Chip */ +#define WIDGET_BDRCK_MFGR_NUM 0x036 /* IP35 Bedrock Chip */ +#define WIDGET_IMPCT_MFGR_NUM 0x2aa /* HQ4 / Impact GFX */ +#define WIDGET_KONA_MFGR_NUM 0x2aa /* InfiniteReality3 / Kona GFX */ +#define WIDGET_NULL_MFGR_NUM -1 /* NULL */ + +/* Xtalk Widget Device Part Nums */ +#define WIDGET_XBOW_PART_NUM 0x0000 +#define WIDGET_HEART_PART_NUM 0xc001 +#define WIDGET_BRIDG_PART_NUM 0xc002 +#define WIDGET_IMPCT_PART_NUM 0xc003 +#define WIDGET_ODYS_PART_NUM 0xc013 +#define WIDGET_HUB_PART_NUM 0xc101 +#define WIDGET_KONA_PART_NUM 0xc102 +#define WIDGET_BDRCK_PART_NUM 0xc110 +#define WIDGET_TPU_PART_NUM 0xc202 +#define WIDGET_XXBOW_PART_NUM 0xd000 +#define WIDGET_XBRDG_PART_NUM 0xd002 +#define WIDGET_NULL_PART_NUM -1 + +/* For Xtalk Widget identification */ +struct widget_ident { + u32 mfgr; + u32 part; + char *name; + char *revs[16]; +}; + +/* Known Xtalk Widgets */ +static const struct widget_ident __initconst widget_idents[] = { + { + WIDGET_XBOW_MFGR_NUM, + WIDGET_XBOW_PART_NUM, + "xbow", + {NULL, "1.0", "1.1", "1.2", "1.3", "2.0", NULL}, + }, + { + WIDGET_HEART_MFGR_NUM, + WIDGET_HEART_PART_NUM, + "heart", + {NULL, "A", "B", "C", "D", "E", "F", NULL}, + }, + { + WIDGET_BRIDG_MFGR_NUM, + WIDGET_BRIDG_PART_NUM, + "bridge", + {NULL, "A", "B", "C", "D", NULL}, + }, + { + WIDGET_IMPCT_MFGR_NUM, + WIDGET_IMPCT_PART_NUM, + "impact", + {NULL, "A", "B", NULL}, + }, + { + WIDGET_ODYS_MFGR_NUM, + WIDGET_ODYS_PART_NUM, + "odyssey", + {NULL, "A", "B", NULL}, + }, + { + WIDGET_HUB_MFGR_NUM, + WIDGET_HUB_PART_NUM, + "hub", + {NULL, "1.0", "2.0", "2.1", "2.2", "2.3", "2.4", NULL}, + }, + { + WIDGET_KONA_MFGR_NUM, + WIDGET_KONA_PART_NUM, + "kona", + {NULL}, + }, + { + WIDGET_BDRCK_MFGR_NUM, + WIDGET_BDRCK_PART_NUM, + "bedrock", + {NULL, "1.0", "1.1", NULL}, + }, + { + WIDGET_TPU_MFGR_NUM, + WIDGET_TPU_PART_NUM, + "tpu", + {"0", NULL}, + }, + { + WIDGET_XXBOW_MFGR_NUM, + WIDGET_XXBOW_PART_NUM, + "xxbow", + {NULL, "1.0", "2.0", NULL}, + }, + { + WIDGET_XBRDG_MFGR_NUM, + WIDGET_XBRDG_PART_NUM, + "xbridge", + {NULL, "A", "B", NULL}, + }, + { + WIDGET_NULL_MFGR_NUM, + WIDGET_NULL_PART_NUM, + NULL, + {NULL}, + } +}; + /* * according to the crosstalk spec, only 32-bits access to the widget * configuration registers is allowed. some widgets may allow 64-bits diff --git a/arch/mips/jz4740/Kconfig b/arch/mips/jz4740/Kconfig index 468903053883..36f82017695d 100644 --- a/arch/mips/jz4740/Kconfig +++ b/arch/mips/jz4740/Kconfig @@ -1,9 +1,24 @@ choice prompt "Machine type" - depends on MACH_JZ4740 + depends on MACH_INGENIC default JZ4740_QI_LB60 config JZ4740_QI_LB60 bool "Qi Hardware Ben NanoNote" + select MACH_JZ4740 + +config JZ4780_CI20 + bool "MIPS Creator CI20" + select MACH_JZ4780 endchoice + +config MACH_JZ4740 + bool + select SYS_HAS_CPU_MIPS32_R1 + +config MACH_JZ4780 + bool + select MIPS_CPU_SCACHE + select SYS_HAS_CPU_MIPS32_R2 + select SYS_SUPPORTS_HIGHMEM diff --git a/arch/mips/jz4740/Makefile b/arch/mips/jz4740/Makefile index 28e5535dfa9e..39d70bde8cfe 100644 --- a/arch/mips/jz4740/Makefile +++ b/arch/mips/jz4740/Makefile @@ -4,10 +4,12 @@ # Object file lists. -obj-y += prom.o irq.o time.o reset.o setup.o \ - gpio.o clock.o platform.o timer.o serial.o +obj-y += prom.o time.o reset.o setup.o \ + platform.o timer.o -obj-$(CONFIG_DEBUG_FS) += clock-debugfs.o +obj-$(CONFIG_MACH_JZ4740) += gpio.o + +CFLAGS_setup.o = -I$(src)/../../../scripts/dtc/libfdt # board specific support diff --git a/arch/mips/jz4740/Platform b/arch/mips/jz4740/Platform index c41d30080098..28448d358c10 100644 --- a/arch/mips/jz4740/Platform +++ b/arch/mips/jz4740/Platform @@ -1,4 +1,4 @@ -platform-$(CONFIG_MACH_JZ4740) += jz4740/ -cflags-$(CONFIG_MACH_JZ4740) += -I$(srctree)/arch/mips/include/asm/mach-jz4740 -load-$(CONFIG_MACH_JZ4740) += 0xffffffff80010000 -zload-$(CONFIG_MACH_JZ4740) += 0xffffffff80600000 +platform-$(CONFIG_MACH_INGENIC) += jz4740/ +cflags-$(CONFIG_MACH_INGENIC) += -I$(srctree)/arch/mips/include/asm/mach-jz4740 +load-$(CONFIG_MACH_INGENIC) += 0xffffffff80010000 +zload-$(CONFIG_MACH_INGENIC) += 0xffffffff80600000 diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c index 9dd051edb411..4e62bf85d0b0 100644 --- a/arch/mips/jz4740/board-qi_lb60.c +++ b/arch/mips/jz4740/board-qi_lb60.c @@ -482,8 +482,6 @@ static int __init qi_lb60_init_platform_devices(void) gpiod_add_lookup_table(&qi_lb60_audio_gpio_table); gpiod_add_lookup_table(&qi_lb60_nand_gpio_table); - jz4740_serial_device_register(); - spi_register_board_info(qi_lb60_spi_board_info, ARRAY_SIZE(qi_lb60_spi_board_info)); @@ -497,11 +495,6 @@ static int __init qi_lb60_init_platform_devices(void) } -struct jz4740_clock_board_data jz4740_clock_bdata = { - .ext_rate = 12000000, - .rtc_rate = 32768, -}; - static __init int board_avt2(char *str) { qi_lb60_mmc_pdata.card_detect_active_low = 1; diff --git a/arch/mips/jz4740/clock-debugfs.c b/arch/mips/jz4740/clock-debugfs.c deleted file mode 100644 index 325422d0d453..000000000000 --- a/arch/mips/jz4740/clock-debugfs.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (C) 2010, Lars-Peter Clausen <[email protected]> - * JZ4740 SoC clock support debugfs entries - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/clk.h> -#include <linux/err.h> - -#include <linux/debugfs.h> -#include <linux/uaccess.h> - -#include <asm/mach-jz4740/clock.h> -#include "clock.h" - -static struct dentry *jz4740_clock_debugfs; - -static int jz4740_clock_debugfs_show_enabled(void *data, uint64_t *value) -{ - struct clk *clk = data; - *value = clk_is_enabled(clk); - - return 0; -} - -static int jz4740_clock_debugfs_set_enabled(void *data, uint64_t value) -{ - struct clk *clk = data; - - if (value) - return clk_enable(clk); - else - clk_disable(clk); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(jz4740_clock_debugfs_ops_enabled, - jz4740_clock_debugfs_show_enabled, - jz4740_clock_debugfs_set_enabled, - "%llu\n"); - -static int jz4740_clock_debugfs_show_rate(void *data, uint64_t *value) -{ - struct clk *clk = data; - *value = clk_get_rate(clk); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(jz4740_clock_debugfs_ops_rate, - jz4740_clock_debugfs_show_rate, - NULL, - "%llu\n"); - -void jz4740_clock_debugfs_add_clk(struct clk *clk) -{ - if (!jz4740_clock_debugfs) - return; - - clk->debugfs_entry = debugfs_create_dir(clk->name, jz4740_clock_debugfs); - debugfs_create_file("rate", S_IWUGO | S_IRUGO, clk->debugfs_entry, clk, - &jz4740_clock_debugfs_ops_rate); - debugfs_create_file("enabled", S_IRUGO, clk->debugfs_entry, clk, - &jz4740_clock_debugfs_ops_enabled); - - if (clk->parent) { - char parent_path[100]; - snprintf(parent_path, 100, "../%s", clk->parent->name); - clk->debugfs_parent_entry = debugfs_create_symlink("parent", - clk->debugfs_entry, - parent_path); - } -} - -/* TODO: Locking */ -void jz4740_clock_debugfs_update_parent(struct clk *clk) -{ - debugfs_remove(clk->debugfs_parent_entry); - - if (clk->parent) { - char parent_path[100]; - snprintf(parent_path, 100, "../%s", clk->parent->name); - clk->debugfs_parent_entry = debugfs_create_symlink("parent", - clk->debugfs_entry, - parent_path); - } else { - clk->debugfs_parent_entry = NULL; - } -} - -void jz4740_clock_debugfs_init(void) -{ - jz4740_clock_debugfs = debugfs_create_dir("jz4740-clock", NULL); - if (IS_ERR(jz4740_clock_debugfs)) - jz4740_clock_debugfs = NULL; -} diff --git a/arch/mips/jz4740/clock.c b/arch/mips/jz4740/clock.c deleted file mode 100644 index 1b5f55426cad..000000000000 --- a/arch/mips/jz4740/clock.c +++ /dev/null @@ -1,924 +0,0 @@ -/* - * Copyright (C) 2010, Lars-Peter Clausen <[email protected]> - * JZ4740 SoC clock support - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/clk.h> -#include <linux/spinlock.h> -#include <linux/io.h> -#include <linux/module.h> -#include <linux/list.h> -#include <linux/err.h> - -#include <asm/mach-jz4740/clock.h> -#include <asm/mach-jz4740/base.h> - -#include "clock.h" - -#define JZ_REG_CLOCK_CTRL 0x00 -#define JZ_REG_CLOCK_LOW_POWER 0x04 -#define JZ_REG_CLOCK_PLL 0x10 -#define JZ_REG_CLOCK_GATE 0x20 -#define JZ_REG_CLOCK_SLEEP_CTRL 0x24 -#define JZ_REG_CLOCK_I2S 0x60 -#define JZ_REG_CLOCK_LCD 0x64 -#define JZ_REG_CLOCK_MMC 0x68 -#define JZ_REG_CLOCK_UHC 0x6C -#define JZ_REG_CLOCK_SPI 0x74 - -#define JZ_CLOCK_CTRL_I2S_SRC_PLL BIT(31) -#define JZ_CLOCK_CTRL_KO_ENABLE BIT(30) -#define JZ_CLOCK_CTRL_UDC_SRC_PLL BIT(29) -#define JZ_CLOCK_CTRL_UDIV_MASK 0x1f800000 -#define JZ_CLOCK_CTRL_CHANGE_ENABLE BIT(22) -#define JZ_CLOCK_CTRL_PLL_HALF BIT(21) -#define JZ_CLOCK_CTRL_LDIV_MASK 0x001f0000 -#define JZ_CLOCK_CTRL_UDIV_OFFSET 23 -#define JZ_CLOCK_CTRL_LDIV_OFFSET 16 -#define JZ_CLOCK_CTRL_MDIV_OFFSET 12 -#define JZ_CLOCK_CTRL_PDIV_OFFSET 8 -#define JZ_CLOCK_CTRL_HDIV_OFFSET 4 -#define JZ_CLOCK_CTRL_CDIV_OFFSET 0 - -#define JZ_CLOCK_GATE_UART0 BIT(0) -#define JZ_CLOCK_GATE_TCU BIT(1) -#define JZ_CLOCK_GATE_RTC BIT(2) -#define JZ_CLOCK_GATE_I2C BIT(3) -#define JZ_CLOCK_GATE_SPI BIT(4) -#define JZ_CLOCK_GATE_AIC BIT(5) -#define JZ_CLOCK_GATE_I2S BIT(6) -#define JZ_CLOCK_GATE_MMC BIT(7) -#define JZ_CLOCK_GATE_ADC BIT(8) -#define JZ_CLOCK_GATE_CIM BIT(9) -#define JZ_CLOCK_GATE_LCD BIT(10) -#define JZ_CLOCK_GATE_UDC BIT(11) -#define JZ_CLOCK_GATE_DMAC BIT(12) -#define JZ_CLOCK_GATE_IPU BIT(13) -#define JZ_CLOCK_GATE_UHC BIT(14) -#define JZ_CLOCK_GATE_UART1 BIT(15) - -#define JZ_CLOCK_I2S_DIV_MASK 0x01ff - -#define JZ_CLOCK_LCD_DIV_MASK 0x01ff - -#define JZ_CLOCK_MMC_DIV_MASK 0x001f - -#define JZ_CLOCK_UHC_DIV_MASK 0x000f - -#define JZ_CLOCK_SPI_SRC_PLL BIT(31) -#define JZ_CLOCK_SPI_DIV_MASK 0x000f - -#define JZ_CLOCK_PLL_M_MASK 0x01ff -#define JZ_CLOCK_PLL_N_MASK 0x001f -#define JZ_CLOCK_PLL_OD_MASK 0x0003 -#define JZ_CLOCK_PLL_STABLE BIT(10) -#define JZ_CLOCK_PLL_BYPASS BIT(9) -#define JZ_CLOCK_PLL_ENABLED BIT(8) -#define JZ_CLOCK_PLL_STABLIZE_MASK 0x000f -#define JZ_CLOCK_PLL_M_OFFSET 23 -#define JZ_CLOCK_PLL_N_OFFSET 18 -#define JZ_CLOCK_PLL_OD_OFFSET 16 - -#define JZ_CLOCK_LOW_POWER_MODE_DOZE BIT(2) -#define JZ_CLOCK_LOW_POWER_MODE_SLEEP BIT(0) - -#define JZ_CLOCK_SLEEP_CTRL_SUSPEND_UHC BIT(7) -#define JZ_CLOCK_SLEEP_CTRL_ENABLE_UDC BIT(6) - -static void __iomem *jz_clock_base; -static spinlock_t jz_clock_lock; -static LIST_HEAD(jz_clocks); - -struct main_clk { - struct clk clk; - uint32_t div_offset; -}; - -struct divided_clk { - struct clk clk; - uint32_t reg; - uint32_t mask; -}; - -struct static_clk { - struct clk clk; - unsigned long rate; -}; - -static uint32_t jz_clk_reg_read(int reg) -{ - return readl(jz_clock_base + reg); -} - -static void jz_clk_reg_write_mask(int reg, uint32_t val, uint32_t mask) -{ - uint32_t val2; - - spin_lock(&jz_clock_lock); - val2 = readl(jz_clock_base + reg); - val2 &= ~mask; - val2 |= val; - writel(val2, jz_clock_base + reg); - spin_unlock(&jz_clock_lock); -} - -static void jz_clk_reg_set_bits(int reg, uint32_t mask) -{ - uint32_t val; - - spin_lock(&jz_clock_lock); - val = readl(jz_clock_base + reg); - val |= mask; - writel(val, jz_clock_base + reg); - spin_unlock(&jz_clock_lock); -} - -static void jz_clk_reg_clear_bits(int reg, uint32_t mask) -{ - uint32_t val; - - spin_lock(&jz_clock_lock); - val = readl(jz_clock_base + reg); - val &= ~mask; - writel(val, jz_clock_base + reg); - spin_unlock(&jz_clock_lock); -} - -static int jz_clk_enable_gating(struct clk *clk) -{ - if (clk->gate_bit == JZ4740_CLK_NOT_GATED) - return -EINVAL; - - jz_clk_reg_clear_bits(JZ_REG_CLOCK_GATE, clk->gate_bit); - return 0; -} - -static int jz_clk_disable_gating(struct clk *clk) -{ - if (clk->gate_bit == JZ4740_CLK_NOT_GATED) - return -EINVAL; - - jz_clk_reg_set_bits(JZ_REG_CLOCK_GATE, clk->gate_bit); - return 0; -} - -static int jz_clk_is_enabled_gating(struct clk *clk) -{ - if (clk->gate_bit == JZ4740_CLK_NOT_GATED) - return 1; - - return !(jz_clk_reg_read(JZ_REG_CLOCK_GATE) & clk->gate_bit); -} - -static unsigned long jz_clk_static_get_rate(struct clk *clk) -{ - return ((struct static_clk *)clk)->rate; -} - -static int jz_clk_ko_enable(struct clk *clk) -{ - jz_clk_reg_set_bits(JZ_REG_CLOCK_CTRL, JZ_CLOCK_CTRL_KO_ENABLE); - return 0; -} - -static int jz_clk_ko_disable(struct clk *clk) -{ - jz_clk_reg_clear_bits(JZ_REG_CLOCK_CTRL, JZ_CLOCK_CTRL_KO_ENABLE); - return 0; -} - -static int jz_clk_ko_is_enabled(struct clk *clk) -{ - return !!(jz_clk_reg_read(JZ_REG_CLOCK_CTRL) & JZ_CLOCK_CTRL_KO_ENABLE); -} - -static const int pllno[] = {1, 2, 2, 4}; - -static unsigned long jz_clk_pll_get_rate(struct clk *clk) -{ - uint32_t val; - int m; - int n; - int od; - - val = jz_clk_reg_read(JZ_REG_CLOCK_PLL); - - if (val & JZ_CLOCK_PLL_BYPASS) - return clk_get_rate(clk->parent); - - m = ((val >> 23) & 0x1ff) + 2; - n = ((val >> 18) & 0x1f) + 2; - od = (val >> 16) & 0x3; - - return ((clk_get_rate(clk->parent) / n) * m) / pllno[od]; -} - -static unsigned long jz_clk_pll_half_get_rate(struct clk *clk) -{ - uint32_t reg; - - reg = jz_clk_reg_read(JZ_REG_CLOCK_CTRL); - if (reg & JZ_CLOCK_CTRL_PLL_HALF) - return jz_clk_pll_get_rate(clk->parent); - return jz_clk_pll_get_rate(clk->parent) >> 1; -} - -static const int jz_clk_main_divs[] = {1, 2, 3, 4, 6, 8, 12, 16, 24, 32}; - -static unsigned long jz_clk_main_round_rate(struct clk *clk, unsigned long rate) -{ - unsigned long parent_rate = jz_clk_pll_get_rate(clk->parent); - int div; - - div = parent_rate / rate; - if (div > 32) - return parent_rate / 32; - else if (div < 1) - return parent_rate; - - div &= (0x3 << (ffs(div) - 1)); - - return parent_rate / div; -} - -static unsigned long jz_clk_main_get_rate(struct clk *clk) -{ - struct main_clk *mclk = (struct main_clk *)clk; - uint32_t div; - - div = jz_clk_reg_read(JZ_REG_CLOCK_CTRL); - - div >>= mclk->div_offset; - div &= 0xf; - - if (div >= ARRAY_SIZE(jz_clk_main_divs)) - div = ARRAY_SIZE(jz_clk_main_divs) - 1; - - return jz_clk_pll_get_rate(clk->parent) / jz_clk_main_divs[div]; -} - -static int jz_clk_main_set_rate(struct clk *clk, unsigned long rate) -{ - struct main_clk *mclk = (struct main_clk *)clk; - int i; - int div; - unsigned long parent_rate = jz_clk_pll_get_rate(clk->parent); - - rate = jz_clk_main_round_rate(clk, rate); - - div = parent_rate / rate; - - i = (ffs(div) - 1) << 1; - if (i > 0 && !(div & BIT(i-1))) - i -= 1; - - jz_clk_reg_write_mask(JZ_REG_CLOCK_CTRL, i << mclk->div_offset, - 0xf << mclk->div_offset); - - return 0; -} - -static struct clk_ops jz_clk_static_ops = { - .get_rate = jz_clk_static_get_rate, - .enable = jz_clk_enable_gating, - .disable = jz_clk_disable_gating, - .is_enabled = jz_clk_is_enabled_gating, -}; - -static struct static_clk jz_clk_ext = { - .clk = { - .name = "ext", - .gate_bit = JZ4740_CLK_NOT_GATED, - .ops = &jz_clk_static_ops, - }, -}; - -static struct clk_ops jz_clk_pll_ops = { - .get_rate = jz_clk_pll_get_rate, -}; - -static struct clk jz_clk_pll = { - .name = "pll", - .parent = &jz_clk_ext.clk, - .ops = &jz_clk_pll_ops, -}; - -static struct clk_ops jz_clk_pll_half_ops = { - .get_rate = jz_clk_pll_half_get_rate, -}; - -static struct clk jz_clk_pll_half = { - .name = "pll half", - .parent = &jz_clk_pll, - .ops = &jz_clk_pll_half_ops, -}; - -static const struct clk_ops jz_clk_main_ops = { - .get_rate = jz_clk_main_get_rate, - .set_rate = jz_clk_main_set_rate, - .round_rate = jz_clk_main_round_rate, -}; - -static struct main_clk jz_clk_cpu = { - .clk = { - .name = "cclk", - .parent = &jz_clk_pll, - .ops = &jz_clk_main_ops, - }, - .div_offset = JZ_CLOCK_CTRL_CDIV_OFFSET, -}; - -static struct main_clk jz_clk_memory = { - .clk = { - .name = "mclk", - .parent = &jz_clk_pll, - .ops = &jz_clk_main_ops, - }, - .div_offset = JZ_CLOCK_CTRL_MDIV_OFFSET, -}; - -static struct main_clk jz_clk_high_speed_peripheral = { - .clk = { - .name = "hclk", - .parent = &jz_clk_pll, - .ops = &jz_clk_main_ops, - }, - .div_offset = JZ_CLOCK_CTRL_HDIV_OFFSET, -}; - - -static struct main_clk jz_clk_low_speed_peripheral = { - .clk = { - .name = "pclk", - .parent = &jz_clk_pll, - .ops = &jz_clk_main_ops, - }, - .div_offset = JZ_CLOCK_CTRL_PDIV_OFFSET, -}; - -static const struct clk_ops jz_clk_ko_ops = { - .enable = jz_clk_ko_enable, - .disable = jz_clk_ko_disable, - .is_enabled = jz_clk_ko_is_enabled, -}; - -static struct clk jz_clk_ko = { - .name = "cko", - .parent = &jz_clk_memory.clk, - .ops = &jz_clk_ko_ops, -}; - -static int jz_clk_spi_set_parent(struct clk *clk, struct clk *parent) -{ - if (parent == &jz_clk_pll) - jz_clk_reg_set_bits(JZ_CLOCK_SPI_SRC_PLL, JZ_REG_CLOCK_SPI); - else if (parent == &jz_clk_ext.clk) - jz_clk_reg_clear_bits(JZ_CLOCK_SPI_SRC_PLL, JZ_REG_CLOCK_SPI); - else - return -EINVAL; - - clk->parent = parent; - - return 0; -} - -static int jz_clk_i2s_set_parent(struct clk *clk, struct clk *parent) -{ - if (parent == &jz_clk_pll_half) - jz_clk_reg_set_bits(JZ_REG_CLOCK_CTRL, JZ_CLOCK_CTRL_I2S_SRC_PLL); - else if (parent == &jz_clk_ext.clk) - jz_clk_reg_clear_bits(JZ_REG_CLOCK_CTRL, JZ_CLOCK_CTRL_I2S_SRC_PLL); - else - return -EINVAL; - - clk->parent = parent; - - return 0; -} - -static int jz_clk_udc_enable(struct clk *clk) -{ - jz_clk_reg_set_bits(JZ_REG_CLOCK_SLEEP_CTRL, - JZ_CLOCK_SLEEP_CTRL_ENABLE_UDC); - - return 0; -} - -static int jz_clk_udc_disable(struct clk *clk) -{ - jz_clk_reg_clear_bits(JZ_REG_CLOCK_SLEEP_CTRL, - JZ_CLOCK_SLEEP_CTRL_ENABLE_UDC); - - return 0; -} - -static int jz_clk_udc_is_enabled(struct clk *clk) -{ - return !!(jz_clk_reg_read(JZ_REG_CLOCK_SLEEP_CTRL) & - JZ_CLOCK_SLEEP_CTRL_ENABLE_UDC); -} - -static int jz_clk_udc_set_parent(struct clk *clk, struct clk *parent) -{ - if (parent == &jz_clk_pll_half) - jz_clk_reg_set_bits(JZ_REG_CLOCK_CTRL, JZ_CLOCK_CTRL_UDC_SRC_PLL); - else if (parent == &jz_clk_ext.clk) - jz_clk_reg_clear_bits(JZ_REG_CLOCK_CTRL, JZ_CLOCK_CTRL_UDC_SRC_PLL); - else - return -EINVAL; - - clk->parent = parent; - - return 0; -} - -static int jz_clk_udc_set_rate(struct clk *clk, unsigned long rate) -{ - int div; - - if (clk->parent == &jz_clk_ext.clk) - return -EINVAL; - - div = clk_get_rate(clk->parent) / rate - 1; - - if (div < 0) - div = 0; - else if (div > 63) - div = 63; - - jz_clk_reg_write_mask(JZ_REG_CLOCK_CTRL, div << JZ_CLOCK_CTRL_UDIV_OFFSET, - JZ_CLOCK_CTRL_UDIV_MASK); - return 0; -} - -static unsigned long jz_clk_udc_get_rate(struct clk *clk) -{ - int div; - - if (clk->parent == &jz_clk_ext.clk) - return clk_get_rate(clk->parent); - - div = (jz_clk_reg_read(JZ_REG_CLOCK_CTRL) & JZ_CLOCK_CTRL_UDIV_MASK); - div >>= JZ_CLOCK_CTRL_UDIV_OFFSET; - div += 1; - - return clk_get_rate(clk->parent) / div; -} - -static unsigned long jz_clk_divided_get_rate(struct clk *clk) -{ - struct divided_clk *dclk = (struct divided_clk *)clk; - int div; - - if (clk->parent == &jz_clk_ext.clk) - return clk_get_rate(clk->parent); - - div = (jz_clk_reg_read(dclk->reg) & dclk->mask) + 1; - - return clk_get_rate(clk->parent) / div; -} - -static int jz_clk_divided_set_rate(struct clk *clk, unsigned long rate) -{ - struct divided_clk *dclk = (struct divided_clk *)clk; - int div; - - if (clk->parent == &jz_clk_ext.clk) - return -EINVAL; - - div = clk_get_rate(clk->parent) / rate - 1; - - if (div < 0) - div = 0; - else if (div > dclk->mask) - div = dclk->mask; - - jz_clk_reg_write_mask(dclk->reg, div, dclk->mask); - - return 0; -} - -static unsigned long jz_clk_ldclk_round_rate(struct clk *clk, unsigned long rate) -{ - int div; - unsigned long parent_rate = jz_clk_pll_half_get_rate(clk->parent); - - if (rate > 150000000) - return 150000000; - - div = parent_rate / rate; - if (div < 1) - div = 1; - else if (div > 32) - div = 32; - - return parent_rate / div; -} - -static int jz_clk_ldclk_set_rate(struct clk *clk, unsigned long rate) -{ - int div; - - if (rate > 150000000) - return -EINVAL; - - div = jz_clk_pll_half_get_rate(clk->parent) / rate - 1; - if (div < 0) - div = 0; - else if (div > 31) - div = 31; - - jz_clk_reg_write_mask(JZ_REG_CLOCK_CTRL, div << JZ_CLOCK_CTRL_LDIV_OFFSET, - JZ_CLOCK_CTRL_LDIV_MASK); - - return 0; -} - -static unsigned long jz_clk_ldclk_get_rate(struct clk *clk) -{ - int div; - - div = jz_clk_reg_read(JZ_REG_CLOCK_CTRL) & JZ_CLOCK_CTRL_LDIV_MASK; - div >>= JZ_CLOCK_CTRL_LDIV_OFFSET; - - return jz_clk_pll_half_get_rate(clk->parent) / (div + 1); -} - -static const struct clk_ops jz_clk_ops_ld = { - .set_rate = jz_clk_ldclk_set_rate, - .get_rate = jz_clk_ldclk_get_rate, - .round_rate = jz_clk_ldclk_round_rate, - .enable = jz_clk_enable_gating, - .disable = jz_clk_disable_gating, - .is_enabled = jz_clk_is_enabled_gating, -}; - -static struct clk jz_clk_ld = { - .name = "lcd", - .gate_bit = JZ_CLOCK_GATE_LCD, - .parent = &jz_clk_pll_half, - .ops = &jz_clk_ops_ld, -}; - -static const struct clk_ops jz_clk_i2s_ops = { - .set_rate = jz_clk_divided_set_rate, - .get_rate = jz_clk_divided_get_rate, - .enable = jz_clk_enable_gating, - .disable = jz_clk_disable_gating, - .is_enabled = jz_clk_is_enabled_gating, - .set_parent = jz_clk_i2s_set_parent, -}; - -static const struct clk_ops jz_clk_spi_ops = { - .set_rate = jz_clk_divided_set_rate, - .get_rate = jz_clk_divided_get_rate, - .enable = jz_clk_enable_gating, - .disable = jz_clk_disable_gating, - .is_enabled = jz_clk_is_enabled_gating, - .set_parent = jz_clk_spi_set_parent, -}; - -static const struct clk_ops jz_clk_divided_ops = { - .set_rate = jz_clk_divided_set_rate, - .get_rate = jz_clk_divided_get_rate, - .enable = jz_clk_enable_gating, - .disable = jz_clk_disable_gating, - .is_enabled = jz_clk_is_enabled_gating, -}; - -static struct divided_clk jz4740_clock_divided_clks[] = { - [0] = { - .clk = { - .name = "i2s", - .parent = &jz_clk_ext.clk, - .gate_bit = JZ_CLOCK_GATE_I2S, - .ops = &jz_clk_i2s_ops, - }, - .reg = JZ_REG_CLOCK_I2S, - .mask = JZ_CLOCK_I2S_DIV_MASK, - }, - [1] = { - .clk = { - .name = "spi", - .parent = &jz_clk_ext.clk, - .gate_bit = JZ_CLOCK_GATE_SPI, - .ops = &jz_clk_spi_ops, - }, - .reg = JZ_REG_CLOCK_SPI, - .mask = JZ_CLOCK_SPI_DIV_MASK, - }, - [2] = { - .clk = { - .name = "lcd_pclk", - .parent = &jz_clk_pll_half, - .gate_bit = JZ4740_CLK_NOT_GATED, - .ops = &jz_clk_divided_ops, - }, - .reg = JZ_REG_CLOCK_LCD, - .mask = JZ_CLOCK_LCD_DIV_MASK, - }, - [3] = { - .clk = { - .name = "mmc", - .parent = &jz_clk_pll_half, - .gate_bit = JZ_CLOCK_GATE_MMC, - .ops = &jz_clk_divided_ops, - }, - .reg = JZ_REG_CLOCK_MMC, - .mask = JZ_CLOCK_MMC_DIV_MASK, - }, - [4] = { - .clk = { - .name = "uhc", - .parent = &jz_clk_pll_half, - .gate_bit = JZ_CLOCK_GATE_UHC, - .ops = &jz_clk_divided_ops, - }, - .reg = JZ_REG_CLOCK_UHC, - .mask = JZ_CLOCK_UHC_DIV_MASK, - }, -}; - -static const struct clk_ops jz_clk_udc_ops = { - .set_parent = jz_clk_udc_set_parent, - .set_rate = jz_clk_udc_set_rate, - .get_rate = jz_clk_udc_get_rate, - .enable = jz_clk_udc_enable, - .disable = jz_clk_udc_disable, - .is_enabled = jz_clk_udc_is_enabled, -}; - -static const struct clk_ops jz_clk_simple_ops = { - .enable = jz_clk_enable_gating, - .disable = jz_clk_disable_gating, - .is_enabled = jz_clk_is_enabled_gating, -}; - -static struct clk jz4740_clock_simple_clks[] = { - [0] = { - .name = "udc", - .parent = &jz_clk_ext.clk, - .ops = &jz_clk_udc_ops, - }, - [1] = { - .name = "uart0", - .parent = &jz_clk_ext.clk, - .gate_bit = JZ_CLOCK_GATE_UART0, - .ops = &jz_clk_simple_ops, - }, - [2] = { - .name = "uart1", - .parent = &jz_clk_ext.clk, - .gate_bit = JZ_CLOCK_GATE_UART1, - .ops = &jz_clk_simple_ops, - }, - [3] = { - .name = "dma", - .parent = &jz_clk_high_speed_peripheral.clk, - .gate_bit = JZ_CLOCK_GATE_DMAC, - .ops = &jz_clk_simple_ops, - }, - [4] = { - .name = "ipu", - .parent = &jz_clk_high_speed_peripheral.clk, - .gate_bit = JZ_CLOCK_GATE_IPU, - .ops = &jz_clk_simple_ops, - }, - [5] = { - .name = "adc", - .parent = &jz_clk_ext.clk, - .gate_bit = JZ_CLOCK_GATE_ADC, - .ops = &jz_clk_simple_ops, - }, - [6] = { - .name = "i2c", - .parent = &jz_clk_ext.clk, - .gate_bit = JZ_CLOCK_GATE_I2C, - .ops = &jz_clk_simple_ops, - }, - [7] = { - .name = "aic", - .parent = &jz_clk_ext.clk, - .gate_bit = JZ_CLOCK_GATE_AIC, - .ops = &jz_clk_simple_ops, - }, -}; - -static struct static_clk jz_clk_rtc = { - .clk = { - .name = "rtc", - .gate_bit = JZ_CLOCK_GATE_RTC, - .ops = &jz_clk_static_ops, - }, - .rate = 32768, -}; - -int clk_enable(struct clk *clk) -{ - if (!clk->ops->enable) - return -EINVAL; - - return clk->ops->enable(clk); -} -EXPORT_SYMBOL_GPL(clk_enable); - -void clk_disable(struct clk *clk) -{ - if (clk->ops->disable) - clk->ops->disable(clk); -} -EXPORT_SYMBOL_GPL(clk_disable); - -int clk_is_enabled(struct clk *clk) -{ - if (clk->ops->is_enabled) - return clk->ops->is_enabled(clk); - - return 1; -} - -unsigned long clk_get_rate(struct clk *clk) -{ - if (clk->ops->get_rate) - return clk->ops->get_rate(clk); - if (clk->parent) - return clk_get_rate(clk->parent); - - return -EINVAL; -} -EXPORT_SYMBOL_GPL(clk_get_rate); - -int clk_set_rate(struct clk *clk, unsigned long rate) -{ - if (!clk->ops->set_rate) - return -EINVAL; - return clk->ops->set_rate(clk, rate); -} -EXPORT_SYMBOL_GPL(clk_set_rate); - -long clk_round_rate(struct clk *clk, unsigned long rate) -{ - if (clk->ops->round_rate) - return clk->ops->round_rate(clk, rate); - - return -EINVAL; -} -EXPORT_SYMBOL_GPL(clk_round_rate); - -int clk_set_parent(struct clk *clk, struct clk *parent) -{ - int ret; - int enabled; - - if (!clk->ops->set_parent) - return -EINVAL; - - enabled = clk_is_enabled(clk); - if (enabled) - clk_disable(clk); - ret = clk->ops->set_parent(clk, parent); - if (enabled) - clk_enable(clk); - - jz4740_clock_debugfs_update_parent(clk); - - return ret; -} -EXPORT_SYMBOL_GPL(clk_set_parent); - -struct clk *clk_get(struct device *dev, const char *name) -{ - struct clk *clk; - - list_for_each_entry(clk, &jz_clocks, list) { - if (strcmp(clk->name, name) == 0) - return clk; - } - return ERR_PTR(-ENXIO); -} -EXPORT_SYMBOL_GPL(clk_get); - -void clk_put(struct clk *clk) -{ -} -EXPORT_SYMBOL_GPL(clk_put); - -static inline void clk_add(struct clk *clk) -{ - list_add_tail(&clk->list, &jz_clocks); - - jz4740_clock_debugfs_add_clk(clk); -} - -static void clk_register_clks(void) -{ - size_t i; - - clk_add(&jz_clk_ext.clk); - clk_add(&jz_clk_pll); - clk_add(&jz_clk_pll_half); - clk_add(&jz_clk_cpu.clk); - clk_add(&jz_clk_high_speed_peripheral.clk); - clk_add(&jz_clk_low_speed_peripheral.clk); - clk_add(&jz_clk_ko); - clk_add(&jz_clk_ld); - clk_add(&jz_clk_rtc.clk); - - for (i = 0; i < ARRAY_SIZE(jz4740_clock_divided_clks); ++i) - clk_add(&jz4740_clock_divided_clks[i].clk); - - for (i = 0; i < ARRAY_SIZE(jz4740_clock_simple_clks); ++i) - clk_add(&jz4740_clock_simple_clks[i]); -} - -void jz4740_clock_set_wait_mode(enum jz4740_wait_mode mode) -{ - switch (mode) { - case JZ4740_WAIT_MODE_IDLE: - jz_clk_reg_clear_bits(JZ_REG_CLOCK_LOW_POWER, JZ_CLOCK_LOW_POWER_MODE_SLEEP); - break; - case JZ4740_WAIT_MODE_SLEEP: - jz_clk_reg_set_bits(JZ_REG_CLOCK_LOW_POWER, JZ_CLOCK_LOW_POWER_MODE_SLEEP); - break; - } -} - -void jz4740_clock_udc_disable_auto_suspend(void) -{ - jz_clk_reg_clear_bits(JZ_REG_CLOCK_GATE, JZ_CLOCK_GATE_UDC); -} -EXPORT_SYMBOL_GPL(jz4740_clock_udc_disable_auto_suspend); - -void jz4740_clock_udc_enable_auto_suspend(void) -{ - jz_clk_reg_set_bits(JZ_REG_CLOCK_GATE, JZ_CLOCK_GATE_UDC); -} -EXPORT_SYMBOL_GPL(jz4740_clock_udc_enable_auto_suspend); - -void jz4740_clock_suspend(void) -{ - jz_clk_reg_set_bits(JZ_REG_CLOCK_GATE, - JZ_CLOCK_GATE_TCU | JZ_CLOCK_GATE_DMAC | JZ_CLOCK_GATE_UART0); - - jz_clk_reg_clear_bits(JZ_REG_CLOCK_PLL, JZ_CLOCK_PLL_ENABLED); -} - -void jz4740_clock_resume(void) -{ - uint32_t pll; - - jz_clk_reg_set_bits(JZ_REG_CLOCK_PLL, JZ_CLOCK_PLL_ENABLED); - - do { - pll = jz_clk_reg_read(JZ_REG_CLOCK_PLL); - } while (!(pll & JZ_CLOCK_PLL_STABLE)); - - jz_clk_reg_clear_bits(JZ_REG_CLOCK_GATE, - JZ_CLOCK_GATE_TCU | JZ_CLOCK_GATE_DMAC | JZ_CLOCK_GATE_UART0); -} - -static int jz4740_clock_init(void) -{ - uint32_t val; - - jz_clock_base = ioremap(JZ4740_CPM_BASE_ADDR, 0x100); - if (!jz_clock_base) - return -EBUSY; - - spin_lock_init(&jz_clock_lock); - - jz_clk_ext.rate = jz4740_clock_bdata.ext_rate; - jz_clk_rtc.rate = jz4740_clock_bdata.rtc_rate; - - val = jz_clk_reg_read(JZ_REG_CLOCK_SPI); - - if (val & JZ_CLOCK_SPI_SRC_PLL) - jz4740_clock_divided_clks[1].clk.parent = &jz_clk_pll_half; - - val = jz_clk_reg_read(JZ_REG_CLOCK_CTRL); - - if (val & JZ_CLOCK_CTRL_I2S_SRC_PLL) - jz4740_clock_divided_clks[0].clk.parent = &jz_clk_pll_half; - - if (val & JZ_CLOCK_CTRL_UDC_SRC_PLL) - jz4740_clock_simple_clks[0].parent = &jz_clk_pll_half; - - jz4740_clock_debugfs_init(); - - clk_register_clks(); - - return 0; -} -arch_initcall(jz4740_clock_init); diff --git a/arch/mips/jz4740/clock.h b/arch/mips/jz4740/clock.h deleted file mode 100644 index 5d07499d7461..000000000000 --- a/arch/mips/jz4740/clock.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (C) 2010, Lars-Peter Clausen <[email protected]> - * JZ4740 SoC clock support - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#ifndef __MIPS_JZ4740_CLOCK_H__ -#define __MIPS_JZ4740_CLOCK_H__ - -#include <linux/list.h> - -struct jz4740_clock_board_data { - unsigned long ext_rate; - unsigned long rtc_rate; -}; - -extern struct jz4740_clock_board_data jz4740_clock_bdata; - -void jz4740_clock_suspend(void); -void jz4740_clock_resume(void); - -struct clk; - -struct clk_ops { - unsigned long (*get_rate)(struct clk *clk); - unsigned long (*round_rate)(struct clk *clk, unsigned long rate); - int (*set_rate)(struct clk *clk, unsigned long rate); - int (*enable)(struct clk *clk); - int (*disable)(struct clk *clk); - int (*is_enabled)(struct clk *clk); - - int (*set_parent)(struct clk *clk, struct clk *parent); - -}; - -struct clk { - const char *name; - struct clk *parent; - - uint32_t gate_bit; - - const struct clk_ops *ops; - - struct list_head list; - -#ifdef CONFIG_DEBUG_FS - struct dentry *debugfs_entry; - struct dentry *debugfs_parent_entry; -#endif - -}; - -#define JZ4740_CLK_NOT_GATED ((uint32_t)-1) - -int clk_is_enabled(struct clk *clk); - -#ifdef CONFIG_DEBUG_FS -void jz4740_clock_debugfs_init(void); -void jz4740_clock_debugfs_add_clk(struct clk *clk); -void jz4740_clock_debugfs_update_parent(struct clk *clk); -#else -static inline void jz4740_clock_debugfs_init(void) {}; -static inline void jz4740_clock_debugfs_add_clk(struct clk *clk) {}; -static inline void jz4740_clock_debugfs_update_parent(struct clk *clk) {}; -#endif - -#endif diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c index 00b798d2fb7c..54c80d42a88d 100644 --- a/arch/mips/jz4740/gpio.c +++ b/arch/mips/jz4740/gpio.c @@ -21,6 +21,7 @@ #include <linux/gpio.h> #include <linux/delay.h> #include <linux/interrupt.h> +#include <linux/irqchip/ingenic.h> #include <linux/bitops.h> #include <linux/debugfs.h> @@ -28,8 +29,6 @@ #include <asm/mach-jz4740/base.h> -#include "irq.h" - #define JZ4740_GPIO_BASE_A (32*0) #define JZ4740_GPIO_BASE_B (32*1) #define JZ4740_GPIO_BASE_C (32*2) @@ -442,8 +441,8 @@ static void jz4740_gpio_chip_init(struct jz_gpio_chip *chip, unsigned int id) ct->chip.irq_mask = irq_gc_mask_disable_reg; ct->chip.irq_unmask = jz_gpio_irq_unmask; ct->chip.irq_ack = irq_gc_ack_set_bit; - ct->chip.irq_suspend = jz4740_irq_suspend; - ct->chip.irq_resume = jz4740_irq_resume; + ct->chip.irq_suspend = ingenic_intc_irq_suspend; + ct->chip.irq_resume = ingenic_intc_irq_resume; ct->chip.irq_startup = jz_gpio_irq_startup; ct->chip.irq_shutdown = jz_gpio_irq_shutdown; ct->chip.irq_set_type = jz_gpio_irq_set_type; diff --git a/arch/mips/jz4740/irq.c b/arch/mips/jz4740/irq.c deleted file mode 100644 index 97206b3deb97..000000000000 --- a/arch/mips/jz4740/irq.c +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (C) 2009-2010, Lars-Peter Clausen <[email protected]> - * JZ4740 platform IRQ support - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/interrupt.h> -#include <linux/ioport.h> -#include <linux/timex.h> -#include <linux/slab.h> -#include <linux/delay.h> - -#include <linux/debugfs.h> -#include <linux/seq_file.h> - -#include <asm/io.h> -#include <asm/mipsregs.h> -#include <asm/irq_cpu.h> - -#include <asm/mach-jz4740/base.h> -#include <asm/mach-jz4740/irq.h> - -#include "irq.h" - -static void __iomem *jz_intc_base; - -#define JZ_REG_INTC_STATUS 0x00 -#define JZ_REG_INTC_MASK 0x04 -#define JZ_REG_INTC_SET_MASK 0x08 -#define JZ_REG_INTC_CLEAR_MASK 0x0c -#define JZ_REG_INTC_PENDING 0x10 - -static irqreturn_t jz4740_cascade(int irq, void *data) -{ - uint32_t irq_reg; - - irq_reg = readl(jz_intc_base + JZ_REG_INTC_PENDING); - - if (irq_reg) - generic_handle_irq(__fls(irq_reg) + JZ4740_IRQ_BASE); - - return IRQ_HANDLED; -} - -static void jz4740_irq_set_mask(struct irq_chip_generic *gc, uint32_t mask) -{ - struct irq_chip_regs *regs = &gc->chip_types->regs; - - writel(mask, gc->reg_base + regs->enable); - writel(~mask, gc->reg_base + regs->disable); -} - -void jz4740_irq_suspend(struct irq_data *data) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data); - jz4740_irq_set_mask(gc, gc->wake_active); -} - -void jz4740_irq_resume(struct irq_data *data) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data); - jz4740_irq_set_mask(gc, gc->mask_cache); -} - -static struct irqaction jz4740_cascade_action = { - .handler = jz4740_cascade, - .name = "JZ4740 cascade interrupt", -}; - -void __init arch_init_irq(void) -{ - struct irq_chip_generic *gc; - struct irq_chip_type *ct; - - mips_cpu_irq_init(); - - jz_intc_base = ioremap(JZ4740_INTC_BASE_ADDR, 0x14); - - /* Mask all irqs */ - writel(0xffffffff, jz_intc_base + JZ_REG_INTC_SET_MASK); - - gc = irq_alloc_generic_chip("INTC", 1, JZ4740_IRQ_BASE, jz_intc_base, - handle_level_irq); - - gc->wake_enabled = IRQ_MSK(32); - - ct = gc->chip_types; - ct->regs.enable = JZ_REG_INTC_CLEAR_MASK; - ct->regs.disable = JZ_REG_INTC_SET_MASK; - ct->chip.irq_unmask = irq_gc_unmask_enable_reg; - ct->chip.irq_mask = irq_gc_mask_disable_reg; - ct->chip.irq_mask_ack = irq_gc_mask_disable_reg; - ct->chip.irq_set_wake = irq_gc_set_wake; - ct->chip.irq_suspend = jz4740_irq_suspend; - ct->chip.irq_resume = jz4740_irq_resume; - - irq_setup_generic_chip(gc, IRQ_MSK(32), 0, 0, IRQ_NOPROBE | IRQ_LEVEL); - - setup_irq(2, &jz4740_cascade_action); -} - -asmlinkage void plat_irq_dispatch(void) -{ - unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM; - if (pending & STATUSF_IP2) - do_IRQ(2); - else if (pending & STATUSF_IP3) - do_IRQ(3); - else - spurious_interrupt(); -} - -#ifdef CONFIG_DEBUG_FS - -static inline void intc_seq_reg(struct seq_file *s, const char *name, - unsigned int reg) -{ - seq_printf(s, "%s:\t\t%08x\n", name, readl(jz_intc_base + reg)); -} - -static int intc_regs_show(struct seq_file *s, void *unused) -{ - intc_seq_reg(s, "Status", JZ_REG_INTC_STATUS); - intc_seq_reg(s, "Mask", JZ_REG_INTC_MASK); - intc_seq_reg(s, "Pending", JZ_REG_INTC_PENDING); - - return 0; -} - -static int intc_regs_open(struct inode *inode, struct file *file) -{ - return single_open(file, intc_regs_show, NULL); -} - -static const struct file_operations intc_regs_operations = { - .open = intc_regs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int __init intc_debugfs_init(void) -{ - (void) debugfs_create_file("jz_regs_intc", S_IFREG | S_IRUGO, - NULL, NULL, &intc_regs_operations); - return 0; -} -subsys_initcall(intc_debugfs_init); - -#endif diff --git a/arch/mips/jz4740/platform.c b/arch/mips/jz4740/platform.c index 0b12f273cb2e..e8a463b9b663 100644 --- a/arch/mips/jz4740/platform.c +++ b/arch/mips/jz4740/platform.c @@ -13,6 +13,7 @@ * */ +#include <linux/clk.h> #include <linux/device.h> #include <linux/kernel.h> #include <linux/platform_device.h> @@ -29,7 +30,6 @@ #include <linux/serial_core.h> #include <linux/serial_8250.h> -#include "serial.h" #include "clock.h" /* OHCI controller */ @@ -279,42 +279,6 @@ struct platform_device jz4740_adc_device = { .resource = jz4740_adc_resources, }; -/* Serial */ -#define JZ4740_UART_DATA(_id) \ - { \ - .flags = UPF_SKIP_TEST | UPF_IOREMAP | UPF_FIXED_TYPE, \ - .iotype = UPIO_MEM, \ - .regshift = 2, \ - .serial_out = jz4740_serial_out, \ - .type = PORT_16550, \ - .mapbase = JZ4740_UART ## _id ## _BASE_ADDR, \ - .irq = JZ4740_IRQ_UART ## _id, \ - } - -static struct plat_serial8250_port jz4740_uart_data[] = { - JZ4740_UART_DATA(0), - JZ4740_UART_DATA(1), - {}, -}; - -static struct platform_device jz4740_uart_device = { - .name = "serial8250", - .id = 0, - .dev = { - .platform_data = jz4740_uart_data, - }, -}; - -void jz4740_serial_device_register(void) -{ - struct plat_serial8250_port *p; - - for (p = jz4740_uart_data; p->flags != 0; ++p) - p->uartclk = jz4740_clock_bdata.ext_rate; - - platform_device_register(&jz4740_uart_device); -} - /* Watchdog */ static struct resource jz4740_wdt_resources[] = { { diff --git a/arch/mips/jz4740/pm.c b/arch/mips/jz4740/pm.c index d8e213010169..2d8653f2fc61 100644 --- a/arch/mips/jz4740/pm.c +++ b/arch/mips/jz4740/pm.c @@ -20,8 +20,6 @@ #include <asm/mach-jz4740/clock.h> -#include "clock.h" - static int jz4740_pm_enter(suspend_state_t state) { jz4740_clock_suspend(); diff --git a/arch/mips/jz4740/prom.c b/arch/mips/jz4740/prom.c index 5a93f381590d..6984683c90d0 100644 --- a/arch/mips/jz4740/prom.c +++ b/arch/mips/jz4740/prom.c @@ -53,16 +53,3 @@ void __init prom_init(void) void __init prom_free_prom_memory(void) { } - -#define UART_REG(_reg) ((void __iomem *)CKSEG1ADDR(JZ4740_UART0_BASE_ADDR + (_reg << 2))) - -void prom_putchar(char c) -{ - uint8_t lsr; - - do { - lsr = readb(UART_REG(UART_LSR)); - } while ((lsr & UART_LSR_TEMT) == 0); - - writeb(c, UART_REG(UART_TX)); -} diff --git a/arch/mips/jz4740/reset.c b/arch/mips/jz4740/reset.c index b6c6343d2834..954e669c9e6b 100644 --- a/arch/mips/jz4740/reset.c +++ b/arch/mips/jz4740/reset.c @@ -12,6 +12,7 @@ * */ +#include <linux/clk.h> #include <linux/io.h> #include <linux/kernel.h> #include <linux/pm.h> @@ -79,12 +80,20 @@ static void jz4740_power_off(void) void __iomem *rtc_base = ioremap(JZ4740_RTC_BASE_ADDR, 0x38); unsigned long wakeup_filter_ticks; unsigned long reset_counter_ticks; + struct clk *rtc_clk; + unsigned long rtc_rate; + + rtc_clk = clk_get(NULL, "rtc"); + if (IS_ERR(rtc_clk)) + panic("unable to get RTC clock"); + rtc_rate = clk_get_rate(rtc_clk); + clk_put(rtc_clk); /* * Set minimum wakeup pin assertion time: 100 ms. * Range is 0 to 2 sec if RTC is clocked at 32 kHz. */ - wakeup_filter_ticks = (100 * jz4740_clock_bdata.rtc_rate) / 1000; + wakeup_filter_ticks = (100 * rtc_rate) / 1000; if (wakeup_filter_ticks < JZ_RTC_WAKEUP_FILTER_MASK) wakeup_filter_ticks &= JZ_RTC_WAKEUP_FILTER_MASK; else @@ -96,7 +105,7 @@ static void jz4740_power_off(void) * Set reset pin low-level assertion time after wakeup: 60 ms. * Range is 0 to 125 ms if RTC is clocked at 32 kHz. */ - reset_counter_ticks = (60 * jz4740_clock_bdata.rtc_rate) / 1000; + reset_counter_ticks = (60 * rtc_rate) / 1000; if (reset_counter_ticks < JZ_RTC_RESET_COUNTER_MASK) reset_counter_ticks &= JZ_RTC_RESET_COUNTER_MASK; else diff --git a/arch/mips/jz4740/serial.c b/arch/mips/jz4740/serial.c deleted file mode 100644 index d23de45826d1..000000000000 --- a/arch/mips/jz4740/serial.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (C) 2010, Lars-Peter Clausen <[email protected]> - * JZ4740 serial support - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <linux/io.h> -#include <linux/serial_core.h> -#include <linux/serial_reg.h> - -void jz4740_serial_out(struct uart_port *p, int offset, int value) -{ - switch (offset) { - case UART_FCR: - value |= 0x10; /* Enable uart module */ - break; - case UART_IER: - value |= (value & 0x4) << 2; - break; - default: - break; - } - writeb(value, p->membase + (offset << p->regshift)); -} diff --git a/arch/mips/jz4740/serial.h b/arch/mips/jz4740/serial.h deleted file mode 100644 index 8eb715bb1ea8..000000000000 --- a/arch/mips/jz4740/serial.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (C) 2010, Lars-Peter Clausen <[email protected]> - * JZ4740 serial support - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#ifndef __MIPS_JZ4740_SERIAL_H__ -#define __MIPS_JZ4740_SERIAL_H__ - -struct uart_port; - -void jz4740_serial_out(struct uart_port *p, int offset, int value); - -#endif diff --git a/arch/mips/jz4740/setup.c b/arch/mips/jz4740/setup.c index ef796f97b996..510fc0d962f2 100644 --- a/arch/mips/jz4740/setup.c +++ b/arch/mips/jz4740/setup.c @@ -16,9 +16,14 @@ #include <linux/init.h> #include <linux/io.h> +#include <linux/irqchip.h> #include <linux/kernel.h> +#include <linux/libfdt.h> +#include <linux/of_fdt.h> +#include <linux/of_platform.h> #include <asm/bootinfo.h> +#include <asm/prom.h> #include <asm/mach-jz4740/base.h> @@ -51,11 +56,40 @@ static void __init jz4740_detect_mem(void) void __init plat_mem_setup(void) { + int offset; + jz4740_reset_init(); - jz4740_detect_mem(); + __dt_setup_arch(__dtb_start); + + offset = fdt_path_offset(__dtb_start, "/memory"); + if (offset < 0) + jz4740_detect_mem(); } +void __init device_tree_init(void) +{ + if (!initial_boot_params) + return; + + unflatten_and_copy_device_tree(); +} + +static int __init populate_machine(void) +{ + of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); + return 0; +} +arch_initcall(populate_machine); + const char *get_system_type(void) { + if (config_enabled(CONFIG_MACH_JZ4780)) + return "JZ4780"; + return "JZ4740"; } + +void __init arch_init_irq(void) +{ + irqchip_init(); +} diff --git a/arch/mips/jz4740/time.c b/arch/mips/jz4740/time.c index 72b0cecbc17c..7ab47fee1be8 100644 --- a/arch/mips/jz4740/time.c +++ b/arch/mips/jz4740/time.c @@ -13,6 +13,8 @@ * */ +#include <linux/clk.h> +#include <linux/clk-provider.h> #include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/time.h> @@ -20,6 +22,7 @@ #include <linux/clockchips.h> #include <linux/sched_clock.h> +#include <asm/mach-jz4740/clock.h> #include <asm/mach-jz4740/irq.h> #include <asm/mach-jz4740/timer.h> #include <asm/time.h> @@ -99,7 +102,12 @@ static struct clock_event_device jz4740_clockevent = { .set_next_event = jz4740_clockevent_set_next, .set_mode = jz4740_clockevent_set_mode, .rating = 200, +#ifdef CONFIG_MACH_JZ4740 .irq = JZ4740_IRQ_TCU0, +#endif +#ifdef CONFIG_MACH_JZ4780 + .irq = JZ4780_IRQ_TCU2, +#endif }; static struct irqaction timer_irqaction = { @@ -114,10 +122,17 @@ void __init plat_time_init(void) int ret; uint32_t clk_rate; uint16_t ctrl; + struct clk *ext_clk; + of_clk_init(NULL); jz4740_timer_init(); - clk_rate = jz4740_clock_bdata.ext_rate >> 4; + ext_clk = clk_get(NULL, "ext"); + if (IS_ERR(ext_clk)) + panic("unable to get ext clock"); + clk_rate = clk_get_rate(ext_clk) >> 4; + clk_put(ext_clk); + jz4740_jiffies_per_tick = DIV_ROUND_CLOSEST(clk_rate, HZ); clockevent_set_clock(&jz4740_clockevent, clk_rate); @@ -134,7 +149,7 @@ void __init plat_time_init(void) sched_clock_register(jz4740_read_sched_clock, 16, clk_rate); - setup_irq(JZ4740_IRQ_TCU0, &timer_irqaction); + setup_irq(jz4740_clockevent.irq, &timer_irqaction); ctrl = JZ_TIMER_CTRL_PRESCALE_16 | JZ_TIMER_CTRL_SRC_EXT; diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index d3d2ff2d76dc..3f5cf8aff6f3 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -62,7 +62,6 @@ obj-$(CONFIG_MIPS_VPE_APSP_API_CMP) += rtlx-cmp.o obj-$(CONFIG_MIPS_VPE_APSP_API_MT) += rtlx-mt.o obj-$(CONFIG_I8259) += i8259.o -obj-$(CONFIG_IRQ_CPU) += irq_cpu.o obj-$(CONFIG_IRQ_CPU_RM7K) += irq-rm7000.o obj-$(CONFIG_MIPS_MSC) += irq-msc01.o obj-$(CONFIG_IRQ_TXX9) += irq_txx9.o @@ -77,6 +76,7 @@ obj-$(CONFIG_MIPS32_O32) += binfmt_elfo32.o scall64-o32.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_PROC_FS) += proc.o +obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o obj-$(CONFIG_64BIT) += cpu-bugs64.o diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 209e5b76c1bc..dbe0792fc9c1 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -945,7 +945,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) c->options = MIPS_CPU_TLB | MIPS_CPU_4K_CACHE | MIPS_CPU_4KEX | MIPS_CPU_FPU | MIPS_CPU_32FPR | MIPS_CPU_COUNTER | MIPS_CPU_WATCH | - MIPS_CPU_LLSC; + MIPS_CPU_LLSC | MIPS_CPU_BP_GHIST; c->tlbsize = 64; break; case PRID_IMP_R14000: @@ -960,7 +960,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu) c->options = MIPS_CPU_TLB | MIPS_CPU_4K_CACHE | MIPS_CPU_4KEX | MIPS_CPU_FPU | MIPS_CPU_32FPR | MIPS_CPU_COUNTER | MIPS_CPU_WATCH | - MIPS_CPU_LLSC; + MIPS_CPU_LLSC | MIPS_CPU_BP_GHIST; c->tlbsize = 64; break; case PRID_IMP_LOONGSON_64: /* Loongson-2/3 */ @@ -1443,7 +1443,9 @@ void cpu_probe(void) case PRID_COMP_CAVIUM: cpu_probe_cavium(c, cpu); break; - case PRID_COMP_INGENIC: + case PRID_COMP_INGENIC_D0: + case PRID_COMP_INGENIC_D1: + case PRID_COMP_INGENIC_E1: cpu_probe_ingenic(c, cpu); break; case PRID_COMP_NETLOGIC: @@ -1478,6 +1480,10 @@ void cpu_probe(void) else cpu_set_nofpu_opts(c); + if (cpu_has_bp_ghist) + write_c0_r10k_diag(read_c0_r10k_diag() | + R10K_DIAG_E_GHIST); + if (cpu_has_mips_r2_r6) { c->srsets = ((read_c0_srsctl() >> 26) & 0x0f) + 1; /* R2 has Performance Counter Interrupt indicator */ diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S index 95afd663cd45..4e4cc5b9a771 100644 --- a/arch/mips/kernel/head.S +++ b/arch/mips/kernel/head.S @@ -94,6 +94,22 @@ NESTED(kernel_entry, 16, sp) # kernel entry point jr t0 0: +#ifdef CONFIG_MIPS_RAW_APPENDED_DTB + PTR_LA t0, __appended_dtb + +#ifdef CONFIG_CPU_BIG_ENDIAN + li t1, 0xd00dfeed +#else + li t1, 0xedfe0dd0 +#endif + lw t2, (t0) + bne t1, t2, not_found + nop + + move a1, t0 + PTR_LI a0, -2 +not_found: +#endif PTR_LA t0, __bss_start # clear .bss LONG_S zero, (t0) PTR_LA t1, __bss_stop - LONGSIZE diff --git a/arch/mips/kernel/i8259.c b/arch/mips/kernel/i8259.c index a74ec3ae557c..74f6752814d3 100644 --- a/arch/mips/kernel/i8259.c +++ b/arch/mips/kernel/i8259.c @@ -14,6 +14,7 @@ #include <linux/interrupt.h> #include <linux/irqdomain.h> #include <linux/kernel.h> +#include <linux/of_irq.h> #include <linux/spinlock.h> #include <linux/syscore_ops.h> #include <linux/irq.h> @@ -21,6 +22,8 @@ #include <asm/i8259.h> #include <asm/io.h> +#include "../../drivers/irqchip/irqchip.h" + /* * This is the 'legacy' 8259A Programmable Interrupt Controller, * present in the majority of PC/AT boxes. @@ -327,7 +330,7 @@ static struct irq_domain_ops i8259A_ops = { * driver compatibility reasons interrupts 0 - 15 to be the i8259 * interrupts even if the hardware uses a different interrupt numbering. */ -void __init init_i8259_irqs(void) +struct irq_domain * __init __init_i8259_irqs(struct device_node *node) { struct irq_domain *domain; @@ -336,10 +339,46 @@ void __init init_i8259_irqs(void) init_8259A(0); - domain = irq_domain_add_legacy(NULL, 16, I8259A_IRQ_BASE, 0, + domain = irq_domain_add_legacy(node, 16, I8259A_IRQ_BASE, 0, &i8259A_ops, NULL); if (!domain) panic("Failed to add i8259 IRQ domain"); setup_irq(I8259A_IRQ_BASE + PIC_CASCADE_IR, &irq2); + return domain; +} + +void __init init_i8259_irqs(void) +{ + __init_i8259_irqs(NULL); +} + +static void i8259_irq_dispatch(unsigned int irq, struct irq_desc *desc) +{ + struct irq_domain *domain = irq_get_handler_data(irq); + int hwirq = i8259_irq(); + + if (hwirq < 0) + return; + + irq = irq_linear_revmap(domain, hwirq); + generic_handle_irq(irq); +} + +int __init i8259_of_init(struct device_node *node, struct device_node *parent) +{ + struct irq_domain *domain; + unsigned int parent_irq; + + parent_irq = irq_of_parse_and_map(node, 0); + if (!parent_irq) { + pr_err("Failed to map i8259 parent IRQ\n"); + return -ENODEV; + } + + domain = __init_i8259_irqs(node); + irq_set_handler_data(parent_irq, domain); + irq_set_chained_handler(parent_irq, i8259_irq_dispatch); + return 0; } +IRQCHIP_DECLARE(i8259, "intel,i8259", i8259_of_init); diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index 3c8a18a00a65..8eb5af805964 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -25,48 +25,6 @@ #include <linux/atomic.h> #include <asm/uaccess.h> -#ifdef CONFIG_KGDB -int kgdb_early_setup; -#endif - -static DECLARE_BITMAP(irq_map, NR_IRQS); - -int allocate_irqno(void) -{ - int irq; - -again: - irq = find_first_zero_bit(irq_map, NR_IRQS); - - if (irq >= NR_IRQS) - return -ENOSPC; - - if (test_and_set_bit(irq, irq_map)) - goto again; - - return irq; -} - -/* - * Allocate the 16 legacy interrupts for i8259 devices. This happens early - * in the kernel initialization so treating allocation failure as BUG() is - * ok. - */ -void __init alloc_legacy_irqno(void) -{ - int i; - - for (i = 0; i <= 16; i++) - BUG_ON(test_and_set_bit(i, irq_map)); -} - -void free_irqno(unsigned int irq) -{ - smp_mb__before_atomic(); - clear_bit(irq, irq_map); - smp_mb__after_atomic(); -} - /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves. @@ -93,20 +51,10 @@ void __init init_IRQ(void) { int i; -#ifdef CONFIG_KGDB - if (kgdb_early_setup) - return; -#endif - for (i = 0; i < NR_IRQS; i++) irq_set_noprobe(i); arch_init_irq(); - -#ifdef CONFIG_KGDB - if (!kgdb_early_setup) - kgdb_early_setup = 1; -#endif } #ifdef CONFIG_DEBUG_STACKOVERFLOW diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c index 7afcc2f22c0d..de63d36af895 100644 --- a/arch/mips/kernel/kgdb.c +++ b/arch/mips/kernel/kgdb.c @@ -378,10 +378,6 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, struct kgdb_arch arch_kgdb_ops; -/* - * We use kgdb_early_setup so that functions we need to call now don't - * cause trouble when called again later. - */ int kgdb_arch_init(void) { union mips_instruction insn = { diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c index e303cb1ef2f4..b130033838ba 100644 --- a/arch/mips/kernel/prom.c +++ b/arch/mips/kernel/prom.c @@ -18,6 +18,7 @@ #include <linux/of_fdt.h> #include <linux/of_platform.h> +#include <asm/bootinfo.h> #include <asm/page.h> #include <asm/prom.h> diff --git a/arch/mips/kernel/sysrq.c b/arch/mips/kernel/sysrq.c new file mode 100644 index 000000000000..5b539f5fc9d9 --- /dev/null +++ b/arch/mips/kernel/sysrq.c @@ -0,0 +1,77 @@ +/* + * MIPS specific sysrq operations. + * + * Copyright (C) 2015 Imagination Technologies Ltd. + */ +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/sysrq.h> +#include <linux/workqueue.h> + +#include <asm/cpu-features.h> +#include <asm/mipsregs.h> +#include <asm/tlbdebug.h> + +/* + * Dump TLB entries on all CPUs. + */ + +static DEFINE_SPINLOCK(show_lock); + +static void sysrq_tlbdump_single(void *dummy) +{ + const int field = 2 * sizeof(unsigned long); + unsigned long flags; + + spin_lock_irqsave(&show_lock, flags); + + pr_info("CPU%d:\n", smp_processor_id()); + pr_info("Index : %0x\n", read_c0_index()); + pr_info("Pagemask: %0x\n", read_c0_pagemask()); + pr_info("EntryHi : %0*lx\n", field, read_c0_entryhi()); + pr_info("EntryLo0: %0*lx\n", field, read_c0_entrylo0()); + pr_info("EntryLo1: %0*lx\n", field, read_c0_entrylo1()); + pr_info("Wired : %0x\n", read_c0_wired()); + pr_info("Pagegrain: %0x\n", read_c0_pagegrain()); + if (cpu_has_htw) { + pr_info("PWField : %0*lx\n", field, read_c0_pwfield()); + pr_info("PWSize : %0*lx\n", field, read_c0_pwsize()); + pr_info("PWCtl : %0x\n", read_c0_pwctl()); + } + pr_info("\n"); + dump_tlb_all(); + pr_info("\n"); + + spin_unlock_irqrestore(&show_lock, flags); +} + +#ifdef CONFIG_SMP +static void sysrq_tlbdump_othercpus(struct work_struct *dummy) +{ + smp_call_function(sysrq_tlbdump_single, NULL, 0); +} + +static DECLARE_WORK(sysrq_tlbdump, sysrq_tlbdump_othercpus); +#endif + +static void sysrq_handle_tlbdump(int key) +{ + sysrq_tlbdump_single(NULL); +#ifdef CONFIG_SMP + schedule_work(&sysrq_tlbdump); +#endif +} + +static struct sysrq_key_op sysrq_tlbdump_op = { + .handler = sysrq_handle_tlbdump, + .help_msg = "show-tlbs(x)", + .action_msg = "Show TLB entries", + .enable_mask = SYSRQ_ENABLE_DUMP, +}; + +static int __init mips_sysrq_init(void) +{ + return register_sysrq_key('x', &sysrq_tlbdump_op); +} +arch_initcall(mips_sysrq_init); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index d2d1c1933bc9..2a7b38ed23f0 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -236,6 +236,7 @@ static void __show_regs(const struct pt_regs *regs) { const int field = 2 * sizeof(unsigned long); unsigned int cause = regs->cp0_cause; + unsigned int exccode; int i; show_regs_print_info(KERN_DEFAULT); @@ -317,10 +318,10 @@ static void __show_regs(const struct pt_regs *regs) } printk("\n"); - printk("Cause : %08x\n", cause); + exccode = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE; + printk("Cause : %08x (ExcCode %02x)\n", cause, exccode); - cause = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE; - if (1 <= cause && cause <= 5) + if (1 <= exccode && exccode <= 5) printk("BadVA : %0*lx\n", field, regs->cp0_badvaddr); printk("PrId : %08x (%s)\n", read_c0_prid(), @@ -2184,11 +2185,6 @@ void __init trap_init(void) check_wait(); -#if defined(CONFIG_KGDB) - if (kgdb_early_setup) - return; /* Already done */ -#endif - if (cpu_has_veic || cpu_has_vint) { unsigned long size = 0x200 + VECTORSPACING*64; ebase = (unsigned long) diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 3b46f7ce9ca7..07d32a4aea60 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -125,8 +125,14 @@ SECTIONS .exit.data : { EXIT_DATA } - +#ifdef CONFIG_SMP PERCPU_SECTION(1 << CONFIG_MIPS_L1_CACHE_SHIFT) +#endif +#ifdef CONFIG_MIPS_RAW_APPENDED_DTB + __appended_dtb = .; + /* leave space for appended DTB */ + . += 0x100000; +#endif /* * Align to 64K in attempt to eliminate holes before the * .bss..swapper_pg_dir section at the start of .bss. This diff --git a/arch/mips/lib/dump_tlb.c b/arch/mips/lib/dump_tlb.c index 32b9f21bfd85..167f35634709 100644 --- a/arch/mips/lib/dump_tlb.c +++ b/arch/mips/lib/dump_tlb.c @@ -7,6 +7,7 @@ #include <linux/kernel.h> #include <linux/mm.h> +#include <asm/hazards.h> #include <asm/mipsregs.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -40,17 +41,20 @@ static inline const char *msk2str(unsigned int mask) return ""; } -#define BARRIER() \ - __asm__ __volatile__( \ - ".set\tnoreorder\n\t" \ - "nop;nop;nop;nop;nop;nop;nop\n\t" \ - ".set\treorder"); - static void dump_tlb(int first, int last) { unsigned long s_entryhi, entryhi, asid; - unsigned long long entrylo0, entrylo1; + unsigned long long entrylo0, entrylo1, pa; unsigned int s_index, s_pagemask, pagemask, c0, c1, i; +#ifdef CONFIG_32BIT + bool xpa = cpu_has_xpa && (read_c0_pagegrain() & PG_ELPA); + int pwidth = xpa ? 11 : 8; + int vwidth = 8; +#else + bool xpa = false; + int pwidth = 11; + int vwidth = 11; +#endif s_pagemask = read_c0_pagemask(); s_entryhi = read_c0_entryhi(); @@ -59,46 +63,74 @@ static void dump_tlb(int first, int last) for (i = first; i <= last; i++) { write_c0_index(i); - BARRIER(); + mtc0_tlbr_hazard(); tlb_read(); - BARRIER(); + tlb_read_hazard(); pagemask = read_c0_pagemask(); entryhi = read_c0_entryhi(); entrylo0 = read_c0_entrylo0(); entrylo1 = read_c0_entrylo1(); - /* Unused entries have a virtual address of CKSEG0. */ - if ((entryhi & ~0x1ffffUL) != CKSEG0 - && (entryhi & 0xff) == asid) { -#ifdef CONFIG_32BIT - int width = 8; -#else - int width = 11; -#endif - /* - * Only print entries in use - */ - printk("Index: %2d pgmask=%s ", i, msk2str(pagemask)); + /* EHINV bit marks entire entry as invalid */ + if (cpu_has_tlbinv && entryhi & MIPS_ENTRYHI_EHINV) + continue; + /* + * Prior to tlbinv, unused entries have a virtual address of + * CKSEG0. + */ + if ((entryhi & ~0x1ffffUL) == CKSEG0) + continue; + /* + * ASID takes effect in absence of G (global) bit. + * We check both G bits, even though architecturally they should + * match one another, because some revisions of the SB1 core may + * leave only a single G bit set after a machine check exception + * due to duplicate TLB entry. + */ + if (!((entrylo0 | entrylo1) & MIPS_ENTRYLO_G) && + (entryhi & 0xff) != asid) + continue; + + /* + * Only print entries in use + */ + printk("Index: %2d pgmask=%s ", i, msk2str(pagemask)); - c0 = (entrylo0 >> 3) & 7; - c1 = (entrylo1 >> 3) & 7; + c0 = (entrylo0 & MIPS_ENTRYLO_C) >> MIPS_ENTRYLO_C_SHIFT; + c1 = (entrylo1 & MIPS_ENTRYLO_C) >> MIPS_ENTRYLO_C_SHIFT; - printk("va=%0*lx asid=%02lx\n", - width, (entryhi & ~0x1fffUL), - entryhi & 0xff); - printk("\t[pa=%0*llx c=%d d=%d v=%d g=%d] ", - width, - (entrylo0 << 6) & PAGE_MASK, c0, - (entrylo0 & 4) ? 1 : 0, - (entrylo0 & 2) ? 1 : 0, - (entrylo0 & 1) ? 1 : 0); - printk("[pa=%0*llx c=%d d=%d v=%d g=%d]\n", - width, - (entrylo1 << 6) & PAGE_MASK, c1, - (entrylo1 & 4) ? 1 : 0, - (entrylo1 & 2) ? 1 : 0, - (entrylo1 & 1) ? 1 : 0); - } + printk("va=%0*lx asid=%02lx\n", + vwidth, (entryhi & ~0x1fffUL), + entryhi & 0xff); + /* RI/XI are in awkward places, so mask them off separately */ + pa = entrylo0 & ~(MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI); + if (xpa) + pa |= (unsigned long long)readx_c0_entrylo0() << 30; + pa = (pa << 6) & PAGE_MASK; + printk("\t["); + if (cpu_has_rixi) + printk("ri=%d xi=%d ", + (entrylo0 & MIPS_ENTRYLO_RI) ? 1 : 0, + (entrylo0 & MIPS_ENTRYLO_XI) ? 1 : 0); + printk("pa=%0*llx c=%d d=%d v=%d g=%d] [", + pwidth, pa, c0, + (entrylo0 & MIPS_ENTRYLO_D) ? 1 : 0, + (entrylo0 & MIPS_ENTRYLO_V) ? 1 : 0, + (entrylo0 & MIPS_ENTRYLO_G) ? 1 : 0); + /* RI/XI are in awkward places, so mask them off separately */ + pa = entrylo1 & ~(MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI); + if (xpa) + pa |= (unsigned long long)readx_c0_entrylo1() << 30; + pa = (pa << 6) & PAGE_MASK; + if (cpu_has_rixi) + printk("ri=%d xi=%d ", + (entrylo1 & MIPS_ENTRYLO_RI) ? 1 : 0, + (entrylo1 & MIPS_ENTRYLO_XI) ? 1 : 0); + printk("pa=%0*llx c=%d d=%d v=%d g=%d]\n", + pwidth, pa, c1, + (entrylo1 & MIPS_ENTRYLO_D) ? 1 : 0, + (entrylo1 & MIPS_ENTRYLO_V) ? 1 : 0, + (entrylo1 & MIPS_ENTRYLO_G) ? 1 : 0); } printk("\n"); diff --git a/arch/mips/lib/r3k_dump_tlb.c b/arch/mips/lib/r3k_dump_tlb.c index 975a13855116..8e0d3cff8ae4 100644 --- a/arch/mips/lib/r3k_dump_tlb.c +++ b/arch/mips/lib/r3k_dump_tlb.c @@ -14,8 +14,6 @@ #include <asm/pgtable.h> #include <asm/tlbdebug.h> -extern int r3k_have_wired_reg; /* defined in tlb-r3k.c */ - static void dump_tlb(int first, int last) { int i; @@ -35,8 +33,9 @@ static void dump_tlb(int first, int last) entrylo0 = read_c0_entrylo0(); /* Unused entries have a virtual address of KSEG0. */ - if ((entryhi & PAGE_MASK) != KSEG0 - && (entryhi & ASID_MASK) == asid) { + if ((entryhi & PAGE_MASK) != KSEG0 && + (entrylo0 & R3K_ENTRYLO_G || + (entryhi & ASID_MASK) == asid)) { /* * Only print entries in use */ @@ -47,10 +46,10 @@ static void dump_tlb(int first, int last) entryhi & PAGE_MASK, entryhi & ASID_MASK, entrylo0 & PAGE_MASK, - (entrylo0 & (1 << 11)) ? 1 : 0, - (entrylo0 & (1 << 10)) ? 1 : 0, - (entrylo0 & (1 << 9)) ? 1 : 0, - (entrylo0 & (1 << 8)) ? 1 : 0); + (entrylo0 & R3K_ENTRYLO_N) ? 1 : 0, + (entrylo0 & R3K_ENTRYLO_D) ? 1 : 0, + (entrylo0 & R3K_ENTRYLO_V) ? 1 : 0, + (entrylo0 & R3K_ENTRYLO_G) ? 1 : 0); } } printk("\n"); diff --git a/arch/mips/loongson1/Kconfig b/arch/mips/loongson32/Kconfig index a2b796eaf3c3..7704f20529d6 100644 --- a/arch/mips/loongson1/Kconfig +++ b/arch/mips/loongson32/Kconfig @@ -1,4 +1,4 @@ -if MACH_LOONGSON1 +if MACH_LOONGSON32 choice prompt "Machine Type" @@ -10,7 +10,7 @@ config LOONGSON1_LS1B select SYS_HAS_CPU_LOONGSON1B select DMA_NONCOHERENT select BOOT_ELF32 - select IRQ_CPU + select IRQ_MIPS_CPU select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_LITTLE_ENDIAN select SYS_SUPPORTS_HIGHMEM @@ -58,4 +58,4 @@ config TIMER_USE_PWM3 endchoice -endif # MACH_LOONGSON1 +endif # MACH_LOONGSON32 diff --git a/arch/mips/loongson1/Makefile b/arch/mips/loongson32/Makefile index 9719c75886f5..5f4bd6e071ca 100644 --- a/arch/mips/loongson1/Makefile +++ b/arch/mips/loongson32/Makefile @@ -2,7 +2,7 @@ # Common code for all Loongson 1 based systems # -obj-$(CONFIG_MACH_LOONGSON1) += common/ +obj-$(CONFIG_MACH_LOONGSON32) += common/ # # Loongson LS1B board diff --git a/arch/mips/loongson1/Platform b/arch/mips/loongson32/Platform index 11863441dea3..ebb6dc290f0a 100644 --- a/arch/mips/loongson1/Platform +++ b/arch/mips/loongson32/Platform @@ -2,6 +2,6 @@ cflags-$(CONFIG_CPU_LOONGSON1) += \ $(call cc-option,-march=mips32r2,-mips32r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS32) \ -Wa,-mips32r2 -Wa,--trap -platform-$(CONFIG_MACH_LOONGSON1) += loongson1/ -cflags-$(CONFIG_MACH_LOONGSON1) += -I$(srctree)/arch/mips/include/asm/mach-loongson1 +platform-$(CONFIG_MACH_LOONGSON32) += loongson32/ +cflags-$(CONFIG_MACH_LOONGSON32) += -I$(srctree)/arch/mips/include/asm/mach-loongson32 load-$(CONFIG_LOONGSON1_LS1B) += 0xffffffff80100000 diff --git a/arch/mips/loongson1/common/Makefile b/arch/mips/loongson32/common/Makefile index 723b4ce3b8f0..723b4ce3b8f0 100644 --- a/arch/mips/loongson1/common/Makefile +++ b/arch/mips/loongson32/common/Makefile diff --git a/arch/mips/loongson1/common/irq.c b/arch/mips/loongson32/common/irq.c index 455a7704a90f..455a7704a90f 100644 --- a/arch/mips/loongson1/common/irq.c +++ b/arch/mips/loongson32/common/irq.c diff --git a/arch/mips/loongson1/common/platform.c b/arch/mips/loongson32/common/platform.c index ddf1d4cbf31e..ddf1d4cbf31e 100644 --- a/arch/mips/loongson1/common/platform.c +++ b/arch/mips/loongson32/common/platform.c diff --git a/arch/mips/loongson1/common/prom.c b/arch/mips/loongson32/common/prom.c index 68600980ea49..68600980ea49 100644 --- a/arch/mips/loongson1/common/prom.c +++ b/arch/mips/loongson32/common/prom.c diff --git a/arch/mips/loongson1/common/reset.c b/arch/mips/loongson32/common/reset.c index c41e4ca56ab4..c41e4ca56ab4 100644 --- a/arch/mips/loongson1/common/reset.c +++ b/arch/mips/loongson32/common/reset.c diff --git a/arch/mips/loongson1/common/setup.c b/arch/mips/loongson32/common/setup.c index 62f41afee241..62f41afee241 100644 --- a/arch/mips/loongson1/common/setup.c +++ b/arch/mips/loongson32/common/setup.c diff --git a/arch/mips/loongson1/common/time.c b/arch/mips/loongson32/common/time.c index df0f850d6a5f..df0f850d6a5f 100644 --- a/arch/mips/loongson1/common/time.c +++ b/arch/mips/loongson32/common/time.c diff --git a/arch/mips/loongson1/ls1b/Makefile b/arch/mips/loongson32/ls1b/Makefile index 891eac482b82..891eac482b82 100644 --- a/arch/mips/loongson1/ls1b/Makefile +++ b/arch/mips/loongson32/ls1b/Makefile diff --git a/arch/mips/loongson1/ls1b/board.c b/arch/mips/loongson32/ls1b/board.c index 58daeea25739..58daeea25739 100644 --- a/arch/mips/loongson1/ls1b/board.c +++ b/arch/mips/loongson32/ls1b/board.c diff --git a/arch/mips/loongson/Kconfig b/arch/mips/loongson64/Kconfig index 156de85b82cd..497912b38d8e 100644 --- a/arch/mips/loongson/Kconfig +++ b/arch/mips/loongson64/Kconfig @@ -1,4 +1,4 @@ -if MACH_LOONGSON +if MACH_LOONGSON64 choice prompt "Machine Type" @@ -15,7 +15,7 @@ config LEMOTE_FULOONG2E select HW_HAS_PCI select I8259 select ISA - select IRQ_CPU + select IRQ_MIPS_CPU select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_LITTLE_ENDIAN @@ -44,7 +44,7 @@ config LEMOTE_MACH2F select HAVE_CLK select HW_HAS_PCI select I8259 - select IRQ_CPU + select IRQ_MIPS_CPU select ISA select SYS_HAS_CPU_LOONGSON2F select SYS_HAS_EARLY_PRINTK @@ -73,7 +73,7 @@ config LOONGSON_MACH3X select ISA select HT_PCI select I8259 - select IRQ_CPU + select IRQ_MIPS_CPU select NR_CPUS_DEFAULT_4 select SYS_HAS_CPU_LOONGSON3 select SYS_HAS_EARLY_PRINTK @@ -155,4 +155,4 @@ config LOONGSON_MC146818 config LEFI_FIRMWARE_INTERFACE bool -endif # MACH_LOONGSON +endif # MACH_LOONGSON64 diff --git a/arch/mips/loongson/Makefile b/arch/mips/loongson64/Makefile index 7429994e7604..4fe3d88fc361 100644 --- a/arch/mips/loongson/Makefile +++ b/arch/mips/loongson64/Makefile @@ -2,7 +2,7 @@ # Common code for all Loongson based systems # -obj-$(CONFIG_MACH_LOONGSON) += common/ +obj-$(CONFIG_MACH_LOONGSON64) += common/ # # Lemote Fuloong mini-PC (Loongson 2E-based) diff --git a/arch/mips/loongson/Platform b/arch/mips/loongson64/Platform index 0ac20eb84ecc..2e48e83d5524 100644 --- a/arch/mips/loongson/Platform +++ b/arch/mips/loongson64/Platform @@ -26,8 +26,8 @@ endif # Loongson Machines' Support # -platform-$(CONFIG_MACH_LOONGSON) += loongson/ -cflags-$(CONFIG_MACH_LOONGSON) += -I$(srctree)/arch/mips/include/asm/mach-loongson -mno-branch-likely +platform-$(CONFIG_MACH_LOONGSON64) += loongson64/ +cflags-$(CONFIG_MACH_LOONGSON64) += -I$(srctree)/arch/mips/include/asm/mach-loongson64 -mno-branch-likely load-$(CONFIG_LEMOTE_FULOONG2E) += 0xffffffff80100000 load-$(CONFIG_LEMOTE_MACH2F) += 0xffffffff80200000 load-$(CONFIG_LOONGSON_MACH3X) += 0xffffffff80200000 diff --git a/arch/mips/loongson/common/Makefile b/arch/mips/loongson64/common/Makefile index f2e8153e44f5..f2e8153e44f5 100644 --- a/arch/mips/loongson/common/Makefile +++ b/arch/mips/loongson64/common/Makefile diff --git a/arch/mips/loongson/common/bonito-irq.c b/arch/mips/loongson64/common/bonito-irq.c index cc0e4fd548e6..cc0e4fd548e6 100644 --- a/arch/mips/loongson/common/bonito-irq.c +++ b/arch/mips/loongson64/common/bonito-irq.c diff --git a/arch/mips/loongson/common/cmdline.c b/arch/mips/loongson64/common/cmdline.c index 72fed003a536..72fed003a536 100644 --- a/arch/mips/loongson/common/cmdline.c +++ b/arch/mips/loongson64/common/cmdline.c diff --git a/arch/mips/loongson/common/cs5536/Makefile b/arch/mips/loongson64/common/cs5536/Makefile index f12e64007347..f12e64007347 100644 --- a/arch/mips/loongson/common/cs5536/Makefile +++ b/arch/mips/loongson64/common/cs5536/Makefile diff --git a/arch/mips/loongson/common/cs5536/cs5536_acc.c b/arch/mips/loongson64/common/cs5536/cs5536_acc.c index ab4d6cc57384..ab4d6cc57384 100644 --- a/arch/mips/loongson/common/cs5536/cs5536_acc.c +++ b/arch/mips/loongson64/common/cs5536/cs5536_acc.c diff --git a/arch/mips/loongson/common/cs5536/cs5536_ehci.c b/arch/mips/loongson64/common/cs5536/cs5536_ehci.c index ec2e360267a8..ec2e360267a8 100644 --- a/arch/mips/loongson/common/cs5536/cs5536_ehci.c +++ b/arch/mips/loongson64/common/cs5536/cs5536_ehci.c diff --git a/arch/mips/loongson/common/cs5536/cs5536_ide.c b/arch/mips/loongson64/common/cs5536/cs5536_ide.c index a73414d9ee51..a73414d9ee51 100644 --- a/arch/mips/loongson/common/cs5536/cs5536_ide.c +++ b/arch/mips/loongson64/common/cs5536/cs5536_ide.c diff --git a/arch/mips/loongson/common/cs5536/cs5536_isa.c b/arch/mips/loongson64/common/cs5536/cs5536_isa.c index 924be39e7733..924be39e7733 100644 --- a/arch/mips/loongson/common/cs5536/cs5536_isa.c +++ b/arch/mips/loongson64/common/cs5536/cs5536_isa.c diff --git a/arch/mips/loongson/common/cs5536/cs5536_mfgpt.c b/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c index 12c75db23420..12c75db23420 100644 --- a/arch/mips/loongson/common/cs5536/cs5536_mfgpt.c +++ b/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c diff --git a/arch/mips/loongson/common/cs5536/cs5536_ohci.c b/arch/mips/loongson64/common/cs5536/cs5536_ohci.c index f7c905e50dc4..f7c905e50dc4 100644 --- a/arch/mips/loongson/common/cs5536/cs5536_ohci.c +++ b/arch/mips/loongson64/common/cs5536/cs5536_ohci.c diff --git a/arch/mips/loongson/common/cs5536/cs5536_pci.c b/arch/mips/loongson64/common/cs5536/cs5536_pci.c index b739723205f8..b739723205f8 100644 --- a/arch/mips/loongson/common/cs5536/cs5536_pci.c +++ b/arch/mips/loongson64/common/cs5536/cs5536_pci.c diff --git a/arch/mips/loongson/common/dma-swiotlb.c b/arch/mips/loongson64/common/dma-swiotlb.c index 2c6b989c1bc4..2c6b989c1bc4 100644 --- a/arch/mips/loongson/common/dma-swiotlb.c +++ b/arch/mips/loongson64/common/dma-swiotlb.c diff --git a/arch/mips/loongson/common/early_printk.c b/arch/mips/loongson64/common/early_printk.c index 6ca632e529dc..6ca632e529dc 100644 --- a/arch/mips/loongson/common/early_printk.c +++ b/arch/mips/loongson64/common/early_printk.c diff --git a/arch/mips/loongson/common/env.c b/arch/mips/loongson64/common/env.c index 22f04ca2ff3e..22f04ca2ff3e 100644 --- a/arch/mips/loongson/common/env.c +++ b/arch/mips/loongson64/common/env.c diff --git a/arch/mips/loongson/common/init.c b/arch/mips/loongson64/common/init.c index 9b987fe98b5b..9b987fe98b5b 100644 --- a/arch/mips/loongson/common/init.c +++ b/arch/mips/loongson64/common/init.c diff --git a/arch/mips/loongson/common/irq.c b/arch/mips/loongson64/common/irq.c index 687003b19b45..687003b19b45 100644 --- a/arch/mips/loongson/common/irq.c +++ b/arch/mips/loongson64/common/irq.c diff --git a/arch/mips/loongson/common/machtype.c b/arch/mips/loongson64/common/machtype.c index f2807bc662a3..f2807bc662a3 100644 --- a/arch/mips/loongson/common/machtype.c +++ b/arch/mips/loongson64/common/machtype.c diff --git a/arch/mips/loongson/common/mem.c b/arch/mips/loongson64/common/mem.c index b01d52473da8..b01d52473da8 100644 --- a/arch/mips/loongson/common/mem.c +++ b/arch/mips/loongson64/common/mem.c diff --git a/arch/mips/loongson/common/pci.c b/arch/mips/loongson64/common/pci.c index 4e2575643781..4e2575643781 100644 --- a/arch/mips/loongson/common/pci.c +++ b/arch/mips/loongson64/common/pci.c diff --git a/arch/mips/loongson/common/platform.c b/arch/mips/loongson64/common/platform.c index 0ed38321a9a2..0ed38321a9a2 100644 --- a/arch/mips/loongson/common/platform.c +++ b/arch/mips/loongson64/common/platform.c diff --git a/arch/mips/loongson/common/pm.c b/arch/mips/loongson64/common/pm.c index a6b67ccfc811..a6b67ccfc811 100644 --- a/arch/mips/loongson/common/pm.c +++ b/arch/mips/loongson64/common/pm.c diff --git a/arch/mips/loongson/common/reset.c b/arch/mips/loongson64/common/reset.c index a60715e11306..a60715e11306 100644 --- a/arch/mips/loongson/common/reset.c +++ b/arch/mips/loongson64/common/reset.c diff --git a/arch/mips/loongson/common/rtc.c b/arch/mips/loongson64/common/rtc.c index b5709af09f7f..b5709af09f7f 100644 --- a/arch/mips/loongson/common/rtc.c +++ b/arch/mips/loongson64/common/rtc.c diff --git a/arch/mips/loongson/common/serial.c b/arch/mips/loongson64/common/serial.c index c23fa1373729..ffefc1cb2612 100644 --- a/arch/mips/loongson/common/serial.c +++ b/arch/mips/loongson64/common/serial.c @@ -11,7 +11,7 @@ */ #include <linux/io.h> -#include <linux/init.h> +#include <linux/module.h> #include <linux/serial_8250.h> #include <asm/bootinfo.h> @@ -108,5 +108,10 @@ static int __init serial_init(void) return platform_device_register(&uart8250_device); } +module_init(serial_init); -device_initcall(serial_init); +static void __init serial_exit(void) +{ + platform_device_unregister(&uart8250_device); +} +module_exit(serial_exit); diff --git a/arch/mips/loongson/common/setup.c b/arch/mips/loongson64/common/setup.c index d477dd6bb326..d477dd6bb326 100644 --- a/arch/mips/loongson/common/setup.c +++ b/arch/mips/loongson64/common/setup.c diff --git a/arch/mips/loongson/common/time.c b/arch/mips/loongson64/common/time.c index e1a5382ad47e..e1a5382ad47e 100644 --- a/arch/mips/loongson/common/time.c +++ b/arch/mips/loongson64/common/time.c diff --git a/arch/mips/loongson/common/uart_base.c b/arch/mips/loongson64/common/uart_base.c index 9de559d58e1f..9de559d58e1f 100644 --- a/arch/mips/loongson/common/uart_base.c +++ b/arch/mips/loongson64/common/uart_base.c diff --git a/arch/mips/loongson/fuloong-2e/Makefile b/arch/mips/loongson64/fuloong-2e/Makefile index b7622720c1ad..b7622720c1ad 100644 --- a/arch/mips/loongson/fuloong-2e/Makefile +++ b/arch/mips/loongson64/fuloong-2e/Makefile diff --git a/arch/mips/loongson/fuloong-2e/irq.c b/arch/mips/loongson64/fuloong-2e/irq.c index ef5ec8f3de5f..ef5ec8f3de5f 100644 --- a/arch/mips/loongson/fuloong-2e/irq.c +++ b/arch/mips/loongson64/fuloong-2e/irq.c diff --git a/arch/mips/loongson/fuloong-2e/reset.c b/arch/mips/loongson64/fuloong-2e/reset.c index da4d2ae2a1f8..da4d2ae2a1f8 100644 --- a/arch/mips/loongson/fuloong-2e/reset.c +++ b/arch/mips/loongson64/fuloong-2e/reset.c diff --git a/arch/mips/loongson/lemote-2f/Makefile b/arch/mips/loongson64/lemote-2f/Makefile index 4f9eaa328a16..4f9eaa328a16 100644 --- a/arch/mips/loongson/lemote-2f/Makefile +++ b/arch/mips/loongson64/lemote-2f/Makefile diff --git a/arch/mips/loongson/lemote-2f/clock.c b/arch/mips/loongson64/lemote-2f/clock.c index 462e34d46b4a..462e34d46b4a 100644 --- a/arch/mips/loongson/lemote-2f/clock.c +++ b/arch/mips/loongson64/lemote-2f/clock.c diff --git a/arch/mips/loongson/lemote-2f/ec_kb3310b.c b/arch/mips/loongson64/lemote-2f/ec_kb3310b.c index 2b666d3a3947..2b666d3a3947 100644 --- a/arch/mips/loongson/lemote-2f/ec_kb3310b.c +++ b/arch/mips/loongson64/lemote-2f/ec_kb3310b.c diff --git a/arch/mips/loongson/lemote-2f/ec_kb3310b.h b/arch/mips/loongson64/lemote-2f/ec_kb3310b.h index 5a3f1860d4d2..5a3f1860d4d2 100644 --- a/arch/mips/loongson/lemote-2f/ec_kb3310b.h +++ b/arch/mips/loongson64/lemote-2f/ec_kb3310b.h diff --git a/arch/mips/loongson/lemote-2f/irq.c b/arch/mips/loongson64/lemote-2f/irq.c index cab5f43e0e29..cab5f43e0e29 100644 --- a/arch/mips/loongson/lemote-2f/irq.c +++ b/arch/mips/loongson64/lemote-2f/irq.c diff --git a/arch/mips/loongson/lemote-2f/machtype.c b/arch/mips/loongson64/lemote-2f/machtype.c index b55e6eece5e0..b55e6eece5e0 100644 --- a/arch/mips/loongson/lemote-2f/machtype.c +++ b/arch/mips/loongson64/lemote-2f/machtype.c diff --git a/arch/mips/loongson/lemote-2f/pm.c b/arch/mips/loongson64/lemote-2f/pm.c index cac4d382ea73..cac4d382ea73 100644 --- a/arch/mips/loongson/lemote-2f/pm.c +++ b/arch/mips/loongson64/lemote-2f/pm.c diff --git a/arch/mips/loongson/lemote-2f/reset.c b/arch/mips/loongson64/lemote-2f/reset.c index a26ca7fcd7e0..a26ca7fcd7e0 100644 --- a/arch/mips/loongson/lemote-2f/reset.c +++ b/arch/mips/loongson64/lemote-2f/reset.c diff --git a/arch/mips/loongson/loongson-3/Makefile b/arch/mips/loongson64/loongson-3/Makefile index 622fead5ebc9..622fead5ebc9 100644 --- a/arch/mips/loongson/loongson-3/Makefile +++ b/arch/mips/loongson64/loongson-3/Makefile diff --git a/arch/mips/loongson/loongson-3/cop2-ex.c b/arch/mips/loongson64/loongson-3/cop2-ex.c index ea13764d0a03..ea13764d0a03 100644 --- a/arch/mips/loongson/loongson-3/cop2-ex.c +++ b/arch/mips/loongson64/loongson-3/cop2-ex.c diff --git a/arch/mips/loongson/loongson-3/hpet.c b/arch/mips/loongson64/loongson-3/hpet.c index 5c21cd3bd339..5c21cd3bd339 100644 --- a/arch/mips/loongson/loongson-3/hpet.c +++ b/arch/mips/loongson64/loongson-3/hpet.c diff --git a/arch/mips/loongson/loongson-3/irq.c b/arch/mips/loongson64/loongson-3/irq.c index 0f75b6b3d218..0f75b6b3d218 100644 --- a/arch/mips/loongson/loongson-3/irq.c +++ b/arch/mips/loongson64/loongson-3/irq.c diff --git a/arch/mips/loongson/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c index 12d14ed48778..12d14ed48778 100644 --- a/arch/mips/loongson/loongson-3/numa.c +++ b/arch/mips/loongson64/loongson-3/numa.c diff --git a/arch/mips/loongson/loongson-3/platform.c b/arch/mips/loongson64/loongson-3/platform.c index 25a97cc0ee33..25a97cc0ee33 100644 --- a/arch/mips/loongson/loongson-3/platform.c +++ b/arch/mips/loongson64/loongson-3/platform.c diff --git a/arch/mips/loongson/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c index 509877c6e9d9..509877c6e9d9 100644 --- a/arch/mips/loongson/loongson-3/smp.c +++ b/arch/mips/loongson64/loongson-3/smp.c diff --git a/arch/mips/loongson/loongson-3/smp.h b/arch/mips/loongson64/loongson-3/smp.h index d98ff654b7d7..d98ff654b7d7 100644 --- a/arch/mips/loongson/loongson-3/smp.h +++ b/arch/mips/loongson64/loongson-3/smp.h diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 2e03ab173591..7f660dc67596 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -295,7 +295,7 @@ static void r4k_blast_icache_page_setup(void) static void (*r4k_blast_icache_user_page)(unsigned long addr); -static void __cpuinit r4k_blast_icache_user_page_setup(void) +static void r4k_blast_icache_user_page_setup(void) { unsigned long ic_lsize = cpu_icache_line_size(); diff --git a/arch/mips/mm/c-tx39.c b/arch/mips/mm/c-tx39.c index 8d909dbbf37f..596e18458e04 100644 --- a/arch/mips/mm/c-tx39.c +++ b/arch/mips/mm/c-tx39.c @@ -28,8 +28,6 @@ static unsigned long icache_size, dcache_size; /* Size in bytes */ #include <asm/r4kcache.h> -extern int r3k_have_wired_reg; /* in r3k-tlb.c */ - /* This sequence is required to ensure icache is disabled immediately */ #define TX39_STOP_STREAMING() \ __asm__ __volatile__( \ @@ -383,8 +381,6 @@ void tx39_cache_init(void) case CPU_TX3927: default: /* TX39/H2,H3 core (writeback 2way-set-associative cache) */ - r3k_have_wired_reg = 1; - write_c0_wired(0); /* set 8 on reset... */ /* board-dependent init code may set WBON */ __flush_cache_vmap = tx39__flush_cache_vmap; diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index 609d1241b0c4..eeaf0245c3b1 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -262,12 +262,13 @@ static void mips_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, plat_unmap_dma_mem(dev, dma_addr, size, direction); } -static int mips_dma_map_sg(struct device *dev, struct scatterlist *sg, +static int mips_dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction, struct dma_attrs *attrs) { int i; + struct scatterlist *sg; - for (i = 0; i < nents; i++, sg++) { + for_each_sg(sglist, sg, nents, i) { if (!plat_device_is_coherent(dev)) __dma_sync(sg_page(sg), sg->offset, sg->length, direction); @@ -291,13 +292,14 @@ static dma_addr_t mips_dma_map_page(struct device *dev, struct page *page, return plat_map_dma_mem_page(dev, page) + offset; } -static void mips_dma_unmap_sg(struct device *dev, struct scatterlist *sg, +static void mips_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nhwentries, enum dma_data_direction direction, struct dma_attrs *attrs) { int i; + struct scatterlist *sg; - for (i = 0; i < nhwentries; i++, sg++) { + for_each_sg(sglist, sg, nhwentries, i) { if (!plat_device_is_coherent(dev) && direction != DMA_TO_DEVICE) __dma_sync(sg_page(sg), sg->offset, sg->length, @@ -324,26 +326,34 @@ static void mips_dma_sync_single_for_device(struct device *dev, } static void mips_dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sg, int nelems, enum dma_data_direction direction) + struct scatterlist *sglist, int nelems, + enum dma_data_direction direction) { int i; + struct scatterlist *sg; - if (cpu_needs_post_dma_flush(dev)) - for (i = 0; i < nelems; i++, sg++) + if (cpu_needs_post_dma_flush(dev)) { + for_each_sg(sglist, sg, nelems, i) { __dma_sync(sg_page(sg), sg->offset, sg->length, direction); + } + } plat_post_dma_flush(dev); } static void mips_dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, int nelems, enum dma_data_direction direction) + struct scatterlist *sglist, int nelems, + enum dma_data_direction direction) { int i; + struct scatterlist *sg; - if (!plat_device_is_coherent(dev)) - for (i = 0; i < nelems; i++, sg++) + if (!plat_device_is_coherent(dev)) { + for_each_sg(sglist, sg, nelems, i) { __dma_sync(sg_page(sg), sg->offset, sg->length, direction); + } + } } int mips_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c index 4094bbd42adf..2b75b8f880ed 100644 --- a/arch/mips/mm/tlb-r3k.c +++ b/arch/mips/mm/tlb-r3k.c @@ -36,30 +36,33 @@ extern void build_tlb_refill_handler(void); "nop\n\t" \ ".set pop\n\t") -int r3k_have_wired_reg; /* should be in cpu_data? */ +static int r3k_have_wired_reg; /* Should be in cpu_data? */ /* TLB operations. */ -void local_flush_tlb_all(void) +static void local_flush_tlb_from(int entry) { - unsigned long flags; unsigned long old_ctx; - int entry; - -#ifdef DEBUG_TLB - printk("[tlball]"); -#endif - local_irq_save(flags); old_ctx = read_c0_entryhi() & ASID_MASK; write_c0_entrylo0(0); - entry = r3k_have_wired_reg ? read_c0_wired() : 8; - for (; entry < current_cpu_data.tlbsize; entry++) { + while (entry < current_cpu_data.tlbsize) { write_c0_index(entry << 8); write_c0_entryhi((entry | 0x80000) << 12); - BARRIER; + entry++; /* BARRIER */ tlb_write_indexed(); } write_c0_entryhi(old_ctx); +} + +void local_flush_tlb_all(void) +{ + unsigned long flags; + +#ifdef DEBUG_TLB + printk("[tlball]"); +#endif + local_irq_save(flags); + local_flush_tlb_from(r3k_have_wired_reg ? read_c0_wired() : 8); local_irq_restore(flags); } @@ -277,7 +280,13 @@ void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, void tlb_init(void) { - local_flush_tlb_all(); - + switch (current_cpu_type()) { + case CPU_TX3922: + case CPU_TX3927: + r3k_have_wired_reg = 1; + write_c0_wired(0); /* Set to 8 on reset... */ + break; + } + local_flush_tlb_from(0); build_tlb_refill_handler(); } diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 08318ecb803a..5037d5868cef 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -423,7 +423,7 @@ int __init has_transparent_hugepage(void) * lifetime of the system */ -int temp_tlb_entry __cpuinitdata; +int temp_tlb_entry; __init int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1, unsigned long entryhi, unsigned long pagemask) diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 97c87027c17f..323d1d302f2b 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -35,7 +35,7 @@ #include <asm/uasm.h> #include <asm/setup.h> -static int __cpuinitdata mips_xpa_disabled; +static int mips_xpa_disabled; static int __init xpa_disable(char *s) { @@ -1608,23 +1608,32 @@ build_pte_present(u32 **p, struct uasm_reloc **r, int pte, int ptr, int scratch, enum label_id lid) { int t = scratch >= 0 ? scratch : pte; + int cur = pte; if (cpu_has_rixi) { if (use_bbit_insns()) { uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid); uasm_i_nop(p); } else { - uasm_i_srl(p, t, pte, _PAGE_PRESENT_SHIFT); - uasm_i_andi(p, t, t, 1); + if (_PAGE_PRESENT_SHIFT) { + uasm_i_srl(p, t, cur, _PAGE_PRESENT_SHIFT); + cur = t; + } + uasm_i_andi(p, t, cur, 1); uasm_il_beqz(p, r, t, lid); if (pte == t) /* You lose the SMP race :-(*/ iPTE_LW(p, pte, ptr); } } else { - uasm_i_srl(p, t, pte, _PAGE_PRESENT_SHIFT); - uasm_i_andi(p, t, t, 3); - uasm_i_xori(p, t, t, 3); + if (_PAGE_PRESENT_SHIFT) { + uasm_i_srl(p, t, cur, _PAGE_PRESENT_SHIFT); + cur = t; + } + uasm_i_andi(p, t, cur, + (_PAGE_PRESENT | _PAGE_READ) >> _PAGE_PRESENT_SHIFT); + uasm_i_xori(p, t, t, + (_PAGE_PRESENT | _PAGE_READ) >> _PAGE_PRESENT_SHIFT); uasm_il_bnez(p, r, t, lid); if (pte == t) /* You lose the SMP race :-(*/ @@ -1652,10 +1661,16 @@ build_pte_writable(u32 **p, struct uasm_reloc **r, enum label_id lid) { int t = scratch >= 0 ? scratch : pte; + int cur = pte; - uasm_i_srl(p, t, pte, _PAGE_PRESENT_SHIFT); - uasm_i_andi(p, t, t, 5); - uasm_i_xori(p, t, t, 5); + if (_PAGE_PRESENT_SHIFT) { + uasm_i_srl(p, t, cur, _PAGE_PRESENT_SHIFT); + cur = t; + } + uasm_i_andi(p, t, cur, + (_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT); + uasm_i_xori(p, t, t, + (_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT); uasm_il_bnez(p, r, t, lid); if (pte == t) /* You lose the SMP race :-(*/ diff --git a/arch/mips/mti-malta/Makefile b/arch/mips/mti-malta/Makefile index 6510ace272d4..ea35587a5c29 100644 --- a/arch/mips/mti-malta/Makefile +++ b/arch/mips/mti-malta/Makefile @@ -5,7 +5,7 @@ # Copyright (C) 2008 Wind River Systems, Inc. # written by Ralf Baechle <[email protected]> # -obj-y := malta-display.o malta-init.o \ +obj-y := malta-display.o malta-dt.o malta-init.o \ malta-int.o malta-memory.o malta-platform.o \ malta-reset.o malta-setup.o malta-time.o diff --git a/arch/mips/mti-malta/malta-dt.c b/arch/mips/mti-malta/malta-dt.c new file mode 100644 index 000000000000..47a22889285f --- /dev/null +++ b/arch/mips/mti-malta/malta-dt.c @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2015 Imagination Technologies + * Author: Paul Burton <[email protected]> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/clk-provider.h> +#include <linux/init.h> +#include <linux/of_fdt.h> +#include <linux/of_platform.h> + +void __init device_tree_init(void) +{ + unflatten_and_copy_device_tree(); +} + +static const struct of_device_id bus_ids[] __initconst = { + { .compatible = "simple-bus", }, + { .compatible = "isa", }, + {}, +}; + +static int __init publish_devices(void) +{ + if (!of_have_populated_dt()) + return 0; + + return of_platform_bus_probe(NULL, bus_ids, NULL); +} +device_initcall(publish_devices); diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c index db7c9e5826a6..9d1e7f5ec36c 100644 --- a/arch/mips/mti-malta/malta-setup.c +++ b/arch/mips/mti-malta/malta-setup.c @@ -21,6 +21,7 @@ #include <linux/sched.h> #include <linux/ioport.h> #include <linux/irq.h> +#include <linux/of_fdt.h> #include <linux/pci.h> #include <linux/screen_info.h> #include <linux/time.h> @@ -31,6 +32,7 @@ #include <asm/mips-boards/malta.h> #include <asm/mips-boards/maltaint.h> #include <asm/dma.h> +#include <asm/prom.h> #include <asm/traps.h> #ifdef CONFIG_VT #include <linux/console.h> @@ -249,6 +251,8 @@ void __init plat_mem_setup(void) { unsigned int i; + __dt_setup_arch(__dtb_start); + if (config_enabled(CONFIG_EVA)) /* EVA has already been configured in mach-malta/kernel-init.h */ pr_info("Enhanced Virtual Addressing (EVA) activated\n"); diff --git a/arch/mips/net/Makefile b/arch/mips/net/Makefile index ae74b3a91f5c..8c2771401f54 100644 --- a/arch/mips/net/Makefile +++ b/arch/mips/net/Makefile @@ -1,3 +1,3 @@ # MIPS networking code -obj-$(CONFIG_BPF_JIT) += bpf_jit.o +obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_asm.o diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index e23fdf2a9c80..0c4a133f6216 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -20,6 +20,7 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/types.h> +#include <asm/asm.h> #include <asm/bitops.h> #include <asm/cacheflush.h> #include <asm/cpu-features.h> @@ -28,14 +29,14 @@ #include "bpf_jit.h" /* ABI - * - * s0 1st scratch register - * s1 2nd scratch register - * s2 offset register - * s3 BPF register A - * s4 BPF register X - * s5 *skb - * s6 *scratch memory + * r_skb_hl SKB header length + * r_data SKB data pointer + * r_off Offset + * r_A BPF register A + * r_X BPF register X + * r_skb *skb + * r_M *scratch memory + * r_skb_len SKB length * * On entry (*bpf_func)(*skb, *filter) * a0 = MIPS_R_A0 = skb; @@ -63,44 +64,8 @@ * ---------------------------------------------------- */ -#define RSIZE (sizeof(unsigned long)) #define ptr typeof(unsigned long) -/* ABI specific return values */ -#ifdef CONFIG_32BIT /* O32 */ -#ifdef CONFIG_CPU_LITTLE_ENDIAN -#define r_err MIPS_R_V1 -#define r_val MIPS_R_V0 -#else /* CONFIG_CPU_LITTLE_ENDIAN */ -#define r_err MIPS_R_V0 -#define r_val MIPS_R_V1 -#endif -#else /* N64 */ -#define r_err MIPS_R_V0 -#define r_val MIPS_R_V0 -#endif - -#define r_ret MIPS_R_V0 - -/* - * Use 2 scratch registers to avoid pipeline interlocks. - * There is no overhead during epilogue and prologue since - * any of the $s0-$s6 registers will only be preserved if - * they are going to actually be used. - */ -#define r_s0 MIPS_R_S0 /* scratch reg 1 */ -#define r_s1 MIPS_R_S1 /* scratch reg 2 */ -#define r_off MIPS_R_S2 -#define r_A MIPS_R_S3 -#define r_X MIPS_R_S4 -#define r_skb MIPS_R_S5 -#define r_M MIPS_R_S6 -#define r_tmp_imm MIPS_R_T6 /* No need to preserve this */ -#define r_tmp MIPS_R_T7 /* No need to preserve this */ -#define r_zero MIPS_R_ZERO -#define r_sp MIPS_R_SP -#define r_ra MIPS_R_RA - #define SCRATCH_OFF(k) (4 * (k)) /* JIT flags */ @@ -108,13 +73,13 @@ #define SEEN_SREG_SFT (BPF_MEMWORDS + 1) #define SEEN_SREG_BASE (1 << SEEN_SREG_SFT) #define SEEN_SREG(x) (SEEN_SREG_BASE << (x)) -#define SEEN_S0 SEEN_SREG(0) -#define SEEN_S1 SEEN_SREG(1) #define SEEN_OFF SEEN_SREG(2) #define SEEN_A SEEN_SREG(3) #define SEEN_X SEEN_SREG(4) #define SEEN_SKB SEEN_SREG(5) #define SEEN_MEM SEEN_SREG(6) +/* SEEN_SK_DATA also implies skb_hl an skb_len */ +#define SEEN_SKB_DATA (SEEN_SREG(7) | SEEN_SREG(1) | SEEN_SREG(0)) /* Arguments used by JIT */ #define ARGS_USED_BY_JIT 2 /* only applicable to 64-bit */ @@ -577,27 +542,13 @@ static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset) /* Adjust the stack pointer */ emit_stack_offset(-align_sp(offset), ctx); - if (ctx->flags & SEEN_CALL) { - /* Argument save area */ - if (config_enabled(CONFIG_64BIT)) - /* Bottom of current frame */ - real_off = align_sp(offset) - RSIZE; - else - /* Top of previous frame */ - real_off = align_sp(offset) + RSIZE; - emit_store_stack_reg(MIPS_R_A0, r_sp, real_off, ctx); - emit_store_stack_reg(MIPS_R_A1, r_sp, real_off + RSIZE, ctx); - - real_off = 0; - } - tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; /* sflags is essentially a bitmap */ while (tmp_flags) { if ((sflags >> i) & 0x1) { emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off, ctx); - real_off += RSIZE; + real_off += SZREG; } i++; tmp_flags >>= 1; @@ -606,13 +557,13 @@ static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset) /* save return address */ if (ctx->flags & SEEN_CALL) { emit_store_stack_reg(r_ra, r_sp, real_off, ctx); - real_off += RSIZE; + real_off += SZREG; } /* Setup r_M leaving the alignment gap if necessary */ if (ctx->flags & SEEN_MEM) { - if (real_off % (RSIZE * 2)) - real_off += RSIZE; + if (real_off % (SZREG * 2)) + real_off += SZREG; emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off); } } @@ -623,19 +574,6 @@ static void restore_bpf_jit_regs(struct jit_ctx *ctx, int i, real_off = 0; u32 sflags, tmp_flags; - if (ctx->flags & SEEN_CALL) { - if (config_enabled(CONFIG_64BIT)) - /* Bottom of current frame */ - real_off = align_sp(offset) - RSIZE; - else - /* Top of previous frame */ - real_off = align_sp(offset) + RSIZE; - emit_load_stack_reg(MIPS_R_A0, r_sp, real_off, ctx); - emit_load_stack_reg(MIPS_R_A1, r_sp, real_off + RSIZE, ctx); - - real_off = 0; - } - tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; /* sflags is a bitmap */ i = 0; @@ -643,7 +581,7 @@ static void restore_bpf_jit_regs(struct jit_ctx *ctx, if ((sflags >> i) & 0x1) { emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off, ctx); - real_off += RSIZE; + real_off += SZREG; } i++; tmp_flags >>= 1; @@ -663,23 +601,13 @@ static unsigned int get_stack_depth(struct jit_ctx *ctx) /* How may s* regs do we need to preserved? */ - sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * RSIZE; + sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * SZREG; if (ctx->flags & SEEN_MEM) sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */ if (ctx->flags & SEEN_CALL) - /* - * The JIT code make calls to external functions using 2 - * arguments. Therefore, for o32 we don't need to allocate - * space because we don't care if the argumetns are lost - * across calls. We do need however to preserve incoming - * arguments but the space is already allocated for us by - * the caller. On the other hand, for n64, we need to allocate - * this space ourselves. We need to preserve $ra as well. - */ - sp_off += config_enabled(CONFIG_64BIT) ? - (ARGS_USED_BY_JIT + 1) * RSIZE : RSIZE; + sp_off += SZREG; /* Space for our ra register */ return sp_off; } @@ -696,6 +624,19 @@ static void build_prologue(struct jit_ctx *ctx) if (ctx->flags & SEEN_SKB) emit_reg_move(r_skb, MIPS_R_A0, ctx); + if (ctx->flags & SEEN_SKB_DATA) { + /* Load packet length */ + emit_load(r_skb_len, r_skb, offsetof(struct sk_buff, len), + ctx); + emit_load(r_tmp, r_skb, offsetof(struct sk_buff, data_len), + ctx); + /* Load the data pointer */ + emit_load_ptr(r_skb_data, r_skb, + offsetof(struct sk_buff, data), ctx); + /* Load the header length */ + emit_subu(r_skb_hl, r_skb_len, r_tmp, ctx); + } + if (ctx->flags & SEEN_X) emit_jit_reg_move(r_X, r_zero, ctx); @@ -718,43 +659,17 @@ static void build_epilogue(struct jit_ctx *ctx) emit_nop(ctx); } -static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset) -{ - u8 ret; - int err; - - err = skb_copy_bits(skb, offset, &ret, 1); - - return (u64)err << 32 | ret; -} - -static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset) -{ - u16 ret; - int err; - - err = skb_copy_bits(skb, offset, &ret, 2); - - return (u64)err << 32 | ntohs(ret); -} - -static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset) -{ - u32 ret; - int err; - - err = skb_copy_bits(skb, offset, &ret, 4); - - return (u64)err << 32 | ntohl(ret); -} +#define CHOOSE_LOAD_FUNC(K, func) \ + ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative : func) : \ + func##_positive) static int build_body(struct jit_ctx *ctx) { - void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; const struct bpf_prog *prog = ctx->skf; const struct sock_filter *inst; - unsigned int i, off, load_order, condt; + unsigned int i, off, condt; u32 k, b_off __maybe_unused; + u8 (*sk_load_func)(unsigned long *skb, int offset); for (i = 0; i < prog->len; i++) { u16 code; @@ -788,71 +703,46 @@ static int build_body(struct jit_ctx *ctx) break; case BPF_LD | BPF_W | BPF_ABS: /* A <- P[k:4] */ - load_order = 2; + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_word); goto load; case BPF_LD | BPF_H | BPF_ABS: /* A <- P[k:2] */ - load_order = 1; + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_half); goto load; case BPF_LD | BPF_B | BPF_ABS: /* A <- P[k:1] */ - load_order = 0; + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_byte); load: - /* the interpreter will deal with the negative K */ - if ((int)k < 0) - return -ENOTSUPP; - emit_load_imm(r_off, k, ctx); load_common: - /* - * We may got here from the indirect loads so - * return if offset is negative. - */ - emit_slt(r_s0, r_off, r_zero, ctx); - emit_bcond(MIPS_COND_NE, r_s0, r_zero, - b_imm(prog->len, ctx), ctx); - emit_reg_move(r_ret, r_zero, ctx); - - ctx->flags |= SEEN_CALL | SEEN_OFF | SEEN_S0 | - SEEN_SKB | SEEN_A; + ctx->flags |= SEEN_CALL | SEEN_OFF | + SEEN_SKB | SEEN_A | SEEN_SKB_DATA; - emit_load_func(r_s0, (ptr)load_func[load_order], - ctx); + emit_load_func(r_s0, (ptr)sk_load_func, ctx); emit_reg_move(MIPS_R_A0, r_skb, ctx); emit_jalr(MIPS_R_RA, r_s0, ctx); /* Load second argument to delay slot */ emit_reg_move(MIPS_R_A1, r_off, ctx); /* Check the error value */ - if (config_enabled(CONFIG_64BIT)) { - /* Get error code from the top 32-bits */ - emit_dsrl32(r_s0, r_val, 0, ctx); - /* Branch to 3 instructions ahead */ - emit_bcond(MIPS_COND_NE, r_s0, r_zero, 3 << 2, - ctx); - } else { - /* Branch to 3 instructions ahead */ - emit_bcond(MIPS_COND_NE, r_err, r_zero, 3 << 2, - ctx); - } - emit_nop(ctx); - /* We are good */ - emit_b(b_imm(i + 1, ctx), ctx); - emit_jit_reg_move(r_A, r_val, ctx); + emit_bcond(MIPS_COND_EQ, r_ret, 0, b_imm(i + 1, ctx), + ctx); + /* Load return register on DS for failures */ + emit_reg_move(r_ret, r_zero, ctx); /* Return with error */ emit_b(b_imm(prog->len, ctx), ctx); - emit_reg_move(r_ret, r_zero, ctx); + emit_nop(ctx); break; case BPF_LD | BPF_W | BPF_IND: /* A <- P[X + k:4] */ - load_order = 2; + sk_load_func = sk_load_word; goto load_ind; case BPF_LD | BPF_H | BPF_IND: /* A <- P[X + k:2] */ - load_order = 1; + sk_load_func = sk_load_half; goto load_ind; case BPF_LD | BPF_B | BPF_IND: /* A <- P[X + k:1] */ - load_order = 0; + sk_load_func = sk_load_byte; load_ind: ctx->flags |= SEEN_OFF | SEEN_X; emit_addiu(r_off, r_X, k, ctx); @@ -874,14 +764,10 @@ load_ind: emit_load(r_X, r_skb, off, ctx); break; case BPF_LDX | BPF_B | BPF_MSH: - /* the interpreter will deal with the negative K */ - if ((int)k < 0) - return -ENOTSUPP; - /* X <- 4 * (P[k:1] & 0xf) */ - ctx->flags |= SEEN_X | SEEN_CALL | SEEN_S0 | SEEN_SKB; + ctx->flags |= SEEN_X | SEEN_CALL | SEEN_SKB; /* Load offset to a1 */ - emit_load_func(r_s0, (ptr)jit_get_skb_b, ctx); + emit_load_func(r_s0, (ptr)sk_load_byte, ctx); /* * This may emit two instructions so it may not fit * in the delay slot. So use a0 in the delay slot. @@ -890,25 +776,15 @@ load_ind: emit_jalr(MIPS_R_RA, r_s0, ctx); emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */ /* Check the error value */ - if (config_enabled(CONFIG_64BIT)) { - /* Top 32-bits of $v0 on 64-bit */ - emit_dsrl32(r_s0, r_val, 0, ctx); - emit_bcond(MIPS_COND_NE, r_s0, r_zero, - 3 << 2, ctx); - } else { - emit_bcond(MIPS_COND_NE, r_err, r_zero, - 3 << 2, ctx); - } - /* No need for delay slot */ + emit_bcond(MIPS_COND_NE, r_ret, 0, + b_imm(prog->len, ctx), ctx); + emit_reg_move(r_ret, r_zero, ctx); /* We are good */ /* X <- P[1:K] & 0xf */ - emit_andi(r_X, r_val, 0xf, ctx); + emit_andi(r_X, r_A, 0xf, ctx); /* X << 2 */ emit_b(b_imm(i + 1, ctx), ctx); emit_sll(r_X, r_X, 2, ctx); /* delay slot */ - /* Return with error */ - emit_b(b_imm(prog->len, ctx), ctx); - emit_load_imm(r_ret, 0, ctx); /* delay slot */ break; case BPF_ST: /* M[k] <- A */ @@ -943,7 +819,7 @@ load_ind: case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ /* Load K to scratch register before MUL */ - ctx->flags |= SEEN_A | SEEN_S0; + ctx->flags |= SEEN_A; emit_load_imm(r_s0, k, ctx); emit_mul(r_A, r_A, r_s0, ctx); break; @@ -961,7 +837,7 @@ load_ind: emit_srl(r_A, r_A, k, ctx); break; } - ctx->flags |= SEEN_A | SEEN_S0; + ctx->flags |= SEEN_A; emit_load_imm(r_s0, k, ctx); emit_div(r_A, r_s0, ctx); break; @@ -971,7 +847,7 @@ load_ind: ctx->flags |= SEEN_A; emit_jit_reg_move(r_A, r_zero, ctx); } else { - ctx->flags |= SEEN_A | SEEN_S0; + ctx->flags |= SEEN_A; emit_load_imm(r_s0, k, ctx); emit_mod(r_A, r_s0, ctx); } @@ -982,7 +858,7 @@ load_ind: /* Check if r_X is zero */ emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_imm(prog->len, ctx), ctx); - emit_load_imm(r_val, 0, ctx); /* delay slot */ + emit_load_imm(r_ret, 0, ctx); /* delay slot */ emit_div(r_A, r_X, ctx); break; case BPF_ALU | BPF_MOD | BPF_X: @@ -991,7 +867,7 @@ load_ind: /* Check if r_X is zero */ emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_imm(prog->len, ctx), ctx); - emit_load_imm(r_val, 0, ctx); /* delay slot */ + emit_load_imm(r_ret, 0, ctx); /* delay slot */ emit_mod(r_A, r_X, ctx); break; case BPF_ALU | BPF_OR | BPF_K: @@ -1085,10 +961,10 @@ jmp_cmp: if ((condt & MIPS_COND_GE) || (condt & MIPS_COND_GT)) { if (condt & MIPS_COND_K) { /* K */ - ctx->flags |= SEEN_S0 | SEEN_A; + ctx->flags |= SEEN_A; emit_sltiu(r_s0, r_A, k, ctx); } else { /* X */ - ctx->flags |= SEEN_S0 | SEEN_A | + ctx->flags |= SEEN_A | SEEN_X; emit_sltu(r_s0, r_A, r_X, ctx); } @@ -1100,7 +976,7 @@ jmp_cmp: /* A > (K|X) ? scratch = 0 */ if (condt & MIPS_COND_GT) { /* Checking for equality */ - ctx->flags |= SEEN_S0 | SEEN_A | SEEN_X; + ctx->flags |= SEEN_A | SEEN_X; if (condt & MIPS_COND_K) emit_load_imm(r_s0, k, ctx); else @@ -1123,7 +999,7 @@ jmp_cmp: } else { /* A == K|X */ if (condt & MIPS_COND_K) { /* K */ - ctx->flags |= SEEN_S0 | SEEN_A; + ctx->flags |= SEEN_A; emit_load_imm(r_s0, k, ctx); /* jump true */ b_off = b_imm(i + inst->jt + 1, ctx); @@ -1153,7 +1029,7 @@ jmp_cmp: } break; case BPF_JMP | BPF_JSET | BPF_K: - ctx->flags |= SEEN_S0 | SEEN_S1 | SEEN_A; + ctx->flags |= SEEN_A; /* pc += (A & K) ? pc -> jt : pc -> jf */ emit_load_imm(r_s1, k, ctx); emit_and(r_s0, r_A, r_s1, ctx); @@ -1167,7 +1043,7 @@ jmp_cmp: emit_nop(ctx); break; case BPF_JMP | BPF_JSET | BPF_X: - ctx->flags |= SEEN_S0 | SEEN_X | SEEN_A; + ctx->flags |= SEEN_X | SEEN_A; /* pc += (A & X) ? pc -> jt : pc -> jf */ emit_and(r_s0, r_A, r_X, ctx); /* jump true */ @@ -1251,7 +1127,7 @@ jmp_cmp: break; case BPF_ANC | SKF_AD_IFINDEX: /* A = skb->dev->ifindex */ - ctx->flags |= SEEN_SKB | SEEN_A | SEEN_S0; + ctx->flags |= SEEN_SKB | SEEN_A; off = offsetof(struct sk_buff, dev); /* Load *dev pointer */ emit_load_ptr(r_s0, r_skb, off, ctx); @@ -1278,7 +1154,7 @@ jmp_cmp: break; case BPF_ANC | SKF_AD_VLAN_TAG: case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: - ctx->flags |= SEEN_SKB | SEEN_S0 | SEEN_A; + ctx->flags |= SEEN_SKB | SEEN_A; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); off = offsetof(struct sk_buff, vlan_tci); diff --git a/arch/mips/net/bpf_jit.h b/arch/mips/net/bpf_jit.h index 3a5751b4335a..8f9f54841123 100644 --- a/arch/mips/net/bpf_jit.h +++ b/arch/mips/net/bpf_jit.h @@ -15,9 +15,10 @@ /* Registers used by JIT */ #define MIPS_R_ZERO 0 #define MIPS_R_V0 2 -#define MIPS_R_V1 3 #define MIPS_R_A0 4 #define MIPS_R_A1 5 +#define MIPS_R_T4 12 +#define MIPS_R_T5 13 #define MIPS_R_T6 14 #define MIPS_R_T7 15 #define MIPS_R_S0 16 @@ -41,4 +42,43 @@ #define MIPS_COND_X (0x1 << 5) #define MIPS_COND_K (0x1 << 6) +#define r_ret MIPS_R_V0 + +/* + * Use 2 scratch registers to avoid pipeline interlocks. + * There is no overhead during epilogue and prologue since + * any of the $s0-$s6 registers will only be preserved if + * they are going to actually be used. + */ +#define r_skb_hl MIPS_R_S0 /* skb header length */ +#define r_skb_data MIPS_R_S1 /* skb actual data */ +#define r_off MIPS_R_S2 +#define r_A MIPS_R_S3 +#define r_X MIPS_R_S4 +#define r_skb MIPS_R_S5 +#define r_M MIPS_R_S6 +#define r_skb_len MIPS_R_S7 +#define r_s0 MIPS_R_T4 /* scratch reg 1 */ +#define r_s1 MIPS_R_T5 /* scratch reg 2 */ +#define r_tmp_imm MIPS_R_T6 /* No need to preserve this */ +#define r_tmp MIPS_R_T7 /* No need to preserve this */ +#define r_zero MIPS_R_ZERO +#define r_sp MIPS_R_SP +#define r_ra MIPS_R_RA + +#ifndef __ASSEMBLY__ + +/* Declare ASM helpers */ + +#define DECLARE_LOAD_FUNC(func) \ + extern u8 func(unsigned long *skb, int offset); \ + extern u8 func##_negative(unsigned long *skb, int offset); \ + extern u8 func##_positive(unsigned long *skb, int offset) + +DECLARE_LOAD_FUNC(sk_load_word); +DECLARE_LOAD_FUNC(sk_load_half); +DECLARE_LOAD_FUNC(sk_load_byte); + +#endif + #endif /* BPF_JIT_MIPS_OP_H */ diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S new file mode 100644 index 000000000000..e92726099be0 --- /dev/null +++ b/arch/mips/net/bpf_jit_asm.S @@ -0,0 +1,238 @@ +/* + * bpf_jib_asm.S: Packet/header access helper functions for MIPS/MIPS64 BPF + * compiler. + * + * Copyright (C) 2015 Imagination Technologies Ltd. + * Author: Markos Chandras <[email protected]> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + */ + +#include <asm/asm.h> +#include <asm/regdef.h> +#include "bpf_jit.h" + +/* ABI + * + * r_skb_hl skb header length + * r_skb_data skb data + * r_off(a1) offset register + * r_A BPF register A + * r_X PF register X + * r_skb(a0) *skb + * r_M *scratch memory + * r_skb_le skb length + * r_s0 Scratch register 0 + * r_s1 Scratch register 1 + * + * On entry: + * a0: *skb + * a1: offset (imm or imm + X) + * + * All non-BPF-ABI registers are free for use. On return, we only + * care about r_ret. The BPF-ABI registers are assumed to remain + * unmodified during the entire filter operation. + */ + +#define skb a0 +#define offset a1 +#define SKF_LL_OFF (-0x200000) /* Can't include linux/filter.h in assembly */ + + /* We know better :) so prevent assembler reordering etc */ + .set noreorder + +#define is_offset_negative(TYPE) \ + /* If offset is negative we have more work to do */ \ + slti t0, offset, 0; \ + bgtz t0, bpf_slow_path_##TYPE##_neg; \ + /* Be careful what follows in DS. */ + +#define is_offset_in_header(SIZE, TYPE) \ + /* Reading from header? */ \ + addiu $r_s0, $r_skb_hl, -SIZE; \ + slt t0, $r_s0, offset; \ + bgtz t0, bpf_slow_path_##TYPE; \ + +LEAF(sk_load_word) + is_offset_negative(word) + .globl sk_load_word_positive +sk_load_word_positive: + is_offset_in_header(4, word) + /* Offset within header boundaries */ + PTR_ADDU t1, $r_skb_data, offset + lw $r_A, 0(t1) +#ifdef CONFIG_CPU_LITTLE_ENDIAN + wsbh t0, $r_A + rotr $r_A, t0, 16 +#endif + jr $r_ra + move $r_ret, zero + END(sk_load_word) + +LEAF(sk_load_half) + is_offset_negative(half) + .globl sk_load_half_positive +sk_load_half_positive: + is_offset_in_header(2, half) + /* Offset within header boundaries */ + PTR_ADDU t1, $r_skb_data, offset + lh $r_A, 0(t1) +#ifdef CONFIG_CPU_LITTLE_ENDIAN + wsbh t0, $r_A + seh $r_A, t0 +#endif + jr $r_ra + move $r_ret, zero + END(sk_load_half) + +LEAF(sk_load_byte) + is_offset_negative(byte) + .globl sk_load_byte_positive +sk_load_byte_positive: + is_offset_in_header(1, byte) + /* Offset within header boundaries */ + PTR_ADDU t1, $r_skb_data, offset + lb $r_A, 0(t1) + jr $r_ra + move $r_ret, zero + END(sk_load_byte) + +/* + * call skb_copy_bits: + * (prototype in linux/skbuff.h) + * + * int skb_copy_bits(sk_buff *skb, int offset, void *to, int len) + * + * o32 mandates we leave 4 spaces for argument registers in case + * the callee needs to use them. Even though we don't care about + * the argument registers ourselves, we need to allocate that space + * to remain ABI compliant since the callee may want to use that space. + * We also allocate 2 more spaces for $r_ra and our return register (*to). + * + * n64 is a bit different. The *caller* will allocate the space to preserve + * the arguments. So in 64-bit kernels, we allocate the 4-arg space for no + * good reason but it does not matter that much really. + * + * (void *to) is returned in r_s0 + * + */ +#define bpf_slow_path_common(SIZE) \ + /* Quick check. Are we within reasonable boundaries? */ \ + LONG_ADDIU $r_s1, $r_skb_len, -SIZE; \ + sltu $r_s0, offset, $r_s1; \ + beqz $r_s0, fault; \ + /* Load 4th argument in DS */ \ + LONG_ADDIU a3, zero, SIZE; \ + PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \ + PTR_LA t0, skb_copy_bits; \ + PTR_S $r_ra, (5 * SZREG)($r_sp); \ + /* Assign low slot to a2 */ \ + move a2, $r_sp; \ + jalr t0; \ + /* Reset our destination slot (DS but it's ok) */ \ + INT_S zero, (4 * SZREG)($r_sp); \ + /* \ + * skb_copy_bits returns 0 on success and -EFAULT \ + * on error. Our data live in a2. Do not bother with \ + * our data if an error has been returned. \ + */ \ + /* Restore our frame */ \ + PTR_L $r_ra, (5 * SZREG)($r_sp); \ + INT_L $r_s0, (4 * SZREG)($r_sp); \ + bltz v0, fault; \ + PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \ + move $r_ret, zero; \ + +NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp) + bpf_slow_path_common(4) +#ifdef CONFIG_CPU_LITTLE_ENDIAN + wsbh t0, $r_s0 + jr $r_ra + rotr $r_A, t0, 16 +#endif + jr $r_ra + move $r_A, $r_s0 + + END(bpf_slow_path_word) + +NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp) + bpf_slow_path_common(2) +#ifdef CONFIG_CPU_LITTLE_ENDIAN + jr $r_ra + wsbh $r_A, $r_s0 +#endif + jr $r_ra + move $r_A, $r_s0 + + END(bpf_slow_path_half) + +NESTED(bpf_slow_path_byte, (6 * SZREG), $r_sp) + bpf_slow_path_common(1) + jr $r_ra + move $r_A, $r_s0 + + END(bpf_slow_path_byte) + +/* + * Negative entry points + */ + .macro bpf_is_end_of_data + li t0, SKF_LL_OFF + /* Reading link layer data? */ + slt t1, offset, t0 + bgtz t1, fault + /* Be careful what follows in DS. */ + .endm +/* + * call skb_copy_bits: + * (prototype in linux/filter.h) + * + * void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, + * int k, unsigned int size) + * + * see above (bpf_slow_path_common) for ABI restrictions + */ +#define bpf_negative_common(SIZE) \ + PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \ + PTR_LA t0, bpf_internal_load_pointer_neg_helper; \ + PTR_S $r_ra, (5 * SZREG)($r_sp); \ + jalr t0; \ + li a2, SIZE; \ + PTR_L $r_ra, (5 * SZREG)($r_sp); \ + /* Check return pointer */ \ + beqz v0, fault; \ + PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \ + /* Preserve our pointer */ \ + move $r_s0, v0; \ + /* Set return value */ \ + move $r_ret, zero; \ + +bpf_slow_path_word_neg: + bpf_is_end_of_data +NESTED(sk_load_word_negative, (6 * SZREG), $r_sp) + bpf_negative_common(4) + jr $r_ra + lw $r_A, 0($r_s0) + END(sk_load_word_negative) + +bpf_slow_path_half_neg: + bpf_is_end_of_data +NESTED(sk_load_half_negative, (6 * SZREG), $r_sp) + bpf_negative_common(2) + jr $r_ra + lhu $r_A, 0($r_s0) + END(sk_load_half_negative) + +bpf_slow_path_byte_neg: + bpf_is_end_of_data +NESTED(sk_load_byte_negative, (6 * SZREG), $r_sp) + bpf_negative_common(1) + jr $r_ra + lbu $r_A, 0($r_s0) + END(sk_load_byte_negative) + +fault: + jr $r_ra + addiu $r_ret, zero, 1 diff --git a/arch/mips/netlogic/xlr/platform-flash.c b/arch/mips/netlogic/xlr/platform-flash.c index 6d3c727e0ef8..f03131fec41d 100644 --- a/arch/mips/netlogic/xlr/platform-flash.c +++ b/arch/mips/netlogic/xlr/platform-flash.c @@ -78,8 +78,6 @@ static struct platform_device xlr_nor_dev = { .resource = xlr_nor_res, }; -const char *xlr_part_probes[] = { "cmdlinepart", NULL }; - /* * Use "gen_nand" driver for NAND flash * @@ -111,7 +109,6 @@ struct platform_nand_data xlr_nand_data = { .nr_partitions = ARRAY_SIZE(xlr_nand_parts), .chip_delay = 50, .partitions = xlr_nand_parts, - .part_probe_types = xlr_part_probes, }, .ctrl = { .cmd_ctrl = xlr_nand_ctrl, diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c index 07a18228e63a..dadb30306a0a 100644 --- a/arch/mips/pci/pci-ar2315.c +++ b/arch/mips/pci/pci-ar2315.c @@ -320,7 +320,7 @@ static int ar2315_pci_host_setup(struct ar2315_pci_ctrl *apc) static void ar2315_pci_irq_handler(unsigned irq, struct irq_desc *desc) { - struct ar2315_pci_ctrl *apc = irq_get_handler_data(irq); + struct ar2315_pci_ctrl *apc = irq_desc_get_handler_data(desc); u32 pending = ar2315_pci_reg_read(apc, AR2315_PCI_ISR) & ar2315_pci_reg_read(apc, AR2315_PCI_IMR); unsigned pci_irq = 0; diff --git a/arch/mips/pci/pci-ar71xx.c b/arch/mips/pci/pci-ar71xx.c index 9e62ad31d4b5..283157f8dc64 100644 --- a/arch/mips/pci/pci-ar71xx.c +++ b/arch/mips/pci/pci-ar71xx.c @@ -232,7 +232,7 @@ static void ar71xx_pci_irq_handler(unsigned int irq, struct irq_desc *desc) void __iomem *base = ath79_reset_base; u32 pending; - apc = irq_get_handler_data(irq); + apc = irq_desc_get_handler_data(desc); pending = __raw_readl(base + AR71XX_RESET_REG_PCI_INT_STATUS) & __raw_readl(base + AR71XX_RESET_REG_PCI_INT_ENABLE); @@ -318,23 +318,13 @@ static void ar71xx_pci_irq_init(struct ar71xx_pci_controller *apc) static void ar71xx_pci_reset(void) { - void __iomem *ddr_base = ath79_ddr_base; - ath79_device_reset_set(AR71XX_RESET_PCI_BUS | AR71XX_RESET_PCI_CORE); mdelay(100); ath79_device_reset_clear(AR71XX_RESET_PCI_BUS | AR71XX_RESET_PCI_CORE); mdelay(100); - __raw_writel(AR71XX_PCI_WIN0_OFFS, ddr_base + AR71XX_DDR_REG_PCI_WIN0); - __raw_writel(AR71XX_PCI_WIN1_OFFS, ddr_base + AR71XX_DDR_REG_PCI_WIN1); - __raw_writel(AR71XX_PCI_WIN2_OFFS, ddr_base + AR71XX_DDR_REG_PCI_WIN2); - __raw_writel(AR71XX_PCI_WIN3_OFFS, ddr_base + AR71XX_DDR_REG_PCI_WIN3); - __raw_writel(AR71XX_PCI_WIN4_OFFS, ddr_base + AR71XX_DDR_REG_PCI_WIN4); - __raw_writel(AR71XX_PCI_WIN5_OFFS, ddr_base + AR71XX_DDR_REG_PCI_WIN5); - __raw_writel(AR71XX_PCI_WIN6_OFFS, ddr_base + AR71XX_DDR_REG_PCI_WIN6); - __raw_writel(AR71XX_PCI_WIN7_OFFS, ddr_base + AR71XX_DDR_REG_PCI_WIN7); - + ath79_ddr_set_pci_windows(); mdelay(100); } diff --git a/arch/mips/pci/pci-ar724x.c b/arch/mips/pci/pci-ar724x.c index a1b7d2a1b0d5..0af362b5af92 100644 --- a/arch/mips/pci/pci-ar724x.c +++ b/arch/mips/pci/pci-ar724x.c @@ -231,7 +231,7 @@ static void ar724x_pci_irq_handler(unsigned int irq, struct irq_desc *desc) void __iomem *base; u32 pending; - apc = irq_get_handler_data(irq); + apc = irq_desc_get_handler_data(desc); base = apc->ctrl_base; pending = __raw_readl(base + AR724X_PCI_REG_INT_STATUS) & diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c index ec9be8ca4ada..80fafe646e74 100644 --- a/arch/mips/pci/pci-rt3883.c +++ b/arch/mips/pci/pci-rt3883.c @@ -134,7 +134,7 @@ static void rt3883_pci_irq_handler(unsigned int irq, struct irq_desc *desc) struct rt3883_pci_controller *rpc; u32 pending; - rpc = irq_get_handler_data(irq); + rpc = irq_desc_get_handler_data(desc); pending = rt3883_pci_r32(rpc, RT3883_PCI_REG_PCIINT) & rt3883_pci_r32(rpc, RT3883_PCI_REG_PCIENA); diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c index 7cf91b92e9d1..da301e0a2f1f 100644 --- a/arch/mips/ralink/irq.c +++ b/arch/mips/ralink/irq.c @@ -100,7 +100,7 @@ static void ralink_intc_irq_handler(unsigned int irq, struct irq_desc *desc) u32 pending = rt_intc_r32(INTC_REG_STATUS0); if (pending) { - struct irq_domain *domain = irq_get_handler_data(irq); + struct irq_domain *domain = irq_desc_get_handler_data(desc); generic_handle_irq(irq_find_mapping(domain, __ffs(pending))); } else { spurious_interrupt(); diff --git a/arch/mips/sgi-ip27/Makefile b/arch/mips/sgi-ip27/Makefile index da8f6816d346..ab4affa626c7 100644 --- a/arch/mips/sgi-ip27/Makefile +++ b/arch/mips/sgi-ip27/Makefile @@ -2,9 +2,9 @@ # Makefile for the IP27 specific kernel interface routines under Linux. # -obj-y := ip27-berr.o ip27-irq.o ip27-init.o ip27-klconfig.o ip27-klnuma.o \ - ip27-memory.o ip27-nmi.o ip27-reset.o ip27-timer.o ip27-hubio.o \ - ip27-xtalk.o +obj-y := ip27-berr.o ip27-irq.o ip27-irqno.o ip27-init.o ip27-klconfig.o \ + ip27-klnuma.o ip27-memory.o ip27-nmi.o ip27-reset.o ip27-timer.o \ + ip27-hubio.o ip27-xtalk.o obj-$(CONFIG_EARLY_PRINTK) += ip27-console.o obj-$(CONFIG_PCI) += ip27-irq-pci.o diff --git a/arch/mips/sgi-ip27/ip27-irqno.c b/arch/mips/sgi-ip27/ip27-irqno.c new file mode 100644 index 000000000000..957ab58e1c00 --- /dev/null +++ b/arch/mips/sgi-ip27/ip27-irqno.c @@ -0,0 +1,48 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/types.h> + +#include <asm/barrier.h> + +static DECLARE_BITMAP(irq_map, NR_IRQS); + +int allocate_irqno(void) +{ + int irq; + +again: + irq = find_first_zero_bit(irq_map, NR_IRQS); + + if (irq >= NR_IRQS) + return -ENOSPC; + + if (test_and_set_bit(irq, irq_map)) + goto again; + + return irq; +} + +/* + * Allocate the 16 legacy interrupts for i8259 devices. This happens early + * in the kernel initialization so treating allocation failure as BUG() is + * ok. + */ +void __init alloc_legacy_irqno(void) +{ + int i; + + for (i = 0; i <= 16; i++) + BUG_ON(test_and_set_bit(i, irq_map)); +} + +void free_irqno(unsigned int irq) +{ + smp_mb__before_atomic(); + clear_bit(irq, irq_map); + smp_mb__after_atomic(); +} diff --git a/arch/mips/sibyte/Kconfig b/arch/mips/sibyte/Kconfig index 5fbd3605d24f..a8bb972fd9fd 100644 --- a/arch/mips/sibyte/Kconfig +++ b/arch/mips/sibyte/Kconfig @@ -3,7 +3,7 @@ config SIBYTE_SB1250 select CEVT_SB1250 select CSRC_SB1250 select HW_HAS_PCI - select IRQ_CPU + select IRQ_MIPS_CPU select SIBYTE_ENABLE_LDT_IF_PCI select SIBYTE_HAS_ZBUS_PROFILING select SIBYTE_SB1xxx_SOC @@ -13,7 +13,7 @@ config SIBYTE_BCM1120 bool select CEVT_SB1250 select CSRC_SB1250 - select IRQ_CPU + select IRQ_MIPS_CPU select SIBYTE_BCM112X select SIBYTE_HAS_ZBUS_PROFILING select SIBYTE_SB1xxx_SOC @@ -23,7 +23,7 @@ config SIBYTE_BCM1125 select CEVT_SB1250 select CSRC_SB1250 select HW_HAS_PCI - select IRQ_CPU + select IRQ_MIPS_CPU select SIBYTE_BCM112X select SIBYTE_HAS_ZBUS_PROFILING select SIBYTE_SB1xxx_SOC @@ -33,7 +33,7 @@ config SIBYTE_BCM1125H select CEVT_SB1250 select CSRC_SB1250 select HW_HAS_PCI - select IRQ_CPU + select IRQ_MIPS_CPU select SIBYTE_BCM112X select SIBYTE_ENABLE_LDT_IF_PCI select SIBYTE_HAS_ZBUS_PROFILING @@ -43,7 +43,7 @@ config SIBYTE_BCM112X bool select CEVT_SB1250 select CSRC_SB1250 - select IRQ_CPU + select IRQ_MIPS_CPU select SIBYTE_SB1xxx_SOC select SIBYTE_HAS_ZBUS_PROFILING @@ -52,7 +52,7 @@ config SIBYTE_BCM1x80 select CEVT_BCM1480 select CSRC_BCM1480 select HW_HAS_PCI - select IRQ_CPU + select IRQ_MIPS_CPU select SIBYTE_HAS_ZBUS_PROFILING select SIBYTE_SB1xxx_SOC select SYS_SUPPORTS_SMP @@ -62,7 +62,7 @@ config SIBYTE_BCM1x55 select CEVT_BCM1480 select CSRC_BCM1480 select HW_HAS_PCI - select IRQ_CPU + select IRQ_MIPS_CPU select SIBYTE_SB1xxx_SOC select SIBYTE_HAS_ZBUS_PROFILING select SYS_SUPPORTS_SMP @@ -70,7 +70,7 @@ config SIBYTE_BCM1x55 config SIBYTE_SB1xxx_SOC bool select DMA_COHERENT - select IRQ_CPU + select IRQ_MIPS_CPU select SWAP_IO_SPACE select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL diff --git a/arch/mips/txx9/Kconfig b/arch/mips/txx9/Kconfig index 6d40bc783459..8c337d60f790 100644 --- a/arch/mips/txx9/Kconfig +++ b/arch/mips/txx9/Kconfig @@ -8,7 +8,7 @@ config MACH_TX49XX select MACH_TXX9 select CEVT_R4K select CSRC_R4K - select IRQ_CPU + select IRQ_MIPS_CPU select SYS_HAS_CPU_TX49XX select SYS_SUPPORTS_64BIT_KERNEL diff --git a/arch/mips/vr41xx/Kconfig b/arch/mips/vr41xx/Kconfig index c1be6b37fb2a..74927b4d4f0b 100644 --- a/arch/mips/vr41xx/Kconfig +++ b/arch/mips/vr41xx/Kconfig @@ -8,7 +8,7 @@ config CASIO_E55 select CEVT_R4K select CSRC_R4K select DMA_NONCOHERENT - select IRQ_CPU + select IRQ_MIPS_CPU select ISA select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_LITTLE_ENDIAN @@ -18,7 +18,7 @@ config IBM_WORKPAD select CEVT_R4K select CSRC_R4K select DMA_NONCOHERENT - select IRQ_CPU + select IRQ_MIPS_CPU select ISA select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_LITTLE_ENDIAN @@ -28,7 +28,7 @@ config TANBAC_TB022X select CEVT_R4K select CSRC_R4K select DMA_NONCOHERENT - select IRQ_CPU + select IRQ_MIPS_CPU select HW_HAS_PCI select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_LITTLE_ENDIAN @@ -44,7 +44,7 @@ config VICTOR_MPC30X select CEVT_R4K select CSRC_R4K select DMA_NONCOHERENT - select IRQ_CPU + select IRQ_MIPS_CPU select HW_HAS_PCI select PCI_VR41XX select SYS_SUPPORTS_32BIT_KERNEL @@ -55,7 +55,7 @@ config ZAO_CAPCELLA select CEVT_R4K select CSRC_R4K select DMA_NONCOHERENT - select IRQ_CPU + select IRQ_MIPS_CPU select HW_HAS_PCI select PCI_VR41XX select SYS_SUPPORTS_32BIT_KERNEL diff --git a/arch/um/Kconfig.um b/arch/um/Kconfig.um index 6e67847f5272..28a9885e3a37 100644 --- a/arch/um/Kconfig.um +++ b/arch/um/Kconfig.um @@ -44,23 +44,9 @@ config HOSTFS If you'd like to be able to work with files stored on the host, say Y or M here; otherwise say N. -config HPPFS - tristate "HoneyPot ProcFS" - depends on PROC_FS - help - hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc - entries to be overridden, removed, or fabricated from the host. - Its purpose is to allow a UML to appear to be a physical machine - by removing or changing anything in /proc which gives away the - identity of a UML. - - See <http://user-mode-linux.sf.net/old/hppfs.html> for more information. - - You only need this if you are setting up a UML honeypot. Otherwise, - it is safe to say 'N' here. - config MCONSOLE bool "Management console" + depends on PROC_FS default y help The user mode linux management console is a low-level interface to diff --git a/arch/um/Makefile b/arch/um/Makefile index 17d4460b1af3..098ab3333e7c 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -68,9 +68,10 @@ KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \ KBUILD_AFLAGS += $(ARCH_INCLUDE) -USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -D__KERNEL__,,\ - $(patsubst -I%,,$(KBUILD_CFLAGS)))) $(ARCH_INCLUDE) $(MODE_INCLUDE) \ - $(filter -I%,$(CFLAGS)) -D_FILE_OFFSET_BITS=64 -idirafter include +USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -I%,,$(KBUILD_CFLAGS))) \ + $(ARCH_INCLUDE) $(MODE_INCLUDE) $(filter -I%,$(CFLAGS)) \ + -D_FILE_OFFSET_BITS=64 -idirafter include \ + -D__KERNEL__ -D__UM_HOST__ #This will adjust *FLAGS accordingly to the platform. include $(ARCH_DIR)/Makefile-os-$(OS) diff --git a/arch/um/drivers/harddog_user.c b/arch/um/drivers/harddog_user.c index f99b32a4dbff..3aa8b0d52a48 100644 --- a/arch/um/drivers/harddog_user.c +++ b/arch/um/drivers/harddog_user.c @@ -9,8 +9,8 @@ #include <os.h> struct dog_data { - int stdin; - int stdout; + int stdin_fd; + int stdout_fd; int close_me[2]; }; @@ -18,11 +18,11 @@ static void pre_exec(void *d) { struct dog_data *data = d; - dup2(data->stdin, 0); - dup2(data->stdout, 1); - dup2(data->stdout, 2); - close(data->stdin); - close(data->stdout); + dup2(data->stdin_fd, 0); + dup2(data->stdout_fd, 1); + dup2(data->stdout_fd, 2); + close(data->stdin_fd); + close(data->stdout_fd); close(data->close_me[0]); close(data->close_me[1]); } @@ -49,8 +49,8 @@ int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock) goto out_close_in; } - data.stdin = out_fds[0]; - data.stdout = in_fds[1]; + data.stdin_fd = out_fds[0]; + data.stdout_fd = in_fds[1]; data.close_me[0] = out_fds[1]; data.close_me[1] = in_fds[0]; diff --git a/arch/um/drivers/mconsole.h b/arch/um/drivers/mconsole.h index 8b22535c62ce..44af7379ea19 100644 --- a/arch/um/drivers/mconsole.h +++ b/arch/um/drivers/mconsole.h @@ -7,7 +7,7 @@ #ifndef __MCONSOLE_H__ #define __MCONSOLE_H__ -#ifndef __KERNEL__ +#ifdef __UM_HOST__ #include <stdint.h> #define u32 uint32_t #endif diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c index cd14157b556d..e697a4136707 100644 --- a/arch/um/drivers/net_user.c +++ b/arch/um/drivers/net_user.c @@ -166,7 +166,7 @@ int net_sendto(int fd, void *buf, int len, void *to, int sock_len) struct change_pre_exec_data { int close_me; - int stdout; + int stdout_fd; }; static void change_pre_exec(void *arg) @@ -174,7 +174,7 @@ static void change_pre_exec(void *arg) struct change_pre_exec_data *data = arg; close(data->close_me); - dup2(data->stdout, 1); + dup2(data->stdout_fd, 1); } static int change_tramp(char **argv, char *output, int output_len) @@ -189,7 +189,7 @@ static int change_tramp(char **argv, char *output, int output_len) return err; } pe_data.close_me = fds[0]; - pe_data.stdout = fds[1]; + pe_data.stdout_fd = fds[1]; pid = run_helper(change_pre_exec, &pe_data, argv); if (pid > 0) /* Avoid hang as we won't get data in failure case. */ diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c index 55c290d925f3..0d6b66c64a81 100644 --- a/arch/um/drivers/slip_user.c +++ b/arch/um/drivers/slip_user.c @@ -55,8 +55,8 @@ static int set_up_tty(int fd) } struct slip_pre_exec_data { - int stdin; - int stdout; + int stdin_fd; + int stdout_fd; int close_me; }; @@ -64,9 +64,9 @@ static void slip_pre_exec(void *arg) { struct slip_pre_exec_data *data = arg; - if (data->stdin >= 0) - dup2(data->stdin, 0); - dup2(data->stdout, 1); + if (data->stdin_fd >= 0) + dup2(data->stdin_fd, 0); + dup2(data->stdout_fd, 1); if (data->close_me >= 0) close(data->close_me); } @@ -85,8 +85,8 @@ static int slip_tramp(char **argv, int fd) } err = 0; - pe_data.stdin = fd; - pe_data.stdout = fds[1]; + pe_data.stdin_fd = fd; + pe_data.stdout_fd = fds[1]; pe_data.close_me = fds[0]; err = run_helper(slip_pre_exec, &pe_data, argv); if (err < 0) diff --git a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c index c999d187abb9..98b6a41a254e 100644 --- a/arch/um/drivers/slirp_user.c +++ b/arch/um/drivers/slirp_user.c @@ -20,18 +20,18 @@ static int slirp_user_init(void *data, void *dev) } struct slirp_pre_exec_data { - int stdin; - int stdout; + int stdin_fd; + int stdout_fd; }; static void slirp_pre_exec(void *arg) { struct slirp_pre_exec_data *data = arg; - if (data->stdin != -1) - dup2(data->stdin, 0); - if (data->stdout != -1) - dup2(data->stdout, 1); + if (data->stdin_fd != -1) + dup2(data->stdin_fd, 0); + if (data->stdout_fd != -1) + dup2(data->stdout_fd, 1); } static int slirp_tramp(char **argv, int fd) @@ -39,8 +39,8 @@ static int slirp_tramp(char **argv, int fd) struct slirp_pre_exec_data pe_data; int pid; - pe_data.stdin = fd; - pe_data.stdout = fd; + pe_data.stdin_fd = fd; + pe_data.stdout_fd = fd; pid = run_helper(slirp_pre_exec, &pe_data, argv); return pid; diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index b7df3ae9be51..3d63ff6f583f 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -21,7 +21,6 @@ generic-y += param.h generic-y += pci.h generic-y += percpu.h generic-y += preempt.h -generic-y += sections.h generic-y += switch_to.h generic-y += topology.h generic-y += trace_clock.h diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h index cb9b3c47ca8e..2966adbbdf6c 100644 --- a/arch/um/include/asm/ptrace-generic.h +++ b/arch/um/include/asm/ptrace-generic.h @@ -8,7 +8,6 @@ #ifndef __ASSEMBLY__ -#include <asm/ptrace-abi.h> #include <sysdep/ptrace.h> struct pt_regs { @@ -37,7 +36,7 @@ extern int putreg(struct task_struct *child, int regno, unsigned long value); extern int arch_copy_tls(struct task_struct *new); extern void clear_flushed_tls(struct task_struct *task); -extern void syscall_trace_enter(struct pt_regs *regs); +extern int syscall_trace_enter(struct pt_regs *regs); extern void syscall_trace_leave(struct pt_regs *regs); #endif diff --git a/arch/um/include/asm/sections.h b/arch/um/include/asm/sections.h new file mode 100644 index 000000000000..cafcf684d947 --- /dev/null +++ b/arch/um/include/asm/sections.h @@ -0,0 +1,9 @@ +#ifndef __UM_SECTIONS_H +#define __UM_SECTIONS_H + +#include <asm-generic/sections.h> + +extern char __binary_start[]; +extern char __syscall_stub_start[], __syscall_stub_end[]; + +#endif diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index b30c85b141d9..53968aaf76f9 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -10,7 +10,7 @@ #include <asm/types.h> #include <asm/page.h> -#include <asm/uaccess.h> +#include <asm/segment.h> struct thread_info { struct task_struct *task; /* main task structure */ diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h index 3f22fbf7ca1d..3705620ca298 100644 --- a/arch/um/include/asm/uaccess.h +++ b/arch/um/include/asm/uaccess.h @@ -1,178 +1,52 @@ /* * Copyright (C) 2002 Jeff Dike ([email protected]) + * Copyright (C) 2015 Richard Weinberger ([email protected]) * Licensed under the GPL */ #ifndef __UM_UACCESS_H #define __UM_UACCESS_H -/* thread_info has a mm_segment_t in it, so put the definition up here */ -typedef struct { - unsigned long seg; -} mm_segment_t; - -#include <linux/thread_info.h> -#include <linux/errno.h> -#include <asm/processor.h> +#include <asm/thread_info.h> #include <asm/elf.h> -#define VERIFY_READ 0 -#define VERIFY_WRITE 1 - -/* - * The fs value determines whether argument validity checking should be - * performed or not. If get_fs() == USER_DS, checking is performed, with - * get_fs() == KERNEL_DS, checking is bypassed. - * - * For historical reasons, these macros are grossly misnamed. - */ - -#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) - -#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) -#define USER_DS MAKE_MM_SEG(TASK_SIZE) - -#define get_ds() (KERNEL_DS) -#define get_fs() (current_thread_info()->addr_limit) -#define set_fs(x) (current_thread_info()->addr_limit = (x)) - -#define segment_eq(a, b) ((a).seg == (b).seg) - #define __under_task_size(addr, size) \ (((unsigned long) (addr) < TASK_SIZE) && \ (((unsigned long) (addr) + (size)) < TASK_SIZE)) -#define __access_ok_vsyscall(type, addr, size) \ - ((type == VERIFY_READ) && \ - ((unsigned long) (addr) >= FIXADDR_USER_START) && \ +#define __access_ok_vsyscall(addr, size) \ + (((unsigned long) (addr) >= FIXADDR_USER_START) && \ ((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \ ((unsigned long) (addr) + (size) >= (unsigned long)(addr))) #define __addr_range_nowrap(addr, size) \ ((unsigned long) (addr) <= ((unsigned long) (addr) + (size))) -#define access_ok(type, addr, size) \ - (__addr_range_nowrap(addr, size) && \ - (__under_task_size(addr, size) || \ - __access_ok_vsyscall(type, addr, size) || \ - segment_eq(get_fs(), KERNEL_DS))) - -extern int copy_from_user(void *to, const void __user *from, int n); -extern int copy_to_user(void __user *to, const void *from, int n); - -/* - * strncpy_from_user: - Copy a NUL terminated string from userspace. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ - -extern int strncpy_from_user(char *dst, const char __user *src, int count); - -/* - * __clear_user: - Zero a block of memory in user space, with less checking. - * @to: Destination address, in user space. - * @n: Number of bytes to zero. - * - * Zero a block of memory in user space. Caller must check - * the specified block with access_ok() before calling this function. - * - * Returns number of bytes that could not be cleared. - * On success, this will be zero. - */ -extern int __clear_user(void __user *mem, int len); - -/* - * clear_user: - Zero a block of memory in user space. - * @to: Destination address, in user space. - * @n: Number of bytes to zero. - * - * Zero a block of memory in user space. - * - * Returns number of bytes that could not be cleared. - * On success, this will be zero. - */ -extern int clear_user(void __user *mem, int len); - -/* - * strlen_user: - Get the size of a string in user space. - * @str: The string to measure. - * @n: The maximum valid length - * - * Get the size of a NUL-terminated string in user space. - * - * Returns the size of the string INCLUDING the terminating NUL. - * On exception, returns 0. - * If the string is too long, returns a value greater than @n. - */ -extern int strnlen_user(const void __user *str, int len); - -#define __copy_from_user(to, from, n) copy_from_user(to, from, n) - -#define __copy_to_user(to, from, n) copy_to_user(to, from, n) - +extern long __copy_from_user(void *to, const void __user *from, unsigned long n); +extern long __copy_to_user(void __user *to, const void *from, unsigned long n); +extern long __strncpy_from_user(char *dst, const char __user *src, long count); +extern long __strnlen_user(const void __user *str, long len); +extern unsigned long __clear_user(void __user *mem, unsigned long len); +static inline int __access_ok(unsigned long addr, unsigned long size); + +/* Teach asm-generic/uaccess.h that we have C functions for these. */ +#define __access_ok __access_ok +#define __clear_user __clear_user +#define __copy_to_user __copy_to_user +#define __copy_from_user __copy_from_user +#define __strnlen_user __strnlen_user +#define __strncpy_from_user __strncpy_from_user #define __copy_to_user_inatomic __copy_to_user #define __copy_from_user_inatomic __copy_from_user -#define __get_user(x, ptr) \ -({ \ - const __typeof__(*(ptr)) __user *__private_ptr = (ptr); \ - __typeof__(x) __private_val; \ - int __private_ret = -EFAULT; \ - (x) = (__typeof__(*(__private_ptr)))0; \ - if (__copy_from_user((__force void *)&__private_val, (__private_ptr),\ - sizeof(*(__private_ptr))) == 0) { \ - (x) = (__typeof__(*(__private_ptr))) __private_val; \ - __private_ret = 0; \ - } \ - __private_ret; \ -}) - -#define get_user(x, ptr) \ -({ \ - const __typeof__((*(ptr))) __user *private_ptr = (ptr); \ - (access_ok(VERIFY_READ, private_ptr, sizeof(*private_ptr)) ? \ - __get_user(x, private_ptr) : ((x) = (__typeof__(*ptr))0, -EFAULT)); \ -}) - -#define __put_user(x, ptr) \ -({ \ - __typeof__(*(ptr)) __user *__private_ptr = ptr; \ - __typeof__(*(__private_ptr)) __private_val; \ - int __private_ret = -EFAULT; \ - __private_val = (__typeof__(*(__private_ptr))) (x); \ - if (__copy_to_user((__private_ptr), &__private_val, \ - sizeof(*(__private_ptr))) == 0) { \ - __private_ret = 0; \ - } \ - __private_ret; \ -}) - -#define put_user(x, ptr) \ -({ \ - __typeof__(*(ptr)) __user *private_ptr = (ptr); \ - (access_ok(VERIFY_WRITE, private_ptr, sizeof(*private_ptr)) ? \ - __put_user(x, private_ptr) : -EFAULT); \ -}) - -#define strlen_user(str) strnlen_user(str, ~0U >> 1) +#include <asm-generic/uaccess.h> -struct exception_table_entry +static inline int __access_ok(unsigned long addr, unsigned long size) { - unsigned long insn; - unsigned long fixup; -}; + return __addr_range_nowrap(addr, size) && + (__under_task_size(addr, size) || + __access_ok_vsyscall(addr, size) || + segment_eq(get_fs(), KERNEL_DS)); +} #endif diff --git a/arch/um/include/shared/init.h b/arch/um/include/shared/init.h index b3906f860a87..233e2593eee0 100644 --- a/arch/um/include/shared/init.h +++ b/arch/um/include/shared/init.h @@ -40,28 +40,8 @@ typedef int (*initcall_t)(void); typedef void (*exitcall_t)(void); -#ifndef __KERNEL__ -#ifndef __section -# define __section(S) __attribute__ ((__section__(#S))) -#endif - -#if __GNUC__ == 3 - -#if __GNUC_MINOR__ >= 3 -# define __used __attribute__((__used__)) -#else -# define __used __attribute__((__unused__)) -#endif - -#else -#if __GNUC__ == 4 -# define __used __attribute__((__used__)) -#endif -#endif - -#else #include <linux/compiler.h> -#endif + /* These are for everybody (although not all archs will actually discard it in modules) */ #define __init __section(.init.text) @@ -131,7 +111,7 @@ extern struct uml_param __uml_setup_start, __uml_setup_end; #define __uml_postsetup_call __used __section(.uml.postsetup.init) #define __uml_exit_call __used __section(.uml.exitcall.exit) -#ifndef __KERNEL__ +#ifdef __UM_HOST__ #define __define_initcall(level,fn) \ static initcall_t __initcall_##fn __used \ diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index d824528f6f62..ad3fa3ae6d34 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -301,4 +301,6 @@ extern int get_pty(void); /* sys-$ARCH/task_size.c */ extern unsigned long os_get_top_address(void); +long syscall(long number, ...); + #endif diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h index cef068563336..4cff19f6207a 100644 --- a/arch/um/include/shared/user.h +++ b/arch/um/include/shared/user.h @@ -17,7 +17,7 @@ #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) /* This is to get size_t */ -#ifdef __KERNEL__ +#ifndef __UM_HOST__ #include <linux/types.h> #else #include <stddef.h> diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c index 543c04756939..232b22307fdd 100644 --- a/arch/um/kernel/ksyms.c +++ b/arch/um/kernel/ksyms.c @@ -42,3 +42,5 @@ EXPORT_SYMBOL(os_makedev); EXPORT_SYMBOL(add_sigio_fd); EXPORT_SYMBOL(ignore_sigio_fd); EXPORT_SYMBOL(sigio_broken); + +EXPORT_SYMBOL(syscall); diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index 9034fc8056b4..4c9861b421fd 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -8,6 +8,7 @@ #include <linux/mm.h> #include <linux/pfn.h> #include <asm/page.h> +#include <asm/sections.h> #include <as-layout.h> #include <init.h> #include <kern.h> @@ -55,8 +56,6 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len, } } -extern int __syscall_stub_start; - /** * setup_physmem() - Setup physical memory for UML * @start: Start address of the physical kernel memory, @@ -110,8 +109,8 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end, * Special kludge - This page will be mapped in to userspace processes * from physmem_fd, so it needs to be written out there. */ - os_seek_file(physmem_fd, __pa(&__syscall_stub_start)); - os_write_file(physmem_fd, &__syscall_stub_start, PAGE_SIZE); + os_seek_file(physmem_fd, __pa(__syscall_stub_start)); + os_write_file(physmem_fd, __syscall_stub_start, PAGE_SIZE); os_fsync_file(physmem_fd); bootmap_size = init_bootmem(pfn, pfn + delta); diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index 174ee5017264..6a826cbb15c4 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -8,6 +8,7 @@ #include <linux/sched.h> #include <linux/tracehook.h> #include <asm/uaccess.h> +#include <asm/ptrace-abi.h> void user_enable_single_step(struct task_struct *child) { @@ -131,7 +132,7 @@ static void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs, * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check */ -void syscall_trace_enter(struct pt_regs *regs) +int syscall_trace_enter(struct pt_regs *regs) { audit_syscall_entry(UPT_SYSCALL_NR(®s->regs), UPT_SYSCALL_ARG1(®s->regs), @@ -140,9 +141,9 @@ void syscall_trace_enter(struct pt_regs *regs) UPT_SYSCALL_ARG4(®s->regs)); if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return; + return 0; - tracehook_report_syscall_entry(regs); + return tracehook_report_syscall_entry(regs); } void syscall_trace_leave(struct pt_regs *regs) diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 94abdcc1d6ad..fda1deba1757 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -8,12 +8,11 @@ #include <linux/slab.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> +#include <asm/sections.h> #include <as-layout.h> #include <os.h> #include <skas.h> -extern int __syscall_stub_start; - static int init_stub_pte(struct mm_struct *mm, unsigned long proc, unsigned long kernel) { @@ -93,7 +92,7 @@ void uml_setup_stubs(struct mm_struct *mm) int err, ret; ret = init_stub_pte(mm, STUB_CODE, - (unsigned long) &__syscall_stub_start); + (unsigned long) __syscall_stub_start); if (ret) goto out; @@ -101,7 +100,7 @@ void uml_setup_stubs(struct mm_struct *mm) if (ret) goto out; - mm->context.stub_pages[0] = virt_to_page(&__syscall_stub_start); + mm->context.stub_pages[0] = virt_to_page(__syscall_stub_start); mm->context.stub_pages[1] = virt_to_page(mm->context.id.stack); /* dup_mmap already holds mmap_sem */ diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index c0681e097432..d9ec0068b623 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -18,7 +18,10 @@ void handle_syscall(struct uml_pt_regs *r) long result; int syscall; - syscall_trace_enter(regs); + if (syscall_trace_enter(regs)) { + result = -ENOSYS; + goto out; + } /* * This should go in the declaration of syscall, but when I do that, @@ -34,6 +37,7 @@ void handle_syscall(struct uml_pt_regs *r) result = -ENOSYS; else result = EXECUTE_SYSCALL(syscall, regs); +out: PT_REGS_SET_SYSCALL_RETURN(regs, result); syscall_trace_leave(regs); diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c index 4ffb644d6c07..85ac8adb069b 100644 --- a/arch/um/kernel/skas/uaccess.c +++ b/arch/um/kernel/skas/uaccess.c @@ -87,10 +87,10 @@ static int do_op_one_page(unsigned long addr, int len, int is_write, return n; } -static int buffer_op(unsigned long addr, int len, int is_write, - int (*op)(unsigned long, int, void *), void *arg) +static long buffer_op(unsigned long addr, int len, int is_write, + int (*op)(unsigned long, int, void *), void *arg) { - int size, remain, n; + long size, remain, n; size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len); remain = len; @@ -139,18 +139,16 @@ static int copy_chunk_from_user(unsigned long from, int len, void *arg) return 0; } -int copy_from_user(void *to, const void __user *from, int n) +long __copy_from_user(void *to, const void __user *from, unsigned long n) { if (segment_eq(get_fs(), KERNEL_DS)) { memcpy(to, (__force void*)from, n); return 0; } - return access_ok(VERIFY_READ, from, n) ? - buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to): - n; + return buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to); } -EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(__copy_from_user); static int copy_chunk_to_user(unsigned long to, int len, void *arg) { @@ -161,18 +159,16 @@ static int copy_chunk_to_user(unsigned long to, int len, void *arg) return 0; } -int copy_to_user(void __user *to, const void *from, int n) +long __copy_to_user(void __user *to, const void *from, unsigned long n) { if (segment_eq(get_fs(), KERNEL_DS)) { memcpy((__force void *) to, from, n); return 0; } - return access_ok(VERIFY_WRITE, to, n) ? - buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) : - n; + return buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from); } -EXPORT_SYMBOL(copy_to_user); +EXPORT_SYMBOL(__copy_to_user); static int strncpy_chunk_from_user(unsigned long from, int len, void *arg) { @@ -188,9 +184,9 @@ static int strncpy_chunk_from_user(unsigned long from, int len, void *arg) return 0; } -int strncpy_from_user(char *dst, const char __user *src, int count) +long __strncpy_from_user(char *dst, const char __user *src, long count) { - int n; + long n; char *ptr = dst; if (segment_eq(get_fs(), KERNEL_DS)) { @@ -198,16 +194,13 @@ int strncpy_from_user(char *dst, const char __user *src, int count) return strnlen(dst, count); } - if (!access_ok(VERIFY_READ, src, 1)) - return -EFAULT; - n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user, &ptr); if (n != 0) return -EFAULT; return strnlen(dst, count); } -EXPORT_SYMBOL(strncpy_from_user); +EXPORT_SYMBOL(__strncpy_from_user); static int clear_chunk(unsigned long addr, int len, void *unused) { @@ -215,22 +208,16 @@ static int clear_chunk(unsigned long addr, int len, void *unused) return 0; } -int __clear_user(void __user *mem, int len) -{ - return buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL); -} - -int clear_user(void __user *mem, int len) +unsigned long __clear_user(void __user *mem, unsigned long len) { if (segment_eq(get_fs(), KERNEL_DS)) { memset((__force void*)mem, 0, len); return 0; } - return access_ok(VERIFY_WRITE, mem, len) ? - buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len; + return buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL); } -EXPORT_SYMBOL(clear_user); +EXPORT_SYMBOL(__clear_user); static int strnlen_chunk(unsigned long str, int len, void *arg) { @@ -244,7 +231,7 @@ static int strnlen_chunk(unsigned long str, int len, void *arg) return 0; } -int strnlen_user(const void __user *str, int len) +long __strnlen_user(const void __user *str, long len) { int count = 0, n; @@ -256,4 +243,4 @@ int strnlen_user(const void __user *str, int len) return count + 1; return 0; } -EXPORT_SYMBOL(strnlen_user); +EXPORT_SYMBOL(__strnlen_user); diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 47ff9b7f3e5d..557232f758b6 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -220,6 +220,11 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, show_regs(container_of(regs, struct pt_regs, regs)); panic("Segfault with no mm"); } + else if (!is_user && address < TASK_SIZE) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Kernel tried to access user memory at addr 0x%lx, ip 0x%lx", + address, ip); + } if (SEGV_IS_FIXABLE(&fi)) err = handle_page_fault(address, ip, is_write, is_user, diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 07f798f4bcee..16630e75f056 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -248,8 +248,6 @@ EXPORT_SYMBOL(end_iomem); #define MIN_VMALLOC (32 * 1024 * 1024) -extern char __binary_start; - int __init linux_main(int argc, char **argv) { unsigned long avail, diff; @@ -294,7 +292,7 @@ int __init linux_main(int argc, char **argv) physmem_size += UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end); } - uml_physmem = (unsigned long) &__binary_start & PAGE_MASK; + uml_physmem = (unsigned long) __binary_start & PAGE_MASK; /* Reserve up to 4M after the current brk */ uml_reserved = ROUND_4M(brk_start) + (1 << 22); diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c index 14126d9176aa..c2e6e1dad876 100644 --- a/arch/um/os-Linux/drivers/tuntap_user.c +++ b/arch/um/os-Linux/drivers/tuntap_user.c @@ -47,7 +47,7 @@ static void tuntap_del_addr(unsigned char *addr, unsigned char *netmask, } struct tuntap_pre_exec_data { - int stdout; + int stdout_fd; int close_me; }; @@ -55,7 +55,7 @@ static void tuntap_pre_exec(void *arg) { struct tuntap_pre_exec_data *data = arg; - dup2(data->stdout, 1); + dup2(data->stdout_fd, 1); close(data->close_me); } @@ -74,7 +74,7 @@ static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote, sprintf(version_buf, "%d", UML_NET_VERSION); - data.stdout = remote; + data.stdout_fd = remote; data.close_me = me; pid = run_helper(tuntap_pre_exec, &data, argv); diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index 08d90fba952c..26e0164895e4 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -13,6 +13,7 @@ #include <sys/socket.h> #include <sys/stat.h> #include <sys/un.h> +#include <sys/types.h> #include <os.h> static void copy_stat(struct uml_stat *dst, const struct stat64 *src) diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 7b605e4dfffa..036d0dbc7b52 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -112,9 +112,11 @@ void timer_init(void) void set_sigstack(void *sig_stack, int size) { - stack_t stack = ((stack_t) { .ss_flags = 0, - .ss_sp = (__ptr_t) sig_stack, - .ss_size = size - sizeof(void *) }); + stack_t stack = { + .ss_flags = 0, + .ss_sp = sig_stack, + .ss_size = size - sizeof(void *) + }; if (sigaltstack(&stack, NULL) != 0) panic("enabling signal stack failed, errno = %d\n", errno); diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c index e7f8c945a573..35015e3e1e87 100644 --- a/arch/um/os-Linux/skas/mem.c +++ b/arch/um/os-Linux/skas/mem.c @@ -18,7 +18,7 @@ #include <sysdep/ptrace.h> #include <sysdep/stub.h> -extern unsigned long batch_syscall_stub, __syscall_stub_start; +extern char batch_syscall_stub[], __syscall_stub_start[]; extern void wait_stub_done(int pid); @@ -38,8 +38,8 @@ static int __init init_syscall_regs(void) { get_safe_registers(syscall_regs, NULL); syscall_regs[REGS_IP_INDEX] = STUB_CODE + - ((unsigned long) &batch_syscall_stub - - (unsigned long) &__syscall_stub_start); + ((unsigned long) batch_syscall_stub - + (unsigned long) __syscall_stub_start); return 0; } diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 7a9777570a62..3dddedba3a07 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -174,7 +174,7 @@ static void handle_trap(int pid, struct uml_pt_regs *regs, handle_syscall(regs); } -extern int __syscall_stub_start; +extern char __syscall_stub_start[]; static int userspace_tramp(void *stack) { @@ -197,7 +197,7 @@ static int userspace_tramp(void *stack) * This has a pte, but it can't be mapped in with the usual * tlb_flush mechanism because this is part of that mechanism */ - fd = phys_mapping(to_phys(&__syscall_stub_start), &offset); + fd = phys_mapping(to_phys(__syscall_stub_start), &offset); addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE, PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); if (addr == MAP_FAILED) { @@ -223,7 +223,7 @@ static int userspace_tramp(void *stack) unsigned long v = STUB_CODE + (unsigned long) stub_segv_handler - - (unsigned long) &__syscall_stub_start; + (unsigned long) __syscall_stub_start; set_sigstack((void *) STUB_DATA, UM_KERN_PAGE_SIZE); sigemptyset(&sa.sa_mask); @@ -447,7 +447,7 @@ static int __init init_thread_regs(void) /* Set parent's instruction pointer to start of clone-stub */ thread_regs[REGS_IP_INDEX] = STUB_CODE + (unsigned long) stub_clone_handler - - (unsigned long) &__syscall_stub_start; + (unsigned long) __syscall_stub_start; thread_regs[REGS_SP_INDEX] = STUB_DATA + UM_KERN_PAGE_SIZE - sizeof(void *); #ifdef __SIGNAL_FRAMESIZE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4fcf0ade7e91..d05a42357ef0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -27,6 +27,7 @@ config X86 select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_PMEM_API select ARCH_HAS_SG_CHAIN select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI @@ -1419,6 +1420,9 @@ source "mm/Kconfig" config X86_PMEM_LEGACY bool "Support non-standard NVDIMMs and ADR protected memory" + depends on PHYS_ADDR_T_64BIT + depends on BLK_DEV + select LIBNVDIMM help Treat memory marked using the non-standard e820 type of 12 as used by the Intel Sandy Bridge-EP reference BIOS as protected memory. diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 48304b89b601..2c82bd150d43 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1224,6 +1224,10 @@ static efi_status_t setup_e820(struct boot_params *params, e820_type = E820_NVS; break; + case EFI_PERSISTENT_MEMORY: + e820_type = E820_PMEM; + break; + default: continue; } diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index b6f7457d12e4..9bf3ea14b9f0 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -4,6 +4,7 @@ /* Caches aren't brain-dead on the intel. */ #include <asm-generic/cacheflush.h> #include <asm/special_insns.h> +#include <asm/uaccess.h> /* * The set_memory_* API can be used to change various attributes of a virtual @@ -108,4 +109,75 @@ static inline int rodata_test(void) } #endif +#ifdef ARCH_HAS_NOCACHE_UACCESS + +/** + * arch_memcpy_to_pmem - copy data to persistent memory + * @dst: destination buffer for the copy + * @src: source buffer for the copy + * @n: length of the copy in bytes + * + * Copy data to persistent memory media via non-temporal stores so that + * a subsequent arch_wmb_pmem() can flush cpu and memory controller + * write buffers to guarantee durability. + */ +static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, + size_t n) +{ + int unwritten; + + /* + * We are copying between two kernel buffers, if + * __copy_from_user_inatomic_nocache() returns an error (page + * fault) we would have already reported a general protection fault + * before the WARN+BUG. + */ + unwritten = __copy_from_user_inatomic_nocache((void __force *) dst, + (void __user *) src, n); + if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n", + __func__, dst, src, unwritten)) + BUG(); +} + +/** + * arch_wmb_pmem - synchronize writes to persistent memory + * + * After a series of arch_memcpy_to_pmem() operations this drains data + * from cpu write buffers and any platform (memory controller) buffers + * to ensure that written data is durable on persistent memory media. + */ +static inline void arch_wmb_pmem(void) +{ + /* + * wmb() to 'sfence' all previous writes such that they are + * architecturally visible to 'pcommit'. Note, that we've + * already arranged for pmem writes to avoid the cache via + * arch_memcpy_to_pmem(). + */ + wmb(); + pcommit_sfence(); +} + +static inline bool __arch_has_wmb_pmem(void) +{ +#ifdef CONFIG_X86_64 + /* + * We require that wmb() be an 'sfence', that is only guaranteed on + * 64-bit builds + */ + return static_cpu_has(X86_FEATURE_PCOMMIT); +#else + return false; +#endif +} +#else /* ARCH_HAS_NOCACHE_UACCESS i.e. ARCH=um */ +extern void arch_memcpy_to_pmem(void __pmem *dst, const void *src, size_t n); +extern void arch_wmb_pmem(void); + +static inline bool __arch_has_wmb_pmem(void) +{ + return false; +} +#endif + #endif /* _ASM_X86_CACHEFLUSH_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 83ec9b1d77cc..cc9c61bc1abe 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -248,6 +248,12 @@ static inline void flush_write_buffers(void) #endif } +static inline void __pmem *arch_memremap_pmem(resource_size_t offset, + unsigned long size) +{ + return (void __force __pmem *) ioremap_cache(offset, size); +} + #endif /* __KERNEL__ */ extern void native_io_delay(void); diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h index 960a8a9dc4ab..0f457e6eab18 100644 --- a/arch/x86/include/uapi/asm/e820.h +++ b/arch/x86/include/uapi/asm/e820.h @@ -32,6 +32,7 @@ #define E820_ACPI 3 #define E820_NVS 4 #define E820_UNUSABLE 5 +#define E820_PMEM 7 /* * This is a non-standardized way to represent ADR or NVDIMM regions that diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index c8dda42cb6a3..a102564d08eb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -149,6 +149,7 @@ static void __init e820_print_type(u32 type) case E820_UNUSABLE: printk(KERN_CONT "unusable"); break; + case E820_PMEM: case E820_PRAM: printk(KERN_CONT "persistent (type %u)", type); break; @@ -918,11 +919,32 @@ static inline const char *e820_type_to_string(int e820_type) case E820_ACPI: return "ACPI Tables"; case E820_NVS: return "ACPI Non-volatile Storage"; case E820_UNUSABLE: return "Unusable memory"; - case E820_PRAM: return "Persistent RAM"; + case E820_PRAM: return "Persistent Memory (legacy)"; + case E820_PMEM: return "Persistent Memory"; default: return "reserved"; } } +static bool do_mark_busy(u32 type, struct resource *res) +{ + /* this is the legacy bios/dos rom-shadow + mmio region */ + if (res->start < (1ULL<<20)) + return true; + + /* + * Treat persistent memory like device memory, i.e. reserve it + * for exclusive use of a driver + */ + switch (type) { + case E820_RESERVED: + case E820_PRAM: + case E820_PMEM: + return false; + default: + return true; + } +} + /* * Mark e820 reserved areas as busy for the resource manager. */ @@ -952,9 +974,7 @@ void __init e820_reserve_resources(void) * pci device BAR resource and insert them later in * pcibios_resource_survey() */ - if (((e820.map[i].type != E820_RESERVED) && - (e820.map[i].type != E820_PRAM)) || - res->start < (1ULL<<20)) { + if (do_mark_busy(e820.map[i].type, res)) { res->flags |= IORESOURCE_BUSY; insert_resource(&iomem_resource, res); } diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c index 3420c874ddc5..64f90f53bb85 100644 --- a/arch/x86/kernel/pmem.c +++ b/arch/x86/kernel/pmem.c @@ -1,53 +1,82 @@ /* * Copyright (c) 2015, Christoph Hellwig. + * Copyright (c) 2015, Intel Corporation. */ -#include <linux/memblock.h> #include <linux/platform_device.h> -#include <linux/slab.h> +#include <linux/libnvdimm.h> +#include <linux/module.h> #include <asm/e820.h> -#include <asm/page_types.h> -#include <asm/setup.h> -static __init void register_pmem_device(struct resource *res) +static void e820_pmem_release(struct device *dev) { - struct platform_device *pdev; - int error; + struct nvdimm_bus *nvdimm_bus = dev->platform_data; - pdev = platform_device_alloc("pmem", PLATFORM_DEVID_AUTO); - if (!pdev) - return; + if (nvdimm_bus) + nvdimm_bus_unregister(nvdimm_bus); +} - error = platform_device_add_resources(pdev, res, 1); - if (error) - goto out_put_pdev; +static struct platform_device e820_pmem = { + .name = "e820_pmem", + .id = -1, + .dev = { + .release = e820_pmem_release, + }, +}; - error = platform_device_add(pdev); - if (error) - goto out_put_pdev; - return; +static const struct attribute_group *e820_pmem_attribute_groups[] = { + &nvdimm_bus_attribute_group, + NULL, +}; -out_put_pdev: - dev_warn(&pdev->dev, "failed to add 'pmem' (persistent memory) device!\n"); - platform_device_put(pdev); -} +static const struct attribute_group *e820_pmem_region_attribute_groups[] = { + &nd_region_attribute_group, + &nd_device_attribute_group, + NULL, +}; -static __init int register_pmem_devices(void) +static __init int register_e820_pmem(void) { - int i; + static struct nvdimm_bus_descriptor nd_desc; + struct device *dev = &e820_pmem.dev; + struct nvdimm_bus *nvdimm_bus; + int rc, i; + + rc = platform_device_register(&e820_pmem); + if (rc) + return rc; + + nd_desc.attr_groups = e820_pmem_attribute_groups; + nd_desc.provider_name = "e820"; + nvdimm_bus = nvdimm_bus_register(dev, &nd_desc); + if (!nvdimm_bus) + goto err; + dev->platform_data = nvdimm_bus; for (i = 0; i < e820.nr_map; i++) { struct e820entry *ei = &e820.map[i]; + struct resource res = { + .flags = IORESOURCE_MEM, + .start = ei->addr, + .end = ei->addr + ei->size - 1, + }; + struct nd_region_desc ndr_desc; + + if (ei->type != E820_PRAM) + continue; - if (ei->type == E820_PRAM) { - struct resource res = { - .flags = IORESOURCE_MEM, - .start = ei->addr, - .end = ei->addr + ei->size - 1, - }; - register_pmem_device(&res); - } + memset(&ndr_desc, 0, sizeof(ndr_desc)); + ndr_desc.res = &res; + ndr_desc.attr_groups = e820_pmem_region_attribute_groups; + ndr_desc.numa_node = NUMA_NO_NODE; + if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc)) + goto err; } return 0; + + err: + dev_err(dev, "failed to register legacy persistent memory ranges\n"); + platform_device_unregister(&e820_pmem); + return -ENXIO; } -device_initcall(register_pmem_devices); +device_initcall(register_e820_pmem); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index c1c382c58c60..cfba30f27392 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -174,6 +174,9 @@ static void __init do_add_efi_memmap(void) case EFI_UNUSABLE_MEMORY: e820_type = E820_UNUSABLE; break; + case EFI_PERSISTENT_MEMORY: + e820_type = E820_PMEM; + break; default: /* * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h index 4b181b74454f..ee940185e89f 100644 --- a/arch/x86/um/asm/checksum.h +++ b/arch/x86/um/asm/checksum.h @@ -3,6 +3,7 @@ #include <linux/string.h> #include <linux/in6.h> +#include <linux/uaccess.h> /* * computes the checksum of a memory block at buff, length len, diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h index 0a656b727b1a..548197212a45 100644 --- a/arch/x86/um/asm/elf.h +++ b/arch/x86/um/asm/elf.h @@ -200,8 +200,6 @@ typedef elf_greg_t elf_gregset_t[ELF_NGREG]; typedef struct user_i387_struct elf_fpregset_t; -#define task_pt_regs(t) (&(t)->thread.regs) - struct task_struct; extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu); diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h index 2a206d2b14ab..233ee09c1ce8 100644 --- a/arch/x86/um/asm/processor.h +++ b/arch/x86/um/asm/processor.h @@ -28,6 +28,8 @@ static inline void rep_nop(void) #define cpu_relax() rep_nop() #define cpu_relax_lowlatency() cpu_relax() +#define task_pt_regs(t) (&(t)->thread.regs) + #include <asm/processor-generic.h> #endif diff --git a/arch/x86/um/asm/segment.h b/arch/x86/um/asm/segment.h index 45183fcd10b6..41dd5e1f3cd7 100644 --- a/arch/x86/um/asm/segment.h +++ b/arch/x86/um/asm/segment.h @@ -7,4 +7,12 @@ extern int host_gdt_entry_tls_min; #define GDT_ENTRY_TLS_MIN host_gdt_entry_tls_min #define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) +typedef struct { + unsigned long seg; +} mm_segment_t; + +#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) +#define KERNEL_DS MAKE_MM_SEG(~0UL) +#define USER_DS MAKE_MM_SEG(TASK_SIZE) + #endif diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c index 5c0b711d2433..9701a4fd7bf2 100644 --- a/arch/x86/um/ldt.c +++ b/arch/x86/um/ldt.c @@ -6,6 +6,7 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/slab.h> +#include <linux/uaccess.h> #include <asm/unistd.h> #include <os.h> #include <skas.h> diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c index f40281e5d6a2..744afdc18cf3 100644 --- a/arch/x86/um/mem_32.c +++ b/arch/x86/um/mem_32.c @@ -7,8 +7,7 @@ */ #include <linux/mm.h> -#include <asm/page.h> -#include <asm/mman.h> +#include <asm/elf.h> static struct vm_area_struct gate_vma; diff --git a/arch/x86/um/mem_64.c b/arch/x86/um/mem_64.c index f8fecaddcc0d..7642e2e2aa61 100644 --- a/arch/x86/um/mem_64.c +++ b/arch/x86/um/mem_64.c @@ -1,6 +1,5 @@ #include <linux/mm.h> -#include <asm/page.h> -#include <asm/mman.h> +#include <asm/elf.h> const char *arch_vma_name(struct vm_area_struct *vma) { diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c index ce3dd4f36f3f..a29756f2d940 100644 --- a/arch/x86/um/ptrace_32.c +++ b/arch/x86/um/ptrace_32.c @@ -6,6 +6,7 @@ #include <linux/mm.h> #include <linux/sched.h> #include <asm/uaccess.h> +#include <asm/ptrace-abi.h> #include <skas.h> extern int arch_switch_tls(struct task_struct *to); diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c index 3b52bf0b418a..a629694ee750 100644 --- a/arch/x86/um/ptrace_64.c +++ b/arch/x86/um/ptrace_64.c @@ -11,6 +11,7 @@ #define __FRAME_OFFSETS #include <asm/ptrace.h> #include <asm/uaccess.h> +#include <asm/ptrace-abi.h> /* * determines which flags the user has access to. diff --git a/arch/x86/um/shared/sysdep/tls.h b/arch/x86/um/shared/sysdep/tls.h index 27cce00c6b30..a682db13df23 100644 --- a/arch/x86/um/shared/sysdep/tls.h +++ b/arch/x86/um/shared/sysdep/tls.h @@ -1,7 +1,7 @@ #ifndef _SYSDEP_TLS_H #define _SYSDEP_TLS_H -# ifndef __KERNEL__ +#ifdef __UM_HOST__ /* Change name to avoid conflicts with the original one from <asm/ldt.h>, which * may be named user_desc (but in 2.4 and in header matching its API was named @@ -22,11 +22,11 @@ typedef struct um_dup_user_desc { #endif } user_desc_t; -# else /* __KERNEL__ */ +#else /* __UM_HOST__ */ typedef struct user_desc user_desc_t; -# endif /* __KERNEL__ */ +#endif /* __UM_HOST__ */ extern int os_set_thread_area(user_desc_t *info, int pid); extern int os_get_thread_area(user_desc_t *info, int pid); diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 592491d1d70d..06934a8a4872 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -541,7 +541,8 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, */ /* x86-64 should always use SA_RESTORER. */ if (ksig->ka.sa.sa_flags & SA_RESTORER) - err |= __put_user(ksig->ka.sa.sa_restorer, &frame->pretcode); + err |= __put_user((void *)ksig->ka.sa.sa_restorer, + &frame->pretcode); else /* could use a vstub here */ return err; diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c index adb08eb5c22a..e6552275320b 100644 --- a/arch/x86/um/syscalls_64.c +++ b/arch/x86/um/syscalls_64.c @@ -6,6 +6,7 @@ */ #include <linux/sched.h> +#include <linux/uaccess.h> #include <asm/prctl.h> /* XXX This should get the constants from libc */ #include <os.h> diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c index 80ffa5b9982d..48e38584d5c1 100644 --- a/arch/x86/um/tls_32.c +++ b/arch/x86/um/tls_32.c @@ -7,6 +7,7 @@ #include <linux/sched.h> #include <linux/syscalls.h> #include <asm/uaccess.h> +#include <asm/ptrace-abi.h> #include <os.h> #include <skas.h> #include <sysdep/tls.h> diff --git a/arch/x86/um/tls_64.c b/arch/x86/um/tls_64.c index d22363cb854e..3ad714373d7f 100644 --- a/arch/x86/um/tls_64.c +++ b/arch/x86/um/tls_64.c @@ -1,4 +1,5 @@ #include <linux/sched.h> +#include <asm/ptrace-abi.h> void clear_flushed_tls(struct task_struct *task) { diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c index 916cda4cd5b4..237c6831e095 100644 --- a/arch/x86/um/vdso/vma.c +++ b/arch/x86/um/vdso/vma.c @@ -10,6 +10,7 @@ #include <linux/sched.h> #include <linux/mm.h> #include <asm/page.h> +#include <asm/elf.h> #include <linux/init.h> static unsigned int __read_mostly vdso_enabled = 1; diff --git a/crypto/asymmetric_keys/asymmetric_keys.h b/crypto/asymmetric_keys/asymmetric_keys.h index f97330886d58..3f5b537ab33e 100644 --- a/crypto/asymmetric_keys/asymmetric_keys.h +++ b/crypto/asymmetric_keys/asymmetric_keys.h @@ -11,6 +11,9 @@ extern struct asymmetric_key_id *asymmetric_key_hex_to_key_id(const char *id); +extern int __asymmetric_key_hex_to_key_id(const char *id, + struct asymmetric_key_id *match_id, + size_t hexlen); static inline const struct asymmetric_key_ids *asymmetric_key_ids(const struct key *key) { diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c index bcbbbd794e1d..b0e4ed23d668 100644 --- a/crypto/asymmetric_keys/asymmetric_type.c +++ b/crypto/asymmetric_keys/asymmetric_type.c @@ -104,6 +104,15 @@ static bool asymmetric_match_key_ids( return false; } +/* helper function can be called directly with pre-allocated memory */ +inline int __asymmetric_key_hex_to_key_id(const char *id, + struct asymmetric_key_id *match_id, + size_t hexlen) +{ + match_id->len = hexlen; + return hex2bin(match_id->data, id, hexlen); +} + /** * asymmetric_key_hex_to_key_id - Convert a hex string into a key ID. * @id: The ID as a hex string. @@ -111,21 +120,20 @@ static bool asymmetric_match_key_ids( struct asymmetric_key_id *asymmetric_key_hex_to_key_id(const char *id) { struct asymmetric_key_id *match_id; - size_t hexlen; + size_t asciihexlen; int ret; if (!*id) return ERR_PTR(-EINVAL); - hexlen = strlen(id); - if (hexlen & 1) + asciihexlen = strlen(id); + if (asciihexlen & 1) return ERR_PTR(-EINVAL); - match_id = kmalloc(sizeof(struct asymmetric_key_id) + hexlen / 2, + match_id = kmalloc(sizeof(struct asymmetric_key_id) + asciihexlen / 2, GFP_KERNEL); if (!match_id) return ERR_PTR(-ENOMEM); - match_id->len = hexlen / 2; - ret = hex2bin(match_id->data, id, hexlen / 2); + ret = __asymmetric_key_hex_to_key_id(id, match_id, asciihexlen / 2); if (ret < 0) { kfree(match_id); return ERR_PTR(-EINVAL); diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index a6c42031628e..24f17e6c5904 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -28,17 +28,30 @@ static bool use_builtin_keys; static struct asymmetric_key_id *ca_keyid; #ifndef MODULE +static struct { + struct asymmetric_key_id id; + unsigned char data[10]; +} cakey; + static int __init ca_keys_setup(char *str) { if (!str) /* default system keyring */ return 1; if (strncmp(str, "id:", 3) == 0) { - struct asymmetric_key_id *p; - p = asymmetric_key_hex_to_key_id(str + 3); - if (p == ERR_PTR(-EINVAL)) - pr_err("Unparsable hex string in ca_keys\n"); - else if (!IS_ERR(p)) + struct asymmetric_key_id *p = &cakey.id; + size_t hexlen = (strlen(str) - 3) / 2; + int ret; + + if (hexlen == 0 || hexlen > sizeof(cakey.data)) { + pr_err("Missing or invalid ca_keys id\n"); + return 1; + } + + ret = __asymmetric_key_hex_to_key_id(str + 3, p, hexlen); + if (ret < 0) + pr_err("Unparsable ca_keys id hex string\n"); + else ca_keyid = p; /* owner key 'id:xxxxxx' */ } else if (strcmp(str, "builtin") == 0) { use_builtin_keys = true; diff --git a/drivers/Kconfig b/drivers/Kconfig index c0cc96bab9e7..6e973b8e3a3b 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -182,4 +182,6 @@ source "drivers/thunderbolt/Kconfig" source "drivers/android/Kconfig" +source "drivers/nvdimm/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 9a02fb7c5106..b64b49f6e01b 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -64,6 +64,7 @@ obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ obj-$(CONFIG_PARPORT) += parport/ obj-y += base/ block/ misc/ mfd/ nfc/ +obj-$(CONFIG_LIBNVDIMM) += nvdimm/ obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/ obj-$(CONFIG_NUBUS) += nubus/ obj-y += macintosh/ diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 35da507411a0..f15db002be8e 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -386,6 +386,32 @@ config ACPI_REDUCED_HARDWARE_ONLY If you are unsure what to do, do not enable this option. +config ACPI_NFIT + tristate "ACPI NVDIMM Firmware Interface Table (NFIT)" + depends on PHYS_ADDR_T_64BIT + depends on BLK_DEV + select LIBNVDIMM + help + Infrastructure to probe ACPI 6 compliant platforms for + NVDIMMs (NFIT) and register a libnvdimm device tree. In + addition to storage devices this also enables libnvdimm to pass + ACPI._DSM messages for platform/dimm configuration. + + To compile this driver as a module, choose M here: + the module will be called nfit. + +config ACPI_NFIT_DEBUG + bool "NFIT DSM debug" + depends on ACPI_NFIT + depends on DYNAMIC_DEBUG + default n + help + Enabling this option causes the nfit driver to dump the + input and output buffers of _DSM operations on the ACPI0012 + device and its children. This can be very verbose, so leave + it disabled unless you are debugging a hardware / firmware + issue. + source "drivers/acpi/apei/Kconfig" config ACPI_EXTLOG diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 73d840bef455..8321430d7f24 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -68,6 +68,7 @@ obj-$(CONFIG_ACPI_PCI_SLOT) += pci_slot.o obj-$(CONFIG_ACPI_PROCESSOR) += processor.o obj-y += container.o obj-$(CONFIG_ACPI_THERMAL) += thermal.o +obj-$(CONFIG_ACPI_NFIT) += nfit.o obj-y += acpi_memhotplug.o obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o obj-$(CONFIG_ACPI_BATTERY) += battery.o diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c new file mode 100644 index 000000000000..2161fa178c8d --- /dev/null +++ b/drivers/acpi/nfit.c @@ -0,0 +1,1587 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/list_sort.h> +#include <linux/libnvdimm.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/ndctl.h> +#include <linux/list.h> +#include <linux/acpi.h> +#include <linux/sort.h> +#include <linux/io.h> +#include "nfit.h" + +/* + * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is + * irrelevant. + */ +#include <asm-generic/io-64-nonatomic-hi-lo.h> + +static bool force_enable_dimms; +module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status"); + +static u8 nfit_uuid[NFIT_UUID_MAX][16]; + +const u8 *to_nfit_uuid(enum nfit_uuids id) +{ + return nfit_uuid[id]; +} +EXPORT_SYMBOL(to_nfit_uuid); + +static struct acpi_nfit_desc *to_acpi_nfit_desc( + struct nvdimm_bus_descriptor *nd_desc) +{ + return container_of(nd_desc, struct acpi_nfit_desc, nd_desc); +} + +static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc) +{ + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + + /* + * If provider == 'ACPI.NFIT' we can assume 'dev' is a struct + * acpi_device. + */ + if (!nd_desc->provider_name + || strcmp(nd_desc->provider_name, "ACPI.NFIT") != 0) + return NULL; + + return to_acpi_device(acpi_desc->dev); +} + +static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + const struct nd_cmd_desc *desc = NULL; + union acpi_object in_obj, in_buf, *out_obj; + struct device *dev = acpi_desc->dev; + const char *cmd_name, *dimm_name; + unsigned long dsm_mask; + acpi_handle handle; + const u8 *uuid; + u32 offset; + int rc, i; + + if (nvdimm) { + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + struct acpi_device *adev = nfit_mem->adev; + + if (!adev) + return -ENOTTY; + dimm_name = nvdimm_name(nvdimm); + cmd_name = nvdimm_cmd_name(cmd); + dsm_mask = nfit_mem->dsm_mask; + desc = nd_cmd_dimm_desc(cmd); + uuid = to_nfit_uuid(NFIT_DEV_DIMM); + handle = adev->handle; + } else { + struct acpi_device *adev = to_acpi_dev(acpi_desc); + + cmd_name = nvdimm_bus_cmd_name(cmd); + dsm_mask = nd_desc->dsm_mask; + desc = nd_cmd_bus_desc(cmd); + uuid = to_nfit_uuid(NFIT_DEV_BUS); + handle = adev->handle; + dimm_name = "bus"; + } + + if (!desc || (cmd && (desc->out_num + desc->in_num == 0))) + return -ENOTTY; + + if (!test_bit(cmd, &dsm_mask)) + return -ENOTTY; + + in_obj.type = ACPI_TYPE_PACKAGE; + in_obj.package.count = 1; + in_obj.package.elements = &in_buf; + in_buf.type = ACPI_TYPE_BUFFER; + in_buf.buffer.pointer = buf; + in_buf.buffer.length = 0; + + /* libnvdimm has already validated the input envelope */ + for (i = 0; i < desc->in_num; i++) + in_buf.buffer.length += nd_cmd_in_size(nvdimm, cmd, desc, + i, buf); + + if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) { + dev_dbg(dev, "%s:%s cmd: %s input length: %d\n", __func__, + dimm_name, cmd_name, in_buf.buffer.length); + print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, + 4, in_buf.buffer.pointer, min_t(u32, 128, + in_buf.buffer.length), true); + } + + out_obj = acpi_evaluate_dsm(handle, uuid, 1, cmd, &in_obj); + if (!out_obj) { + dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name, + cmd_name); + return -EINVAL; + } + + if (out_obj->package.type != ACPI_TYPE_BUFFER) { + dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n", + __func__, dimm_name, cmd_name, out_obj->type); + rc = -EINVAL; + goto out; + } + + if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) { + dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__, + dimm_name, cmd_name, out_obj->buffer.length); + print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, + 4, out_obj->buffer.pointer, min_t(u32, 128, + out_obj->buffer.length), true); + } + + for (i = 0, offset = 0; i < desc->out_num; i++) { + u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf, + (u32 *) out_obj->buffer.pointer); + + if (offset + out_size > out_obj->buffer.length) { + dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n", + __func__, dimm_name, cmd_name, i); + break; + } + + if (in_buf.buffer.length + offset + out_size > buf_len) { + dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n", + __func__, dimm_name, cmd_name, i); + rc = -ENXIO; + goto out; + } + memcpy(buf + in_buf.buffer.length + offset, + out_obj->buffer.pointer + offset, out_size); + offset += out_size; + } + if (offset + in_buf.buffer.length < buf_len) { + if (i >= 1) { + /* + * status valid, return the number of bytes left + * unfilled in the output buffer + */ + rc = buf_len - offset - in_buf.buffer.length; + } else { + dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n", + __func__, dimm_name, cmd_name, buf_len, + offset); + rc = -ENXIO; + } + } else + rc = 0; + + out: + ACPI_FREE(out_obj); + + return rc; +} + +static const char *spa_type_name(u16 type) +{ + static const char *to_name[] = { + [NFIT_SPA_VOLATILE] = "volatile", + [NFIT_SPA_PM] = "pmem", + [NFIT_SPA_DCR] = "dimm-control-region", + [NFIT_SPA_BDW] = "block-data-window", + [NFIT_SPA_VDISK] = "volatile-disk", + [NFIT_SPA_VCD] = "volatile-cd", + [NFIT_SPA_PDISK] = "persistent-disk", + [NFIT_SPA_PCD] = "persistent-cd", + + }; + + if (type > NFIT_SPA_PCD) + return "unknown"; + + return to_name[type]; +} + +static int nfit_spa_type(struct acpi_nfit_system_address *spa) +{ + int i; + + for (i = 0; i < NFIT_UUID_MAX; i++) + if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0) + return i; + return -1; +} + +static bool add_spa(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_system_address *spa) +{ + struct device *dev = acpi_desc->dev; + struct nfit_spa *nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa), + GFP_KERNEL); + + if (!nfit_spa) + return false; + INIT_LIST_HEAD(&nfit_spa->list); + nfit_spa->spa = spa; + list_add_tail(&nfit_spa->list, &acpi_desc->spas); + dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__, + spa->range_index, + spa_type_name(nfit_spa_type(spa))); + return true; +} + +static bool add_memdev(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_memory_map *memdev) +{ + struct device *dev = acpi_desc->dev; + struct nfit_memdev *nfit_memdev = devm_kzalloc(dev, + sizeof(*nfit_memdev), GFP_KERNEL); + + if (!nfit_memdev) + return false; + INIT_LIST_HEAD(&nfit_memdev->list); + nfit_memdev->memdev = memdev; + list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs); + dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n", + __func__, memdev->device_handle, memdev->range_index, + memdev->region_index); + return true; +} + +static bool add_dcr(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_control_region *dcr) +{ + struct device *dev = acpi_desc->dev; + struct nfit_dcr *nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr), + GFP_KERNEL); + + if (!nfit_dcr) + return false; + INIT_LIST_HEAD(&nfit_dcr->list); + nfit_dcr->dcr = dcr; + list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs); + dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__, + dcr->region_index, dcr->windows); + return true; +} + +static bool add_bdw(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_data_region *bdw) +{ + struct device *dev = acpi_desc->dev; + struct nfit_bdw *nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw), + GFP_KERNEL); + + if (!nfit_bdw) + return false; + INIT_LIST_HEAD(&nfit_bdw->list); + nfit_bdw->bdw = bdw; + list_add_tail(&nfit_bdw->list, &acpi_desc->bdws); + dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__, + bdw->region_index, bdw->windows); + return true; +} + +static bool add_idt(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_interleave *idt) +{ + struct device *dev = acpi_desc->dev; + struct nfit_idt *nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt), + GFP_KERNEL); + + if (!nfit_idt) + return false; + INIT_LIST_HEAD(&nfit_idt->list); + nfit_idt->idt = idt; + list_add_tail(&nfit_idt->list, &acpi_desc->idts); + dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__, + idt->interleave_index, idt->line_count); + return true; +} + +static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table, + const void *end) +{ + struct device *dev = acpi_desc->dev; + struct acpi_nfit_header *hdr; + void *err = ERR_PTR(-ENOMEM); + + if (table >= end) + return NULL; + + hdr = table; + switch (hdr->type) { + case ACPI_NFIT_TYPE_SYSTEM_ADDRESS: + if (!add_spa(acpi_desc, table)) + return err; + break; + case ACPI_NFIT_TYPE_MEMORY_MAP: + if (!add_memdev(acpi_desc, table)) + return err; + break; + case ACPI_NFIT_TYPE_CONTROL_REGION: + if (!add_dcr(acpi_desc, table)) + return err; + break; + case ACPI_NFIT_TYPE_DATA_REGION: + if (!add_bdw(acpi_desc, table)) + return err; + break; + case ACPI_NFIT_TYPE_INTERLEAVE: + if (!add_idt(acpi_desc, table)) + return err; + break; + case ACPI_NFIT_TYPE_FLUSH_ADDRESS: + dev_dbg(dev, "%s: flush\n", __func__); + break; + case ACPI_NFIT_TYPE_SMBIOS: + dev_dbg(dev, "%s: smbios\n", __func__); + break; + default: + dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type); + break; + } + + return table + hdr->length; +} + +static void nfit_mem_find_spa_bdw(struct acpi_nfit_desc *acpi_desc, + struct nfit_mem *nfit_mem) +{ + u32 device_handle = __to_nfit_memdev(nfit_mem)->device_handle; + u16 dcr = nfit_mem->dcr->region_index; + struct nfit_spa *nfit_spa; + + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { + u16 range_index = nfit_spa->spa->range_index; + int type = nfit_spa_type(nfit_spa->spa); + struct nfit_memdev *nfit_memdev; + + if (type != NFIT_SPA_BDW) + continue; + + list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { + if (nfit_memdev->memdev->range_index != range_index) + continue; + if (nfit_memdev->memdev->device_handle != device_handle) + continue; + if (nfit_memdev->memdev->region_index != dcr) + continue; + + nfit_mem->spa_bdw = nfit_spa->spa; + return; + } + } + + dev_dbg(acpi_desc->dev, "SPA-BDW not found for SPA-DCR %d\n", + nfit_mem->spa_dcr->range_index); + nfit_mem->bdw = NULL; +} + +static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc, + struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa) +{ + u16 dcr = __to_nfit_memdev(nfit_mem)->region_index; + struct nfit_memdev *nfit_memdev; + struct nfit_dcr *nfit_dcr; + struct nfit_bdw *nfit_bdw; + struct nfit_idt *nfit_idt; + u16 idt_idx, range_index; + + list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) { + if (nfit_dcr->dcr->region_index != dcr) + continue; + nfit_mem->dcr = nfit_dcr->dcr; + break; + } + + if (!nfit_mem->dcr) { + dev_dbg(acpi_desc->dev, "SPA %d missing:%s%s\n", + spa->range_index, __to_nfit_memdev(nfit_mem) + ? "" : " MEMDEV", nfit_mem->dcr ? "" : " DCR"); + return -ENODEV; + } + + /* + * We've found enough to create an nvdimm, optionally + * find an associated BDW + */ + list_add(&nfit_mem->list, &acpi_desc->dimms); + + list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) { + if (nfit_bdw->bdw->region_index != dcr) + continue; + nfit_mem->bdw = nfit_bdw->bdw; + break; + } + + if (!nfit_mem->bdw) + return 0; + + nfit_mem_find_spa_bdw(acpi_desc, nfit_mem); + + if (!nfit_mem->spa_bdw) + return 0; + + range_index = nfit_mem->spa_bdw->range_index; + list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { + if (nfit_memdev->memdev->range_index != range_index || + nfit_memdev->memdev->region_index != dcr) + continue; + nfit_mem->memdev_bdw = nfit_memdev->memdev; + idt_idx = nfit_memdev->memdev->interleave_index; + list_for_each_entry(nfit_idt, &acpi_desc->idts, list) { + if (nfit_idt->idt->interleave_index != idt_idx) + continue; + nfit_mem->idt_bdw = nfit_idt->idt; + break; + } + break; + } + + return 0; +} + +static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_system_address *spa) +{ + struct nfit_mem *nfit_mem, *found; + struct nfit_memdev *nfit_memdev; + int type = nfit_spa_type(spa); + u16 dcr; + + switch (type) { + case NFIT_SPA_DCR: + case NFIT_SPA_PM: + break; + default: + return 0; + } + + list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { + int rc; + + if (nfit_memdev->memdev->range_index != spa->range_index) + continue; + found = NULL; + dcr = nfit_memdev->memdev->region_index; + list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) + if (__to_nfit_memdev(nfit_mem)->region_index == dcr) { + found = nfit_mem; + break; + } + + if (found) + nfit_mem = found; + else { + nfit_mem = devm_kzalloc(acpi_desc->dev, + sizeof(*nfit_mem), GFP_KERNEL); + if (!nfit_mem) + return -ENOMEM; + INIT_LIST_HEAD(&nfit_mem->list); + } + + if (type == NFIT_SPA_DCR) { + struct nfit_idt *nfit_idt; + u16 idt_idx; + + /* multiple dimms may share a SPA when interleaved */ + nfit_mem->spa_dcr = spa; + nfit_mem->memdev_dcr = nfit_memdev->memdev; + idt_idx = nfit_memdev->memdev->interleave_index; + list_for_each_entry(nfit_idt, &acpi_desc->idts, list) { + if (nfit_idt->idt->interleave_index != idt_idx) + continue; + nfit_mem->idt_dcr = nfit_idt->idt; + break; + } + } else { + /* + * A single dimm may belong to multiple SPA-PM + * ranges, record at least one in addition to + * any SPA-DCR range. + */ + nfit_mem->memdev_pmem = nfit_memdev->memdev; + } + + if (found) + continue; + + rc = nfit_mem_add(acpi_desc, nfit_mem, spa); + if (rc) + return rc; + } + + return 0; +} + +static int nfit_mem_cmp(void *priv, struct list_head *_a, struct list_head *_b) +{ + struct nfit_mem *a = container_of(_a, typeof(*a), list); + struct nfit_mem *b = container_of(_b, typeof(*b), list); + u32 handleA, handleB; + + handleA = __to_nfit_memdev(a)->device_handle; + handleB = __to_nfit_memdev(b)->device_handle; + if (handleA < handleB) + return -1; + else if (handleA > handleB) + return 1; + return 0; +} + +static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc) +{ + struct nfit_spa *nfit_spa; + + /* + * For each SPA-DCR or SPA-PMEM address range find its + * corresponding MEMDEV(s). From each MEMDEV find the + * corresponding DCR. Then, if we're operating on a SPA-DCR, + * try to find a SPA-BDW and a corresponding BDW that references + * the DCR. Throw it all into an nfit_mem object. Note, that + * BDWs are optional. + */ + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { + int rc; + + rc = nfit_mem_dcr_init(acpi_desc, nfit_spa->spa); + if (rc) + return rc; + } + + list_sort(NULL, &acpi_desc->dimms, nfit_mem_cmp); + + return 0; +} + +static ssize_t revision_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + + return sprintf(buf, "%d\n", acpi_desc->nfit->header.revision); +} +static DEVICE_ATTR_RO(revision); + +static struct attribute *acpi_nfit_attributes[] = { + &dev_attr_revision.attr, + NULL, +}; + +static struct attribute_group acpi_nfit_attribute_group = { + .name = "nfit", + .attrs = acpi_nfit_attributes, +}; + +const struct attribute_group *acpi_nfit_attribute_groups[] = { + &nvdimm_bus_attribute_group, + &acpi_nfit_attribute_group, + NULL, +}; +EXPORT_SYMBOL_GPL(acpi_nfit_attribute_groups); + +static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + + return __to_nfit_memdev(nfit_mem); +} + +static struct acpi_nfit_control_region *to_nfit_dcr(struct device *dev) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + + return nfit_mem->dcr; +} + +static ssize_t handle_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev); + + return sprintf(buf, "%#x\n", memdev->device_handle); +} +static DEVICE_ATTR_RO(handle); + +static ssize_t phys_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev); + + return sprintf(buf, "%#x\n", memdev->physical_id); +} +static DEVICE_ATTR_RO(phys_id); + +static ssize_t vendor_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); + + return sprintf(buf, "%#x\n", dcr->vendor_id); +} +static DEVICE_ATTR_RO(vendor); + +static ssize_t rev_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); + + return sprintf(buf, "%#x\n", dcr->revision_id); +} +static DEVICE_ATTR_RO(rev_id); + +static ssize_t device_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); + + return sprintf(buf, "%#x\n", dcr->device_id); +} +static DEVICE_ATTR_RO(device); + +static ssize_t format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); + + return sprintf(buf, "%#x\n", dcr->code); +} +static DEVICE_ATTR_RO(format); + +static ssize_t serial_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); + + return sprintf(buf, "%#x\n", dcr->serial_number); +} +static DEVICE_ATTR_RO(serial); + +static ssize_t flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u16 flags = to_nfit_memdev(dev)->flags; + + return sprintf(buf, "%s%s%s%s%s\n", + flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save " : "", + flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore " : "", + flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush " : "", + flags & ACPI_NFIT_MEM_ARMED ? "arm " : "", + flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart " : ""); +} +static DEVICE_ATTR_RO(flags); + +static struct attribute *acpi_nfit_dimm_attributes[] = { + &dev_attr_handle.attr, + &dev_attr_phys_id.attr, + &dev_attr_vendor.attr, + &dev_attr_device.attr, + &dev_attr_format.attr, + &dev_attr_serial.attr, + &dev_attr_rev_id.attr, + &dev_attr_flags.attr, + NULL, +}; + +static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + + if (to_nfit_dcr(dev)) + return a->mode; + else + return 0; +} + +static struct attribute_group acpi_nfit_dimm_attribute_group = { + .name = "nfit", + .attrs = acpi_nfit_dimm_attributes, + .is_visible = acpi_nfit_dimm_attr_visible, +}; + +static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = { + &nvdimm_attribute_group, + &nd_device_attribute_group, + &acpi_nfit_dimm_attribute_group, + NULL, +}; + +static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc, + u32 device_handle) +{ + struct nfit_mem *nfit_mem; + + list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) + if (__to_nfit_memdev(nfit_mem)->device_handle == device_handle) + return nfit_mem->nvdimm; + + return NULL; +} + +static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, + struct nfit_mem *nfit_mem, u32 device_handle) +{ + struct acpi_device *adev, *adev_dimm; + struct device *dev = acpi_desc->dev; + const u8 *uuid = to_nfit_uuid(NFIT_DEV_DIMM); + unsigned long long sta; + int i, rc = -ENODEV; + acpi_status status; + + nfit_mem->dsm_mask = acpi_desc->dimm_dsm_force_en; + adev = to_acpi_dev(acpi_desc); + if (!adev) + return 0; + + adev_dimm = acpi_find_child_device(adev, device_handle, false); + nfit_mem->adev = adev_dimm; + if (!adev_dimm) { + dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n", + device_handle); + return force_enable_dimms ? 0 : -ENODEV; + } + + status = acpi_evaluate_integer(adev_dimm->handle, "_STA", NULL, &sta); + if (status == AE_NOT_FOUND) { + dev_dbg(dev, "%s missing _STA, assuming enabled...\n", + dev_name(&adev_dimm->dev)); + rc = 0; + } else if (ACPI_FAILURE(status)) + dev_err(dev, "%s failed to retrieve_STA, disabling...\n", + dev_name(&adev_dimm->dev)); + else if ((sta & ACPI_STA_DEVICE_ENABLED) == 0) + dev_info(dev, "%s disabled by firmware\n", + dev_name(&adev_dimm->dev)); + else + rc = 0; + + for (i = ND_CMD_SMART; i <= ND_CMD_VENDOR; i++) + if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i)) + set_bit(i, &nfit_mem->dsm_mask); + + return force_enable_dimms ? 0 : rc; +} + +static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) +{ + struct nfit_mem *nfit_mem; + int dimm_count = 0; + + list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) { + struct nvdimm *nvdimm; + unsigned long flags = 0; + u32 device_handle; + u16 mem_flags; + int rc; + + device_handle = __to_nfit_memdev(nfit_mem)->device_handle; + nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle); + if (nvdimm) { + /* + * If for some reason we find multiple DCRs the + * first one wins + */ + dev_err(acpi_desc->dev, "duplicate DCR detected: %s\n", + nvdimm_name(nvdimm)); + continue; + } + + if (nfit_mem->bdw && nfit_mem->memdev_pmem) + flags |= NDD_ALIASING; + + mem_flags = __to_nfit_memdev(nfit_mem)->flags; + if (mem_flags & ACPI_NFIT_MEM_ARMED) + flags |= NDD_UNARMED; + + rc = acpi_nfit_add_dimm(acpi_desc, nfit_mem, device_handle); + if (rc) + continue; + + nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem, + acpi_nfit_dimm_attribute_groups, + flags, &nfit_mem->dsm_mask); + if (!nvdimm) + return -ENOMEM; + + nfit_mem->nvdimm = nvdimm; + dimm_count++; + + if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0) + continue; + + dev_info(acpi_desc->dev, "%s: failed: %s%s%s%s\n", + nvdimm_name(nvdimm), + mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save " : "", + mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore " : "", + mem_flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush " : "", + mem_flags & ACPI_NFIT_MEM_ARMED ? "arm " : ""); + + } + + return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count); +} + +static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) +{ + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS); + struct acpi_device *adev; + int i; + + adev = to_acpi_dev(acpi_desc); + if (!adev) + return; + + for (i = ND_CMD_ARS_CAP; i <= ND_CMD_ARS_STATUS; i++) + if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i)) + set_bit(i, &nd_desc->dsm_mask); +} + +static ssize_t range_index_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region); + + return sprintf(buf, "%d\n", nfit_spa->spa->range_index); +} +static DEVICE_ATTR_RO(range_index); + +static struct attribute *acpi_nfit_region_attributes[] = { + &dev_attr_range_index.attr, + NULL, +}; + +static struct attribute_group acpi_nfit_region_attribute_group = { + .name = "nfit", + .attrs = acpi_nfit_region_attributes, +}; + +static const struct attribute_group *acpi_nfit_region_attribute_groups[] = { + &nd_region_attribute_group, + &nd_mapping_attribute_group, + &nd_device_attribute_group, + &nd_numa_attribute_group, + &acpi_nfit_region_attribute_group, + NULL, +}; + +/* enough info to uniquely specify an interleave set */ +struct nfit_set_info { + struct nfit_set_info_map { + u64 region_offset; + u32 serial_number; + u32 pad; + } mapping[0]; +}; + +static size_t sizeof_nfit_set_info(int num_mappings) +{ + return sizeof(struct nfit_set_info) + + num_mappings * sizeof(struct nfit_set_info_map); +} + +static int cmp_map(const void *m0, const void *m1) +{ + const struct nfit_set_info_map *map0 = m0; + const struct nfit_set_info_map *map1 = m1; + + return memcmp(&map0->region_offset, &map1->region_offset, + sizeof(u64)); +} + +/* Retrieve the nth entry referencing this spa */ +static struct acpi_nfit_memory_map *memdev_from_spa( + struct acpi_nfit_desc *acpi_desc, u16 range_index, int n) +{ + struct nfit_memdev *nfit_memdev; + + list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) + if (nfit_memdev->memdev->range_index == range_index) + if (n-- == 0) + return nfit_memdev->memdev; + return NULL; +} + +static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, + struct nd_region_desc *ndr_desc, + struct acpi_nfit_system_address *spa) +{ + int i, spa_type = nfit_spa_type(spa); + struct device *dev = acpi_desc->dev; + struct nd_interleave_set *nd_set; + u16 nr = ndr_desc->num_mappings; + struct nfit_set_info *info; + + if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE) + /* pass */; + else + return 0; + + nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); + if (!nd_set) + return -ENOMEM; + + info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL); + if (!info) + return -ENOMEM; + for (i = 0; i < nr; i++) { + struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i]; + struct nfit_set_info_map *map = &info->mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc, + spa->range_index, i); + + if (!memdev || !nfit_mem->dcr) { + dev_err(dev, "%s: failed to find DCR\n", __func__); + return -ENODEV; + } + + map->region_offset = memdev->region_offset; + map->serial_number = nfit_mem->dcr->serial_number; + } + + sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map), + cmp_map, NULL); + nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); + ndr_desc->nd_set = nd_set; + devm_kfree(dev, info); + + return 0; +} + +static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio) +{ + struct acpi_nfit_interleave *idt = mmio->idt; + u32 sub_line_offset, line_index, line_offset; + u64 line_no, table_skip_count, table_offset; + + line_no = div_u64_rem(offset, mmio->line_size, &sub_line_offset); + table_skip_count = div_u64_rem(line_no, mmio->num_lines, &line_index); + line_offset = idt->line_offset[line_index] + * mmio->line_size; + table_offset = table_skip_count * mmio->table_size; + + return mmio->base_offset + line_offset + table_offset + sub_line_offset; +} + +static u64 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw) +{ + struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR]; + u64 offset = nfit_blk->stat_offset + mmio->size * bw; + + if (mmio->num_lines) + offset = to_interleave_offset(offset, mmio); + + return readq(mmio->base + offset); +} + +static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, + resource_size_t dpa, unsigned int len, unsigned int write) +{ + u64 cmd, offset; + struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR]; + + enum { + BCW_OFFSET_MASK = (1ULL << 48)-1, + BCW_LEN_SHIFT = 48, + BCW_LEN_MASK = (1ULL << 8) - 1, + BCW_CMD_SHIFT = 56, + }; + + cmd = (dpa >> L1_CACHE_SHIFT) & BCW_OFFSET_MASK; + len = len >> L1_CACHE_SHIFT; + cmd |= ((u64) len & BCW_LEN_MASK) << BCW_LEN_SHIFT; + cmd |= ((u64) write) << BCW_CMD_SHIFT; + + offset = nfit_blk->cmd_offset + mmio->size * bw; + if (mmio->num_lines) + offset = to_interleave_offset(offset, mmio); + + writeq(cmd, mmio->base + offset); + /* FIXME: conditionally perform read-back if mandated by firmware */ +} + +static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, + resource_size_t dpa, void *iobuf, size_t len, int rw, + unsigned int lane) +{ + struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW]; + unsigned int copied = 0; + u64 base_offset; + int rc; + + base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES + + lane * mmio->size; + /* TODO: non-temporal access, flush hints, cache management etc... */ + write_blk_ctl(nfit_blk, lane, dpa, len, rw); + while (len) { + unsigned int c; + u64 offset; + + if (mmio->num_lines) { + u32 line_offset; + + offset = to_interleave_offset(base_offset + copied, + mmio); + div_u64_rem(offset, mmio->line_size, &line_offset); + c = min_t(size_t, len, mmio->line_size - line_offset); + } else { + offset = base_offset + nfit_blk->bdw_offset; + c = len; + } + + if (rw) + memcpy(mmio->aperture + offset, iobuf + copied, c); + else + memcpy(iobuf + copied, mmio->aperture + offset, c); + + copied += c; + len -= c; + } + rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0; + return rc; +} + +static int acpi_nfit_blk_region_do_io(struct nd_blk_region *ndbr, + resource_size_t dpa, void *iobuf, u64 len, int rw) +{ + struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr); + struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW]; + struct nd_region *nd_region = nfit_blk->nd_region; + unsigned int lane, copied = 0; + int rc = 0; + + lane = nd_region_acquire_lane(nd_region); + while (len) { + u64 c = min(len, mmio->size); + + rc = acpi_nfit_blk_single_io(nfit_blk, dpa + copied, + iobuf + copied, c, rw, lane); + if (rc) + break; + + copied += c; + len -= c; + } + nd_region_release_lane(nd_region, lane); + + return rc; +} + +static void nfit_spa_mapping_release(struct kref *kref) +{ + struct nfit_spa_mapping *spa_map = to_spa_map(kref); + struct acpi_nfit_system_address *spa = spa_map->spa; + struct acpi_nfit_desc *acpi_desc = spa_map->acpi_desc; + + WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex)); + dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index); + iounmap(spa_map->iomem); + release_mem_region(spa->address, spa->length); + list_del(&spa_map->list); + kfree(spa_map); +} + +static struct nfit_spa_mapping *find_spa_mapping( + struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_system_address *spa) +{ + struct nfit_spa_mapping *spa_map; + + WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex)); + list_for_each_entry(spa_map, &acpi_desc->spa_maps, list) + if (spa_map->spa == spa) + return spa_map; + + return NULL; +} + +static void nfit_spa_unmap(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_system_address *spa) +{ + struct nfit_spa_mapping *spa_map; + + mutex_lock(&acpi_desc->spa_map_mutex); + spa_map = find_spa_mapping(acpi_desc, spa); + + if (spa_map) + kref_put(&spa_map->kref, nfit_spa_mapping_release); + mutex_unlock(&acpi_desc->spa_map_mutex); +} + +static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_system_address *spa) +{ + resource_size_t start = spa->address; + resource_size_t n = spa->length; + struct nfit_spa_mapping *spa_map; + struct resource *res; + + WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex)); + + spa_map = find_spa_mapping(acpi_desc, spa); + if (spa_map) { + kref_get(&spa_map->kref); + return spa_map->iomem; + } + + spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL); + if (!spa_map) + return NULL; + + INIT_LIST_HEAD(&spa_map->list); + spa_map->spa = spa; + kref_init(&spa_map->kref); + spa_map->acpi_desc = acpi_desc; + + res = request_mem_region(start, n, dev_name(acpi_desc->dev)); + if (!res) + goto err_mem; + + /* TODO: cacheability based on the spa type */ + spa_map->iomem = ioremap_nocache(start, n); + if (!spa_map->iomem) + goto err_map; + + list_add_tail(&spa_map->list, &acpi_desc->spa_maps); + return spa_map->iomem; + + err_map: + release_mem_region(start, n); + err_mem: + kfree(spa_map); + return NULL; +} + +/** + * nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges + * @nvdimm_bus: NFIT-bus that provided the spa table entry + * @nfit_spa: spa table to map + * + * In the case where block-data-window apertures and + * dimm-control-regions are interleaved they will end up sharing a + * single request_mem_region() + ioremap() for the address range. In + * the style of devm nfit_spa_map() mappings are automatically dropped + * when all region devices referencing the same mapping are disabled / + * unbound. + */ +static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_system_address *spa) +{ + void __iomem *iomem; + + mutex_lock(&acpi_desc->spa_map_mutex); + iomem = __nfit_spa_map(acpi_desc, spa); + mutex_unlock(&acpi_desc->spa_map_mutex); + + return iomem; +} + +static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio, + struct acpi_nfit_interleave *idt, u16 interleave_ways) +{ + if (idt) { + mmio->num_lines = idt->line_count; + mmio->line_size = idt->line_size; + if (interleave_ways == 0) + return -ENXIO; + mmio->table_size = mmio->num_lines * interleave_ways + * mmio->line_size; + } + + return 0; +} + +static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, + struct device *dev) +{ + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + struct nd_blk_region *ndbr = to_nd_blk_region(dev); + struct nfit_blk_mmio *mmio; + struct nfit_blk *nfit_blk; + struct nfit_mem *nfit_mem; + struct nvdimm *nvdimm; + int rc; + + nvdimm = nd_blk_region_to_dimm(ndbr); + nfit_mem = nvdimm_provider_data(nvdimm); + if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) { + dev_dbg(dev, "%s: missing%s%s%s\n", __func__, + nfit_mem ? "" : " nfit_mem", + nfit_mem->dcr ? "" : " dcr", + nfit_mem->bdw ? "" : " bdw"); + return -ENXIO; + } + + nfit_blk = devm_kzalloc(dev, sizeof(*nfit_blk), GFP_KERNEL); + if (!nfit_blk) + return -ENOMEM; + nd_blk_region_set_provider_data(ndbr, nfit_blk); + nfit_blk->nd_region = to_nd_region(dev); + + /* map block aperture memory */ + nfit_blk->bdw_offset = nfit_mem->bdw->offset; + mmio = &nfit_blk->mmio[BDW]; + mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw); + if (!mmio->base) { + dev_dbg(dev, "%s: %s failed to map bdw\n", __func__, + nvdimm_name(nvdimm)); + return -ENOMEM; + } + mmio->size = nfit_mem->bdw->size; + mmio->base_offset = nfit_mem->memdev_bdw->region_offset; + mmio->idt = nfit_mem->idt_bdw; + mmio->spa = nfit_mem->spa_bdw; + rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw, + nfit_mem->memdev_bdw->interleave_ways); + if (rc) { + dev_dbg(dev, "%s: %s failed to init bdw interleave\n", + __func__, nvdimm_name(nvdimm)); + return rc; + } + + /* map block control memory */ + nfit_blk->cmd_offset = nfit_mem->dcr->command_offset; + nfit_blk->stat_offset = nfit_mem->dcr->status_offset; + mmio = &nfit_blk->mmio[DCR]; + mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr); + if (!mmio->base) { + dev_dbg(dev, "%s: %s failed to map dcr\n", __func__, + nvdimm_name(nvdimm)); + return -ENOMEM; + } + mmio->size = nfit_mem->dcr->window_size; + mmio->base_offset = nfit_mem->memdev_dcr->region_offset; + mmio->idt = nfit_mem->idt_dcr; + mmio->spa = nfit_mem->spa_dcr; + rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr, + nfit_mem->memdev_dcr->interleave_ways); + if (rc) { + dev_dbg(dev, "%s: %s failed to init dcr interleave\n", + __func__, nvdimm_name(nvdimm)); + return rc; + } + + if (mmio->line_size == 0) + return 0; + + if ((u32) nfit_blk->cmd_offset % mmio->line_size + + 8 > mmio->line_size) { + dev_dbg(dev, "cmd_offset crosses interleave boundary\n"); + return -ENXIO; + } else if ((u32) nfit_blk->stat_offset % mmio->line_size + + 8 > mmio->line_size) { + dev_dbg(dev, "stat_offset crosses interleave boundary\n"); + return -ENXIO; + } + + return 0; +} + +static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus, + struct device *dev) +{ + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + struct nd_blk_region *ndbr = to_nd_blk_region(dev); + struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr); + int i; + + if (!nfit_blk) + return; /* never enabled */ + + /* auto-free BLK spa mappings */ + for (i = 0; i < 2; i++) { + struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i]; + + if (mmio->base) + nfit_spa_unmap(acpi_desc, mmio->spa); + } + nd_blk_region_set_provider_data(ndbr, NULL); + /* devm will free nfit_blk */ +} + +static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, + struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc, + struct acpi_nfit_memory_map *memdev, + struct acpi_nfit_system_address *spa) +{ + struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, + memdev->device_handle); + struct nd_blk_region_desc *ndbr_desc; + struct nfit_mem *nfit_mem; + int blk_valid = 0; + + if (!nvdimm) { + dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n", + spa->range_index, memdev->device_handle); + return -ENODEV; + } + + nd_mapping->nvdimm = nvdimm; + switch (nfit_spa_type(spa)) { + case NFIT_SPA_PM: + case NFIT_SPA_VOLATILE: + nd_mapping->start = memdev->address; + nd_mapping->size = memdev->region_size; + break; + case NFIT_SPA_DCR: + nfit_mem = nvdimm_provider_data(nvdimm); + if (!nfit_mem || !nfit_mem->bdw) { + dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n", + spa->range_index, nvdimm_name(nvdimm)); + } else { + nd_mapping->size = nfit_mem->bdw->capacity; + nd_mapping->start = nfit_mem->bdw->start_address; + ndr_desc->num_lanes = nfit_mem->bdw->windows; + blk_valid = 1; + } + + ndr_desc->nd_mapping = nd_mapping; + ndr_desc->num_mappings = blk_valid; + ndbr_desc = to_blk_region_desc(ndr_desc); + ndbr_desc->enable = acpi_nfit_blk_region_enable; + ndbr_desc->disable = acpi_nfit_blk_region_disable; + ndbr_desc->do_io = acpi_desc->blk_do_io; + if (!nvdimm_blk_region_create(acpi_desc->nvdimm_bus, ndr_desc)) + return -ENOMEM; + break; + } + + return 0; +} + +static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, + struct nfit_spa *nfit_spa) +{ + static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS]; + struct acpi_nfit_system_address *spa = nfit_spa->spa; + struct nd_blk_region_desc ndbr_desc; + struct nd_region_desc *ndr_desc; + struct nfit_memdev *nfit_memdev; + struct nvdimm_bus *nvdimm_bus; + struct resource res; + int count = 0, rc; + + if (spa->range_index == 0) { + dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n", + __func__); + return 0; + } + + memset(&res, 0, sizeof(res)); + memset(&nd_mappings, 0, sizeof(nd_mappings)); + memset(&ndbr_desc, 0, sizeof(ndbr_desc)); + res.start = spa->address; + res.end = res.start + spa->length - 1; + ndr_desc = &ndbr_desc.ndr_desc; + ndr_desc->res = &res; + ndr_desc->provider_data = nfit_spa; + ndr_desc->attr_groups = acpi_nfit_region_attribute_groups; + if (spa->flags & ACPI_NFIT_PROXIMITY_VALID) + ndr_desc->numa_node = acpi_map_pxm_to_online_node( + spa->proximity_domain); + else + ndr_desc->numa_node = NUMA_NO_NODE; + + list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { + struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev; + struct nd_mapping *nd_mapping; + + if (memdev->range_index != spa->range_index) + continue; + if (count >= ND_MAX_MAPPINGS) { + dev_err(acpi_desc->dev, "spa%d exceeds max mappings %d\n", + spa->range_index, ND_MAX_MAPPINGS); + return -ENXIO; + } + nd_mapping = &nd_mappings[count++]; + rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc, + memdev, spa); + if (rc) + return rc; + } + + ndr_desc->nd_mapping = nd_mappings; + ndr_desc->num_mappings = count; + rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa); + if (rc) + return rc; + + nvdimm_bus = acpi_desc->nvdimm_bus; + if (nfit_spa_type(spa) == NFIT_SPA_PM) { + if (!nvdimm_pmem_region_create(nvdimm_bus, ndr_desc)) + return -ENOMEM; + } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) { + if (!nvdimm_volatile_region_create(nvdimm_bus, ndr_desc)) + return -ENOMEM; + } + return 0; +} + +static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) +{ + struct nfit_spa *nfit_spa; + + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { + int rc = acpi_nfit_register_region(acpi_desc, nfit_spa); + + if (rc) + return rc; + } + return 0; +} + +int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) +{ + struct device *dev = acpi_desc->dev; + const void *end; + u8 *data; + int rc; + + INIT_LIST_HEAD(&acpi_desc->spa_maps); + INIT_LIST_HEAD(&acpi_desc->spas); + INIT_LIST_HEAD(&acpi_desc->dcrs); + INIT_LIST_HEAD(&acpi_desc->bdws); + INIT_LIST_HEAD(&acpi_desc->idts); + INIT_LIST_HEAD(&acpi_desc->memdevs); + INIT_LIST_HEAD(&acpi_desc->dimms); + mutex_init(&acpi_desc->spa_map_mutex); + + data = (u8 *) acpi_desc->nfit; + end = data + sz; + data += sizeof(struct acpi_table_nfit); + while (!IS_ERR_OR_NULL(data)) + data = add_table(acpi_desc, data, end); + + if (IS_ERR(data)) { + dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__, + PTR_ERR(data)); + return PTR_ERR(data); + } + + if (nfit_mem_init(acpi_desc) != 0) + return -ENOMEM; + + acpi_nfit_init_dsms(acpi_desc); + + rc = acpi_nfit_register_dimms(acpi_desc); + if (rc) + return rc; + + return acpi_nfit_register_regions(acpi_desc); +} +EXPORT_SYMBOL_GPL(acpi_nfit_init); + +static int acpi_nfit_add(struct acpi_device *adev) +{ + struct nvdimm_bus_descriptor *nd_desc; + struct acpi_nfit_desc *acpi_desc; + struct device *dev = &adev->dev; + struct acpi_table_header *tbl; + acpi_status status = AE_OK; + acpi_size sz; + int rc; + + status = acpi_get_table_with_size("NFIT", 0, &tbl, &sz); + if (ACPI_FAILURE(status)) { + dev_err(dev, "failed to find NFIT\n"); + return -ENXIO; + } + + acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL); + if (!acpi_desc) + return -ENOMEM; + + dev_set_drvdata(dev, acpi_desc); + acpi_desc->dev = dev; + acpi_desc->nfit = (struct acpi_table_nfit *) tbl; + acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io; + nd_desc = &acpi_desc->nd_desc; + nd_desc->provider_name = "ACPI.NFIT"; + nd_desc->ndctl = acpi_nfit_ctl; + nd_desc->attr_groups = acpi_nfit_attribute_groups; + + acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, nd_desc); + if (!acpi_desc->nvdimm_bus) + return -ENXIO; + + rc = acpi_nfit_init(acpi_desc, sz); + if (rc) { + nvdimm_bus_unregister(acpi_desc->nvdimm_bus); + return rc; + } + return 0; +} + +static int acpi_nfit_remove(struct acpi_device *adev) +{ + struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev); + + nvdimm_bus_unregister(acpi_desc->nvdimm_bus); + return 0; +} + +static const struct acpi_device_id acpi_nfit_ids[] = { + { "ACPI0012", 0 }, + { "", 0 }, +}; +MODULE_DEVICE_TABLE(acpi, acpi_nfit_ids); + +static struct acpi_driver acpi_nfit_driver = { + .name = KBUILD_MODNAME, + .ids = acpi_nfit_ids, + .ops = { + .add = acpi_nfit_add, + .remove = acpi_nfit_remove, + }, +}; + +static __init int nfit_init(void) +{ + BUILD_BUG_ON(sizeof(struct acpi_table_nfit) != 40); + BUILD_BUG_ON(sizeof(struct acpi_nfit_system_address) != 56); + BUILD_BUG_ON(sizeof(struct acpi_nfit_memory_map) != 48); + BUILD_BUG_ON(sizeof(struct acpi_nfit_interleave) != 20); + BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9); + BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80); + BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40); + + acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]); + acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]); + acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]); + acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]); + acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]); + acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]); + acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]); + acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]); + acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]); + acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]); + + return acpi_bus_register_driver(&acpi_nfit_driver); +} + +static __exit void nfit_exit(void) +{ + acpi_bus_unregister_driver(&acpi_nfit_driver); +} + +module_init(nfit_init); +module_exit(nfit_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h new file mode 100644 index 000000000000..81f2e8c5a79c --- /dev/null +++ b/drivers/acpi/nfit.h @@ -0,0 +1,158 @@ +/* + * NVDIMM Firmware Interface Table - NFIT + * + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __NFIT_H__ +#define __NFIT_H__ +#include <linux/libnvdimm.h> +#include <linux/types.h> +#include <linux/uuid.h> +#include <linux/acpi.h> +#include <acpi/acuuid.h> + +#define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba" +#define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66" +#define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \ + | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \ + | ACPI_NFIT_MEM_ARMED) + +enum nfit_uuids { + NFIT_SPA_VOLATILE, + NFIT_SPA_PM, + NFIT_SPA_DCR, + NFIT_SPA_BDW, + NFIT_SPA_VDISK, + NFIT_SPA_VCD, + NFIT_SPA_PDISK, + NFIT_SPA_PCD, + NFIT_DEV_BUS, + NFIT_DEV_DIMM, + NFIT_UUID_MAX, +}; + +struct nfit_spa { + struct acpi_nfit_system_address *spa; + struct list_head list; +}; + +struct nfit_dcr { + struct acpi_nfit_control_region *dcr; + struct list_head list; +}; + +struct nfit_bdw { + struct acpi_nfit_data_region *bdw; + struct list_head list; +}; + +struct nfit_idt { + struct acpi_nfit_interleave *idt; + struct list_head list; +}; + +struct nfit_memdev { + struct acpi_nfit_memory_map *memdev; + struct list_head list; +}; + +/* assembled tables for a given dimm/memory-device */ +struct nfit_mem { + struct nvdimm *nvdimm; + struct acpi_nfit_memory_map *memdev_dcr; + struct acpi_nfit_memory_map *memdev_pmem; + struct acpi_nfit_memory_map *memdev_bdw; + struct acpi_nfit_control_region *dcr; + struct acpi_nfit_data_region *bdw; + struct acpi_nfit_system_address *spa_dcr; + struct acpi_nfit_system_address *spa_bdw; + struct acpi_nfit_interleave *idt_dcr; + struct acpi_nfit_interleave *idt_bdw; + struct list_head list; + struct acpi_device *adev; + unsigned long dsm_mask; +}; + +struct acpi_nfit_desc { + struct nvdimm_bus_descriptor nd_desc; + struct acpi_table_nfit *nfit; + struct mutex spa_map_mutex; + struct list_head spa_maps; + struct list_head memdevs; + struct list_head dimms; + struct list_head spas; + struct list_head dcrs; + struct list_head bdws; + struct list_head idts; + struct nvdimm_bus *nvdimm_bus; + struct device *dev; + unsigned long dimm_dsm_force_en; + int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, + void *iobuf, u64 len, int rw); +}; + +enum nd_blk_mmio_selector { + BDW, + DCR, +}; + +struct nfit_blk { + struct nfit_blk_mmio { + union { + void __iomem *base; + void *aperture; + }; + u64 size; + u64 base_offset; + u32 line_size; + u32 num_lines; + u32 table_size; + struct acpi_nfit_interleave *idt; + struct acpi_nfit_system_address *spa; + } mmio[2]; + struct nd_region *nd_region; + u64 bdw_offset; /* post interleave offset */ + u64 stat_offset; + u64 cmd_offset; +}; + +struct nfit_spa_mapping { + struct acpi_nfit_desc *acpi_desc; + struct acpi_nfit_system_address *spa; + struct list_head list; + struct kref kref; + void __iomem *iomem; +}; + +static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref) +{ + return container_of(kref, struct nfit_spa_mapping, kref); +} + +static inline struct acpi_nfit_memory_map *__to_nfit_memdev( + struct nfit_mem *nfit_mem) +{ + if (nfit_mem->memdev_dcr) + return nfit_mem->memdev_dcr; + return nfit_mem->memdev_pmem; +} + +static inline struct acpi_nfit_desc *to_acpi_desc( + struct nvdimm_bus_descriptor *nd_desc) +{ + return container_of(nd_desc, struct acpi_nfit_desc, nd_desc); +} + +const u8 *to_nfit_uuid(enum nfit_uuids id); +int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz); +extern const struct attribute_group *acpi_nfit_attribute_groups[]; +#endif /* __NFIT_H__ */ diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 1333cbdc3ea2..acaa3b4ea504 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -29,6 +29,8 @@ #include <linux/errno.h> #include <linux/acpi.h> #include <linux/numa.h> +#include <linux/nodemask.h> +#include <linux/topology.h> #define PREFIX "ACPI: " @@ -70,7 +72,12 @@ static void __acpi_map_pxm_to_node(int pxm, int node) int acpi_map_pxm_to_node(int pxm) { - int node = pxm_to_node_map[pxm]; + int node; + + if (pxm < 0 || pxm >= MAX_PXM_DOMAINS) + return NUMA_NO_NODE; + + node = pxm_to_node_map[pxm]; if (node == NUMA_NO_NODE) { if (nodes_weight(nodes_found_map) >= MAX_NUMNODES) @@ -83,6 +90,45 @@ int acpi_map_pxm_to_node(int pxm) return node; } +/** + * acpi_map_pxm_to_online_node - Map proximity ID to online node + * @pxm: ACPI proximity ID + * + * This is similar to acpi_map_pxm_to_node(), but always returns an online + * node. When the mapped node from a given proximity ID is offline, it + * looks up the node distance table and returns the nearest online node. + * + * ACPI device drivers, which are called after the NUMA initialization has + * completed in the kernel, can call this interface to obtain their device + * NUMA topology from ACPI tables. Such drivers do not have to deal with + * offline nodes. A node may be offline when a device proximity ID is + * unique, SRAT memory entry does not exist, or NUMA is disabled, ex. + * "numa=off" on x86. + */ +int acpi_map_pxm_to_online_node(int pxm) +{ + int node, n, dist, min_dist; + + node = acpi_map_pxm_to_node(pxm); + + if (node == NUMA_NO_NODE) + node = 0; + + if (!node_online(node)) { + min_dist = INT_MAX; + for_each_online_node(n) { + dist = node_distance(node, n); + if (dist < min_dist) { + min_dist = dist; + node = n; + } + } + } + + return node; +} +EXPORT_SYMBOL(acpi_map_pxm_to_online_node); + static void __init acpi_table_print_srat_entry(struct acpi_subtable_header *header) { @@ -328,8 +374,6 @@ int acpi_get_node(acpi_handle handle) int pxm; pxm = acpi_get_pxm(handle); - if (pxm < 0 || pxm >= MAX_PXM_DOMAINS) - return NUMA_NO_NODE; return acpi_map_pxm_to_node(pxm); } diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 3ccef9eba6f9..1b8094d4d7af 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -404,18 +404,6 @@ config BLK_DEV_RAM_DAX and will prevent RAM block device backing store memory from being allocated from highmem (only a problem for highmem systems). -config BLK_DEV_PMEM - tristate "Persistent memory block device support" - depends on HAS_IOMEM - help - Saying Y here will allow you to use a contiguous range of reserved - memory as one or more persistent block devices. - - To compile this driver as a module, choose M here: the module will be - called 'pmem'. - - If unsure, say N. - config CDROM_PKTCDVD tristate "Packet writing on CD/DVD media" depends on !UML diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 9cc6c18a1c7e..02b688d1438d 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -14,7 +14,6 @@ obj-$(CONFIG_PS3_VRAM) += ps3vram.o obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o obj-$(CONFIG_BLK_DEV_RAM) += brd.o -obj-$(CONFIG_BLK_DEV_PMEM) += pmem.o obj-$(CONFIG_BLK_DEV_LOOP) += loop.o obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index b26ceee3585e..44f9d20c19ac 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -267,7 +267,7 @@ static int crb_acpi_add(struct acpi_device *device) memcpy_fromio(&pa, &priv->cca->cmd_pa, 8); pa = le64_to_cpu(pa); - priv->cmd = devm_ioremap_nocache(dev, le64_to_cpu(pa), + priv->cmd = devm_ioremap_nocache(dev, pa, ioread32(&priv->cca->cmd_size)); if (!priv->cmd) { dev_err(dev, "ioremap of the command buffer failed\n"); @@ -276,7 +276,7 @@ static int crb_acpi_add(struct acpi_device *device) memcpy_fromio(&pa, &priv->cca->rsp_pa, 8); pa = le64_to_cpu(pa); - priv->rsp = devm_ioremap_nocache(dev, le64_to_cpu(pa), + priv->rsp = devm_ioremap_nocache(dev, pa, ioread32(&priv->cca->rsp_size)); if (!priv->rsp) { dev_err(dev, "ioremap of the response buffer failed\n"); diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 42ffa5e7a1e0..27ebf9511cb4 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -578,6 +578,9 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev, goto cleanup; } + ibmvtpm->dev = dev; + ibmvtpm->vdev = vio_dev; + crq_q = &ibmvtpm->crq_queue; crq_q->crq_addr = (struct ibmvtpm_crq *)get_zeroed_page(GFP_KERNEL); if (!crq_q->crq_addr) { @@ -622,8 +625,6 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev, crq_q->index = 0; - ibmvtpm->dev = dev; - ibmvtpm->vdev = vio_dev; TPM_VPRIV(chip) = (void *)ibmvtpm; spin_lock_init(&ibmvtpm->rtce_lock); diff --git a/drivers/char/tpm/tpm_of.c b/drivers/char/tpm/tpm_of.c index c002d1bd9caf..eebe6256918f 100644 --- a/drivers/char/tpm/tpm_of.c +++ b/drivers/char/tpm/tpm_of.c @@ -49,7 +49,7 @@ int read_log(struct tpm_bios_log *log) basep = of_get_property(np, "linux,sml-base", NULL); if (basep == NULL) { - pr_err(KERN_ERR "%s: ERROR - SML not found\n", __func__); + pr_err("%s: ERROR - SML not found\n", __func__); goto cleanup_eio; } diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index 5b6af6a9319f..8732e4c5bf3c 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -24,7 +24,7 @@ obj-$(CONFIG_COMMON_CLK_CDCE706) += clk-cdce706.o obj-$(CONFIG_ARCH_CLPS711X) += clk-clps711x.o obj-$(CONFIG_ARCH_EFM32) += clk-efm32gg.o obj-$(CONFIG_ARCH_HIGHBANK) += clk-highbank.o -obj-$(CONFIG_MACH_LOONGSON1) += clk-ls1x.o +obj-$(CONFIG_MACH_LOONGSON32) += clk-ls1x.o obj-$(CONFIG_COMMON_CLK_MAX_GEN) += clk-max-gen.o obj-$(CONFIG_COMMON_CLK_MAX77686) += clk-max77686.o obj-$(CONFIG_COMMON_CLK_MAX77802) += clk-max77802.o @@ -51,6 +51,7 @@ obj-$(CONFIG_ARCH_HI3xxx) += hisilicon/ obj-$(CONFIG_ARCH_HIP04) += hisilicon/ obj-$(CONFIG_ARCH_HIX5HD2) += hisilicon/ obj-$(CONFIG_ARCH_MXC) += imx/ +obj-$(CONFIG_MACH_INGENIC) += ingenic/ obj-$(CONFIG_COMMON_CLK_KEYSTONE) += keystone/ ifeq ($(CONFIG_COMMON_CLK), y) obj-$(CONFIG_ARCH_MMP) += mmp/ diff --git a/drivers/clk/ingenic/Makefile b/drivers/clk/ingenic/Makefile new file mode 100644 index 000000000000..cd47b0664c2b --- /dev/null +++ b/drivers/clk/ingenic/Makefile @@ -0,0 +1,3 @@ +obj-y += cgu.o +obj-$(CONFIG_MACH_JZ4740) += jz4740-cgu.o +obj-$(CONFIG_MACH_JZ4780) += jz4780-cgu.o diff --git a/drivers/clk/ingenic/cgu.c b/drivers/clk/ingenic/cgu.c new file mode 100644 index 000000000000..b936cdd1a13c --- /dev/null +++ b/drivers/clk/ingenic/cgu.c @@ -0,0 +1,711 @@ +/* + * Ingenic SoC CGU driver + * + * Copyright (c) 2013-2015 Imagination Technologies + * Author: Paul Burton <[email protected]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/bitops.h> +#include <linux/clk-provider.h> +#include <linux/clkdev.h> +#include <linux/delay.h> +#include <linux/math64.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include "cgu.h" + +#define MHZ (1000 * 1000) + +/** + * ingenic_cgu_gate_get() - get the value of clock gate register bit + * @cgu: reference to the CGU whose registers should be read + * @info: info struct describing the gate bit + * + * Retrieves the state of the clock gate bit described by info. The + * caller must hold cgu->lock. + * + * Return: true if the gate bit is set, else false. + */ +static inline bool +ingenic_cgu_gate_get(struct ingenic_cgu *cgu, + const struct ingenic_cgu_gate_info *info) +{ + return readl(cgu->base + info->reg) & BIT(info->bit); +} + +/** + * ingenic_cgu_gate_set() - set the value of clock gate register bit + * @cgu: reference to the CGU whose registers should be modified + * @info: info struct describing the gate bit + * @val: non-zero to gate a clock, otherwise zero + * + * Sets the given gate bit in order to gate or ungate a clock. + * + * The caller must hold cgu->lock. + */ +static inline void +ingenic_cgu_gate_set(struct ingenic_cgu *cgu, + const struct ingenic_cgu_gate_info *info, bool val) +{ + u32 clkgr = readl(cgu->base + info->reg); + + if (val) + clkgr |= BIT(info->bit); + else + clkgr &= ~BIT(info->bit); + + writel(clkgr, cgu->base + info->reg); +} + +/* + * PLL operations + */ + +static unsigned long +ingenic_pll_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) +{ + struct ingenic_clk *ingenic_clk = to_ingenic_clk(hw); + struct ingenic_cgu *cgu = ingenic_clk->cgu; + const struct ingenic_cgu_clk_info *clk_info; + const struct ingenic_cgu_pll_info *pll_info; + unsigned m, n, od_enc, od; + bool bypass, enable; + unsigned long flags; + u32 ctl; + + clk_info = &cgu->clock_info[ingenic_clk->idx]; + BUG_ON(clk_info->type != CGU_CLK_PLL); + pll_info = &clk_info->pll; + + spin_lock_irqsave(&cgu->lock, flags); + ctl = readl(cgu->base + pll_info->reg); + spin_unlock_irqrestore(&cgu->lock, flags); + + m = (ctl >> pll_info->m_shift) & GENMASK(pll_info->m_bits - 1, 0); + m += pll_info->m_offset; + n = (ctl >> pll_info->n_shift) & GENMASK(pll_info->n_bits - 1, 0); + n += pll_info->n_offset; + od_enc = ctl >> pll_info->od_shift; + od_enc &= GENMASK(pll_info->od_bits - 1, 0); + bypass = !!(ctl & BIT(pll_info->bypass_bit)); + enable = !!(ctl & BIT(pll_info->enable_bit)); + + if (bypass) + return parent_rate; + + if (!enable) + return 0; + + for (od = 0; od < pll_info->od_max; od++) { + if (pll_info->od_encoding[od] == od_enc) + break; + } + BUG_ON(od == pll_info->od_max); + od++; + + return div_u64((u64)parent_rate * m, n * od); +} + +static unsigned long +ingenic_pll_calc(const struct ingenic_cgu_clk_info *clk_info, + unsigned long rate, unsigned long parent_rate, + unsigned *pm, unsigned *pn, unsigned *pod) +{ + const struct ingenic_cgu_pll_info *pll_info; + unsigned m, n, od; + + pll_info = &clk_info->pll; + od = 1; + + /* + * The frequency after the input divider must be between 10 and 50 MHz. + * The highest divider yields the best resolution. + */ + n = parent_rate / (10 * MHZ); + n = min_t(unsigned, n, 1 << clk_info->pll.n_bits); + n = max_t(unsigned, n, pll_info->n_offset); + + m = (rate / MHZ) * od * n / (parent_rate / MHZ); + m = min_t(unsigned, m, 1 << clk_info->pll.m_bits); + m = max_t(unsigned, m, pll_info->m_offset); + + if (pm) + *pm = m; + if (pn) + *pn = n; + if (pod) + *pod = od; + + return div_u64((u64)parent_rate * m, n * od); +} + +static long +ingenic_pll_round_rate(struct clk_hw *hw, unsigned long req_rate, + unsigned long *prate) +{ + struct ingenic_clk *ingenic_clk = to_ingenic_clk(hw); + struct ingenic_cgu *cgu = ingenic_clk->cgu; + const struct ingenic_cgu_clk_info *clk_info; + + clk_info = &cgu->clock_info[ingenic_clk->idx]; + BUG_ON(clk_info->type != CGU_CLK_PLL); + + return ingenic_pll_calc(clk_info, req_rate, *prate, NULL, NULL, NULL); +} + +static int +ingenic_pll_set_rate(struct clk_hw *hw, unsigned long req_rate, + unsigned long parent_rate) +{ + const unsigned timeout = 100; + struct ingenic_clk *ingenic_clk = to_ingenic_clk(hw); + struct ingenic_cgu *cgu = ingenic_clk->cgu; + const struct ingenic_cgu_clk_info *clk_info; + const struct ingenic_cgu_pll_info *pll_info; + unsigned long rate, flags; + unsigned m, n, od, i; + u32 ctl; + + clk_info = &cgu->clock_info[ingenic_clk->idx]; + BUG_ON(clk_info->type != CGU_CLK_PLL); + pll_info = &clk_info->pll; + + rate = ingenic_pll_calc(clk_info, req_rate, parent_rate, + &m, &n, &od); + if (rate != req_rate) + pr_info("ingenic-cgu: request '%s' rate %luHz, actual %luHz\n", + clk_info->name, req_rate, rate); + + spin_lock_irqsave(&cgu->lock, flags); + ctl = readl(cgu->base + pll_info->reg); + + ctl &= ~(GENMASK(pll_info->m_bits - 1, 0) << pll_info->m_shift); + ctl |= (m - pll_info->m_offset) << pll_info->m_shift; + + ctl &= ~(GENMASK(pll_info->n_bits - 1, 0) << pll_info->n_shift); + ctl |= (n - pll_info->n_offset) << pll_info->n_shift; + + ctl &= ~(GENMASK(pll_info->od_bits - 1, 0) << pll_info->od_shift); + ctl |= pll_info->od_encoding[od - 1] << pll_info->od_shift; + + ctl &= ~BIT(pll_info->bypass_bit); + ctl |= BIT(pll_info->enable_bit); + + writel(ctl, cgu->base + pll_info->reg); + + /* wait for the PLL to stabilise */ + for (i = 0; i < timeout; i++) { + ctl = readl(cgu->base + pll_info->reg); + if (ctl & BIT(pll_info->stable_bit)) + break; + mdelay(1); + } + + spin_unlock_irqrestore(&cgu->lock, flags); + + if (i == timeout) + return -EBUSY; + + return 0; +} + +static const struct clk_ops ingenic_pll_ops = { + .recalc_rate = ingenic_pll_recalc_rate, + .round_rate = ingenic_pll_round_rate, + .set_rate = ingenic_pll_set_rate, +}; + +/* + * Operations for all non-PLL clocks + */ + +static u8 ingenic_clk_get_parent(struct clk_hw *hw) +{ + struct ingenic_clk *ingenic_clk = to_ingenic_clk(hw); + struct ingenic_cgu *cgu = ingenic_clk->cgu; + const struct ingenic_cgu_clk_info *clk_info; + u32 reg; + u8 i, hw_idx, idx = 0; + + clk_info = &cgu->clock_info[ingenic_clk->idx]; + + if (clk_info->type & CGU_CLK_MUX) { + reg = readl(cgu->base + clk_info->mux.reg); + hw_idx = (reg >> clk_info->mux.shift) & + GENMASK(clk_info->mux.bits - 1, 0); + + /* + * Convert the hardware index to the parent index by skipping + * over any -1's in the parents array. + */ + for (i = 0; i < hw_idx; i++) { + if (clk_info->parents[i] != -1) + idx++; + } + } + + return idx; +} + +static int ingenic_clk_set_parent(struct clk_hw *hw, u8 idx) +{ + struct ingenic_clk *ingenic_clk = to_ingenic_clk(hw); + struct ingenic_cgu *cgu = ingenic_clk->cgu; + const struct ingenic_cgu_clk_info *clk_info; + unsigned long flags; + u8 curr_idx, hw_idx, num_poss; + u32 reg, mask; + + clk_info = &cgu->clock_info[ingenic_clk->idx]; + + if (clk_info->type & CGU_CLK_MUX) { + /* + * Convert the parent index to the hardware index by adding + * 1 for any -1 in the parents array preceding the given + * index. That is, we want the index of idx'th entry in + * clk_info->parents which does not equal -1. + */ + hw_idx = curr_idx = 0; + num_poss = 1 << clk_info->mux.bits; + for (; hw_idx < num_poss; hw_idx++) { + if (clk_info->parents[hw_idx] == -1) + continue; + if (curr_idx == idx) + break; + curr_idx++; + } + + /* idx should always be a valid parent */ + BUG_ON(curr_idx != idx); + + mask = GENMASK(clk_info->mux.bits - 1, 0); + mask <<= clk_info->mux.shift; + + spin_lock_irqsave(&cgu->lock, flags); + + /* write the register */ + reg = readl(cgu->base + clk_info->mux.reg); + reg &= ~mask; + reg |= hw_idx << clk_info->mux.shift; + writel(reg, cgu->base + clk_info->mux.reg); + + spin_unlock_irqrestore(&cgu->lock, flags); + return 0; + } + + return idx ? -EINVAL : 0; +} + +static unsigned long +ingenic_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) +{ + struct ingenic_clk *ingenic_clk = to_ingenic_clk(hw); + struct ingenic_cgu *cgu = ingenic_clk->cgu; + const struct ingenic_cgu_clk_info *clk_info; + unsigned long rate = parent_rate; + u32 div_reg, div; + + clk_info = &cgu->clock_info[ingenic_clk->idx]; + + if (clk_info->type & CGU_CLK_DIV) { + div_reg = readl(cgu->base + clk_info->div.reg); + div = (div_reg >> clk_info->div.shift) & + GENMASK(clk_info->div.bits - 1, 0); + div += 1; + + rate /= div; + } + + return rate; +} + +static unsigned +ingenic_clk_calc_div(const struct ingenic_cgu_clk_info *clk_info, + unsigned long parent_rate, unsigned long req_rate) +{ + unsigned div; + + /* calculate the divide */ + div = DIV_ROUND_UP(parent_rate, req_rate); + + /* and impose hardware constraints */ + div = min_t(unsigned, div, 1 << clk_info->div.bits); + div = max_t(unsigned, div, 1); + + return div; +} + +static long +ingenic_clk_round_rate(struct clk_hw *hw, unsigned long req_rate, + unsigned long *parent_rate) +{ + struct ingenic_clk *ingenic_clk = to_ingenic_clk(hw); + struct ingenic_cgu *cgu = ingenic_clk->cgu; + const struct ingenic_cgu_clk_info *clk_info; + long rate = *parent_rate; + + clk_info = &cgu->clock_info[ingenic_clk->idx]; + + if (clk_info->type & CGU_CLK_DIV) + rate /= ingenic_clk_calc_div(clk_info, *parent_rate, req_rate); + else if (clk_info->type & CGU_CLK_FIXDIV) + rate /= clk_info->fixdiv.div; + + return rate; +} + +static int +ingenic_clk_set_rate(struct clk_hw *hw, unsigned long req_rate, + unsigned long parent_rate) +{ + struct ingenic_clk *ingenic_clk = to_ingenic_clk(hw); + struct ingenic_cgu *cgu = ingenic_clk->cgu; + const struct ingenic_cgu_clk_info *clk_info; + const unsigned timeout = 100; + unsigned long rate, flags; + unsigned div, i; + u32 reg, mask; + int ret = 0; + + clk_info = &cgu->clock_info[ingenic_clk->idx]; + + if (clk_info->type & CGU_CLK_DIV) { + div = ingenic_clk_calc_div(clk_info, parent_rate, req_rate); + rate = parent_rate / div; + + if (rate != req_rate) + return -EINVAL; + + spin_lock_irqsave(&cgu->lock, flags); + reg = readl(cgu->base + clk_info->div.reg); + + /* update the divide */ + mask = GENMASK(clk_info->div.bits - 1, 0); + reg &= ~(mask << clk_info->div.shift); + reg |= (div - 1) << clk_info->div.shift; + + /* clear the stop bit */ + if (clk_info->div.stop_bit != -1) + reg &= ~BIT(clk_info->div.stop_bit); + + /* set the change enable bit */ + if (clk_info->div.ce_bit != -1) + reg |= BIT(clk_info->div.ce_bit); + + /* update the hardware */ + writel(reg, cgu->base + clk_info->div.reg); + + /* wait for the change to take effect */ + if (clk_info->div.busy_bit != -1) { + for (i = 0; i < timeout; i++) { + reg = readl(cgu->base + clk_info->div.reg); + if (!(reg & BIT(clk_info->div.busy_bit))) + break; + mdelay(1); + } + if (i == timeout) + ret = -EBUSY; + } + + spin_unlock_irqrestore(&cgu->lock, flags); + return ret; + } + + return -EINVAL; +} + +static int ingenic_clk_enable(struct clk_hw *hw) +{ + struct ingenic_clk *ingenic_clk = to_ingenic_clk(hw); + struct ingenic_cgu *cgu = ingenic_clk->cgu; + const struct ingenic_cgu_clk_info *clk_info; + unsigned long flags; + + clk_info = &cgu->clock_info[ingenic_clk->idx]; + + if (clk_info->type & CGU_CLK_GATE) { + /* ungate the clock */ + spin_lock_irqsave(&cgu->lock, flags); + ingenic_cgu_gate_set(cgu, &clk_info->gate, false); + spin_unlock_irqrestore(&cgu->lock, flags); + } + + return 0; +} + +static void ingenic_clk_disable(struct clk_hw *hw) +{ + struct ingenic_clk *ingenic_clk = to_ingenic_clk(hw); + struct ingenic_cgu *cgu = ingenic_clk->cgu; + const struct ingenic_cgu_clk_info *clk_info; + unsigned long flags; + + clk_info = &cgu->clock_info[ingenic_clk->idx]; + + if (clk_info->type & CGU_CLK_GATE) { + /* gate the clock */ + spin_lock_irqsave(&cgu->lock, flags); + ingenic_cgu_gate_set(cgu, &clk_info->gate, true); + spin_unlock_irqrestore(&cgu->lock, flags); + } +} + +static int ingenic_clk_is_enabled(struct clk_hw *hw) +{ + struct ingenic_clk *ingenic_clk = to_ingenic_clk(hw); + struct ingenic_cgu *cgu = ingenic_clk->cgu; + const struct ingenic_cgu_clk_info *clk_info; + unsigned long flags; + int enabled = 1; + + clk_info = &cgu->clock_info[ingenic_clk->idx]; + + if (clk_info->type & CGU_CLK_GATE) { + spin_lock_irqsave(&cgu->lock, flags); + enabled = !ingenic_cgu_gate_get(cgu, &clk_info->gate); + spin_unlock_irqrestore(&cgu->lock, flags); + } + + return enabled; +} + +static const struct clk_ops ingenic_clk_ops = { + .get_parent = ingenic_clk_get_parent, + .set_parent = ingenic_clk_set_parent, + + .recalc_rate = ingenic_clk_recalc_rate, + .round_rate = ingenic_clk_round_rate, + .set_rate = ingenic_clk_set_rate, + + .enable = ingenic_clk_enable, + .disable = ingenic_clk_disable, + .is_enabled = ingenic_clk_is_enabled, +}; + +/* + * Setup functions. + */ + +static int ingenic_register_clock(struct ingenic_cgu *cgu, unsigned idx) +{ + const struct ingenic_cgu_clk_info *clk_info = &cgu->clock_info[idx]; + struct clk_init_data clk_init; + struct ingenic_clk *ingenic_clk = NULL; + struct clk *clk, *parent; + const char *parent_names[4]; + unsigned caps, i, num_possible; + int err = -EINVAL; + + BUILD_BUG_ON(ARRAY_SIZE(clk_info->parents) > ARRAY_SIZE(parent_names)); + + if (clk_info->type == CGU_CLK_EXT) { + clk = of_clk_get_by_name(cgu->np, clk_info->name); + if (IS_ERR(clk)) { + pr_err("%s: no external clock '%s' provided\n", + __func__, clk_info->name); + err = -ENODEV; + goto out; + } + err = clk_register_clkdev(clk, clk_info->name, NULL); + if (err) { + clk_put(clk); + goto out; + } + cgu->clocks.clks[idx] = clk; + return 0; + } + + if (!clk_info->type) { + pr_err("%s: no clock type specified for '%s'\n", __func__, + clk_info->name); + goto out; + } + + ingenic_clk = kzalloc(sizeof(*ingenic_clk), GFP_KERNEL); + if (!ingenic_clk) { + err = -ENOMEM; + goto out; + } + + ingenic_clk->hw.init = &clk_init; + ingenic_clk->cgu = cgu; + ingenic_clk->idx = idx; + + clk_init.name = clk_info->name; + clk_init.flags = 0; + clk_init.parent_names = parent_names; + + caps = clk_info->type; + + if (caps & (CGU_CLK_MUX | CGU_CLK_CUSTOM)) { + clk_init.num_parents = 0; + + if (caps & CGU_CLK_MUX) + num_possible = 1 << clk_info->mux.bits; + else + num_possible = ARRAY_SIZE(clk_info->parents); + + for (i = 0; i < num_possible; i++) { + if (clk_info->parents[i] == -1) + continue; + + parent = cgu->clocks.clks[clk_info->parents[i]]; + parent_names[clk_init.num_parents] = + __clk_get_name(parent); + clk_init.num_parents++; + } + + BUG_ON(!clk_init.num_parents); + BUG_ON(clk_init.num_parents > ARRAY_SIZE(parent_names)); + } else { + BUG_ON(clk_info->parents[0] == -1); + clk_init.num_parents = 1; + parent = cgu->clocks.clks[clk_info->parents[0]]; + parent_names[0] = __clk_get_name(parent); + } + + if (caps & CGU_CLK_CUSTOM) { + clk_init.ops = clk_info->custom.clk_ops; + + caps &= ~CGU_CLK_CUSTOM; + + if (caps) { + pr_err("%s: custom clock may not be combined with type 0x%x\n", + __func__, caps); + goto out; + } + } else if (caps & CGU_CLK_PLL) { + clk_init.ops = &ingenic_pll_ops; + + caps &= ~CGU_CLK_PLL; + + if (caps) { + pr_err("%s: PLL may not be combined with type 0x%x\n", + __func__, caps); + goto out; + } + } else { + clk_init.ops = &ingenic_clk_ops; + } + + /* nothing to do for gates or fixed dividers */ + caps &= ~(CGU_CLK_GATE | CGU_CLK_FIXDIV); + + if (caps & CGU_CLK_MUX) { + if (!(caps & CGU_CLK_MUX_GLITCHFREE)) + clk_init.flags |= CLK_SET_PARENT_GATE; + + caps &= ~(CGU_CLK_MUX | CGU_CLK_MUX_GLITCHFREE); + } + + if (caps & CGU_CLK_DIV) { + caps &= ~CGU_CLK_DIV; + } else { + /* pass rate changes to the parent clock */ + clk_init.flags |= CLK_SET_RATE_PARENT; + } + + if (caps) { + pr_err("%s: unknown clock type 0x%x\n", __func__, caps); + goto out; + } + + clk = clk_register(NULL, &ingenic_clk->hw); + if (IS_ERR(clk)) { + pr_err("%s: failed to register clock '%s'\n", __func__, + clk_info->name); + err = PTR_ERR(clk); + goto out; + } + + err = clk_register_clkdev(clk, clk_info->name, NULL); + if (err) + goto out; + + cgu->clocks.clks[idx] = clk; +out: + if (err) + kfree(ingenic_clk); + return err; +} + +struct ingenic_cgu * +ingenic_cgu_new(const struct ingenic_cgu_clk_info *clock_info, + unsigned num_clocks, struct device_node *np) +{ + struct ingenic_cgu *cgu; + + cgu = kzalloc(sizeof(*cgu), GFP_KERNEL); + if (!cgu) + goto err_out; + + cgu->base = of_iomap(np, 0); + if (!cgu->base) { + pr_err("%s: failed to map CGU registers\n", __func__); + goto err_out_free; + } + + cgu->np = np; + cgu->clock_info = clock_info; + cgu->clocks.clk_num = num_clocks; + + spin_lock_init(&cgu->lock); + + return cgu; + +err_out_free: + kfree(cgu); +err_out: + return NULL; +} + +int ingenic_cgu_register_clocks(struct ingenic_cgu *cgu) +{ + unsigned i; + int err; + + cgu->clocks.clks = kcalloc(cgu->clocks.clk_num, sizeof(struct clk *), + GFP_KERNEL); + if (!cgu->clocks.clks) { + err = -ENOMEM; + goto err_out; + } + + for (i = 0; i < cgu->clocks.clk_num; i++) { + err = ingenic_register_clock(cgu, i); + if (err) + goto err_out_unregister; + } + + err = of_clk_add_provider(cgu->np, of_clk_src_onecell_get, + &cgu->clocks); + if (err) + goto err_out_unregister; + + return 0; + +err_out_unregister: + for (i = 0; i < cgu->clocks.clk_num; i++) { + if (!cgu->clocks.clks[i]) + continue; + if (cgu->clock_info[i].type & CGU_CLK_EXT) + clk_put(cgu->clocks.clks[i]); + else + clk_unregister(cgu->clocks.clks[i]); + } + kfree(cgu->clocks.clks); +err_out: + return err; +} diff --git a/drivers/clk/ingenic/cgu.h b/drivers/clk/ingenic/cgu.h new file mode 100644 index 000000000000..99347e2b97e8 --- /dev/null +++ b/drivers/clk/ingenic/cgu.h @@ -0,0 +1,223 @@ +/* + * Ingenic SoC CGU driver + * + * Copyright (c) 2013-2015 Imagination Technologies + * Author: Paul Burton <[email protected]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __DRIVERS_CLK_INGENIC_CGU_H__ +#define __DRIVERS_CLK_INGENIC_CGU_H__ + +#include <linux/bitops.h> +#include <linux/of.h> +#include <linux/spinlock.h> + +/** + * struct ingenic_cgu_pll_info - information about a PLL + * @reg: the offset of the PLL's control register within the CGU + * @m_shift: the number of bits to shift the multiplier value by (ie. the + * index of the lowest bit of the multiplier value in the PLL's + * control register) + * @m_bits: the size of the multiplier field in bits + * @m_offset: the multiplier value which encodes to 0 in the PLL's control + * register + * @n_shift: the number of bits to shift the divider value by (ie. the + * index of the lowest bit of the divider value in the PLL's + * control register) + * @n_bits: the size of the divider field in bits + * @n_offset: the divider value which encodes to 0 in the PLL's control + * register + * @od_shift: the number of bits to shift the post-VCO divider value by (ie. + * the index of the lowest bit of the post-VCO divider value in + * the PLL's control register) + * @od_bits: the size of the post-VCO divider field in bits + * @od_max: the maximum post-VCO divider value + * @od_encoding: a pointer to an array mapping post-VCO divider values to + * their encoded values in the PLL control register, or -1 for + * unsupported values + * @bypass_bit: the index of the bypass bit in the PLL control register + * @enable_bit: the index of the enable bit in the PLL control register + * @stable_bit: the index of the stable bit in the PLL control register + */ +struct ingenic_cgu_pll_info { + unsigned reg; + const s8 *od_encoding; + u8 m_shift, m_bits, m_offset; + u8 n_shift, n_bits, n_offset; + u8 od_shift, od_bits, od_max; + u8 bypass_bit; + u8 enable_bit; + u8 stable_bit; +}; + +/** + * struct ingenic_cgu_mux_info - information about a clock mux + * @reg: offset of the mux control register within the CGU + * @shift: number of bits to shift the mux value by (ie. the index of + * the lowest bit of the mux value within its control register) + * @bits: the size of the mux value in bits + */ +struct ingenic_cgu_mux_info { + unsigned reg; + u8 shift; + u8 bits; +}; + +/** + * struct ingenic_cgu_div_info - information about a divider + * @reg: offset of the divider control register within the CGU + * @shift: number of bits to shift the divide value by (ie. the index of + * the lowest bit of the divide value within its control register) + * @bits: the size of the divide value in bits + * @ce_bit: the index of the change enable bit within reg, or -1 if there + * isn't one + * @busy_bit: the index of the busy bit within reg, or -1 if there isn't one + * @stop_bit: the index of the stop bit within reg, or -1 if there isn't one + */ +struct ingenic_cgu_div_info { + unsigned reg; + u8 shift; + u8 bits; + s8 ce_bit; + s8 busy_bit; + s8 stop_bit; +}; + +/** + * struct ingenic_cgu_fixdiv_info - information about a fixed divider + * @div: the divider applied to the parent clock + */ +struct ingenic_cgu_fixdiv_info { + unsigned div; +}; + +/** + * struct ingenic_cgu_gate_info - information about a clock gate + * @reg: offset of the gate control register within the CGU + * @bit: offset of the bit in the register that controls the gate + */ +struct ingenic_cgu_gate_info { + unsigned reg; + u8 bit; +}; + +/** + * struct ingenic_cgu_custom_info - information about a custom (SoC) clock + * @clk_ops: custom clock operation callbacks + */ +struct ingenic_cgu_custom_info { + struct clk_ops *clk_ops; +}; + +/** + * struct ingenic_cgu_clk_info - information about a clock + * @name: name of the clock + * @type: a bitmask formed from CGU_CLK_* values + * @parents: an array of the indices of potential parents of this clock + * within the clock_info array of the CGU, or -1 in entries + * which correspond to no valid parent + * @pll: information valid if type includes CGU_CLK_PLL + * @gate: information valid if type includes CGU_CLK_GATE + * @mux: information valid if type includes CGU_CLK_MUX + * @div: information valid if type includes CGU_CLK_DIV + * @fixdiv: information valid if type includes CGU_CLK_FIXDIV + * @custom: information valid if type includes CGU_CLK_CUSTOM + */ +struct ingenic_cgu_clk_info { + const char *name; + + enum { + CGU_CLK_NONE = 0, + CGU_CLK_EXT = BIT(0), + CGU_CLK_PLL = BIT(1), + CGU_CLK_GATE = BIT(2), + CGU_CLK_MUX = BIT(3), + CGU_CLK_MUX_GLITCHFREE = BIT(4), + CGU_CLK_DIV = BIT(5), + CGU_CLK_FIXDIV = BIT(6), + CGU_CLK_CUSTOM = BIT(7), + } type; + + int parents[4]; + + union { + struct ingenic_cgu_pll_info pll; + + struct { + struct ingenic_cgu_gate_info gate; + struct ingenic_cgu_mux_info mux; + struct ingenic_cgu_div_info div; + struct ingenic_cgu_fixdiv_info fixdiv; + }; + + struct ingenic_cgu_custom_info custom; + }; +}; + +/** + * struct ingenic_cgu - data about the CGU + * @np: the device tree node that caused the CGU to be probed + * @base: the ioremap'ed base address of the CGU registers + * @clock_info: an array containing information about implemented clocks + * @clocks: used to provide clocks to DT, allows lookup of struct clk* + * @lock: lock to be held whilst manipulating CGU registers + */ +struct ingenic_cgu { + struct device_node *np; + void __iomem *base; + + const struct ingenic_cgu_clk_info *clock_info; + struct clk_onecell_data clocks; + + spinlock_t lock; +}; + +/** + * struct ingenic_clk - private data for a clock + * @hw: see Documentation/clk.txt + * @cgu: a pointer to the CGU data + * @idx: the index of this clock in cgu->clock_info + */ +struct ingenic_clk { + struct clk_hw hw; + struct ingenic_cgu *cgu; + unsigned idx; +}; + +#define to_ingenic_clk(_hw) container_of(_hw, struct ingenic_clk, hw) + +/** + * ingenic_cgu_new() - create a new CGU instance + * @clock_info: an array of clock information structures describing the clocks + * which are implemented by the CGU + * @num_clocks: the number of entries in clock_info + * @np: the device tree node which causes this CGU to be probed + * + * Return: a pointer to the CGU instance if initialisation is successful, + * otherwise NULL. + */ +struct ingenic_cgu * +ingenic_cgu_new(const struct ingenic_cgu_clk_info *clock_info, + unsigned num_clocks, struct device_node *np); + +/** + * ingenic_cgu_register_clocks() - Registers the clocks + * @cgu: pointer to cgu data + * + * Register the clocks described by the CGU with the common clock framework. + * + * Return: 0 on success or -errno if unsuccesful. + */ +int ingenic_cgu_register_clocks(struct ingenic_cgu *cgu); + +#endif /* __DRIVERS_CLK_INGENIC_CGU_H__ */ diff --git a/drivers/clk/ingenic/jz4740-cgu.c b/drivers/clk/ingenic/jz4740-cgu.c new file mode 100644 index 000000000000..305a26c2a800 --- /dev/null +++ b/drivers/clk/ingenic/jz4740-cgu.c @@ -0,0 +1,303 @@ +/* + * Ingenic JZ4740 SoC CGU driver + * + * Copyright (c) 2015 Imagination Technologies + * Author: Paul Burton <[email protected]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/clk-provider.h> +#include <linux/delay.h> +#include <linux/of.h> +#include <dt-bindings/clock/jz4740-cgu.h> +#include <asm/mach-jz4740/clock.h> +#include "cgu.h" + +/* CGU register offsets */ +#define CGU_REG_CPCCR 0x00 +#define CGU_REG_LCR 0x04 +#define CGU_REG_CPPCR 0x10 +#define CGU_REG_CLKGR 0x20 +#define CGU_REG_SCR 0x24 +#define CGU_REG_I2SCDR 0x60 +#define CGU_REG_LPCDR 0x64 +#define CGU_REG_MSCCDR 0x68 +#define CGU_REG_UHCCDR 0x6c +#define CGU_REG_SSICDR 0x74 + +/* bits within a PLL control register */ +#define PLLCTL_M_SHIFT 23 +#define PLLCTL_M_MASK (0x1ff << PLLCTL_M_SHIFT) +#define PLLCTL_N_SHIFT 18 +#define PLLCTL_N_MASK (0x1f << PLLCTL_N_SHIFT) +#define PLLCTL_OD_SHIFT 16 +#define PLLCTL_OD_MASK (0x3 << PLLCTL_OD_SHIFT) +#define PLLCTL_STABLE (1 << 10) +#define PLLCTL_BYPASS (1 << 9) +#define PLLCTL_ENABLE (1 << 8) + +/* bits within the LCR register */ +#define LCR_SLEEP (1 << 0) + +/* bits within the CLKGR register */ +#define CLKGR_UDC (1 << 11) + +static struct ingenic_cgu *cgu; + +static const s8 pll_od_encoding[4] = { + 0x0, 0x1, -1, 0x3, +}; + +static const struct ingenic_cgu_clk_info jz4740_cgu_clocks[] = { + + /* External clocks */ + + [JZ4740_CLK_EXT] = { "ext", CGU_CLK_EXT }, + [JZ4740_CLK_RTC] = { "rtc", CGU_CLK_EXT }, + + [JZ4740_CLK_PLL] = { + "pll", CGU_CLK_PLL, + .parents = { JZ4740_CLK_EXT, -1, -1, -1 }, + .pll = { + .reg = CGU_REG_CPPCR, + .m_shift = 23, + .m_bits = 9, + .m_offset = 2, + .n_shift = 18, + .n_bits = 5, + .n_offset = 2, + .od_shift = 16, + .od_bits = 2, + .od_max = 4, + .od_encoding = pll_od_encoding, + .stable_bit = 10, + .bypass_bit = 9, + .enable_bit = 8, + }, + }, + + /* Muxes & dividers */ + + [JZ4740_CLK_PLL_HALF] = { + "pll half", CGU_CLK_DIV, + .parents = { JZ4740_CLK_PLL, -1, -1, -1 }, + .div = { CGU_REG_CPCCR, 21, 1, -1, -1, -1 }, + }, + + [JZ4740_CLK_CCLK] = { + "cclk", CGU_CLK_DIV, + .parents = { JZ4740_CLK_PLL, -1, -1, -1 }, + .div = { CGU_REG_CPCCR, 0, 4, 22, -1, -1 }, + }, + + [JZ4740_CLK_HCLK] = { + "hclk", CGU_CLK_DIV, + .parents = { JZ4740_CLK_PLL, -1, -1, -1 }, + .div = { CGU_REG_CPCCR, 4, 4, 22, -1, -1 }, + }, + + [JZ4740_CLK_PCLK] = { + "pclk", CGU_CLK_DIV, + .parents = { JZ4740_CLK_PLL, -1, -1, -1 }, + .div = { CGU_REG_CPCCR, 8, 4, 22, -1, -1 }, + }, + + [JZ4740_CLK_MCLK] = { + "mclk", CGU_CLK_DIV, + .parents = { JZ4740_CLK_PLL, -1, -1, -1 }, + .div = { CGU_REG_CPCCR, 12, 4, 22, -1, -1 }, + }, + + [JZ4740_CLK_LCD] = { + "lcd", CGU_CLK_DIV | CGU_CLK_GATE, + .parents = { JZ4740_CLK_PLL_HALF, -1, -1, -1 }, + .div = { CGU_REG_CPCCR, 16, 5, 22, -1, -1 }, + .gate = { CGU_REG_CLKGR, 10 }, + }, + + [JZ4740_CLK_LCD_PCLK] = { + "lcd_pclk", CGU_CLK_DIV, + .parents = { JZ4740_CLK_PLL_HALF, -1, -1, -1 }, + .div = { CGU_REG_LPCDR, 0, 11, -1, -1, -1 }, + }, + + [JZ4740_CLK_I2S] = { + "i2s", CGU_CLK_MUX | CGU_CLK_DIV | CGU_CLK_GATE, + .parents = { JZ4740_CLK_EXT, JZ4740_CLK_PLL_HALF, -1, -1 }, + .mux = { CGU_REG_CPCCR, 31, 1 }, + .div = { CGU_REG_I2SCDR, 0, 8, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR, 6 }, + }, + + [JZ4740_CLK_SPI] = { + "spi", CGU_CLK_MUX | CGU_CLK_DIV | CGU_CLK_GATE, + .parents = { JZ4740_CLK_EXT, JZ4740_CLK_PLL, -1, -1 }, + .mux = { CGU_REG_SSICDR, 31, 1 }, + .div = { CGU_REG_SSICDR, 0, 4, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR, 4 }, + }, + + [JZ4740_CLK_MMC] = { + "mmc", CGU_CLK_DIV | CGU_CLK_GATE, + .parents = { JZ4740_CLK_PLL_HALF, -1, -1, -1 }, + .div = { CGU_REG_MSCCDR, 0, 5, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR, 7 }, + }, + + [JZ4740_CLK_UHC] = { + "uhc", CGU_CLK_DIV | CGU_CLK_GATE, + .parents = { JZ4740_CLK_PLL_HALF, -1, -1, -1 }, + .div = { CGU_REG_UHCCDR, 0, 4, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR, 14 }, + }, + + [JZ4740_CLK_UDC] = { + "udc", CGU_CLK_MUX | CGU_CLK_DIV, + .parents = { JZ4740_CLK_EXT, JZ4740_CLK_PLL_HALF, -1, -1 }, + .mux = { CGU_REG_CPCCR, 29, 1 }, + .div = { CGU_REG_CPCCR, 23, 6, -1, -1, -1 }, + .gate = { CGU_REG_SCR, 6 }, + }, + + /* Gate-only clocks */ + + [JZ4740_CLK_UART0] = { + "uart0", CGU_CLK_GATE, + .parents = { JZ4740_CLK_EXT, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR, 0 }, + }, + + [JZ4740_CLK_UART1] = { + "uart1", CGU_CLK_GATE, + .parents = { JZ4740_CLK_EXT, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR, 15 }, + }, + + [JZ4740_CLK_DMA] = { + "dma", CGU_CLK_GATE, + .parents = { JZ4740_CLK_PCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR, 12 }, + }, + + [JZ4740_CLK_IPU] = { + "ipu", CGU_CLK_GATE, + .parents = { JZ4740_CLK_PCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR, 13 }, + }, + + [JZ4740_CLK_ADC] = { + "adc", CGU_CLK_GATE, + .parents = { JZ4740_CLK_EXT, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR, 8 }, + }, + + [JZ4740_CLK_I2C] = { + "i2c", CGU_CLK_GATE, + .parents = { JZ4740_CLK_EXT, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR, 3 }, + }, + + [JZ4740_CLK_AIC] = { + "aic", CGU_CLK_GATE, + .parents = { JZ4740_CLK_EXT, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR, 5 }, + }, +}; + +static void __init jz4740_cgu_init(struct device_node *np) +{ + int retval; + + cgu = ingenic_cgu_new(jz4740_cgu_clocks, + ARRAY_SIZE(jz4740_cgu_clocks), np); + if (!cgu) { + pr_err("%s: failed to initialise CGU\n", __func__); + return; + } + + retval = ingenic_cgu_register_clocks(cgu); + if (retval) + pr_err("%s: failed to register CGU Clocks\n", __func__); +} +CLK_OF_DECLARE(jz4740_cgu, "ingenic,jz4740-cgu", jz4740_cgu_init); + +void jz4740_clock_set_wait_mode(enum jz4740_wait_mode mode) +{ + uint32_t lcr = readl(cgu->base + CGU_REG_LCR); + + switch (mode) { + case JZ4740_WAIT_MODE_IDLE: + lcr &= ~LCR_SLEEP; + break; + + case JZ4740_WAIT_MODE_SLEEP: + lcr |= LCR_SLEEP; + break; + } + + writel(lcr, cgu->base + CGU_REG_LCR); +} + +void jz4740_clock_udc_disable_auto_suspend(void) +{ + uint32_t clkgr = readl(cgu->base + CGU_REG_CLKGR); + + clkgr &= ~CLKGR_UDC; + writel(clkgr, cgu->base + CGU_REG_CLKGR); +} +EXPORT_SYMBOL_GPL(jz4740_clock_udc_disable_auto_suspend); + +void jz4740_clock_udc_enable_auto_suspend(void) +{ + uint32_t clkgr = readl(cgu->base + CGU_REG_CLKGR); + + clkgr |= CLKGR_UDC; + writel(clkgr, cgu->base + CGU_REG_CLKGR); +} +EXPORT_SYMBOL_GPL(jz4740_clock_udc_enable_auto_suspend); + +#define JZ_CLOCK_GATE_UART0 BIT(0) +#define JZ_CLOCK_GATE_TCU BIT(1) +#define JZ_CLOCK_GATE_DMAC BIT(12) + +void jz4740_clock_suspend(void) +{ + uint32_t clkgr, cppcr; + + clkgr = readl(cgu->base + CGU_REG_CLKGR); + clkgr |= JZ_CLOCK_GATE_TCU | JZ_CLOCK_GATE_DMAC | JZ_CLOCK_GATE_UART0; + writel(clkgr, cgu->base + CGU_REG_CLKGR); + + cppcr = readl(cgu->base + CGU_REG_CPPCR); + cppcr &= ~BIT(jz4740_cgu_clocks[JZ4740_CLK_PLL].pll.enable_bit); + writel(cppcr, cgu->base + CGU_REG_CPPCR); +} + +void jz4740_clock_resume(void) +{ + uint32_t clkgr, cppcr, stable; + + cppcr = readl(cgu->base + CGU_REG_CPPCR); + cppcr |= BIT(jz4740_cgu_clocks[JZ4740_CLK_PLL].pll.enable_bit); + writel(cppcr, cgu->base + CGU_REG_CPPCR); + + stable = BIT(jz4740_cgu_clocks[JZ4740_CLK_PLL].pll.stable_bit); + do { + cppcr = readl(cgu->base + CGU_REG_CPPCR); + } while (!(cppcr & stable)); + + clkgr = readl(cgu->base + CGU_REG_CLKGR); + clkgr &= ~JZ_CLOCK_GATE_TCU; + clkgr &= ~JZ_CLOCK_GATE_DMAC; + clkgr &= ~JZ_CLOCK_GATE_UART0; + writel(clkgr, cgu->base + CGU_REG_CLKGR); +} diff --git a/drivers/clk/ingenic/jz4780-cgu.c b/drivers/clk/ingenic/jz4780-cgu.c new file mode 100644 index 000000000000..431f962300b6 --- /dev/null +++ b/drivers/clk/ingenic/jz4780-cgu.c @@ -0,0 +1,733 @@ +/* + * Ingenic JZ4780 SoC CGU driver + * + * Copyright (c) 2013-2015 Imagination Technologies + * Author: Paul Burton <[email protected]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/clk-provider.h> +#include <linux/delay.h> +#include <linux/of.h> +#include <dt-bindings/clock/jz4780-cgu.h> +#include "cgu.h" + +/* CGU register offsets */ +#define CGU_REG_CLOCKCONTROL 0x00 +#define CGU_REG_PLLCONTROL 0x0c +#define CGU_REG_APLL 0x10 +#define CGU_REG_MPLL 0x14 +#define CGU_REG_EPLL 0x18 +#define CGU_REG_VPLL 0x1c +#define CGU_REG_CLKGR0 0x20 +#define CGU_REG_OPCR 0x24 +#define CGU_REG_CLKGR1 0x28 +#define CGU_REG_DDRCDR 0x2c +#define CGU_REG_VPUCDR 0x30 +#define CGU_REG_USBPCR 0x3c +#define CGU_REG_USBRDT 0x40 +#define CGU_REG_USBVBFIL 0x44 +#define CGU_REG_USBPCR1 0x48 +#define CGU_REG_LP0CDR 0x54 +#define CGU_REG_I2SCDR 0x60 +#define CGU_REG_LP1CDR 0x64 +#define CGU_REG_MSC0CDR 0x68 +#define CGU_REG_UHCCDR 0x6c +#define CGU_REG_SSICDR 0x74 +#define CGU_REG_CIMCDR 0x7c +#define CGU_REG_PCMCDR 0x84 +#define CGU_REG_GPUCDR 0x88 +#define CGU_REG_HDMICDR 0x8c +#define CGU_REG_MSC1CDR 0xa4 +#define CGU_REG_MSC2CDR 0xa8 +#define CGU_REG_BCHCDR 0xac +#define CGU_REG_CLOCKSTATUS 0xd4 + +/* bits within the OPCR register */ +#define OPCR_SPENDN0 (1 << 7) +#define OPCR_SPENDN1 (1 << 6) + +/* bits within the USBPCR register */ +#define USBPCR_USB_MODE BIT(31) +#define USBPCR_IDPULLUP_MASK (0x3 << 28) +#define USBPCR_COMMONONN BIT(25) +#define USBPCR_VBUSVLDEXT BIT(24) +#define USBPCR_VBUSVLDEXTSEL BIT(23) +#define USBPCR_POR BIT(22) +#define USBPCR_OTG_DISABLE BIT(20) +#define USBPCR_COMPDISTUNE_MASK (0x7 << 17) +#define USBPCR_OTGTUNE_MASK (0x7 << 14) +#define USBPCR_SQRXTUNE_MASK (0x7 << 11) +#define USBPCR_TXFSLSTUNE_MASK (0xf << 7) +#define USBPCR_TXPREEMPHTUNE BIT(6) +#define USBPCR_TXHSXVTUNE_MASK (0x3 << 4) +#define USBPCR_TXVREFTUNE_MASK 0xf + +/* bits within the USBPCR1 register */ +#define USBPCR1_REFCLKSEL_SHIFT 26 +#define USBPCR1_REFCLKSEL_MASK (0x3 << USBPCR1_REFCLKSEL_SHIFT) +#define USBPCR1_REFCLKSEL_CORE (0x2 << USBPCR1_REFCLKSEL_SHIFT) +#define USBPCR1_REFCLKDIV_SHIFT 24 +#define USBPCR1_REFCLKDIV_MASK (0x3 << USBPCR1_REFCLKDIV_SHIFT) +#define USBPCR1_REFCLKDIV_19_2 (0x3 << USBPCR1_REFCLKDIV_SHIFT) +#define USBPCR1_REFCLKDIV_48 (0x2 << USBPCR1_REFCLKDIV_SHIFT) +#define USBPCR1_REFCLKDIV_24 (0x1 << USBPCR1_REFCLKDIV_SHIFT) +#define USBPCR1_REFCLKDIV_12 (0x0 << USBPCR1_REFCLKDIV_SHIFT) +#define USBPCR1_USB_SEL BIT(28) +#define USBPCR1_WORD_IF0 BIT(19) +#define USBPCR1_WORD_IF1 BIT(18) + +/* bits within the USBRDT register */ +#define USBRDT_VBFIL_LD_EN BIT(25) +#define USBRDT_USBRDT_MASK 0x7fffff + +/* bits within the USBVBFIL register */ +#define USBVBFIL_IDDIGFIL_SHIFT 16 +#define USBVBFIL_IDDIGFIL_MASK (0xffff << USBVBFIL_IDDIGFIL_SHIFT) +#define USBVBFIL_USBVBFIL_MASK (0xffff) + +static struct ingenic_cgu *cgu; + +static u8 jz4780_otg_phy_get_parent(struct clk_hw *hw) +{ + /* we only use CLKCORE, revisit if that ever changes */ + return 0; +} + +static int jz4780_otg_phy_set_parent(struct clk_hw *hw, u8 idx) +{ + unsigned long flags; + u32 usbpcr1; + + if (idx > 0) + return -EINVAL; + + spin_lock_irqsave(&cgu->lock, flags); + + usbpcr1 = readl(cgu->base + CGU_REG_USBPCR1); + usbpcr1 &= ~USBPCR1_REFCLKSEL_MASK; + /* we only use CLKCORE */ + usbpcr1 |= USBPCR1_REFCLKSEL_CORE; + writel(usbpcr1, cgu->base + CGU_REG_USBPCR1); + + spin_unlock_irqrestore(&cgu->lock, flags); + return 0; +} + +static unsigned long jz4780_otg_phy_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + u32 usbpcr1; + unsigned refclk_div; + + usbpcr1 = readl(cgu->base + CGU_REG_USBPCR1); + refclk_div = usbpcr1 & USBPCR1_REFCLKDIV_MASK; + + switch (refclk_div) { + case USBPCR1_REFCLKDIV_12: + return 12000000; + + case USBPCR1_REFCLKDIV_24: + return 24000000; + + case USBPCR1_REFCLKDIV_48: + return 48000000; + + case USBPCR1_REFCLKDIV_19_2: + return 19200000; + } + + BUG(); + return parent_rate; +} + +static long jz4780_otg_phy_round_rate(struct clk_hw *hw, unsigned long req_rate, + unsigned long *parent_rate) +{ + if (req_rate < 15600000) + return 12000000; + + if (req_rate < 21600000) + return 19200000; + + if (req_rate < 36000000) + return 24000000; + + return 48000000; +} + +static int jz4780_otg_phy_set_rate(struct clk_hw *hw, unsigned long req_rate, + unsigned long parent_rate) +{ + unsigned long flags; + u32 usbpcr1, div_bits; + + switch (req_rate) { + case 12000000: + div_bits = USBPCR1_REFCLKDIV_12; + break; + + case 19200000: + div_bits = USBPCR1_REFCLKDIV_19_2; + break; + + case 24000000: + div_bits = USBPCR1_REFCLKDIV_24; + break; + + case 48000000: + div_bits = USBPCR1_REFCLKDIV_48; + break; + + default: + return -EINVAL; + } + + spin_lock_irqsave(&cgu->lock, flags); + + usbpcr1 = readl(cgu->base + CGU_REG_USBPCR1); + usbpcr1 &= ~USBPCR1_REFCLKDIV_MASK; + usbpcr1 |= div_bits; + writel(usbpcr1, cgu->base + CGU_REG_USBPCR1); + + spin_unlock_irqrestore(&cgu->lock, flags); + return 0; +} + +static struct clk_ops jz4780_otg_phy_ops = { + .get_parent = jz4780_otg_phy_get_parent, + .set_parent = jz4780_otg_phy_set_parent, + + .recalc_rate = jz4780_otg_phy_recalc_rate, + .round_rate = jz4780_otg_phy_round_rate, + .set_rate = jz4780_otg_phy_set_rate, +}; + +static const s8 pll_od_encoding[16] = { + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, +}; + +static const struct ingenic_cgu_clk_info jz4780_cgu_clocks[] = { + + /* External clocks */ + + [JZ4780_CLK_EXCLK] = { "ext", CGU_CLK_EXT }, + [JZ4780_CLK_RTCLK] = { "rtc", CGU_CLK_EXT }, + + /* PLLs */ + +#define DEF_PLL(name) { \ + .reg = CGU_REG_ ## name, \ + .m_shift = 19, \ + .m_bits = 13, \ + .m_offset = 1, \ + .n_shift = 13, \ + .n_bits = 6, \ + .n_offset = 1, \ + .od_shift = 9, \ + .od_bits = 4, \ + .od_max = 16, \ + .od_encoding = pll_od_encoding, \ + .stable_bit = 6, \ + .bypass_bit = 1, \ + .enable_bit = 0, \ +} + + [JZ4780_CLK_APLL] = { + "apll", CGU_CLK_PLL, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .pll = DEF_PLL(APLL), + }, + + [JZ4780_CLK_MPLL] = { + "mpll", CGU_CLK_PLL, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .pll = DEF_PLL(MPLL), + }, + + [JZ4780_CLK_EPLL] = { + "epll", CGU_CLK_PLL, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .pll = DEF_PLL(EPLL), + }, + + [JZ4780_CLK_VPLL] = { + "vpll", CGU_CLK_PLL, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .pll = DEF_PLL(VPLL), + }, + +#undef DEF_PLL + + /* Custom (SoC-specific) OTG PHY */ + + [JZ4780_CLK_OTGPHY] = { + "otg_phy", CGU_CLK_CUSTOM, + .parents = { -1, -1, JZ4780_CLK_EXCLK, -1 }, + .custom = { &jz4780_otg_phy_ops }, + }, + + /* Muxes & dividers */ + + [JZ4780_CLK_SCLKA] = { + "sclk_a", CGU_CLK_MUX, + .parents = { -1, JZ4780_CLK_APLL, JZ4780_CLK_EXCLK, + JZ4780_CLK_RTCLK }, + .mux = { CGU_REG_CLOCKCONTROL, 30, 2 }, + }, + + [JZ4780_CLK_CPUMUX] = { + "cpumux", CGU_CLK_MUX, + .parents = { -1, JZ4780_CLK_SCLKA, JZ4780_CLK_MPLL, + JZ4780_CLK_EPLL }, + .mux = { CGU_REG_CLOCKCONTROL, 28, 2 }, + }, + + [JZ4780_CLK_CPU] = { + "cpu", CGU_CLK_DIV, + .parents = { JZ4780_CLK_CPUMUX, -1, -1, -1 }, + .div = { CGU_REG_CLOCKCONTROL, 0, 4, 22, -1, -1 }, + }, + + [JZ4780_CLK_L2CACHE] = { + "l2cache", CGU_CLK_DIV, + .parents = { JZ4780_CLK_CPUMUX, -1, -1, -1 }, + .div = { CGU_REG_CLOCKCONTROL, 4, 4, -1, -1, -1 }, + }, + + [JZ4780_CLK_AHB0] = { + "ahb0", CGU_CLK_MUX | CGU_CLK_DIV, + .parents = { -1, JZ4780_CLK_SCLKA, JZ4780_CLK_MPLL, + JZ4780_CLK_EPLL }, + .mux = { CGU_REG_CLOCKCONTROL, 26, 2 }, + .div = { CGU_REG_CLOCKCONTROL, 8, 4, 21, -1, -1 }, + }, + + [JZ4780_CLK_AHB2PMUX] = { + "ahb2_apb_mux", CGU_CLK_MUX, + .parents = { -1, JZ4780_CLK_SCLKA, JZ4780_CLK_MPLL, + JZ4780_CLK_RTCLK }, + .mux = { CGU_REG_CLOCKCONTROL, 24, 2 }, + }, + + [JZ4780_CLK_AHB2] = { + "ahb2", CGU_CLK_DIV, + .parents = { JZ4780_CLK_AHB2PMUX, -1, -1, -1 }, + .div = { CGU_REG_CLOCKCONTROL, 12, 4, 20, -1, -1 }, + }, + + [JZ4780_CLK_PCLK] = { + "pclk", CGU_CLK_DIV, + .parents = { JZ4780_CLK_AHB2PMUX, -1, -1, -1 }, + .div = { CGU_REG_CLOCKCONTROL, 16, 4, 20, -1, -1 }, + }, + + [JZ4780_CLK_DDR] = { + "ddr", CGU_CLK_MUX | CGU_CLK_DIV, + .parents = { -1, JZ4780_CLK_SCLKA, JZ4780_CLK_MPLL, -1 }, + .mux = { CGU_REG_DDRCDR, 30, 2 }, + .div = { CGU_REG_DDRCDR, 0, 4, 29, 28, 27 }, + }, + + [JZ4780_CLK_VPU] = { + "vpu", CGU_CLK_MUX | CGU_CLK_DIV | CGU_CLK_GATE, + .parents = { JZ4780_CLK_SCLKA, JZ4780_CLK_MPLL, + JZ4780_CLK_EPLL, -1 }, + .mux = { CGU_REG_VPUCDR, 30, 2 }, + .div = { CGU_REG_VPUCDR, 0, 4, 29, 28, 27 }, + .gate = { CGU_REG_CLKGR1, 2 }, + }, + + [JZ4780_CLK_I2SPLL] = { + "i2s_pll", CGU_CLK_MUX | CGU_CLK_DIV, + .parents = { JZ4780_CLK_SCLKA, JZ4780_CLK_EPLL, -1, -1 }, + .mux = { CGU_REG_I2SCDR, 30, 1 }, + .div = { CGU_REG_I2SCDR, 0, 8, 29, 28, 27 }, + }, + + [JZ4780_CLK_I2S] = { + "i2s", CGU_CLK_MUX, + .parents = { JZ4780_CLK_EXCLK, JZ4780_CLK_I2SPLL, -1, -1 }, + .mux = { CGU_REG_I2SCDR, 31, 1 }, + }, + + [JZ4780_CLK_LCD0PIXCLK] = { + "lcd0pixclk", CGU_CLK_MUX | CGU_CLK_DIV, + .parents = { JZ4780_CLK_SCLKA, JZ4780_CLK_MPLL, + JZ4780_CLK_VPLL, -1 }, + .mux = { CGU_REG_LP0CDR, 30, 2 }, + .div = { CGU_REG_LP0CDR, 0, 8, 28, 27, 26 }, + }, + + [JZ4780_CLK_LCD1PIXCLK] = { + "lcd1pixclk", CGU_CLK_MUX | CGU_CLK_DIV, + .parents = { JZ4780_CLK_SCLKA, JZ4780_CLK_MPLL, + JZ4780_CLK_VPLL, -1 }, + .mux = { CGU_REG_LP1CDR, 30, 2 }, + .div = { CGU_REG_LP1CDR, 0, 8, 28, 27, 26 }, + }, + + [JZ4780_CLK_MSCMUX] = { + "msc_mux", CGU_CLK_MUX, + .parents = { -1, JZ4780_CLK_SCLKA, JZ4780_CLK_MPLL, -1 }, + .mux = { CGU_REG_MSC0CDR, 30, 2 }, + }, + + [JZ4780_CLK_MSC0] = { + "msc0", CGU_CLK_DIV | CGU_CLK_GATE, + .parents = { JZ4780_CLK_MSCMUX, -1, -1, -1 }, + .div = { CGU_REG_MSC0CDR, 0, 8, 29, 28, 27 }, + .gate = { CGU_REG_CLKGR0, 3 }, + }, + + [JZ4780_CLK_MSC1] = { + "msc1", CGU_CLK_DIV | CGU_CLK_GATE, + .parents = { JZ4780_CLK_MSCMUX, -1, -1, -1 }, + .div = { CGU_REG_MSC1CDR, 0, 8, 29, 28, 27 }, + .gate = { CGU_REG_CLKGR0, 11 }, + }, + + [JZ4780_CLK_MSC2] = { + "msc2", CGU_CLK_DIV | CGU_CLK_GATE, + .parents = { JZ4780_CLK_MSCMUX, -1, -1, -1 }, + .div = { CGU_REG_MSC2CDR, 0, 8, 29, 28, 27 }, + .gate = { CGU_REG_CLKGR0, 12 }, + }, + + [JZ4780_CLK_UHC] = { + "uhc", CGU_CLK_MUX | CGU_CLK_DIV | CGU_CLK_GATE, + .parents = { JZ4780_CLK_SCLKA, JZ4780_CLK_MPLL, + JZ4780_CLK_EPLL, JZ4780_CLK_OTGPHY }, + .mux = { CGU_REG_UHCCDR, 30, 2 }, + .div = { CGU_REG_UHCCDR, 0, 8, 29, 28, 27 }, + .gate = { CGU_REG_CLKGR0, 24 }, + }, + + [JZ4780_CLK_SSIPLL] = { + "ssi_pll", CGU_CLK_MUX | CGU_CLK_DIV, + .parents = { JZ4780_CLK_SCLKA, JZ4780_CLK_MPLL, -1, -1 }, + .mux = { CGU_REG_SSICDR, 30, 1 }, + .div = { CGU_REG_SSICDR, 0, 8, 29, 28, 27 }, + }, + + [JZ4780_CLK_SSI] = { + "ssi", CGU_CLK_MUX, + .parents = { JZ4780_CLK_EXCLK, JZ4780_CLK_SSIPLL, -1, -1 }, + .mux = { CGU_REG_SSICDR, 31, 1 }, + }, + + [JZ4780_CLK_CIMMCLK] = { + "cim_mclk", CGU_CLK_MUX | CGU_CLK_DIV, + .parents = { JZ4780_CLK_SCLKA, JZ4780_CLK_MPLL, -1, -1 }, + .mux = { CGU_REG_CIMCDR, 31, 1 }, + .div = { CGU_REG_CIMCDR, 0, 8, 30, 29, 28 }, + }, + + [JZ4780_CLK_PCMPLL] = { + "pcm_pll", CGU_CLK_MUX | CGU_CLK_DIV, + .parents = { JZ4780_CLK_SCLKA, JZ4780_CLK_MPLL, + JZ4780_CLK_EPLL, JZ4780_CLK_VPLL }, + .mux = { CGU_REG_PCMCDR, 29, 2 }, + .div = { CGU_REG_PCMCDR, 0, 8, 28, 27, 26 }, + }, + + [JZ4780_CLK_PCM] = { + "pcm", CGU_CLK_MUX | CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, JZ4780_CLK_PCMPLL, -1, -1 }, + .mux = { CGU_REG_PCMCDR, 31, 1 }, + .gate = { CGU_REG_CLKGR1, 3 }, + }, + + [JZ4780_CLK_GPU] = { + "gpu", CGU_CLK_MUX | CGU_CLK_DIV | CGU_CLK_GATE, + .parents = { -1, JZ4780_CLK_SCLKA, JZ4780_CLK_MPLL, + JZ4780_CLK_EPLL }, + .mux = { CGU_REG_GPUCDR, 30, 2 }, + .div = { CGU_REG_GPUCDR, 0, 4, 29, 28, 27 }, + .gate = { CGU_REG_CLKGR1, 4 }, + }, + + [JZ4780_CLK_HDMI] = { + "hdmi", CGU_CLK_MUX | CGU_CLK_DIV | CGU_CLK_GATE, + .parents = { JZ4780_CLK_SCLKA, JZ4780_CLK_MPLL, + JZ4780_CLK_VPLL, -1 }, + .mux = { CGU_REG_HDMICDR, 30, 2 }, + .div = { CGU_REG_HDMICDR, 0, 8, 29, 28, 26 }, + .gate = { CGU_REG_CLKGR1, 9 }, + }, + + [JZ4780_CLK_BCH] = { + "bch", CGU_CLK_MUX | CGU_CLK_DIV | CGU_CLK_GATE, + .parents = { -1, JZ4780_CLK_SCLKA, JZ4780_CLK_MPLL, + JZ4780_CLK_EPLL }, + .mux = { CGU_REG_BCHCDR, 30, 2 }, + .div = { CGU_REG_BCHCDR, 0, 4, 29, 28, 27 }, + .gate = { CGU_REG_CLKGR0, 1 }, + }, + + /* Gate-only clocks */ + + [JZ4780_CLK_NEMC] = { + "nemc", CGU_CLK_GATE, + .parents = { JZ4780_CLK_AHB2, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 0 }, + }, + + [JZ4780_CLK_OTG0] = { + "otg0", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 2 }, + }, + + [JZ4780_CLK_SSI0] = { + "ssi0", CGU_CLK_GATE, + .parents = { JZ4780_CLK_SSI, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 4 }, + }, + + [JZ4780_CLK_SMB0] = { + "smb0", CGU_CLK_GATE, + .parents = { JZ4780_CLK_PCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 5 }, + }, + + [JZ4780_CLK_SMB1] = { + "smb1", CGU_CLK_GATE, + .parents = { JZ4780_CLK_PCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 6 }, + }, + + [JZ4780_CLK_SCC] = { + "scc", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 7 }, + }, + + [JZ4780_CLK_AIC] = { + "aic", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 8 }, + }, + + [JZ4780_CLK_TSSI0] = { + "tssi0", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 9 }, + }, + + [JZ4780_CLK_OWI] = { + "owi", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 10 }, + }, + + [JZ4780_CLK_KBC] = { + "kbc", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 13 }, + }, + + [JZ4780_CLK_SADC] = { + "sadc", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 14 }, + }, + + [JZ4780_CLK_UART0] = { + "uart0", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 15 }, + }, + + [JZ4780_CLK_UART1] = { + "uart1", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 16 }, + }, + + [JZ4780_CLK_UART2] = { + "uart2", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 17 }, + }, + + [JZ4780_CLK_UART3] = { + "uart3", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 18 }, + }, + + [JZ4780_CLK_SSI1] = { + "ssi1", CGU_CLK_GATE, + .parents = { JZ4780_CLK_SSI, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 19 }, + }, + + [JZ4780_CLK_SSI2] = { + "ssi2", CGU_CLK_GATE, + .parents = { JZ4780_CLK_SSI, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 20 }, + }, + + [JZ4780_CLK_PDMA] = { + "pdma", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 21 }, + }, + + [JZ4780_CLK_GPS] = { + "gps", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 22 }, + }, + + [JZ4780_CLK_MAC] = { + "mac", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 23 }, + }, + + [JZ4780_CLK_SMB2] = { + "smb2", CGU_CLK_GATE, + .parents = { JZ4780_CLK_PCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 24 }, + }, + + [JZ4780_CLK_CIM] = { + "cim", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 26 }, + }, + + [JZ4780_CLK_LCD] = { + "lcd", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 28 }, + }, + + [JZ4780_CLK_TVE] = { + "tve", CGU_CLK_GATE, + .parents = { JZ4780_CLK_LCD, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 27 }, + }, + + [JZ4780_CLK_IPU] = { + "ipu", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 29 }, + }, + + [JZ4780_CLK_DDR0] = { + "ddr0", CGU_CLK_GATE, + .parents = { JZ4780_CLK_DDR, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 30 }, + }, + + [JZ4780_CLK_DDR1] = { + "ddr1", CGU_CLK_GATE, + .parents = { JZ4780_CLK_DDR, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR0, 31 }, + }, + + [JZ4780_CLK_SMB3] = { + "smb3", CGU_CLK_GATE, + .parents = { JZ4780_CLK_PCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR1, 0 }, + }, + + [JZ4780_CLK_TSSI1] = { + "tssi1", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR1, 1 }, + }, + + [JZ4780_CLK_COMPRESS] = { + "compress", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR1, 5 }, + }, + + [JZ4780_CLK_AIC1] = { + "aic1", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR1, 6 }, + }, + + [JZ4780_CLK_GPVLC] = { + "gpvlc", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR1, 7 }, + }, + + [JZ4780_CLK_OTG1] = { + "otg1", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR1, 8 }, + }, + + [JZ4780_CLK_UART4] = { + "uart4", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR1, 10 }, + }, + + [JZ4780_CLK_AHBMON] = { + "ahb_mon", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR1, 11 }, + }, + + [JZ4780_CLK_SMB4] = { + "smb4", CGU_CLK_GATE, + .parents = { JZ4780_CLK_PCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR1, 12 }, + }, + + [JZ4780_CLK_DES] = { + "des", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR1, 13 }, + }, + + [JZ4780_CLK_X2D] = { + "x2d", CGU_CLK_GATE, + .parents = { JZ4780_CLK_EXCLK, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR1, 14 }, + }, + + [JZ4780_CLK_CORE1] = { + "core1", CGU_CLK_GATE, + .parents = { JZ4780_CLK_CPU, -1, -1, -1 }, + .gate = { CGU_REG_CLKGR1, 15 }, + }, + +}; + +static void __init jz4780_cgu_init(struct device_node *np) +{ + int retval; + + cgu = ingenic_cgu_new(jz4780_cgu_clocks, + ARRAY_SIZE(jz4780_cgu_clocks), np); + if (!cgu) { + pr_err("%s: failed to initialise CGU\n", __func__); + return; + } + + retval = ingenic_cgu_register_clocks(cgu); + if (retval) { + pr_err("%s: failed to register CGU Clocks\n", __func__); + return; + } +} +CLK_OF_DECLARE(jz4780_cgu, "ingenic,jz4780-cgu", jz4780_cgu_init); diff --git a/drivers/cpufreq/ls1x-cpufreq.c b/drivers/cpufreq/ls1x-cpufreq.c index f0913eee2f50..262581b3318d 100644 --- a/drivers/cpufreq/ls1x-cpufreq.c +++ b/drivers/cpufreq/ls1x-cpufreq.c @@ -17,8 +17,8 @@ #include <linux/platform_device.h> #include <linux/slab.h> -#include <asm/mach-loongson1/cpufreq.h> -#include <asm/mach-loongson1/loongson1.h> +#include <cpufreq.h> +#include <loongson1.h> static struct { struct device *dev; diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index bda2cb06dc7a..88d474b78076 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -162,6 +162,17 @@ config MX3_IPU_IRQS To avoid bloating the irq_desc[] array we allocate a sufficient number of IRQ slots and map them dynamically to specific sources. +config PXA_DMA + bool "PXA DMA support" + depends on (ARCH_MMP || ARCH_PXA) + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the DMA engine for PXA. It is also compatible with MMP PDMA + platform. The internal DMA IP of all PXA variants is supported, with + 16 to 32 channels for peripheral to memory or memory to memory + transfers. + config TXX9_DMAC tristate "Toshiba TXx9 SoC DMA support" depends on MACH_TX49XX || MACH_TX39XX @@ -245,6 +256,9 @@ config TI_EDMA Enable support for the TI EDMA controller. This DMA engine is found on TI DaVinci and AM33xx parts. +config TI_DMA_CROSSBAR + bool + config ARCH_HAS_ASYNC_TX_FIND_CHANNEL bool @@ -330,6 +344,7 @@ config DMA_OMAP depends on ARCH_OMAP select DMA_ENGINE select DMA_VIRTUAL_CHANNELS + select TI_DMA_CROSSBAR if SOC_DRA7XX config DMA_BCM2835 tristate "BCM2835 DMA engine support" diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 69f77d5ba53b..6a4d6f2827da 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ obj-$(CONFIG_IMX_SDMA) += imx-sdma.o obj-$(CONFIG_IMX_DMA) += imx-dma.o obj-$(CONFIG_MXS_DMA) += mxs-dma.o +obj-$(CONFIG_PXA_DMA) += pxa_dma.o obj-$(CONFIG_TIMB_DMA) += timb_dma.o obj-$(CONFIG_SIRF_DMA) += sirf-dma.o obj-$(CONFIG_TI_EDMA) += edma.o @@ -38,6 +39,7 @@ obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o obj-$(CONFIG_DMA_OMAP) += omap-dma.o +obj-$(CONFIG_TI_DMA_CROSSBAR) += ti-dma-crossbar.o obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 49d396ec06e5..5de3cf453f35 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -474,7 +474,7 @@ static void pl08x_terminate_phy_chan(struct pl08x_driver_data *pl08x, u32 val = readl(ch->reg_config); val &= ~(PL080_CONFIG_ENABLE | PL080_CONFIG_ERR_IRQ_MASK | - PL080_CONFIG_TC_IRQ_MASK); + PL080_CONFIG_TC_IRQ_MASK); writel(val, ch->reg_config); diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 57b2141ddddc..59892126d175 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -247,6 +247,10 @@ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) channel_writel(atchan, CTRLA, 0); channel_writel(atchan, CTRLB, 0); channel_writel(atchan, DSCR, first->txd.phys); + channel_writel(atchan, SPIP, ATC_SPIP_HOLE(first->src_hole) | + ATC_SPIP_BOUNDARY(first->boundary)); + channel_writel(atchan, DPIP, ATC_DPIP_HOLE(first->dst_hole) | + ATC_DPIP_BOUNDARY(first->boundary)); dma_writel(atdma, CHER, atchan->mask); vdbg_dump_regs(atchan); @@ -635,6 +639,104 @@ static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx) } /** + * atc_prep_dma_interleaved - prepare memory to memory interleaved operation + * @chan: the channel to prepare operation on + * @xt: Interleaved transfer template + * @flags: tx descriptor status flags + */ +static struct dma_async_tx_descriptor * +atc_prep_dma_interleaved(struct dma_chan *chan, + struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct data_chunk *first = xt->sgl; + struct at_desc *desc = NULL; + size_t xfer_count; + unsigned int dwidth; + u32 ctrla; + u32 ctrlb; + size_t len = 0; + int i; + + dev_info(chan2dev(chan), + "%s: src=0x%08x, dest=0x%08x, numf=%d, frame_size=%d, flags=0x%lx\n", + __func__, xt->src_start, xt->dst_start, xt->numf, + xt->frame_size, flags); + + if (unlikely(!xt || xt->numf != 1 || !xt->frame_size)) + return NULL; + + /* + * The controller can only "skip" X bytes every Y bytes, so we + * need to make sure we are given a template that fit that + * description, ie a template with chunks that always have the + * same size, with the same ICGs. + */ + for (i = 0; i < xt->frame_size; i++) { + struct data_chunk *chunk = xt->sgl + i; + + if ((chunk->size != xt->sgl->size) || + (dmaengine_get_dst_icg(xt, chunk) != dmaengine_get_dst_icg(xt, first)) || + (dmaengine_get_src_icg(xt, chunk) != dmaengine_get_src_icg(xt, first))) { + dev_err(chan2dev(chan), + "%s: the controller can transfer only identical chunks\n", + __func__); + return NULL; + } + + len += chunk->size; + } + + dwidth = atc_get_xfer_width(xt->src_start, + xt->dst_start, len); + + xfer_count = len >> dwidth; + if (xfer_count > ATC_BTSIZE_MAX) { + dev_err(chan2dev(chan), "%s: buffer is too big\n", __func__); + return NULL; + } + + ctrla = ATC_SRC_WIDTH(dwidth) | + ATC_DST_WIDTH(dwidth); + + ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN + | ATC_SRC_ADDR_MODE_INCR + | ATC_DST_ADDR_MODE_INCR + | ATC_SRC_PIP + | ATC_DST_PIP + | ATC_FC_MEM2MEM; + + /* create the transfer */ + desc = atc_desc_get(atchan); + if (!desc) { + dev_err(chan2dev(chan), + "%s: couldn't allocate our descriptor\n", __func__); + return NULL; + } + + desc->lli.saddr = xt->src_start; + desc->lli.daddr = xt->dst_start; + desc->lli.ctrla = ctrla | xfer_count; + desc->lli.ctrlb = ctrlb; + + desc->boundary = first->size >> dwidth; + desc->dst_hole = (dmaengine_get_dst_icg(xt, first) >> dwidth) + 1; + desc->src_hole = (dmaengine_get_src_icg(xt, first) >> dwidth) + 1; + + desc->txd.cookie = -EBUSY; + desc->total_len = desc->len = len; + desc->tx_width = dwidth; + + /* set end-of-link to the last link descriptor of list*/ + set_desc_eol(desc); + + desc->txd.flags = flags; /* client is in control of this ack */ + + return &desc->txd; +} + +/** * atc_prep_dma_memcpy - prepare a memcpy operation * @chan: the channel to prepare operation on * @dest: operation virtual destination address @@ -1609,6 +1711,7 @@ static int __init at_dma_probe(struct platform_device *pdev) /* setup platform data for each SoC */ dma_cap_set(DMA_MEMCPY, at91sam9rl_config.cap_mask); dma_cap_set(DMA_SG, at91sam9rl_config.cap_mask); + dma_cap_set(DMA_INTERLEAVE, at91sam9g45_config.cap_mask); dma_cap_set(DMA_MEMCPY, at91sam9g45_config.cap_mask); dma_cap_set(DMA_SLAVE, at91sam9g45_config.cap_mask); dma_cap_set(DMA_SG, at91sam9g45_config.cap_mask); @@ -1713,6 +1816,9 @@ static int __init at_dma_probe(struct platform_device *pdev) atdma->dma_common.dev = &pdev->dev; /* set prep routines based on capability */ + if (dma_has_cap(DMA_INTERLEAVE, atdma->dma_common.cap_mask)) + atdma->dma_common.device_prep_interleaved_dma = atc_prep_dma_interleaved; + if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask)) atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy; diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index 2727ca560572..bc8d5ebedd19 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -196,6 +196,11 @@ struct at_desc { size_t len; u32 tx_width; size_t total_len; + + /* Interleaved data */ + size_t boundary; + size_t dst_hole; + size_t src_hole; }; static inline struct at_desc * diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 7992164ea9ec..cf1213de7865 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -235,6 +235,10 @@ struct at_xdmac_lld { dma_addr_t mbr_sa; /* Source Address Member */ dma_addr_t mbr_da; /* Destination Address Member */ u32 mbr_cfg; /* Configuration Register */ + u32 mbr_bc; /* Block Control Register */ + u32 mbr_ds; /* Data Stride Register */ + u32 mbr_sus; /* Source Microblock Stride Register */ + u32 mbr_dus; /* Destination Microblock Stride Register */ }; @@ -358,6 +362,8 @@ static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan, if (at_xdmac_chan_is_cyclic(atchan)) { reg = AT_XDMAC_CNDC_NDVIEW_NDV1; at_xdmac_chan_write(atchan, AT_XDMAC_CC, first->lld.mbr_cfg); + } else if (first->lld.mbr_ubc & AT_XDMAC_MBR_UBC_NDV3) { + reg = AT_XDMAC_CNDC_NDVIEW_NDV3; } else { /* * No need to write AT_XDMAC_CC reg, it will be done when the @@ -465,6 +471,33 @@ static struct at_xdmac_desc *at_xdmac_get_desc(struct at_xdmac_chan *atchan) return desc; } +static void at_xdmac_queue_desc(struct dma_chan *chan, + struct at_xdmac_desc *prev, + struct at_xdmac_desc *desc) +{ + if (!prev || !desc) + return; + + prev->lld.mbr_nda = desc->tx_dma_desc.phys; + prev->lld.mbr_ubc |= AT_XDMAC_MBR_UBC_NDE; + + dev_dbg(chan2dev(chan), "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", + __func__, prev, &prev->lld.mbr_nda); +} + +static inline void at_xdmac_increment_block_count(struct dma_chan *chan, + struct at_xdmac_desc *desc) +{ + if (!desc) + return; + + desc->lld.mbr_bc++; + + dev_dbg(chan2dev(chan), + "%s: incrementing the block count of the desc 0x%p\n", + __func__, desc); +} + static struct dma_chan *at_xdmac_xlate(struct of_phandle_args *dma_spec, struct of_dma *of_dma) { @@ -656,19 +689,14 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2 /* next descriptor view */ | AT_XDMAC_MBR_UBC_NDEN /* next descriptor dst parameter update */ | AT_XDMAC_MBR_UBC_NSEN /* next descriptor src parameter update */ - | (i == sg_len - 1 ? 0 : AT_XDMAC_MBR_UBC_NDE) /* descriptor fetch */ | (len >> fixed_dwidth); /* microblock length */ dev_dbg(chan2dev(chan), "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n", __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc); /* Chain lld. */ - if (prev) { - prev->lld.mbr_nda = desc->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", - __func__, prev, &prev->lld.mbr_nda); - } + if (prev) + at_xdmac_queue_desc(chan, prev, desc); prev = desc; if (!first) @@ -748,7 +776,6 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1 | AT_XDMAC_MBR_UBC_NDEN | AT_XDMAC_MBR_UBC_NSEN - | AT_XDMAC_MBR_UBC_NDE | period_len >> at_xdmac_get_dwidth(desc->lld.mbr_cfg); dev_dbg(chan2dev(chan), @@ -756,12 +783,8 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc); /* Chain lld. */ - if (prev) { - prev->lld.mbr_nda = desc->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", - __func__, prev, &prev->lld.mbr_nda); - } + if (prev) + at_xdmac_queue_desc(chan, prev, desc); prev = desc; if (!first) @@ -783,6 +806,215 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, return &first->tx_dma_desc; } +static inline u32 at_xdmac_align_width(struct dma_chan *chan, dma_addr_t addr) +{ + u32 width; + + /* + * Check address alignment to select the greater data width we + * can use. + * + * Some XDMAC implementations don't provide dword transfer, in + * this case selecting dword has the same behavior as + * selecting word transfers. + */ + if (!(addr & 7)) { + width = AT_XDMAC_CC_DWIDTH_DWORD; + dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__); + } else if (!(addr & 3)) { + width = AT_XDMAC_CC_DWIDTH_WORD; + dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__); + } else if (!(addr & 1)) { + width = AT_XDMAC_CC_DWIDTH_HALFWORD; + dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__); + } else { + width = AT_XDMAC_CC_DWIDTH_BYTE; + dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__); + } + + return width; +} + +static struct at_xdmac_desc * +at_xdmac_interleaved_queue_desc(struct dma_chan *chan, + struct at_xdmac_chan *atchan, + struct at_xdmac_desc *prev, + dma_addr_t src, dma_addr_t dst, + struct dma_interleaved_template *xt, + struct data_chunk *chunk) +{ + struct at_xdmac_desc *desc; + u32 dwidth; + unsigned long flags; + size_t ublen; + /* + * WARNING: The channel configuration is set here since there is no + * dmaengine_slave_config call in this case. Moreover we don't know the + * direction, it involves we can't dynamically set the source and dest + * interface so we have to use the same one. Only interface 0 allows EBI + * access. Hopefully we can access DDR through both ports (at least on + * SAMA5D4x), so we can use the same interface for source and dest, + * that solves the fact we don't know the direction. + */ + u32 chan_cc = AT_XDMAC_CC_DIF(0) + | AT_XDMAC_CC_SIF(0) + | AT_XDMAC_CC_MBSIZE_SIXTEEN + | AT_XDMAC_CC_TYPE_MEM_TRAN; + + dwidth = at_xdmac_align_width(chan, src | dst | chunk->size); + if (chunk->size >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) { + dev_dbg(chan2dev(chan), + "%s: chunk too big (%d, max size %lu)...\n", + __func__, chunk->size, + AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth); + return NULL; + } + + if (prev) + dev_dbg(chan2dev(chan), + "Adding items at the end of desc 0x%p\n", prev); + + if (xt->src_inc) { + if (xt->src_sgl) + chan_cc |= AT_XDMAC_CC_SAM_UBS_DS_AM; + else + chan_cc |= AT_XDMAC_CC_SAM_INCREMENTED_AM; + } + + if (xt->dst_inc) { + if (xt->dst_sgl) + chan_cc |= AT_XDMAC_CC_DAM_UBS_DS_AM; + else + chan_cc |= AT_XDMAC_CC_DAM_INCREMENTED_AM; + } + + spin_lock_irqsave(&atchan->lock, flags); + desc = at_xdmac_get_desc(atchan); + spin_unlock_irqrestore(&atchan->lock, flags); + if (!desc) { + dev_err(chan2dev(chan), "can't get descriptor\n"); + return NULL; + } + + chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth); + + ublen = chunk->size >> dwidth; + + desc->lld.mbr_sa = src; + desc->lld.mbr_da = dst; + desc->lld.mbr_sus = dmaengine_get_src_icg(xt, chunk); + desc->lld.mbr_dus = dmaengine_get_dst_icg(xt, chunk); + + desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3 + | AT_XDMAC_MBR_UBC_NDEN + | AT_XDMAC_MBR_UBC_NSEN + | ublen; + desc->lld.mbr_cfg = chan_cc; + + dev_dbg(chan2dev(chan), + "%s: lld: mbr_sa=0x%08x, mbr_da=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n", + __func__, desc->lld.mbr_sa, desc->lld.mbr_da, + desc->lld.mbr_ubc, desc->lld.mbr_cfg); + + /* Chain lld. */ + if (prev) + at_xdmac_queue_desc(chan, prev, desc); + + return desc; +} + +static struct dma_async_tx_descriptor * +at_xdmac_prep_interleaved(struct dma_chan *chan, + struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *prev = NULL, *first = NULL; + struct data_chunk *chunk, *prev_chunk = NULL; + dma_addr_t dst_addr, src_addr; + size_t dst_skip, src_skip, len = 0; + size_t prev_dst_icg = 0, prev_src_icg = 0; + int i; + + if (!xt || (xt->numf != 1) || (xt->dir != DMA_MEM_TO_MEM)) + return NULL; + + dev_dbg(chan2dev(chan), "%s: src=0x%08x, dest=0x%08x, numf=%d, frame_size=%d, flags=0x%lx\n", + __func__, xt->src_start, xt->dst_start, xt->numf, + xt->frame_size, flags); + + src_addr = xt->src_start; + dst_addr = xt->dst_start; + + for (i = 0; i < xt->frame_size; i++) { + struct at_xdmac_desc *desc; + size_t src_icg, dst_icg; + + chunk = xt->sgl + i; + + dst_icg = dmaengine_get_dst_icg(xt, chunk); + src_icg = dmaengine_get_src_icg(xt, chunk); + + src_skip = chunk->size + src_icg; + dst_skip = chunk->size + dst_icg; + + dev_dbg(chan2dev(chan), + "%s: chunk size=%d, src icg=%d, dst icg=%d\n", + __func__, chunk->size, src_icg, dst_icg); + + /* + * Handle the case where we just have the same + * transfer to setup, we can just increase the + * block number and reuse the same descriptor. + */ + if (prev_chunk && prev && + (prev_chunk->size == chunk->size) && + (prev_src_icg == src_icg) && + (prev_dst_icg == dst_icg)) { + dev_dbg(chan2dev(chan), + "%s: same configuration that the previous chunk, merging the descriptors...\n", + __func__); + at_xdmac_increment_block_count(chan, prev); + continue; + } + + desc = at_xdmac_interleaved_queue_desc(chan, atchan, + prev, + src_addr, dst_addr, + xt, chunk); + if (!desc) { + list_splice_init(&first->descs_list, + &atchan->free_descs_list); + return NULL; + } + + if (!first) + first = desc; + + dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n", + __func__, desc, first); + list_add_tail(&desc->desc_node, &first->descs_list); + + if (xt->src_sgl) + src_addr += src_skip; + + if (xt->dst_sgl) + dst_addr += dst_skip; + + len += chunk->size; + prev_chunk = chunk; + prev_dst_icg = dst_icg; + prev_src_icg = src_icg; + prev = desc; + } + + first->tx_dma_desc.cookie = -EBUSY; + first->tx_dma_desc.flags = flags; + first->xfer_size = len; + + return &first->tx_dma_desc; +} + static struct dma_async_tx_descriptor * at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags) @@ -814,24 +1046,7 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (unlikely(!len)) return NULL; - /* - * Check address alignment to select the greater data width we can use. - * Some XDMAC implementations don't provide dword transfer, in this - * case selecting dword has the same behavior as selecting word transfers. - */ - if (!((src_addr | dst_addr) & 7)) { - dwidth = AT_XDMAC_CC_DWIDTH_DWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__); - } else if (!((src_addr | dst_addr) & 3)) { - dwidth = AT_XDMAC_CC_DWIDTH_WORD; - dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__); - } else if (!((src_addr | dst_addr) & 1)) { - dwidth = AT_XDMAC_CC_DWIDTH_HALFWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__); - } else { - dwidth = AT_XDMAC_CC_DWIDTH_BYTE; - dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__); - } + dwidth = at_xdmac_align_width(chan, src_addr | dst_addr); /* Prepare descriptors. */ while (remaining_size) { @@ -861,19 +1076,8 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, dev_dbg(chan2dev(chan), "%s: xfer_size=%zu\n", __func__, xfer_size); /* Check remaining length and change data width if needed. */ - if (!((src_addr | dst_addr | xfer_size) & 7)) { - dwidth = AT_XDMAC_CC_DWIDTH_DWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__); - } else if (!((src_addr | dst_addr | xfer_size) & 3)) { - dwidth = AT_XDMAC_CC_DWIDTH_WORD; - dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__); - } else if (!((src_addr | dst_addr | xfer_size) & 1)) { - dwidth = AT_XDMAC_CC_DWIDTH_HALFWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__); - } else if ((src_addr | dst_addr | xfer_size) & 1) { - dwidth = AT_XDMAC_CC_DWIDTH_BYTE; - dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__); - } + dwidth = at_xdmac_align_width(chan, + src_addr | dst_addr | xfer_size); chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth); ublen = xfer_size >> dwidth; @@ -884,7 +1088,6 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2 | AT_XDMAC_MBR_UBC_NDEN | AT_XDMAC_MBR_UBC_NSEN - | (remaining_size ? AT_XDMAC_MBR_UBC_NDE : 0) | ublen; desc->lld.mbr_cfg = chan_cc; @@ -893,12 +1096,8 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc, desc->lld.mbr_cfg); /* Chain lld. */ - if (prev) { - prev->lld.mbr_nda = desc->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=0x%08x\n", - __func__, prev, prev->lld.mbr_nda); - } + if (prev) + at_xdmac_queue_desc(chan, prev, desc); prev = desc; if (!first) @@ -915,6 +1114,93 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, return &first->tx_dma_desc; } +static struct at_xdmac_desc *at_xdmac_memset_create_desc(struct dma_chan *chan, + struct at_xdmac_chan *atchan, + dma_addr_t dst_addr, + size_t len, + int value) +{ + struct at_xdmac_desc *desc; + unsigned long flags; + size_t ublen; + u32 dwidth; + /* + * WARNING: The channel configuration is set here since there is no + * dmaengine_slave_config call in this case. Moreover we don't know the + * direction, it involves we can't dynamically set the source and dest + * interface so we have to use the same one. Only interface 0 allows EBI + * access. Hopefully we can access DDR through both ports (at least on + * SAMA5D4x), so we can use the same interface for source and dest, + * that solves the fact we don't know the direction. + */ + u32 chan_cc = AT_XDMAC_CC_DAM_INCREMENTED_AM + | AT_XDMAC_CC_SAM_INCREMENTED_AM + | AT_XDMAC_CC_DIF(0) + | AT_XDMAC_CC_SIF(0) + | AT_XDMAC_CC_MBSIZE_SIXTEEN + | AT_XDMAC_CC_MEMSET_HW_MODE + | AT_XDMAC_CC_TYPE_MEM_TRAN; + + dwidth = at_xdmac_align_width(chan, dst_addr); + + if (len >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) { + dev_err(chan2dev(chan), + "%s: Transfer too large, aborting...\n", + __func__); + return NULL; + } + + spin_lock_irqsave(&atchan->lock, flags); + desc = at_xdmac_get_desc(atchan); + spin_unlock_irqrestore(&atchan->lock, flags); + if (!desc) { + dev_err(chan2dev(chan), "can't get descriptor\n"); + return NULL; + } + + chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth); + + ublen = len >> dwidth; + + desc->lld.mbr_da = dst_addr; + desc->lld.mbr_ds = value; + desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3 + | AT_XDMAC_MBR_UBC_NDEN + | AT_XDMAC_MBR_UBC_NSEN + | ublen; + desc->lld.mbr_cfg = chan_cc; + + dev_dbg(chan2dev(chan), + "%s: lld: mbr_da=0x%08x, mbr_ds=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n", + __func__, desc->lld.mbr_da, desc->lld.mbr_ds, desc->lld.mbr_ubc, + desc->lld.mbr_cfg); + + return desc; +} + +struct dma_async_tx_descriptor * +at_xdmac_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *desc; + + dev_dbg(chan2dev(chan), "%s: dest=0x%08x, len=%d, pattern=0x%x, flags=0x%lx\n", + __func__, dest, len, value, flags); + + if (unlikely(!len)) + return NULL; + + desc = at_xdmac_memset_create_desc(chan, atchan, dest, len, value); + list_add_tail(&desc->desc_node, &desc->descs_list); + + desc->tx_dma_desc.cookie = -EBUSY; + desc->tx_dma_desc.flags = flags; + desc->xfer_size = len; + + return &desc->tx_dma_desc; +} + static enum dma_status at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, struct dma_tx_state *txstate) @@ -1445,7 +1731,9 @@ static int at_xdmac_probe(struct platform_device *pdev) } dma_cap_set(DMA_CYCLIC, atxdmac->dma.cap_mask); + dma_cap_set(DMA_INTERLEAVE, atxdmac->dma.cap_mask); dma_cap_set(DMA_MEMCPY, atxdmac->dma.cap_mask); + dma_cap_set(DMA_MEMSET, atxdmac->dma.cap_mask); dma_cap_set(DMA_SLAVE, atxdmac->dma.cap_mask); /* * Without DMA_PRIVATE the driver is not able to allocate more than @@ -1458,7 +1746,9 @@ static int at_xdmac_probe(struct platform_device *pdev) atxdmac->dma.device_tx_status = at_xdmac_tx_status; atxdmac->dma.device_issue_pending = at_xdmac_issue_pending; atxdmac->dma.device_prep_dma_cyclic = at_xdmac_prep_dma_cyclic; + atxdmac->dma.device_prep_interleaved_dma = at_xdmac_prep_interleaved; atxdmac->dma.device_prep_dma_memcpy = at_xdmac_prep_dma_memcpy; + atxdmac->dma.device_prep_dma_memset = at_xdmac_prep_dma_memset; atxdmac->dma.device_prep_slave_sg = at_xdmac_prep_slave_sg; atxdmac->dma.device_config = at_xdmac_device_config; atxdmac->dma.device_pause = at_xdmac_device_pause; diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 3ddfd1f6c23c..4a4cce15f25d 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -267,6 +267,13 @@ static void dma_chan_put(struct dma_chan *chan) /* This channel is not in use anymore, free it */ if (!chan->client_count && chan->device->device_free_chan_resources) chan->device->device_free_chan_resources(chan); + + /* If the channel is used via a DMA request router, free the mapping */ + if (chan->router && chan->router->route_free) { + chan->router->route_free(chan->router->dev, chan->route_data); + chan->router = NULL; + chan->route_data = NULL; + } } enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie) @@ -536,7 +543,7 @@ static struct dma_chan *private_candidate(const dma_cap_mask_t *mask, } /** - * dma_request_slave_channel - try to get specific channel exclusively + * dma_get_slave_channel - try to get specific channel exclusively * @chan: target channel */ struct dma_chan *dma_get_slave_channel(struct dma_chan *chan) @@ -648,7 +655,7 @@ struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, EXPORT_SYMBOL_GPL(__dma_request_channel); /** - * dma_request_slave_channel - try to allocate an exclusive slave channel + * dma_request_slave_channel_reason - try to allocate an exclusive slave channel * @dev: pointer to client device structure * @name: slave channel name * @@ -836,6 +843,8 @@ int dma_async_device_register(struct dma_device *device) !device->device_prep_dma_pq); BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) && !device->device_prep_dma_pq_val); + BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && + !device->device_prep_dma_memset); BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && !device->device_prep_dma_interrupt); BUG_ON(dma_has_cap(DMA_SG, device->cap_mask) && diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index 24e5290faa32..57ff46284f15 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -1364,7 +1364,7 @@ static int __init ep93xx_dma_probe(struct platform_device *pdev) return ret; } -static struct platform_device_id ep93xx_dma_driver_ids[] = { +static const struct platform_device_id ep93xx_dma_driver_ids[] = { { "ep93xx-dma-m2p", 0 }, { "ep93xx-dma-m2m", 1 }, { }, diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c index 09e2842d15ec..915eec3cc279 100644 --- a/drivers/dma/fsl-edma.c +++ b/drivers/dma/fsl-edma.c @@ -881,10 +881,6 @@ static int fsl_edma_probe(struct platform_device *pdev) } - ret = fsl_edma_irq_init(pdev, fsl_edma); - if (ret) - return ret; - fsl_edma->big_endian = of_property_read_bool(np, "big-endian"); INIT_LIST_HEAD(&fsl_edma->dma_dev.channels); @@ -900,6 +896,11 @@ static int fsl_edma_probe(struct platform_device *pdev) fsl_edma_chan_mux(fsl_chan, 0, false); } + edma_writel(fsl_edma, ~0, fsl_edma->membase + EDMA_INTR); + ret = fsl_edma_irq_init(pdev, fsl_edma); + if (ret) + return ret; + dma_cap_set(DMA_PRIVATE, fsl_edma->dma_dev.cap_mask); dma_cap_set(DMA_SLAVE, fsl_edma->dma_dev.cap_mask); dma_cap_set(DMA_CYCLIC, fsl_edma->dma_dev.cap_mask); diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index eed405976ea9..865501fcc67d 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -193,7 +193,7 @@ struct imxdma_filter_data { int request; }; -static struct platform_device_id imx_dma_devtype[] = { +static const struct platform_device_id imx_dma_devtype[] = { { .name = "imx1-dma", .driver_data = IMX1_DMA, diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 62bbd79338e0..77b6aab04f47 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -420,7 +420,7 @@ static struct sdma_driver_data sdma_imx6q = { .script_addrs = &sdma_script_imx6q, }; -static struct platform_device_id sdma_devtypes[] = { +static const struct platform_device_id sdma_devtypes[] = { { .name = "imx25-sdma", .driver_data = (unsigned long)&sdma_imx25, diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 1c56001df676..fbaf1ead2597 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -19,6 +19,7 @@ #include <linux/dma-mapping.h> #include <linux/spinlock.h> #include <linux/interrupt.h> +#include <linux/of_device.h> #include <linux/platform_device.h> #include <linux/memory.h> #include <linux/clk.h> @@ -30,6 +31,11 @@ #include "dmaengine.h" #include "mv_xor.h" +enum mv_xor_mode { + XOR_MODE_IN_REG, + XOR_MODE_IN_DESC, +}; + static void mv_xor_issue_pending(struct dma_chan *chan); #define to_mv_xor_chan(chan) \ @@ -56,18 +62,30 @@ static void mv_desc_init(struct mv_xor_desc_slot *desc, hw_desc->byte_count = byte_count; } -static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc, - u32 next_desc_addr) +static void mv_desc_set_mode(struct mv_xor_desc_slot *desc) { struct mv_xor_desc *hw_desc = desc->hw_desc; - BUG_ON(hw_desc->phy_next_desc); - hw_desc->phy_next_desc = next_desc_addr; + + switch (desc->type) { + case DMA_XOR: + case DMA_INTERRUPT: + hw_desc->desc_command |= XOR_DESC_OPERATION_XOR; + break; + case DMA_MEMCPY: + hw_desc->desc_command |= XOR_DESC_OPERATION_MEMCPY; + break; + default: + BUG(); + return; + } } -static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc) +static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc, + u32 next_desc_addr) { struct mv_xor_desc *hw_desc = desc->hw_desc; - hw_desc->phy_next_desc = 0; + BUG_ON(hw_desc->phy_next_desc); + hw_desc->phy_next_desc = next_desc_addr; } static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc, @@ -104,7 +122,7 @@ static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan) return intr_cause; } -static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan) +static void mv_chan_clear_eoc_cause(struct mv_xor_chan *chan) { u32 val; @@ -114,14 +132,14 @@ static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan) writel_relaxed(val, XOR_INTR_CAUSE(chan)); } -static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan) +static void mv_chan_clear_err_status(struct mv_xor_chan *chan) { u32 val = 0xFFFF0000 >> (chan->idx * 16); writel_relaxed(val, XOR_INTR_CAUSE(chan)); } -static void mv_set_mode(struct mv_xor_chan *chan, - enum dma_transaction_type type) +static void mv_chan_set_mode(struct mv_xor_chan *chan, + enum dma_transaction_type type) { u32 op_mode; u32 config = readl_relaxed(XOR_CONFIG(chan)); @@ -144,6 +162,25 @@ static void mv_set_mode(struct mv_xor_chan *chan, config &= ~0x7; config |= op_mode; + if (IS_ENABLED(__BIG_ENDIAN)) + config |= XOR_DESCRIPTOR_SWAP; + else + config &= ~XOR_DESCRIPTOR_SWAP; + + writel_relaxed(config, XOR_CONFIG(chan)); + chan->current_type = type; +} + +static void mv_chan_set_mode_to_desc(struct mv_xor_chan *chan) +{ + u32 op_mode; + u32 config = readl_relaxed(XOR_CONFIG(chan)); + + op_mode = XOR_OPERATION_MODE_IN_DESC; + + config &= ~0x7; + config |= op_mode; + #if defined(__BIG_ENDIAN) config |= XOR_DESCRIPTOR_SWAP; #else @@ -151,7 +188,6 @@ static void mv_set_mode(struct mv_xor_chan *chan, #endif writel_relaxed(config, XOR_CONFIG(chan)); - chan->current_type = type; } static void mv_chan_activate(struct mv_xor_chan *chan) @@ -171,28 +207,13 @@ static char mv_chan_is_busy(struct mv_xor_chan *chan) return (state == 1) ? 1 : 0; } -/** - * mv_xor_free_slots - flags descriptor slots for reuse - * @slot: Slot to free - * Caller must hold &mv_chan->lock while calling this function - */ -static void mv_xor_free_slots(struct mv_xor_chan *mv_chan, - struct mv_xor_desc_slot *slot) -{ - dev_dbg(mv_chan_to_devp(mv_chan), "%s %d slot %p\n", - __func__, __LINE__, slot); - - slot->slot_used = 0; - -} - /* - * mv_xor_start_new_chain - program the engine to operate on new chain headed by - * sw_desc + * mv_chan_start_new_chain - program the engine to operate on new + * chain headed by sw_desc * Caller must hold &mv_chan->lock while calling this function */ -static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan, - struct mv_xor_desc_slot *sw_desc) +static void mv_chan_start_new_chain(struct mv_xor_chan *mv_chan, + struct mv_xor_desc_slot *sw_desc) { dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: sw_desc %p\n", __func__, __LINE__, sw_desc); @@ -205,8 +226,9 @@ static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan, } static dma_cookie_t -mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc, - struct mv_xor_chan *mv_chan, dma_cookie_t cookie) +mv_desc_run_tx_complete_actions(struct mv_xor_desc_slot *desc, + struct mv_xor_chan *mv_chan, + dma_cookie_t cookie) { BUG_ON(desc->async_tx.cookie < 0); @@ -230,93 +252,110 @@ mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc, } static int -mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan) +mv_chan_clean_completed_slots(struct mv_xor_chan *mv_chan) { struct mv_xor_desc_slot *iter, *_iter; dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__); list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots, - completed_node) { + node) { - if (async_tx_test_ack(&iter->async_tx)) { - list_del(&iter->completed_node); - mv_xor_free_slots(mv_chan, iter); - } + if (async_tx_test_ack(&iter->async_tx)) + list_move_tail(&iter->node, &mv_chan->free_slots); } return 0; } static int -mv_xor_clean_slot(struct mv_xor_desc_slot *desc, - struct mv_xor_chan *mv_chan) +mv_desc_clean_slot(struct mv_xor_desc_slot *desc, + struct mv_xor_chan *mv_chan) { dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: desc %p flags %d\n", __func__, __LINE__, desc, desc->async_tx.flags); - list_del(&desc->chain_node); + /* the client is allowed to attach dependent operations * until 'ack' is set */ - if (!async_tx_test_ack(&desc->async_tx)) { + if (!async_tx_test_ack(&desc->async_tx)) /* move this slot to the completed_slots */ - list_add_tail(&desc->completed_node, &mv_chan->completed_slots); - return 0; - } + list_move_tail(&desc->node, &mv_chan->completed_slots); + else + list_move_tail(&desc->node, &mv_chan->free_slots); - mv_xor_free_slots(mv_chan, desc); return 0; } /* This function must be called with the mv_xor_chan spinlock held */ -static void mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) +static void mv_chan_slot_cleanup(struct mv_xor_chan *mv_chan) { struct mv_xor_desc_slot *iter, *_iter; dma_cookie_t cookie = 0; int busy = mv_chan_is_busy(mv_chan); u32 current_desc = mv_chan_get_current_desc(mv_chan); - int seen_current = 0; + int current_cleaned = 0; + struct mv_xor_desc *hw_desc; dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__); dev_dbg(mv_chan_to_devp(mv_chan), "current_desc %x\n", current_desc); - mv_xor_clean_completed_slots(mv_chan); + mv_chan_clean_completed_slots(mv_chan); /* free completed slots from the chain starting with * the oldest descriptor */ list_for_each_entry_safe(iter, _iter, &mv_chan->chain, - chain_node) { - prefetch(_iter); - prefetch(&_iter->async_tx); + node) { - /* do not advance past the current descriptor loaded into the - * hardware channel, subsequent descriptors are either in - * process or have not been submitted - */ - if (seen_current) - break; + /* clean finished descriptors */ + hw_desc = iter->hw_desc; + if (hw_desc->status & XOR_DESC_SUCCESS) { + cookie = mv_desc_run_tx_complete_actions(iter, mv_chan, + cookie); - /* stop the search if we reach the current descriptor and the - * channel is busy - */ - if (iter->async_tx.phys == current_desc) { - seen_current = 1; - if (busy) + /* done processing desc, clean slot */ + mv_desc_clean_slot(iter, mv_chan); + + /* break if we did cleaned the current */ + if (iter->async_tx.phys == current_desc) { + current_cleaned = 1; break; + } + } else { + if (iter->async_tx.phys == current_desc) { + current_cleaned = 0; + break; + } } - - cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie); - - if (mv_xor_clean_slot(iter, mv_chan)) - break; } if ((busy == 0) && !list_empty(&mv_chan->chain)) { - struct mv_xor_desc_slot *chain_head; - chain_head = list_entry(mv_chan->chain.next, - struct mv_xor_desc_slot, - chain_node); - - mv_xor_start_new_chain(mv_chan, chain_head); + if (current_cleaned) { + /* + * current descriptor cleaned and removed, run + * from list head + */ + iter = list_entry(mv_chan->chain.next, + struct mv_xor_desc_slot, + node); + mv_chan_start_new_chain(mv_chan, iter); + } else { + if (!list_is_last(&iter->node, &mv_chan->chain)) { + /* + * descriptors are still waiting after + * current, trigger them + */ + iter = list_entry(iter->node.next, + struct mv_xor_desc_slot, + node); + mv_chan_start_new_chain(mv_chan, iter); + } else { + /* + * some descriptors are still waiting + * to be cleaned + */ + tasklet_schedule(&mv_chan->irq_tasklet); + } + } } if (cookie > 0) @@ -328,56 +367,35 @@ static void mv_xor_tasklet(unsigned long data) struct mv_xor_chan *chan = (struct mv_xor_chan *) data; spin_lock_bh(&chan->lock); - mv_xor_slot_cleanup(chan); + mv_chan_slot_cleanup(chan); spin_unlock_bh(&chan->lock); } static struct mv_xor_desc_slot * -mv_xor_alloc_slot(struct mv_xor_chan *mv_chan) +mv_chan_alloc_slot(struct mv_xor_chan *mv_chan) { - struct mv_xor_desc_slot *iter, *_iter; - int retry = 0; + struct mv_xor_desc_slot *iter; - /* start search from the last allocated descrtiptor - * if a contiguous allocation can not be found start searching - * from the beginning of the list - */ -retry: - if (retry == 0) - iter = mv_chan->last_used; - else - iter = list_entry(&mv_chan->all_slots, - struct mv_xor_desc_slot, - slot_node); - - list_for_each_entry_safe_continue( - iter, _iter, &mv_chan->all_slots, slot_node) { - - prefetch(_iter); - prefetch(&_iter->async_tx); - if (iter->slot_used) { - /* give up after finding the first busy slot - * on the second pass through the list - */ - if (retry) - break; - continue; - } + spin_lock_bh(&mv_chan->lock); + + if (!list_empty(&mv_chan->free_slots)) { + iter = list_first_entry(&mv_chan->free_slots, + struct mv_xor_desc_slot, + node); + + list_move_tail(&iter->node, &mv_chan->allocated_slots); + + spin_unlock_bh(&mv_chan->lock); /* pre-ack descriptor */ async_tx_ack(&iter->async_tx); - - iter->slot_used = 1; - INIT_LIST_HEAD(&iter->chain_node); iter->async_tx.cookie = -EBUSY; - mv_chan->last_used = iter; - mv_desc_clear_next_desc(iter); return iter; } - if (!retry++) - goto retry; + + spin_unlock_bh(&mv_chan->lock); /* try to free some slots if the allocation fails */ tasklet_schedule(&mv_chan->irq_tasklet); @@ -403,14 +421,14 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx) cookie = dma_cookie_assign(tx); if (list_empty(&mv_chan->chain)) - list_add_tail(&sw_desc->chain_node, &mv_chan->chain); + list_move_tail(&sw_desc->node, &mv_chan->chain); else { new_hw_chain = 0; old_chain_tail = list_entry(mv_chan->chain.prev, struct mv_xor_desc_slot, - chain_node); - list_add_tail(&sw_desc->chain_node, &mv_chan->chain); + node); + list_move_tail(&sw_desc->node, &mv_chan->chain); dev_dbg(mv_chan_to_devp(mv_chan), "Append to last desc %pa\n", &old_chain_tail->async_tx.phys); @@ -431,7 +449,7 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx) } if (new_hw_chain) - mv_xor_start_new_chain(mv_chan, sw_desc); + mv_chan_start_new_chain(mv_chan, sw_desc); spin_unlock_bh(&mv_chan->lock); @@ -463,26 +481,20 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan) dma_async_tx_descriptor_init(&slot->async_tx, chan); slot->async_tx.tx_submit = mv_xor_tx_submit; - INIT_LIST_HEAD(&slot->chain_node); - INIT_LIST_HEAD(&slot->slot_node); + INIT_LIST_HEAD(&slot->node); dma_desc = mv_chan->dma_desc_pool; slot->async_tx.phys = dma_desc + idx * MV_XOR_SLOT_SIZE; slot->idx = idx++; spin_lock_bh(&mv_chan->lock); mv_chan->slots_allocated = idx; - list_add_tail(&slot->slot_node, &mv_chan->all_slots); + list_add_tail(&slot->node, &mv_chan->free_slots); spin_unlock_bh(&mv_chan->lock); } - if (mv_chan->slots_allocated && !mv_chan->last_used) - mv_chan->last_used = list_entry(mv_chan->all_slots.next, - struct mv_xor_desc_slot, - slot_node); - dev_dbg(mv_chan_to_devp(mv_chan), - "allocated %d descriptor slots last_used: %p\n", - mv_chan->slots_allocated, mv_chan->last_used); + "allocated %d descriptor slots\n", + mv_chan->slots_allocated); return mv_chan->slots_allocated ? : -ENOMEM; } @@ -503,16 +515,17 @@ mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, "%s src_cnt: %d len: %u dest %pad flags: %ld\n", __func__, src_cnt, len, &dest, flags); - spin_lock_bh(&mv_chan->lock); - sw_desc = mv_xor_alloc_slot(mv_chan); + sw_desc = mv_chan_alloc_slot(mv_chan); if (sw_desc) { sw_desc->type = DMA_XOR; sw_desc->async_tx.flags = flags; mv_desc_init(sw_desc, dest, len, flags); + if (mv_chan->op_in_desc == XOR_MODE_IN_DESC) + mv_desc_set_mode(sw_desc); while (src_cnt--) mv_desc_set_src_addr(sw_desc, src_cnt, src[src_cnt]); } - spin_unlock_bh(&mv_chan->lock); + dev_dbg(mv_chan_to_devp(mv_chan), "%s sw_desc %p async_tx %p \n", __func__, sw_desc, &sw_desc->async_tx); @@ -556,25 +569,29 @@ static void mv_xor_free_chan_resources(struct dma_chan *chan) spin_lock_bh(&mv_chan->lock); - mv_xor_slot_cleanup(mv_chan); + mv_chan_slot_cleanup(mv_chan); list_for_each_entry_safe(iter, _iter, &mv_chan->chain, - chain_node) { + node) { in_use_descs++; - list_del(&iter->chain_node); + list_move_tail(&iter->node, &mv_chan->free_slots); } list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots, - completed_node) { + node) { in_use_descs++; - list_del(&iter->completed_node); + list_move_tail(&iter->node, &mv_chan->free_slots); + } + list_for_each_entry_safe(iter, _iter, &mv_chan->allocated_slots, + node) { + in_use_descs++; + list_move_tail(&iter->node, &mv_chan->free_slots); } list_for_each_entry_safe_reverse( - iter, _iter, &mv_chan->all_slots, slot_node) { - list_del(&iter->slot_node); + iter, _iter, &mv_chan->free_slots, node) { + list_del(&iter->node); kfree(iter); mv_chan->slots_allocated--; } - mv_chan->last_used = NULL; dev_dbg(mv_chan_to_devp(mv_chan), "%s slots_allocated %d\n", __func__, mv_chan->slots_allocated); @@ -603,13 +620,13 @@ static enum dma_status mv_xor_status(struct dma_chan *chan, return ret; spin_lock_bh(&mv_chan->lock); - mv_xor_slot_cleanup(mv_chan); + mv_chan_slot_cleanup(mv_chan); spin_unlock_bh(&mv_chan->lock); return dma_cookie_status(chan, cookie, txstate); } -static void mv_dump_xor_regs(struct mv_xor_chan *chan) +static void mv_chan_dump_regs(struct mv_xor_chan *chan) { u32 val; @@ -632,8 +649,8 @@ static void mv_dump_xor_regs(struct mv_xor_chan *chan) dev_err(mv_chan_to_devp(chan), "error addr 0x%08x\n", val); } -static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan, - u32 intr_cause) +static void mv_chan_err_interrupt_handler(struct mv_xor_chan *chan, + u32 intr_cause) { if (intr_cause & XOR_INT_ERR_DECODE) { dev_dbg(mv_chan_to_devp(chan), "ignoring address decode error\n"); @@ -643,7 +660,7 @@ static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan, dev_err(mv_chan_to_devp(chan), "error on chan %d. intr cause 0x%08x\n", chan->idx, intr_cause); - mv_dump_xor_regs(chan); + mv_chan_dump_regs(chan); WARN_ON(1); } @@ -655,11 +672,11 @@ static irqreturn_t mv_xor_interrupt_handler(int irq, void *data) dev_dbg(mv_chan_to_devp(chan), "intr cause %x\n", intr_cause); if (intr_cause & XOR_INTR_ERRORS) - mv_xor_err_interrupt_handler(chan, intr_cause); + mv_chan_err_interrupt_handler(chan, intr_cause); tasklet_schedule(&chan->irq_tasklet); - mv_xor_device_clear_eoc_cause(chan); + mv_chan_clear_eoc_cause(chan); return IRQ_HANDLED; } @@ -678,7 +695,7 @@ static void mv_xor_issue_pending(struct dma_chan *chan) * Perform a transaction to verify the HW works. */ -static int mv_xor_memcpy_self_test(struct mv_xor_chan *mv_chan) +static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan) { int i, ret; void *src, *dest; @@ -787,7 +804,7 @@ out: #define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */ static int -mv_xor_xor_self_test(struct mv_xor_chan *mv_chan) +mv_chan_xor_self_test(struct mv_xor_chan *mv_chan) { int i, src_idx, ret; struct page *dest; @@ -951,7 +968,7 @@ static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan) static struct mv_xor_chan * mv_xor_channel_add(struct mv_xor_device *xordev, struct platform_device *pdev, - int idx, dma_cap_mask_t cap_mask, int irq) + int idx, dma_cap_mask_t cap_mask, int irq, int op_in_desc) { int ret = 0; struct mv_xor_chan *mv_chan; @@ -963,6 +980,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan->idx = idx; mv_chan->irq = irq; + mv_chan->op_in_desc = op_in_desc; dma_dev = &mv_chan->dmadev; @@ -1014,7 +1032,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan); /* clear errors before enabling interrupts */ - mv_xor_device_clear_err_status(mv_chan); + mv_chan_clear_err_status(mv_chan); ret = request_irq(mv_chan->irq, mv_xor_interrupt_handler, 0, dev_name(&pdev->dev), mv_chan); @@ -1023,32 +1041,37 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan_unmask_interrupts(mv_chan); - mv_set_mode(mv_chan, DMA_XOR); + if (mv_chan->op_in_desc == XOR_MODE_IN_DESC) + mv_chan_set_mode_to_desc(mv_chan); + else + mv_chan_set_mode(mv_chan, DMA_XOR); spin_lock_init(&mv_chan->lock); INIT_LIST_HEAD(&mv_chan->chain); INIT_LIST_HEAD(&mv_chan->completed_slots); - INIT_LIST_HEAD(&mv_chan->all_slots); + INIT_LIST_HEAD(&mv_chan->free_slots); + INIT_LIST_HEAD(&mv_chan->allocated_slots); mv_chan->dmachan.device = dma_dev; dma_cookie_init(&mv_chan->dmachan); list_add_tail(&mv_chan->dmachan.device_node, &dma_dev->channels); if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { - ret = mv_xor_memcpy_self_test(mv_chan); + ret = mv_chan_memcpy_self_test(mv_chan); dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret); if (ret) goto err_free_irq; } if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { - ret = mv_xor_xor_self_test(mv_chan); + ret = mv_chan_xor_self_test(mv_chan); dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); if (ret) goto err_free_irq; } - dev_info(&pdev->dev, "Marvell XOR: ( %s%s%s)\n", + dev_info(&pdev->dev, "Marvell XOR (%s): ( %s%s%s)\n", + mv_chan->op_in_desc ? "Descriptor Mode" : "Registers Mode", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); @@ -1097,6 +1120,13 @@ mv_xor_conf_mbus_windows(struct mv_xor_device *xordev, writel(0, base + WINDOW_OVERRIDE_CTRL(1)); } +static const struct of_device_id mv_xor_dt_ids[] = { + { .compatible = "marvell,orion-xor", .data = (void *)XOR_MODE_IN_REG }, + { .compatible = "marvell,armada-380-xor", .data = (void *)XOR_MODE_IN_DESC }, + {}, +}; +MODULE_DEVICE_TABLE(of, mv_xor_dt_ids); + static int mv_xor_probe(struct platform_device *pdev) { const struct mbus_dram_target_info *dram; @@ -1104,6 +1134,7 @@ static int mv_xor_probe(struct platform_device *pdev) struct mv_xor_platform_data *pdata = dev_get_platdata(&pdev->dev); struct resource *res; int i, ret; + int op_in_desc; dev_notice(&pdev->dev, "Marvell shared XOR driver\n"); @@ -1148,11 +1179,15 @@ static int mv_xor_probe(struct platform_device *pdev) if (pdev->dev.of_node) { struct device_node *np; int i = 0; + const struct of_device_id *of_id = + of_match_device(mv_xor_dt_ids, + &pdev->dev); for_each_child_of_node(pdev->dev.of_node, np) { struct mv_xor_chan *chan; dma_cap_mask_t cap_mask; int irq; + op_in_desc = (int)of_id->data; dma_cap_zero(cap_mask); if (of_property_read_bool(np, "dmacap,memcpy")) @@ -1169,7 +1204,7 @@ static int mv_xor_probe(struct platform_device *pdev) } chan = mv_xor_channel_add(xordev, pdev, i, - cap_mask, irq); + cap_mask, irq, op_in_desc); if (IS_ERR(chan)) { ret = PTR_ERR(chan); irq_dispose_mapping(irq); @@ -1198,7 +1233,8 @@ static int mv_xor_probe(struct platform_device *pdev) } chan = mv_xor_channel_add(xordev, pdev, i, - cd->cap_mask, irq); + cd->cap_mask, irq, + XOR_MODE_IN_REG); if (IS_ERR(chan)) { ret = PTR_ERR(chan); goto err_channel_add; @@ -1244,14 +1280,6 @@ static int mv_xor_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_OF -static const struct of_device_id mv_xor_dt_ids[] = { - { .compatible = "marvell,orion-xor", }, - {}, -}; -MODULE_DEVICE_TABLE(of, mv_xor_dt_ids); -#endif - static struct platform_driver mv_xor_driver = { .probe = mv_xor_probe, .remove = mv_xor_remove, diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h index 91958dba39a2..b7455b42137b 100644 --- a/drivers/dma/mv_xor.h +++ b/drivers/dma/mv_xor.h @@ -19,7 +19,7 @@ #include <linux/dmaengine.h> #include <linux/interrupt.h> -#define MV_XOR_POOL_SIZE PAGE_SIZE +#define MV_XOR_POOL_SIZE (MV_XOR_SLOT_SIZE * 3072) #define MV_XOR_SLOT_SIZE 64 #define MV_XOR_THRESHOLD 1 #define MV_XOR_MAX_CHANNELS 2 @@ -30,7 +30,13 @@ /* Values for the XOR_CONFIG register */ #define XOR_OPERATION_MODE_XOR 0 #define XOR_OPERATION_MODE_MEMCPY 2 +#define XOR_OPERATION_MODE_IN_DESC 7 #define XOR_DESCRIPTOR_SWAP BIT(14) +#define XOR_DESC_SUCCESS 0x40000000 + +#define XOR_DESC_OPERATION_XOR (0 << 24) +#define XOR_DESC_OPERATION_CRC32C (1 << 24) +#define XOR_DESC_OPERATION_MEMCPY (2 << 24) #define XOR_DESC_DMA_OWNED BIT(31) #define XOR_DESC_EOD_INT_EN BIT(31) @@ -88,13 +94,14 @@ struct mv_xor_device { * @mmr_base: memory mapped register base * @idx: the index of the xor channel * @chain: device chain view of the descriptors + * @free_slots: free slots usable by the channel + * @allocated_slots: slots allocated by the driver * @completed_slots: slots completed by HW but still need to be acked * @device: parent device * @common: common dmaengine channel object members - * @last_used: place holder for allocation to continue from where it left off - * @all_slots: complete domain of slots usable by the channel * @slots_allocated: records the actual size of the descriptor slot pool * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs + * @op_in_desc: new mode of driver, each op is writen to descriptor. */ struct mv_xor_chan { int pending; @@ -105,16 +112,17 @@ struct mv_xor_chan { int irq; enum dma_transaction_type current_type; struct list_head chain; + struct list_head free_slots; + struct list_head allocated_slots; struct list_head completed_slots; dma_addr_t dma_desc_pool; void *dma_desc_pool_virt; size_t pool_size; struct dma_device dmadev; struct dma_chan dmachan; - struct mv_xor_desc_slot *last_used; - struct list_head all_slots; int slots_allocated; struct tasklet_struct irq_tasklet; + int op_in_desc; char dummy_src[MV_XOR_MIN_BYTE_COUNT]; char dummy_dst[MV_XOR_MIN_BYTE_COUNT]; dma_addr_t dummy_src_addr, dummy_dst_addr; @@ -122,9 +130,7 @@ struct mv_xor_chan { /** * struct mv_xor_desc_slot - software descriptor - * @slot_node: node on the mv_xor_chan.all_slots list - * @chain_node: node on the mv_xor_chan.chain list - * @completed_node: node on the mv_xor_chan.completed_slots list + * @node: node on the mv_xor_chan lists * @hw_desc: virtual address of the hardware descriptor chain * @phys: hardware address of the hardware descriptor chain * @slot_used: slot in use or not @@ -133,12 +139,9 @@ struct mv_xor_chan { * @async_tx: support for the async_tx api */ struct mv_xor_desc_slot { - struct list_head slot_node; - struct list_head chain_node; - struct list_head completed_node; + struct list_head node; enum dma_transaction_type type; void *hw_desc; - u16 slot_used; u16 idx; struct dma_async_tx_descriptor async_tx; }; diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c index 829ec686dac3..60de35251da5 100644 --- a/drivers/dma/mxs-dma.c +++ b/drivers/dma/mxs-dma.c @@ -170,7 +170,7 @@ static struct mxs_dma_type mxs_dma_types[] = { } }; -static struct platform_device_id mxs_dma_ids[] = { +static const struct platform_device_id mxs_dma_ids[] = { { .name = "imx23-dma-apbh", .driver_data = (kernel_ulong_t) &mxs_dma_types[0], diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c index 88b77c98365d..2b5a198ac77e 100644 --- a/drivers/dma/nbpfaxi.c +++ b/drivers/dma/nbpfaxi.c @@ -1455,7 +1455,7 @@ static int nbpf_remove(struct platform_device *pdev) return 0; } -static struct platform_device_id nbpf_ids[] = { +static const struct platform_device_id nbpf_ids[] = { {"nbpfaxi64dmac1b4", (kernel_ulong_t)&nbpf_cfg[NBPF1B4]}, {"nbpfaxi64dmac1b8", (kernel_ulong_t)&nbpf_cfg[NBPF1B8]}, {"nbpfaxi64dmac1b16", (kernel_ulong_t)&nbpf_cfg[NBPF1B16]}, diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c index cbd4a8aff120..1e1f2986eba8 100644 --- a/drivers/dma/of-dma.c +++ b/drivers/dma/of-dma.c @@ -45,6 +45,50 @@ static struct of_dma *of_dma_find_controller(struct of_phandle_args *dma_spec) } /** + * of_dma_router_xlate - translation function for router devices + * @dma_spec: pointer to DMA specifier as found in the device tree + * @of_dma: pointer to DMA controller data (router information) + * + * The function creates new dma_spec to be passed to the router driver's + * of_dma_route_allocate() function to prepare a dma_spec which will be used + * to request channel from the real DMA controller. + */ +static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct dma_chan *chan; + struct of_dma *ofdma_target; + struct of_phandle_args dma_spec_target; + void *route_data; + + /* translate the request for the real DMA controller */ + memcpy(&dma_spec_target, dma_spec, sizeof(dma_spec_target)); + route_data = ofdma->of_dma_route_allocate(&dma_spec_target, ofdma); + if (IS_ERR(route_data)) + return NULL; + + ofdma_target = of_dma_find_controller(&dma_spec_target); + if (!ofdma_target) + return NULL; + + chan = ofdma_target->of_dma_xlate(&dma_spec_target, ofdma_target); + if (chan) { + chan->router = ofdma->dma_router; + chan->route_data = route_data; + } else { + ofdma->dma_router->route_free(ofdma->dma_router->dev, + route_data); + } + + /* + * Need to put the node back since the ofdma->of_dma_route_allocate + * has taken it for generating the new, translated dma_spec + */ + of_node_put(dma_spec_target.np); + return chan; +} + +/** * of_dma_controller_register - Register a DMA controller to DT DMA helpers * @np: device node of DMA controller * @of_dma_xlate: translation function which converts a phandle @@ -110,6 +154,51 @@ void of_dma_controller_free(struct device_node *np) EXPORT_SYMBOL_GPL(of_dma_controller_free); /** + * of_dma_router_register - Register a DMA router to DT DMA helpers as a + * controller + * @np: device node of DMA router + * @of_dma_route_allocate: setup function for the router which need to + * modify the dma_spec for the DMA controller to + * use and to set up the requested route. + * @dma_router: pointer to dma_router structure to be used when + * the route need to be free up. + * + * Returns 0 on success or appropriate errno value on error. + * + * Allocated memory should be freed with appropriate of_dma_controller_free() + * call. + */ +int of_dma_router_register(struct device_node *np, + void *(*of_dma_route_allocate) + (struct of_phandle_args *, struct of_dma *), + struct dma_router *dma_router) +{ + struct of_dma *ofdma; + + if (!np || !of_dma_route_allocate || !dma_router) { + pr_err("%s: not enough information provided\n", __func__); + return -EINVAL; + } + + ofdma = kzalloc(sizeof(*ofdma), GFP_KERNEL); + if (!ofdma) + return -ENOMEM; + + ofdma->of_node = np; + ofdma->of_dma_xlate = of_dma_router_xlate; + ofdma->of_dma_route_allocate = of_dma_route_allocate; + ofdma->dma_router = dma_router; + + /* Now queue of_dma controller structure in list */ + mutex_lock(&of_dma_lock); + list_add_tail(&ofdma->of_dma_controllers, &of_dma_list); + mutex_unlock(&of_dma_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(of_dma_router_register); + +/** * of_dma_match_channel - Check if a DMA specifier matches name * @np: device node to look for DMA channels * @name: channel name to be matched diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c index 167dbaf65742..249445c8a4c6 100644 --- a/drivers/dma/omap-dma.c +++ b/drivers/dma/omap-dma.c @@ -22,6 +22,9 @@ #include "virt-dma.h" +#define OMAP_SDMA_REQUESTS 127 +#define OMAP_SDMA_CHANNELS 32 + struct omap_dmadev { struct dma_device ddev; spinlock_t lock; @@ -31,9 +34,10 @@ struct omap_dmadev { const struct omap_dma_reg *reg_map; struct omap_system_dma_plat_info *plat; bool legacy; + unsigned dma_requests; spinlock_t irq_lock; uint32_t irq_enable_mask; - struct omap_chan *lch_map[32]; + struct omap_chan *lch_map[OMAP_SDMA_CHANNELS]; }; struct omap_chan { @@ -362,7 +366,7 @@ static void omap_dma_start_sg(struct omap_chan *c, struct omap_desc *d, struct omap_sg *sg = d->sg + idx; unsigned cxsa, cxei, cxfi; - if (d->dir == DMA_DEV_TO_MEM) { + if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) { cxsa = CDSA; cxei = CDEI; cxfi = CDFI; @@ -408,7 +412,7 @@ static void omap_dma_start_desc(struct omap_chan *c) if (dma_omap1()) omap_dma_chan_write(c, CCR2, d->ccr >> 16); - if (d->dir == DMA_DEV_TO_MEM) { + if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) { cxsa = CSSA; cxei = CSEI; cxfi = CSFI; @@ -589,6 +593,7 @@ static void omap_dma_free_chan_resources(struct dma_chan *chan) omap_free_dma(c->dma_ch); dev_dbg(od->ddev.dev, "freeing channel for %u\n", c->dma_sig); + c->dma_sig = 0; } static size_t omap_dma_sg_size(struct omap_sg *sg) @@ -948,6 +953,51 @@ static struct dma_async_tx_descriptor *omap_dma_prep_dma_cyclic( return vchan_tx_prep(&c->vc, &d->vd, flags); } +static struct dma_async_tx_descriptor *omap_dma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long tx_flags) +{ + struct omap_chan *c = to_omap_dma_chan(chan); + struct omap_desc *d; + uint8_t data_type; + + d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC); + if (!d) + return NULL; + + data_type = __ffs((src | dest | len)); + if (data_type > CSDP_DATA_TYPE_32) + data_type = CSDP_DATA_TYPE_32; + + d->dir = DMA_MEM_TO_MEM; + d->dev_addr = src; + d->fi = 0; + d->es = data_type; + d->sg[0].en = len / BIT(data_type); + d->sg[0].fn = 1; + d->sg[0].addr = dest; + d->sglen = 1; + d->ccr = c->ccr; + d->ccr |= CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_POSTINC; + + d->cicr = CICR_DROP_IE; + if (tx_flags & DMA_PREP_INTERRUPT) + d->cicr |= CICR_FRAME_IE; + + d->csdp = data_type; + + if (dma_omap1()) { + d->cicr |= CICR_TOUT_IE; + d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_EMIFF; + } else { + d->csdp |= CSDP_DST_PACKED | CSDP_SRC_PACKED; + d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE; + d->csdp |= CSDP_DST_BURST_64 | CSDP_SRC_BURST_64; + } + + return vchan_tx_prep(&c->vc, &d->vd, tx_flags); +} + static int omap_dma_slave_config(struct dma_chan *chan, struct dma_slave_config *cfg) { struct omap_chan *c = to_omap_dma_chan(chan); @@ -1037,7 +1087,7 @@ static int omap_dma_resume(struct dma_chan *chan) return 0; } -static int omap_dma_chan_init(struct omap_dmadev *od, int dma_sig) +static int omap_dma_chan_init(struct omap_dmadev *od) { struct omap_chan *c; @@ -1046,7 +1096,6 @@ static int omap_dma_chan_init(struct omap_dmadev *od, int dma_sig) return -ENOMEM; c->reg_map = od->reg_map; - c->dma_sig = dma_sig; c->vc.desc_free = omap_dma_desc_free; vchan_init(&c->vc, &od->ddev); INIT_LIST_HEAD(&c->node); @@ -1094,12 +1143,14 @@ static int omap_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_SLAVE, od->ddev.cap_mask); dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask); + dma_cap_set(DMA_MEMCPY, od->ddev.cap_mask); od->ddev.device_alloc_chan_resources = omap_dma_alloc_chan_resources; od->ddev.device_free_chan_resources = omap_dma_free_chan_resources; od->ddev.device_tx_status = omap_dma_tx_status; od->ddev.device_issue_pending = omap_dma_issue_pending; od->ddev.device_prep_slave_sg = omap_dma_prep_slave_sg; od->ddev.device_prep_dma_cyclic = omap_dma_prep_dma_cyclic; + od->ddev.device_prep_dma_memcpy = omap_dma_prep_dma_memcpy; od->ddev.device_config = omap_dma_slave_config; od->ddev.device_pause = omap_dma_pause; od->ddev.device_resume = omap_dma_resume; @@ -1116,8 +1167,17 @@ static int omap_dma_probe(struct platform_device *pdev) tasklet_init(&od->task, omap_dma_sched, (unsigned long)od); - for (i = 0; i < 127; i++) { - rc = omap_dma_chan_init(od, i); + od->dma_requests = OMAP_SDMA_REQUESTS; + if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node, + "dma-requests", + &od->dma_requests)) { + dev_info(&pdev->dev, + "Missing dma-requests property, using %u.\n", + OMAP_SDMA_REQUESTS); + } + + for (i = 0; i < OMAP_SDMA_CHANNELS; i++) { + rc = omap_dma_chan_init(od); if (rc) { omap_dma_free(od); return rc; @@ -1208,10 +1268,14 @@ static struct platform_driver omap_dma_driver = { bool omap_dma_filter_fn(struct dma_chan *chan, void *param) { if (chan->device->dev->driver == &omap_dma_driver.driver) { + struct omap_dmadev *od = to_omap_dma_dev(chan->device); struct omap_chan *c = to_omap_dma_chan(chan); unsigned req = *(unsigned *)param; - return req == c->dma_sig; + if (req <= od->dma_requests) { + c->dma_sig = req; + return true; + } } return false; } diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 340f9e607cd8..f513f77b1d85 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -1424,8 +1424,8 @@ static int pl330_submit_req(struct pl330_thread *thrd, goto xfer_exit; if (ret > pl330->mcbufsz / 2) { - dev_info(pl330->ddma.dev, "%s:%d Trying increasing mcbufsz\n", - __func__, __LINE__); + dev_info(pl330->ddma.dev, "%s:%d Try increasing mcbufsz (%i/%i)\n", + __func__, __LINE__, ret, pl330->mcbufsz / 2); ret = -ENOMEM; goto xfer_exit; } @@ -2584,12 +2584,14 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, { struct dma_pl330_desc *desc; struct dma_pl330_chan *pch = to_pchan(chan); - struct pl330_dmac *pl330 = pch->dmac; + struct pl330_dmac *pl330; int burst; if (unlikely(!pch || !len)) return NULL; + pl330 = pch->dmac; + desc = __pl330_prep_dma_memcpy(pch, dst, src, len); if (!desc) return NULL; diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c new file mode 100644 index 000000000000..ddcbbf5cd9e9 --- /dev/null +++ b/drivers/dma/pxa_dma.c @@ -0,0 +1,1467 @@ +/* + * Copyright 2015 Robert Jarzmik <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/err.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/dmaengine.h> +#include <linux/platform_device.h> +#include <linux/device.h> +#include <linux/platform_data/mmp_dma.h> +#include <linux/dmapool.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> +#include <linux/of.h> +#include <linux/dma/pxa-dma.h> + +#include "dmaengine.h" +#include "virt-dma.h" + +#define DCSR(n) (0x0000 + ((n) << 2)) +#define DALGN(n) 0x00a0 +#define DINT 0x00f0 +#define DDADR(n) (0x0200 + ((n) << 4)) +#define DSADR(n) (0x0204 + ((n) << 4)) +#define DTADR(n) (0x0208 + ((n) << 4)) +#define DCMD(n) (0x020c + ((n) << 4)) + +#define PXA_DCSR_RUN BIT(31) /* Run Bit (read / write) */ +#define PXA_DCSR_NODESC BIT(30) /* No-Descriptor Fetch (read / write) */ +#define PXA_DCSR_STOPIRQEN BIT(29) /* Stop Interrupt Enable (R/W) */ +#define PXA_DCSR_REQPEND BIT(8) /* Request Pending (read-only) */ +#define PXA_DCSR_STOPSTATE BIT(3) /* Stop State (read-only) */ +#define PXA_DCSR_ENDINTR BIT(2) /* End Interrupt (read / write) */ +#define PXA_DCSR_STARTINTR BIT(1) /* Start Interrupt (read / write) */ +#define PXA_DCSR_BUSERR BIT(0) /* Bus Error Interrupt (read / write) */ + +#define PXA_DCSR_EORIRQEN BIT(28) /* End of Receive IRQ Enable (R/W) */ +#define PXA_DCSR_EORJMPEN BIT(27) /* Jump to next descriptor on EOR */ +#define PXA_DCSR_EORSTOPEN BIT(26) /* STOP on an EOR */ +#define PXA_DCSR_SETCMPST BIT(25) /* Set Descriptor Compare Status */ +#define PXA_DCSR_CLRCMPST BIT(24) /* Clear Descriptor Compare Status */ +#define PXA_DCSR_CMPST BIT(10) /* The Descriptor Compare Status */ +#define PXA_DCSR_EORINTR BIT(9) /* The end of Receive */ + +#define DRCMR_MAPVLD BIT(7) /* Map Valid (read / write) */ +#define DRCMR_CHLNUM 0x1f /* mask for Channel Number (read / write) */ + +#define DDADR_DESCADDR 0xfffffff0 /* Address of next descriptor (mask) */ +#define DDADR_STOP BIT(0) /* Stop (read / write) */ + +#define PXA_DCMD_INCSRCADDR BIT(31) /* Source Address Increment Setting. */ +#define PXA_DCMD_INCTRGADDR BIT(30) /* Target Address Increment Setting. */ +#define PXA_DCMD_FLOWSRC BIT(29) /* Flow Control by the source. */ +#define PXA_DCMD_FLOWTRG BIT(28) /* Flow Control by the target. */ +#define PXA_DCMD_STARTIRQEN BIT(22) /* Start Interrupt Enable */ +#define PXA_DCMD_ENDIRQEN BIT(21) /* End Interrupt Enable */ +#define PXA_DCMD_ENDIAN BIT(18) /* Device Endian-ness. */ +#define PXA_DCMD_BURST8 (1 << 16) /* 8 byte burst */ +#define PXA_DCMD_BURST16 (2 << 16) /* 16 byte burst */ +#define PXA_DCMD_BURST32 (3 << 16) /* 32 byte burst */ +#define PXA_DCMD_WIDTH1 (1 << 14) /* 1 byte width */ +#define PXA_DCMD_WIDTH2 (2 << 14) /* 2 byte width (HalfWord) */ +#define PXA_DCMD_WIDTH4 (3 << 14) /* 4 byte width (Word) */ +#define PXA_DCMD_LENGTH 0x01fff /* length mask (max = 8K - 1) */ + +#define PDMA_ALIGNMENT 3 +#define PDMA_MAX_DESC_BYTES (PXA_DCMD_LENGTH & ~((1 << PDMA_ALIGNMENT) - 1)) + +struct pxad_desc_hw { + u32 ddadr; /* Points to the next descriptor + flags */ + u32 dsadr; /* DSADR value for the current transfer */ + u32 dtadr; /* DTADR value for the current transfer */ + u32 dcmd; /* DCMD value for the current transfer */ +} __aligned(16); + +struct pxad_desc_sw { + struct virt_dma_desc vd; /* Virtual descriptor */ + int nb_desc; /* Number of hw. descriptors */ + size_t len; /* Number of bytes xfered */ + dma_addr_t first; /* First descriptor's addr */ + + /* At least one descriptor has an src/dst address not multiple of 8 */ + bool misaligned; + bool cyclic; + struct dma_pool *desc_pool; /* Channel's used allocator */ + + struct pxad_desc_hw *hw_desc[]; /* DMA coherent descriptors */ +}; + +struct pxad_phy { + int idx; + void __iomem *base; + struct pxad_chan *vchan; +}; + +struct pxad_chan { + struct virt_dma_chan vc; /* Virtual channel */ + u32 drcmr; /* Requestor of the channel */ + enum pxad_chan_prio prio; /* Required priority of phy */ + /* + * At least one desc_sw in submitted or issued transfers on this channel + * has one address such as: addr % 8 != 0. This implies the DALGN + * setting on the phy. + */ + bool misaligned; + struct dma_slave_config cfg; /* Runtime config */ + + /* protected by vc->lock */ + struct pxad_phy *phy; + struct dma_pool *desc_pool; /* Descriptors pool */ +}; + +struct pxad_device { + struct dma_device slave; + int nr_chans; + void __iomem *base; + struct pxad_phy *phys; + spinlock_t phy_lock; /* Phy association */ +#ifdef CONFIG_DEBUG_FS + struct dentry *dbgfs_root; + struct dentry *dbgfs_state; + struct dentry **dbgfs_chan; +#endif +}; + +#define tx_to_pxad_desc(tx) \ + container_of(tx, struct pxad_desc_sw, async_tx) +#define to_pxad_chan(dchan) \ + container_of(dchan, struct pxad_chan, vc.chan) +#define to_pxad_dev(dmadev) \ + container_of(dmadev, struct pxad_device, slave) +#define to_pxad_sw_desc(_vd) \ + container_of((_vd), struct pxad_desc_sw, vd) + +#define _phy_readl_relaxed(phy, _reg) \ + readl_relaxed((phy)->base + _reg((phy)->idx)) +#define phy_readl_relaxed(phy, _reg) \ + ({ \ + u32 _v; \ + _v = readl_relaxed((phy)->base + _reg((phy)->idx)); \ + dev_vdbg(&phy->vchan->vc.chan.dev->device, \ + "%s(): readl(%s): 0x%08x\n", __func__, #_reg, \ + _v); \ + _v; \ + }) +#define phy_writel(phy, val, _reg) \ + do { \ + writel((val), (phy)->base + _reg((phy)->idx)); \ + dev_vdbg(&phy->vchan->vc.chan.dev->device, \ + "%s(): writel(0x%08x, %s)\n", \ + __func__, (u32)(val), #_reg); \ + } while (0) +#define phy_writel_relaxed(phy, val, _reg) \ + do { \ + writel_relaxed((val), (phy)->base + _reg((phy)->idx)); \ + dev_vdbg(&phy->vchan->vc.chan.dev->device, \ + "%s(): writel_relaxed(0x%08x, %s)\n", \ + __func__, (u32)(val), #_reg); \ + } while (0) + +static unsigned int pxad_drcmr(unsigned int line) +{ + if (line < 64) + return 0x100 + line * 4; + return 0x1000 + line * 4; +} + +/* + * Debug fs + */ +#ifdef CONFIG_DEBUG_FS +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/seq_file.h> + +static int dbg_show_requester_chan(struct seq_file *s, void *p) +{ + int pos = 0; + struct pxad_phy *phy = s->private; + int i; + u32 drcmr; + + pos += seq_printf(s, "DMA channel %d requester :\n", phy->idx); + for (i = 0; i < 70; i++) { + drcmr = readl_relaxed(phy->base + pxad_drcmr(i)); + if ((drcmr & DRCMR_CHLNUM) == phy->idx) + pos += seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", i, + !!(drcmr & DRCMR_MAPVLD)); + } + return pos; +} + +static inline int dbg_burst_from_dcmd(u32 dcmd) +{ + int burst = (dcmd >> 16) & 0x3; + + return burst ? 4 << burst : 0; +} + +static int is_phys_valid(unsigned long addr) +{ + return pfn_valid(__phys_to_pfn(addr)); +} + +#define PXA_DCSR_STR(flag) (dcsr & PXA_DCSR_##flag ? #flag" " : "") +#define PXA_DCMD_STR(flag) (dcmd & PXA_DCMD_##flag ? #flag" " : "") + +static int dbg_show_descriptors(struct seq_file *s, void *p) +{ + struct pxad_phy *phy = s->private; + int i, max_show = 20, burst, width; + u32 dcmd; + unsigned long phys_desc, ddadr; + struct pxad_desc_hw *desc; + + phys_desc = ddadr = _phy_readl_relaxed(phy, DDADR); + + seq_printf(s, "DMA channel %d descriptors :\n", phy->idx); + seq_printf(s, "[%03d] First descriptor unknown\n", 0); + for (i = 1; i < max_show && is_phys_valid(phys_desc); i++) { + desc = phys_to_virt(phys_desc); + dcmd = desc->dcmd; + burst = dbg_burst_from_dcmd(dcmd); + width = (1 << ((dcmd >> 14) & 0x3)) >> 1; + + seq_printf(s, "[%03d] Desc at %08lx(virt %p)\n", + i, phys_desc, desc); + seq_printf(s, "\tDDADR = %08x\n", desc->ddadr); + seq_printf(s, "\tDSADR = %08x\n", desc->dsadr); + seq_printf(s, "\tDTADR = %08x\n", desc->dtadr); + seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n", + dcmd, + PXA_DCMD_STR(INCSRCADDR), PXA_DCMD_STR(INCTRGADDR), + PXA_DCMD_STR(FLOWSRC), PXA_DCMD_STR(FLOWTRG), + PXA_DCMD_STR(STARTIRQEN), PXA_DCMD_STR(ENDIRQEN), + PXA_DCMD_STR(ENDIAN), burst, width, + dcmd & PXA_DCMD_LENGTH); + phys_desc = desc->ddadr; + } + if (i == max_show) + seq_printf(s, "[%03d] Desc at %08lx ... max display reached\n", + i, phys_desc); + else + seq_printf(s, "[%03d] Desc at %08lx is %s\n", + i, phys_desc, phys_desc == DDADR_STOP ? + "DDADR_STOP" : "invalid"); + + return 0; +} + +static int dbg_show_chan_state(struct seq_file *s, void *p) +{ + struct pxad_phy *phy = s->private; + u32 dcsr, dcmd; + int burst, width; + static const char * const str_prio[] = { + "high", "normal", "low", "invalid" + }; + + dcsr = _phy_readl_relaxed(phy, DCSR); + dcmd = _phy_readl_relaxed(phy, DCMD); + burst = dbg_burst_from_dcmd(dcmd); + width = (1 << ((dcmd >> 14) & 0x3)) >> 1; + + seq_printf(s, "DMA channel %d\n", phy->idx); + seq_printf(s, "\tPriority : %s\n", + str_prio[(phy->idx & 0xf) / 4]); + seq_printf(s, "\tUnaligned transfer bit: %s\n", + _phy_readl_relaxed(phy, DALGN) & BIT(phy->idx) ? + "yes" : "no"); + seq_printf(s, "\tDCSR = %08x (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", + dcsr, PXA_DCSR_STR(RUN), PXA_DCSR_STR(NODESC), + PXA_DCSR_STR(STOPIRQEN), PXA_DCSR_STR(EORIRQEN), + PXA_DCSR_STR(EORJMPEN), PXA_DCSR_STR(EORSTOPEN), + PXA_DCSR_STR(SETCMPST), PXA_DCSR_STR(CLRCMPST), + PXA_DCSR_STR(CMPST), PXA_DCSR_STR(EORINTR), + PXA_DCSR_STR(REQPEND), PXA_DCSR_STR(STOPSTATE), + PXA_DCSR_STR(ENDINTR), PXA_DCSR_STR(STARTINTR), + PXA_DCSR_STR(BUSERR)); + + seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n", + dcmd, + PXA_DCMD_STR(INCSRCADDR), PXA_DCMD_STR(INCTRGADDR), + PXA_DCMD_STR(FLOWSRC), PXA_DCMD_STR(FLOWTRG), + PXA_DCMD_STR(STARTIRQEN), PXA_DCMD_STR(ENDIRQEN), + PXA_DCMD_STR(ENDIAN), burst, width, dcmd & PXA_DCMD_LENGTH); + seq_printf(s, "\tDSADR = %08x\n", _phy_readl_relaxed(phy, DSADR)); + seq_printf(s, "\tDTADR = %08x\n", _phy_readl_relaxed(phy, DTADR)); + seq_printf(s, "\tDDADR = %08x\n", _phy_readl_relaxed(phy, DDADR)); + + return 0; +} + +static int dbg_show_state(struct seq_file *s, void *p) +{ + struct pxad_device *pdev = s->private; + + /* basic device status */ + seq_puts(s, "DMA engine status\n"); + seq_printf(s, "\tChannel number: %d\n", pdev->nr_chans); + + return 0; +} + +#define DBGFS_FUNC_DECL(name) \ +static int dbg_open_##name(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, dbg_show_##name, inode->i_private); \ +} \ +static const struct file_operations dbg_fops_##name = { \ + .owner = THIS_MODULE, \ + .open = dbg_open_##name, \ + .llseek = seq_lseek, \ + .read = seq_read, \ + .release = single_release, \ +} + +DBGFS_FUNC_DECL(state); +DBGFS_FUNC_DECL(chan_state); +DBGFS_FUNC_DECL(descriptors); +DBGFS_FUNC_DECL(requester_chan); + +static struct dentry *pxad_dbg_alloc_chan(struct pxad_device *pdev, + int ch, struct dentry *chandir) +{ + char chan_name[11]; + struct dentry *chan, *chan_state = NULL, *chan_descr = NULL; + struct dentry *chan_reqs = NULL; + void *dt; + + scnprintf(chan_name, sizeof(chan_name), "%d", ch); + chan = debugfs_create_dir(chan_name, chandir); + dt = (void *)&pdev->phys[ch]; + + if (chan) + chan_state = debugfs_create_file("state", 0400, chan, dt, + &dbg_fops_chan_state); + if (chan_state) + chan_descr = debugfs_create_file("descriptors", 0400, chan, dt, + &dbg_fops_descriptors); + if (chan_descr) + chan_reqs = debugfs_create_file("requesters", 0400, chan, dt, + &dbg_fops_requester_chan); + if (!chan_reqs) + goto err_state; + + return chan; + +err_state: + debugfs_remove_recursive(chan); + return NULL; +} + +static void pxad_init_debugfs(struct pxad_device *pdev) +{ + int i; + struct dentry *chandir; + + pdev->dbgfs_root = debugfs_create_dir(dev_name(pdev->slave.dev), NULL); + if (IS_ERR(pdev->dbgfs_root) || !pdev->dbgfs_root) + goto err_root; + + pdev->dbgfs_state = debugfs_create_file("state", 0400, pdev->dbgfs_root, + pdev, &dbg_fops_state); + if (!pdev->dbgfs_state) + goto err_state; + + pdev->dbgfs_chan = + kmalloc_array(pdev->nr_chans, sizeof(*pdev->dbgfs_state), + GFP_KERNEL); + if (!pdev->dbgfs_chan) + goto err_alloc; + + chandir = debugfs_create_dir("channels", pdev->dbgfs_root); + if (!chandir) + goto err_chandir; + + for (i = 0; i < pdev->nr_chans; i++) { + pdev->dbgfs_chan[i] = pxad_dbg_alloc_chan(pdev, i, chandir); + if (!pdev->dbgfs_chan[i]) + goto err_chans; + } + + return; +err_chans: +err_chandir: + kfree(pdev->dbgfs_chan); +err_alloc: +err_state: + debugfs_remove_recursive(pdev->dbgfs_root); +err_root: + pr_err("pxad: debugfs is not available\n"); +} + +static void pxad_cleanup_debugfs(struct pxad_device *pdev) +{ + debugfs_remove_recursive(pdev->dbgfs_root); +} +#else +static inline void pxad_init_debugfs(struct pxad_device *pdev) {} +static inline void pxad_cleanup_debugfs(struct pxad_device *pdev) {} +#endif + +/* + * In the transition phase where legacy pxa handling is done at the same time as + * mmp_dma, the DMA physical channel split between the 2 DMA providers is done + * through legacy_reserved. Legacy code reserves DMA channels by settings + * corresponding bits in legacy_reserved. + */ +static u32 legacy_reserved; +static u32 legacy_unavailable; + +static struct pxad_phy *lookup_phy(struct pxad_chan *pchan) +{ + int prio, i; + struct pxad_device *pdev = to_pxad_dev(pchan->vc.chan.device); + struct pxad_phy *phy, *found = NULL; + unsigned long flags; + + /* + * dma channel priorities + * ch 0 - 3, 16 - 19 <--> (0) + * ch 4 - 7, 20 - 23 <--> (1) + * ch 8 - 11, 24 - 27 <--> (2) + * ch 12 - 15, 28 - 31 <--> (3) + */ + + spin_lock_irqsave(&pdev->phy_lock, flags); + for (prio = pchan->prio; prio >= PXAD_PRIO_HIGHEST; prio--) { + for (i = 0; i < pdev->nr_chans; i++) { + if (prio != (i & 0xf) >> 2) + continue; + if ((i < 32) && (legacy_reserved & BIT(i))) + continue; + phy = &pdev->phys[i]; + if (!phy->vchan) { + phy->vchan = pchan; + found = phy; + if (i < 32) + legacy_unavailable |= BIT(i); + goto out_unlock; + } + } + } + +out_unlock: + spin_unlock_irqrestore(&pdev->phy_lock, flags); + dev_dbg(&pchan->vc.chan.dev->device, + "%s(): phy=%p(%d)\n", __func__, found, + found ? found->idx : -1); + + return found; +} + +static void pxad_free_phy(struct pxad_chan *chan) +{ + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + unsigned long flags; + u32 reg; + int i; + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): freeing\n", __func__); + if (!chan->phy) + return; + + /* clear the channel mapping in DRCMR */ + reg = pxad_drcmr(chan->drcmr); + writel_relaxed(0, chan->phy->base + reg); + + spin_lock_irqsave(&pdev->phy_lock, flags); + for (i = 0; i < 32; i++) + if (chan->phy == &pdev->phys[i]) + legacy_unavailable &= ~BIT(i); + chan->phy->vchan = NULL; + chan->phy = NULL; + spin_unlock_irqrestore(&pdev->phy_lock, flags); +} + +static bool is_chan_running(struct pxad_chan *chan) +{ + u32 dcsr; + struct pxad_phy *phy = chan->phy; + + if (!phy) + return false; + dcsr = phy_readl_relaxed(phy, DCSR); + return dcsr & PXA_DCSR_RUN; +} + +static bool is_running_chan_misaligned(struct pxad_chan *chan) +{ + u32 dalgn; + + BUG_ON(!chan->phy); + dalgn = phy_readl_relaxed(chan->phy, DALGN); + return dalgn & (BIT(chan->phy->idx)); +} + +static void phy_enable(struct pxad_phy *phy, bool misaligned) +{ + u32 reg, dalgn; + + if (!phy->vchan) + return; + + dev_dbg(&phy->vchan->vc.chan.dev->device, + "%s(); phy=%p(%d) misaligned=%d\n", __func__, + phy, phy->idx, misaligned); + + reg = pxad_drcmr(phy->vchan->drcmr); + writel_relaxed(DRCMR_MAPVLD | phy->idx, phy->base + reg); + + dalgn = phy_readl_relaxed(phy, DALGN); + if (misaligned) + dalgn |= BIT(phy->idx); + else + dalgn &= ~BIT(phy->idx); + phy_writel_relaxed(phy, dalgn, DALGN); + + phy_writel(phy, PXA_DCSR_STOPIRQEN | PXA_DCSR_ENDINTR | + PXA_DCSR_BUSERR | PXA_DCSR_RUN, DCSR); +} + +static void phy_disable(struct pxad_phy *phy) +{ + u32 dcsr; + + if (!phy) + return; + + dcsr = phy_readl_relaxed(phy, DCSR); + dev_dbg(&phy->vchan->vc.chan.dev->device, + "%s(): phy=%p(%d)\n", __func__, phy, phy->idx); + phy_writel(phy, dcsr & ~PXA_DCSR_RUN & ~PXA_DCSR_STOPIRQEN, DCSR); +} + +static void pxad_launch_chan(struct pxad_chan *chan, + struct pxad_desc_sw *desc) +{ + dev_dbg(&chan->vc.chan.dev->device, + "%s(): desc=%p\n", __func__, desc); + if (!chan->phy) { + chan->phy = lookup_phy(chan); + if (!chan->phy) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): no free dma channel\n", __func__); + return; + } + } + + /* + * Program the descriptor's address into the DMA controller, + * then start the DMA transaction + */ + phy_writel(chan->phy, desc->first, DDADR); + phy_enable(chan->phy, chan->misaligned); +} + +static void set_updater_desc(struct pxad_desc_sw *sw_desc, + unsigned long flags) +{ + struct pxad_desc_hw *updater = + sw_desc->hw_desc[sw_desc->nb_desc - 1]; + dma_addr_t dma = sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr; + + updater->ddadr = DDADR_STOP; + updater->dsadr = dma; + updater->dtadr = dma + 8; + updater->dcmd = PXA_DCMD_WIDTH4 | PXA_DCMD_BURST32 | + (PXA_DCMD_LENGTH & sizeof(u32)); + if (flags & DMA_PREP_INTERRUPT) + updater->dcmd |= PXA_DCMD_ENDIRQEN; +} + +static bool is_desc_completed(struct virt_dma_desc *vd) +{ + struct pxad_desc_sw *sw_desc = to_pxad_sw_desc(vd); + struct pxad_desc_hw *updater = + sw_desc->hw_desc[sw_desc->nb_desc - 1]; + + return updater->dtadr != (updater->dsadr + 8); +} + +static void pxad_desc_chain(struct virt_dma_desc *vd1, + struct virt_dma_desc *vd2) +{ + struct pxad_desc_sw *desc1 = to_pxad_sw_desc(vd1); + struct pxad_desc_sw *desc2 = to_pxad_sw_desc(vd2); + dma_addr_t dma_to_chain; + + dma_to_chain = desc2->first; + desc1->hw_desc[desc1->nb_desc - 1]->ddadr = dma_to_chain; +} + +static bool pxad_try_hotchain(struct virt_dma_chan *vc, + struct virt_dma_desc *vd) +{ + struct virt_dma_desc *vd_last_issued = NULL; + struct pxad_chan *chan = to_pxad_chan(&vc->chan); + + /* + * Attempt to hot chain the tx if the phy is still running. This is + * considered successful only if either the channel is still running + * after the chaining, or if the chained transfer is completed after + * having been hot chained. + * A change of alignment is not allowed, and forbids hotchaining. + */ + if (is_chan_running(chan)) { + BUG_ON(list_empty(&vc->desc_issued)); + + if (!is_running_chan_misaligned(chan) && + to_pxad_sw_desc(vd)->misaligned) + return false; + + vd_last_issued = list_entry(vc->desc_issued.prev, + struct virt_dma_desc, node); + pxad_desc_chain(vd_last_issued, vd); + if (is_chan_running(chan) || is_desc_completed(vd_last_issued)) + return true; + } + + return false; +} + +static unsigned int clear_chan_irq(struct pxad_phy *phy) +{ + u32 dcsr; + u32 dint = readl(phy->base + DINT); + + if (!(dint & BIT(phy->idx))) + return PXA_DCSR_RUN; + + /* clear irq */ + dcsr = phy_readl_relaxed(phy, DCSR); + phy_writel(phy, dcsr, DCSR); + if ((dcsr & PXA_DCSR_BUSERR) && (phy->vchan)) + dev_warn(&phy->vchan->vc.chan.dev->device, + "%s(chan=%p): PXA_DCSR_BUSERR\n", + __func__, &phy->vchan); + + return dcsr & ~PXA_DCSR_RUN; +} + +static irqreturn_t pxad_chan_handler(int irq, void *dev_id) +{ + struct pxad_phy *phy = dev_id; + struct pxad_chan *chan = phy->vchan; + struct virt_dma_desc *vd, *tmp; + unsigned int dcsr; + unsigned long flags; + + BUG_ON(!chan); + + dcsr = clear_chan_irq(phy); + if (dcsr & PXA_DCSR_RUN) + return IRQ_NONE; + + spin_lock_irqsave(&chan->vc.lock, flags); + list_for_each_entry_safe(vd, tmp, &chan->vc.desc_issued, node) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): checking txd %p[%x]: completed=%d\n", + __func__, vd, vd->tx.cookie, is_desc_completed(vd)); + if (is_desc_completed(vd)) { + list_del(&vd->node); + vchan_cookie_complete(vd); + } else { + break; + } + } + + if (dcsr & PXA_DCSR_STOPSTATE) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): channel stopped, submitted_empty=%d issued_empty=%d", + __func__, + list_empty(&chan->vc.desc_submitted), + list_empty(&chan->vc.desc_issued)); + phy_writel_relaxed(phy, dcsr & ~PXA_DCSR_STOPIRQEN, DCSR); + + if (list_empty(&chan->vc.desc_issued)) { + chan->misaligned = + !list_empty(&chan->vc.desc_submitted); + } else { + vd = list_first_entry(&chan->vc.desc_issued, + struct virt_dma_desc, node); + pxad_launch_chan(chan, to_pxad_sw_desc(vd)); + } + } + spin_unlock_irqrestore(&chan->vc.lock, flags); + + return IRQ_HANDLED; +} + +static irqreturn_t pxad_int_handler(int irq, void *dev_id) +{ + struct pxad_device *pdev = dev_id; + struct pxad_phy *phy; + u32 dint = readl(pdev->base + DINT); + int i, ret = IRQ_NONE; + + while (dint) { + i = __ffs(dint); + dint &= (dint - 1); + phy = &pdev->phys[i]; + if ((i < 32) && (legacy_reserved & BIT(i))) + continue; + if (pxad_chan_handler(irq, phy) == IRQ_HANDLED) + ret = IRQ_HANDLED; + } + + return ret; +} + +static int pxad_alloc_chan_resources(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + + if (chan->desc_pool) + return 1; + + chan->desc_pool = dma_pool_create(dma_chan_name(dchan), + pdev->slave.dev, + sizeof(struct pxad_desc_hw), + __alignof__(struct pxad_desc_hw), + 0); + if (!chan->desc_pool) { + dev_err(&chan->vc.chan.dev->device, + "%s(): unable to allocate descriptor pool\n", + __func__); + return -ENOMEM; + } + + return 1; +} + +static void pxad_free_chan_resources(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + + vchan_free_chan_resources(&chan->vc); + dma_pool_destroy(chan->desc_pool); + chan->desc_pool = NULL; + +} + +static void pxad_free_desc(struct virt_dma_desc *vd) +{ + int i; + dma_addr_t dma; + struct pxad_desc_sw *sw_desc = to_pxad_sw_desc(vd); + + BUG_ON(sw_desc->nb_desc == 0); + for (i = sw_desc->nb_desc - 1; i >= 0; i--) { + if (i > 0) + dma = sw_desc->hw_desc[i - 1]->ddadr; + else + dma = sw_desc->first; + dma_pool_free(sw_desc->desc_pool, + sw_desc->hw_desc[i], dma); + } + sw_desc->nb_desc = 0; + kfree(sw_desc); +} + +static struct pxad_desc_sw * +pxad_alloc_desc(struct pxad_chan *chan, unsigned int nb_hw_desc) +{ + struct pxad_desc_sw *sw_desc; + dma_addr_t dma; + int i; + + sw_desc = kzalloc(sizeof(*sw_desc) + + nb_hw_desc * sizeof(struct pxad_desc_hw *), + GFP_NOWAIT); + if (!sw_desc) + return NULL; + sw_desc->desc_pool = chan->desc_pool; + + for (i = 0; i < nb_hw_desc; i++) { + sw_desc->hw_desc[i] = dma_pool_alloc(sw_desc->desc_pool, + GFP_NOWAIT, &dma); + if (!sw_desc->hw_desc[i]) { + dev_err(&chan->vc.chan.dev->device, + "%s(): Couldn't allocate the %dth hw_desc from dma_pool %p\n", + __func__, i, sw_desc->desc_pool); + goto err; + } + + if (i == 0) + sw_desc->first = dma; + else + sw_desc->hw_desc[i - 1]->ddadr = dma; + sw_desc->nb_desc++; + } + + return sw_desc; +err: + pxad_free_desc(&sw_desc->vd); + return NULL; +} + +static dma_cookie_t pxad_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct virt_dma_chan *vc = to_virt_chan(tx->chan); + struct pxad_chan *chan = to_pxad_chan(&vc->chan); + struct virt_dma_desc *vd_chained = NULL, + *vd = container_of(tx, struct virt_dma_desc, tx); + dma_cookie_t cookie; + unsigned long flags; + + set_updater_desc(to_pxad_sw_desc(vd), tx->flags); + + spin_lock_irqsave(&vc->lock, flags); + cookie = dma_cookie_assign(tx); + + if (list_empty(&vc->desc_submitted) && pxad_try_hotchain(vc, vd)) { + list_move_tail(&vd->node, &vc->desc_issued); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x]: submitted (hot linked)\n", + __func__, vd, cookie); + goto out; + } + + /* + * Fallback to placing the tx in the submitted queue + */ + if (!list_empty(&vc->desc_submitted)) { + vd_chained = list_entry(vc->desc_submitted.prev, + struct virt_dma_desc, node); + /* + * Only chain the descriptors if no new misalignment is + * introduced. If a new misalignment is chained, let the channel + * stop, and be relaunched in misalign mode from the irq + * handler. + */ + if (chan->misaligned || !to_pxad_sw_desc(vd)->misaligned) + pxad_desc_chain(vd_chained, vd); + else + vd_chained = NULL; + } + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x]: submitted (%s linked)\n", + __func__, vd, cookie, vd_chained ? "cold" : "not"); + list_move_tail(&vd->node, &vc->desc_submitted); + chan->misaligned |= to_pxad_sw_desc(vd)->misaligned; + +out: + spin_unlock_irqrestore(&vc->lock, flags); + return cookie; +} + +static void pxad_issue_pending(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct virt_dma_desc *vd_first; + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + if (list_empty(&chan->vc.desc_submitted)) + goto out; + + vd_first = list_first_entry(&chan->vc.desc_submitted, + struct virt_dma_desc, node); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x]", __func__, vd_first, vd_first->tx.cookie); + + vchan_issue_pending(&chan->vc); + if (!pxad_try_hotchain(&chan->vc, vd_first)) + pxad_launch_chan(chan, to_pxad_sw_desc(vd_first)); +out: + spin_unlock_irqrestore(&chan->vc.lock, flags); +} + +static inline struct dma_async_tx_descriptor * +pxad_tx_prep(struct virt_dma_chan *vc, struct virt_dma_desc *vd, + unsigned long tx_flags) +{ + struct dma_async_tx_descriptor *tx; + struct pxad_chan *chan = container_of(vc, struct pxad_chan, vc); + + tx = vchan_tx_prep(vc, vd, tx_flags); + tx->tx_submit = pxad_tx_submit; + dev_dbg(&chan->vc.chan.dev->device, + "%s(): vc=%p txd=%p[%x] flags=0x%lx\n", __func__, + vc, vd, vd->tx.cookie, + tx_flags); + + return tx; +} + +static void pxad_get_config(struct pxad_chan *chan, + enum dma_transfer_direction dir, + u32 *dcmd, u32 *dev_src, u32 *dev_dst) +{ + u32 maxburst = 0, dev_addr = 0; + enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED; + + *dcmd = 0; + if (chan->cfg.direction == DMA_DEV_TO_MEM) { + maxburst = chan->cfg.src_maxburst; + width = chan->cfg.src_addr_width; + dev_addr = chan->cfg.src_addr; + *dev_src = dev_addr; + *dcmd |= PXA_DCMD_INCTRGADDR | PXA_DCMD_FLOWSRC; + } + if (chan->cfg.direction == DMA_MEM_TO_DEV) { + maxburst = chan->cfg.dst_maxburst; + width = chan->cfg.dst_addr_width; + dev_addr = chan->cfg.dst_addr; + *dev_dst = dev_addr; + *dcmd |= PXA_DCMD_INCSRCADDR | PXA_DCMD_FLOWTRG; + } + if (chan->cfg.direction == DMA_MEM_TO_MEM) + *dcmd |= PXA_DCMD_BURST32 | PXA_DCMD_INCTRGADDR | + PXA_DCMD_INCSRCADDR; + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): dev_addr=0x%x maxburst=%d width=%d dir=%d\n", + __func__, dev_addr, maxburst, width, dir); + + if (width == DMA_SLAVE_BUSWIDTH_1_BYTE) + *dcmd |= PXA_DCMD_WIDTH1; + else if (width == DMA_SLAVE_BUSWIDTH_2_BYTES) + *dcmd |= PXA_DCMD_WIDTH2; + else if (width == DMA_SLAVE_BUSWIDTH_4_BYTES) + *dcmd |= PXA_DCMD_WIDTH4; + + if (maxburst == 8) + *dcmd |= PXA_DCMD_BURST8; + else if (maxburst == 16) + *dcmd |= PXA_DCMD_BURST16; + else if (maxburst == 32) + *dcmd |= PXA_DCMD_BURST32; + + /* FIXME: drivers should be ported over to use the filter + * function. Once that's done, the following two lines can + * be removed. + */ + if (chan->cfg.slave_id) + chan->drcmr = chan->cfg.slave_id; +} + +static struct dma_async_tx_descriptor * +pxad_prep_memcpy(struct dma_chan *dchan, + dma_addr_t dma_dst, dma_addr_t dma_src, + size_t len, unsigned long flags) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_desc_sw *sw_desc; + struct pxad_desc_hw *hw_desc; + u32 dcmd; + unsigned int i, nb_desc = 0; + size_t copy; + + if (!dchan || !len) + return NULL; + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): dma_dst=0x%lx dma_src=0x%lx len=%zu flags=%lx\n", + __func__, (unsigned long)dma_dst, (unsigned long)dma_src, + len, flags); + pxad_get_config(chan, DMA_MEM_TO_MEM, &dcmd, NULL, NULL); + + nb_desc = DIV_ROUND_UP(len, PDMA_MAX_DESC_BYTES); + sw_desc = pxad_alloc_desc(chan, nb_desc + 1); + if (!sw_desc) + return NULL; + sw_desc->len = len; + + if (!IS_ALIGNED(dma_src, 1 << PDMA_ALIGNMENT) || + !IS_ALIGNED(dma_dst, 1 << PDMA_ALIGNMENT)) + sw_desc->misaligned = true; + + i = 0; + do { + hw_desc = sw_desc->hw_desc[i++]; + copy = min_t(size_t, len, PDMA_MAX_DESC_BYTES); + hw_desc->dcmd = dcmd | (PXA_DCMD_LENGTH & copy); + hw_desc->dsadr = dma_src; + hw_desc->dtadr = dma_dst; + len -= copy; + dma_src += copy; + dma_dst += copy; + } while (len); + set_updater_desc(sw_desc, flags); + + return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags); +} + +static struct dma_async_tx_descriptor * +pxad_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_desc_sw *sw_desc; + size_t len, avail; + struct scatterlist *sg; + dma_addr_t dma; + u32 dcmd, dsadr = 0, dtadr = 0; + unsigned int nb_desc = 0, i, j = 0; + + if ((sgl == NULL) || (sg_len == 0)) + return NULL; + + pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): dir=%d flags=%lx\n", __func__, dir, flags); + + for_each_sg(sgl, sg, sg_len, i) + nb_desc += DIV_ROUND_UP(sg_dma_len(sg), PDMA_MAX_DESC_BYTES); + sw_desc = pxad_alloc_desc(chan, nb_desc + 1); + if (!sw_desc) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + dma = sg_dma_address(sg); + avail = sg_dma_len(sg); + sw_desc->len += avail; + + do { + len = min_t(size_t, avail, PDMA_MAX_DESC_BYTES); + if (dma & 0x7) + sw_desc->misaligned = true; + + sw_desc->hw_desc[j]->dcmd = + dcmd | (PXA_DCMD_LENGTH & len); + sw_desc->hw_desc[j]->dsadr = dsadr ? dsadr : dma; + sw_desc->hw_desc[j++]->dtadr = dtadr ? dtadr : dma; + + dma += len; + avail -= len; + } while (avail); + } + set_updater_desc(sw_desc, flags); + + return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags); +} + +static struct dma_async_tx_descriptor * +pxad_prep_dma_cyclic(struct dma_chan *dchan, + dma_addr_t buf_addr, size_t len, size_t period_len, + enum dma_transfer_direction dir, unsigned long flags) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_desc_sw *sw_desc; + struct pxad_desc_hw **phw_desc; + dma_addr_t dma; + u32 dcmd, dsadr = 0, dtadr = 0; + unsigned int nb_desc = 0; + + if (!dchan || !len || !period_len) + return NULL; + if ((dir != DMA_DEV_TO_MEM) && (dir != DMA_MEM_TO_DEV)) { + dev_err(&chan->vc.chan.dev->device, + "Unsupported direction for cyclic DMA\n"); + return NULL; + } + /* the buffer length must be a multiple of period_len */ + if (len % period_len != 0 || period_len > PDMA_MAX_DESC_BYTES || + !IS_ALIGNED(period_len, 1 << PDMA_ALIGNMENT)) + return NULL; + + pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr); + dcmd |= PXA_DCMD_ENDIRQEN | (PXA_DCMD_LENGTH | period_len); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): buf_addr=0x%lx len=%zu period=%zu dir=%d flags=%lx\n", + __func__, (unsigned long)buf_addr, len, period_len, dir, flags); + + nb_desc = DIV_ROUND_UP(period_len, PDMA_MAX_DESC_BYTES); + nb_desc *= DIV_ROUND_UP(len, period_len); + sw_desc = pxad_alloc_desc(chan, nb_desc + 1); + if (!sw_desc) + return NULL; + sw_desc->cyclic = true; + sw_desc->len = len; + + phw_desc = sw_desc->hw_desc; + dma = buf_addr; + do { + phw_desc[0]->dsadr = dsadr ? dsadr : dma; + phw_desc[0]->dtadr = dtadr ? dtadr : dma; + phw_desc[0]->dcmd = dcmd; + phw_desc++; + dma += period_len; + len -= period_len; + } while (len); + set_updater_desc(sw_desc, flags); + + return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags); +} + +static int pxad_config(struct dma_chan *dchan, + struct dma_slave_config *cfg) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + + if (!dchan) + return -EINVAL; + + chan->cfg = *cfg; + return 0; +} + +static int pxad_terminate_all(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + struct virt_dma_desc *vd = NULL; + unsigned long flags; + struct pxad_phy *phy; + LIST_HEAD(head); + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): vchan %p: terminate all\n", __func__, &chan->vc); + + spin_lock_irqsave(&chan->vc.lock, flags); + vchan_get_all_descriptors(&chan->vc, &head); + + list_for_each_entry(vd, &head, node) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): cancelling txd %p[%x] (completed=%d)", __func__, + vd, vd->tx.cookie, is_desc_completed(vd)); + } + + phy = chan->phy; + if (phy) { + phy_disable(chan->phy); + pxad_free_phy(chan); + chan->phy = NULL; + spin_lock(&pdev->phy_lock); + phy->vchan = NULL; + spin_unlock(&pdev->phy_lock); + } + spin_unlock_irqrestore(&chan->vc.lock, flags); + vchan_dma_desc_free_list(&chan->vc, &head); + + return 0; +} + +static unsigned int pxad_residue(struct pxad_chan *chan, + dma_cookie_t cookie) +{ + struct virt_dma_desc *vd = NULL; + struct pxad_desc_sw *sw_desc = NULL; + struct pxad_desc_hw *hw_desc = NULL; + u32 curr, start, len, end, residue = 0; + unsigned long flags; + bool passed = false; + int i; + + /* + * If the channel does not have a phy pointer anymore, it has already + * been completed. Therefore, its residue is 0. + */ + if (!chan->phy) + return 0; + + spin_lock_irqsave(&chan->vc.lock, flags); + + vd = vchan_find_desc(&chan->vc, cookie); + if (!vd) + goto out; + + sw_desc = to_pxad_sw_desc(vd); + if (sw_desc->hw_desc[0]->dcmd & PXA_DCMD_INCSRCADDR) + curr = phy_readl_relaxed(chan->phy, DSADR); + else + curr = phy_readl_relaxed(chan->phy, DTADR); + + for (i = 0; i < sw_desc->nb_desc - 1; i++) { + hw_desc = sw_desc->hw_desc[i]; + if (sw_desc->hw_desc[0]->dcmd & PXA_DCMD_INCSRCADDR) + start = hw_desc->dsadr; + else + start = hw_desc->dtadr; + len = hw_desc->dcmd & PXA_DCMD_LENGTH; + end = start + len; + + /* + * 'passed' will be latched once we found the descriptor + * which lies inside the boundaries of the curr + * pointer. All descriptors that occur in the list + * _after_ we found that partially handled descriptor + * are still to be processed and are hence added to the + * residual bytes counter. + */ + + if (passed) { + residue += len; + } else if (curr >= start && curr <= end) { + residue += end - curr; + passed = true; + } + } + if (!passed) + residue = sw_desc->len; + +out: + spin_unlock_irqrestore(&chan->vc.lock, flags); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x] sw_desc=%p: %d\n", + __func__, vd, cookie, sw_desc, residue); + return residue; +} + +static enum dma_status pxad_tx_status(struct dma_chan *dchan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + enum dma_status ret; + + ret = dma_cookie_status(dchan, cookie, txstate); + if (likely(txstate && (ret != DMA_ERROR))) + dma_set_residue(txstate, pxad_residue(chan, cookie)); + + return ret; +} + +static void pxad_free_channels(struct dma_device *dmadev) +{ + struct pxad_chan *c, *cn; + + list_for_each_entry_safe(c, cn, &dmadev->channels, + vc.chan.device_node) { + list_del(&c->vc.chan.device_node); + tasklet_kill(&c->vc.task); + } +} + +static int pxad_remove(struct platform_device *op) +{ + struct pxad_device *pdev = platform_get_drvdata(op); + + pxad_cleanup_debugfs(pdev); + pxad_free_channels(&pdev->slave); + dma_async_device_unregister(&pdev->slave); + return 0; +} + +static int pxad_init_phys(struct platform_device *op, + struct pxad_device *pdev, + unsigned int nb_phy_chans) +{ + int irq0, irq, nr_irq = 0, i, ret; + struct pxad_phy *phy; + + irq0 = platform_get_irq(op, 0); + if (irq0 < 0) + return irq0; + + pdev->phys = devm_kcalloc(&op->dev, nb_phy_chans, + sizeof(pdev->phys[0]), GFP_KERNEL); + if (!pdev->phys) + return -ENOMEM; + + for (i = 0; i < nb_phy_chans; i++) + if (platform_get_irq(op, i) > 0) + nr_irq++; + + for (i = 0; i < nb_phy_chans; i++) { + phy = &pdev->phys[i]; + phy->base = pdev->base; + phy->idx = i; + irq = platform_get_irq(op, i); + if ((nr_irq > 1) && (irq > 0)) + ret = devm_request_irq(&op->dev, irq, + pxad_chan_handler, + IRQF_SHARED, "pxa-dma", phy); + if ((nr_irq == 1) && (i == 0)) + ret = devm_request_irq(&op->dev, irq0, + pxad_int_handler, + IRQF_SHARED, "pxa-dma", pdev); + if (ret) { + dev_err(pdev->slave.dev, + "%s(): can't request irq %d:%d\n", __func__, + irq, ret); + return ret; + } + } + + return 0; +} + +static const struct of_device_id const pxad_dt_ids[] = { + { .compatible = "marvell,pdma-1.0", }, + {} +}; +MODULE_DEVICE_TABLE(of, pxad_dt_ids); + +static struct dma_chan *pxad_dma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct pxad_device *d = ofdma->of_dma_data; + struct dma_chan *chan; + + chan = dma_get_any_slave_channel(&d->slave); + if (!chan) + return NULL; + + to_pxad_chan(chan)->drcmr = dma_spec->args[0]; + to_pxad_chan(chan)->prio = dma_spec->args[1]; + + return chan; +} + +static int pxad_init_dmadev(struct platform_device *op, + struct pxad_device *pdev, + unsigned int nr_phy_chans) +{ + int ret; + unsigned int i; + struct pxad_chan *c; + + pdev->nr_chans = nr_phy_chans; + INIT_LIST_HEAD(&pdev->slave.channels); + pdev->slave.device_alloc_chan_resources = pxad_alloc_chan_resources; + pdev->slave.device_free_chan_resources = pxad_free_chan_resources; + pdev->slave.device_tx_status = pxad_tx_status; + pdev->slave.device_issue_pending = pxad_issue_pending; + pdev->slave.device_config = pxad_config; + pdev->slave.device_terminate_all = pxad_terminate_all; + + if (op->dev.coherent_dma_mask) + dma_set_mask(&op->dev, op->dev.coherent_dma_mask); + else + dma_set_mask(&op->dev, DMA_BIT_MASK(32)); + + ret = pxad_init_phys(op, pdev, nr_phy_chans); + if (ret) + return ret; + + for (i = 0; i < nr_phy_chans; i++) { + c = devm_kzalloc(&op->dev, sizeof(*c), GFP_KERNEL); + if (!c) + return -ENOMEM; + c->vc.desc_free = pxad_free_desc; + vchan_init(&c->vc, &pdev->slave); + } + + return dma_async_device_register(&pdev->slave); +} + +static int pxad_probe(struct platform_device *op) +{ + struct pxad_device *pdev; + const struct of_device_id *of_id; + struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev); + struct resource *iores; + int ret, dma_channels = 0; + const enum dma_slave_buswidth widths = + DMA_SLAVE_BUSWIDTH_1_BYTE | DMA_SLAVE_BUSWIDTH_2_BYTES | + DMA_SLAVE_BUSWIDTH_4_BYTES; + + pdev = devm_kzalloc(&op->dev, sizeof(*pdev), GFP_KERNEL); + if (!pdev) + return -ENOMEM; + + spin_lock_init(&pdev->phy_lock); + + iores = platform_get_resource(op, IORESOURCE_MEM, 0); + pdev->base = devm_ioremap_resource(&op->dev, iores); + if (IS_ERR(pdev->base)) + return PTR_ERR(pdev->base); + + of_id = of_match_device(pxad_dt_ids, &op->dev); + if (of_id) + of_property_read_u32(op->dev.of_node, "#dma-channels", + &dma_channels); + else if (pdata && pdata->dma_channels) + dma_channels = pdata->dma_channels; + else + dma_channels = 32; /* default 32 channel */ + + dma_cap_set(DMA_SLAVE, pdev->slave.cap_mask); + dma_cap_set(DMA_MEMCPY, pdev->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, pdev->slave.cap_mask); + dma_cap_set(DMA_PRIVATE, pdev->slave.cap_mask); + pdev->slave.device_prep_dma_memcpy = pxad_prep_memcpy; + pdev->slave.device_prep_slave_sg = pxad_prep_slave_sg; + pdev->slave.device_prep_dma_cyclic = pxad_prep_dma_cyclic; + + pdev->slave.copy_align = PDMA_ALIGNMENT; + pdev->slave.src_addr_widths = widths; + pdev->slave.dst_addr_widths = widths; + pdev->slave.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); + pdev->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + + pdev->slave.dev = &op->dev; + ret = pxad_init_dmadev(op, pdev, dma_channels); + if (ret) { + dev_err(pdev->slave.dev, "unable to register\n"); + return ret; + } + + if (op->dev.of_node) { + /* Device-tree DMA controller registration */ + ret = of_dma_controller_register(op->dev.of_node, + pxad_dma_xlate, pdev); + if (ret < 0) { + dev_err(pdev->slave.dev, + "of_dma_controller_register failed\n"); + return ret; + } + } + + platform_set_drvdata(op, pdev); + pxad_init_debugfs(pdev); + dev_info(pdev->slave.dev, "initialized %d channels\n", dma_channels); + return 0; +} + +static const struct platform_device_id pxad_id_table[] = { + { "pxa-dma", }, + { }, +}; + +static struct platform_driver pxad_driver = { + .driver = { + .name = "pxa-dma", + .of_match_table = pxad_dt_ids, + }, + .id_table = pxad_id_table, + .probe = pxad_probe, + .remove = pxad_remove, +}; + +bool pxad_filter_fn(struct dma_chan *chan, void *param) +{ + struct pxad_chan *c = to_pxad_chan(chan); + struct pxad_param *p = param; + + if (chan->device->dev->driver != &pxad_driver.driver) + return false; + + c->drcmr = p->drcmr; + c->prio = p->prio; + + return true; +} +EXPORT_SYMBOL_GPL(pxad_filter_fn); + +int pxad_toggle_reserved_channel(int legacy_channel) +{ + if (legacy_unavailable & (BIT(legacy_channel))) + return -EBUSY; + legacy_reserved ^= BIT(legacy_channel); + return 0; +} +EXPORT_SYMBOL_GPL(pxad_toggle_reserved_channel); + +module_platform_driver(pxad_driver); + +MODULE_DESCRIPTION("Marvell PXA Peripheral DMA Driver"); +MODULE_AUTHOR("Robert Jarzmik <[email protected]>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c index 01dcaf21b988..17ccdfd28f37 100644 --- a/drivers/dma/s3c24xx-dma.c +++ b/drivers/dma/s3c24xx-dma.c @@ -1168,7 +1168,7 @@ static struct soc_data soc_s3c2443 = { .has_clocks = true, }; -static struct platform_device_id s3c24xx_dma_driver_ids[] = { +static const struct platform_device_id s3c24xx_dma_driver_ids[] = { { .name = "s3c2410-dma", .driver_data = (kernel_ulong_t)&soc_s3c2410, diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index e0302c784ba4..7820d07e7bee 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -183,7 +183,7 @@ struct rcar_dmac { unsigned int n_channels; struct rcar_dmac_chan *channels; - unsigned long modules[256 / BITS_PER_LONG]; + DECLARE_BITMAP(modules, 256); }; #define to_rcar_dmac(d) container_of(d, struct rcar_dmac, engine) diff --git a/drivers/dma/sh/shdma-r8a73a4.c b/drivers/dma/sh/shdma-r8a73a4.c index 4fb99970a3ea..96ea3828c3eb 100644 --- a/drivers/dma/sh/shdma-r8a73a4.c +++ b/drivers/dma/sh/shdma-r8a73a4.c @@ -11,7 +11,7 @@ #include "shdma-arm.h" -const unsigned int dma_ts_shift[] = SH_DMAE_TS_SHIFT; +static const unsigned int dma_ts_shift[] = SH_DMAE_TS_SHIFT; static const struct sh_dmae_slave_config dma_slaves[] = { { diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c index a1afda43b8ef..8c5186cc9f63 100644 --- a/drivers/dma/sirf-dma.c +++ b/drivers/dma/sirf-dma.c @@ -23,8 +23,13 @@ #include "dmaengine.h" +#define SIRFSOC_DMA_VER_A7V1 1 +#define SIRFSOC_DMA_VER_A7V2 2 +#define SIRFSOC_DMA_VER_A6 4 + #define SIRFSOC_DMA_DESCRIPTORS 16 #define SIRFSOC_DMA_CHANNELS 16 +#define SIRFSOC_DMA_TABLE_NUM 256 #define SIRFSOC_DMA_CH_ADDR 0x00 #define SIRFSOC_DMA_CH_XLEN 0x04 @@ -35,15 +40,44 @@ #define SIRFSOC_DMA_CH_VALID 0x140 #define SIRFSOC_DMA_CH_INT 0x144 #define SIRFSOC_DMA_INT_EN 0x148 -#define SIRFSOC_DMA_INT_EN_CLR 0x14C +#define SIRFSOC_DMA_INT_EN_CLR 0x14C #define SIRFSOC_DMA_CH_LOOP_CTRL 0x150 -#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR 0x15C +#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR 0x154 +#define SIRFSOC_DMA_WIDTH_ATLAS7 0x10 +#define SIRFSOC_DMA_VALID_ATLAS7 0x14 +#define SIRFSOC_DMA_INT_ATLAS7 0x18 +#define SIRFSOC_DMA_INT_EN_ATLAS7 0x1c +#define SIRFSOC_DMA_LOOP_CTRL_ATLAS7 0x20 +#define SIRFSOC_DMA_CUR_DATA_ADDR 0x34 +#define SIRFSOC_DMA_MUL_ATLAS7 0x38 +#define SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7 0x158 +#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7 0x15C +#define SIRFSOC_DMA_IOBG_SCMD_EN 0x800 +#define SIRFSOC_DMA_EARLY_RESP_SET 0x818 +#define SIRFSOC_DMA_EARLY_RESP_CLR 0x81C #define SIRFSOC_DMA_MODE_CTRL_BIT 4 #define SIRFSOC_DMA_DIR_CTRL_BIT 5 +#define SIRFSOC_DMA_MODE_CTRL_BIT_ATLAS7 2 +#define SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7 3 +#define SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7 4 +#define SIRFSOC_DMA_TAB_NUM_ATLAS7 7 +#define SIRFSOC_DMA_CHAIN_INT_BIT_ATLAS7 5 +#define SIRFSOC_DMA_CHAIN_FLAG_SHIFT_ATLAS7 25 +#define SIRFSOC_DMA_CHAIN_ADDR_SHIFT 32 + +#define SIRFSOC_DMA_INT_FINI_INT_ATLAS7 BIT(0) +#define SIRFSOC_DMA_INT_CNT_INT_ATLAS7 BIT(1) +#define SIRFSOC_DMA_INT_PAU_INT_ATLAS7 BIT(2) +#define SIRFSOC_DMA_INT_LOOP_INT_ATLAS7 BIT(3) +#define SIRFSOC_DMA_INT_INV_INT_ATLAS7 BIT(4) +#define SIRFSOC_DMA_INT_END_INT_ATLAS7 BIT(5) +#define SIRFSOC_DMA_INT_ALL_ATLAS7 0x3F /* xlen and dma_width register is in 4 bytes boundary */ #define SIRFSOC_DMA_WORD_LEN 4 +#define SIRFSOC_DMA_XLEN_MAX_V1 0x800 +#define SIRFSOC_DMA_XLEN_MAX_V2 0x1000 struct sirfsoc_dma_desc { struct dma_async_tx_descriptor desc; @@ -56,7 +90,9 @@ struct sirfsoc_dma_desc { int width; /* DMA width */ int dir; bool cyclic; /* is loop DMA? */ + bool chain; /* is chain DMA? */ u32 addr; /* DMA buffer address */ + u64 chain_table[SIRFSOC_DMA_TABLE_NUM]; /* chain tbl */ }; struct sirfsoc_dma_chan { @@ -87,10 +123,25 @@ struct sirfsoc_dma { void __iomem *base; int irq; struct clk *clk; - bool is_marco; + int type; + void (*exec_desc)(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base); struct sirfsoc_dma_regs regs_save; }; +struct sirfsoc_dmadata { + void (*exec)(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base); + int type; +}; + +enum sirfsoc_dma_chain_flag { + SIRFSOC_DMA_CHAIN_NORMAL = 0x01, + SIRFSOC_DMA_CHAIN_PAUSE = 0x02, + SIRFSOC_DMA_CHAIN_LOOP = 0x03, + SIRFSOC_DMA_CHAIN_END = 0x04 +}; + #define DRV_NAME "sirfsoc_dma" static int sirfsoc_dma_runtime_suspend(struct device *dev); @@ -109,48 +160,105 @@ static inline struct sirfsoc_dma *dma_chan_to_sirfsoc_dma(struct dma_chan *c) return container_of(schan, struct sirfsoc_dma, channels[c->chan_id]); } +static void sirfsoc_dma_execute_hw_a7v2(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base) +{ + if (sdesc->chain) { + /* DMA v2 HW chain mode */ + writel_relaxed((sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7) | + (sdesc->chain << + SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7) | + (0x8 << SIRFSOC_DMA_TAB_NUM_ATLAS7) | 0x3, + base + SIRFSOC_DMA_CH_CTRL); + } else { + /* DMA v2 legacy mode */ + writel_relaxed(sdesc->xlen, base + SIRFSOC_DMA_CH_XLEN); + writel_relaxed(sdesc->ylen, base + SIRFSOC_DMA_CH_YLEN); + writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_ATLAS7); + writel_relaxed((sdesc->width*((sdesc->ylen+1)>>1)), + base + SIRFSOC_DMA_MUL_ATLAS7); + writel_relaxed((sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7) | + (sdesc->chain << + SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7) | + 0x3, base + SIRFSOC_DMA_CH_CTRL); + } + writel_relaxed(sdesc->chain ? SIRFSOC_DMA_INT_END_INT_ATLAS7 : + (SIRFSOC_DMA_INT_FINI_INT_ATLAS7 | + SIRFSOC_DMA_INT_LOOP_INT_ATLAS7), + base + SIRFSOC_DMA_INT_EN_ATLAS7); + writel(sdesc->addr, base + SIRFSOC_DMA_CH_ADDR); + if (sdesc->cyclic) + writel(0x10001, base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); +} + +static void sirfsoc_dma_execute_hw_a7v1(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base) +{ + writel_relaxed(1, base + SIRFSOC_DMA_IOBG_SCMD_EN); + writel_relaxed((1 << cid), base + SIRFSOC_DMA_EARLY_RESP_SET); + writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_0 + cid * 4); + writel_relaxed(cid | (burst_mode << SIRFSOC_DMA_MODE_CTRL_BIT) | + (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT), + base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL); + writel_relaxed(sdesc->xlen, base + cid * 0x10 + SIRFSOC_DMA_CH_XLEN); + writel_relaxed(sdesc->ylen, base + cid * 0x10 + SIRFSOC_DMA_CH_YLEN); + writel_relaxed(readl_relaxed(base + SIRFSOC_DMA_INT_EN) | + (1 << cid), base + SIRFSOC_DMA_INT_EN); + writel(sdesc->addr >> 2, base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR); + if (sdesc->cyclic) { + writel((1 << cid) | 1 << (cid + 16) | + readl_relaxed(base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7), + base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7); + } + +} + +static void sirfsoc_dma_execute_hw_a6(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base) +{ + writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_0 + cid * 4); + writel_relaxed(cid | (burst_mode << SIRFSOC_DMA_MODE_CTRL_BIT) | + (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT), + base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL); + writel_relaxed(sdesc->xlen, base + cid * 0x10 + SIRFSOC_DMA_CH_XLEN); + writel_relaxed(sdesc->ylen, base + cid * 0x10 + SIRFSOC_DMA_CH_YLEN); + writel_relaxed(readl_relaxed(base + SIRFSOC_DMA_INT_EN) | + (1 << cid), base + SIRFSOC_DMA_INT_EN); + writel(sdesc->addr >> 2, base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR); + if (sdesc->cyclic) { + writel((1 << cid) | 1 << (cid + 16) | + readl_relaxed(base + SIRFSOC_DMA_CH_LOOP_CTRL), + base + SIRFSOC_DMA_CH_LOOP_CTRL); + } + +} + /* Execute all queued DMA descriptors */ static void sirfsoc_dma_execute(struct sirfsoc_dma_chan *schan) { struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan); int cid = schan->chan.chan_id; struct sirfsoc_dma_desc *sdesc = NULL; + void __iomem *base; /* * lock has been held by functions calling this, so we don't hold * lock again */ - + base = sdma->base; sdesc = list_first_entry(&schan->queued, struct sirfsoc_dma_desc, - node); + node); /* Move the first queued descriptor to active list */ list_move_tail(&sdesc->node, &schan->active); - /* Start the DMA transfer */ - writel_relaxed(sdesc->width, sdma->base + SIRFSOC_DMA_WIDTH_0 + - cid * 4); - writel_relaxed(cid | (schan->mode << SIRFSOC_DMA_MODE_CTRL_BIT) | - (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT), - sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL); - writel_relaxed(sdesc->xlen, sdma->base + cid * 0x10 + - SIRFSOC_DMA_CH_XLEN); - writel_relaxed(sdesc->ylen, sdma->base + cid * 0x10 + - SIRFSOC_DMA_CH_YLEN); - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) | - (1 << cid), sdma->base + SIRFSOC_DMA_INT_EN); + if (sdma->type == SIRFSOC_DMA_VER_A7V2) + cid = 0; - /* - * writel has an implict memory write barrier to make sure data is - * flushed into memory before starting DMA - */ - writel(sdesc->addr >> 2, sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR); + /* Start the DMA transfer */ + sdma->exec_desc(sdesc, cid, schan->mode, base); - if (sdesc->cyclic) { - writel((1 << cid) | 1 << (cid + 16) | - readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + if (sdesc->cyclic) schan->happened_cyclic = schan->completed_cyclic = 0; - } } /* Interrupt handler */ @@ -160,27 +268,65 @@ static irqreturn_t sirfsoc_dma_irq(int irq, void *data) struct sirfsoc_dma_chan *schan; struct sirfsoc_dma_desc *sdesc = NULL; u32 is; + bool chain; int ch; + void __iomem *reg; + + switch (sdma->type) { + case SIRFSOC_DMA_VER_A6: + case SIRFSOC_DMA_VER_A7V1: + is = readl(sdma->base + SIRFSOC_DMA_CH_INT); + reg = sdma->base + SIRFSOC_DMA_CH_INT; + while ((ch = fls(is) - 1) >= 0) { + is &= ~(1 << ch); + writel_relaxed(1 << ch, reg); + schan = &sdma->channels[ch]; + spin_lock(&schan->lock); + sdesc = list_first_entry(&schan->active, + struct sirfsoc_dma_desc, node); + if (!sdesc->cyclic) { + /* Execute queued descriptors */ + list_splice_tail_init(&schan->active, + &schan->completed); + dma_cookie_complete(&sdesc->desc); + if (!list_empty(&schan->queued)) + sirfsoc_dma_execute(schan); + } else + schan->happened_cyclic++; + spin_unlock(&schan->lock); + } + break; - is = readl(sdma->base + SIRFSOC_DMA_CH_INT); - while ((ch = fls(is) - 1) >= 0) { - is &= ~(1 << ch); - writel_relaxed(1 << ch, sdma->base + SIRFSOC_DMA_CH_INT); - schan = &sdma->channels[ch]; + case SIRFSOC_DMA_VER_A7V2: + is = readl(sdma->base + SIRFSOC_DMA_INT_ATLAS7); + reg = sdma->base + SIRFSOC_DMA_INT_ATLAS7; + writel_relaxed(SIRFSOC_DMA_INT_ALL_ATLAS7, reg); + schan = &sdma->channels[0]; spin_lock(&schan->lock); - - sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, - node); + sdesc = list_first_entry(&schan->active, + struct sirfsoc_dma_desc, node); if (!sdesc->cyclic) { - /* Execute queued descriptors */ - list_splice_tail_init(&schan->active, &schan->completed); - if (!list_empty(&schan->queued)) - sirfsoc_dma_execute(schan); - } else + chain = sdesc->chain; + if ((chain && (is & SIRFSOC_DMA_INT_END_INT_ATLAS7)) || + (!chain && + (is & SIRFSOC_DMA_INT_FINI_INT_ATLAS7))) { + /* Execute queued descriptors */ + list_splice_tail_init(&schan->active, + &schan->completed); + dma_cookie_complete(&sdesc->desc); + if (!list_empty(&schan->queued)) + sirfsoc_dma_execute(schan); + } + } else if (sdesc->cyclic && (is & + SIRFSOC_DMA_INT_LOOP_INT_ATLAS7)) schan->happened_cyclic++; spin_unlock(&schan->lock); + break; + + default: + break; } /* Schedule tasklet */ @@ -227,16 +373,15 @@ static void sirfsoc_dma_process_completed(struct sirfsoc_dma *sdma) schan->chan.completed_cookie = last_cookie; spin_unlock_irqrestore(&schan->lock, flags); } else { - /* for cyclic channel, desc is always in active list */ - sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, - node); - - if (!sdesc || (sdesc && !sdesc->cyclic)) { - /* without active cyclic DMA */ + if (list_empty(&schan->active)) { spin_unlock_irqrestore(&schan->lock, flags); continue; } + /* for cyclic channel, desc is always in active list */ + sdesc = list_first_entry(&schan->active, + struct sirfsoc_dma_desc, node); + /* cyclic DMA */ happened_cyclic = schan->happened_cyclic; spin_unlock_irqrestore(&schan->lock, flags); @@ -307,20 +452,32 @@ static int sirfsoc_dma_terminate_all(struct dma_chan *chan) spin_lock_irqsave(&schan->lock, flags); - if (!sdma->is_marco) { - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) & - ~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN); - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL) - & ~((1 << cid) | 1 << (cid + 16)), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); - } else { + switch (sdma->type) { + case SIRFSOC_DMA_VER_A7V1: writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_INT_EN_CLR); writel_relaxed((1 << cid) | 1 << (cid + 16), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_CLR); + sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7); + writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID); + break; + case SIRFSOC_DMA_VER_A7V2: + writel_relaxed(0, sdma->base + SIRFSOC_DMA_INT_EN_ATLAS7); + writel_relaxed(0, sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); + writel_relaxed(0, sdma->base + SIRFSOC_DMA_VALID_ATLAS7); + break; + case SIRFSOC_DMA_VER_A6: + writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) & + ~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN); + writel_relaxed(readl_relaxed(sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL) & + ~((1 << cid) | 1 << (cid + 16)), + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID); + break; + default: + break; } - writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID); - list_splice_tail_init(&schan->active, &schan->free); list_splice_tail_init(&schan->queued, &schan->free); @@ -338,13 +495,25 @@ static int sirfsoc_dma_pause_chan(struct dma_chan *chan) spin_lock_irqsave(&schan->lock, flags); - if (!sdma->is_marco) - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL) - & ~((1 << cid) | 1 << (cid + 16)), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); - else + switch (sdma->type) { + case SIRFSOC_DMA_VER_A7V1: writel_relaxed((1 << cid) | 1 << (cid + 16), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_CLR); + sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7); + break; + case SIRFSOC_DMA_VER_A7V2: + writel_relaxed(0, sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); + break; + case SIRFSOC_DMA_VER_A6: + writel_relaxed(readl_relaxed(sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL) & + ~((1 << cid) | 1 << (cid + 16)), + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + break; + + default: + break; + } spin_unlock_irqrestore(&schan->lock, flags); @@ -359,14 +528,25 @@ static int sirfsoc_dma_resume_chan(struct dma_chan *chan) unsigned long flags; spin_lock_irqsave(&schan->lock, flags); - - if (!sdma->is_marco) - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL) - | ((1 << cid) | 1 << (cid + 16)), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); - else + switch (sdma->type) { + case SIRFSOC_DMA_VER_A7V1: writel_relaxed((1 << cid) | 1 << (cid + 16), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7); + break; + case SIRFSOC_DMA_VER_A7V2: + writel_relaxed(0x10001, + sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); + break; + case SIRFSOC_DMA_VER_A6: + writel_relaxed(readl_relaxed(sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL) | + ((1 << cid) | 1 << (cid + 16)), + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + break; + + default: + break; + } spin_unlock_irqrestore(&schan->lock, flags); @@ -473,14 +653,31 @@ sirfsoc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie, spin_lock_irqsave(&schan->lock, flags); - sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, - node); - dma_request_bytes = (sdesc->xlen + 1) * (sdesc->ylen + 1) * - (sdesc->width * SIRFSOC_DMA_WORD_LEN); + if (list_empty(&schan->active)) { + ret = dma_cookie_status(chan, cookie, txstate); + dma_set_residue(txstate, 0); + spin_unlock_irqrestore(&schan->lock, flags); + return ret; + } + sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, node); + if (sdesc->cyclic) + dma_request_bytes = (sdesc->xlen + 1) * (sdesc->ylen + 1) * + (sdesc->width * SIRFSOC_DMA_WORD_LEN); + else + dma_request_bytes = sdesc->xlen * SIRFSOC_DMA_WORD_LEN; ret = dma_cookie_status(chan, cookie, txstate); - dma_pos = readl_relaxed(sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR) - << 2; + + if (sdma->type == SIRFSOC_DMA_VER_A7V2) + cid = 0; + + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + dma_pos = readl_relaxed(sdma->base + SIRFSOC_DMA_CUR_DATA_ADDR); + } else { + dma_pos = readl_relaxed( + sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR) << 2; + } + residue = dma_request_bytes - (dma_pos - sdesc->addr); dma_set_residue(txstate, residue); @@ -647,6 +844,7 @@ static int sirfsoc_dma_probe(struct platform_device *op) struct dma_device *dma; struct sirfsoc_dma *sdma; struct sirfsoc_dma_chan *schan; + struct sirfsoc_dmadata *data; struct resource res; ulong regs_start, regs_size; u32 id; @@ -657,9 +855,11 @@ static int sirfsoc_dma_probe(struct platform_device *op) dev_err(dev, "Memory exhausted!\n"); return -ENOMEM; } - - if (of_device_is_compatible(dn, "sirf,marco-dmac")) - sdma->is_marco = true; + data = (struct sirfsoc_dmadata *) + (of_match_device(op->dev.driver->of_match_table, + &op->dev)->data); + sdma->exec_desc = data->exec; + sdma->type = data->type; if (of_property_read_u32(dn, "cell-index", &id)) { dev_err(dev, "Fail to get DMAC index\n"); @@ -816,6 +1016,8 @@ static int sirfsoc_dma_pm_suspend(struct device *dev) struct sirfsoc_dma_chan *schan; int ch; int ret; + int count; + u32 int_offset; /* * if we were runtime-suspended before, resume to enable clock @@ -827,11 +1029,19 @@ static int sirfsoc_dma_pm_suspend(struct device *dev) return ret; } + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + count = 1; + int_offset = SIRFSOC_DMA_INT_EN_ATLAS7; + } else { + count = SIRFSOC_DMA_CHANNELS; + int_offset = SIRFSOC_DMA_INT_EN; + } + /* * DMA controller will lose all registers while suspending * so we need to save registers for active channels */ - for (ch = 0; ch < SIRFSOC_DMA_CHANNELS; ch++) { + for (ch = 0; ch < count; ch++) { schan = &sdma->channels[ch]; if (list_empty(&schan->active)) continue; @@ -841,7 +1051,7 @@ static int sirfsoc_dma_pm_suspend(struct device *dev) save->ctrl[ch] = readl_relaxed(sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_CTRL); } - save->interrupt_en = readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN); + save->interrupt_en = readl_relaxed(sdma->base + int_offset); /* Disable clock */ sirfsoc_dma_runtime_suspend(dev); @@ -857,14 +1067,27 @@ static int sirfsoc_dma_pm_resume(struct device *dev) struct sirfsoc_dma_chan *schan; int ch; int ret; + int count; + u32 int_offset; + u32 width_offset; /* Enable clock before accessing register */ ret = sirfsoc_dma_runtime_resume(dev); if (ret < 0) return ret; - writel_relaxed(save->interrupt_en, sdma->base + SIRFSOC_DMA_INT_EN); - for (ch = 0; ch < SIRFSOC_DMA_CHANNELS; ch++) { + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + count = 1; + int_offset = SIRFSOC_DMA_INT_EN_ATLAS7; + width_offset = SIRFSOC_DMA_WIDTH_ATLAS7; + } else { + count = SIRFSOC_DMA_CHANNELS; + int_offset = SIRFSOC_DMA_INT_EN; + width_offset = SIRFSOC_DMA_WIDTH_0; + } + + writel_relaxed(save->interrupt_en, sdma->base + int_offset); + for (ch = 0; ch < count; ch++) { schan = &sdma->channels[ch]; if (list_empty(&schan->active)) continue; @@ -872,15 +1095,21 @@ static int sirfsoc_dma_pm_resume(struct device *dev) struct sirfsoc_dma_desc, node); writel_relaxed(sdesc->width, - sdma->base + SIRFSOC_DMA_WIDTH_0 + ch * 4); + sdma->base + width_offset + ch * 4); writel_relaxed(sdesc->xlen, sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_XLEN); writel_relaxed(sdesc->ylen, sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_YLEN); writel_relaxed(save->ctrl[ch], sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_CTRL); - writel_relaxed(sdesc->addr >> 2, - sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_ADDR); + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + writel_relaxed(sdesc->addr, + sdma->base + SIRFSOC_DMA_CH_ADDR); + } else { + writel_relaxed(sdesc->addr >> 2, + sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_ADDR); + + } } /* if we were runtime-suspended before, suspend again */ @@ -896,9 +1125,25 @@ static const struct dev_pm_ops sirfsoc_dma_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(sirfsoc_dma_pm_suspend, sirfsoc_dma_pm_resume) }; +struct sirfsoc_dmadata sirfsoc_dmadata_a6 = { + .exec = sirfsoc_dma_execute_hw_a6, + .type = SIRFSOC_DMA_VER_A6, +}; + +struct sirfsoc_dmadata sirfsoc_dmadata_a7v1 = { + .exec = sirfsoc_dma_execute_hw_a7v1, + .type = SIRFSOC_DMA_VER_A7V1, +}; + +struct sirfsoc_dmadata sirfsoc_dmadata_a7v2 = { + .exec = sirfsoc_dma_execute_hw_a7v2, + .type = SIRFSOC_DMA_VER_A7V2, +}; + static const struct of_device_id sirfsoc_dma_match[] = { - { .compatible = "sirf,prima2-dmac", }, - { .compatible = "sirf,marco-dmac", }, + { .compatible = "sirf,prima2-dmac", .data = &sirfsoc_dmadata_a6,}, + { .compatible = "sirf,atlas7-dmac", .data = &sirfsoc_dmadata_a7v1,}, + { .compatible = "sirf,atlas7-dmac-v2", .data = &sirfsoc_dmadata_a7v2,}, {}, }; @@ -925,7 +1170,7 @@ static void __exit sirfsoc_dma_exit(void) subsys_initcall(sirfsoc_dma_init); module_exit(sirfsoc_dma_exit); -MODULE_AUTHOR("Rongjun Ying <[email protected]>, " - "Barry Song <[email protected]>"); +MODULE_AUTHOR("Rongjun Ying <[email protected]>"); +MODULE_AUTHOR("Barry Song <[email protected]>"); MODULE_DESCRIPTION("SIRFSOC DMA control driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c index 11e536586812..842ff97c2cfb 100644 --- a/drivers/dma/sun6i-dma.c +++ b/drivers/dma/sun6i-dma.c @@ -891,9 +891,21 @@ static struct sun6i_dma_config sun8i_a23_dma_cfg = { .nr_max_vchans = 37, }; +/* + * The H3 has 12 physical channels, a maximum DRQ port id of 27, + * and a total of 34 usable source and destination endpoints. + */ + +static struct sun6i_dma_config sun8i_h3_dma_cfg = { + .nr_max_channels = 12, + .nr_max_requests = 27, + .nr_max_vchans = 34, +}; + static const struct of_device_id sun6i_dma_match[] = { { .compatible = "allwinner,sun6i-a31-dma", .data = &sun6i_a31_dma_cfg }, { .compatible = "allwinner,sun8i-a23-dma", .data = &sun8i_a23_dma_cfg }, + { .compatible = "allwinner,sun8i-h3-dma", .data = &sun8i_h3_dma_cfg }, { /* sentinel */ } }; diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c new file mode 100644 index 000000000000..24f5ca2356bf --- /dev/null +++ b/drivers/dma/ti-dma-crossbar.c @@ -0,0 +1,188 @@ +/* + * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com + * Author: Peter Ujfalusi <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/io.h> +#include <linux/idr.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> + +#define TI_XBAR_OUTPUTS 127 +#define TI_XBAR_INPUTS 256 + +static DEFINE_IDR(map_idr); + +struct ti_dma_xbar_data { + void __iomem *iomem; + + struct dma_router dmarouter; + + u16 safe_val; /* Value to rest the crossbar lines */ + u32 xbar_requests; /* number of DMA requests connected to XBAR */ + u32 dma_requests; /* number of DMA requests forwarded to DMA */ +}; + +struct ti_dma_xbar_map { + u16 xbar_in; + int xbar_out; +}; + +static inline void ti_dma_xbar_write(void __iomem *iomem, int xbar, u16 val) +{ + writew_relaxed(val, iomem + (xbar * 2)); +} + +static void ti_dma_xbar_free(struct device *dev, void *route_data) +{ + struct ti_dma_xbar_data *xbar = dev_get_drvdata(dev); + struct ti_dma_xbar_map *map = route_data; + + dev_dbg(dev, "Unmapping XBAR%u (was routed to %d)\n", + map->xbar_in, map->xbar_out); + + ti_dma_xbar_write(xbar->iomem, map->xbar_out, xbar->safe_val); + idr_remove(&map_idr, map->xbar_out); + kfree(map); +} + +static void *ti_dma_xbar_route_allocate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct platform_device *pdev = of_find_device_by_node(ofdma->of_node); + struct ti_dma_xbar_data *xbar = platform_get_drvdata(pdev); + struct ti_dma_xbar_map *map; + + if (dma_spec->args[0] >= xbar->xbar_requests) { + dev_err(&pdev->dev, "Invalid XBAR request number: %d\n", + dma_spec->args[0]); + return ERR_PTR(-EINVAL); + } + + /* The of_node_put() will be done in the core for the node */ + dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); + if (!dma_spec->np) { + dev_err(&pdev->dev, "Can't get DMA master\n"); + return ERR_PTR(-EINVAL); + } + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) { + of_node_put(dma_spec->np); + return ERR_PTR(-ENOMEM); + } + + map->xbar_out = idr_alloc(&map_idr, NULL, 0, xbar->dma_requests, + GFP_KERNEL); + map->xbar_in = (u16)dma_spec->args[0]; + + /* The DMA request is 1 based in sDMA */ + dma_spec->args[0] = map->xbar_out + 1; + + dev_dbg(&pdev->dev, "Mapping XBAR%u to DMA%d\n", + map->xbar_in, map->xbar_out); + + ti_dma_xbar_write(xbar->iomem, map->xbar_out, map->xbar_in); + + return map; +} + +static int ti_dma_xbar_probe(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + struct device_node *dma_node; + struct ti_dma_xbar_data *xbar; + struct resource *res; + u32 safe_val; + void __iomem *iomem; + int i, ret; + + if (!node) + return -ENODEV; + + xbar = devm_kzalloc(&pdev->dev, sizeof(*xbar), GFP_KERNEL); + if (!xbar) + return -ENOMEM; + + dma_node = of_parse_phandle(node, "dma-masters", 0); + if (!dma_node) { + dev_err(&pdev->dev, "Can't get DMA master node\n"); + return -ENODEV; + } + + if (of_property_read_u32(dma_node, "dma-requests", + &xbar->dma_requests)) { + dev_info(&pdev->dev, + "Missing XBAR output information, using %u.\n", + TI_XBAR_OUTPUTS); + xbar->dma_requests = TI_XBAR_OUTPUTS; + } + of_node_put(dma_node); + + if (of_property_read_u32(node, "dma-requests", &xbar->xbar_requests)) { + dev_info(&pdev->dev, + "Missing XBAR input information, using %u.\n", + TI_XBAR_INPUTS); + xbar->xbar_requests = TI_XBAR_INPUTS; + } + + if (!of_property_read_u32(node, "ti,dma-safe-map", &safe_val)) + xbar->safe_val = (u16)safe_val; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + iomem = devm_ioremap_resource(&pdev->dev, res); + if (!iomem) + return -ENOMEM; + + xbar->iomem = iomem; + + xbar->dmarouter.dev = &pdev->dev; + xbar->dmarouter.route_free = ti_dma_xbar_free; + + platform_set_drvdata(pdev, xbar); + + /* Reset the crossbar */ + for (i = 0; i < xbar->dma_requests; i++) + ti_dma_xbar_write(xbar->iomem, i, xbar->safe_val); + + ret = of_dma_router_register(node, ti_dma_xbar_route_allocate, + &xbar->dmarouter); + if (ret) { + /* Restore the defaults for the crossbar */ + for (i = 0; i < xbar->dma_requests; i++) + ti_dma_xbar_write(xbar->iomem, i, i); + } + + return ret; +} + +static const struct of_device_id ti_dma_xbar_match[] = { + { .compatible = "ti,dra7-dma-crossbar" }, + {}, +}; + +static struct platform_driver ti_dma_xbar_driver = { + .driver = { + .name = "ti-dma-crossbar", + .of_match_table = of_match_ptr(ti_dma_xbar_match), + }, + .probe = ti_dma_xbar_probe, +}; + +int omap_dmaxbar_init(void) +{ + return platform_driver_register(&ti_dma_xbar_driver); +} +arch_initcall(omap_dmaxbar_init); diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c index 6f80432a3f0a..7d2c17d8d30f 100644 --- a/drivers/dma/virt-dma.c +++ b/drivers/dma/virt-dma.c @@ -29,7 +29,7 @@ dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *tx) spin_lock_irqsave(&vc->lock, flags); cookie = dma_cookie_assign(tx); - list_add_tail(&vd->node, &vc->desc_submitted); + list_move_tail(&vd->node, &vc->desc_submitted); spin_unlock_irqrestore(&vc->lock, flags); dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: submitted\n", @@ -83,8 +83,10 @@ static void vchan_complete(unsigned long arg) cb_data = vd->tx.callback_param; list_del(&vd->node); - - vc->desc_free(vd); + if (async_tx_test_ack(&vd->tx)) + list_add(&vd->node, &vc->desc_allocated); + else + vc->desc_free(vd); if (cb) cb(cb_data); @@ -96,9 +98,13 @@ void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head) while (!list_empty(head)) { struct virt_dma_desc *vd = list_first_entry(head, struct virt_dma_desc, node); - list_del(&vd->node); - dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd); - vc->desc_free(vd); + if (async_tx_test_ack(&vd->tx)) { + list_move_tail(&vd->node, &vc->desc_allocated); + } else { + dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd); + list_del(&vd->node); + vc->desc_free(vd); + } } } EXPORT_SYMBOL_GPL(vchan_dma_desc_free_list); @@ -108,6 +114,7 @@ void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev) dma_cookie_init(&vc->chan); spin_lock_init(&vc->lock); + INIT_LIST_HEAD(&vc->desc_allocated); INIT_LIST_HEAD(&vc->desc_submitted); INIT_LIST_HEAD(&vc->desc_issued); INIT_LIST_HEAD(&vc->desc_completed); diff --git a/drivers/dma/virt-dma.h b/drivers/dma/virt-dma.h index 181b95267866..189e75dbcb15 100644 --- a/drivers/dma/virt-dma.h +++ b/drivers/dma/virt-dma.h @@ -29,6 +29,7 @@ struct virt_dma_chan { spinlock_t lock; /* protected by vc.lock */ + struct list_head desc_allocated; struct list_head desc_submitted; struct list_head desc_issued; struct list_head desc_completed; @@ -55,11 +56,16 @@ static inline struct dma_async_tx_descriptor *vchan_tx_prep(struct virt_dma_chan struct virt_dma_desc *vd, unsigned long tx_flags) { extern dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *); + unsigned long flags; dma_async_tx_descriptor_init(&vd->tx, &vc->chan); vd->tx.flags = tx_flags; vd->tx.tx_submit = vchan_tx_submit; + spin_lock_irqsave(&vc->lock, flags); + list_add_tail(&vd->node, &vc->desc_allocated); + spin_unlock_irqrestore(&vc->lock, flags); + return &vd->tx; } @@ -122,7 +128,8 @@ static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc) } /** - * vchan_get_all_descriptors - obtain all submitted and issued descriptors + * vchan_get_all_descriptors - obtain all allocated, submitted and issued + * descriptors * vc: virtual channel to get descriptors from * head: list of descriptors found * @@ -134,6 +141,7 @@ static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc) static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc, struct list_head *head) { + list_splice_tail_init(&vc->desc_allocated, head); list_splice_tail_init(&vc->desc_submitted, head); list_splice_tail_init(&vc->desc_issued, head); list_splice_tail_init(&vc->desc_completed, head); @@ -141,11 +149,14 @@ static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc, static inline void vchan_free_chan_resources(struct virt_dma_chan *vc) { + struct virt_dma_desc *vd; unsigned long flags; LIST_HEAD(head); spin_lock_irqsave(&vc->lock, flags); vchan_get_all_descriptors(vc, &head); + list_for_each_entry(vd, &head, node) + async_tx_clear_ack(&vd->tx); spin_unlock_irqrestore(&vc->lock, flags); vchan_dma_desc_free_list(vc, &head); diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c index f52e37502254..620fd55ec766 100755..100644 --- a/drivers/dma/xgene-dma.c +++ b/drivers/dma/xgene-dma.c @@ -124,32 +124,8 @@ #define XGENE_DMA_DESC_ELERR_POS 46 #define XGENE_DMA_DESC_RTYPE_POS 56 #define XGENE_DMA_DESC_LERR_POS 60 -#define XGENE_DMA_DESC_FLYBY_POS 4 #define XGENE_DMA_DESC_BUFLEN_POS 48 #define XGENE_DMA_DESC_HOENQ_NUM_POS 48 - -#define XGENE_DMA_DESC_NV_SET(m) \ - (((u64 *)(m))[0] |= XGENE_DMA_DESC_NV_BIT) -#define XGENE_DMA_DESC_IN_SET(m) \ - (((u64 *)(m))[0] |= XGENE_DMA_DESC_IN_BIT) -#define XGENE_DMA_DESC_RTYPE_SET(m, v) \ - (((u64 *)(m))[0] |= ((u64)(v) << XGENE_DMA_DESC_RTYPE_POS)) -#define XGENE_DMA_DESC_BUFADDR_SET(m, v) \ - (((u64 *)(m))[0] |= (v)) -#define XGENE_DMA_DESC_BUFLEN_SET(m, v) \ - (((u64 *)(m))[0] |= ((u64)(v) << XGENE_DMA_DESC_BUFLEN_POS)) -#define XGENE_DMA_DESC_C_SET(m) \ - (((u64 *)(m))[1] |= XGENE_DMA_DESC_C_BIT) -#define XGENE_DMA_DESC_FLYBY_SET(m, v) \ - (((u64 *)(m))[2] |= ((v) << XGENE_DMA_DESC_FLYBY_POS)) -#define XGENE_DMA_DESC_MULTI_SET(m, v, i) \ - (((u64 *)(m))[2] |= ((u64)(v) << (((i) + 1) * 8))) -#define XGENE_DMA_DESC_DR_SET(m) \ - (((u64 *)(m))[2] |= XGENE_DMA_DESC_DR_BIT) -#define XGENE_DMA_DESC_DST_ADDR_SET(m, v) \ - (((u64 *)(m))[3] |= (v)) -#define XGENE_DMA_DESC_H0ENQ_NUM_SET(m, v) \ - (((u64 *)(m))[3] |= ((u64)(v) << XGENE_DMA_DESC_HOENQ_NUM_POS)) #define XGENE_DMA_DESC_ELERR_RD(m) \ (((m) >> XGENE_DMA_DESC_ELERR_POS) & 0x3) #define XGENE_DMA_DESC_LERR_RD(m) \ @@ -158,14 +134,7 @@ (((elerr) << 4) | (lerr)) /* X-Gene DMA descriptor empty s/w signature */ -#define XGENE_DMA_DESC_EMPTY_INDEX 0 #define XGENE_DMA_DESC_EMPTY_SIGNATURE ~0ULL -#define XGENE_DMA_DESC_SET_EMPTY(m) \ - (((u64 *)(m))[XGENE_DMA_DESC_EMPTY_INDEX] = \ - XGENE_DMA_DESC_EMPTY_SIGNATURE) -#define XGENE_DMA_DESC_IS_EMPTY(m) \ - (((u64 *)(m))[XGENE_DMA_DESC_EMPTY_INDEX] == \ - XGENE_DMA_DESC_EMPTY_SIGNATURE) /* X-Gene DMA configurable parameters defines */ #define XGENE_DMA_RING_NUM 512 @@ -184,7 +153,7 @@ #define XGENE_DMA_XOR_ALIGNMENT 6 /* 64 Bytes */ #define XGENE_DMA_MAX_XOR_SRC 5 #define XGENE_DMA_16K_BUFFER_LEN_CODE 0x0 -#define XGENE_DMA_INVALID_LEN_CODE 0x7800 +#define XGENE_DMA_INVALID_LEN_CODE 0x7800000000000000ULL /* X-Gene DMA descriptor error codes */ #define ERR_DESC_AXI 0x01 @@ -214,10 +183,10 @@ #define ERR_DESC_SRC_INT 0xB /* X-Gene DMA flyby operation code */ -#define FLYBY_2SRC_XOR 0x8 -#define FLYBY_3SRC_XOR 0x9 -#define FLYBY_4SRC_XOR 0xA -#define FLYBY_5SRC_XOR 0xB +#define FLYBY_2SRC_XOR 0x80 +#define FLYBY_3SRC_XOR 0x90 +#define FLYBY_4SRC_XOR 0xA0 +#define FLYBY_5SRC_XOR 0xB0 /* X-Gene DMA SW descriptor flags */ #define XGENE_DMA_FLAG_64B_DESC BIT(0) @@ -238,10 +207,10 @@ dev_err(chan->dev, "%s: " fmt, chan->name, ##arg) struct xgene_dma_desc_hw { - u64 m0; - u64 m1; - u64 m2; - u64 m3; + __le64 m0; + __le64 m1; + __le64 m2; + __le64 m3; }; enum xgene_dma_ring_cfgsize { @@ -388,18 +357,11 @@ static bool is_pq_enabled(struct xgene_dma *pdma) return !(val & XGENE_DMA_PQ_DISABLE_MASK); } -static void xgene_dma_cpu_to_le64(u64 *desc, int count) -{ - int i; - - for (i = 0; i < count; i++) - desc[i] = cpu_to_le64(desc[i]); -} - -static u16 xgene_dma_encode_len(u32 len) +static u64 xgene_dma_encode_len(size_t len) { return (len < XGENE_DMA_MAX_BYTE_CNT) ? - len : XGENE_DMA_16K_BUFFER_LEN_CODE; + ((u64)len << XGENE_DMA_DESC_BUFLEN_POS) : + XGENE_DMA_16K_BUFFER_LEN_CODE; } static u8 xgene_dma_encode_xor_flyby(u32 src_cnt) @@ -424,34 +386,50 @@ static u32 xgene_dma_ring_desc_cnt(struct xgene_dma_ring *ring) return XGENE_DMA_RING_DESC_CNT(ring_state); } -static void xgene_dma_set_src_buffer(void *ext8, size_t *len, +static void xgene_dma_set_src_buffer(__le64 *ext8, size_t *len, dma_addr_t *paddr) { size_t nbytes = (*len < XGENE_DMA_MAX_BYTE_CNT) ? *len : XGENE_DMA_MAX_BYTE_CNT; - XGENE_DMA_DESC_BUFADDR_SET(ext8, *paddr); - XGENE_DMA_DESC_BUFLEN_SET(ext8, xgene_dma_encode_len(nbytes)); + *ext8 |= cpu_to_le64(*paddr); + *ext8 |= cpu_to_le64(xgene_dma_encode_len(nbytes)); *len -= nbytes; *paddr += nbytes; } -static void xgene_dma_invalidate_buffer(void *ext8) +static void xgene_dma_invalidate_buffer(__le64 *ext8) { - XGENE_DMA_DESC_BUFLEN_SET(ext8, XGENE_DMA_INVALID_LEN_CODE); + *ext8 |= cpu_to_le64(XGENE_DMA_INVALID_LEN_CODE); } -static void *xgene_dma_lookup_ext8(u64 *desc, int idx) +static __le64 *xgene_dma_lookup_ext8(struct xgene_dma_desc_hw *desc, int idx) { - return (idx % 2) ? (desc + idx - 1) : (desc + idx + 1); + switch (idx) { + case 0: + return &desc->m1; + case 1: + return &desc->m0; + case 2: + return &desc->m3; + case 3: + return &desc->m2; + default: + pr_err("Invalid dma descriptor index\n"); + } + + return NULL; } -static void xgene_dma_init_desc(void *desc, u16 dst_ring_num) +static void xgene_dma_init_desc(struct xgene_dma_desc_hw *desc, + u16 dst_ring_num) { - XGENE_DMA_DESC_C_SET(desc); /* Coherent IO */ - XGENE_DMA_DESC_IN_SET(desc); - XGENE_DMA_DESC_H0ENQ_NUM_SET(desc, dst_ring_num); - XGENE_DMA_DESC_RTYPE_SET(desc, XGENE_DMA_RING_OWNER_DMA); + desc->m0 |= cpu_to_le64(XGENE_DMA_DESC_IN_BIT); + desc->m0 |= cpu_to_le64((u64)XGENE_DMA_RING_OWNER_DMA << + XGENE_DMA_DESC_RTYPE_POS); + desc->m1 |= cpu_to_le64(XGENE_DMA_DESC_C_BIT); + desc->m3 |= cpu_to_le64((u64)dst_ring_num << + XGENE_DMA_DESC_HOENQ_NUM_POS); } static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, @@ -459,7 +437,7 @@ static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, dma_addr_t dst, dma_addr_t src, size_t len) { - void *desc1, *desc2; + struct xgene_dma_desc_hw *desc1, *desc2; int i; /* Get 1st descriptor */ @@ -467,23 +445,21 @@ static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num); /* Set destination address */ - XGENE_DMA_DESC_DR_SET(desc1); - XGENE_DMA_DESC_DST_ADDR_SET(desc1, dst); + desc1->m2 |= cpu_to_le64(XGENE_DMA_DESC_DR_BIT); + desc1->m3 |= cpu_to_le64(dst); /* Set 1st source address */ - xgene_dma_set_src_buffer(desc1 + 8, &len, &src); + xgene_dma_set_src_buffer(&desc1->m1, &len, &src); - if (len <= 0) { - desc2 = NULL; - goto skip_additional_src; - } + if (!len) + return; /* * We need to split this source buffer, * and need to use 2nd descriptor */ desc2 = &desc_sw->desc2; - XGENE_DMA_DESC_NV_SET(desc1); + desc1->m0 |= cpu_to_le64(XGENE_DMA_DESC_NV_BIT); /* Set 2nd to 5th source address */ for (i = 0; i < 4 && len; i++) @@ -496,12 +472,6 @@ static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, /* Updated flag that we have prepared 64B descriptor */ desc_sw->flags |= XGENE_DMA_FLAG_64B_DESC; - -skip_additional_src: - /* Hardware stores descriptor in little endian format */ - xgene_dma_cpu_to_le64(desc1, 4); - if (desc2) - xgene_dma_cpu_to_le64(desc2, 4); } static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan, @@ -510,7 +480,7 @@ static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan, u32 src_cnt, size_t *nbytes, const u8 *scf) { - void *desc1, *desc2; + struct xgene_dma_desc_hw *desc1, *desc2; size_t len = *nbytes; int i; @@ -521,28 +491,24 @@ static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan, xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num); /* Set destination address */ - XGENE_DMA_DESC_DR_SET(desc1); - XGENE_DMA_DESC_DST_ADDR_SET(desc1, *dst); + desc1->m2 |= cpu_to_le64(XGENE_DMA_DESC_DR_BIT); + desc1->m3 |= cpu_to_le64(*dst); /* We have multiple source addresses, so need to set NV bit*/ - XGENE_DMA_DESC_NV_SET(desc1); + desc1->m0 |= cpu_to_le64(XGENE_DMA_DESC_NV_BIT); /* Set flyby opcode */ - XGENE_DMA_DESC_FLYBY_SET(desc1, xgene_dma_encode_xor_flyby(src_cnt)); + desc1->m2 |= cpu_to_le64(xgene_dma_encode_xor_flyby(src_cnt)); /* Set 1st to 5th source addresses */ for (i = 0; i < src_cnt; i++) { len = *nbytes; - xgene_dma_set_src_buffer((i == 0) ? (desc1 + 8) : + xgene_dma_set_src_buffer((i == 0) ? &desc1->m1 : xgene_dma_lookup_ext8(desc2, i - 1), &len, &src[i]); - XGENE_DMA_DESC_MULTI_SET(desc1, scf[i], i); + desc1->m2 |= cpu_to_le64((scf[i] << ((i + 1) * 8))); } - /* Hardware stores descriptor in little endian format */ - xgene_dma_cpu_to_le64(desc1, 4); - xgene_dma_cpu_to_le64(desc2, 4); - /* Update meta data */ *nbytes = len; *dst += XGENE_DMA_MAX_BYTE_CNT; @@ -738,7 +704,7 @@ static int xgene_chan_xfer_request(struct xgene_dma_ring *ring, * xgene_chan_xfer_ld_pending - push any pending transactions to hw * @chan : X-Gene DMA channel * - * LOCKING: must hold chan->desc_lock + * LOCKING: must hold chan->lock */ static void xgene_chan_xfer_ld_pending(struct xgene_dma_chan *chan) { @@ -808,7 +774,8 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) desc_hw = &ring->desc_hw[ring->head]; /* Check if this descriptor has been completed */ - if (unlikely(XGENE_DMA_DESC_IS_EMPTY(desc_hw))) + if (unlikely(le64_to_cpu(desc_hw->m0) == + XGENE_DMA_DESC_EMPTY_SIGNATURE)) break; if (++ring->head == ring->slots) @@ -842,7 +809,7 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) iowrite32(-1, ring->cmd); /* Mark this hw descriptor as processed */ - XGENE_DMA_DESC_SET_EMPTY(desc_hw); + desc_hw->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE); xgene_dma_run_tx_complete_actions(chan, desc_sw); @@ -889,7 +856,7 @@ static int xgene_dma_alloc_chan_resources(struct dma_chan *dchan) * @chan: X-Gene DMA channel * @list: the list to free * - * LOCKING: must hold chan->desc_lock + * LOCKING: must hold chan->lock */ static void xgene_dma_free_desc_list(struct xgene_dma_chan *chan, struct list_head *list) @@ -900,15 +867,6 @@ static void xgene_dma_free_desc_list(struct xgene_dma_chan *chan, xgene_dma_clean_descriptor(chan, desc); } -static void xgene_dma_free_tx_desc_list(struct xgene_dma_chan *chan, - struct list_head *list) -{ - struct xgene_dma_desc_sw *desc, *_desc; - - list_for_each_entry_safe(desc, _desc, list, node) - xgene_dma_clean_descriptor(chan, desc); -} - static void xgene_dma_free_chan_resources(struct dma_chan *dchan) { struct xgene_dma_chan *chan = to_dma_chan(dchan); @@ -985,7 +943,7 @@ fail: if (!first) return NULL; - xgene_dma_free_tx_desc_list(chan, &first->tx_list); + xgene_dma_free_desc_list(chan, &first->tx_list); return NULL; } @@ -1093,7 +1051,7 @@ fail: if (!first) return NULL; - xgene_dma_free_tx_desc_list(chan, &first->tx_list); + xgene_dma_free_desc_list(chan, &first->tx_list); return NULL; } @@ -1141,7 +1099,7 @@ fail: if (!first) return NULL; - xgene_dma_free_tx_desc_list(chan, &first->tx_list); + xgene_dma_free_desc_list(chan, &first->tx_list); return NULL; } @@ -1218,7 +1176,7 @@ fail: if (!first) return NULL; - xgene_dma_free_tx_desc_list(chan, &first->tx_list); + xgene_dma_free_desc_list(chan, &first->tx_list); return NULL; } @@ -1316,7 +1274,6 @@ static void xgene_dma_setup_ring(struct xgene_dma_ring *ring) { void *ring_cfg = ring->state; u64 addr = ring->desc_paddr; - void *desc; u32 i, val; ring->slots = ring->size / XGENE_DMA_RING_WQ_DESC_SIZE; @@ -1358,8 +1315,10 @@ static void xgene_dma_setup_ring(struct xgene_dma_ring *ring) /* Set empty signature to DMA Rx ring descriptors */ for (i = 0; i < ring->slots; i++) { + struct xgene_dma_desc_hw *desc; + desc = &ring->desc_hw[i]; - XGENE_DMA_DESC_SET_EMPTY(desc); + desc->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE); } /* Enable DMA Rx ring interrupt */ diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 6517132e5d8b..99c69a3205c4 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -136,6 +136,7 @@ config QCOM_SCM bool depends on ARM || ARM64 +source "drivers/firmware/broadcom/Kconfig" source "drivers/firmware/google/Kconfig" source "drivers/firmware/efi/Kconfig" diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index 3001f1ae1062..4a4b897f9314 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_QCOM_SCM) += qcom_scm.o obj-$(CONFIG_QCOM_SCM) += qcom_scm-32.o CFLAGS_qcom_scm-32.o :=$(call as-instr,.arch_extension sec,-DREQUIRES_SEC=1) +obj-y += broadcom/ obj-$(CONFIG_GOOGLE_FIRMWARE) += google/ obj-$(CONFIG_EFI) += efi/ obj-$(CONFIG_UEFI_CPER) += efi/ diff --git a/drivers/firmware/broadcom/Kconfig b/drivers/firmware/broadcom/Kconfig new file mode 100644 index 000000000000..6bed119930dd --- /dev/null +++ b/drivers/firmware/broadcom/Kconfig @@ -0,0 +1,11 @@ +config BCM47XX_NVRAM + bool "Broadcom NVRAM driver" + depends on BCM47XX || ARCH_BCM_5301X + help + Broadcom home routers contain flash partition called "nvram" with all + important hardware configuration as well as some minor user setup. + NVRAM partition contains a text-like data representing name=value + pairs. + This driver provides an easy way to get value of requested parameter. + It simply reads content of NVRAM and parses it. It doesn't control any + hardware part itself. diff --git a/drivers/firmware/broadcom/Makefile b/drivers/firmware/broadcom/Makefile new file mode 100644 index 000000000000..d0e683583cd6 --- /dev/null +++ b/drivers/firmware/broadcom/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_BCM47XX_NVRAM) += bcm47xx_nvram.o diff --git a/arch/mips/bcm47xx/nvram.c b/drivers/firmware/broadcom/bcm47xx_nvram.c index ba632ff08a13..87add3fdce52 100644 --- a/arch/mips/bcm47xx/nvram.c +++ b/drivers/firmware/broadcom/bcm47xx_nvram.c @@ -35,6 +35,7 @@ struct nvram_header { }; static char nvram_buf[NVRAM_SPACE]; +static size_t nvram_len; static const u32 nvram_sizes[] = {0x8000, 0xF000, 0x10000}; static u32 find_nvram_size(void __iomem *end) @@ -60,7 +61,7 @@ static int nvram_find_and_copy(void __iomem *iobase, u32 lim) u32 *src, *dst; u32 size; - if (nvram_buf[0]) { + if (nvram_len) { pr_warn("nvram already initialized\n"); return -EEXIST; } @@ -94,18 +95,25 @@ static int nvram_find_and_copy(void __iomem *iobase, u32 lim) return -ENXIO; found: - if (header->len > size) - pr_err("The nvram size accoridng to the header seems to be bigger than the partition on flash\n"); - if (header->len > NVRAM_SPACE) - pr_err("nvram on flash (%i bytes) is bigger than the reserved space in memory, will just copy the first %i bytes\n", - header->len, NVRAM_SPACE); - src = (u32 *)header; dst = (u32 *)nvram_buf; for (i = 0; i < sizeof(struct nvram_header); i += 4) *dst++ = __raw_readl(src++); - for (; i < header->len && i < NVRAM_SPACE && i < size; i += 4) + header = (struct nvram_header *)nvram_buf; + nvram_len = header->len; + if (nvram_len > size) { + pr_err("The nvram size according to the header seems to be bigger than the partition on flash\n"); + nvram_len = size; + } + if (nvram_len >= NVRAM_SPACE) { + pr_err("nvram on flash (%i bytes) is bigger than the reserved space in memory, will just copy the first %i bytes\n", + header->len, NVRAM_SPACE - 1); + nvram_len = NVRAM_SPACE - 1; + } + /* proceed reading data after header */ + for (; i < nvram_len; i += 4) *dst++ = readl(src++); + nvram_buf[NVRAM_SPACE - 1] = '\0'; return 0; } @@ -146,21 +154,18 @@ static int nvram_init(void) return -ENODEV; err = mtd_read(mtd, 0, sizeof(header), &bytes_read, (uint8_t *)&header); - if (!err && header.magic == NVRAM_MAGIC) { - u8 *dst = (uint8_t *)nvram_buf; - size_t len = header.len; - - if (header.len > NVRAM_SPACE) { + if (!err && header.magic == NVRAM_MAGIC && + header.len > sizeof(header)) { + nvram_len = header.len; + if (nvram_len >= NVRAM_SPACE) { pr_err("nvram on flash (%i bytes) is bigger than the reserved space in memory, will just copy the first %i bytes\n", header.len, NVRAM_SPACE); - len = NVRAM_SPACE; + nvram_len = NVRAM_SPACE - 1; } - err = mtd_read(mtd, 0, len, &bytes_read, dst); - if (err) - return err; - - return 0; + err = mtd_read(mtd, 0, nvram_len, &nvram_len, + (u8 *)nvram_buf); + return err; } #endif @@ -170,12 +175,12 @@ static int nvram_init(void) int bcm47xx_nvram_getenv(const char *name, char *val, size_t val_len) { char *var, *value, *end, *eq; - int data_left, err; + int err; if (!name) return -EINVAL; - if (!nvram_buf[0]) { + if (!nvram_len) { err = nvram_init(); if (err) return err; @@ -183,19 +188,16 @@ int bcm47xx_nvram_getenv(const char *name, char *val, size_t val_len) /* Look for name=value and return value */ var = &nvram_buf[sizeof(struct nvram_header)]; - end = nvram_buf + sizeof(nvram_buf) - 2; - end[0] = '\0'; - end[1] = '\0'; - for (; *var; var = value + strlen(value) + 1) { - data_left = end - var; - - eq = strnchr(var, data_left, '='); + end = nvram_buf + sizeof(nvram_buf); + while (var < end && *var) { + eq = strchr(var, '='); if (!eq) break; value = eq + 1; if (eq - var == strlen(name) && strncmp(var, name, eq - var) == 0) return snprintf(val, val_len, "%s", value); + var = value + strlen(value) + 1; } return -ENOENT; } @@ -221,3 +223,26 @@ int bcm47xx_nvram_gpio_pin(const char *name) return -ENOENT; } EXPORT_SYMBOL(bcm47xx_nvram_gpio_pin); + +char *bcm47xx_nvram_get_contents(size_t *nvram_size) +{ + int err; + char *nvram; + + if (!nvram_len) { + err = nvram_init(); + if (err) + return NULL; + } + + *nvram_size = nvram_len - sizeof(struct nvram_header); + nvram = vmalloc(*nvram_size); + if (!nvram) + return NULL; + memcpy(nvram, &nvram_buf[sizeof(struct nvram_header)], *nvram_size); + + return nvram; +} +EXPORT_SYMBOL(bcm47xx_nvram_get_contents); + +MODULE_LICENSE("GPLv2"); diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 8a7d7807b596..120d81543e53 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -86,6 +86,11 @@ config IMGPDC_IRQ select GENERIC_IRQ_CHIP select IRQ_DOMAIN +config IRQ_MIPS_CPU + bool + select GENERIC_IRQ_CHIP + select IRQ_DOMAIN + config CLPS711X_IRQCHIP bool depends on ARCH_CLPS711X @@ -160,10 +165,15 @@ config MIPS_GIC bool select MIPS_CM +config INGENIC_IRQ + bool + depends on MACH_INGENIC + default y + config RENESAS_H8300H_INTC bool select IRQ_DOMAIN config RENESAS_H8S_INTC bool - select IRQ_DOMAIN
\ No newline at end of file + select IRQ_DOMAIN diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 5c9adf1f554d..b8d4e9691890 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_ARM_VIC) += irq-vic.o obj-$(CONFIG_ATMEL_AIC_IRQ) += irq-atmel-aic-common.o irq-atmel-aic.o obj-$(CONFIG_ATMEL_AIC5_IRQ) += irq-atmel-aic-common.o irq-atmel-aic5.o obj-$(CONFIG_IMGPDC_IRQ) += irq-imgpdc.o +obj-$(CONFIG_IRQ_MIPS_CPU) += irq-mips-cpu.o obj-$(CONFIG_SIRF_IRQ) += irq-sirfsoc.o obj-$(CONFIG_RENESAS_INTC_IRQPIN) += irq-renesas-intc-irqpin.o obj-$(CONFIG_RENESAS_IRQC) += irq-renesas-irqc.o @@ -50,3 +51,4 @@ obj-$(CONFIG_ARCH_DIGICOLOR) += irq-digicolor.o obj-$(CONFIG_RENESAS_H8300H_INTC) += irq-renesas-h8300h.o obj-$(CONFIG_RENESAS_H8S_INTC) += irq-renesas-h8s.o obj-$(CONFIG_ARCH_SA1100) += irq-sa11x0.o +obj-$(CONFIG_INGENIC_IRQ) += irq-ingenic.o diff --git a/drivers/irqchip/irq-ingenic.c b/drivers/irqchip/irq-ingenic.c new file mode 100644 index 000000000000..005de3f932ae --- /dev/null +++ b/drivers/irqchip/irq-ingenic.c @@ -0,0 +1,177 @@ +/* + * Copyright (C) 2009-2010, Lars-Peter Clausen <[email protected]> + * JZ4740 platform IRQ support + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/irqchip/ingenic.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/timex.h> +#include <linux/slab.h> +#include <linux/delay.h> + +#include <asm/io.h> +#include <asm/mach-jz4740/irq.h> + +#include "irqchip.h" + +struct ingenic_intc_data { + void __iomem *base; + unsigned num_chips; +}; + +#define JZ_REG_INTC_STATUS 0x00 +#define JZ_REG_INTC_MASK 0x04 +#define JZ_REG_INTC_SET_MASK 0x08 +#define JZ_REG_INTC_CLEAR_MASK 0x0c +#define JZ_REG_INTC_PENDING 0x10 +#define CHIP_SIZE 0x20 + +static irqreturn_t intc_cascade(int irq, void *data) +{ + struct ingenic_intc_data *intc = irq_get_handler_data(irq); + uint32_t irq_reg; + unsigned i; + + for (i = 0; i < intc->num_chips; i++) { + irq_reg = readl(intc->base + (i * CHIP_SIZE) + + JZ_REG_INTC_PENDING); + if (!irq_reg) + continue; + + generic_handle_irq(__fls(irq_reg) + (i * 32) + JZ4740_IRQ_BASE); + } + + return IRQ_HANDLED; +} + +static void intc_irq_set_mask(struct irq_chip_generic *gc, uint32_t mask) +{ + struct irq_chip_regs *regs = &gc->chip_types->regs; + + writel(mask, gc->reg_base + regs->enable); + writel(~mask, gc->reg_base + regs->disable); +} + +void ingenic_intc_irq_suspend(struct irq_data *data) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data); + intc_irq_set_mask(gc, gc->wake_active); +} + +void ingenic_intc_irq_resume(struct irq_data *data) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data); + intc_irq_set_mask(gc, gc->mask_cache); +} + +static struct irqaction intc_cascade_action = { + .handler = intc_cascade, + .name = "SoC intc cascade interrupt", +}; + +static int __init ingenic_intc_of_init(struct device_node *node, + unsigned num_chips) +{ + struct ingenic_intc_data *intc; + struct irq_chip_generic *gc; + struct irq_chip_type *ct; + struct irq_domain *domain; + int parent_irq, err = 0; + unsigned i; + + intc = kzalloc(sizeof(*intc), GFP_KERNEL); + if (!intc) { + err = -ENOMEM; + goto out_err; + } + + parent_irq = irq_of_parse_and_map(node, 0); + if (!parent_irq) { + err = -EINVAL; + goto out_free; + } + + err = irq_set_handler_data(parent_irq, intc); + if (err) + goto out_unmap_irq; + + intc->num_chips = num_chips; + intc->base = of_iomap(node, 0); + if (!intc->base) { + err = -ENODEV; + goto out_unmap_irq; + } + + for (i = 0; i < num_chips; i++) { + /* Mask all irqs */ + writel(0xffffffff, intc->base + (i * CHIP_SIZE) + + JZ_REG_INTC_SET_MASK); + + gc = irq_alloc_generic_chip("INTC", 1, + JZ4740_IRQ_BASE + (i * 32), + intc->base + (i * CHIP_SIZE), + handle_level_irq); + + gc->wake_enabled = IRQ_MSK(32); + + ct = gc->chip_types; + ct->regs.enable = JZ_REG_INTC_CLEAR_MASK; + ct->regs.disable = JZ_REG_INTC_SET_MASK; + ct->chip.irq_unmask = irq_gc_unmask_enable_reg; + ct->chip.irq_mask = irq_gc_mask_disable_reg; + ct->chip.irq_mask_ack = irq_gc_mask_disable_reg; + ct->chip.irq_set_wake = irq_gc_set_wake; + ct->chip.irq_suspend = ingenic_intc_irq_suspend; + ct->chip.irq_resume = ingenic_intc_irq_resume; + + irq_setup_generic_chip(gc, IRQ_MSK(32), 0, 0, + IRQ_NOPROBE | IRQ_LEVEL); + } + + domain = irq_domain_add_legacy(node, num_chips * 32, JZ4740_IRQ_BASE, 0, + &irq_domain_simple_ops, NULL); + if (!domain) + pr_warn("unable to register IRQ domain\n"); + + setup_irq(parent_irq, &intc_cascade_action); + return 0; + +out_unmap_irq: + irq_dispose_mapping(parent_irq); +out_free: + kfree(intc); +out_err: + return err; +} + +static int __init intc_1chip_of_init(struct device_node *node, + struct device_node *parent) +{ + return ingenic_intc_of_init(node, 1); +} +IRQCHIP_DECLARE(jz4740_intc, "ingenic,jz4740-intc", intc_1chip_of_init); + +static int __init intc_2chip_of_init(struct device_node *node, + struct device_node *parent) +{ + return ingenic_intc_of_init(node, 2); +} +IRQCHIP_DECLARE(jz4770_intc, "ingenic,jz4770-intc", intc_2chip_of_init); +IRQCHIP_DECLARE(jz4775_intc, "ingenic,jz4775-intc", intc_2chip_of_init); +IRQCHIP_DECLARE(jz4780_intc, "ingenic,jz4780-intc", intc_2chip_of_init); diff --git a/arch/mips/kernel/irq_cpu.c b/drivers/irqchip/irq-mips-cpu.c index 6eb7a3f515fc..a43c41988009 100644 --- a/arch/mips/kernel/irq_cpu.c +++ b/drivers/irqchip/irq-mips-cpu.c @@ -38,6 +38,8 @@ #include <asm/mipsmtregs.h> #include <asm/setup.h> +#include "irqchip.h" + static inline void unmask_mips_irq(struct irq_data *d) { set_c0_status(0x100 << (d->irq - MIPS_CPU_IRQ_BASE)); @@ -167,3 +169,4 @@ int __init mips_cpu_irq_of_init(struct device_node *of_node, __mips_cpu_irq_init(of_node); return 0; } +IRQCHIP_DECLARE(cpu_intc, "mti,cpu-interrupt-controller", mips_cpu_irq_of_init); diff --git a/drivers/md/md.c b/drivers/md/md.c index 8d9f89b4519d..df92d30ca054 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2628,13 +2628,14 @@ errors_show(struct md_rdev *rdev, char *page) static ssize_t errors_store(struct md_rdev *rdev, const char *buf, size_t len) { - char *e; - unsigned long n = simple_strtoul(buf, &e, 10); - if (*buf && (*e == 0 || *e == '\n')) { - atomic_set(&rdev->corrected_errors, n); - return len; - } - return -EINVAL; + unsigned int n; + int rv; + + rv = kstrtouint(buf, 10, &n); + if (rv < 0) + return rv; + atomic_set(&rdev->corrected_errors, n); + return len; } static struct rdev_sysfs_entry rdev_errors = __ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store); @@ -2651,13 +2652,16 @@ slot_show(struct md_rdev *rdev, char *page) static ssize_t slot_store(struct md_rdev *rdev, const char *buf, size_t len) { - char *e; + int slot; int err; - int slot = simple_strtoul(buf, &e, 10); + if (strncmp(buf, "none", 4)==0) slot = -1; - else if (e==buf || (*e && *e!= '\n')) - return -EINVAL; + else { + err = kstrtouint(buf, 10, (unsigned int *)&slot); + if (err < 0) + return err; + } if (rdev->mddev->pers && slot == -1) { /* Setting 'slot' on an active array requires also * updating the 'rd%d' link, and communicating @@ -3542,12 +3546,12 @@ layout_show(struct mddev *mddev, char *page) static ssize_t layout_store(struct mddev *mddev, const char *buf, size_t len) { - char *e; - unsigned long n = simple_strtoul(buf, &e, 10); + unsigned int n; int err; - if (!*buf || (*e && *e != '\n')) - return -EINVAL; + err = kstrtouint(buf, 10, &n); + if (err < 0) + return err; err = mddev_lock(mddev); if (err) return err; @@ -3591,12 +3595,12 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks); static ssize_t raid_disks_store(struct mddev *mddev, const char *buf, size_t len) { - char *e; + unsigned int n; int err; - unsigned long n = simple_strtoul(buf, &e, 10); - if (!*buf || (*e && *e != '\n')) - return -EINVAL; + err = kstrtouint(buf, 10, &n); + if (err < 0) + return err; err = mddev_lock(mddev); if (err) @@ -3643,12 +3647,12 @@ chunk_size_show(struct mddev *mddev, char *page) static ssize_t chunk_size_store(struct mddev *mddev, const char *buf, size_t len) { + unsigned long n; int err; - char *e; - unsigned long n = simple_strtoul(buf, &e, 10); - if (!*buf || (*e && *e != '\n')) - return -EINVAL; + err = kstrtoul(buf, 10, &n); + if (err < 0) + return err; err = mddev_lock(mddev); if (err) @@ -3686,19 +3690,24 @@ resync_start_show(struct mddev *mddev, char *page) static ssize_t resync_start_store(struct mddev *mddev, const char *buf, size_t len) { + unsigned long long n; int err; - char *e; - unsigned long long n = simple_strtoull(buf, &e, 10); + + if (cmd_match(buf, "none")) + n = MaxSector; + else { + err = kstrtoull(buf, 10, &n); + if (err < 0) + return err; + if (n != (sector_t)n) + return -EINVAL; + } err = mddev_lock(mddev); if (err) return err; if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) err = -EBUSY; - else if (cmd_match(buf, "none")) - n = MaxSector; - else if (!*buf || (*e && *e != '\n')) - err = -EINVAL; if (!err) { mddev->recovery_cp = n; @@ -3934,14 +3943,14 @@ max_corrected_read_errors_show(struct mddev *mddev, char *page) { static ssize_t max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len) { - char *e; - unsigned long n = simple_strtoul(buf, &e, 10); + unsigned int n; + int rv; - if (*buf && (*e == 0 || *e == '\n')) { - atomic_set(&mddev->max_corr_read_errors, n); - return len; - } - return -EINVAL; + rv = kstrtouint(buf, 10, &n); + if (rv < 0) + return rv; + atomic_set(&mddev->max_corr_read_errors, n); + return len; } static struct md_sysfs_entry max_corr_read_errors = @@ -4003,8 +4012,10 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len) else rdev = md_import_device(dev, -1, -1); - if (IS_ERR(rdev)) + if (IS_ERR(rdev)) { + mddev_unlock(mddev); return PTR_ERR(rdev); + } err = bind_rdev_to_array(rdev, mddev); out: if (err) @@ -4298,15 +4309,18 @@ sync_min_show(struct mddev *mddev, char *page) static ssize_t sync_min_store(struct mddev *mddev, const char *buf, size_t len) { - int min; - char *e; + unsigned int min; + int rv; + if (strncmp(buf, "system", 6)==0) { - mddev->sync_speed_min = 0; - return len; + min = 0; + } else { + rv = kstrtouint(buf, 10, &min); + if (rv < 0) + return rv; + if (min == 0) + return -EINVAL; } - min = simple_strtoul(buf, &e, 10); - if (buf == e || (*e && *e != '\n') || min <= 0) - return -EINVAL; mddev->sync_speed_min = min; return len; } @@ -4324,15 +4338,18 @@ sync_max_show(struct mddev *mddev, char *page) static ssize_t sync_max_store(struct mddev *mddev, const char *buf, size_t len) { - int max; - char *e; + unsigned int max; + int rv; + if (strncmp(buf, "system", 6)==0) { - mddev->sync_speed_max = 0; - return len; + max = 0; + } else { + rv = kstrtouint(buf, 10, &max); + if (rv < 0) + return rv; + if (max == 0) + return -EINVAL; } - max = simple_strtoul(buf, &e, 10); - if (buf == e || (*e && *e != '\n') || max <= 0) - return -EINVAL; mddev->sync_speed_max = max; return len; } @@ -4515,12 +4532,13 @@ suspend_lo_show(struct mddev *mddev, char *page) static ssize_t suspend_lo_store(struct mddev *mddev, const char *buf, size_t len) { - char *e; - unsigned long long new = simple_strtoull(buf, &e, 10); - unsigned long long old; + unsigned long long old, new; int err; - if (buf == e || (*e && *e != '\n')) + err = kstrtoull(buf, 10, &new); + if (err < 0) + return err; + if (new != (sector_t)new) return -EINVAL; err = mddev_lock(mddev); @@ -4557,12 +4575,13 @@ suspend_hi_show(struct mddev *mddev, char *page) static ssize_t suspend_hi_store(struct mddev *mddev, const char *buf, size_t len) { - char *e; - unsigned long long new = simple_strtoull(buf, &e, 10); - unsigned long long old; + unsigned long long old, new; int err; - if (buf == e || (*e && *e != '\n')) + err = kstrtoull(buf, 10, &new); + if (err < 0) + return err; + if (new != (sector_t)new) return -EINVAL; err = mddev_lock(mddev); @@ -4604,11 +4623,13 @@ static ssize_t reshape_position_store(struct mddev *mddev, const char *buf, size_t len) { struct md_rdev *rdev; - char *e; + unsigned long long new; int err; - unsigned long long new = simple_strtoull(buf, &e, 10); - if (buf == e || (*e && *e != '\n')) + err = kstrtoull(buf, 10, &new); + if (err < 0) + return err; + if (new != (sector_t)new) return -EINVAL; err = mddev_lock(mddev); if (err) @@ -5157,6 +5178,7 @@ int md_run(struct mddev *mddev) mddev_detach(mddev); if (mddev->private) pers->free(mddev, mddev->private); + mddev->private = NULL; module_put(pers->owner); bitmap_destroy(mddev); return err; @@ -5292,6 +5314,7 @@ static void md_clean(struct mddev *mddev) mddev->changed = 0; mddev->degraded = 0; mddev->safemode = 0; + mddev->private = NULL; mddev->merge_check_needed = 0; mddev->bitmap_info.offset = 0; mddev->bitmap_info.default_offset = 0; @@ -5364,6 +5387,7 @@ static void __md_stop(struct mddev *mddev) mddev->pers = NULL; spin_unlock(&mddev->lock); pers->free(mddev, mddev->private); + mddev->private = NULL; if (pers->sync_request && mddev->to_remove == NULL) mddev->to_remove = &md_redundancy_group; module_put(pers->owner); @@ -6373,7 +6397,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->ctime != info->ctime || mddev->level != info->level || /* mddev->layout != info->layout || */ - !mddev->persistent != info->not_persistent|| + mddev->persistent != !info->not_persistent || mddev->chunk_sectors != info->chunk_size >> 9 || /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */ ((state^info->state) & 0xfffffe00) @@ -8104,6 +8128,15 @@ void md_check_recovery(struct mddev *mddev) int spares = 0; if (mddev->ro) { + struct md_rdev *rdev; + if (!mddev->external && mddev->in_sync) + /* 'Blocked' flag not needed as failed devices + * will be recorded if array switched to read/write. + * Leaving it set will prevent the device + * from being removed. + */ + rdev_for_each(rdev, mddev) + clear_bit(Blocked, &rdev->flags); /* On a read-only array we can: * - remove failed devices * - add already-in_sync devices if the array itself @@ -9011,13 +9044,7 @@ static int get_ro(char *buffer, struct kernel_param *kp) } static int set_ro(const char *val, struct kernel_param *kp) { - char *e; - int num = simple_strtoul(val, &e, 10); - if (*val && (*e == '\0' || *e == '\n')) { - start_readonly = num; - return 0; - } - return -EINVAL; + return kstrtouint(val, 10, (unsigned int *)&start_readonly); } module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 188d8e9a6bdc..940f2f365461 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2099,17 +2099,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) tbio->bi_rw = WRITE; tbio->bi_private = r10_bio; tbio->bi_iter.bi_sector = r10_bio->devs[i].addr; - - for (j=0; j < vcnt ; j++) { - tbio->bi_io_vec[j].bv_offset = 0; - tbio->bi_io_vec[j].bv_len = PAGE_SIZE; - - memcpy(page_address(tbio->bi_io_vec[j].bv_page), - page_address(fbio->bi_io_vec[j].bv_page), - PAGE_SIZE); - } tbio->bi_end_io = end_sync_write; + bio_copy_data(tbio, fbio); + d = r10_bio->devs[i].devnum; atomic_inc(&conf->mirrors[d].rdev->nr_pending); atomic_inc(&r10_bio->remaining); @@ -2124,17 +2117,14 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) * that are active */ for (i = 0; i < conf->copies; i++) { - int j, d; + int d; tbio = r10_bio->devs[i].repl_bio; if (!tbio || !tbio->bi_end_io) continue; if (r10_bio->devs[i].bio->bi_end_io != end_sync_write && r10_bio->devs[i].bio != fbio) - for (j = 0; j < vcnt; j++) - memcpy(page_address(tbio->bi_io_vec[j].bv_page), - page_address(fbio->bi_io_vec[j].bv_page), - PAGE_SIZE); + bio_copy_data(tbio, fbio); d = r10_bio->devs[i].devnum; atomic_inc(&r10_bio->remaining); md_sync_acct(conf->mirrors[d].replacement->bdev, diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b6793d2e051f..59e44e99eef3 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -344,7 +344,8 @@ static void release_inactive_stripe_list(struct r5conf *conf, int hash) { int size; - bool do_wakeup = false; + unsigned long do_wakeup = 0; + int i = 0; unsigned long flags; if (hash == NR_STRIPE_HASH_LOCKS) { @@ -365,15 +366,21 @@ static void release_inactive_stripe_list(struct r5conf *conf, !list_empty(list)) atomic_dec(&conf->empty_inactive_list_nr); list_splice_tail_init(list, conf->inactive_list + hash); - do_wakeup = true; + do_wakeup |= 1 << hash; spin_unlock_irqrestore(conf->hash_locks + hash, flags); } size--; hash--; } + for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) { + if (do_wakeup & (1 << i)) + wake_up(&conf->wait_for_stripe[i]); + } + if (do_wakeup) { - wake_up(&conf->wait_for_stripe); + if (atomic_read(&conf->active_stripes) == 0) + wake_up(&conf->wait_for_quiescent); if (conf->retry_read_aligned) md_wakeup_thread(conf->mddev->thread); } @@ -667,15 +674,15 @@ get_active_stripe(struct r5conf *conf, sector_t sector, spin_lock_irq(conf->hash_locks + hash); do { - wait_event_lock_irq(conf->wait_for_stripe, + wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0 || noquiesce, *(conf->hash_locks + hash)); sh = __find_stripe(conf, sector, conf->generation - previous); if (!sh) { if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) { sh = get_free_stripe(conf, hash); - if (!sh && llist_empty(&conf->released_stripes) && - !test_bit(R5_DID_ALLOC, &conf->cache_state)) + if (!sh && !test_bit(R5_DID_ALLOC, + &conf->cache_state)) set_bit(R5_ALLOC_MORE, &conf->cache_state); } @@ -684,14 +691,15 @@ get_active_stripe(struct r5conf *conf, sector_t sector, if (!sh) { set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); - wait_event_lock_irq( - conf->wait_for_stripe, + wait_event_exclusive_cmd( + conf->wait_for_stripe[hash], !list_empty(conf->inactive_list + hash) && (atomic_read(&conf->active_stripes) < (conf->max_nr_stripes * 3 / 4) || !test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)), - *(conf->hash_locks + hash)); + spin_unlock_irq(conf->hash_locks + hash), + spin_lock_irq(conf->hash_locks + hash)); clear_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); } else { @@ -716,6 +724,9 @@ get_active_stripe(struct r5conf *conf, sector_t sector, } } while (sh == NULL); + if (!list_empty(conf->inactive_list + hash)) + wake_up(&conf->wait_for_stripe[hash]); + spin_unlock_irq(conf->hash_locks + hash); return sh; } @@ -2177,7 +2188,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) cnt = 0; list_for_each_entry(nsh, &newstripes, lru) { lock_device_hash_lock(conf, hash); - wait_event_cmd(conf->wait_for_stripe, + wait_event_exclusive_cmd(conf->wait_for_stripe[hash], !list_empty(conf->inactive_list + hash), unlock_device_hash_lock(conf, hash), lock_device_hash_lock(conf, hash)); @@ -4760,7 +4771,7 @@ static void raid5_align_endio(struct bio *bi, int error) raid_bi, 0); bio_endio(raid_bi, 0); if (atomic_dec_and_test(&conf->active_aligned_reads)) - wake_up(&conf->wait_for_stripe); + wake_up(&conf->wait_for_quiescent); return; } @@ -4855,7 +4866,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) align_bi->bi_iter.bi_sector += rdev->data_offset; spin_lock_irq(&conf->device_lock); - wait_event_lock_irq(conf->wait_for_stripe, + wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0, conf->device_lock); atomic_inc(&conf->active_aligned_reads); @@ -5699,7 +5710,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) bio_endio(raid_bio, 0); } if (atomic_dec_and_test(&conf->active_aligned_reads)) - wake_up(&conf->wait_for_stripe); + wake_up(&conf->wait_for_quiescent); return handled; } @@ -6433,7 +6444,10 @@ static struct r5conf *setup_conf(struct mddev *mddev) goto abort; spin_lock_init(&conf->device_lock); seqcount_init(&conf->gen_lock); - init_waitqueue_head(&conf->wait_for_stripe); + init_waitqueue_head(&conf->wait_for_quiescent); + for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) { + init_waitqueue_head(&conf->wait_for_stripe[i]); + } init_waitqueue_head(&conf->wait_for_overlap); INIT_LIST_HEAD(&conf->handle_list); INIT_LIST_HEAD(&conf->hold_list); @@ -7466,7 +7480,7 @@ static void raid5_quiesce(struct mddev *mddev, int state) * active stripes can drain */ conf->quiesce = 2; - wait_event_cmd(conf->wait_for_stripe, + wait_event_cmd(conf->wait_for_quiescent, atomic_read(&conf->active_stripes) == 0 && atomic_read(&conf->active_aligned_reads) == 0, unlock_all_device_hash_locks_irq(conf), @@ -7480,7 +7494,7 @@ static void raid5_quiesce(struct mddev *mddev, int state) case 0: /* re-enable writes */ lock_all_device_hash_locks_irq(conf); conf->quiesce = 0; - wake_up(&conf->wait_for_stripe); + wake_up(&conf->wait_for_quiescent); wake_up(&conf->wait_for_overlap); unlock_all_device_hash_locks_irq(conf); break; diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 896d603ad0da..02c3bf8fbfe7 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -511,7 +511,8 @@ struct r5conf { struct list_head inactive_list[NR_STRIPE_HASH_LOCKS]; atomic_t empty_inactive_list_nr; struct llist_head released_stripes; - wait_queue_head_t wait_for_stripe; + wait_queue_head_t wait_for_quiescent; + wait_queue_head_t wait_for_stripe[NR_STRIPE_HASH_LOCKS]; wait_queue_head_t wait_for_overlap; unsigned long cache_state; #define R5_INACTIVE_BLOCKED 1 /* release of inactive stripes blocked, diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig new file mode 100644 index 000000000000..72226acb5c0f --- /dev/null +++ b/drivers/nvdimm/Kconfig @@ -0,0 +1,68 @@ +menuconfig LIBNVDIMM + tristate "NVDIMM (Non-Volatile Memory Device) Support" + depends on PHYS_ADDR_T_64BIT + depends on BLK_DEV + help + Generic support for non-volatile memory devices including + ACPI-6-NFIT defined resources. On platforms that define an + NFIT, or otherwise can discover NVDIMM resources, a libnvdimm + bus is registered to advertise PMEM (persistent memory) + namespaces (/dev/pmemX) and BLK (sliding mmio window(s)) + namespaces (/dev/ndblkX.Y). A PMEM namespace refers to a + memory resource that may span multiple DIMMs and support DAX + (see CONFIG_DAX). A BLK namespace refers to an NVDIMM control + region which exposes an mmio register set for windowed access + mode to non-volatile memory. + +if LIBNVDIMM + +config BLK_DEV_PMEM + tristate "PMEM: Persistent memory block device support" + default LIBNVDIMM + depends on HAS_IOMEM + select ND_BTT if BTT + help + Memory ranges for PMEM are described by either an NFIT + (NVDIMM Firmware Interface Table, see CONFIG_NFIT_ACPI), a + non-standard OEM-specific E820 memory type (type-12, see + CONFIG_X86_PMEM_LEGACY), or it is manually specified by the + 'memmap=nn[KMG]!ss[KMG]' kernel command line (see + Documentation/kernel-parameters.txt). This driver converts + these persistent memory ranges into block devices that are + capable of DAX (direct-access) file system mappings. See + Documentation/nvdimm/nvdimm.txt for more details. + + Say Y if you want to use an NVDIMM + +config ND_BLK + tristate "BLK: Block data window (aperture) device support" + default LIBNVDIMM + select ND_BTT if BTT + help + Support NVDIMMs, or other devices, that implement a BLK-mode + access capability. BLK-mode access uses memory-mapped-i/o + apertures to access persistent media. + + Say Y if your platform firmware emits an ACPI.NFIT table + (CONFIG_ACPI_NFIT), or otherwise exposes BLK-mode + capabilities. + +config ND_BTT + tristate + +config BTT + bool "BTT: Block Translation Table (atomic sector updates)" + default y if LIBNVDIMM + help + The Block Translation Table (BTT) provides atomic sector + update semantics for persistent memory devices, so that + applications that rely on sector writes not being torn (a + guarantee that typical disks provide) can continue to do so. + The BTT manifests itself as an alternate personality for an + NVDIMM namespace, i.e. a namespace can be in raw mode (pmemX, + ndblkX.Y, etc...), or 'sectored' mode, (pmemXs, ndblkX.Ys, + etc...). + + Select Y if unsure + +endif diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile new file mode 100644 index 000000000000..594bb97c867a --- /dev/null +++ b/drivers/nvdimm/Makefile @@ -0,0 +1,20 @@ +obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o +obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o +obj-$(CONFIG_ND_BTT) += nd_btt.o +obj-$(CONFIG_ND_BLK) += nd_blk.o + +nd_pmem-y := pmem.o + +nd_btt-y := btt.o + +nd_blk-y := blk.o + +libnvdimm-y := core.o +libnvdimm-y += bus.o +libnvdimm-y += dimm_devs.o +libnvdimm-y += dimm.o +libnvdimm-y += region_devs.o +libnvdimm-y += region.o +libnvdimm-y += namespace_devs.o +libnvdimm-y += label.o +libnvdimm-$(CONFIG_BTT) += btt_devs.o diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c new file mode 100644 index 000000000000..4f97b248c236 --- /dev/null +++ b/drivers/nvdimm/blk.c @@ -0,0 +1,384 @@ +/* + * NVDIMM Block Window Driver + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/blkdev.h> +#include <linux/fs.h> +#include <linux/genhd.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/nd.h> +#include <linux/sizes.h> +#include "nd.h" + +struct nd_blk_device { + struct request_queue *queue; + struct gendisk *disk; + struct nd_namespace_blk *nsblk; + struct nd_blk_region *ndbr; + size_t disk_size; + u32 sector_size; + u32 internal_lbasize; +}; + +static int nd_blk_major; + +static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev) +{ + return blk_dev->nsblk->lbasize - blk_dev->sector_size; +} + +static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk, + resource_size_t ns_offset, unsigned int len) +{ + int i; + + for (i = 0; i < nsblk->num_resources; i++) { + if (ns_offset < resource_size(nsblk->res[i])) { + if (ns_offset + len > resource_size(nsblk->res[i])) { + dev_WARN_ONCE(&nsblk->common.dev, 1, + "illegal request\n"); + return SIZE_MAX; + } + return nsblk->res[i]->start + ns_offset; + } + ns_offset -= resource_size(nsblk->res[i]); + } + + dev_WARN_ONCE(&nsblk->common.dev, 1, "request out of range\n"); + return SIZE_MAX; +} + +#ifdef CONFIG_BLK_DEV_INTEGRITY +static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, + struct bio_integrity_payload *bip, u64 lba, + int rw) +{ + unsigned int len = nd_blk_meta_size(blk_dev); + resource_size_t dev_offset, ns_offset; + struct nd_namespace_blk *nsblk; + struct nd_blk_region *ndbr; + int err = 0; + + nsblk = blk_dev->nsblk; + ndbr = blk_dev->ndbr; + ns_offset = lba * blk_dev->internal_lbasize + blk_dev->sector_size; + dev_offset = to_dev_offset(nsblk, ns_offset, len); + if (dev_offset == SIZE_MAX) + return -EIO; + + while (len) { + unsigned int cur_len; + struct bio_vec bv; + void *iobuf; + + bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); + /* + * The 'bv' obtained from bvec_iter_bvec has its .bv_len and + * .bv_offset already adjusted for iter->bi_bvec_done, and we + * can use those directly + */ + + cur_len = min(len, bv.bv_len); + iobuf = kmap_atomic(bv.bv_page); + err = ndbr->do_io(ndbr, dev_offset, iobuf + bv.bv_offset, + cur_len, rw); + kunmap_atomic(iobuf); + if (err) + return err; + + len -= cur_len; + dev_offset += cur_len; + bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len); + } + + return err; +} + +#else /* CONFIG_BLK_DEV_INTEGRITY */ +static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, + struct bio_integrity_payload *bip, u64 lba, + int rw) +{ + return 0; +} +#endif + +static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, + struct bio_integrity_payload *bip, struct page *page, + unsigned int len, unsigned int off, int rw, + sector_t sector) +{ + struct nd_blk_region *ndbr = blk_dev->ndbr; + resource_size_t dev_offset, ns_offset; + int err = 0; + void *iobuf; + u64 lba; + + while (len) { + unsigned int cur_len; + + /* + * If we don't have an integrity payload, we don't have to + * split the bvec into sectors, as this would cause unnecessary + * Block Window setup/move steps. the do_io routine is capable + * of handling len <= PAGE_SIZE. + */ + cur_len = bip ? min(len, blk_dev->sector_size) : len; + + lba = div_u64(sector << SECTOR_SHIFT, blk_dev->sector_size); + ns_offset = lba * blk_dev->internal_lbasize; + dev_offset = to_dev_offset(blk_dev->nsblk, ns_offset, cur_len); + if (dev_offset == SIZE_MAX) + return -EIO; + + iobuf = kmap_atomic(page); + err = ndbr->do_io(ndbr, dev_offset, iobuf + off, cur_len, rw); + kunmap_atomic(iobuf); + if (err) + return err; + + if (bip) { + err = nd_blk_rw_integrity(blk_dev, bip, lba, rw); + if (err) + return err; + } + len -= cur_len; + off += cur_len; + sector += blk_dev->sector_size >> SECTOR_SHIFT; + } + + return err; +} + +static void nd_blk_make_request(struct request_queue *q, struct bio *bio) +{ + struct block_device *bdev = bio->bi_bdev; + struct gendisk *disk = bdev->bd_disk; + struct bio_integrity_payload *bip; + struct nd_blk_device *blk_dev; + struct bvec_iter iter; + unsigned long start; + struct bio_vec bvec; + int err = 0, rw; + bool do_acct; + + /* + * bio_integrity_enabled also checks if the bio already has an + * integrity payload attached. If it does, we *don't* do a + * bio_integrity_prep here - the payload has been generated by + * another kernel subsystem, and we just pass it through. + */ + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { + err = -EIO; + goto out; + } + + bip = bio_integrity(bio); + blk_dev = disk->private_data; + rw = bio_data_dir(bio); + do_acct = nd_iostat_start(bio, &start); + bio_for_each_segment(bvec, bio, iter) { + unsigned int len = bvec.bv_len; + + BUG_ON(len > PAGE_SIZE); + err = nd_blk_do_bvec(blk_dev, bip, bvec.bv_page, len, + bvec.bv_offset, rw, iter.bi_sector); + if (err) { + dev_info(&blk_dev->nsblk->common.dev, + "io error in %s sector %lld, len %d,\n", + (rw == READ) ? "READ" : "WRITE", + (unsigned long long) iter.bi_sector, len); + break; + } + } + if (do_acct) + nd_iostat_end(bio, start); + + out: + bio_endio(bio, err); +} + +static int nd_blk_rw_bytes(struct nd_namespace_common *ndns, + resource_size_t offset, void *iobuf, size_t n, int rw) +{ + struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim); + struct nd_namespace_blk *nsblk = blk_dev->nsblk; + struct nd_blk_region *ndbr = blk_dev->ndbr; + resource_size_t dev_offset; + + dev_offset = to_dev_offset(nsblk, offset, n); + + if (unlikely(offset + n > blk_dev->disk_size)) { + dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); + return -EFAULT; + } + + if (dev_offset == SIZE_MAX) + return -EIO; + + return ndbr->do_io(ndbr, dev_offset, iobuf, n, rw); +} + +static const struct block_device_operations nd_blk_fops = { + .owner = THIS_MODULE, + .revalidate_disk = nvdimm_revalidate_disk, +}; + +static int nd_blk_attach_disk(struct nd_namespace_common *ndns, + struct nd_blk_device *blk_dev) +{ + resource_size_t available_disk_size; + struct gendisk *disk; + u64 internal_nlba; + + internal_nlba = div_u64(blk_dev->disk_size, blk_dev->internal_lbasize); + available_disk_size = internal_nlba * blk_dev->sector_size; + + blk_dev->queue = blk_alloc_queue(GFP_KERNEL); + if (!blk_dev->queue) + return -ENOMEM; + + blk_queue_make_request(blk_dev->queue, nd_blk_make_request); + blk_queue_max_hw_sectors(blk_dev->queue, UINT_MAX); + blk_queue_bounce_limit(blk_dev->queue, BLK_BOUNCE_ANY); + blk_queue_logical_block_size(blk_dev->queue, blk_dev->sector_size); + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, blk_dev->queue); + + disk = blk_dev->disk = alloc_disk(0); + if (!disk) { + blk_cleanup_queue(blk_dev->queue); + return -ENOMEM; + } + + disk->driverfs_dev = &ndns->dev; + disk->major = nd_blk_major; + disk->first_minor = 0; + disk->fops = &nd_blk_fops; + disk->private_data = blk_dev; + disk->queue = blk_dev->queue; + disk->flags = GENHD_FL_EXT_DEVT; + nvdimm_namespace_disk_name(ndns, disk->disk_name); + set_capacity(disk, 0); + add_disk(disk); + + if (nd_blk_meta_size(blk_dev)) { + int rc = nd_integrity_init(disk, nd_blk_meta_size(blk_dev)); + + if (rc) { + del_gendisk(disk); + put_disk(disk); + blk_cleanup_queue(blk_dev->queue); + return rc; + } + } + + set_capacity(disk, available_disk_size >> SECTOR_SHIFT); + revalidate_disk(disk); + return 0; +} + +static int nd_blk_probe(struct device *dev) +{ + struct nd_namespace_common *ndns; + struct nd_namespace_blk *nsblk; + struct nd_blk_device *blk_dev; + int rc; + + ndns = nvdimm_namespace_common_probe(dev); + if (IS_ERR(ndns)) + return PTR_ERR(ndns); + + blk_dev = kzalloc(sizeof(*blk_dev), GFP_KERNEL); + if (!blk_dev) + return -ENOMEM; + + nsblk = to_nd_namespace_blk(&ndns->dev); + blk_dev->disk_size = nvdimm_namespace_capacity(ndns); + blk_dev->ndbr = to_nd_blk_region(dev->parent); + blk_dev->nsblk = to_nd_namespace_blk(&ndns->dev); + blk_dev->internal_lbasize = roundup(nsblk->lbasize, + INT_LBASIZE_ALIGNMENT); + blk_dev->sector_size = ((nsblk->lbasize >= 4096) ? 4096 : 512); + dev_set_drvdata(dev, blk_dev); + + ndns->rw_bytes = nd_blk_rw_bytes; + if (is_nd_btt(dev)) + rc = nvdimm_namespace_attach_btt(ndns); + else if (nd_btt_probe(ndns, blk_dev) == 0) { + /* we'll come back as btt-blk */ + rc = -ENXIO; + } else + rc = nd_blk_attach_disk(ndns, blk_dev); + if (rc) + kfree(blk_dev); + return rc; +} + +static void nd_blk_detach_disk(struct nd_blk_device *blk_dev) +{ + del_gendisk(blk_dev->disk); + put_disk(blk_dev->disk); + blk_cleanup_queue(blk_dev->queue); +} + +static int nd_blk_remove(struct device *dev) +{ + struct nd_blk_device *blk_dev = dev_get_drvdata(dev); + + if (is_nd_btt(dev)) + nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); + else + nd_blk_detach_disk(blk_dev); + kfree(blk_dev); + + return 0; +} + +static struct nd_device_driver nd_blk_driver = { + .probe = nd_blk_probe, + .remove = nd_blk_remove, + .drv = { + .name = "nd_blk", + }, + .type = ND_DRIVER_NAMESPACE_BLK, +}; + +static int __init nd_blk_init(void) +{ + int rc; + + rc = register_blkdev(0, "nd_blk"); + if (rc < 0) + return rc; + + nd_blk_major = rc; + rc = nd_driver_register(&nd_blk_driver); + + if (rc < 0) + unregister_blkdev(nd_blk_major, "nd_blk"); + + return rc; +} + +static void __exit nd_blk_exit(void) +{ + driver_unregister(&nd_blk_driver.drv); + unregister_blkdev(nd_blk_major, "nd_blk"); +} + +MODULE_AUTHOR("Ross Zwisler <[email protected]>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_BLK); +module_init(nd_blk_init); +module_exit(nd_blk_exit); diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c new file mode 100644 index 000000000000..411c7b2bb37a --- /dev/null +++ b/drivers/nvdimm/btt.c @@ -0,0 +1,1479 @@ +/* + * Block Translation Table + * Copyright (c) 2014-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include <linux/highmem.h> +#include <linux/debugfs.h> +#include <linux/blkdev.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/mutex.h> +#include <linux/hdreg.h> +#include <linux/genhd.h> +#include <linux/sizes.h> +#include <linux/ndctl.h> +#include <linux/fs.h> +#include <linux/nd.h> +#include "btt.h" +#include "nd.h" + +enum log_ent_request { + LOG_NEW_ENT = 0, + LOG_OLD_ENT +}; + +static int btt_major; + +static int arena_read_bytes(struct arena_info *arena, resource_size_t offset, + void *buf, size_t n) +{ + struct nd_btt *nd_btt = arena->nd_btt; + struct nd_namespace_common *ndns = nd_btt->ndns; + + /* arena offsets are 4K from the base of the device */ + offset += SZ_4K; + return nvdimm_read_bytes(ndns, offset, buf, n); +} + +static int arena_write_bytes(struct arena_info *arena, resource_size_t offset, + void *buf, size_t n) +{ + struct nd_btt *nd_btt = arena->nd_btt; + struct nd_namespace_common *ndns = nd_btt->ndns; + + /* arena offsets are 4K from the base of the device */ + offset += SZ_4K; + return nvdimm_write_bytes(ndns, offset, buf, n); +} + +static int btt_info_write(struct arena_info *arena, struct btt_sb *super) +{ + int ret; + + ret = arena_write_bytes(arena, arena->info2off, super, + sizeof(struct btt_sb)); + if (ret) + return ret; + + return arena_write_bytes(arena, arena->infooff, super, + sizeof(struct btt_sb)); +} + +static int btt_info_read(struct arena_info *arena, struct btt_sb *super) +{ + WARN_ON(!super); + return arena_read_bytes(arena, arena->infooff, super, + sizeof(struct btt_sb)); +} + +/* + * 'raw' version of btt_map write + * Assumptions: + * mapping is in little-endian + * mapping contains 'E' and 'Z' flags as desired + */ +static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping) +{ + u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE); + + WARN_ON(lba >= arena->external_nlba); + return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE); +} + +static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping, + u32 z_flag, u32 e_flag) +{ + u32 ze; + __le32 mapping_le; + + /* + * This 'mapping' is supposed to be just the LBA mapping, without + * any flags set, so strip the flag bits. + */ + mapping &= MAP_LBA_MASK; + + ze = (z_flag << 1) + e_flag; + switch (ze) { + case 0: + /* + * We want to set neither of the Z or E flags, and + * in the actual layout, this means setting the bit + * positions of both to '1' to indicate a 'normal' + * map entry + */ + mapping |= MAP_ENT_NORMAL; + break; + case 1: + mapping |= (1 << MAP_ERR_SHIFT); + break; + case 2: + mapping |= (1 << MAP_TRIM_SHIFT); + break; + default: + /* + * The case where Z and E are both sent in as '1' could be + * construed as a valid 'normal' case, but we decide not to, + * to avoid confusion + */ + WARN_ONCE(1, "Invalid use of Z and E flags\n"); + return -EIO; + } + + mapping_le = cpu_to_le32(mapping); + return __btt_map_write(arena, lba, mapping_le); +} + +static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, + int *trim, int *error) +{ + int ret; + __le32 in; + u32 raw_mapping, postmap, ze, z_flag, e_flag; + u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE); + + WARN_ON(lba >= arena->external_nlba); + + ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE); + if (ret) + return ret; + + raw_mapping = le32_to_cpu(in); + + z_flag = (raw_mapping & MAP_TRIM_MASK) >> MAP_TRIM_SHIFT; + e_flag = (raw_mapping & MAP_ERR_MASK) >> MAP_ERR_SHIFT; + ze = (z_flag << 1) + e_flag; + postmap = raw_mapping & MAP_LBA_MASK; + + /* Reuse the {z,e}_flag variables for *trim and *error */ + z_flag = 0; + e_flag = 0; + + switch (ze) { + case 0: + /* Initial state. Return postmap = premap */ + *mapping = lba; + break; + case 1: + *mapping = postmap; + e_flag = 1; + break; + case 2: + *mapping = postmap; + z_flag = 1; + break; + case 3: + *mapping = postmap; + break; + default: + return -EIO; + } + + if (trim) + *trim = z_flag; + if (error) + *error = e_flag; + + return ret; +} + +static int btt_log_read_pair(struct arena_info *arena, u32 lane, + struct log_entry *ent) +{ + WARN_ON(!ent); + return arena_read_bytes(arena, + arena->logoff + (2 * lane * LOG_ENT_SIZE), ent, + 2 * LOG_ENT_SIZE); +} + +static struct dentry *debugfs_root; + +static void arena_debugfs_init(struct arena_info *a, struct dentry *parent, + int idx) +{ + char dirname[32]; + struct dentry *d; + + /* If for some reason, parent bttN was not created, exit */ + if (!parent) + return; + + snprintf(dirname, 32, "arena%d", idx); + d = debugfs_create_dir(dirname, parent); + if (IS_ERR_OR_NULL(d)) + return; + a->debugfs_dir = d; + + debugfs_create_x64("size", S_IRUGO, d, &a->size); + debugfs_create_x64("external_lba_start", S_IRUGO, d, + &a->external_lba_start); + debugfs_create_x32("internal_nlba", S_IRUGO, d, &a->internal_nlba); + debugfs_create_u32("internal_lbasize", S_IRUGO, d, + &a->internal_lbasize); + debugfs_create_x32("external_nlba", S_IRUGO, d, &a->external_nlba); + debugfs_create_u32("external_lbasize", S_IRUGO, d, + &a->external_lbasize); + debugfs_create_u32("nfree", S_IRUGO, d, &a->nfree); + debugfs_create_u16("version_major", S_IRUGO, d, &a->version_major); + debugfs_create_u16("version_minor", S_IRUGO, d, &a->version_minor); + debugfs_create_x64("nextoff", S_IRUGO, d, &a->nextoff); + debugfs_create_x64("infooff", S_IRUGO, d, &a->infooff); + debugfs_create_x64("dataoff", S_IRUGO, d, &a->dataoff); + debugfs_create_x64("mapoff", S_IRUGO, d, &a->mapoff); + debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff); + debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off); + debugfs_create_x32("flags", S_IRUGO, d, &a->flags); +} + +static void btt_debugfs_init(struct btt *btt) +{ + int i = 0; + struct arena_info *arena; + + btt->debugfs_dir = debugfs_create_dir(dev_name(&btt->nd_btt->dev), + debugfs_root); + if (IS_ERR_OR_NULL(btt->debugfs_dir)) + return; + + list_for_each_entry(arena, &btt->arena_list, list) { + arena_debugfs_init(arena, btt->debugfs_dir, i); + i++; + } +} + +/* + * This function accepts two log entries, and uses the + * sequence number to find the 'older' entry. + * It also updates the sequence number in this old entry to + * make it the 'new' one if the mark_flag is set. + * Finally, it returns which of the entries was the older one. + * + * TODO The logic feels a bit kludge-y. make it better.. + */ +static int btt_log_get_old(struct log_entry *ent) +{ + int old; + + /* + * the first ever time this is seen, the entry goes into [0] + * the next time, the following logic works out to put this + * (next) entry into [1] + */ + if (ent[0].seq == 0) { + ent[0].seq = cpu_to_le32(1); + return 0; + } + + if (ent[0].seq == ent[1].seq) + return -EINVAL; + if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5) + return -EINVAL; + + if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) { + if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1) + old = 0; + else + old = 1; + } else { + if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1) + old = 1; + else + old = 0; + } + + return old; +} + +static struct device *to_dev(struct arena_info *arena) +{ + return &arena->nd_btt->dev; +} + +/* + * This function copies the desired (old/new) log entry into ent if + * it is not NULL. It returns the sub-slot number (0 or 1) + * where the desired log entry was found. Negative return values + * indicate errors. + */ +static int btt_log_read(struct arena_info *arena, u32 lane, + struct log_entry *ent, int old_flag) +{ + int ret; + int old_ent, ret_ent; + struct log_entry log[2]; + + ret = btt_log_read_pair(arena, lane, log); + if (ret) + return -EIO; + + old_ent = btt_log_get_old(log); + if (old_ent < 0 || old_ent > 1) { + dev_info(to_dev(arena), + "log corruption (%d): lane %d seq [%d, %d]\n", + old_ent, lane, log[0].seq, log[1].seq); + /* TODO set error state? */ + return -EIO; + } + + ret_ent = (old_flag ? old_ent : (1 - old_ent)); + + if (ent != NULL) + memcpy(ent, &log[ret_ent], LOG_ENT_SIZE); + + return ret_ent; +} + +/* + * This function commits a log entry to media + * It does _not_ prepare the freelist entry for the next write + * btt_flog_write is the wrapper for updating the freelist elements + */ +static int __btt_log_write(struct arena_info *arena, u32 lane, + u32 sub, struct log_entry *ent) +{ + int ret; + /* + * Ignore the padding in log_entry for calculating log_half. + * The entry is 'committed' when we write the sequence number, + * and we want to ensure that that is the last thing written. + * We don't bother writing the padding as that would be extra + * media wear and write amplification + */ + unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2; + u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE); + void *src = ent; + + /* split the 16B write into atomic, durable halves */ + ret = arena_write_bytes(arena, ns_off, src, log_half); + if (ret) + return ret; + + ns_off += log_half; + src += log_half; + return arena_write_bytes(arena, ns_off, src, log_half); +} + +static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub, + struct log_entry *ent) +{ + int ret; + + ret = __btt_log_write(arena, lane, sub, ent); + if (ret) + return ret; + + /* prepare the next free entry */ + arena->freelist[lane].sub = 1 - arena->freelist[lane].sub; + if (++(arena->freelist[lane].seq) == 4) + arena->freelist[lane].seq = 1; + arena->freelist[lane].block = le32_to_cpu(ent->old_map); + + return ret; +} + +/* + * This function initializes the BTT map to the initial state, which is + * all-zeroes, and indicates an identity mapping + */ +static int btt_map_init(struct arena_info *arena) +{ + int ret = -EINVAL; + void *zerobuf; + size_t offset = 0; + size_t chunk_size = SZ_2M; + size_t mapsize = arena->logoff - arena->mapoff; + + zerobuf = kzalloc(chunk_size, GFP_KERNEL); + if (!zerobuf) + return -ENOMEM; + + while (mapsize) { + size_t size = min(mapsize, chunk_size); + + ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf, + size); + if (ret) + goto free; + + offset += size; + mapsize -= size; + cond_resched(); + } + + free: + kfree(zerobuf); + return ret; +} + +/* + * This function initializes the BTT log with 'fake' entries pointing + * to the initial reserved set of blocks as being free + */ +static int btt_log_init(struct arena_info *arena) +{ + int ret; + u32 i; + struct log_entry log, zerolog; + + memset(&zerolog, 0, sizeof(zerolog)); + + for (i = 0; i < arena->nfree; i++) { + log.lba = cpu_to_le32(i); + log.old_map = cpu_to_le32(arena->external_nlba + i); + log.new_map = cpu_to_le32(arena->external_nlba + i); + log.seq = cpu_to_le32(LOG_SEQ_INIT); + ret = __btt_log_write(arena, i, 0, &log); + if (ret) + return ret; + ret = __btt_log_write(arena, i, 1, &zerolog); + if (ret) + return ret; + } + + return 0; +} + +static int btt_freelist_init(struct arena_info *arena) +{ + int old, new, ret; + u32 i, map_entry; + struct log_entry log_new, log_old; + + arena->freelist = kcalloc(arena->nfree, sizeof(struct free_entry), + GFP_KERNEL); + if (!arena->freelist) + return -ENOMEM; + + for (i = 0; i < arena->nfree; i++) { + old = btt_log_read(arena, i, &log_old, LOG_OLD_ENT); + if (old < 0) + return old; + + new = btt_log_read(arena, i, &log_new, LOG_NEW_ENT); + if (new < 0) + return new; + + /* sub points to the next one to be overwritten */ + arena->freelist[i].sub = 1 - new; + arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq)); + arena->freelist[i].block = le32_to_cpu(log_new.old_map); + + /* This implies a newly created or untouched flog entry */ + if (log_new.old_map == log_new.new_map) + continue; + + /* Check if map recovery is needed */ + ret = btt_map_read(arena, le32_to_cpu(log_new.lba), &map_entry, + NULL, NULL); + if (ret) + return ret; + if ((le32_to_cpu(log_new.new_map) != map_entry) && + (le32_to_cpu(log_new.old_map) == map_entry)) { + /* + * Last transaction wrote the flog, but wasn't able + * to complete the map write. So fix up the map. + */ + ret = btt_map_write(arena, le32_to_cpu(log_new.lba), + le32_to_cpu(log_new.new_map), 0, 0); + if (ret) + return ret; + } + + } + + return 0; +} + +static int btt_rtt_init(struct arena_info *arena) +{ + arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL); + if (arena->rtt == NULL) + return -ENOMEM; + + return 0; +} + +static int btt_maplocks_init(struct arena_info *arena) +{ + u32 i; + + arena->map_locks = kcalloc(arena->nfree, sizeof(struct aligned_lock), + GFP_KERNEL); + if (!arena->map_locks) + return -ENOMEM; + + for (i = 0; i < arena->nfree; i++) + spin_lock_init(&arena->map_locks[i].lock); + + return 0; +} + +static struct arena_info *alloc_arena(struct btt *btt, size_t size, + size_t start, size_t arena_off) +{ + struct arena_info *arena; + u64 logsize, mapsize, datasize; + u64 available = size; + + arena = kzalloc(sizeof(struct arena_info), GFP_KERNEL); + if (!arena) + return NULL; + arena->nd_btt = btt->nd_btt; + + if (!size) + return arena; + + arena->size = size; + arena->external_lba_start = start; + arena->external_lbasize = btt->lbasize; + arena->internal_lbasize = roundup(arena->external_lbasize, + INT_LBASIZE_ALIGNMENT); + arena->nfree = BTT_DEFAULT_NFREE; + arena->version_major = 1; + arena->version_minor = 1; + + if (available % BTT_PG_SIZE) + available -= (available % BTT_PG_SIZE); + + /* Two pages are reserved for the super block and its copy */ + available -= 2 * BTT_PG_SIZE; + + /* The log takes a fixed amount of space based on nfree */ + logsize = roundup(2 * arena->nfree * sizeof(struct log_entry), + BTT_PG_SIZE); + available -= logsize; + + /* Calculate optimal split between map and data area */ + arena->internal_nlba = div_u64(available - BTT_PG_SIZE, + arena->internal_lbasize + MAP_ENT_SIZE); + arena->external_nlba = arena->internal_nlba - arena->nfree; + + mapsize = roundup((arena->external_nlba * MAP_ENT_SIZE), BTT_PG_SIZE); + datasize = available - mapsize; + + /* 'Absolute' values, relative to start of storage space */ + arena->infooff = arena_off; + arena->dataoff = arena->infooff + BTT_PG_SIZE; + arena->mapoff = arena->dataoff + datasize; + arena->logoff = arena->mapoff + mapsize; + arena->info2off = arena->logoff + logsize; + return arena; +} + +static void free_arenas(struct btt *btt) +{ + struct arena_info *arena, *next; + + list_for_each_entry_safe(arena, next, &btt->arena_list, list) { + list_del(&arena->list); + kfree(arena->rtt); + kfree(arena->map_locks); + kfree(arena->freelist); + debugfs_remove_recursive(arena->debugfs_dir); + kfree(arena); + } +} + +/* + * This function checks if the metadata layout is valid and error free + */ +static int arena_is_valid(struct arena_info *arena, struct btt_sb *super, + u8 *uuid, u32 lbasize) +{ + u64 checksum; + + if (memcmp(super->uuid, uuid, 16)) + return 0; + + checksum = le64_to_cpu(super->checksum); + super->checksum = 0; + if (checksum != nd_btt_sb_checksum(super)) + return 0; + super->checksum = cpu_to_le64(checksum); + + if (lbasize != le32_to_cpu(super->external_lbasize)) + return 0; + + /* TODO: figure out action for this */ + if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0) + dev_info(to_dev(arena), "Found arena with an error flag\n"); + + return 1; +} + +/* + * This function reads an existing valid btt superblock and + * populates the corresponding arena_info struct + */ +static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super, + u64 arena_off) +{ + arena->internal_nlba = le32_to_cpu(super->internal_nlba); + arena->internal_lbasize = le32_to_cpu(super->internal_lbasize); + arena->external_nlba = le32_to_cpu(super->external_nlba); + arena->external_lbasize = le32_to_cpu(super->external_lbasize); + arena->nfree = le32_to_cpu(super->nfree); + arena->version_major = le16_to_cpu(super->version_major); + arena->version_minor = le16_to_cpu(super->version_minor); + + arena->nextoff = (super->nextoff == 0) ? 0 : (arena_off + + le64_to_cpu(super->nextoff)); + arena->infooff = arena_off; + arena->dataoff = arena_off + le64_to_cpu(super->dataoff); + arena->mapoff = arena_off + le64_to_cpu(super->mapoff); + arena->logoff = arena_off + le64_to_cpu(super->logoff); + arena->info2off = arena_off + le64_to_cpu(super->info2off); + + arena->size = (super->nextoff > 0) ? (le64_to_cpu(super->nextoff)) : + (arena->info2off - arena->infooff + BTT_PG_SIZE); + + arena->flags = le32_to_cpu(super->flags); +} + +static int discover_arenas(struct btt *btt) +{ + int ret = 0; + struct arena_info *arena; + struct btt_sb *super; + size_t remaining = btt->rawsize; + u64 cur_nlba = 0; + size_t cur_off = 0; + int num_arenas = 0; + + super = kzalloc(sizeof(*super), GFP_KERNEL); + if (!super) + return -ENOMEM; + + while (remaining) { + /* Alloc memory for arena */ + arena = alloc_arena(btt, 0, 0, 0); + if (!arena) { + ret = -ENOMEM; + goto out_super; + } + + arena->infooff = cur_off; + ret = btt_info_read(arena, super); + if (ret) + goto out; + + if (!arena_is_valid(arena, super, btt->nd_btt->uuid, + btt->lbasize)) { + if (remaining == btt->rawsize) { + btt->init_state = INIT_NOTFOUND; + dev_info(to_dev(arena), "No existing arenas\n"); + goto out; + } else { + dev_info(to_dev(arena), + "Found corrupted metadata!\n"); + ret = -ENODEV; + goto out; + } + } + + arena->external_lba_start = cur_nlba; + parse_arena_meta(arena, super, cur_off); + + ret = btt_freelist_init(arena); + if (ret) + goto out; + + ret = btt_rtt_init(arena); + if (ret) + goto out; + + ret = btt_maplocks_init(arena); + if (ret) + goto out; + + list_add_tail(&arena->list, &btt->arena_list); + + remaining -= arena->size; + cur_off += arena->size; + cur_nlba += arena->external_nlba; + num_arenas++; + + if (arena->nextoff == 0) + break; + } + btt->num_arenas = num_arenas; + btt->nlba = cur_nlba; + btt->init_state = INIT_READY; + + kfree(super); + return ret; + + out: + kfree(arena); + free_arenas(btt); + out_super: + kfree(super); + return ret; +} + +static int create_arenas(struct btt *btt) +{ + size_t remaining = btt->rawsize; + size_t cur_off = 0; + + while (remaining) { + struct arena_info *arena; + size_t arena_size = min_t(u64, ARENA_MAX_SIZE, remaining); + + remaining -= arena_size; + if (arena_size < ARENA_MIN_SIZE) + break; + + arena = alloc_arena(btt, arena_size, btt->nlba, cur_off); + if (!arena) { + free_arenas(btt); + return -ENOMEM; + } + btt->nlba += arena->external_nlba; + if (remaining >= ARENA_MIN_SIZE) + arena->nextoff = arena->size; + else + arena->nextoff = 0; + cur_off += arena_size; + list_add_tail(&arena->list, &btt->arena_list); + } + + return 0; +} + +/* + * This function completes arena initialization by writing + * all the metadata. + * It is only called for an uninitialized arena when a write + * to that arena occurs for the first time. + */ +static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid) +{ + int ret; + struct btt_sb *super; + + ret = btt_map_init(arena); + if (ret) + return ret; + + ret = btt_log_init(arena); + if (ret) + return ret; + + super = kzalloc(sizeof(struct btt_sb), GFP_NOIO); + if (!super) + return -ENOMEM; + + strncpy(super->signature, BTT_SIG, BTT_SIG_LEN); + memcpy(super->uuid, uuid, 16); + super->flags = cpu_to_le32(arena->flags); + super->version_major = cpu_to_le16(arena->version_major); + super->version_minor = cpu_to_le16(arena->version_minor); + super->external_lbasize = cpu_to_le32(arena->external_lbasize); + super->external_nlba = cpu_to_le32(arena->external_nlba); + super->internal_lbasize = cpu_to_le32(arena->internal_lbasize); + super->internal_nlba = cpu_to_le32(arena->internal_nlba); + super->nfree = cpu_to_le32(arena->nfree); + super->infosize = cpu_to_le32(sizeof(struct btt_sb)); + super->nextoff = cpu_to_le64(arena->nextoff); + /* + * Subtract arena->infooff (arena start) so numbers are relative + * to 'this' arena + */ + super->dataoff = cpu_to_le64(arena->dataoff - arena->infooff); + super->mapoff = cpu_to_le64(arena->mapoff - arena->infooff); + super->logoff = cpu_to_le64(arena->logoff - arena->infooff); + super->info2off = cpu_to_le64(arena->info2off - arena->infooff); + + super->flags = 0; + super->checksum = cpu_to_le64(nd_btt_sb_checksum(super)); + + ret = btt_info_write(arena, super); + + kfree(super); + return ret; +} + +/* + * This function completes the initialization for the BTT namespace + * such that it is ready to accept IOs + */ +static int btt_meta_init(struct btt *btt) +{ + int ret = 0; + struct arena_info *arena; + + mutex_lock(&btt->init_lock); + list_for_each_entry(arena, &btt->arena_list, list) { + ret = btt_arena_write_layout(arena, btt->nd_btt->uuid); + if (ret) + goto unlock; + + ret = btt_freelist_init(arena); + if (ret) + goto unlock; + + ret = btt_rtt_init(arena); + if (ret) + goto unlock; + + ret = btt_maplocks_init(arena); + if (ret) + goto unlock; + } + + btt->init_state = INIT_READY; + + unlock: + mutex_unlock(&btt->init_lock); + return ret; +} + +static u32 btt_meta_size(struct btt *btt) +{ + return btt->lbasize - btt->sector_size; +} + +/* + * This function calculates the arena in which the given LBA lies + * by doing a linear walk. This is acceptable since we expect only + * a few arenas. If we have backing devices that get much larger, + * we can construct a balanced binary tree of arenas at init time + * so that this range search becomes faster. + */ +static int lba_to_arena(struct btt *btt, sector_t sector, __u32 *premap, + struct arena_info **arena) +{ + struct arena_info *arena_list; + __u64 lba = div_u64(sector << SECTOR_SHIFT, btt->sector_size); + + list_for_each_entry(arena_list, &btt->arena_list, list) { + if (lba < arena_list->external_nlba) { + *arena = arena_list; + *premap = lba; + return 0; + } + lba -= arena_list->external_nlba; + } + + return -EIO; +} + +/* + * The following (lock_map, unlock_map) are mostly just to improve + * readability, since they index into an array of locks + */ +static void lock_map(struct arena_info *arena, u32 premap) + __acquires(&arena->map_locks[idx].lock) +{ + u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree; + + spin_lock(&arena->map_locks[idx].lock); +} + +static void unlock_map(struct arena_info *arena, u32 premap) + __releases(&arena->map_locks[idx].lock) +{ + u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree; + + spin_unlock(&arena->map_locks[idx].lock); +} + +static u64 to_namespace_offset(struct arena_info *arena, u64 lba) +{ + return arena->dataoff + ((u64)lba * arena->internal_lbasize); +} + +static int btt_data_read(struct arena_info *arena, struct page *page, + unsigned int off, u32 lba, u32 len) +{ + int ret; + u64 nsoff = to_namespace_offset(arena, lba); + void *mem = kmap_atomic(page); + + ret = arena_read_bytes(arena, nsoff, mem + off, len); + kunmap_atomic(mem); + + return ret; +} + +static int btt_data_write(struct arena_info *arena, u32 lba, + struct page *page, unsigned int off, u32 len) +{ + int ret; + u64 nsoff = to_namespace_offset(arena, lba); + void *mem = kmap_atomic(page); + + ret = arena_write_bytes(arena, nsoff, mem + off, len); + kunmap_atomic(mem); + + return ret; +} + +static void zero_fill_data(struct page *page, unsigned int off, u32 len) +{ + void *mem = kmap_atomic(page); + + memset(mem + off, 0, len); + kunmap_atomic(mem); +} + +#ifdef CONFIG_BLK_DEV_INTEGRITY +static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip, + struct arena_info *arena, u32 postmap, int rw) +{ + unsigned int len = btt_meta_size(btt); + u64 meta_nsoff; + int ret = 0; + + if (bip == NULL) + return 0; + + meta_nsoff = to_namespace_offset(arena, postmap) + btt->sector_size; + + while (len) { + unsigned int cur_len; + struct bio_vec bv; + void *mem; + + bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); + /* + * The 'bv' obtained from bvec_iter_bvec has its .bv_len and + * .bv_offset already adjusted for iter->bi_bvec_done, and we + * can use those directly + */ + + cur_len = min(len, bv.bv_len); + mem = kmap_atomic(bv.bv_page); + if (rw) + ret = arena_write_bytes(arena, meta_nsoff, + mem + bv.bv_offset, cur_len); + else + ret = arena_read_bytes(arena, meta_nsoff, + mem + bv.bv_offset, cur_len); + + kunmap_atomic(mem); + if (ret) + return ret; + + len -= cur_len; + meta_nsoff += cur_len; + bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len); + } + + return ret; +} + +#else /* CONFIG_BLK_DEV_INTEGRITY */ +static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip, + struct arena_info *arena, u32 postmap, int rw) +{ + return 0; +} +#endif + +static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip, + struct page *page, unsigned int off, sector_t sector, + unsigned int len) +{ + int ret = 0; + int t_flag, e_flag; + struct arena_info *arena = NULL; + u32 lane = 0, premap, postmap; + + while (len) { + u32 cur_len; + + lane = nd_region_acquire_lane(btt->nd_region); + + ret = lba_to_arena(btt, sector, &premap, &arena); + if (ret) + goto out_lane; + + cur_len = min(btt->sector_size, len); + + ret = btt_map_read(arena, premap, &postmap, &t_flag, &e_flag); + if (ret) + goto out_lane; + + /* + * We loop to make sure that the post map LBA didn't change + * from under us between writing the RTT and doing the actual + * read. + */ + while (1) { + u32 new_map; + + if (t_flag) { + zero_fill_data(page, off, cur_len); + goto out_lane; + } + + if (e_flag) { + ret = -EIO; + goto out_lane; + } + + arena->rtt[lane] = RTT_VALID | postmap; + /* + * Barrier to make sure this write is not reordered + * to do the verification map_read before the RTT store + */ + barrier(); + + ret = btt_map_read(arena, premap, &new_map, &t_flag, + &e_flag); + if (ret) + goto out_rtt; + + if (postmap == new_map) + break; + + postmap = new_map; + } + + ret = btt_data_read(arena, page, off, postmap, cur_len); + if (ret) + goto out_rtt; + + if (bip) { + ret = btt_rw_integrity(btt, bip, arena, postmap, READ); + if (ret) + goto out_rtt; + } + + arena->rtt[lane] = RTT_INVALID; + nd_region_release_lane(btt->nd_region, lane); + + len -= cur_len; + off += cur_len; + sector += btt->sector_size >> SECTOR_SHIFT; + } + + return 0; + + out_rtt: + arena->rtt[lane] = RTT_INVALID; + out_lane: + nd_region_release_lane(btt->nd_region, lane); + return ret; +} + +static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, + sector_t sector, struct page *page, unsigned int off, + unsigned int len) +{ + int ret = 0; + struct arena_info *arena = NULL; + u32 premap = 0, old_postmap, new_postmap, lane = 0, i; + struct log_entry log; + int sub; + + while (len) { + u32 cur_len; + + lane = nd_region_acquire_lane(btt->nd_region); + + ret = lba_to_arena(btt, sector, &premap, &arena); + if (ret) + goto out_lane; + cur_len = min(btt->sector_size, len); + + if ((arena->flags & IB_FLAG_ERROR_MASK) != 0) { + ret = -EIO; + goto out_lane; + } + + new_postmap = arena->freelist[lane].block; + + /* Wait if the new block is being read from */ + for (i = 0; i < arena->nfree; i++) + while (arena->rtt[i] == (RTT_VALID | new_postmap)) + cpu_relax(); + + + if (new_postmap >= arena->internal_nlba) { + ret = -EIO; + goto out_lane; + } + + ret = btt_data_write(arena, new_postmap, page, off, cur_len); + if (ret) + goto out_lane; + + if (bip) { + ret = btt_rw_integrity(btt, bip, arena, new_postmap, + WRITE); + if (ret) + goto out_lane; + } + + lock_map(arena, premap); + ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL); + if (ret) + goto out_map; + if (old_postmap >= arena->internal_nlba) { + ret = -EIO; + goto out_map; + } + + log.lba = cpu_to_le32(premap); + log.old_map = cpu_to_le32(old_postmap); + log.new_map = cpu_to_le32(new_postmap); + log.seq = cpu_to_le32(arena->freelist[lane].seq); + sub = arena->freelist[lane].sub; + ret = btt_flog_write(arena, lane, sub, &log); + if (ret) + goto out_map; + + ret = btt_map_write(arena, premap, new_postmap, 0, 0); + if (ret) + goto out_map; + + unlock_map(arena, premap); + nd_region_release_lane(btt->nd_region, lane); + + len -= cur_len; + off += cur_len; + sector += btt->sector_size >> SECTOR_SHIFT; + } + + return 0; + + out_map: + unlock_map(arena, premap); + out_lane: + nd_region_release_lane(btt->nd_region, lane); + return ret; +} + +static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip, + struct page *page, unsigned int len, unsigned int off, + int rw, sector_t sector) +{ + int ret; + + if (rw == READ) { + ret = btt_read_pg(btt, bip, page, off, sector, len); + flush_dcache_page(page); + } else { + flush_dcache_page(page); + ret = btt_write_pg(btt, bip, sector, page, off, len); + } + + return ret; +} + +static void btt_make_request(struct request_queue *q, struct bio *bio) +{ + struct bio_integrity_payload *bip = bio_integrity(bio); + struct btt *btt = q->queuedata; + struct bvec_iter iter; + unsigned long start; + struct bio_vec bvec; + int err = 0, rw; + bool do_acct; + + /* + * bio_integrity_enabled also checks if the bio already has an + * integrity payload attached. If it does, we *don't* do a + * bio_integrity_prep here - the payload has been generated by + * another kernel subsystem, and we just pass it through. + */ + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { + err = -EIO; + goto out; + } + + do_acct = nd_iostat_start(bio, &start); + rw = bio_data_dir(bio); + bio_for_each_segment(bvec, bio, iter) { + unsigned int len = bvec.bv_len; + + BUG_ON(len > PAGE_SIZE); + /* Make sure len is in multiples of sector size. */ + /* XXX is this right? */ + BUG_ON(len < btt->sector_size); + BUG_ON(len % btt->sector_size); + + err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset, + rw, iter.bi_sector); + if (err) { + dev_info(&btt->nd_btt->dev, + "io error in %s sector %lld, len %d,\n", + (rw == READ) ? "READ" : "WRITE", + (unsigned long long) iter.bi_sector, len); + break; + } + } + if (do_acct) + nd_iostat_end(bio, start); + +out: + bio_endio(bio, err); +} + +static int btt_rw_page(struct block_device *bdev, sector_t sector, + struct page *page, int rw) +{ + struct btt *btt = bdev->bd_disk->private_data; + + btt_do_bvec(btt, NULL, page, PAGE_CACHE_SIZE, 0, rw, sector); + page_endio(page, rw & WRITE, 0); + return 0; +} + + +static int btt_getgeo(struct block_device *bd, struct hd_geometry *geo) +{ + /* some standard values */ + geo->heads = 1 << 6; + geo->sectors = 1 << 5; + geo->cylinders = get_capacity(bd->bd_disk) >> 11; + return 0; +} + +static const struct block_device_operations btt_fops = { + .owner = THIS_MODULE, + .rw_page = btt_rw_page, + .getgeo = btt_getgeo, + .revalidate_disk = nvdimm_revalidate_disk, +}; + +static int btt_blk_init(struct btt *btt) +{ + struct nd_btt *nd_btt = btt->nd_btt; + struct nd_namespace_common *ndns = nd_btt->ndns; + + /* create a new disk and request queue for btt */ + btt->btt_queue = blk_alloc_queue(GFP_KERNEL); + if (!btt->btt_queue) + return -ENOMEM; + + btt->btt_disk = alloc_disk(0); + if (!btt->btt_disk) { + blk_cleanup_queue(btt->btt_queue); + return -ENOMEM; + } + + nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name); + btt->btt_disk->driverfs_dev = &btt->nd_btt->dev; + btt->btt_disk->major = btt_major; + btt->btt_disk->first_minor = 0; + btt->btt_disk->fops = &btt_fops; + btt->btt_disk->private_data = btt; + btt->btt_disk->queue = btt->btt_queue; + btt->btt_disk->flags = GENHD_FL_EXT_DEVT; + + blk_queue_make_request(btt->btt_queue, btt_make_request); + blk_queue_logical_block_size(btt->btt_queue, btt->sector_size); + blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX); + blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY); + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue); + btt->btt_queue->queuedata = btt; + + set_capacity(btt->btt_disk, 0); + add_disk(btt->btt_disk); + if (btt_meta_size(btt)) { + int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt)); + + if (rc) { + del_gendisk(btt->btt_disk); + put_disk(btt->btt_disk); + blk_cleanup_queue(btt->btt_queue); + return rc; + } + } + set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); + revalidate_disk(btt->btt_disk); + + return 0; +} + +static void btt_blk_cleanup(struct btt *btt) +{ + blk_integrity_unregister(btt->btt_disk); + del_gendisk(btt->btt_disk); + put_disk(btt->btt_disk); + blk_cleanup_queue(btt->btt_queue); +} + +/** + * btt_init - initialize a block translation table for the given device + * @nd_btt: device with BTT geometry and backing device info + * @rawsize: raw size in bytes of the backing device + * @lbasize: lba size of the backing device + * @uuid: A uuid for the backing device - this is stored on media + * @maxlane: maximum number of parallel requests the device can handle + * + * Initialize a Block Translation Table on a backing device to provide + * single sector power fail atomicity. + * + * Context: + * Might sleep. + * + * Returns: + * Pointer to a new struct btt on success, NULL on failure. + */ +static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize, + u32 lbasize, u8 *uuid, struct nd_region *nd_region) +{ + int ret; + struct btt *btt; + struct device *dev = &nd_btt->dev; + + btt = kzalloc(sizeof(struct btt), GFP_KERNEL); + if (!btt) + return NULL; + + btt->nd_btt = nd_btt; + btt->rawsize = rawsize; + btt->lbasize = lbasize; + btt->sector_size = ((lbasize >= 4096) ? 4096 : 512); + INIT_LIST_HEAD(&btt->arena_list); + mutex_init(&btt->init_lock); + btt->nd_region = nd_region; + + ret = discover_arenas(btt); + if (ret) { + dev_err(dev, "init: error in arena_discover: %d\n", ret); + goto out_free; + } + + if (btt->init_state != INIT_READY && nd_region->ro) { + dev_info(dev, "%s is read-only, unable to init btt metadata\n", + dev_name(&nd_region->dev)); + goto out_free; + } else if (btt->init_state != INIT_READY) { + btt->num_arenas = (rawsize / ARENA_MAX_SIZE) + + ((rawsize % ARENA_MAX_SIZE) ? 1 : 0); + dev_dbg(dev, "init: %d arenas for %llu rawsize\n", + btt->num_arenas, rawsize); + + ret = create_arenas(btt); + if (ret) { + dev_info(dev, "init: create_arenas: %d\n", ret); + goto out_free; + } + + ret = btt_meta_init(btt); + if (ret) { + dev_err(dev, "init: error in meta_init: %d\n", ret); + goto out_free; + } + } + + ret = btt_blk_init(btt); + if (ret) { + dev_err(dev, "init: error in blk_init: %d\n", ret); + goto out_free; + } + + btt_debugfs_init(btt); + + return btt; + + out_free: + kfree(btt); + return NULL; +} + +/** + * btt_fini - de-initialize a BTT + * @btt: the BTT handle that was generated by btt_init + * + * De-initialize a Block Translation Table on device removal + * + * Context: + * Might sleep. + */ +static void btt_fini(struct btt *btt) +{ + if (btt) { + btt_blk_cleanup(btt); + free_arenas(btt); + debugfs_remove_recursive(btt->debugfs_dir); + kfree(btt); + } +} + +int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns) +{ + struct nd_btt *nd_btt = to_nd_btt(ndns->claim); + struct nd_region *nd_region; + struct btt *btt; + size_t rawsize; + + if (!nd_btt->uuid || !nd_btt->ndns || !nd_btt->lbasize) + return -ENODEV; + + rawsize = nvdimm_namespace_capacity(ndns) - SZ_4K; + if (rawsize < ARENA_MIN_SIZE) { + return -ENXIO; + } + nd_region = to_nd_region(nd_btt->dev.parent); + btt = btt_init(nd_btt, rawsize, nd_btt->lbasize, nd_btt->uuid, + nd_region); + if (!btt) + return -ENOMEM; + nd_btt->btt = btt; + + return 0; +} +EXPORT_SYMBOL(nvdimm_namespace_attach_btt); + +int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns) +{ + struct nd_btt *nd_btt = to_nd_btt(ndns->claim); + struct btt *btt = nd_btt->btt; + + btt_fini(btt); + nd_btt->btt = NULL; + + return 0; +} +EXPORT_SYMBOL(nvdimm_namespace_detach_btt); + +static int __init nd_btt_init(void) +{ + int rc; + + BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K); + + btt_major = register_blkdev(0, "btt"); + if (btt_major < 0) + return btt_major; + + debugfs_root = debugfs_create_dir("btt", NULL); + if (IS_ERR_OR_NULL(debugfs_root)) { + rc = -ENXIO; + goto err_debugfs; + } + + return 0; + + err_debugfs: + unregister_blkdev(btt_major, "btt"); + + return rc; +} + +static void __exit nd_btt_exit(void) +{ + debugfs_remove_recursive(debugfs_root); + unregister_blkdev(btt_major, "btt"); +} + +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_BTT); +MODULE_AUTHOR("Vishal Verma <[email protected]>"); +MODULE_LICENSE("GPL v2"); +module_init(nd_btt_init); +module_exit(nd_btt_exit); diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h new file mode 100644 index 000000000000..75b0d80a6bd9 --- /dev/null +++ b/drivers/nvdimm/btt.h @@ -0,0 +1,185 @@ +/* + * Block Translation Table library + * Copyright (c) 2014-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _LINUX_BTT_H +#define _LINUX_BTT_H + +#include <linux/types.h> + +#define BTT_SIG_LEN 16 +#define BTT_SIG "BTT_ARENA_INFO\0" +#define MAP_ENT_SIZE 4 +#define MAP_TRIM_SHIFT 31 +#define MAP_TRIM_MASK (1 << MAP_TRIM_SHIFT) +#define MAP_ERR_SHIFT 30 +#define MAP_ERR_MASK (1 << MAP_ERR_SHIFT) +#define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT))) +#define MAP_ENT_NORMAL 0xC0000000 +#define LOG_ENT_SIZE sizeof(struct log_entry) +#define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */ +#define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */ +#define RTT_VALID (1UL << 31) +#define RTT_INVALID 0 +#define BTT_PG_SIZE 4096 +#define BTT_DEFAULT_NFREE ND_MAX_LANES +#define LOG_SEQ_INIT 1 + +#define IB_FLAG_ERROR 0x00000001 +#define IB_FLAG_ERROR_MASK 0x00000001 + +enum btt_init_state { + INIT_UNCHECKED = 0, + INIT_NOTFOUND, + INIT_READY +}; + +struct log_entry { + __le32 lba; + __le32 old_map; + __le32 new_map; + __le32 seq; + __le64 padding[2]; +}; + +struct btt_sb { + u8 signature[BTT_SIG_LEN]; + u8 uuid[16]; + u8 parent_uuid[16]; + __le32 flags; + __le16 version_major; + __le16 version_minor; + __le32 external_lbasize; + __le32 external_nlba; + __le32 internal_lbasize; + __le32 internal_nlba; + __le32 nfree; + __le32 infosize; + __le64 nextoff; + __le64 dataoff; + __le64 mapoff; + __le64 logoff; + __le64 info2off; + u8 padding[3968]; + __le64 checksum; +}; + +struct free_entry { + u32 block; + u8 sub; + u8 seq; +}; + +struct aligned_lock { + union { + spinlock_t lock; + u8 cacheline_padding[L1_CACHE_BYTES]; + }; +}; + +/** + * struct arena_info - handle for an arena + * @size: Size in bytes this arena occupies on the raw device. + * This includes arena metadata. + * @external_lba_start: The first external LBA in this arena. + * @internal_nlba: Number of internal blocks available in the arena + * including nfree reserved blocks + * @internal_lbasize: Internal and external lba sizes may be different as + * we can round up 'odd' external lbasizes such as 520B + * to be aligned. + * @external_nlba: Number of blocks contributed by the arena to the number + * reported to upper layers. (internal_nlba - nfree) + * @external_lbasize: LBA size as exposed to upper layers. + * @nfree: A reserve number of 'free' blocks that is used to + * handle incoming writes. + * @version_major: Metadata layout version major. + * @version_minor: Metadata layout version minor. + * @nextoff: Offset in bytes to the start of the next arena. + * @infooff: Offset in bytes to the info block of this arena. + * @dataoff: Offset in bytes to the data area of this arena. + * @mapoff: Offset in bytes to the map area of this arena. + * @logoff: Offset in bytes to the log area of this arena. + * @info2off: Offset in bytes to the backup info block of this arena. + * @freelist: Pointer to in-memory list of free blocks + * @rtt: Pointer to in-memory "Read Tracking Table" + * @map_locks: Spinlocks protecting concurrent map writes + * @nd_btt: Pointer to parent nd_btt structure. + * @list: List head for list of arenas + * @debugfs_dir: Debugfs dentry + * @flags: Arena flags - may signify error states. + * + * arena_info is a per-arena handle. Once an arena is narrowed down for an + * IO, this struct is passed around for the duration of the IO. + */ +struct arena_info { + u64 size; /* Total bytes for this arena */ + u64 external_lba_start; + u32 internal_nlba; + u32 internal_lbasize; + u32 external_nlba; + u32 external_lbasize; + u32 nfree; + u16 version_major; + u16 version_minor; + /* Byte offsets to the different on-media structures */ + u64 nextoff; + u64 infooff; + u64 dataoff; + u64 mapoff; + u64 logoff; + u64 info2off; + /* Pointers to other in-memory structures for this arena */ + struct free_entry *freelist; + u32 *rtt; + struct aligned_lock *map_locks; + struct nd_btt *nd_btt; + struct list_head list; + struct dentry *debugfs_dir; + /* Arena flags */ + u32 flags; +}; + +/** + * struct btt - handle for a BTT instance + * @btt_disk: Pointer to the gendisk for BTT device + * @btt_queue: Pointer to the request queue for the BTT device + * @arena_list: Head of the list of arenas + * @debugfs_dir: Debugfs dentry + * @nd_btt: Parent nd_btt struct + * @nlba: Number of logical blocks exposed to the upper layers + * after removing the amount of space needed by metadata + * @rawsize: Total size in bytes of the available backing device + * @lbasize: LBA size as requested and presented to upper layers. + * This is sector_size + size of any metadata. + * @sector_size: The Linux sector size - 512 or 4096 + * @lanes: Per-lane spinlocks + * @init_lock: Mutex used for the BTT initialization + * @init_state: Flag describing the initialization state for the BTT + * @num_arenas: Number of arenas in the BTT instance + */ +struct btt { + struct gendisk *btt_disk; + struct request_queue *btt_queue; + struct list_head arena_list; + struct dentry *debugfs_dir; + struct nd_btt *nd_btt; + u64 nlba; + unsigned long long rawsize; + u32 lbasize; + u32 sector_size; + struct nd_region *nd_region; + struct mutex init_lock; + int init_state; + int num_arenas; +}; +#endif diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c new file mode 100644 index 000000000000..6ac8c0fea3ec --- /dev/null +++ b/drivers/nvdimm/btt_devs.c @@ -0,0 +1,425 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/blkdev.h> +#include <linux/device.h> +#include <linux/genhd.h> +#include <linux/sizes.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include "nd-core.h" +#include "btt.h" +#include "nd.h" + +static void __nd_btt_detach_ndns(struct nd_btt *nd_btt) +{ + struct nd_namespace_common *ndns = nd_btt->ndns; + + dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex) + || ndns->claim != &nd_btt->dev, + "%s: invalid claim\n", __func__); + ndns->claim = NULL; + nd_btt->ndns = NULL; + put_device(&ndns->dev); +} + +static void nd_btt_detach_ndns(struct nd_btt *nd_btt) +{ + struct nd_namespace_common *ndns = nd_btt->ndns; + + if (!ndns) + return; + get_device(&ndns->dev); + device_lock(&ndns->dev); + __nd_btt_detach_ndns(nd_btt); + device_unlock(&ndns->dev); + put_device(&ndns->dev); +} + +static bool __nd_btt_attach_ndns(struct nd_btt *nd_btt, + struct nd_namespace_common *ndns) +{ + if (ndns->claim) + return false; + dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex) + || nd_btt->ndns, + "%s: invalid claim\n", __func__); + ndns->claim = &nd_btt->dev; + nd_btt->ndns = ndns; + get_device(&ndns->dev); + return true; +} + +static bool nd_btt_attach_ndns(struct nd_btt *nd_btt, + struct nd_namespace_common *ndns) +{ + bool claimed; + + device_lock(&ndns->dev); + claimed = __nd_btt_attach_ndns(nd_btt, ndns); + device_unlock(&ndns->dev); + return claimed; +} + +static void nd_btt_release(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_btt *nd_btt = to_nd_btt(dev); + + dev_dbg(dev, "%s\n", __func__); + nd_btt_detach_ndns(nd_btt); + ida_simple_remove(&nd_region->btt_ida, nd_btt->id); + kfree(nd_btt->uuid); + kfree(nd_btt); +} + +static struct device_type nd_btt_device_type = { + .name = "nd_btt", + .release = nd_btt_release, +}; + +bool is_nd_btt(struct device *dev) +{ + return dev->type == &nd_btt_device_type; +} +EXPORT_SYMBOL(is_nd_btt); + +struct nd_btt *to_nd_btt(struct device *dev) +{ + struct nd_btt *nd_btt = container_of(dev, struct nd_btt, dev); + + WARN_ON(!is_nd_btt(dev)); + return nd_btt; +} +EXPORT_SYMBOL(to_nd_btt); + +static const unsigned long btt_lbasize_supported[] = { 512, 520, 528, + 4096, 4104, 4160, 4224, 0 }; + +static ssize_t sector_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + + return nd_sector_size_show(nd_btt->lbasize, btt_lbasize_supported, buf); +} + +static ssize_t sector_size_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + ssize_t rc; + + device_lock(dev); + nvdimm_bus_lock(dev); + rc = nd_sector_size_store(dev, buf, &nd_btt->lbasize, + btt_lbasize_supported); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc ? rc : len; +} +static DEVICE_ATTR_RW(sector_size); + +static ssize_t uuid_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + + if (nd_btt->uuid) + return sprintf(buf, "%pUb\n", nd_btt->uuid); + return sprintf(buf, "\n"); +} + +static ssize_t uuid_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + ssize_t rc; + + device_lock(dev); + rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + device_unlock(dev); + + return rc ? rc : len; +} +static DEVICE_ATTR_RW(uuid); + +static ssize_t namespace_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + rc = sprintf(buf, "%s\n", nd_btt->ndns + ? dev_name(&nd_btt->ndns->dev) : ""); + nvdimm_bus_unlock(dev); + return rc; +} + +static int namespace_match(struct device *dev, void *data) +{ + char *name = data; + + return strcmp(name, dev_name(dev)) == 0; +} + +static bool is_nd_btt_idle(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_btt *nd_btt = to_nd_btt(dev); + + if (nd_region->btt_seed == dev || nd_btt->ndns || dev->driver) + return false; + return true; +} + +static ssize_t __namespace_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + struct nd_namespace_common *ndns; + struct device *found; + char *name; + + if (dev->driver) { + dev_dbg(dev, "%s: -EBUSY\n", __func__); + return -EBUSY; + } + + name = kstrndup(buf, len, GFP_KERNEL); + if (!name) + return -ENOMEM; + strim(name); + + if (strncmp(name, "namespace", 9) == 0 || strcmp(name, "") == 0) + /* pass */; + else { + len = -EINVAL; + goto out; + } + + ndns = nd_btt->ndns; + if (strcmp(name, "") == 0) { + /* detach the namespace and destroy / reset the btt device */ + nd_btt_detach_ndns(nd_btt); + if (is_nd_btt_idle(dev)) + nd_device_unregister(dev, ND_ASYNC); + else { + nd_btt->lbasize = 0; + kfree(nd_btt->uuid); + nd_btt->uuid = NULL; + } + goto out; + } else if (ndns) { + dev_dbg(dev, "namespace already set to: %s\n", + dev_name(&ndns->dev)); + len = -EBUSY; + goto out; + } + + found = device_find_child(dev->parent, name, namespace_match); + if (!found) { + dev_dbg(dev, "'%s' not found under %s\n", name, + dev_name(dev->parent)); + len = -ENODEV; + goto out; + } + + ndns = to_ndns(found); + if (__nvdimm_namespace_capacity(ndns) < SZ_16M) { + dev_dbg(dev, "%s too small to host btt\n", name); + len = -ENXIO; + goto out_attach; + } + + WARN_ON_ONCE(!is_nvdimm_bus_locked(&nd_btt->dev)); + if (!nd_btt_attach_ndns(nd_btt, ndns)) { + dev_dbg(dev, "%s already claimed\n", + dev_name(&ndns->dev)); + len = -EBUSY; + } + + out_attach: + put_device(&ndns->dev); /* from device_find_child */ + out: + kfree(name); + return len; +} + +static ssize_t namespace_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + ssize_t rc; + + nvdimm_bus_lock(dev); + device_lock(dev); + rc = __namespace_store(dev, attr, buf, len); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + device_unlock(dev); + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RW(namespace); + +static struct attribute *nd_btt_attributes[] = { + &dev_attr_sector_size.attr, + &dev_attr_namespace.attr, + &dev_attr_uuid.attr, + NULL, +}; + +static struct attribute_group nd_btt_attribute_group = { + .attrs = nd_btt_attributes, +}; + +static const struct attribute_group *nd_btt_attribute_groups[] = { + &nd_btt_attribute_group, + &nd_device_attribute_group, + &nd_numa_attribute_group, + NULL, +}; + +static struct device *__nd_btt_create(struct nd_region *nd_region, + unsigned long lbasize, u8 *uuid, + struct nd_namespace_common *ndns) +{ + struct nd_btt *nd_btt; + struct device *dev; + + nd_btt = kzalloc(sizeof(*nd_btt), GFP_KERNEL); + if (!nd_btt) + return NULL; + + nd_btt->id = ida_simple_get(&nd_region->btt_ida, 0, 0, GFP_KERNEL); + if (nd_btt->id < 0) { + kfree(nd_btt); + return NULL; + } + + nd_btt->lbasize = lbasize; + if (uuid) + uuid = kmemdup(uuid, 16, GFP_KERNEL); + nd_btt->uuid = uuid; + dev = &nd_btt->dev; + dev_set_name(dev, "btt%d.%d", nd_region->id, nd_btt->id); + dev->parent = &nd_region->dev; + dev->type = &nd_btt_device_type; + dev->groups = nd_btt_attribute_groups; + device_initialize(&nd_btt->dev); + if (ndns && !__nd_btt_attach_ndns(nd_btt, ndns)) { + dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", + __func__, dev_name(ndns->claim)); + put_device(dev); + return NULL; + } + return dev; +} + +struct device *nd_btt_create(struct nd_region *nd_region) +{ + struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL); + + if (dev) + __nd_device_register(dev); + return dev; +} + +/* + * nd_btt_sb_checksum: compute checksum for btt info block + * + * Returns a fletcher64 checksum of everything in the given info block + * except the last field (since that's where the checksum lives). + */ +u64 nd_btt_sb_checksum(struct btt_sb *btt_sb) +{ + u64 sum; + __le64 sum_save; + + sum_save = btt_sb->checksum; + btt_sb->checksum = 0; + sum = nd_fletcher64(btt_sb, sizeof(*btt_sb), 1); + btt_sb->checksum = sum_save; + return sum; +} +EXPORT_SYMBOL(nd_btt_sb_checksum); + +static int __nd_btt_probe(struct nd_btt *nd_btt, + struct nd_namespace_common *ndns, struct btt_sb *btt_sb) +{ + u64 checksum; + + if (!btt_sb || !ndns || !nd_btt) + return -ENODEV; + + if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb))) + return -ENXIO; + + if (nvdimm_namespace_capacity(ndns) < SZ_16M) + return -ENXIO; + + if (memcmp(btt_sb->signature, BTT_SIG, BTT_SIG_LEN) != 0) + return -ENODEV; + + checksum = le64_to_cpu(btt_sb->checksum); + btt_sb->checksum = 0; + if (checksum != nd_btt_sb_checksum(btt_sb)) + return -ENODEV; + btt_sb->checksum = cpu_to_le64(checksum); + + nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize); + nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL); + if (!nd_btt->uuid) + return -ENOMEM; + + __nd_device_register(&nd_btt->dev); + + return 0; +} + +int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata) +{ + int rc; + struct device *dev; + struct btt_sb *btt_sb; + struct nd_region *nd_region = to_nd_region(ndns->dev.parent); + + if (ndns->force_raw) + return -ENODEV; + + nvdimm_bus_lock(&ndns->dev); + dev = __nd_btt_create(nd_region, 0, NULL, ndns); + nvdimm_bus_unlock(&ndns->dev); + if (!dev) + return -ENOMEM; + dev_set_drvdata(dev, drvdata); + btt_sb = kzalloc(sizeof(*btt_sb), GFP_KERNEL); + rc = __nd_btt_probe(to_nd_btt(dev), ndns, btt_sb); + kfree(btt_sb); + dev_dbg(&ndns->dev, "%s: btt: %s\n", __func__, + rc == 0 ? dev_name(dev) : "<none>"); + if (rc < 0) { + __nd_btt_detach_ndns(to_nd_btt(dev)); + put_device(dev); + } + + return rc; +} +EXPORT_SYMBOL(nd_btt_probe); diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c new file mode 100644 index 000000000000..8eb22c0ca7ce --- /dev/null +++ b/drivers/nvdimm/bus.c @@ -0,0 +1,730 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/vmalloc.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <linux/blkdev.h> +#include <linux/fcntl.h> +#include <linux/async.h> +#include <linux/genhd.h> +#include <linux/ndctl.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/nd.h> +#include "nd-core.h" +#include "nd.h" + +int nvdimm_major; +static int nvdimm_bus_major; +static struct class *nd_class; + +static int to_nd_device_type(struct device *dev) +{ + if (is_nvdimm(dev)) + return ND_DEVICE_DIMM; + else if (is_nd_pmem(dev)) + return ND_DEVICE_REGION_PMEM; + else if (is_nd_blk(dev)) + return ND_DEVICE_REGION_BLK; + else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent)) + return nd_region_to_nstype(to_nd_region(dev->parent)); + + return 0; +} + +static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + /* + * Ensure that region devices always have their numa node set as + * early as possible. + */ + if (is_nd_pmem(dev) || is_nd_blk(dev)) + set_dev_node(dev, to_nd_region(dev)->numa_node); + return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT, + to_nd_device_type(dev)); +} + +static int nvdimm_bus_match(struct device *dev, struct device_driver *drv) +{ + struct nd_device_driver *nd_drv = to_nd_device_driver(drv); + + return test_bit(to_nd_device_type(dev), &nd_drv->type); +} + +static struct module *to_bus_provider(struct device *dev) +{ + /* pin bus providers while regions are enabled */ + if (is_nd_pmem(dev) || is_nd_blk(dev)) { + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + return nvdimm_bus->module; + } + return NULL; +} + +static void nvdimm_bus_probe_start(struct nvdimm_bus *nvdimm_bus) +{ + nvdimm_bus_lock(&nvdimm_bus->dev); + nvdimm_bus->probe_active++; + nvdimm_bus_unlock(&nvdimm_bus->dev); +} + +static void nvdimm_bus_probe_end(struct nvdimm_bus *nvdimm_bus) +{ + nvdimm_bus_lock(&nvdimm_bus->dev); + if (--nvdimm_bus->probe_active == 0) + wake_up(&nvdimm_bus->probe_wait); + nvdimm_bus_unlock(&nvdimm_bus->dev); +} + +static int nvdimm_bus_probe(struct device *dev) +{ + struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver); + struct module *provider = to_bus_provider(dev); + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + int rc; + + if (!try_module_get(provider)) + return -ENXIO; + + nvdimm_bus_probe_start(nvdimm_bus); + rc = nd_drv->probe(dev); + if (rc == 0) + nd_region_probe_success(nvdimm_bus, dev); + else + nd_region_disable(nvdimm_bus, dev); + nvdimm_bus_probe_end(nvdimm_bus); + + dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name, + dev_name(dev), rc); + + if (rc != 0) + module_put(provider); + return rc; +} + +static int nvdimm_bus_remove(struct device *dev) +{ + struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver); + struct module *provider = to_bus_provider(dev); + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + int rc; + + rc = nd_drv->remove(dev); + nd_region_disable(nvdimm_bus, dev); + + dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name, + dev_name(dev), rc); + module_put(provider); + return rc; +} + +static struct bus_type nvdimm_bus_type = { + .name = "nd", + .uevent = nvdimm_bus_uevent, + .match = nvdimm_bus_match, + .probe = nvdimm_bus_probe, + .remove = nvdimm_bus_remove, +}; + +static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain); + +void nd_synchronize(void) +{ + async_synchronize_full_domain(&nd_async_domain); +} +EXPORT_SYMBOL_GPL(nd_synchronize); + +static void nd_async_device_register(void *d, async_cookie_t cookie) +{ + struct device *dev = d; + + if (device_add(dev) != 0) { + dev_err(dev, "%s: failed\n", __func__); + put_device(dev); + } + put_device(dev); +} + +static void nd_async_device_unregister(void *d, async_cookie_t cookie) +{ + struct device *dev = d; + + /* flush bus operations before delete */ + nvdimm_bus_lock(dev); + nvdimm_bus_unlock(dev); + + device_unregister(dev); + put_device(dev); +} + +void __nd_device_register(struct device *dev) +{ + dev->bus = &nvdimm_bus_type; + get_device(dev); + async_schedule_domain(nd_async_device_register, dev, + &nd_async_domain); +} + +void nd_device_register(struct device *dev) +{ + device_initialize(dev); + __nd_device_register(dev); +} +EXPORT_SYMBOL(nd_device_register); + +void nd_device_unregister(struct device *dev, enum nd_async_mode mode) +{ + switch (mode) { + case ND_ASYNC: + get_device(dev); + async_schedule_domain(nd_async_device_unregister, dev, + &nd_async_domain); + break; + case ND_SYNC: + nd_synchronize(); + device_unregister(dev); + break; + } +} +EXPORT_SYMBOL(nd_device_unregister); + +/** + * __nd_driver_register() - register a region or a namespace driver + * @nd_drv: driver to register + * @owner: automatically set by nd_driver_register() macro + * @mod_name: automatically set by nd_driver_register() macro + */ +int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner, + const char *mod_name) +{ + struct device_driver *drv = &nd_drv->drv; + + if (!nd_drv->type) { + pr_debug("driver type bitmask not set (%pf)\n", + __builtin_return_address(0)); + return -EINVAL; + } + + if (!nd_drv->probe || !nd_drv->remove) { + pr_debug("->probe() and ->remove() must be specified\n"); + return -EINVAL; + } + + drv->bus = &nvdimm_bus_type; + drv->owner = owner; + drv->mod_name = mod_name; + + return driver_register(drv); +} +EXPORT_SYMBOL(__nd_driver_register); + +int nvdimm_revalidate_disk(struct gendisk *disk) +{ + struct device *dev = disk->driverfs_dev; + struct nd_region *nd_region = to_nd_region(dev->parent); + const char *pol = nd_region->ro ? "only" : "write"; + + if (nd_region->ro == get_disk_ro(disk)) + return 0; + + dev_info(dev, "%s read-%s, marking %s read-%s\n", + dev_name(&nd_region->dev), pol, disk->disk_name, pol); + set_disk_ro(disk, nd_region->ro); + + return 0; + +} +EXPORT_SYMBOL(nvdimm_revalidate_disk); + +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, ND_DEVICE_MODALIAS_FMT "\n", + to_nd_device_type(dev)); +} +static DEVICE_ATTR_RO(modalias); + +static ssize_t devtype_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%s\n", dev->type->name); +} +static DEVICE_ATTR_RO(devtype); + +static struct attribute *nd_device_attributes[] = { + &dev_attr_modalias.attr, + &dev_attr_devtype.attr, + NULL, +}; + +/** + * nd_device_attribute_group - generic attributes for all devices on an nd bus + */ +struct attribute_group nd_device_attribute_group = { + .attrs = nd_device_attributes, +}; +EXPORT_SYMBOL_GPL(nd_device_attribute_group); + +static ssize_t numa_node_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", dev_to_node(dev)); +} +static DEVICE_ATTR_RO(numa_node); + +static struct attribute *nd_numa_attributes[] = { + &dev_attr_numa_node.attr, + NULL, +}; + +static umode_t nd_numa_attr_visible(struct kobject *kobj, struct attribute *a, + int n) +{ + if (!IS_ENABLED(CONFIG_NUMA)) + return 0; + + return a->mode; +} + +/** + * nd_numa_attribute_group - NUMA attributes for all devices on an nd bus + */ +struct attribute_group nd_numa_attribute_group = { + .attrs = nd_numa_attributes, + .is_visible = nd_numa_attr_visible, +}; +EXPORT_SYMBOL_GPL(nd_numa_attribute_group); + +int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus) +{ + dev_t devt = MKDEV(nvdimm_bus_major, nvdimm_bus->id); + struct device *dev; + + dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus, + "ndctl%d", nvdimm_bus->id); + + if (IS_ERR(dev)) { + dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n", + nvdimm_bus->id, PTR_ERR(dev)); + return PTR_ERR(dev); + } + return 0; +} + +void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus) +{ + device_destroy(nd_class, MKDEV(nvdimm_bus_major, nvdimm_bus->id)); +} + +static const struct nd_cmd_desc __nd_cmd_dimm_descs[] = { + [ND_CMD_IMPLEMENTED] = { }, + [ND_CMD_SMART] = { + .out_num = 2, + .out_sizes = { 4, 8, }, + }, + [ND_CMD_SMART_THRESHOLD] = { + .out_num = 2, + .out_sizes = { 4, 8, }, + }, + [ND_CMD_DIMM_FLAGS] = { + .out_num = 2, + .out_sizes = { 4, 4 }, + }, + [ND_CMD_GET_CONFIG_SIZE] = { + .out_num = 3, + .out_sizes = { 4, 4, 4, }, + }, + [ND_CMD_GET_CONFIG_DATA] = { + .in_num = 2, + .in_sizes = { 4, 4, }, + .out_num = 2, + .out_sizes = { 4, UINT_MAX, }, + }, + [ND_CMD_SET_CONFIG_DATA] = { + .in_num = 3, + .in_sizes = { 4, 4, UINT_MAX, }, + .out_num = 1, + .out_sizes = { 4, }, + }, + [ND_CMD_VENDOR] = { + .in_num = 3, + .in_sizes = { 4, 4, UINT_MAX, }, + .out_num = 3, + .out_sizes = { 4, 4, UINT_MAX, }, + }, +}; + +const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd) +{ + if (cmd < ARRAY_SIZE(__nd_cmd_dimm_descs)) + return &__nd_cmd_dimm_descs[cmd]; + return NULL; +} +EXPORT_SYMBOL_GPL(nd_cmd_dimm_desc); + +static const struct nd_cmd_desc __nd_cmd_bus_descs[] = { + [ND_CMD_IMPLEMENTED] = { }, + [ND_CMD_ARS_CAP] = { + .in_num = 2, + .in_sizes = { 8, 8, }, + .out_num = 2, + .out_sizes = { 4, 4, }, + }, + [ND_CMD_ARS_START] = { + .in_num = 4, + .in_sizes = { 8, 8, 2, 6, }, + .out_num = 1, + .out_sizes = { 4, }, + }, + [ND_CMD_ARS_STATUS] = { + .out_num = 2, + .out_sizes = { 4, UINT_MAX, }, + }, +}; + +const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd) +{ + if (cmd < ARRAY_SIZE(__nd_cmd_bus_descs)) + return &__nd_cmd_bus_descs[cmd]; + return NULL; +} +EXPORT_SYMBOL_GPL(nd_cmd_bus_desc); + +u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, + const struct nd_cmd_desc *desc, int idx, void *buf) +{ + if (idx >= desc->in_num) + return UINT_MAX; + + if (desc->in_sizes[idx] < UINT_MAX) + return desc->in_sizes[idx]; + + if (nvdimm && cmd == ND_CMD_SET_CONFIG_DATA && idx == 2) { + struct nd_cmd_set_config_hdr *hdr = buf; + + return hdr->in_length; + } else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2) { + struct nd_cmd_vendor_hdr *hdr = buf; + + return hdr->in_length; + } + + return UINT_MAX; +} +EXPORT_SYMBOL_GPL(nd_cmd_in_size); + +u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd, + const struct nd_cmd_desc *desc, int idx, const u32 *in_field, + const u32 *out_field) +{ + if (idx >= desc->out_num) + return UINT_MAX; + + if (desc->out_sizes[idx] < UINT_MAX) + return desc->out_sizes[idx]; + + if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && idx == 1) + return in_field[1]; + else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2) + return out_field[1]; + else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 1) + return ND_CMD_ARS_STATUS_MAX; + + return UINT_MAX; +} +EXPORT_SYMBOL_GPL(nd_cmd_out_size); + +void wait_nvdimm_bus_probe_idle(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + do { + if (nvdimm_bus->probe_active == 0) + break; + nvdimm_bus_unlock(&nvdimm_bus->dev); + wait_event(nvdimm_bus->probe_wait, + nvdimm_bus->probe_active == 0); + nvdimm_bus_lock(&nvdimm_bus->dev); + } while (true); +} + +/* set_config requires an idle interleave set */ +static int nd_cmd_clear_to_send(struct nvdimm *nvdimm, unsigned int cmd) +{ + struct nvdimm_bus *nvdimm_bus; + + if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA) + return 0; + + nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev); + wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev); + + if (atomic_read(&nvdimm->busy)) + return -EBUSY; + return 0; +} + +static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, + int read_only, unsigned int ioctl_cmd, unsigned long arg) +{ + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + size_t buf_len = 0, in_len = 0, out_len = 0; + static char out_env[ND_CMD_MAX_ENVELOPE]; + static char in_env[ND_CMD_MAX_ENVELOPE]; + const struct nd_cmd_desc *desc = NULL; + unsigned int cmd = _IOC_NR(ioctl_cmd); + void __user *p = (void __user *) arg; + struct device *dev = &nvdimm_bus->dev; + const char *cmd_name, *dimm_name; + unsigned long dsm_mask; + void *buf; + int rc, i; + + if (nvdimm) { + desc = nd_cmd_dimm_desc(cmd); + cmd_name = nvdimm_cmd_name(cmd); + dsm_mask = nvdimm->dsm_mask ? *(nvdimm->dsm_mask) : 0; + dimm_name = dev_name(&nvdimm->dev); + } else { + desc = nd_cmd_bus_desc(cmd); + cmd_name = nvdimm_bus_cmd_name(cmd); + dsm_mask = nd_desc->dsm_mask; + dimm_name = "bus"; + } + + if (!desc || (desc->out_num + desc->in_num == 0) || + !test_bit(cmd, &dsm_mask)) + return -ENOTTY; + + /* fail write commands (when read-only) */ + if (read_only) + switch (ioctl_cmd) { + case ND_IOCTL_VENDOR: + case ND_IOCTL_SET_CONFIG_DATA: + case ND_IOCTL_ARS_START: + dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n", + nvdimm ? nvdimm_cmd_name(cmd) + : nvdimm_bus_cmd_name(cmd)); + return -EPERM; + default: + break; + } + + /* process an input envelope */ + for (i = 0; i < desc->in_num; i++) { + u32 in_size, copy; + + in_size = nd_cmd_in_size(nvdimm, cmd, desc, i, in_env); + if (in_size == UINT_MAX) { + dev_err(dev, "%s:%s unknown input size cmd: %s field: %d\n", + __func__, dimm_name, cmd_name, i); + return -ENXIO; + } + if (!access_ok(VERIFY_READ, p + in_len, in_size)) + return -EFAULT; + if (in_len < sizeof(in_env)) + copy = min_t(u32, sizeof(in_env) - in_len, in_size); + else + copy = 0; + if (copy && copy_from_user(&in_env[in_len], p + in_len, copy)) + return -EFAULT; + in_len += in_size; + } + + /* process an output envelope */ + for (i = 0; i < desc->out_num; i++) { + u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, + (u32 *) in_env, (u32 *) out_env); + u32 copy; + + if (out_size == UINT_MAX) { + dev_dbg(dev, "%s:%s unknown output size cmd: %s field: %d\n", + __func__, dimm_name, cmd_name, i); + return -EFAULT; + } + if (!access_ok(VERIFY_WRITE, p + in_len + out_len, out_size)) + return -EFAULT; + if (out_len < sizeof(out_env)) + copy = min_t(u32, sizeof(out_env) - out_len, out_size); + else + copy = 0; + if (copy && copy_from_user(&out_env[out_len], + p + in_len + out_len, copy)) + return -EFAULT; + out_len += out_size; + } + + buf_len = out_len + in_len; + if (!access_ok(VERIFY_WRITE, p, sizeof(buf_len))) + return -EFAULT; + + if (buf_len > ND_IOCTL_MAX_BUFLEN) { + dev_dbg(dev, "%s:%s cmd: %s buf_len: %zu > %d\n", __func__, + dimm_name, cmd_name, buf_len, + ND_IOCTL_MAX_BUFLEN); + return -EINVAL; + } + + buf = vmalloc(buf_len); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, p, buf_len)) { + rc = -EFAULT; + goto out; + } + + nvdimm_bus_lock(&nvdimm_bus->dev); + rc = nd_cmd_clear_to_send(nvdimm, cmd); + if (rc) + goto out_unlock; + + rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len); + if (rc < 0) + goto out_unlock; + if (copy_to_user(p, buf, buf_len)) + rc = -EFAULT; + out_unlock: + nvdimm_bus_unlock(&nvdimm_bus->dev); + out: + vfree(buf); + return rc; +} + +static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + long id = (long) file->private_data; + int rc = -ENXIO, read_only; + struct nvdimm_bus *nvdimm_bus; + + read_only = (O_RDWR != (file->f_flags & O_ACCMODE)); + mutex_lock(&nvdimm_bus_list_mutex); + list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) { + if (nvdimm_bus->id == id) { + rc = __nd_ioctl(nvdimm_bus, NULL, read_only, cmd, arg); + break; + } + } + mutex_unlock(&nvdimm_bus_list_mutex); + + return rc; +} + +static int match_dimm(struct device *dev, void *data) +{ + long id = (long) data; + + if (is_nvdimm(dev)) { + struct nvdimm *nvdimm = to_nvdimm(dev); + + return nvdimm->id == id; + } + + return 0; +} + +static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + int rc = -ENXIO, read_only; + struct nvdimm_bus *nvdimm_bus; + + read_only = (O_RDWR != (file->f_flags & O_ACCMODE)); + mutex_lock(&nvdimm_bus_list_mutex); + list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) { + struct device *dev = device_find_child(&nvdimm_bus->dev, + file->private_data, match_dimm); + struct nvdimm *nvdimm; + + if (!dev) + continue; + + nvdimm = to_nvdimm(dev); + rc = __nd_ioctl(nvdimm_bus, nvdimm, read_only, cmd, arg); + put_device(dev); + break; + } + mutex_unlock(&nvdimm_bus_list_mutex); + + return rc; +} + +static int nd_open(struct inode *inode, struct file *file) +{ + long minor = iminor(inode); + + file->private_data = (void *) minor; + return 0; +} + +static const struct file_operations nvdimm_bus_fops = { + .owner = THIS_MODULE, + .open = nd_open, + .unlocked_ioctl = nd_ioctl, + .compat_ioctl = nd_ioctl, + .llseek = noop_llseek, +}; + +static const struct file_operations nvdimm_fops = { + .owner = THIS_MODULE, + .open = nd_open, + .unlocked_ioctl = nvdimm_ioctl, + .compat_ioctl = nvdimm_ioctl, + .llseek = noop_llseek, +}; + +int __init nvdimm_bus_init(void) +{ + int rc; + + rc = bus_register(&nvdimm_bus_type); + if (rc) + return rc; + + rc = register_chrdev(0, "ndctl", &nvdimm_bus_fops); + if (rc < 0) + goto err_bus_chrdev; + nvdimm_bus_major = rc; + + rc = register_chrdev(0, "dimmctl", &nvdimm_fops); + if (rc < 0) + goto err_dimm_chrdev; + nvdimm_major = rc; + + nd_class = class_create(THIS_MODULE, "nd"); + if (IS_ERR(nd_class)) + goto err_class; + + return 0; + + err_class: + unregister_chrdev(nvdimm_major, "dimmctl"); + err_dimm_chrdev: + unregister_chrdev(nvdimm_bus_major, "ndctl"); + err_bus_chrdev: + bus_unregister(&nvdimm_bus_type); + + return rc; +} + +void nvdimm_bus_exit(void) +{ + class_destroy(nd_class); + unregister_chrdev(nvdimm_bus_major, "ndctl"); + unregister_chrdev(nvdimm_major, "dimmctl"); + bus_unregister(&nvdimm_bus_type); +} diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c new file mode 100644 index 000000000000..cb62ec6a12d0 --- /dev/null +++ b/drivers/nvdimm/core.c @@ -0,0 +1,465 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/libnvdimm.h> +#include <linux/export.h> +#include <linux/module.h> +#include <linux/blkdev.h> +#include <linux/device.h> +#include <linux/ctype.h> +#include <linux/ndctl.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include "nd-core.h" +#include "nd.h" + +LIST_HEAD(nvdimm_bus_list); +DEFINE_MUTEX(nvdimm_bus_list_mutex); +static DEFINE_IDA(nd_ida); + +void nvdimm_bus_lock(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return; + mutex_lock(&nvdimm_bus->reconfig_mutex); +} +EXPORT_SYMBOL(nvdimm_bus_lock); + +void nvdimm_bus_unlock(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return; + mutex_unlock(&nvdimm_bus->reconfig_mutex); +} +EXPORT_SYMBOL(nvdimm_bus_unlock); + +bool is_nvdimm_bus_locked(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return false; + return mutex_is_locked(&nvdimm_bus->reconfig_mutex); +} +EXPORT_SYMBOL(is_nvdimm_bus_locked); + +u64 nd_fletcher64(void *addr, size_t len, bool le) +{ + u32 *buf = addr; + u32 lo32 = 0; + u64 hi32 = 0; + int i; + + for (i = 0; i < len / sizeof(u32); i++) { + lo32 += le ? le32_to_cpu((__le32) buf[i]) : buf[i]; + hi32 += lo32; + } + + return hi32 << 32 | lo32; +} +EXPORT_SYMBOL_GPL(nd_fletcher64); + +static void nvdimm_bus_release(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus; + + nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); + ida_simple_remove(&nd_ida, nvdimm_bus->id); + kfree(nvdimm_bus); +} + +struct nvdimm_bus *to_nvdimm_bus(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus; + + nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); + WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release); + return nvdimm_bus; +} +EXPORT_SYMBOL_GPL(to_nvdimm_bus); + +struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus) +{ + /* struct nvdimm_bus definition is private to libnvdimm */ + return nvdimm_bus->nd_desc; +} +EXPORT_SYMBOL_GPL(to_nd_desc); + +struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev) +{ + struct device *dev; + + for (dev = nd_dev; dev; dev = dev->parent) + if (dev->release == nvdimm_bus_release) + break; + dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n"); + if (dev) + return to_nvdimm_bus(dev); + return NULL; +} + +static bool is_uuid_sep(char sep) +{ + if (sep == '\n' || sep == '-' || sep == ':' || sep == '\0') + return true; + return false; +} + +static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf, + size_t len) +{ + const char *str = buf; + u8 uuid[16]; + int i; + + for (i = 0; i < 16; i++) { + if (!isxdigit(str[0]) || !isxdigit(str[1])) { + dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n", + __func__, i, str - buf, str[0], + str + 1 - buf, str[1]); + return -EINVAL; + } + + uuid[i] = (hex_to_bin(str[0]) << 4) | hex_to_bin(str[1]); + str += 2; + if (is_uuid_sep(*str)) + str++; + } + + memcpy(uuid_out, uuid, sizeof(uuid)); + return 0; +} + +/** + * nd_uuid_store: common implementation for writing 'uuid' sysfs attributes + * @dev: container device for the uuid property + * @uuid_out: uuid buffer to replace + * @buf: raw sysfs buffer to parse + * + * Enforce that uuids can only be changed while the device is disabled + * (driver detached) + * LOCKING: expects device_lock() is held on entry + */ +int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, + size_t len) +{ + u8 uuid[16]; + int rc; + + if (dev->driver) + return -EBUSY; + + rc = nd_uuid_parse(dev, uuid, buf, len); + if (rc) + return rc; + + kfree(*uuid_out); + *uuid_out = kmemdup(uuid, sizeof(uuid), GFP_KERNEL); + if (!(*uuid_out)) + return -ENOMEM; + + return 0; +} + +ssize_t nd_sector_size_show(unsigned long current_lbasize, + const unsigned long *supported, char *buf) +{ + ssize_t len = 0; + int i; + + for (i = 0; supported[i]; i++) + if (current_lbasize == supported[i]) + len += sprintf(buf + len, "[%ld] ", supported[i]); + else + len += sprintf(buf + len, "%ld ", supported[i]); + len += sprintf(buf + len, "\n"); + return len; +} + +ssize_t nd_sector_size_store(struct device *dev, const char *buf, + unsigned long *current_lbasize, const unsigned long *supported) +{ + unsigned long lbasize; + int rc, i; + + if (dev->driver) + return -EBUSY; + + rc = kstrtoul(buf, 0, &lbasize); + if (rc) + return rc; + + for (i = 0; supported[i]; i++) + if (lbasize == supported[i]) + break; + + if (supported[i]) { + *current_lbasize = lbasize; + return 0; + } else { + return -EINVAL; + } +} + +void __nd_iostat_start(struct bio *bio, unsigned long *start) +{ + struct gendisk *disk = bio->bi_bdev->bd_disk; + const int rw = bio_data_dir(bio); + int cpu = part_stat_lock(); + + *start = jiffies; + part_round_stats(cpu, &disk->part0); + part_stat_inc(cpu, &disk->part0, ios[rw]); + part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio)); + part_inc_in_flight(&disk->part0, rw); + part_stat_unlock(); +} +EXPORT_SYMBOL(__nd_iostat_start); + +void nd_iostat_end(struct bio *bio, unsigned long start) +{ + struct gendisk *disk = bio->bi_bdev->bd_disk; + unsigned long duration = jiffies - start; + const int rw = bio_data_dir(bio); + int cpu = part_stat_lock(); + + part_stat_add(cpu, &disk->part0, ticks[rw], duration); + part_round_stats(cpu, &disk->part0); + part_dec_in_flight(&disk->part0, rw); + part_stat_unlock(); +} +EXPORT_SYMBOL(nd_iostat_end); + +static ssize_t commands_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cmd, len = 0; + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + + for_each_set_bit(cmd, &nd_desc->dsm_mask, BITS_PER_LONG) + len += sprintf(buf + len, "%s ", nvdimm_bus_cmd_name(cmd)); + len += sprintf(buf + len, "\n"); + return len; +} +static DEVICE_ATTR_RO(commands); + +static const char *nvdimm_bus_provider(struct nvdimm_bus *nvdimm_bus) +{ + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + struct device *parent = nvdimm_bus->dev.parent; + + if (nd_desc->provider_name) + return nd_desc->provider_name; + else if (parent) + return dev_name(parent); + else + return "unknown"; +} + +static ssize_t provider_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + + return sprintf(buf, "%s\n", nvdimm_bus_provider(nvdimm_bus)); +} +static DEVICE_ATTR_RO(provider); + +static int flush_namespaces(struct device *dev, void *data) +{ + device_lock(dev); + device_unlock(dev); + return 0; +} + +static int flush_regions_dimms(struct device *dev, void *data) +{ + device_lock(dev); + device_unlock(dev); + device_for_each_child(dev, NULL, flush_namespaces); + return 0; +} + +static ssize_t wait_probe_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + nd_synchronize(); + device_for_each_child(dev, NULL, flush_regions_dimms); + return sprintf(buf, "1\n"); +} +static DEVICE_ATTR_RO(wait_probe); + +static struct attribute *nvdimm_bus_attributes[] = { + &dev_attr_commands.attr, + &dev_attr_wait_probe.attr, + &dev_attr_provider.attr, + NULL, +}; + +struct attribute_group nvdimm_bus_attribute_group = { + .attrs = nvdimm_bus_attributes, +}; +EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group); + +struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, + struct nvdimm_bus_descriptor *nd_desc, struct module *module) +{ + struct nvdimm_bus *nvdimm_bus; + int rc; + + nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL); + if (!nvdimm_bus) + return NULL; + INIT_LIST_HEAD(&nvdimm_bus->list); + init_waitqueue_head(&nvdimm_bus->probe_wait); + nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); + mutex_init(&nvdimm_bus->reconfig_mutex); + if (nvdimm_bus->id < 0) { + kfree(nvdimm_bus); + return NULL; + } + nvdimm_bus->nd_desc = nd_desc; + nvdimm_bus->module = module; + nvdimm_bus->dev.parent = parent; + nvdimm_bus->dev.release = nvdimm_bus_release; + nvdimm_bus->dev.groups = nd_desc->attr_groups; + dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); + rc = device_register(&nvdimm_bus->dev); + if (rc) { + dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc); + goto err; + } + + rc = nvdimm_bus_create_ndctl(nvdimm_bus); + if (rc) + goto err; + + mutex_lock(&nvdimm_bus_list_mutex); + list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list); + mutex_unlock(&nvdimm_bus_list_mutex); + + return nvdimm_bus; + err: + put_device(&nvdimm_bus->dev); + return NULL; +} +EXPORT_SYMBOL_GPL(__nvdimm_bus_register); + +static int child_unregister(struct device *dev, void *data) +{ + /* + * the singular ndctl class device per bus needs to be + * "device_destroy"ed, so skip it here + * + * i.e. remove classless children + */ + if (dev->class) + /* pass */; + else + nd_device_unregister(dev, ND_SYNC); + return 0; +} + +void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus) +{ + if (!nvdimm_bus) + return; + + mutex_lock(&nvdimm_bus_list_mutex); + list_del_init(&nvdimm_bus->list); + mutex_unlock(&nvdimm_bus_list_mutex); + + nd_synchronize(); + device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); + nvdimm_bus_destroy_ndctl(nvdimm_bus); + + device_unregister(&nvdimm_bus->dev); +} +EXPORT_SYMBOL_GPL(nvdimm_bus_unregister); + +#ifdef CONFIG_BLK_DEV_INTEGRITY +static int nd_pi_nop_generate_verify(struct blk_integrity_iter *iter) +{ + return 0; +} + +int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) +{ + struct blk_integrity integrity = { + .name = "ND-PI-NOP", + .generate_fn = nd_pi_nop_generate_verify, + .verify_fn = nd_pi_nop_generate_verify, + .tuple_size = meta_size, + .tag_size = meta_size, + }; + int ret; + + if (meta_size == 0) + return 0; + + ret = blk_integrity_register(disk, &integrity); + if (ret) + return ret; + + blk_queue_max_integrity_segments(disk->queue, 1); + + return 0; +} +EXPORT_SYMBOL(nd_integrity_init); + +#else /* CONFIG_BLK_DEV_INTEGRITY */ +int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) +{ + return 0; +} +EXPORT_SYMBOL(nd_integrity_init); + +#endif + +static __init int libnvdimm_init(void) +{ + int rc; + + rc = nvdimm_bus_init(); + if (rc) + return rc; + rc = nvdimm_init(); + if (rc) + goto err_dimm; + rc = nd_region_init(); + if (rc) + goto err_region; + return 0; + err_region: + nvdimm_exit(); + err_dimm: + nvdimm_bus_exit(); + return rc; +} + +static __exit void libnvdimm_exit(void) +{ + WARN_ON(!list_empty(&nvdimm_bus_list)); + nd_region_exit(); + nvdimm_exit(); + nvdimm_bus_exit(); +} + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); +subsys_initcall(libnvdimm_init); +module_exit(libnvdimm_exit); diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c new file mode 100644 index 000000000000..71d12bb67339 --- /dev/null +++ b/drivers/nvdimm/dimm.c @@ -0,0 +1,102 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/vmalloc.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/sizes.h> +#include <linux/ndctl.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/nd.h> +#include "label.h" +#include "nd.h" + +static int nvdimm_probe(struct device *dev) +{ + struct nvdimm_drvdata *ndd; + int rc; + + ndd = kzalloc(sizeof(*ndd), GFP_KERNEL); + if (!ndd) + return -ENOMEM; + + dev_set_drvdata(dev, ndd); + ndd->dpa.name = dev_name(dev); + ndd->ns_current = -1; + ndd->ns_next = -1; + ndd->dpa.start = 0; + ndd->dpa.end = -1; + ndd->dev = dev; + get_device(dev); + kref_init(&ndd->kref); + + rc = nvdimm_init_nsarea(ndd); + if (rc) + goto err; + + rc = nvdimm_init_config_data(ndd); + if (rc) + goto err; + + dev_dbg(dev, "config data size: %d\n", ndd->nsarea.config_size); + + nvdimm_bus_lock(dev); + ndd->ns_current = nd_label_validate(ndd); + ndd->ns_next = nd_label_next_nsindex(ndd->ns_current); + nd_label_copy(ndd, to_next_namespace_index(ndd), + to_current_namespace_index(ndd)); + rc = nd_label_reserve_dpa(ndd); + nvdimm_bus_unlock(dev); + + if (rc) + goto err; + + return 0; + + err: + put_ndd(ndd); + return rc; +} + +static int nvdimm_remove(struct device *dev) +{ + struct nvdimm_drvdata *ndd = dev_get_drvdata(dev); + + nvdimm_bus_lock(dev); + dev_set_drvdata(dev, NULL); + nvdimm_bus_unlock(dev); + put_ndd(ndd); + + return 0; +} + +static struct nd_device_driver nvdimm_driver = { + .probe = nvdimm_probe, + .remove = nvdimm_remove, + .drv = { + .name = "nvdimm", + }, + .type = ND_DRIVER_DIMM, +}; + +int __init nvdimm_init(void) +{ + return nd_driver_register(&nvdimm_driver); +} + +void nvdimm_exit(void) +{ + driver_unregister(&nvdimm_driver.drv); +} + +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DIMM); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c new file mode 100644 index 000000000000..c05eb807d674 --- /dev/null +++ b/drivers/nvdimm/dimm_devs.c @@ -0,0 +1,551 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/vmalloc.h> +#include <linux/device.h> +#include <linux/ndctl.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include "nd-core.h" +#include "label.h" +#include "nd.h" + +static DEFINE_IDA(dimm_ida); + +/* + * Retrieve bus and dimm handle and return if this bus supports + * get_config_data commands + */ +static int __validate_dimm(struct nvdimm_drvdata *ndd) +{ + struct nvdimm *nvdimm; + + if (!ndd) + return -EINVAL; + + nvdimm = to_nvdimm(ndd->dev); + + if (!nvdimm->dsm_mask) + return -ENXIO; + if (!test_bit(ND_CMD_GET_CONFIG_DATA, nvdimm->dsm_mask)) + return -ENXIO; + + return 0; +} + +static int validate_dimm(struct nvdimm_drvdata *ndd) +{ + int rc = __validate_dimm(ndd); + + if (rc && ndd) + dev_dbg(ndd->dev, "%pf: %s error: %d\n", + __builtin_return_address(0), __func__, rc); + return rc; +} + +/** + * nvdimm_init_nsarea - determine the geometry of a dimm's namespace area + * @nvdimm: dimm to initialize + */ +int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd) +{ + struct nd_cmd_get_config_size *cmd = &ndd->nsarea; + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); + struct nvdimm_bus_descriptor *nd_desc; + int rc = validate_dimm(ndd); + + if (rc) + return rc; + + if (cmd->config_size) + return 0; /* already valid */ + + memset(cmd, 0, sizeof(*cmd)); + nd_desc = nvdimm_bus->nd_desc; + return nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), + ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd)); +} + +int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); + struct nd_cmd_get_config_data_hdr *cmd; + struct nvdimm_bus_descriptor *nd_desc; + int rc = validate_dimm(ndd); + u32 max_cmd_size, config_size; + size_t offset; + + if (rc) + return rc; + + if (ndd->data) + return 0; + + if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0 + || ndd->nsarea.config_size < ND_LABEL_MIN_SIZE) { + dev_dbg(ndd->dev, "failed to init config data area: (%d:%d)\n", + ndd->nsarea.max_xfer, ndd->nsarea.config_size); + return -ENXIO; + } + + ndd->data = kmalloc(ndd->nsarea.config_size, GFP_KERNEL); + if (!ndd->data) + ndd->data = vmalloc(ndd->nsarea.config_size); + + if (!ndd->data) + return -ENOMEM; + + max_cmd_size = min_t(u32, PAGE_SIZE, ndd->nsarea.max_xfer); + cmd = kzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + nd_desc = nvdimm_bus->nd_desc; + for (config_size = ndd->nsarea.config_size, offset = 0; + config_size; config_size -= cmd->in_length, + offset += cmd->in_length) { + cmd->in_length = min(config_size, max_cmd_size); + cmd->in_offset = offset; + rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), + ND_CMD_GET_CONFIG_DATA, cmd, + cmd->in_length + sizeof(*cmd)); + if (rc || cmd->status) { + rc = -ENXIO; + break; + } + memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length); + } + dev_dbg(ndd->dev, "%s: len: %zu rc: %d\n", __func__, offset, rc); + kfree(cmd); + + return rc; +} + +int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, + void *buf, size_t len) +{ + int rc = validate_dimm(ndd); + size_t max_cmd_size, buf_offset; + struct nd_cmd_set_config_hdr *cmd; + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + + if (rc) + return rc; + + if (!ndd->data) + return -ENXIO; + + if (offset + len > ndd->nsarea.config_size) + return -ENXIO; + + max_cmd_size = min_t(u32, PAGE_SIZE, len); + max_cmd_size = min_t(u32, max_cmd_size, ndd->nsarea.max_xfer); + cmd = kzalloc(max_cmd_size + sizeof(*cmd) + sizeof(u32), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + for (buf_offset = 0; len; len -= cmd->in_length, + buf_offset += cmd->in_length) { + size_t cmd_size; + u32 *status; + + cmd->in_offset = offset + buf_offset; + cmd->in_length = min(max_cmd_size, len); + memcpy(cmd->in_buf, buf + buf_offset, cmd->in_length); + + /* status is output in the last 4-bytes of the command buffer */ + cmd_size = sizeof(*cmd) + cmd->in_length + sizeof(u32); + status = ((void *) cmd) + cmd_size - sizeof(u32); + + rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), + ND_CMD_SET_CONFIG_DATA, cmd, cmd_size); + if (rc || *status) { + rc = rc ? rc : -ENXIO; + break; + } + } + kfree(cmd); + + return rc; +} + +static void nvdimm_release(struct device *dev) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + + ida_simple_remove(&dimm_ida, nvdimm->id); + kfree(nvdimm); +} + +static struct device_type nvdimm_device_type = { + .name = "nvdimm", + .release = nvdimm_release, +}; + +bool is_nvdimm(struct device *dev) +{ + return dev->type == &nvdimm_device_type; +} + +struct nvdimm *to_nvdimm(struct device *dev) +{ + struct nvdimm *nvdimm = container_of(dev, struct nvdimm, dev); + + WARN_ON(!is_nvdimm(dev)); + return nvdimm; +} +EXPORT_SYMBOL_GPL(to_nvdimm); + +struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr) +{ + struct nd_region *nd_region = &ndbr->nd_region; + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + + return nd_mapping->nvdimm; +} +EXPORT_SYMBOL_GPL(nd_blk_region_to_dimm); + +struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping) +{ + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm->dev)); + + return dev_get_drvdata(&nvdimm->dev); +} +EXPORT_SYMBOL(to_ndd); + +void nvdimm_drvdata_release(struct kref *kref) +{ + struct nvdimm_drvdata *ndd = container_of(kref, typeof(*ndd), kref); + struct device *dev = ndd->dev; + struct resource *res, *_r; + + dev_dbg(dev, "%s\n", __func__); + + nvdimm_bus_lock(dev); + for_each_dpa_resource_safe(ndd, res, _r) + nvdimm_free_dpa(ndd, res); + nvdimm_bus_unlock(dev); + + if (ndd->data && is_vmalloc_addr(ndd->data)) + vfree(ndd->data); + else + kfree(ndd->data); + kfree(ndd); + put_device(dev); +} + +void get_ndd(struct nvdimm_drvdata *ndd) +{ + kref_get(&ndd->kref); +} + +void put_ndd(struct nvdimm_drvdata *ndd) +{ + if (ndd) + kref_put(&ndd->kref, nvdimm_drvdata_release); +} + +const char *nvdimm_name(struct nvdimm *nvdimm) +{ + return dev_name(&nvdimm->dev); +} +EXPORT_SYMBOL_GPL(nvdimm_name); + +void *nvdimm_provider_data(struct nvdimm *nvdimm) +{ + if (nvdimm) + return nvdimm->provider_data; + return NULL; +} +EXPORT_SYMBOL_GPL(nvdimm_provider_data); + +static ssize_t commands_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + int cmd, len = 0; + + if (!nvdimm->dsm_mask) + return sprintf(buf, "\n"); + + for_each_set_bit(cmd, nvdimm->dsm_mask, BITS_PER_LONG) + len += sprintf(buf + len, "%s ", nvdimm_cmd_name(cmd)); + len += sprintf(buf + len, "\n"); + return len; +} +static DEVICE_ATTR_RO(commands); + +static ssize_t state_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + + /* + * The state may be in the process of changing, userspace should + * quiesce probing if it wants a static answer + */ + nvdimm_bus_lock(dev); + nvdimm_bus_unlock(dev); + return sprintf(buf, "%s\n", atomic_read(&nvdimm->busy) + ? "active" : "idle"); +} +static DEVICE_ATTR_RO(state); + +static ssize_t available_slots_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm_drvdata *ndd = dev_get_drvdata(dev); + ssize_t rc; + u32 nfree; + + if (!ndd) + return -ENXIO; + + nvdimm_bus_lock(dev); + nfree = nd_label_nfree(ndd); + if (nfree - 1 > nfree) { + dev_WARN_ONCE(dev, 1, "we ate our last label?\n"); + nfree = 0; + } else + nfree--; + rc = sprintf(buf, "%d\n", nfree); + nvdimm_bus_unlock(dev); + return rc; +} +static DEVICE_ATTR_RO(available_slots); + +static struct attribute *nvdimm_attributes[] = { + &dev_attr_state.attr, + &dev_attr_commands.attr, + &dev_attr_available_slots.attr, + NULL, +}; + +struct attribute_group nvdimm_attribute_group = { + .attrs = nvdimm_attributes, +}; +EXPORT_SYMBOL_GPL(nvdimm_attribute_group); + +struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, + const struct attribute_group **groups, unsigned long flags, + unsigned long *dsm_mask) +{ + struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL); + struct device *dev; + + if (!nvdimm) + return NULL; + + nvdimm->id = ida_simple_get(&dimm_ida, 0, 0, GFP_KERNEL); + if (nvdimm->id < 0) { + kfree(nvdimm); + return NULL; + } + nvdimm->provider_data = provider_data; + nvdimm->flags = flags; + nvdimm->dsm_mask = dsm_mask; + atomic_set(&nvdimm->busy, 0); + dev = &nvdimm->dev; + dev_set_name(dev, "nmem%d", nvdimm->id); + dev->parent = &nvdimm_bus->dev; + dev->type = &nvdimm_device_type; + dev->devt = MKDEV(nvdimm_major, nvdimm->id); + dev->groups = groups; + nd_device_register(dev); + + return nvdimm; +} +EXPORT_SYMBOL_GPL(nvdimm_create); + +/** + * nd_blk_available_dpa - account the unused dpa of BLK region + * @nd_mapping: container of dpa-resource-root + labels + * + * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges. + */ +resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping) +{ + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + resource_size_t map_end, busy = 0, available; + struct resource *res; + + if (!ndd) + return 0; + + map_end = nd_mapping->start + nd_mapping->size - 1; + for_each_dpa_resource(ndd, res) + if (res->start >= nd_mapping->start && res->start < map_end) { + resource_size_t end = min(map_end, res->end); + + busy += end - res->start + 1; + } else if (res->end >= nd_mapping->start + && res->end <= map_end) { + busy += res->end - nd_mapping->start; + } else if (nd_mapping->start > res->start + && nd_mapping->start < res->end) { + /* total eclipse of the BLK region mapping */ + busy += nd_mapping->size; + } + + available = map_end - nd_mapping->start + 1; + if (busy < available) + return available - busy; + return 0; +} + +/** + * nd_pmem_available_dpa - for the given dimm+region account unallocated dpa + * @nd_mapping: container of dpa-resource-root + labels + * @nd_region: constrain available space check to this reference region + * @overlap: calculate available space assuming this level of overlap + * + * Validate that a PMEM label, if present, aligns with the start of an + * interleave set and truncate the available size at the lowest BLK + * overlap point. + * + * The expectation is that this routine is called multiple times as it + * probes for the largest BLK encroachment for any single member DIMM of + * the interleave set. Once that value is determined the PMEM-limit for + * the set can be established. + */ +resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, resource_size_t *overlap) +{ + resource_size_t map_start, map_end, busy = 0, available, blk_start; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res; + const char *reason; + + if (!ndd) + return 0; + + map_start = nd_mapping->start; + map_end = map_start + nd_mapping->size - 1; + blk_start = max(map_start, map_end + 1 - *overlap); + for_each_dpa_resource(ndd, res) + if (res->start >= map_start && res->start < map_end) { + if (strncmp(res->name, "blk", 3) == 0) + blk_start = min(blk_start, res->start); + else if (res->start != map_start) { + reason = "misaligned to iset"; + goto err; + } else { + if (busy) { + reason = "duplicate overlapping PMEM reservations?"; + goto err; + } + busy += resource_size(res); + continue; + } + } else if (res->end >= map_start && res->end <= map_end) { + if (strncmp(res->name, "blk", 3) == 0) { + /* + * If a BLK allocation overlaps the start of + * PMEM the entire interleave set may now only + * be used for BLK. + */ + blk_start = map_start; + } else { + reason = "misaligned to iset"; + goto err; + } + } else if (map_start > res->start && map_start < res->end) { + /* total eclipse of the mapping */ + busy += nd_mapping->size; + blk_start = map_start; + } + + *overlap = map_end + 1 - blk_start; + available = blk_start - map_start; + if (busy < available) + return available - busy; + return 0; + + err: + /* + * Something is wrong, PMEM must align with the start of the + * interleave set, and there can only be one allocation per set. + */ + nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason); + return 0; +} + +void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res) +{ + WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev)); + kfree(res->name); + __release_region(&ndd->dpa, res->start, resource_size(res)); +} + +struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id, resource_size_t start, + resource_size_t n) +{ + char *name = kmemdup(label_id, sizeof(*label_id), GFP_KERNEL); + struct resource *res; + + if (!name) + return NULL; + + WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev)); + res = __request_region(&ndd->dpa, start, n, name, 0); + if (!res) + kfree(name); + return res; +} + +/** + * nvdimm_allocated_dpa - sum up the dpa currently allocated to this label_id + * @nvdimm: container of dpa-resource-root + labels + * @label_id: dpa resource name of the form {pmem|blk}-<human readable uuid> + */ +resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id) +{ + resource_size_t allocated = 0; + struct resource *res; + + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id->id) == 0) + allocated += resource_size(res); + + return allocated; +} + +static int count_dimms(struct device *dev, void *c) +{ + int *count = c; + + if (is_nvdimm(dev)) + (*count)++; + return 0; +} + +int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count) +{ + int count = 0; + /* Flush any possible dimm registration failures */ + nd_synchronize(); + + device_for_each_child(&nvdimm_bus->dev, &count, count_dimms); + dev_dbg(&nvdimm_bus->dev, "%s: count: %d\n", __func__, count); + if (count != dimm_count) + return -ENXIO; + return 0; +} +EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count); diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c new file mode 100644 index 000000000000..96526dcfdd37 --- /dev/null +++ b/drivers/nvdimm/label.c @@ -0,0 +1,927 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/device.h> +#include <linux/ndctl.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/nd.h> +#include "nd-core.h" +#include "label.h" +#include "nd.h" + +static u32 best_seq(u32 a, u32 b) +{ + a &= NSINDEX_SEQ_MASK; + b &= NSINDEX_SEQ_MASK; + + if (a == 0 || a == b) + return b; + else if (b == 0) + return a; + else if (nd_inc_seq(a) == b) + return b; + else + return a; +} + +size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) +{ + u32 index_span; + + if (ndd->nsindex_size) + return ndd->nsindex_size; + + /* + * The minimum index space is 512 bytes, with that amount of + * index we can describe ~1400 labels which is less than a byte + * of overhead per label. Round up to a byte of overhead per + * label and determine the size of the index region. Yes, this + * starts to waste space at larger config_sizes, but it's + * unlikely we'll ever see anything but 128K. + */ + index_span = ndd->nsarea.config_size / 129; + index_span /= NSINDEX_ALIGN * 2; + ndd->nsindex_size = index_span * NSINDEX_ALIGN; + + return ndd->nsindex_size; +} + +int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd) +{ + return ndd->nsarea.config_size / 129; +} + +int nd_label_validate(struct nvdimm_drvdata *ndd) +{ + /* + * On media label format consists of two index blocks followed + * by an array of labels. None of these structures are ever + * updated in place. A sequence number tracks the current + * active index and the next one to write, while labels are + * written to free slots. + * + * +------------+ + * | | + * | nsindex0 | + * | | + * +------------+ + * | | + * | nsindex1 | + * | | + * +------------+ + * | label0 | + * +------------+ + * | label1 | + * +------------+ + * | | + * ....nslot... + * | | + * +------------+ + * | labelN | + * +------------+ + */ + struct nd_namespace_index *nsindex[] = { + to_namespace_index(ndd, 0), + to_namespace_index(ndd, 1), + }; + const int num_index = ARRAY_SIZE(nsindex); + struct device *dev = ndd->dev; + bool valid[2] = { 0 }; + int i, num_valid = 0; + u32 seq; + + for (i = 0; i < num_index; i++) { + u32 nslot; + u8 sig[NSINDEX_SIG_LEN]; + u64 sum_save, sum, size; + + memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN); + if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) { + dev_dbg(dev, "%s: nsindex%d signature invalid\n", + __func__, i); + continue; + } + sum_save = __le64_to_cpu(nsindex[i]->checksum); + nsindex[i]->checksum = __cpu_to_le64(0); + sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1); + nsindex[i]->checksum = __cpu_to_le64(sum_save); + if (sum != sum_save) { + dev_dbg(dev, "%s: nsindex%d checksum invalid\n", + __func__, i); + continue; + } + + seq = __le32_to_cpu(nsindex[i]->seq); + if ((seq & NSINDEX_SEQ_MASK) == 0) { + dev_dbg(dev, "%s: nsindex%d sequence: %#x invalid\n", + __func__, i, seq); + continue; + } + + /* sanity check the index against expected values */ + if (__le64_to_cpu(nsindex[i]->myoff) + != i * sizeof_namespace_index(ndd)) { + dev_dbg(dev, "%s: nsindex%d myoff: %#llx invalid\n", + __func__, i, (unsigned long long) + __le64_to_cpu(nsindex[i]->myoff)); + continue; + } + if (__le64_to_cpu(nsindex[i]->otheroff) + != (!i) * sizeof_namespace_index(ndd)) { + dev_dbg(dev, "%s: nsindex%d otheroff: %#llx invalid\n", + __func__, i, (unsigned long long) + __le64_to_cpu(nsindex[i]->otheroff)); + continue; + } + + size = __le64_to_cpu(nsindex[i]->mysize); + if (size > sizeof_namespace_index(ndd) + || size < sizeof(struct nd_namespace_index)) { + dev_dbg(dev, "%s: nsindex%d mysize: %#llx invalid\n", + __func__, i, size); + continue; + } + + nslot = __le32_to_cpu(nsindex[i]->nslot); + if (nslot * sizeof(struct nd_namespace_label) + + 2 * sizeof_namespace_index(ndd) + > ndd->nsarea.config_size) { + dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n", + __func__, i, nslot, + ndd->nsarea.config_size); + continue; + } + valid[i] = true; + num_valid++; + } + + switch (num_valid) { + case 0: + break; + case 1: + for (i = 0; i < num_index; i++) + if (valid[i]) + return i; + /* can't have num_valid > 0 but valid[] = { false, false } */ + WARN_ON(1); + break; + default: + /* pick the best index... */ + seq = best_seq(__le32_to_cpu(nsindex[0]->seq), + __le32_to_cpu(nsindex[1]->seq)); + if (seq == (__le32_to_cpu(nsindex[1]->seq) & NSINDEX_SEQ_MASK)) + return 1; + else + return 0; + break; + } + + return -1; +} + +void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, + struct nd_namespace_index *src) +{ + if (dst && src) + /* pass */; + else + return; + + memcpy(dst, src, sizeof_namespace_index(ndd)); +} + +static struct nd_namespace_label *nd_label_base(struct nvdimm_drvdata *ndd) +{ + void *base = to_namespace_index(ndd, 0); + + return base + 2 * sizeof_namespace_index(ndd); +} + +static int to_slot(struct nvdimm_drvdata *ndd, + struct nd_namespace_label *nd_label) +{ + return nd_label - nd_label_base(ndd); +} + +#define for_each_clear_bit_le(bit, addr, size) \ + for ((bit) = find_next_zero_bit_le((addr), (size), 0); \ + (bit) < (size); \ + (bit) = find_next_zero_bit_le((addr), (size), (bit) + 1)) + +/** + * preamble_index - common variable initialization for nd_label_* routines + * @ndd: dimm container for the relevant label set + * @idx: namespace_index index + * @nsindex_out: on return set to the currently active namespace index + * @free: on return set to the free label bitmap in the index + * @nslot: on return set to the number of slots in the label space + */ +static bool preamble_index(struct nvdimm_drvdata *ndd, int idx, + struct nd_namespace_index **nsindex_out, + unsigned long **free, u32 *nslot) +{ + struct nd_namespace_index *nsindex; + + nsindex = to_namespace_index(ndd, idx); + if (nsindex == NULL) + return false; + + *free = (unsigned long *) nsindex->free; + *nslot = __le32_to_cpu(nsindex->nslot); + *nsindex_out = nsindex; + + return true; +} + +char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags) +{ + if (!label_id || !uuid) + return NULL; + snprintf(label_id->id, ND_LABEL_ID_SIZE, "%s-%pUb", + flags & NSLABEL_FLAG_LOCAL ? "blk" : "pmem", uuid); + return label_id->id; +} + +static bool preamble_current(struct nvdimm_drvdata *ndd, + struct nd_namespace_index **nsindex, + unsigned long **free, u32 *nslot) +{ + return preamble_index(ndd, ndd->ns_current, nsindex, + free, nslot); +} + +static bool preamble_next(struct nvdimm_drvdata *ndd, + struct nd_namespace_index **nsindex, + unsigned long **free, u32 *nslot) +{ + return preamble_index(ndd, ndd->ns_next, nsindex, + free, nslot); +} + +static bool slot_valid(struct nd_namespace_label *nd_label, u32 slot) +{ + /* check that we are written where we expect to be written */ + if (slot != __le32_to_cpu(nd_label->slot)) + return false; + + /* check that DPA allocations are page aligned */ + if ((__le64_to_cpu(nd_label->dpa) + | __le64_to_cpu(nd_label->rawsize)) % SZ_4K) + return false; + + return true; +} + +int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + + if (!preamble_current(ndd, &nsindex, &free, &nslot)) + return 0; /* no label, nothing to reserve */ + + for_each_clear_bit_le(slot, free, nslot) { + struct nd_namespace_label *nd_label; + struct nd_region *nd_region = NULL; + u8 label_uuid[NSLABEL_UUID_LEN]; + struct nd_label_id label_id; + struct resource *res; + u32 flags; + + nd_label = nd_label_base(ndd) + slot; + + if (!slot_valid(nd_label, slot)) + continue; + + memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN); + flags = __le32_to_cpu(nd_label->flags); + nd_label_gen_id(&label_id, label_uuid, flags); + res = nvdimm_allocate_dpa(ndd, &label_id, + __le64_to_cpu(nd_label->dpa), + __le64_to_cpu(nd_label->rawsize)); + nd_dbg_dpa(nd_region, ndd, res, "reserve\n"); + if (!res) + return -EBUSY; + } + + return 0; +} + +int nd_label_active_count(struct nvdimm_drvdata *ndd) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + int count = 0; + + if (!preamble_current(ndd, &nsindex, &free, &nslot)) + return 0; + + for_each_clear_bit_le(slot, free, nslot) { + struct nd_namespace_label *nd_label; + + nd_label = nd_label_base(ndd) + slot; + + if (!slot_valid(nd_label, slot)) { + u32 label_slot = __le32_to_cpu(nd_label->slot); + u64 size = __le64_to_cpu(nd_label->rawsize); + u64 dpa = __le64_to_cpu(nd_label->dpa); + + dev_dbg(ndd->dev, + "%s: slot%d invalid slot: %d dpa: %llx size: %llx\n", + __func__, slot, label_slot, dpa, size); + continue; + } + count++; + } + return count; +} + +struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + + if (!preamble_current(ndd, &nsindex, &free, &nslot)) + return NULL; + + for_each_clear_bit_le(slot, free, nslot) { + struct nd_namespace_label *nd_label; + + nd_label = nd_label_base(ndd) + slot; + if (!slot_valid(nd_label, slot)) + continue; + + if (n-- == 0) + return nd_label_base(ndd) + slot; + } + + return NULL; +} + +u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + + if (!preamble_next(ndd, &nsindex, &free, &nslot)) + return UINT_MAX; + + WARN_ON(!is_nvdimm_bus_locked(ndd->dev)); + + slot = find_next_bit_le(free, nslot, 0); + if (slot == nslot) + return UINT_MAX; + + clear_bit_le(slot, free); + + return slot; +} + +bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot; + + if (!preamble_next(ndd, &nsindex, &free, &nslot)) + return false; + + WARN_ON(!is_nvdimm_bus_locked(ndd->dev)); + + if (slot < nslot) + return !test_and_set_bit_le(slot, free); + return false; +} + +u32 nd_label_nfree(struct nvdimm_drvdata *ndd) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot; + + WARN_ON(!is_nvdimm_bus_locked(ndd->dev)); + + if (!preamble_next(ndd, &nsindex, &free, &nslot)) + return nvdimm_num_label_slots(ndd); + + return bitmap_weight(free, nslot); +} + +static int nd_label_write_index(struct nvdimm_drvdata *ndd, int index, u32 seq, + unsigned long flags) +{ + struct nd_namespace_index *nsindex; + unsigned long offset; + u64 checksum; + u32 nslot; + int rc; + + nsindex = to_namespace_index(ndd, index); + if (flags & ND_NSINDEX_INIT) + nslot = nvdimm_num_label_slots(ndd); + else + nslot = __le32_to_cpu(nsindex->nslot); + + memcpy(nsindex->sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN); + nsindex->flags = __cpu_to_le32(0); + nsindex->seq = __cpu_to_le32(seq); + offset = (unsigned long) nsindex + - (unsigned long) to_namespace_index(ndd, 0); + nsindex->myoff = __cpu_to_le64(offset); + nsindex->mysize = __cpu_to_le64(sizeof_namespace_index(ndd)); + offset = (unsigned long) to_namespace_index(ndd, + nd_label_next_nsindex(index)) + - (unsigned long) to_namespace_index(ndd, 0); + nsindex->otheroff = __cpu_to_le64(offset); + offset = (unsigned long) nd_label_base(ndd) + - (unsigned long) to_namespace_index(ndd, 0); + nsindex->labeloff = __cpu_to_le64(offset); + nsindex->nslot = __cpu_to_le32(nslot); + nsindex->major = __cpu_to_le16(1); + nsindex->minor = __cpu_to_le16(1); + nsindex->checksum = __cpu_to_le64(0); + if (flags & ND_NSINDEX_INIT) { + unsigned long *free = (unsigned long *) nsindex->free; + u32 nfree = ALIGN(nslot, BITS_PER_LONG); + int last_bits, i; + + memset(nsindex->free, 0xff, nfree / 8); + for (i = 0, last_bits = nfree - nslot; i < last_bits; i++) + clear_bit_le(nslot + i, free); + } + checksum = nd_fletcher64(nsindex, sizeof_namespace_index(ndd), 1); + nsindex->checksum = __cpu_to_le64(checksum); + rc = nvdimm_set_config_data(ndd, __le64_to_cpu(nsindex->myoff), + nsindex, sizeof_namespace_index(ndd)); + if (rc < 0) + return rc; + + if (flags & ND_NSINDEX_INIT) + return 0; + + /* copy the index we just wrote to the new 'next' */ + WARN_ON(index != ndd->ns_next); + nd_label_copy(ndd, to_current_namespace_index(ndd), nsindex); + ndd->ns_current = nd_label_next_nsindex(ndd->ns_current); + ndd->ns_next = nd_label_next_nsindex(ndd->ns_next); + WARN_ON(ndd->ns_current == ndd->ns_next); + + return 0; +} + +static unsigned long nd_label_offset(struct nvdimm_drvdata *ndd, + struct nd_namespace_label *nd_label) +{ + return (unsigned long) nd_label + - (unsigned long) to_namespace_index(ndd, 0); +} + +static int __pmem_label_update(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm, + int pos) +{ + u64 cookie = nd_region_interleave_set_cookie(nd_region), rawsize; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_namespace_label *victim_label; + struct nd_namespace_label *nd_label; + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + size_t offset; + int rc; + + if (!preamble_next(ndd, &nsindex, &free, &nslot)) + return -ENXIO; + + /* allocate and write the label to the staging (next) index */ + slot = nd_label_alloc_slot(ndd); + if (slot == UINT_MAX) + return -ENXIO; + dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot); + + nd_label = nd_label_base(ndd) + slot; + memset(nd_label, 0, sizeof(struct nd_namespace_label)); + memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN); + if (nspm->alt_name) + memcpy(nd_label->name, nspm->alt_name, NSLABEL_NAME_LEN); + nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_UPDATING); + nd_label->nlabel = __cpu_to_le16(nd_region->ndr_mappings); + nd_label->position = __cpu_to_le16(pos); + nd_label->isetcookie = __cpu_to_le64(cookie); + rawsize = div_u64(resource_size(&nspm->nsio.res), + nd_region->ndr_mappings); + nd_label->rawsize = __cpu_to_le64(rawsize); + nd_label->dpa = __cpu_to_le64(nd_mapping->start); + nd_label->slot = __cpu_to_le32(slot); + + /* update label */ + offset = nd_label_offset(ndd, nd_label); + rc = nvdimm_set_config_data(ndd, offset, nd_label, + sizeof(struct nd_namespace_label)); + if (rc < 0) + return rc; + + /* Garbage collect the previous label */ + victim_label = nd_mapping->labels[0]; + if (victim_label) { + slot = to_slot(ndd, victim_label); + nd_label_free_slot(ndd, slot); + dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot); + } + + /* update index */ + rc = nd_label_write_index(ndd, ndd->ns_next, + nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0); + if (rc < 0) + return rc; + + nd_mapping->labels[0] = nd_label; + + return 0; +} + +static void del_label(struct nd_mapping *nd_mapping, int l) +{ + struct nd_namespace_label *next_label, *nd_label; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + unsigned int slot; + int j; + + nd_label = nd_mapping->labels[l]; + slot = to_slot(ndd, nd_label); + dev_vdbg(ndd->dev, "%s: clear: %d\n", __func__, slot); + + for (j = l; (next_label = nd_mapping->labels[j + 1]); j++) + nd_mapping->labels[j] = next_label; + nd_mapping->labels[j] = NULL; +} + +static bool is_old_resource(struct resource *res, struct resource **list, int n) +{ + int i; + + if (res->flags & DPA_RESOURCE_ADJUSTED) + return false; + for (i = 0; i < n; i++) + if (res == list[i]) + return true; + return false; +} + +static struct resource *to_resource(struct nvdimm_drvdata *ndd, + struct nd_namespace_label *nd_label) +{ + struct resource *res; + + for_each_dpa_resource(ndd, res) { + if (res->start != __le64_to_cpu(nd_label->dpa)) + continue; + if (resource_size(res) != __le64_to_cpu(nd_label->rawsize)) + continue; + return res; + } + + return NULL; +} + +/* + * 1/ Account all the labels that can be freed after this update + * 2/ Allocate and write the label to the staging (next) index + * 3/ Record the resources in the namespace device + */ +static int __blk_label_update(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_namespace_blk *nsblk, + int num_labels) +{ + int i, l, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_namespace_label *nd_label; + struct nd_namespace_index *nsindex; + unsigned long *free, *victim_map = NULL; + struct resource *res, **old_res_list; + struct nd_label_id label_id; + u8 uuid[NSLABEL_UUID_LEN]; + u32 nslot, slot; + + if (!preamble_next(ndd, &nsindex, &free, &nslot)) + return -ENXIO; + + old_res_list = nsblk->res; + nfree = nd_label_nfree(ndd); + old_num_resources = nsblk->num_resources; + nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL); + + /* + * We need to loop over the old resources a few times, which seems a + * bit inefficient, but we need to know that we have the label + * space before we start mutating the tracking structures. + * Otherwise the recovery method of last resort for userspace is + * disable and re-enable the parent region. + */ + alloc = 0; + for_each_dpa_resource(ndd, res) { + if (strcmp(res->name, label_id.id) != 0) + continue; + if (!is_old_resource(res, old_res_list, old_num_resources)) + alloc++; + } + + victims = 0; + if (old_num_resources) { + /* convert old local-label-map to dimm-slot victim-map */ + victim_map = kcalloc(BITS_TO_LONGS(nslot), sizeof(long), + GFP_KERNEL); + if (!victim_map) + return -ENOMEM; + + /* mark unused labels for garbage collection */ + for_each_clear_bit_le(slot, free, nslot) { + nd_label = nd_label_base(ndd) + slot; + memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN); + if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0) + continue; + res = to_resource(ndd, nd_label); + if (res && is_old_resource(res, old_res_list, + old_num_resources)) + continue; + slot = to_slot(ndd, nd_label); + set_bit(slot, victim_map); + victims++; + } + } + + /* don't allow updates that consume the last label */ + if (nfree - alloc < 0 || nfree - alloc + victims < 1) { + dev_info(&nsblk->common.dev, "insufficient label space\n"); + kfree(victim_map); + return -ENOSPC; + } + /* from here on we need to abort on error */ + + + /* assign all resources to the namespace before writing the labels */ + nsblk->res = NULL; + nsblk->num_resources = 0; + for_each_dpa_resource(ndd, res) { + if (strcmp(res->name, label_id.id) != 0) + continue; + if (!nsblk_add_resource(nd_region, ndd, nsblk, res->start)) { + rc = -ENOMEM; + goto abort; + } + } + + for (i = 0; i < nsblk->num_resources; i++) { + size_t offset; + + res = nsblk->res[i]; + if (is_old_resource(res, old_res_list, old_num_resources)) + continue; /* carry-over */ + slot = nd_label_alloc_slot(ndd); + if (slot == UINT_MAX) + goto abort; + dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot); + + nd_label = nd_label_base(ndd) + slot; + memset(nd_label, 0, sizeof(struct nd_namespace_label)); + memcpy(nd_label->uuid, nsblk->uuid, NSLABEL_UUID_LEN); + if (nsblk->alt_name) + memcpy(nd_label->name, nsblk->alt_name, + NSLABEL_NAME_LEN); + nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_LOCAL); + nd_label->nlabel = __cpu_to_le16(0); /* N/A */ + nd_label->position = __cpu_to_le16(0); /* N/A */ + nd_label->isetcookie = __cpu_to_le64(0); /* N/A */ + nd_label->dpa = __cpu_to_le64(res->start); + nd_label->rawsize = __cpu_to_le64(resource_size(res)); + nd_label->lbasize = __cpu_to_le64(nsblk->lbasize); + nd_label->slot = __cpu_to_le32(slot); + + /* update label */ + offset = nd_label_offset(ndd, nd_label); + rc = nvdimm_set_config_data(ndd, offset, nd_label, + sizeof(struct nd_namespace_label)); + if (rc < 0) + goto abort; + } + + /* free up now unused slots in the new index */ + for_each_set_bit(slot, victim_map, victim_map ? nslot : 0) { + dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot); + nd_label_free_slot(ndd, slot); + } + + /* update index */ + rc = nd_label_write_index(ndd, ndd->ns_next, + nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0); + if (rc) + goto abort; + + /* + * Now that the on-dimm labels are up to date, fix up the tracking + * entries in nd_mapping->labels + */ + nlabel = 0; + for_each_label(l, nd_label, nd_mapping->labels) { + nlabel++; + memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN); + if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0) + continue; + nlabel--; + del_label(nd_mapping, l); + l--; /* retry with the new label at this index */ + } + if (nlabel + nsblk->num_resources > num_labels) { + /* + * Bug, we can't end up with more resources than + * available labels + */ + WARN_ON_ONCE(1); + rc = -ENXIO; + goto out; + } + + for_each_clear_bit_le(slot, free, nslot) { + nd_label = nd_label_base(ndd) + slot; + memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN); + if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0) + continue; + res = to_resource(ndd, nd_label); + res->flags &= ~DPA_RESOURCE_ADJUSTED; + dev_vdbg(&nsblk->common.dev, "assign label[%d] slot: %d\n", + l, slot); + nd_mapping->labels[l++] = nd_label; + } + nd_mapping->labels[l] = NULL; + + out: + kfree(old_res_list); + kfree(victim_map); + return rc; + + abort: + /* + * 1/ repair the allocated label bitmap in the index + * 2/ restore the resource list + */ + nd_label_copy(ndd, nsindex, to_current_namespace_index(ndd)); + kfree(nsblk->res); + nsblk->res = old_res_list; + nsblk->num_resources = old_num_resources; + old_res_list = NULL; + goto out; +} + +static int init_labels(struct nd_mapping *nd_mapping, int num_labels) +{ + int i, l, old_num_labels = 0; + struct nd_namespace_index *nsindex; + struct nd_namespace_label *nd_label; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + size_t size = (num_labels + 1) * sizeof(struct nd_namespace_label *); + + for_each_label(l, nd_label, nd_mapping->labels) + old_num_labels++; + + /* + * We need to preserve all the old labels for the mapping so + * they can be garbage collected after writing the new labels. + */ + if (num_labels > old_num_labels) { + struct nd_namespace_label **labels; + + labels = krealloc(nd_mapping->labels, size, GFP_KERNEL); + if (!labels) + return -ENOMEM; + nd_mapping->labels = labels; + } + if (!nd_mapping->labels) + return -ENOMEM; + + for (i = old_num_labels; i <= num_labels; i++) + nd_mapping->labels[i] = NULL; + + if (ndd->ns_current == -1 || ndd->ns_next == -1) + /* pass */; + else + return max(num_labels, old_num_labels); + + nsindex = to_namespace_index(ndd, 0); + memset(nsindex, 0, ndd->nsarea.config_size); + for (i = 0; i < 2; i++) { + int rc = nd_label_write_index(ndd, i, i*2, ND_NSINDEX_INIT); + + if (rc) + return rc; + } + ndd->ns_next = 1; + ndd->ns_current = 0; + + return max(num_labels, old_num_labels); +} + +static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid) +{ + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_namespace_label *nd_label; + struct nd_namespace_index *nsindex; + u8 label_uuid[NSLABEL_UUID_LEN]; + int l, num_freed = 0; + unsigned long *free; + u32 nslot, slot; + + if (!uuid) + return 0; + + /* no index || no labels == nothing to delete */ + if (!preamble_next(ndd, &nsindex, &free, &nslot) + || !nd_mapping->labels) + return 0; + + for_each_label(l, nd_label, nd_mapping->labels) { + memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN); + if (memcmp(label_uuid, uuid, NSLABEL_UUID_LEN) != 0) + continue; + slot = to_slot(ndd, nd_label); + nd_label_free_slot(ndd, slot); + dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot); + del_label(nd_mapping, l); + num_freed++; + l--; /* retry with new label at this index */ + } + + if (num_freed > l) { + /* + * num_freed will only ever be > l when we delete the last + * label + */ + kfree(nd_mapping->labels); + nd_mapping->labels = NULL; + dev_dbg(ndd->dev, "%s: no more labels\n", __func__); + } + + return nd_label_write_index(ndd, ndd->ns_next, + nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0); +} + +int nd_pmem_namespace_label_update(struct nd_region *nd_region, + struct nd_namespace_pmem *nspm, resource_size_t size) +{ + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + int rc; + + if (size == 0) { + rc = del_labels(nd_mapping, nspm->uuid); + if (rc) + return rc; + continue; + } + + rc = init_labels(nd_mapping, 1); + if (rc < 0) + return rc; + + rc = __pmem_label_update(nd_region, nd_mapping, nspm, i); + if (rc) + return rc; + } + + return 0; +} + +int nd_blk_namespace_label_update(struct nd_region *nd_region, + struct nd_namespace_blk *nsblk, resource_size_t size) +{ + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct resource *res; + int count = 0; + + if (size == 0) + return del_labels(nd_mapping, nsblk->uuid); + + for_each_dpa_resource(to_ndd(nd_mapping), res) + count++; + + count = init_labels(nd_mapping, count); + if (count < 0) + return count; + + return __blk_label_update(nd_region, nd_mapping, nsblk, count); +} diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h new file mode 100644 index 000000000000..a59ef6eef2a3 --- /dev/null +++ b/drivers/nvdimm/label.h @@ -0,0 +1,141 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __LABEL_H__ +#define __LABEL_H__ + +#include <linux/ndctl.h> +#include <linux/sizes.h> +#include <linux/io.h> + +enum { + NSINDEX_SIG_LEN = 16, + NSINDEX_ALIGN = 256, + NSINDEX_SEQ_MASK = 0x3, + NSLABEL_UUID_LEN = 16, + NSLABEL_NAME_LEN = 64, + NSLABEL_FLAG_ROLABEL = 0x1, /* read-only label */ + NSLABEL_FLAG_LOCAL = 0x2, /* DIMM-local namespace */ + NSLABEL_FLAG_BTT = 0x4, /* namespace contains a BTT */ + NSLABEL_FLAG_UPDATING = 0x8, /* label being updated */ + BTT_ALIGN = 4096, /* all btt structures */ + BTTINFO_SIG_LEN = 16, + BTTINFO_UUID_LEN = 16, + BTTINFO_FLAG_ERROR = 0x1, /* error state (read-only) */ + BTTINFO_MAJOR_VERSION = 1, + ND_LABEL_MIN_SIZE = 512 * 129, /* see sizeof_namespace_index() */ + ND_LABEL_ID_SIZE = 50, + ND_NSINDEX_INIT = 0x1, +}; + +static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0"; + +/** + * struct nd_namespace_index - label set superblock + * @sig: NAMESPACE_INDEX\0 + * @flags: placeholder + * @seq: sequence number for this index + * @myoff: offset of this index in label area + * @mysize: size of this index struct + * @otheroff: offset of other index + * @labeloff: offset of first label slot + * @nslot: total number of label slots + * @major: label area major version + * @minor: label area minor version + * @checksum: fletcher64 of all fields + * @free[0]: bitmap, nlabel bits + * + * The size of free[] is rounded up so the total struct size is a + * multiple of NSINDEX_ALIGN bytes. Any bits this allocates beyond + * nlabel bits must be zero. + */ +struct nd_namespace_index { + u8 sig[NSINDEX_SIG_LEN]; + __le32 flags; + __le32 seq; + __le64 myoff; + __le64 mysize; + __le64 otheroff; + __le64 labeloff; + __le32 nslot; + __le16 major; + __le16 minor; + __le64 checksum; + u8 free[0]; +}; + +/** + * struct nd_namespace_label - namespace superblock + * @uuid: UUID per RFC 4122 + * @name: optional name (NULL-terminated) + * @flags: see NSLABEL_FLAG_* + * @nlabel: num labels to describe this ns + * @position: labels position in set + * @isetcookie: interleave set cookie + * @lbasize: LBA size in bytes or 0 for pmem + * @dpa: DPA of NVM range on this DIMM + * @rawsize: size of namespace + * @slot: slot of this label in label area + * @unused: must be zero + */ +struct nd_namespace_label { + u8 uuid[NSLABEL_UUID_LEN]; + u8 name[NSLABEL_NAME_LEN]; + __le32 flags; + __le16 nlabel; + __le16 position; + __le64 isetcookie; + __le64 lbasize; + __le64 dpa; + __le64 rawsize; + __le32 slot; + __le32 unused; +}; + +/** + * struct nd_label_id - identifier string for dpa allocation + * @id: "{blk|pmem}-<namespace uuid>" + */ +struct nd_label_id { + char id[ND_LABEL_ID_SIZE]; +}; + +/* + * If the 'best' index is invalid, so is the 'next' index. Otherwise, + * the next index is MOD(index+1, 2) + */ +static inline int nd_label_next_nsindex(int index) +{ + if (index < 0) + return -1; + + return (index + 1) % 2; +} + +struct nvdimm_drvdata; +int nd_label_validate(struct nvdimm_drvdata *ndd); +void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, + struct nd_namespace_index *src); +size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd); +int nd_label_active_count(struct nvdimm_drvdata *ndd); +struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n); +u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd); +bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot); +u32 nd_label_nfree(struct nvdimm_drvdata *ndd); +struct nd_region; +struct nd_namespace_pmem; +struct nd_namespace_blk; +int nd_pmem_namespace_label_update(struct nd_region *nd_region, + struct nd_namespace_pmem *nspm, resource_size_t size); +int nd_blk_namespace_label_update(struct nd_region *nd_region, + struct nd_namespace_blk *nsblk, resource_size_t size); +#endif /* __LABEL_H__ */ diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c new file mode 100644 index 000000000000..fef0dd80d4ad --- /dev/null +++ b/drivers/nvdimm/namespace_devs.c @@ -0,0 +1,1870 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/module.h> +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/nd.h> +#include "nd-core.h" +#include "nd.h" + +static void namespace_io_release(struct device *dev) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + kfree(nsio); +} + +static void namespace_pmem_release(struct device *dev) +{ + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + kfree(nspm->alt_name); + kfree(nspm->uuid); + kfree(nspm); +} + +static void namespace_blk_release(struct device *dev) +{ + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + struct nd_region *nd_region = to_nd_region(dev->parent); + + if (nsblk->id >= 0) + ida_simple_remove(&nd_region->ns_ida, nsblk->id); + kfree(nsblk->alt_name); + kfree(nsblk->uuid); + kfree(nsblk->res); + kfree(nsblk); +} + +static struct device_type namespace_io_device_type = { + .name = "nd_namespace_io", + .release = namespace_io_release, +}; + +static struct device_type namespace_pmem_device_type = { + .name = "nd_namespace_pmem", + .release = namespace_pmem_release, +}; + +static struct device_type namespace_blk_device_type = { + .name = "nd_namespace_blk", + .release = namespace_blk_release, +}; + +static bool is_namespace_pmem(struct device *dev) +{ + return dev ? dev->type == &namespace_pmem_device_type : false; +} + +static bool is_namespace_blk(struct device *dev) +{ + return dev ? dev->type == &namespace_blk_device_type : false; +} + +static bool is_namespace_io(struct device *dev) +{ + return dev ? dev->type == &namespace_io_device_type : false; +} + +const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, + char *name) +{ + struct nd_region *nd_region = to_nd_region(ndns->dev.parent); + const char *suffix = ""; + + if (ndns->claim && is_nd_btt(ndns->claim)) + suffix = "s"; + + if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) + sprintf(name, "pmem%d%s", nd_region->id, suffix); + else if (is_namespace_blk(&ndns->dev)) { + struct nd_namespace_blk *nsblk; + + nsblk = to_nd_namespace_blk(&ndns->dev); + sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id, suffix); + } else { + return NULL; + } + + return name; +} +EXPORT_SYMBOL(nvdimm_namespace_disk_name); + +static ssize_t nstype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + + return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region)); +} +static DEVICE_ATTR_RO(nstype); + +static ssize_t __alt_name_store(struct device *dev, const char *buf, + const size_t len) +{ + char *input, *pos, *alt_name, **ns_altname; + ssize_t rc; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + ns_altname = &nspm->alt_name; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + ns_altname = &nsblk->alt_name; + } else + return -ENXIO; + + if (dev->driver || to_ndns(dev)->claim) + return -EBUSY; + + input = kmemdup(buf, len + 1, GFP_KERNEL); + if (!input) + return -ENOMEM; + + input[len] = '\0'; + pos = strim(input); + if (strlen(pos) + 1 > NSLABEL_NAME_LEN) { + rc = -EINVAL; + goto out; + } + + alt_name = kzalloc(NSLABEL_NAME_LEN, GFP_KERNEL); + if (!alt_name) { + rc = -ENOMEM; + goto out; + } + kfree(*ns_altname); + *ns_altname = alt_name; + sprintf(*ns_altname, "%s", pos); + rc = len; + +out: + kfree(input); + return rc; +} + +static resource_size_t nd_namespace_blk_size(struct nd_namespace_blk *nsblk) +{ + struct nd_region *nd_region = to_nd_region(nsblk->common.dev.parent); + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_label_id label_id; + resource_size_t size = 0; + struct resource *res; + + if (!nsblk->uuid) + return 0; + nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL); + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id.id) == 0) + size += resource_size(res); + return size; +} + +static bool __nd_namespace_blk_validate(struct nd_namespace_blk *nsblk) +{ + struct nd_region *nd_region = to_nd_region(nsblk->common.dev.parent); + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_label_id label_id; + struct resource *res; + int count, i; + + if (!nsblk->uuid || !nsblk->lbasize || !ndd) + return false; + + count = 0; + nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL); + for_each_dpa_resource(ndd, res) { + if (strcmp(res->name, label_id.id) != 0) + continue; + /* + * Resources with unacknoweldged adjustments indicate a + * failure to update labels + */ + if (res->flags & DPA_RESOURCE_ADJUSTED) + return false; + count++; + } + + /* These values match after a successful label update */ + if (count != nsblk->num_resources) + return false; + + for (i = 0; i < nsblk->num_resources; i++) { + struct resource *found = NULL; + + for_each_dpa_resource(ndd, res) + if (res == nsblk->res[i]) { + found = res; + break; + } + /* stale resource */ + if (!found) + return false; + } + + return true; +} + +resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk) +{ + resource_size_t size; + + nvdimm_bus_lock(&nsblk->common.dev); + size = __nd_namespace_blk_validate(nsblk); + nvdimm_bus_unlock(&nsblk->common.dev); + + return size; +} +EXPORT_SYMBOL(nd_namespace_blk_validate); + + +static int nd_namespace_label_update(struct nd_region *nd_region, + struct device *dev) +{ + dev_WARN_ONCE(dev, dev->driver || to_ndns(dev)->claim, + "namespace must be idle during label update\n"); + if (dev->driver || to_ndns(dev)->claim) + return 0; + + /* + * Only allow label writes that will result in a valid namespace + * or deletion of an existing namespace. + */ + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + resource_size_t size = resource_size(&nspm->nsio.res); + + if (size == 0 && nspm->uuid) + /* delete allocation */; + else if (!nspm->uuid) + return 0; + + return nd_pmem_namespace_label_update(nd_region, nspm, size); + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + resource_size_t size = nd_namespace_blk_size(nsblk); + + if (size == 0 && nsblk->uuid) + /* delete allocation */; + else if (!nsblk->uuid || !nsblk->lbasize) + return 0; + + return nd_blk_namespace_label_update(nd_region, nsblk, size); + } else + return -ENXIO; +} + +static ssize_t alt_name_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + ssize_t rc; + + device_lock(dev); + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + rc = __alt_name_store(dev, buf, len); + if (rc >= 0) + rc = nd_namespace_label_update(nd_region, dev); + dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc < 0 ? rc : len; +} + +static ssize_t alt_name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + char *ns_altname; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + ns_altname = nspm->alt_name; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + ns_altname = nsblk->alt_name; + } else + return -ENXIO; + + return sprintf(buf, "%s\n", ns_altname ? ns_altname : ""); +} +static DEVICE_ATTR_RW(alt_name); + +static int scan_free(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_label_id *label_id, + resource_size_t n) +{ + bool is_blk = strncmp(label_id->id, "blk", 3) == 0; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + int rc = 0; + + while (n) { + struct resource *res, *last; + resource_size_t new_start; + + last = NULL; + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id->id) == 0) + last = res; + res = last; + if (!res) + return 0; + + if (n >= resource_size(res)) { + n -= resource_size(res); + nd_dbg_dpa(nd_region, ndd, res, "delete %d\n", rc); + nvdimm_free_dpa(ndd, res); + /* retry with last resource deleted */ + continue; + } + + /* + * Keep BLK allocations relegated to high DPA as much as + * possible + */ + if (is_blk) + new_start = res->start + n; + else + new_start = res->start; + + rc = adjust_resource(res, new_start, resource_size(res) - n); + if (rc == 0) + res->flags |= DPA_RESOURCE_ADJUSTED; + nd_dbg_dpa(nd_region, ndd, res, "shrink %d\n", rc); + break; + } + + return rc; +} + +/** + * shrink_dpa_allocation - for each dimm in region free n bytes for label_id + * @nd_region: the set of dimms to reclaim @n bytes from + * @label_id: unique identifier for the namespace consuming this dpa range + * @n: number of bytes per-dimm to release + * + * Assumes resources are ordered. Starting from the end try to + * adjust_resource() the allocation to @n, but if @n is larger than the + * allocation delete it and find the 'new' last allocation in the label + * set. + */ +static int shrink_dpa_allocation(struct nd_region *nd_region, + struct nd_label_id *label_id, resource_size_t n) +{ + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + int rc; + + rc = scan_free(nd_region, nd_mapping, label_id, n); + if (rc) + return rc; + } + + return 0; +} + +static resource_size_t init_dpa_allocation(struct nd_label_id *label_id, + struct nd_region *nd_region, struct nd_mapping *nd_mapping, + resource_size_t n) +{ + bool is_blk = strncmp(label_id->id, "blk", 3) == 0; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + resource_size_t first_dpa; + struct resource *res; + int rc = 0; + + /* allocate blk from highest dpa first */ + if (is_blk) + first_dpa = nd_mapping->start + nd_mapping->size - n; + else + first_dpa = nd_mapping->start; + + /* first resource allocation for this label-id or dimm */ + res = nvdimm_allocate_dpa(ndd, label_id, first_dpa, n); + if (!res) + rc = -EBUSY; + + nd_dbg_dpa(nd_region, ndd, res, "init %d\n", rc); + return rc ? n : 0; +} + +static bool space_valid(bool is_pmem, bool is_reserve, + struct nd_label_id *label_id, struct resource *res) +{ + /* + * For BLK-space any space is valid, for PMEM-space, it must be + * contiguous with an existing allocation unless we are + * reserving pmem. + */ + if (is_reserve || !is_pmem) + return true; + if (!res || strcmp(res->name, label_id->id) == 0) + return true; + return false; +} + +enum alloc_loc { + ALLOC_ERR = 0, ALLOC_BEFORE, ALLOC_MID, ALLOC_AFTER, +}; + +static resource_size_t scan_allocate(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_label_id *label_id, + resource_size_t n) +{ + resource_size_t mapping_end = nd_mapping->start + nd_mapping->size - 1; + bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0; + bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + const resource_size_t to_allocate = n; + struct resource *res; + int first; + + retry: + first = 0; + for_each_dpa_resource(ndd, res) { + resource_size_t allocate, available = 0, free_start, free_end; + struct resource *next = res->sibling, *new_res = NULL; + enum alloc_loc loc = ALLOC_ERR; + const char *action; + int rc = 0; + + /* ignore resources outside this nd_mapping */ + if (res->start > mapping_end) + continue; + if (res->end < nd_mapping->start) + continue; + + /* space at the beginning of the mapping */ + if (!first++ && res->start > nd_mapping->start) { + free_start = nd_mapping->start; + available = res->start - free_start; + if (space_valid(is_pmem, is_reserve, label_id, NULL)) + loc = ALLOC_BEFORE; + } + + /* space between allocations */ + if (!loc && next) { + free_start = res->start + resource_size(res); + free_end = min(mapping_end, next->start - 1); + if (space_valid(is_pmem, is_reserve, label_id, res) + && free_start < free_end) { + available = free_end + 1 - free_start; + loc = ALLOC_MID; + } + } + + /* space at the end of the mapping */ + if (!loc && !next) { + free_start = res->start + resource_size(res); + free_end = mapping_end; + if (space_valid(is_pmem, is_reserve, label_id, res) + && free_start < free_end) { + available = free_end + 1 - free_start; + loc = ALLOC_AFTER; + } + } + + if (!loc || !available) + continue; + allocate = min(available, n); + switch (loc) { + case ALLOC_BEFORE: + if (strcmp(res->name, label_id->id) == 0) { + /* adjust current resource up */ + if (is_pmem && !is_reserve) + return n; + rc = adjust_resource(res, res->start - allocate, + resource_size(res) + allocate); + action = "cur grow up"; + } else + action = "allocate"; + break; + case ALLOC_MID: + if (strcmp(next->name, label_id->id) == 0) { + /* adjust next resource up */ + if (is_pmem && !is_reserve) + return n; + rc = adjust_resource(next, next->start + - allocate, resource_size(next) + + allocate); + new_res = next; + action = "next grow up"; + } else if (strcmp(res->name, label_id->id) == 0) { + action = "grow down"; + } else + action = "allocate"; + break; + case ALLOC_AFTER: + if (strcmp(res->name, label_id->id) == 0) + action = "grow down"; + else + action = "allocate"; + break; + default: + return n; + } + + if (strcmp(action, "allocate") == 0) { + /* BLK allocate bottom up */ + if (!is_pmem) + free_start += available - allocate; + else if (!is_reserve && free_start != nd_mapping->start) + return n; + + new_res = nvdimm_allocate_dpa(ndd, label_id, + free_start, allocate); + if (!new_res) + rc = -EBUSY; + } else if (strcmp(action, "grow down") == 0) { + /* adjust current resource down */ + rc = adjust_resource(res, res->start, resource_size(res) + + allocate); + if (rc == 0) + res->flags |= DPA_RESOURCE_ADJUSTED; + } + + if (!new_res) + new_res = res; + + nd_dbg_dpa(nd_region, ndd, new_res, "%s(%d) %d\n", + action, loc, rc); + + if (rc) + return n; + + n -= allocate; + if (n) { + /* + * Retry scan with newly inserted resources. + * For example, if we did an ALLOC_BEFORE + * insertion there may also have been space + * available for an ALLOC_AFTER insertion, so we + * need to check this same resource again + */ + goto retry; + } else + return 0; + } + + /* + * If we allocated nothing in the BLK case it may be because we are in + * an initial "pmem-reserve pass". Only do an initial BLK allocation + * when none of the DPA space is reserved. + */ + if ((is_pmem || !ndd->dpa.child) && n == to_allocate) + return init_dpa_allocation(label_id, nd_region, nd_mapping, n); + return n; +} + +static int merge_dpa(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_label_id *label_id) +{ + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res; + + if (strncmp("pmem", label_id->id, 4) == 0) + return 0; + retry: + for_each_dpa_resource(ndd, res) { + int rc; + struct resource *next = res->sibling; + resource_size_t end = res->start + resource_size(res); + + if (!next || strcmp(res->name, label_id->id) != 0 + || strcmp(next->name, label_id->id) != 0 + || end != next->start) + continue; + end += resource_size(next); + nvdimm_free_dpa(ndd, next); + rc = adjust_resource(res, res->start, end - res->start); + nd_dbg_dpa(nd_region, ndd, res, "merge %d\n", rc); + if (rc) + return rc; + res->flags |= DPA_RESOURCE_ADJUSTED; + goto retry; + } + + return 0; +} + +static int __reserve_free_pmem(struct device *dev, void *data) +{ + struct nvdimm *nvdimm = data; + struct nd_region *nd_region; + struct nd_label_id label_id; + int i; + + if (!is_nd_pmem(dev)) + return 0; + + nd_region = to_nd_region(dev); + if (nd_region->ndr_mappings == 0) + return 0; + + memset(&label_id, 0, sizeof(label_id)); + strcat(label_id.id, "pmem-reserve"); + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + resource_size_t n, rem = 0; + + if (nd_mapping->nvdimm != nvdimm) + continue; + + n = nd_pmem_available_dpa(nd_region, nd_mapping, &rem); + if (n == 0) + return 0; + rem = scan_allocate(nd_region, nd_mapping, &label_id, n); + dev_WARN_ONCE(&nd_region->dev, rem, + "pmem reserve underrun: %#llx of %#llx bytes\n", + (unsigned long long) n - rem, + (unsigned long long) n); + return rem ? -ENXIO : 0; + } + + return 0; +} + +static void release_free_pmem(struct nvdimm_bus *nvdimm_bus, + struct nd_mapping *nd_mapping) +{ + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res, *_res; + + for_each_dpa_resource_safe(ndd, res, _res) + if (strcmp(res->name, "pmem-reserve") == 0) + nvdimm_free_dpa(ndd, res); +} + +static int reserve_free_pmem(struct nvdimm_bus *nvdimm_bus, + struct nd_mapping *nd_mapping) +{ + struct nvdimm *nvdimm = nd_mapping->nvdimm; + int rc; + + rc = device_for_each_child(&nvdimm_bus->dev, nvdimm, + __reserve_free_pmem); + if (rc) + release_free_pmem(nvdimm_bus, nd_mapping); + return rc; +} + +/** + * grow_dpa_allocation - for each dimm allocate n bytes for @label_id + * @nd_region: the set of dimms to allocate @n more bytes from + * @label_id: unique identifier for the namespace consuming this dpa range + * @n: number of bytes per-dimm to add to the existing allocation + * + * Assumes resources are ordered. For BLK regions, first consume + * BLK-only available DPA free space, then consume PMEM-aliased DPA + * space starting at the highest DPA. For PMEM regions start + * allocations from the start of an interleave set and end at the first + * BLK allocation or the end of the interleave set, whichever comes + * first. + */ +static int grow_dpa_allocation(struct nd_region *nd_region, + struct nd_label_id *label_id, resource_size_t n) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); + bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0; + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + resource_size_t rem = n; + int rc, j; + + /* + * In the BLK case try once with all unallocated PMEM + * reserved, and once without + */ + for (j = is_pmem; j < 2; j++) { + bool blk_only = j == 0; + + if (blk_only) { + rc = reserve_free_pmem(nvdimm_bus, nd_mapping); + if (rc) + return rc; + } + rem = scan_allocate(nd_region, nd_mapping, + label_id, rem); + if (blk_only) + release_free_pmem(nvdimm_bus, nd_mapping); + + /* try again and allow encroachments into PMEM */ + if (rem == 0) + break; + } + + dev_WARN_ONCE(&nd_region->dev, rem, + "allocation underrun: %#llx of %#llx bytes\n", + (unsigned long long) n - rem, + (unsigned long long) n); + if (rem) + return -ENXIO; + + rc = merge_dpa(nd_region, nd_mapping, label_id); + if (rc) + return rc; + } + + return 0; +} + +static void nd_namespace_pmem_set_size(struct nd_region *nd_region, + struct nd_namespace_pmem *nspm, resource_size_t size) +{ + struct resource *res = &nspm->nsio.res; + + res->start = nd_region->ndr_start; + res->end = nd_region->ndr_start + size - 1; +} + +static ssize_t __size_store(struct device *dev, unsigned long long val) +{ + resource_size_t allocated = 0, available = 0; + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_mapping *nd_mapping; + struct nvdimm_drvdata *ndd; + struct nd_label_id label_id; + u32 flags = 0, remainder; + u8 *uuid = NULL; + int rc, i; + + if (dev->driver || to_ndns(dev)->claim) + return -EBUSY; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + uuid = nspm->uuid; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + uuid = nsblk->uuid; + flags = NSLABEL_FLAG_LOCAL; + } + + /* + * We need a uuid for the allocation-label and dimm(s) on which + * to store the label. + */ + if (!uuid || nd_region->ndr_mappings == 0) + return -ENXIO; + + div_u64_rem(val, SZ_4K * nd_region->ndr_mappings, &remainder); + if (remainder) { + dev_dbg(dev, "%llu is not %dK aligned\n", val, + (SZ_4K * nd_region->ndr_mappings) / SZ_1K); + return -EINVAL; + } + + nd_label_gen_id(&label_id, uuid, flags); + for (i = 0; i < nd_region->ndr_mappings; i++) { + nd_mapping = &nd_region->mapping[i]; + ndd = to_ndd(nd_mapping); + + /* + * All dimms in an interleave set, or the base dimm for a blk + * region, need to be enabled for the size to be changed. + */ + if (!ndd) + return -ENXIO; + + allocated += nvdimm_allocated_dpa(ndd, &label_id); + } + available = nd_region_available_dpa(nd_region); + + if (val > available + allocated) + return -ENOSPC; + + if (val == allocated) + return 0; + + val = div_u64(val, nd_region->ndr_mappings); + allocated = div_u64(allocated, nd_region->ndr_mappings); + if (val < allocated) + rc = shrink_dpa_allocation(nd_region, &label_id, + allocated - val); + else + rc = grow_dpa_allocation(nd_region, &label_id, val - allocated); + + if (rc) + return rc; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + nd_namespace_pmem_set_size(nd_region, nspm, + val * nd_region->ndr_mappings); + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + /* + * Try to delete the namespace if we deleted all of its + * allocation, this is not the seed device for the + * region, and it is not actively claimed by a btt + * instance. + */ + if (val == 0 && nd_region->ns_seed != dev + && !nsblk->common.claim) + nd_device_unregister(dev, ND_ASYNC); + } + + return rc; +} + +static ssize_t size_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + unsigned long long val; + u8 **uuid = NULL; + int rc; + + rc = kstrtoull(buf, 0, &val); + if (rc) + return rc; + + device_lock(dev); + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + rc = __size_store(dev, val); + if (rc >= 0) + rc = nd_namespace_label_update(nd_region, dev); + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + uuid = &nspm->uuid; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + uuid = &nsblk->uuid; + } + + if (rc == 0 && val == 0 && uuid) { + /* setting size zero == 'delete namespace' */ + kfree(*uuid); + *uuid = NULL; + } + + dev_dbg(dev, "%s: %llx %s (%d)\n", __func__, val, rc < 0 + ? "fail" : "success", rc); + + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc < 0 ? rc : len; +} + +resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns) +{ + struct device *dev = &ndns->dev; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + return resource_size(&nspm->nsio.res); + } else if (is_namespace_blk(dev)) { + return nd_namespace_blk_size(to_nd_namespace_blk(dev)); + } else if (is_namespace_io(dev)) { + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + return resource_size(&nsio->res); + } else + WARN_ONCE(1, "unknown namespace type\n"); + return 0; +} + +resource_size_t nvdimm_namespace_capacity(struct nd_namespace_common *ndns) +{ + resource_size_t size; + + nvdimm_bus_lock(&ndns->dev); + size = __nvdimm_namespace_capacity(ndns); + nvdimm_bus_unlock(&ndns->dev); + + return size; +} +EXPORT_SYMBOL(nvdimm_namespace_capacity); + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%llu\n", (unsigned long long) + nvdimm_namespace_capacity(to_ndns(dev))); +} +static DEVICE_ATTR(size, S_IRUGO, size_show, size_store); + +static ssize_t uuid_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u8 *uuid; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + uuid = nspm->uuid; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + uuid = nsblk->uuid; + } else + return -ENXIO; + + if (uuid) + return sprintf(buf, "%pUb\n", uuid); + return sprintf(buf, "\n"); +} + +/** + * namespace_update_uuid - check for a unique uuid and whether we're "renaming" + * @nd_region: parent region so we can updates all dimms in the set + * @dev: namespace type for generating label_id + * @new_uuid: incoming uuid + * @old_uuid: reference to the uuid storage location in the namespace object + */ +static int namespace_update_uuid(struct nd_region *nd_region, + struct device *dev, u8 *new_uuid, u8 **old_uuid) +{ + u32 flags = is_namespace_blk(dev) ? NSLABEL_FLAG_LOCAL : 0; + struct nd_label_id old_label_id; + struct nd_label_id new_label_id; + int i; + + if (!nd_is_uuid_unique(dev, new_uuid)) + return -EINVAL; + + if (*old_uuid == NULL) + goto out; + + /* + * If we've already written a label with this uuid, then it's + * too late to rename because we can't reliably update the uuid + * without losing the old namespace. Userspace must delete this + * namespace to abandon the old uuid. + */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + + /* + * This check by itself is sufficient because old_uuid + * would be NULL above if this uuid did not exist in the + * currently written set. + * + * FIXME: can we delete uuid with zero dpa allocated? + */ + if (nd_mapping->labels) + return -EBUSY; + } + + nd_label_gen_id(&old_label_id, *old_uuid, flags); + nd_label_gen_id(&new_label_id, new_uuid, flags); + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res; + + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, old_label_id.id) == 0) + sprintf((void *) res->name, "%s", + new_label_id.id); + } + kfree(*old_uuid); + out: + *old_uuid = new_uuid; + return 0; +} + +static ssize_t uuid_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + u8 *uuid = NULL; + ssize_t rc = 0; + u8 **ns_uuid; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + ns_uuid = &nspm->uuid; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + ns_uuid = &nsblk->uuid; + } else + return -ENXIO; + + device_lock(dev); + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + if (to_ndns(dev)->claim) + rc = -EBUSY; + if (rc >= 0) + rc = nd_uuid_store(dev, &uuid, buf, len); + if (rc >= 0) + rc = namespace_update_uuid(nd_region, dev, uuid, ns_uuid); + if (rc >= 0) + rc = nd_namespace_label_update(nd_region, dev); + else + kfree(uuid); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc < 0 ? rc : len; +} +static DEVICE_ATTR_RW(uuid); + +static ssize_t resource_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct resource *res; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + res = &nspm->nsio.res; + } else if (is_namespace_io(dev)) { + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + res = &nsio->res; + } else + return -ENXIO; + + /* no address to convey if the namespace has no allocation */ + if (resource_size(res) == 0) + return -ENXIO; + return sprintf(buf, "%#llx\n", (unsigned long long) res->start); +} +static DEVICE_ATTR_RO(resource); + +static const unsigned long ns_lbasize_supported[] = { 512, 520, 528, + 4096, 4104, 4160, 4224, 0 }; + +static ssize_t sector_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + if (!is_namespace_blk(dev)) + return -ENXIO; + + return nd_sector_size_show(nsblk->lbasize, ns_lbasize_supported, buf); +} + +static ssize_t sector_size_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + struct nd_region *nd_region = to_nd_region(dev->parent); + ssize_t rc = 0; + + if (!is_namespace_blk(dev)) + return -ENXIO; + + device_lock(dev); + nvdimm_bus_lock(dev); + if (to_ndns(dev)->claim) + rc = -EBUSY; + if (rc >= 0) + rc = nd_sector_size_store(dev, buf, &nsblk->lbasize, + ns_lbasize_supported); + if (rc >= 0) + rc = nd_namespace_label_update(nd_region, dev); + dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__, + rc, rc < 0 ? "tried" : "wrote", buf, + buf[len - 1] == '\n' ? "" : "\n"); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc ? rc : len; +} +static DEVICE_ATTR_RW(sector_size); + +static ssize_t dpa_extents_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_label_id label_id; + int count = 0, i; + u8 *uuid = NULL; + u32 flags = 0; + + nvdimm_bus_lock(dev); + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + uuid = nspm->uuid; + flags = 0; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + uuid = nsblk->uuid; + flags = NSLABEL_FLAG_LOCAL; + } + + if (!uuid) + goto out; + + nd_label_gen_id(&label_id, uuid, flags); + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res; + + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id.id) == 0) + count++; + } + out: + nvdimm_bus_unlock(dev); + + return sprintf(buf, "%d\n", count); +} +static DEVICE_ATTR_RO(dpa_extents); + +static ssize_t holder_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_namespace_common *ndns = to_ndns(dev); + ssize_t rc; + + device_lock(dev); + rc = sprintf(buf, "%s\n", ndns->claim ? dev_name(ndns->claim) : ""); + device_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(holder); + +static ssize_t force_raw_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + bool force_raw; + int rc = strtobool(buf, &force_raw); + + if (rc) + return rc; + + to_ndns(dev)->force_raw = force_raw; + return len; +} + +static ssize_t force_raw_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", to_ndns(dev)->force_raw); +} +static DEVICE_ATTR_RW(force_raw); + +static struct attribute *nd_namespace_attributes[] = { + &dev_attr_nstype.attr, + &dev_attr_size.attr, + &dev_attr_uuid.attr, + &dev_attr_holder.attr, + &dev_attr_resource.attr, + &dev_attr_alt_name.attr, + &dev_attr_force_raw.attr, + &dev_attr_sector_size.attr, + &dev_attr_dpa_extents.attr, + NULL, +}; + +static umode_t namespace_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + + if (a == &dev_attr_resource.attr) { + if (is_namespace_blk(dev)) + return 0; + return a->mode; + } + + if (is_namespace_pmem(dev) || is_namespace_blk(dev)) { + if (a == &dev_attr_size.attr) + return S_IWUSR | S_IRUGO; + + if (is_namespace_pmem(dev) && a == &dev_attr_sector_size.attr) + return 0; + + return a->mode; + } + + if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr + || a == &dev_attr_holder.attr + || a == &dev_attr_force_raw.attr) + return a->mode; + + return 0; +} + +static struct attribute_group nd_namespace_attribute_group = { + .attrs = nd_namespace_attributes, + .is_visible = namespace_visible, +}; + +static const struct attribute_group *nd_namespace_attribute_groups[] = { + &nd_device_attribute_group, + &nd_namespace_attribute_group, + &nd_numa_attribute_group, + NULL, +}; + +struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev) +{ + struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL; + struct nd_namespace_common *ndns; + resource_size_t size; + + if (nd_btt) { + ndns = nd_btt->ndns; + if (!ndns) + return ERR_PTR(-ENODEV); + + /* + * Flush any in-progess probes / removals in the driver + * for the raw personality of this namespace. + */ + device_lock(&ndns->dev); + device_unlock(&ndns->dev); + if (ndns->dev.driver) { + dev_dbg(&ndns->dev, "is active, can't bind %s\n", + dev_name(&nd_btt->dev)); + return ERR_PTR(-EBUSY); + } + if (dev_WARN_ONCE(&ndns->dev, ndns->claim != &nd_btt->dev, + "host (%s) vs claim (%s) mismatch\n", + dev_name(&nd_btt->dev), + dev_name(ndns->claim))) + return ERR_PTR(-ENXIO); + } else { + ndns = to_ndns(dev); + if (ndns->claim) { + dev_dbg(dev, "claimed by %s, failing probe\n", + dev_name(ndns->claim)); + + return ERR_PTR(-ENXIO); + } + } + + size = nvdimm_namespace_capacity(ndns); + if (size < ND_MIN_NAMESPACE_SIZE) { + dev_dbg(&ndns->dev, "%pa, too small must be at least %#x\n", + &size, ND_MIN_NAMESPACE_SIZE); + return ERR_PTR(-ENODEV); + } + + if (is_namespace_pmem(&ndns->dev)) { + struct nd_namespace_pmem *nspm; + + nspm = to_nd_namespace_pmem(&ndns->dev); + if (!nspm->uuid) { + dev_dbg(&ndns->dev, "%s: uuid not set\n", __func__); + return ERR_PTR(-ENODEV); + } + } else if (is_namespace_blk(&ndns->dev)) { + struct nd_namespace_blk *nsblk; + + nsblk = to_nd_namespace_blk(&ndns->dev); + if (!nd_namespace_blk_validate(nsblk)) + return ERR_PTR(-ENODEV); + } + + return ndns; +} +EXPORT_SYMBOL(nvdimm_namespace_common_probe); + +static struct device **create_namespace_io(struct nd_region *nd_region) +{ + struct nd_namespace_io *nsio; + struct device *dev, **devs; + struct resource *res; + + nsio = kzalloc(sizeof(*nsio), GFP_KERNEL); + if (!nsio) + return NULL; + + devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL); + if (!devs) { + kfree(nsio); + return NULL; + } + + dev = &nsio->common.dev; + dev->type = &namespace_io_device_type; + dev->parent = &nd_region->dev; + res = &nsio->res; + res->name = dev_name(&nd_region->dev); + res->flags = IORESOURCE_MEM; + res->start = nd_region->ndr_start; + res->end = res->start + nd_region->ndr_size - 1; + + devs[0] = dev; + return devs; +} + +static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid, + u64 cookie, u16 pos) +{ + struct nd_namespace_label *found = NULL; + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nd_namespace_label *nd_label; + bool found_uuid = false; + int l; + + for_each_label(l, nd_label, nd_mapping->labels) { + u64 isetcookie = __le64_to_cpu(nd_label->isetcookie); + u16 position = __le16_to_cpu(nd_label->position); + u16 nlabel = __le16_to_cpu(nd_label->nlabel); + + if (isetcookie != cookie) + continue; + + if (memcmp(nd_label->uuid, uuid, NSLABEL_UUID_LEN) != 0) + continue; + + if (found_uuid) { + dev_dbg(to_ndd(nd_mapping)->dev, + "%s duplicate entry for uuid\n", + __func__); + return false; + } + found_uuid = true; + if (nlabel != nd_region->ndr_mappings) + continue; + if (position != pos) + continue; + found = nd_label; + break; + } + if (found) + break; + } + return found != NULL; +} + +static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id) +{ + struct nd_namespace_label *select = NULL; + int i; + + if (!pmem_id) + return -ENODEV; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nd_namespace_label *nd_label; + u64 hw_start, hw_end, pmem_start, pmem_end; + int l; + + for_each_label(l, nd_label, nd_mapping->labels) + if (memcmp(nd_label->uuid, pmem_id, NSLABEL_UUID_LEN) == 0) + break; + + if (!nd_label) { + WARN_ON(1); + return -EINVAL; + } + + select = nd_label; + /* + * Check that this label is compliant with the dpa + * range published in NFIT + */ + hw_start = nd_mapping->start; + hw_end = hw_start + nd_mapping->size; + pmem_start = __le64_to_cpu(select->dpa); + pmem_end = pmem_start + __le64_to_cpu(select->rawsize); + if (pmem_start == hw_start && pmem_end <= hw_end) + /* pass */; + else + return -EINVAL; + + nd_mapping->labels[0] = select; + nd_mapping->labels[1] = NULL; + } + return 0; +} + +/** + * find_pmem_label_set - validate interleave set labelling, retrieve label0 + * @nd_region: region with mappings to validate + */ +static int find_pmem_label_set(struct nd_region *nd_region, + struct nd_namespace_pmem *nspm) +{ + u64 cookie = nd_region_interleave_set_cookie(nd_region); + struct nd_namespace_label *nd_label; + u8 select_id[NSLABEL_UUID_LEN]; + resource_size_t size = 0; + u8 *pmem_id = NULL; + int rc = -ENODEV, l; + u16 i; + + if (cookie == 0) + return -ENXIO; + + /* + * Find a complete set of labels by uuid. By definition we can start + * with any mapping as the reference label + */ + for_each_label(l, nd_label, nd_region->mapping[0].labels) { + u64 isetcookie = __le64_to_cpu(nd_label->isetcookie); + + if (isetcookie != cookie) + continue; + + for (i = 0; nd_region->ndr_mappings; i++) + if (!has_uuid_at_pos(nd_region, nd_label->uuid, + cookie, i)) + break; + if (i < nd_region->ndr_mappings) { + /* + * Give up if we don't find an instance of a + * uuid at each position (from 0 to + * nd_region->ndr_mappings - 1), or if we find a + * dimm with two instances of the same uuid. + */ + rc = -EINVAL; + goto err; + } else if (pmem_id) { + /* + * If there is more than one valid uuid set, we + * need userspace to clean this up. + */ + rc = -EBUSY; + goto err; + } + memcpy(select_id, nd_label->uuid, NSLABEL_UUID_LEN); + pmem_id = select_id; + } + + /* + * Fix up each mapping's 'labels' to have the validated pmem label for + * that position at labels[0], and NULL at labels[1]. In the process, + * check that the namespace aligns with interleave-set. We know + * that it does not overlap with any blk namespaces by virtue of + * the dimm being enabled (i.e. nd_label_reserve_dpa() + * succeeded). + */ + rc = select_pmem_id(nd_region, pmem_id); + if (rc) + goto err; + + /* Calculate total size and populate namespace properties from label0 */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nd_namespace_label *label0 = nd_mapping->labels[0]; + + size += __le64_to_cpu(label0->rawsize); + if (__le16_to_cpu(label0->position) != 0) + continue; + WARN_ON(nspm->alt_name || nspm->uuid); + nspm->alt_name = kmemdup((void __force *) label0->name, + NSLABEL_NAME_LEN, GFP_KERNEL); + nspm->uuid = kmemdup((void __force *) label0->uuid, + NSLABEL_UUID_LEN, GFP_KERNEL); + } + + if (!nspm->alt_name || !nspm->uuid) { + rc = -ENOMEM; + goto err; + } + + nd_namespace_pmem_set_size(nd_region, nspm, size); + + return 0; + err: + switch (rc) { + case -EINVAL: + dev_dbg(&nd_region->dev, "%s: invalid label(s)\n", __func__); + break; + case -ENODEV: + dev_dbg(&nd_region->dev, "%s: label not found\n", __func__); + break; + default: + dev_dbg(&nd_region->dev, "%s: unexpected err: %d\n", + __func__, rc); + break; + } + return rc; +} + +static struct device **create_namespace_pmem(struct nd_region *nd_region) +{ + struct nd_namespace_pmem *nspm; + struct device *dev, **devs; + struct resource *res; + int rc; + + nspm = kzalloc(sizeof(*nspm), GFP_KERNEL); + if (!nspm) + return NULL; + + dev = &nspm->nsio.common.dev; + dev->type = &namespace_pmem_device_type; + dev->parent = &nd_region->dev; + res = &nspm->nsio.res; + res->name = dev_name(&nd_region->dev); + res->flags = IORESOURCE_MEM; + rc = find_pmem_label_set(nd_region, nspm); + if (rc == -ENODEV) { + int i; + + /* Pass, try to permit namespace creation... */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + + kfree(nd_mapping->labels); + nd_mapping->labels = NULL; + } + + /* Publish a zero-sized namespace for userspace to configure. */ + nd_namespace_pmem_set_size(nd_region, nspm, 0); + + rc = 0; + } else if (rc) + goto err; + + devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL); + if (!devs) + goto err; + + devs[0] = dev; + return devs; + + err: + namespace_pmem_release(&nspm->nsio.common.dev); + return NULL; +} + +struct resource *nsblk_add_resource(struct nd_region *nd_region, + struct nvdimm_drvdata *ndd, struct nd_namespace_blk *nsblk, + resource_size_t start) +{ + struct nd_label_id label_id; + struct resource *res; + + nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL); + res = krealloc(nsblk->res, + sizeof(void *) * (nsblk->num_resources + 1), + GFP_KERNEL); + if (!res) + return NULL; + nsblk->res = (struct resource **) res; + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id.id) == 0 + && res->start == start) { + nsblk->res[nsblk->num_resources++] = res; + return res; + } + return NULL; +} + +static struct device *nd_namespace_blk_create(struct nd_region *nd_region) +{ + struct nd_namespace_blk *nsblk; + struct device *dev; + + if (!is_nd_blk(&nd_region->dev)) + return NULL; + + nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL); + if (!nsblk) + return NULL; + + dev = &nsblk->common.dev; + dev->type = &namespace_blk_device_type; + nsblk->id = ida_simple_get(&nd_region->ns_ida, 0, 0, GFP_KERNEL); + if (nsblk->id < 0) { + kfree(nsblk); + return NULL; + } + dev_set_name(dev, "namespace%d.%d", nd_region->id, nsblk->id); + dev->parent = &nd_region->dev; + dev->groups = nd_namespace_attribute_groups; + + return &nsblk->common.dev; +} + +void nd_region_create_blk_seed(struct nd_region *nd_region) +{ + WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); + nd_region->ns_seed = nd_namespace_blk_create(nd_region); + /* + * Seed creation failures are not fatal, provisioning is simply + * disabled until memory becomes available + */ + if (!nd_region->ns_seed) + dev_err(&nd_region->dev, "failed to create blk namespace\n"); + else + nd_device_register(nd_region->ns_seed); +} + +void nd_region_create_btt_seed(struct nd_region *nd_region) +{ + WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); + nd_region->btt_seed = nd_btt_create(nd_region); + /* + * Seed creation failures are not fatal, provisioning is simply + * disabled until memory becomes available + */ + if (!nd_region->btt_seed) + dev_err(&nd_region->dev, "failed to create btt namespace\n"); +} + +static struct device **create_namespace_blk(struct nd_region *nd_region) +{ + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct nd_namespace_label *nd_label; + struct device *dev, **devs = NULL; + struct nd_namespace_blk *nsblk; + struct nvdimm_drvdata *ndd; + int i, l, count = 0; + struct resource *res; + + if (nd_region->ndr_mappings == 0) + return NULL; + + ndd = to_ndd(nd_mapping); + for_each_label(l, nd_label, nd_mapping->labels) { + u32 flags = __le32_to_cpu(nd_label->flags); + char *name[NSLABEL_NAME_LEN]; + struct device **__devs; + + if (flags & NSLABEL_FLAG_LOCAL) + /* pass */; + else + continue; + + for (i = 0; i < count; i++) { + nsblk = to_nd_namespace_blk(devs[i]); + if (memcmp(nsblk->uuid, nd_label->uuid, + NSLABEL_UUID_LEN) == 0) { + res = nsblk_add_resource(nd_region, ndd, nsblk, + __le64_to_cpu(nd_label->dpa)); + if (!res) + goto err; + nd_dbg_dpa(nd_region, ndd, res, "%s assign\n", + dev_name(&nsblk->common.dev)); + break; + } + } + if (i < count) + continue; + __devs = kcalloc(count + 2, sizeof(dev), GFP_KERNEL); + if (!__devs) + goto err; + memcpy(__devs, devs, sizeof(dev) * count); + kfree(devs); + devs = __devs; + + nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL); + if (!nsblk) + goto err; + dev = &nsblk->common.dev; + dev->type = &namespace_blk_device_type; + dev->parent = &nd_region->dev; + dev_set_name(dev, "namespace%d.%d", nd_region->id, count); + devs[count++] = dev; + nsblk->id = -1; + nsblk->lbasize = __le64_to_cpu(nd_label->lbasize); + nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN, + GFP_KERNEL); + if (!nsblk->uuid) + goto err; + memcpy(name, nd_label->name, NSLABEL_NAME_LEN); + if (name[0]) + nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN, + GFP_KERNEL); + res = nsblk_add_resource(nd_region, ndd, nsblk, + __le64_to_cpu(nd_label->dpa)); + if (!res) + goto err; + nd_dbg_dpa(nd_region, ndd, res, "%s assign\n", + dev_name(&nsblk->common.dev)); + } + + dev_dbg(&nd_region->dev, "%s: discovered %d blk namespace%s\n", + __func__, count, count == 1 ? "" : "s"); + + if (count == 0) { + /* Publish a zero-sized namespace for userspace to configure. */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + + kfree(nd_mapping->labels); + nd_mapping->labels = NULL; + } + + devs = kcalloc(2, sizeof(dev), GFP_KERNEL); + if (!devs) + goto err; + nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL); + if (!nsblk) + goto err; + dev = &nsblk->common.dev; + dev->type = &namespace_blk_device_type; + dev->parent = &nd_region->dev; + devs[count++] = dev; + } + + return devs; + +err: + for (i = 0; i < count; i++) { + nsblk = to_nd_namespace_blk(devs[i]); + namespace_blk_release(&nsblk->common.dev); + } + kfree(devs); + return NULL; +} + +static int init_active_labels(struct nd_region *nd_region) +{ + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nvdimm *nvdimm = nd_mapping->nvdimm; + int count, j; + + /* + * If the dimm is disabled then prevent the region from + * being activated if it aliases DPA. + */ + if (!ndd) { + if ((nvdimm->flags & NDD_ALIASING) == 0) + return 0; + dev_dbg(&nd_region->dev, "%s: is disabled, failing probe\n", + dev_name(&nd_mapping->nvdimm->dev)); + return -ENXIO; + } + nd_mapping->ndd = ndd; + atomic_inc(&nvdimm->busy); + get_ndd(ndd); + + count = nd_label_active_count(ndd); + dev_dbg(ndd->dev, "%s: %d\n", __func__, count); + if (!count) + continue; + nd_mapping->labels = kcalloc(count + 1, sizeof(void *), + GFP_KERNEL); + if (!nd_mapping->labels) + return -ENOMEM; + for (j = 0; j < count; j++) { + struct nd_namespace_label *label; + + label = nd_label_active(ndd, j); + nd_mapping->labels[j] = label; + } + } + + return 0; +} + +int nd_region_register_namespaces(struct nd_region *nd_region, int *err) +{ + struct device **devs = NULL; + int i, rc = 0, type; + + *err = 0; + nvdimm_bus_lock(&nd_region->dev); + rc = init_active_labels(nd_region); + if (rc) { + nvdimm_bus_unlock(&nd_region->dev); + return rc; + } + + type = nd_region_to_nstype(nd_region); + switch (type) { + case ND_DEVICE_NAMESPACE_IO: + devs = create_namespace_io(nd_region); + break; + case ND_DEVICE_NAMESPACE_PMEM: + devs = create_namespace_pmem(nd_region); + break; + case ND_DEVICE_NAMESPACE_BLK: + devs = create_namespace_blk(nd_region); + break; + default: + break; + } + nvdimm_bus_unlock(&nd_region->dev); + + if (!devs) + return -ENODEV; + + for (i = 0; devs[i]; i++) { + struct device *dev = devs[i]; + int id; + + if (type == ND_DEVICE_NAMESPACE_BLK) { + struct nd_namespace_blk *nsblk; + + nsblk = to_nd_namespace_blk(dev); + id = ida_simple_get(&nd_region->ns_ida, 0, 0, + GFP_KERNEL); + nsblk->id = id; + } else + id = i; + + if (id < 0) + break; + dev_set_name(dev, "namespace%d.%d", nd_region->id, id); + dev->groups = nd_namespace_attribute_groups; + nd_device_register(dev); + } + if (i) + nd_region->ns_seed = devs[0]; + + if (devs[i]) { + int j; + + for (j = i; devs[j]; j++) { + struct device *dev = devs[j]; + + device_initialize(dev); + put_device(dev); + } + *err = j - i; + /* + * All of the namespaces we tried to register failed, so + * fail region activation. + */ + if (*err == 0) + rc = -ENODEV; + } + kfree(devs); + + if (rc == -ENODEV) + return rc; + + return i; +} diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h new file mode 100644 index 000000000000..e1970c71ad1c --- /dev/null +++ b/drivers/nvdimm/nd-core.h @@ -0,0 +1,83 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __ND_CORE_H__ +#define __ND_CORE_H__ +#include <linux/libnvdimm.h> +#include <linux/device.h> +#include <linux/libnvdimm.h> +#include <linux/sizes.h> +#include <linux/mutex.h> +#include <linux/nd.h> + +extern struct list_head nvdimm_bus_list; +extern struct mutex nvdimm_bus_list_mutex; +extern int nvdimm_major; + +struct nvdimm_bus { + struct nvdimm_bus_descriptor *nd_desc; + wait_queue_head_t probe_wait; + struct module *module; + struct list_head list; + struct device dev; + int id, probe_active; + struct mutex reconfig_mutex; +}; + +struct nvdimm { + unsigned long flags; + void *provider_data; + unsigned long *dsm_mask; + struct device dev; + atomic_t busy; + int id; +}; + +bool is_nvdimm(struct device *dev); +bool is_nd_pmem(struct device *dev); +bool is_nd_blk(struct device *dev); +struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev); +int __init nvdimm_bus_init(void); +void nvdimm_bus_exit(void); +void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev); +struct nd_region; +void nd_region_create_blk_seed(struct nd_region *nd_region); +void nd_region_create_btt_seed(struct nd_region *nd_region); +void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev); +int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus); +void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus); +void nd_synchronize(void); +int nvdimm_bus_register_dimms(struct nvdimm_bus *nvdimm_bus); +int nvdimm_bus_register_regions(struct nvdimm_bus *nvdimm_bus); +int nvdimm_bus_init_interleave_sets(struct nvdimm_bus *nvdimm_bus); +void __nd_device_register(struct device *dev); +int nd_match_dimm(struct device *dev, void *data); +struct nd_label_id; +char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags); +bool nd_is_uuid_unique(struct device *dev, u8 *uuid); +struct nd_region; +struct nvdimm_drvdata; +struct nd_mapping; +resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, resource_size_t *overlap); +resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping); +resource_size_t nd_region_available_dpa(struct nd_region *nd_region); +resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id); +struct nd_mapping; +struct resource *nsblk_add_resource(struct nd_region *nd_region, + struct nvdimm_drvdata *ndd, struct nd_namespace_blk *nsblk, + resource_size_t start); +int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd); +void get_ndd(struct nvdimm_drvdata *ndd); +resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns); +#endif /* __ND_CORE_H__ */ diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h new file mode 100644 index 000000000000..c41f53e74277 --- /dev/null +++ b/drivers/nvdimm/nd.h @@ -0,0 +1,220 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __ND_H__ +#define __ND_H__ +#include <linux/libnvdimm.h> +#include <linux/blkdev.h> +#include <linux/device.h> +#include <linux/mutex.h> +#include <linux/ndctl.h> +#include <linux/types.h> +#include "label.h" + +enum { + /* + * Limits the maximum number of block apertures a dimm can + * support and is an input to the geometry/on-disk-format of a + * BTT instance + */ + ND_MAX_LANES = 256, + SECTOR_SHIFT = 9, + INT_LBASIZE_ALIGNMENT = 64, +}; + +struct nvdimm_drvdata { + struct device *dev; + int nsindex_size; + struct nd_cmd_get_config_size nsarea; + void *data; + int ns_current, ns_next; + struct resource dpa; + struct kref kref; +}; + +struct nd_region_namespaces { + int count; + int active; +}; + +static inline struct nd_namespace_index *to_namespace_index( + struct nvdimm_drvdata *ndd, int i) +{ + if (i < 0) + return NULL; + + return ndd->data + sizeof_namespace_index(ndd) * i; +} + +static inline struct nd_namespace_index *to_current_namespace_index( + struct nvdimm_drvdata *ndd) +{ + return to_namespace_index(ndd, ndd->ns_current); +} + +static inline struct nd_namespace_index *to_next_namespace_index( + struct nvdimm_drvdata *ndd) +{ + return to_namespace_index(ndd, ndd->ns_next); +} + +#define nd_dbg_dpa(r, d, res, fmt, arg...) \ + dev_dbg((r) ? &(r)->dev : (d)->dev, "%s: %.13s: %#llx @ %#llx " fmt, \ + (r) ? dev_name((d)->dev) : "", res ? res->name : "null", \ + (unsigned long long) (res ? resource_size(res) : 0), \ + (unsigned long long) (res ? res->start : 0), ##arg) + +#define for_each_label(l, label, labels) \ + for (l = 0; (label = labels ? labels[l] : NULL); l++) + +#define for_each_dpa_resource(ndd, res) \ + for (res = (ndd)->dpa.child; res; res = res->sibling) + +#define for_each_dpa_resource_safe(ndd, res, next) \ + for (res = (ndd)->dpa.child, next = res ? res->sibling : NULL; \ + res; res = next, next = next ? next->sibling : NULL) + +struct nd_percpu_lane { + int count; + spinlock_t lock; +}; + +struct nd_region { + struct device dev; + struct ida ns_ida; + struct ida btt_ida; + struct device *ns_seed; + struct device *btt_seed; + u16 ndr_mappings; + u64 ndr_size; + u64 ndr_start; + int id, num_lanes, ro, numa_node; + void *provider_data; + struct nd_interleave_set *nd_set; + struct nd_percpu_lane __percpu *lane; + struct nd_mapping mapping[0]; +}; + +struct nd_blk_region { + int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); + void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); + int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, + void *iobuf, u64 len, int rw); + void *blk_provider_data; + struct nd_region nd_region; +}; + +/* + * Lookup next in the repeating sequence of 01, 10, and 11. + */ +static inline unsigned nd_inc_seq(unsigned seq) +{ + static const unsigned next[] = { 0, 2, 3, 1 }; + + return next[seq & 3]; +} + +struct btt; +struct nd_btt { + struct device dev; + struct nd_namespace_common *ndns; + struct btt *btt; + unsigned long lbasize; + u8 *uuid; + int id; +}; + +enum nd_async_mode { + ND_SYNC, + ND_ASYNC, +}; + +int nd_integrity_init(struct gendisk *disk, unsigned long meta_size); +void wait_nvdimm_bus_probe_idle(struct device *dev); +void nd_device_register(struct device *dev); +void nd_device_unregister(struct device *dev, enum nd_async_mode mode); +int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, + size_t len); +ssize_t nd_sector_size_show(unsigned long current_lbasize, + const unsigned long *supported, char *buf); +ssize_t nd_sector_size_store(struct device *dev, const char *buf, + unsigned long *current_lbasize, const unsigned long *supported); +int __init nvdimm_init(void); +int __init nd_region_init(void); +void nvdimm_exit(void); +void nd_region_exit(void); +struct nvdimm; +struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping); +int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); +int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); +int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, + void *buf, size_t len); +struct nd_btt *to_nd_btt(struct device *dev); +struct btt_sb; +u64 nd_btt_sb_checksum(struct btt_sb *btt_sb); +#if IS_ENABLED(CONFIG_BTT) +int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata); +bool is_nd_btt(struct device *dev); +struct device *nd_btt_create(struct nd_region *nd_region); +#else +static inline nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata) +{ + return -ENODEV; +} + +static inline bool is_nd_btt(struct device *dev) +{ + return false; +} + +static inline struct device *nd_btt_create(struct nd_region *nd_region) +{ + return NULL; +} + +#endif +struct nd_region *to_nd_region(struct device *dev); +int nd_region_to_nstype(struct nd_region *nd_region); +int nd_region_register_namespaces(struct nd_region *nd_region, int *err); +u64 nd_region_interleave_set_cookie(struct nd_region *nd_region); +void nvdimm_bus_lock(struct device *dev); +void nvdimm_bus_unlock(struct device *dev); +bool is_nvdimm_bus_locked(struct device *dev); +int nvdimm_revalidate_disk(struct gendisk *disk); +void nvdimm_drvdata_release(struct kref *kref); +void put_ndd(struct nvdimm_drvdata *ndd); +int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd); +void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res); +struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id, resource_size_t start, + resource_size_t n); +resource_size_t nvdimm_namespace_capacity(struct nd_namespace_common *ndns); +struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev); +int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns); +int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns); +const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, + char *name); +int nd_blk_region_init(struct nd_region *nd_region); +void __nd_iostat_start(struct bio *bio, unsigned long *start); +static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) +{ + struct gendisk *disk = bio->bi_bdev->bd_disk; + + if (!blk_queue_io_stat(disk->queue)) + return false; + + __nd_iostat_start(bio, start); + return true; +} +void nd_iostat_end(struct bio *bio, unsigned long start); +resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk); +#endif /* __ND_H__ */ diff --git a/drivers/block/pmem.c b/drivers/nvdimm/pmem.c index 095dfaadcaa5..ade9eb917a4d 100644 --- a/drivers/block/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -1,7 +1,7 @@ /* * Persistent Memory Driver * - * Copyright (c) 2014, Intel Corporation. + * Copyright (c) 2014-2015, Intel Corporation. * Copyright (c) 2015, Christoph Hellwig <[email protected]>. * Copyright (c) 2015, Boaz Harrosh <[email protected]>. * @@ -23,8 +23,9 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/slab.h> - -#define PMEM_MINORS 16 +#include <linux/pmem.h> +#include <linux/nd.h> +#include "nd.h" struct pmem_device { struct request_queue *pmem_queue; @@ -32,12 +33,11 @@ struct pmem_device { /* One contiguous memory region per device */ phys_addr_t phys_addr; - void *virt_addr; + void __pmem *virt_addr; size_t size; }; static int pmem_major; -static atomic_t pmem_index; static void pmem_do_bvec(struct pmem_device *pmem, struct page *page, unsigned int len, unsigned int off, int rw, @@ -45,13 +45,14 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page, { void *mem = kmap_atomic(page); size_t pmem_off = sector << 9; + void __pmem *pmem_addr = pmem->virt_addr + pmem_off; if (rw == READ) { - memcpy(mem + off, pmem->virt_addr + pmem_off, len); + memcpy_from_pmem(mem + off, pmem_addr, len); flush_dcache_page(page); } else { flush_dcache_page(page); - memcpy(pmem->virt_addr + pmem_off, mem + off, len); + memcpy_to_pmem(pmem_addr, mem + off, len); } kunmap_atomic(mem); @@ -59,31 +60,24 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page, static void pmem_make_request(struct request_queue *q, struct bio *bio) { - struct block_device *bdev = bio->bi_bdev; - struct pmem_device *pmem = bdev->bd_disk->private_data; - int rw; + bool do_acct; + unsigned long start; struct bio_vec bvec; - sector_t sector; struct bvec_iter iter; - int err = 0; - - if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) { - err = -EIO; - goto out; - } - - BUG_ON(bio->bi_rw & REQ_DISCARD); + struct block_device *bdev = bio->bi_bdev; + struct pmem_device *pmem = bdev->bd_disk->private_data; - rw = bio_data_dir(bio); - sector = bio->bi_iter.bi_sector; - bio_for_each_segment(bvec, bio, iter) { + do_acct = nd_iostat_start(bio, &start); + bio_for_each_segment(bvec, bio, iter) pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, bvec.bv_offset, - rw, sector); - sector += bvec.bv_len >> 9; - } + bio_data_dir(bio), iter.bi_sector); + if (do_acct) + nd_iostat_end(bio, start); -out: - bio_endio(bio, err); + if (bio_data_dir(bio)) + wmb_pmem(); + + bio_endio(bio, 0); } static int pmem_rw_page(struct block_device *bdev, sector_t sector, @@ -106,7 +100,8 @@ static long pmem_direct_access(struct block_device *bdev, sector_t sector, if (!pmem) return -ENODEV; - *kaddr = pmem->virt_addr + offset; + /* FIXME convert DAX to comprehend that this mapping has a lifetime */ + *kaddr = (void __force *) pmem->virt_addr + offset; *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT; return pmem->size - offset; @@ -116,124 +111,165 @@ static const struct block_device_operations pmem_fops = { .owner = THIS_MODULE, .rw_page = pmem_rw_page, .direct_access = pmem_direct_access, + .revalidate_disk = nvdimm_revalidate_disk, }; -static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res) +static struct pmem_device *pmem_alloc(struct device *dev, + struct resource *res, int id) { struct pmem_device *pmem; - struct gendisk *disk; - int idx, err; - err = -ENOMEM; pmem = kzalloc(sizeof(*pmem), GFP_KERNEL); if (!pmem) - goto out; + return ERR_PTR(-ENOMEM); pmem->phys_addr = res->start; pmem->size = resource_size(res); + if (!arch_has_pmem_api()) + dev_warn(dev, "unable to guarantee persistence of writes\n"); + + if (!request_mem_region(pmem->phys_addr, pmem->size, dev_name(dev))) { + dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", + &pmem->phys_addr, pmem->size); + kfree(pmem); + return ERR_PTR(-EBUSY); + } - err = -EINVAL; - if (!request_mem_region(pmem->phys_addr, pmem->size, "pmem")) { - dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", &pmem->phys_addr, pmem->size); - goto out_free_dev; + pmem->virt_addr = memremap_pmem(pmem->phys_addr, pmem->size); + if (!pmem->virt_addr) { + release_mem_region(pmem->phys_addr, pmem->size); + kfree(pmem); + return ERR_PTR(-ENXIO); } - /* - * Map the memory as write-through, as we can't write back the contents - * of the CPU caches in case of a crash. - */ - err = -ENOMEM; - pmem->virt_addr = ioremap_wt(pmem->phys_addr, pmem->size); - if (!pmem->virt_addr) - goto out_release_region; + return pmem; +} + +static void pmem_detach_disk(struct pmem_device *pmem) +{ + del_gendisk(pmem->pmem_disk); + put_disk(pmem->pmem_disk); + blk_cleanup_queue(pmem->pmem_queue); +} + +static int pmem_attach_disk(struct nd_namespace_common *ndns, + struct pmem_device *pmem) +{ + struct gendisk *disk; pmem->pmem_queue = blk_alloc_queue(GFP_KERNEL); if (!pmem->pmem_queue) - goto out_unmap; + return -ENOMEM; blk_queue_make_request(pmem->pmem_queue, pmem_make_request); - blk_queue_max_hw_sectors(pmem->pmem_queue, 1024); + blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX); blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY); + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue); - disk = alloc_disk(PMEM_MINORS); - if (!disk) - goto out_free_queue; - - idx = atomic_inc_return(&pmem_index) - 1; + disk = alloc_disk(0); + if (!disk) { + blk_cleanup_queue(pmem->pmem_queue); + return -ENOMEM; + } disk->major = pmem_major; - disk->first_minor = PMEM_MINORS * idx; + disk->first_minor = 0; disk->fops = &pmem_fops; disk->private_data = pmem; disk->queue = pmem->pmem_queue; disk->flags = GENHD_FL_EXT_DEVT; - sprintf(disk->disk_name, "pmem%d", idx); - disk->driverfs_dev = dev; + nvdimm_namespace_disk_name(ndns, disk->disk_name); + disk->driverfs_dev = &ndns->dev; set_capacity(disk, pmem->size >> 9); pmem->pmem_disk = disk; add_disk(disk); + revalidate_disk(disk); - return pmem; + return 0; +} -out_free_queue: - blk_cleanup_queue(pmem->pmem_queue); -out_unmap: - iounmap(pmem->virt_addr); -out_release_region: - release_mem_region(pmem->phys_addr, pmem->size); -out_free_dev: - kfree(pmem); -out: - return ERR_PTR(err); +static int pmem_rw_bytes(struct nd_namespace_common *ndns, + resource_size_t offset, void *buf, size_t size, int rw) +{ + struct pmem_device *pmem = dev_get_drvdata(ndns->claim); + + if (unlikely(offset + size > pmem->size)) { + dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); + return -EFAULT; + } + + if (rw == READ) + memcpy_from_pmem(buf, pmem->virt_addr + offset, size); + else { + memcpy_to_pmem(pmem->virt_addr + offset, buf, size); + wmb_pmem(); + } + + return 0; } static void pmem_free(struct pmem_device *pmem) { - del_gendisk(pmem->pmem_disk); - put_disk(pmem->pmem_disk); - blk_cleanup_queue(pmem->pmem_queue); - iounmap(pmem->virt_addr); + memunmap_pmem(pmem->virt_addr); release_mem_region(pmem->phys_addr, pmem->size); kfree(pmem); } -static int pmem_probe(struct platform_device *pdev) +static int nd_pmem_probe(struct device *dev) { + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_namespace_common *ndns; + struct nd_namespace_io *nsio; struct pmem_device *pmem; - struct resource *res; - - if (WARN_ON(pdev->num_resources > 1)) - return -ENXIO; + int rc; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -ENXIO; + ndns = nvdimm_namespace_common_probe(dev); + if (IS_ERR(ndns)) + return PTR_ERR(ndns); - pmem = pmem_alloc(&pdev->dev, res); + nsio = to_nd_namespace_io(&ndns->dev); + pmem = pmem_alloc(dev, &nsio->res, nd_region->id); if (IS_ERR(pmem)) return PTR_ERR(pmem); - platform_set_drvdata(pdev, pmem); - - return 0; + dev_set_drvdata(dev, pmem); + ndns->rw_bytes = pmem_rw_bytes; + if (is_nd_btt(dev)) + rc = nvdimm_namespace_attach_btt(ndns); + else if (nd_btt_probe(ndns, pmem) == 0) { + /* we'll come back as btt-pmem */ + rc = -ENXIO; + } else + rc = pmem_attach_disk(ndns, pmem); + if (rc) + pmem_free(pmem); + return rc; } -static int pmem_remove(struct platform_device *pdev) +static int nd_pmem_remove(struct device *dev) { - struct pmem_device *pmem = platform_get_drvdata(pdev); + struct pmem_device *pmem = dev_get_drvdata(dev); + if (is_nd_btt(dev)) + nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); + else + pmem_detach_disk(pmem); pmem_free(pmem); + return 0; } -static struct platform_driver pmem_driver = { - .probe = pmem_probe, - .remove = pmem_remove, - .driver = { - .owner = THIS_MODULE, - .name = "pmem", +MODULE_ALIAS("pmem"); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM); +static struct nd_device_driver nd_pmem_driver = { + .probe = nd_pmem_probe, + .remove = nd_pmem_remove, + .drv = { + .name = "nd_pmem", }, + .type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM, }; static int __init pmem_init(void) @@ -244,16 +280,19 @@ static int __init pmem_init(void) if (pmem_major < 0) return pmem_major; - error = platform_driver_register(&pmem_driver); - if (error) + error = nd_driver_register(&nd_pmem_driver); + if (error) { unregister_blkdev(pmem_major, "pmem"); - return error; + return error; + } + + return 0; } module_init(pmem_init); static void pmem_exit(void) { - platform_driver_unregister(&pmem_driver); + driver_unregister(&nd_pmem_driver.drv); unregister_blkdev(pmem_major, "pmem"); } module_exit(pmem_exit); diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c new file mode 100644 index 000000000000..f28f78ccff19 --- /dev/null +++ b/drivers/nvdimm/region.c @@ -0,0 +1,114 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/cpumask.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/nd.h> +#include "nd.h" + +static int nd_region_probe(struct device *dev) +{ + int err, rc; + static unsigned long once; + struct nd_region_namespaces *num_ns; + struct nd_region *nd_region = to_nd_region(dev); + + if (nd_region->num_lanes > num_online_cpus() + && nd_region->num_lanes < num_possible_cpus() + && !test_and_set_bit(0, &once)) { + dev_info(dev, "online cpus (%d) < concurrent i/o lanes (%d) < possible cpus (%d)\n", + num_online_cpus(), nd_region->num_lanes, + num_possible_cpus()); + dev_info(dev, "setting nr_cpus=%d may yield better libnvdimm device performance\n", + nd_region->num_lanes); + } + + rc = nd_blk_region_init(nd_region); + if (rc) + return rc; + + rc = nd_region_register_namespaces(nd_region, &err); + num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL); + if (!num_ns) + return -ENOMEM; + + if (rc < 0) + return rc; + + num_ns->active = rc; + num_ns->count = rc + err; + dev_set_drvdata(dev, num_ns); + + if (rc && err && rc == err) + return -ENODEV; + + nd_region->btt_seed = nd_btt_create(nd_region); + if (err == 0) + return 0; + + /* + * Given multiple namespaces per region, we do not want to + * disable all the successfully registered peer namespaces upon + * a single registration failure. If userspace is missing a + * namespace that it expects it can disable/re-enable the region + * to retry discovery after correcting the failure. + * <regionX>/namespaces returns the current + * "<async-registered>/<total>" namespace count. + */ + dev_err(dev, "failed to register %d namespace%s, continuing...\n", + err, err == 1 ? "" : "s"); + return 0; +} + +static int child_unregister(struct device *dev, void *data) +{ + nd_device_unregister(dev, ND_SYNC); + return 0; +} + +static int nd_region_remove(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev); + + /* flush attribute readers and disable */ + nvdimm_bus_lock(dev); + nd_region->ns_seed = NULL; + nd_region->btt_seed = NULL; + dev_set_drvdata(dev, NULL); + nvdimm_bus_unlock(dev); + + device_for_each_child(dev, NULL, child_unregister); + return 0; +} + +static struct nd_device_driver nd_region_driver = { + .probe = nd_region_probe, + .remove = nd_region_remove, + .drv = { + .name = "nd_region", + }, + .type = ND_DRIVER_REGION_BLK | ND_DRIVER_REGION_PMEM, +}; + +int __init nd_region_init(void) +{ + return nd_driver_register(&nd_region_driver); +} + +void nd_region_exit(void) +{ + driver_unregister(&nd_region_driver.drv); +} + +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_PMEM); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_BLK); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c new file mode 100644 index 000000000000..a5233422f9dc --- /dev/null +++ b/drivers/nvdimm/region_devs.c @@ -0,0 +1,787 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/scatterlist.h> +#include <linux/highmem.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/sort.h> +#include <linux/io.h> +#include <linux/nd.h> +#include "nd-core.h" +#include "nd.h" + +static DEFINE_IDA(region_ida); + +static void nd_region_release(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev); + u16 i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + put_device(&nvdimm->dev); + } + free_percpu(nd_region->lane); + ida_simple_remove(®ion_ida, nd_region->id); + if (is_nd_blk(dev)) + kfree(to_nd_blk_region(dev)); + else + kfree(nd_region); +} + +static struct device_type nd_blk_device_type = { + .name = "nd_blk", + .release = nd_region_release, +}; + +static struct device_type nd_pmem_device_type = { + .name = "nd_pmem", + .release = nd_region_release, +}; + +static struct device_type nd_volatile_device_type = { + .name = "nd_volatile", + .release = nd_region_release, +}; + +bool is_nd_pmem(struct device *dev) +{ + return dev ? dev->type == &nd_pmem_device_type : false; +} + +bool is_nd_blk(struct device *dev) +{ + return dev ? dev->type == &nd_blk_device_type : false; +} + +struct nd_region *to_nd_region(struct device *dev) +{ + struct nd_region *nd_region = container_of(dev, struct nd_region, dev); + + WARN_ON(dev->type->release != nd_region_release); + return nd_region; +} +EXPORT_SYMBOL_GPL(to_nd_region); + +struct nd_blk_region *to_nd_blk_region(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev); + + WARN_ON(!is_nd_blk(dev)); + return container_of(nd_region, struct nd_blk_region, nd_region); +} +EXPORT_SYMBOL_GPL(to_nd_blk_region); + +void *nd_region_provider_data(struct nd_region *nd_region) +{ + return nd_region->provider_data; +} +EXPORT_SYMBOL_GPL(nd_region_provider_data); + +void *nd_blk_region_provider_data(struct nd_blk_region *ndbr) +{ + return ndbr->blk_provider_data; +} +EXPORT_SYMBOL_GPL(nd_blk_region_provider_data); + +void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data) +{ + ndbr->blk_provider_data = data; +} +EXPORT_SYMBOL_GPL(nd_blk_region_set_provider_data); + +/** + * nd_region_to_nstype() - region to an integer namespace type + * @nd_region: region-device to interrogate + * + * This is the 'nstype' attribute of a region as well, an input to the + * MODALIAS for namespace devices, and bit number for a nvdimm_bus to match + * namespace devices with namespace drivers. + */ +int nd_region_to_nstype(struct nd_region *nd_region) +{ + if (is_nd_pmem(&nd_region->dev)) { + u16 i, alias; + + for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + if (nvdimm->flags & NDD_ALIASING) + alias++; + } + if (alias) + return ND_DEVICE_NAMESPACE_PMEM; + else + return ND_DEVICE_NAMESPACE_IO; + } else if (is_nd_blk(&nd_region->dev)) { + return ND_DEVICE_NAMESPACE_BLK; + } + + return 0; +} +EXPORT_SYMBOL(nd_region_to_nstype); + +static int is_uuid_busy(struct device *dev, void *data) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + u8 *uuid = data; + + switch (nd_region_to_nstype(nd_region)) { + case ND_DEVICE_NAMESPACE_PMEM: { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + if (!nspm->uuid) + break; + if (memcmp(uuid, nspm->uuid, NSLABEL_UUID_LEN) == 0) + return -EBUSY; + break; + } + case ND_DEVICE_NAMESPACE_BLK: { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + if (!nsblk->uuid) + break; + if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) == 0) + return -EBUSY; + break; + } + default: + break; + } + + return 0; +} + +static int is_namespace_uuid_busy(struct device *dev, void *data) +{ + if (is_nd_pmem(dev) || is_nd_blk(dev)) + return device_for_each_child(dev, data, is_uuid_busy); + return 0; +} + +/** + * nd_is_uuid_unique - verify that no other namespace has @uuid + * @dev: any device on a nvdimm_bus + * @uuid: uuid to check + */ +bool nd_is_uuid_unique(struct device *dev, u8 *uuid) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return false; + WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm_bus->dev)); + if (device_for_each_child(&nvdimm_bus->dev, uuid, + is_namespace_uuid_busy) != 0) + return false; + return true; +} + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + unsigned long long size = 0; + + if (is_nd_pmem(dev)) { + size = nd_region->ndr_size; + } else if (nd_region->ndr_mappings == 1) { + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + + size = nd_mapping->size; + } + + return sprintf(buf, "%llu\n", size); +} +static DEVICE_ATTR_RO(size); + +static ssize_t mappings_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + + return sprintf(buf, "%d\n", nd_region->ndr_mappings); +} +static DEVICE_ATTR_RO(mappings); + +static ssize_t nstype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + + return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region)); +} +static DEVICE_ATTR_RO(nstype); + +static ssize_t set_cookie_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + struct nd_interleave_set *nd_set = nd_region->nd_set; + + if (is_nd_pmem(dev) && nd_set) + /* pass, should be precluded by region_visible */; + else + return -ENXIO; + + return sprintf(buf, "%#llx\n", nd_set->cookie); +} +static DEVICE_ATTR_RO(set_cookie); + +resource_size_t nd_region_available_dpa(struct nd_region *nd_region) +{ + resource_size_t blk_max_overlap = 0, available, overlap; + int i; + + WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); + + retry: + available = 0; + overlap = blk_max_overlap; + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + + /* if a dimm is disabled the available capacity is zero */ + if (!ndd) + return 0; + + if (is_nd_pmem(&nd_region->dev)) { + available += nd_pmem_available_dpa(nd_region, + nd_mapping, &overlap); + if (overlap > blk_max_overlap) { + blk_max_overlap = overlap; + goto retry; + } + } else if (is_nd_blk(&nd_region->dev)) { + available += nd_blk_available_dpa(nd_mapping); + } + } + + return available; +} + +static ssize_t available_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + unsigned long long available = 0; + + /* + * Flush in-flight updates and grab a snapshot of the available + * size. Of course, this value is potentially invalidated the + * memory nvdimm_bus_lock() is dropped, but that's userspace's + * problem to not race itself. + */ + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + available = nd_region_available_dpa(nd_region); + nvdimm_bus_unlock(dev); + + return sprintf(buf, "%llu\n", available); +} +static DEVICE_ATTR_RO(available_size); + +static ssize_t init_namespaces_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region_namespaces *num_ns = dev_get_drvdata(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (num_ns) + rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count); + else + rc = -ENXIO; + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(init_namespaces); + +static ssize_t namespace_seed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (nd_region->ns_seed) + rc = sprintf(buf, "%s\n", dev_name(nd_region->ns_seed)); + else + rc = sprintf(buf, "\n"); + nvdimm_bus_unlock(dev); + return rc; +} +static DEVICE_ATTR_RO(namespace_seed); + +static ssize_t btt_seed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (nd_region->btt_seed) + rc = sprintf(buf, "%s\n", dev_name(nd_region->btt_seed)); + else + rc = sprintf(buf, "\n"); + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(btt_seed); + +static ssize_t read_only_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + + return sprintf(buf, "%d\n", nd_region->ro); +} + +static ssize_t read_only_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + bool ro; + int rc = strtobool(buf, &ro); + struct nd_region *nd_region = to_nd_region(dev); + + if (rc) + return rc; + + nd_region->ro = ro; + return len; +} +static DEVICE_ATTR_RW(read_only); + +static struct attribute *nd_region_attributes[] = { + &dev_attr_size.attr, + &dev_attr_nstype.attr, + &dev_attr_mappings.attr, + &dev_attr_btt_seed.attr, + &dev_attr_read_only.attr, + &dev_attr_set_cookie.attr, + &dev_attr_available_size.attr, + &dev_attr_namespace_seed.attr, + &dev_attr_init_namespaces.attr, + NULL, +}; + +static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, typeof(*dev), kobj); + struct nd_region *nd_region = to_nd_region(dev); + struct nd_interleave_set *nd_set = nd_region->nd_set; + int type = nd_region_to_nstype(nd_region); + + if (a != &dev_attr_set_cookie.attr + && a != &dev_attr_available_size.attr) + return a->mode; + + if ((type == ND_DEVICE_NAMESPACE_PMEM + || type == ND_DEVICE_NAMESPACE_BLK) + && a == &dev_attr_available_size.attr) + return a->mode; + else if (is_nd_pmem(dev) && nd_set) + return a->mode; + + return 0; +} + +struct attribute_group nd_region_attribute_group = { + .attrs = nd_region_attributes, + .is_visible = region_visible, +}; +EXPORT_SYMBOL_GPL(nd_region_attribute_group); + +u64 nd_region_interleave_set_cookie(struct nd_region *nd_region) +{ + struct nd_interleave_set *nd_set = nd_region->nd_set; + + if (nd_set) + return nd_set->cookie; + return 0; +} + +/* + * Upon successful probe/remove, take/release a reference on the + * associated interleave set (if present), and plant new btt + namespace + * seeds. Also, on the removal of a BLK region, notify the provider to + * disable the region. + */ +static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus, + struct device *dev, bool probe) +{ + struct nd_region *nd_region; + + if (!probe && (is_nd_pmem(dev) || is_nd_blk(dev))) { + int i; + + nd_region = to_nd_region(dev); + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = nd_mapping->ndd; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + kfree(nd_mapping->labels); + nd_mapping->labels = NULL; + put_ndd(ndd); + nd_mapping->ndd = NULL; + if (ndd) + atomic_dec(&nvdimm->busy); + } + + if (is_nd_pmem(dev)) + return; + + to_nd_blk_region(dev)->disable(nvdimm_bus, dev); + } + if (dev->parent && is_nd_blk(dev->parent) && probe) { + nd_region = to_nd_region(dev->parent); + nvdimm_bus_lock(dev); + if (nd_region->ns_seed == dev) + nd_region_create_blk_seed(nd_region); + nvdimm_bus_unlock(dev); + } + if (is_nd_btt(dev) && probe) { + nd_region = to_nd_region(dev->parent); + nvdimm_bus_lock(dev); + if (nd_region->btt_seed == dev) + nd_region_create_btt_seed(nd_region); + nvdimm_bus_unlock(dev); + } +} + +void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev) +{ + nd_region_notify_driver_action(nvdimm_bus, dev, true); +} + +void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev) +{ + nd_region_notify_driver_action(nvdimm_bus, dev, false); +} + +static ssize_t mappingN(struct device *dev, char *buf, int n) +{ + struct nd_region *nd_region = to_nd_region(dev); + struct nd_mapping *nd_mapping; + struct nvdimm *nvdimm; + + if (n >= nd_region->ndr_mappings) + return -ENXIO; + nd_mapping = &nd_region->mapping[n]; + nvdimm = nd_mapping->nvdimm; + + return sprintf(buf, "%s,%llu,%llu\n", dev_name(&nvdimm->dev), + nd_mapping->start, nd_mapping->size); +} + +#define REGION_MAPPING(idx) \ +static ssize_t mapping##idx##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + return mappingN(dev, buf, idx); \ +} \ +static DEVICE_ATTR_RO(mapping##idx) + +/* + * 32 should be enough for a while, even in the presence of socket + * interleave a 32-way interleave set is a degenerate case. + */ +REGION_MAPPING(0); +REGION_MAPPING(1); +REGION_MAPPING(2); +REGION_MAPPING(3); +REGION_MAPPING(4); +REGION_MAPPING(5); +REGION_MAPPING(6); +REGION_MAPPING(7); +REGION_MAPPING(8); +REGION_MAPPING(9); +REGION_MAPPING(10); +REGION_MAPPING(11); +REGION_MAPPING(12); +REGION_MAPPING(13); +REGION_MAPPING(14); +REGION_MAPPING(15); +REGION_MAPPING(16); +REGION_MAPPING(17); +REGION_MAPPING(18); +REGION_MAPPING(19); +REGION_MAPPING(20); +REGION_MAPPING(21); +REGION_MAPPING(22); +REGION_MAPPING(23); +REGION_MAPPING(24); +REGION_MAPPING(25); +REGION_MAPPING(26); +REGION_MAPPING(27); +REGION_MAPPING(28); +REGION_MAPPING(29); +REGION_MAPPING(30); +REGION_MAPPING(31); + +static umode_t mapping_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct nd_region *nd_region = to_nd_region(dev); + + if (n < nd_region->ndr_mappings) + return a->mode; + return 0; +} + +static struct attribute *mapping_attributes[] = { + &dev_attr_mapping0.attr, + &dev_attr_mapping1.attr, + &dev_attr_mapping2.attr, + &dev_attr_mapping3.attr, + &dev_attr_mapping4.attr, + &dev_attr_mapping5.attr, + &dev_attr_mapping6.attr, + &dev_attr_mapping7.attr, + &dev_attr_mapping8.attr, + &dev_attr_mapping9.attr, + &dev_attr_mapping10.attr, + &dev_attr_mapping11.attr, + &dev_attr_mapping12.attr, + &dev_attr_mapping13.attr, + &dev_attr_mapping14.attr, + &dev_attr_mapping15.attr, + &dev_attr_mapping16.attr, + &dev_attr_mapping17.attr, + &dev_attr_mapping18.attr, + &dev_attr_mapping19.attr, + &dev_attr_mapping20.attr, + &dev_attr_mapping21.attr, + &dev_attr_mapping22.attr, + &dev_attr_mapping23.attr, + &dev_attr_mapping24.attr, + &dev_attr_mapping25.attr, + &dev_attr_mapping26.attr, + &dev_attr_mapping27.attr, + &dev_attr_mapping28.attr, + &dev_attr_mapping29.attr, + &dev_attr_mapping30.attr, + &dev_attr_mapping31.attr, + NULL, +}; + +struct attribute_group nd_mapping_attribute_group = { + .is_visible = mapping_visible, + .attrs = mapping_attributes, +}; +EXPORT_SYMBOL_GPL(nd_mapping_attribute_group); + +int nd_blk_region_init(struct nd_region *nd_region) +{ + struct device *dev = &nd_region->dev; + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!is_nd_blk(dev)) + return 0; + + if (nd_region->ndr_mappings < 1) { + dev_err(dev, "invalid BLK region\n"); + return -ENXIO; + } + + return to_nd_blk_region(dev)->enable(nvdimm_bus, dev); +} + +/** + * nd_region_acquire_lane - allocate and lock a lane + * @nd_region: region id and number of lanes possible + * + * A lane correlates to a BLK-data-window and/or a log slot in the BTT. + * We optimize for the common case where there are 256 lanes, one + * per-cpu. For larger systems we need to lock to share lanes. For now + * this implementation assumes the cost of maintaining an allocator for + * free lanes is on the order of the lock hold time, so it implements a + * static lane = cpu % num_lanes mapping. + * + * In the case of a BTT instance on top of a BLK namespace a lane may be + * acquired recursively. We lock on the first instance. + * + * In the case of a BTT instance on top of PMEM, we only acquire a lane + * for the BTT metadata updates. + */ +unsigned int nd_region_acquire_lane(struct nd_region *nd_region) +{ + unsigned int cpu, lane; + + cpu = get_cpu(); + if (nd_region->num_lanes < nr_cpu_ids) { + struct nd_percpu_lane *ndl_lock, *ndl_count; + + lane = cpu % nd_region->num_lanes; + ndl_count = per_cpu_ptr(nd_region->lane, cpu); + ndl_lock = per_cpu_ptr(nd_region->lane, lane); + if (ndl_count->count++ == 0) + spin_lock(&ndl_lock->lock); + } else + lane = cpu; + + return lane; +} +EXPORT_SYMBOL(nd_region_acquire_lane); + +void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane) +{ + if (nd_region->num_lanes < nr_cpu_ids) { + unsigned int cpu = get_cpu(); + struct nd_percpu_lane *ndl_lock, *ndl_count; + + ndl_count = per_cpu_ptr(nd_region->lane, cpu); + ndl_lock = per_cpu_ptr(nd_region->lane, lane); + if (--ndl_count->count == 0) + spin_unlock(&ndl_lock->lock); + put_cpu(); + } + put_cpu(); +} +EXPORT_SYMBOL(nd_region_release_lane); + +static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc, struct device_type *dev_type, + const char *caller) +{ + struct nd_region *nd_region; + struct device *dev; + void *region_buf; + unsigned int i; + int ro = 0; + + for (i = 0; i < ndr_desc->num_mappings; i++) { + struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + if ((nd_mapping->start | nd_mapping->size) % SZ_4K) { + dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not 4K aligned\n", + caller, dev_name(&nvdimm->dev), i); + + return NULL; + } + + if (nvdimm->flags & NDD_UNARMED) + ro = 1; + } + + if (dev_type == &nd_blk_device_type) { + struct nd_blk_region_desc *ndbr_desc; + struct nd_blk_region *ndbr; + + ndbr_desc = to_blk_region_desc(ndr_desc); + ndbr = kzalloc(sizeof(*ndbr) + sizeof(struct nd_mapping) + * ndr_desc->num_mappings, + GFP_KERNEL); + if (ndbr) { + nd_region = &ndbr->nd_region; + ndbr->enable = ndbr_desc->enable; + ndbr->disable = ndbr_desc->disable; + ndbr->do_io = ndbr_desc->do_io; + } + region_buf = ndbr; + } else { + nd_region = kzalloc(sizeof(struct nd_region) + + sizeof(struct nd_mapping) + * ndr_desc->num_mappings, + GFP_KERNEL); + region_buf = nd_region; + } + + if (!region_buf) + return NULL; + nd_region->id = ida_simple_get(®ion_ida, 0, 0, GFP_KERNEL); + if (nd_region->id < 0) + goto err_id; + + nd_region->lane = alloc_percpu(struct nd_percpu_lane); + if (!nd_region->lane) + goto err_percpu; + + for (i = 0; i < nr_cpu_ids; i++) { + struct nd_percpu_lane *ndl; + + ndl = per_cpu_ptr(nd_region->lane, i); + spin_lock_init(&ndl->lock); + ndl->count = 0; + } + + memcpy(nd_region->mapping, ndr_desc->nd_mapping, + sizeof(struct nd_mapping) * ndr_desc->num_mappings); + for (i = 0; i < ndr_desc->num_mappings; i++) { + struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + get_device(&nvdimm->dev); + } + nd_region->ndr_mappings = ndr_desc->num_mappings; + nd_region->provider_data = ndr_desc->provider_data; + nd_region->nd_set = ndr_desc->nd_set; + nd_region->num_lanes = ndr_desc->num_lanes; + nd_region->ro = ro; + nd_region->numa_node = ndr_desc->numa_node; + ida_init(&nd_region->ns_ida); + ida_init(&nd_region->btt_ida); + dev = &nd_region->dev; + dev_set_name(dev, "region%d", nd_region->id); + dev->parent = &nvdimm_bus->dev; + dev->type = dev_type; + dev->groups = ndr_desc->attr_groups; + nd_region->ndr_size = resource_size(ndr_desc->res); + nd_region->ndr_start = ndr_desc->res->start; + nd_device_register(dev); + + return nd_region; + + err_percpu: + ida_simple_remove(®ion_ida, nd_region->id); + err_id: + kfree(region_buf); + return NULL; +} + +struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc) +{ + ndr_desc->num_lanes = ND_MAX_LANES; + return nd_region_create(nvdimm_bus, ndr_desc, &nd_pmem_device_type, + __func__); +} +EXPORT_SYMBOL_GPL(nvdimm_pmem_region_create); + +struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc) +{ + if (ndr_desc->num_mappings > 1) + return NULL; + ndr_desc->num_lanes = min(ndr_desc->num_lanes, ND_MAX_LANES); + return nd_region_create(nvdimm_bus, ndr_desc, &nd_blk_device_type, + __func__); +} +EXPORT_SYMBOL_GPL(nvdimm_blk_region_create); + +struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc) +{ + ndr_desc->num_lanes = ND_MAX_LANES; + return nd_region_create(nvdimm_bus, ndr_desc, &nd_volatile_device_type, + __func__); +} +EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create); diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig index 487d057431cc..c0e6ede3e27d 100644 --- a/drivers/phy/Kconfig +++ b/drivers/phy/Kconfig @@ -267,6 +267,13 @@ config PHY_EXYNOS5_USBDRD This driver provides PHY interface for USB 3.0 DRD controller present on Exynos5 SoC series. +config PHY_PISTACHIO_USB + tristate "IMG Pistachio USB2.0 PHY driver" + depends on MACH_PISTACHIO + select GENERIC_PHY + help + Enable this to support the USB2.0 PHY on the IMG Pistachio SoC. + config PHY_QCOM_APQ8064_SATA tristate "Qualcomm APQ8064 SATA SerDes/PHY driver" depends on ARCH_QCOM diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile index 42f58e95aff0..f344e1b2e825 100644 --- a/drivers/phy/Makefile +++ b/drivers/phy/Makefile @@ -44,3 +44,4 @@ obj-$(CONFIG_PHY_QCOM_UFS) += phy-qcom-ufs-qmp-20nm.o obj-$(CONFIG_PHY_QCOM_UFS) += phy-qcom-ufs-qmp-14nm.o obj-$(CONFIG_PHY_TUSB1210) += phy-tusb1210.o obj-$(CONFIG_PHY_BRCMSTB_SATA) += phy-brcmstb-sata.o +obj-$(CONFIG_PHY_PISTACHIO_USB) += phy-pistachio-usb.o diff --git a/drivers/phy/phy-pistachio-usb.c b/drivers/phy/phy-pistachio-usb.c new file mode 100644 index 000000000000..c6db35e6bb63 --- /dev/null +++ b/drivers/phy/phy-pistachio-usb.c @@ -0,0 +1,206 @@ +/* + * IMG Pistachio USB PHY driver + * + * Copyright (C) 2015 Google, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + */ + +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/mfd/syscon.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/phy/phy.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> + +#include <dt-bindings/phy/phy-pistachio-usb.h> + +#define USB_PHY_CONTROL1 0x04 +#define USB_PHY_CONTROL1_FSEL_SHIFT 2 +#define USB_PHY_CONTROL1_FSEL_MASK 0x7 + +#define USB_PHY_STRAP_CONTROL 0x10 +#define USB_PHY_STRAP_CONTROL_REFCLK_SHIFT 4 +#define USB_PHY_STRAP_CONTROL_REFCLK_MASK 0x3 + +#define USB_PHY_STATUS 0x14 +#define USB_PHY_STATUS_RX_PHY_CLK BIT(9) +#define USB_PHY_STATUS_RX_UTMI_CLK BIT(8) +#define USB_PHY_STATUS_VBUS_FAULT BIT(7) + +struct pistachio_usb_phy { + struct device *dev; + struct regmap *cr_top; + struct clk *phy_clk; + unsigned int refclk; +}; + +static const unsigned long fsel_rate_map[] = { + 9600000, + 10000000, + 12000000, + 19200000, + 20000000, + 24000000, + 0, + 50000000, +}; + +static int pistachio_usb_phy_power_on(struct phy *phy) +{ + struct pistachio_usb_phy *p_phy = phy_get_drvdata(phy); + unsigned long timeout, rate; + unsigned int i; + int ret; + + ret = clk_prepare_enable(p_phy->phy_clk); + if (ret < 0) { + dev_err(p_phy->dev, "Failed to enable PHY clock: %d\n", ret); + return ret; + } + + regmap_update_bits(p_phy->cr_top, USB_PHY_STRAP_CONTROL, + USB_PHY_STRAP_CONTROL_REFCLK_MASK << + USB_PHY_STRAP_CONTROL_REFCLK_SHIFT, + p_phy->refclk << USB_PHY_STRAP_CONTROL_REFCLK_SHIFT); + + rate = clk_get_rate(p_phy->phy_clk); + if (p_phy->refclk == REFCLK_XO_CRYSTAL && rate != 12000000) { + dev_err(p_phy->dev, "Unsupported rate for XO crystal: %ld\n", + rate); + ret = -EINVAL; + goto disable_clk; + } + + for (i = 0; i < ARRAY_SIZE(fsel_rate_map); i++) { + if (rate == fsel_rate_map[i]) + break; + } + if (i == ARRAY_SIZE(fsel_rate_map)) { + dev_err(p_phy->dev, "Unsupported clock rate: %lu\n", rate); + ret = -EINVAL; + goto disable_clk; + } + + regmap_update_bits(p_phy->cr_top, USB_PHY_CONTROL1, + USB_PHY_CONTROL1_FSEL_MASK << + USB_PHY_CONTROL1_FSEL_SHIFT, + i << USB_PHY_CONTROL1_FSEL_SHIFT); + + timeout = jiffies + msecs_to_jiffies(200); + while (time_before(jiffies, timeout)) { + unsigned int val; + + regmap_read(p_phy->cr_top, USB_PHY_STATUS, &val); + if (val & USB_PHY_STATUS_VBUS_FAULT) { + dev_err(p_phy->dev, "VBUS fault detected\n"); + ret = -EIO; + goto disable_clk; + } + if ((val & USB_PHY_STATUS_RX_PHY_CLK) && + (val & USB_PHY_STATUS_RX_UTMI_CLK)) + return 0; + usleep_range(1000, 1500); + } + + dev_err(p_phy->dev, "Timed out waiting for PHY to power on\n"); + ret = -ETIMEDOUT; + +disable_clk: + clk_disable_unprepare(p_phy->phy_clk); + return ret; +} + +static int pistachio_usb_phy_power_off(struct phy *phy) +{ + struct pistachio_usb_phy *p_phy = phy_get_drvdata(phy); + + clk_disable_unprepare(p_phy->phy_clk); + + return 0; +} + +static const struct phy_ops pistachio_usb_phy_ops = { + .power_on = pistachio_usb_phy_power_on, + .power_off = pistachio_usb_phy_power_off, + .owner = THIS_MODULE, +}; + +static int pistachio_usb_phy_probe(struct platform_device *pdev) +{ + struct pistachio_usb_phy *p_phy; + struct phy_provider *provider; + struct phy *phy; + int ret; + + p_phy = devm_kzalloc(&pdev->dev, sizeof(*p_phy), GFP_KERNEL); + if (!p_phy) + return -ENOMEM; + p_phy->dev = &pdev->dev; + platform_set_drvdata(pdev, p_phy); + + p_phy->cr_top = syscon_regmap_lookup_by_phandle(p_phy->dev->of_node, + "img,cr-top"); + if (IS_ERR(p_phy->cr_top)) { + dev_err(p_phy->dev, "Failed to get CR_TOP registers: %ld\n", + PTR_ERR(p_phy->cr_top)); + return PTR_ERR(p_phy->cr_top); + } + + p_phy->phy_clk = devm_clk_get(p_phy->dev, "usb_phy"); + if (IS_ERR(p_phy->phy_clk)) { + dev_err(p_phy->dev, "Failed to get usb_phy clock: %ld\n", + PTR_ERR(p_phy->phy_clk)); + return PTR_ERR(p_phy->phy_clk); + } + + ret = of_property_read_u32(p_phy->dev->of_node, "img,refclk", + &p_phy->refclk); + if (ret < 0) { + dev_err(p_phy->dev, "No reference clock selector specified\n"); + return ret; + } + + phy = devm_phy_create(p_phy->dev, NULL, &pistachio_usb_phy_ops); + if (IS_ERR(phy)) { + dev_err(p_phy->dev, "Failed to create PHY: %ld\n", + PTR_ERR(phy)); + return PTR_ERR(phy); + } + phy_set_drvdata(phy, p_phy); + + provider = devm_of_phy_provider_register(p_phy->dev, + of_phy_simple_xlate); + if (IS_ERR(provider)) { + dev_err(p_phy->dev, "Failed to register PHY provider: %ld\n", + PTR_ERR(provider)); + return PTR_ERR(provider); + } + + return 0; +} + +static const struct of_device_id pistachio_usb_phy_of_match[] = { + { .compatible = "img,pistachio-usb-phy", }, + { }, +}; +MODULE_DEVICE_TABLE(of, pistachio_usb_phy_of_match); + +static struct platform_driver pistachio_usb_phy_driver = { + .probe = pistachio_usb_phy_probe, + .driver = { + .name = "pistachio-usb-phy", + .of_match_table = pistachio_usb_phy_of_match, + }, +}; +module_platform_driver(pistachio_usb_phy_driver); + +MODULE_AUTHOR("Andrew Bresticker <[email protected]>"); +MODULE_DESCRIPTION("IMG Pistachio USB2.0 PHY driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index db2fe4ab4b4a..83b4b89b9d5a 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1503,7 +1503,7 @@ config RTC_DRV_PUV3 config RTC_DRV_LOONGSON1 tristate "loongson1 RTC support" - depends on MACH_LOONGSON1 + depends on MACH_LOONGSON32 help This is a driver for the loongson1 on-chip Counter0 (Time-Of-Year counter) to be used as a RTC. diff --git a/drivers/rtc/rtc-ls1x.c b/drivers/rtc/rtc-ls1x.c index 8445e564094a..22a9ec4f2b83 100644 --- a/drivers/rtc/rtc-ls1x.c +++ b/drivers/rtc/rtc-ls1x.c @@ -17,7 +17,7 @@ #include <linux/delay.h> #include <linux/types.h> #include <linux/io.h> -#include <asm/mach-loongson1/loongson1.h> +#include <loongson1.h> #define LS1X_RTC_REG_OFFSET (LS1X_RTC_BASE + 0x20) #define LS1X_RTC_REGS(x) \ diff --git a/drivers/tty/serial/8250/8250_ingenic.c b/drivers/tty/serial/8250/8250_ingenic.c new file mode 100644 index 000000000000..21bf81fe794f --- /dev/null +++ b/drivers/tty/serial/8250/8250_ingenic.c @@ -0,0 +1,266 @@ +/* + * Copyright (C) 2010 Lars-Peter Clausen <[email protected]> + * Copyright (C) 2015 Imagination Technologies + * + * Ingenic SoC UART support + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/clk.h> +#include <linux/console.h> +#include <linux/io.h> +#include <linux/libfdt.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_fdt.h> +#include <linux/platform_device.h> +#include <linux/serial_8250.h> +#include <linux/serial_core.h> +#include <linux/serial_reg.h> + +struct ingenic_uart_data { + struct clk *clk_module; + struct clk *clk_baud; + int line; +}; + +#define UART_FCR_UME BIT(4) + +static struct earlycon_device *early_device; + +static uint8_t __init early_in(struct uart_port *port, int offset) +{ + return readl(port->membase + (offset << 2)); +} + +static void __init early_out(struct uart_port *port, int offset, uint8_t value) +{ + writel(value, port->membase + (offset << 2)); +} + +static void __init ingenic_early_console_putc(struct uart_port *port, int c) +{ + uint8_t lsr; + + do { + lsr = early_in(port, UART_LSR); + } while ((lsr & UART_LSR_TEMT) == 0); + + early_out(port, UART_TX, c); +} + +static void __init ingenic_early_console_write(struct console *console, + const char *s, unsigned int count) +{ + uart_console_write(&early_device->port, s, count, + ingenic_early_console_putc); +} + +static void __init ingenic_early_console_setup_clock(struct earlycon_device *dev) +{ + void *fdt = initial_boot_params; + const __be32 *prop; + int offset; + + offset = fdt_path_offset(fdt, "/ext"); + if (offset < 0) + return; + + prop = fdt_getprop(fdt, offset, "clock-frequency", NULL); + if (!prop) + return; + + dev->port.uartclk = be32_to_cpup(prop); +} + +static int __init ingenic_early_console_setup(struct earlycon_device *dev, + const char *opt) +{ + struct uart_port *port = &dev->port; + unsigned int baud, divisor; + + if (!dev->port.membase) + return -ENODEV; + + ingenic_early_console_setup_clock(dev); + + baud = dev->baud ?: 115200; + divisor = DIV_ROUND_CLOSEST(port->uartclk, 16 * baud); + + early_out(port, UART_IER, 0); + early_out(port, UART_LCR, UART_LCR_DLAB | UART_LCR_WLEN8); + early_out(port, UART_DLL, 0); + early_out(port, UART_DLM, 0); + early_out(port, UART_LCR, UART_LCR_WLEN8); + early_out(port, UART_FCR, UART_FCR_UME | UART_FCR_CLEAR_XMIT | + UART_FCR_CLEAR_RCVR | UART_FCR_ENABLE_FIFO); + early_out(port, UART_MCR, UART_MCR_RTS | UART_MCR_DTR); + + early_out(port, UART_LCR, UART_LCR_DLAB | UART_LCR_WLEN8); + early_out(port, UART_DLL, divisor & 0xff); + early_out(port, UART_DLM, (divisor >> 8) & 0xff); + early_out(port, UART_LCR, UART_LCR_WLEN8); + + early_device = dev; + dev->con->write = ingenic_early_console_write; + + return 0; +} + +EARLYCON_DECLARE(jz4740_uart, ingenic_early_console_setup); +OF_EARLYCON_DECLARE(jz4740_uart, "ingenic,jz4740-uart", + ingenic_early_console_setup); + +EARLYCON_DECLARE(jz4775_uart, ingenic_early_console_setup); +OF_EARLYCON_DECLARE(jz4775_uart, "ingenic,jz4775-uart", + ingenic_early_console_setup); + +EARLYCON_DECLARE(jz4780_uart, ingenic_early_console_setup); +OF_EARLYCON_DECLARE(jz4780_uart, "ingenic,jz4780-uart", + ingenic_early_console_setup); + +static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value) +{ + switch (offset) { + case UART_FCR: + /* UART module enable */ + value |= UART_FCR_UME; + break; + + case UART_IER: + value |= (value & 0x4) << 2; + break; + + default: + break; + } + + writeb(value, p->membase + (offset << p->regshift)); +} + +static int ingenic_uart_probe(struct platform_device *pdev) +{ + struct uart_8250_port uart = {}; + struct resource *regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + struct resource *irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + struct ingenic_uart_data *data; + int err, line; + + if (!regs || !irq) { + dev_err(&pdev->dev, "no registers/irq defined\n"); + return -EINVAL; + } + + data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + spin_lock_init(&uart.port.lock); + uart.port.type = PORT_16550; + uart.port.flags = UPF_SKIP_TEST | UPF_IOREMAP | UPF_FIXED_TYPE; + uart.port.iotype = UPIO_MEM; + uart.port.mapbase = regs->start; + uart.port.regshift = 2; + uart.port.serial_out = ingenic_uart_serial_out; + uart.port.irq = irq->start; + uart.port.dev = &pdev->dev; + + /* Check for a fixed line number */ + line = of_alias_get_id(pdev->dev.of_node, "serial"); + if (line >= 0) + uart.port.line = line; + + uart.port.membase = devm_ioremap(&pdev->dev, regs->start, + resource_size(regs)); + if (!uart.port.membase) + return -ENOMEM; + + data->clk_module = devm_clk_get(&pdev->dev, "module"); + if (IS_ERR(data->clk_module)) { + err = PTR_ERR(data->clk_module); + if (err != -EPROBE_DEFER) + dev_err(&pdev->dev, + "unable to get module clock: %d\n", err); + return err; + } + + data->clk_baud = devm_clk_get(&pdev->dev, "baud"); + if (IS_ERR(data->clk_baud)) { + err = PTR_ERR(data->clk_baud); + if (err != -EPROBE_DEFER) + dev_err(&pdev->dev, + "unable to get baud clock: %d\n", err); + return err; + } + + err = clk_prepare_enable(data->clk_module); + if (err) { + dev_err(&pdev->dev, "could not enable module clock: %d\n", err); + goto out; + } + + err = clk_prepare_enable(data->clk_baud); + if (err) { + dev_err(&pdev->dev, "could not enable baud clock: %d\n", err); + goto out_disable_moduleclk; + } + uart.port.uartclk = clk_get_rate(data->clk_baud); + + data->line = serial8250_register_8250_port(&uart); + if (data->line < 0) { + err = data->line; + goto out_disable_baudclk; + } + + platform_set_drvdata(pdev, data); + return 0; + +out_disable_baudclk: + clk_disable_unprepare(data->clk_baud); +out_disable_moduleclk: + clk_disable_unprepare(data->clk_module); +out: + return err; +} + +static int ingenic_uart_remove(struct platform_device *pdev) +{ + struct ingenic_uart_data *data = platform_get_drvdata(pdev); + + serial8250_unregister_port(data->line); + clk_disable_unprepare(data->clk_module); + clk_disable_unprepare(data->clk_baud); + return 0; +} + +static const struct of_device_id of_match[] = { + { .compatible = "ingenic,jz4740-uart" }, + { .compatible = "ingenic,jz4775-uart" }, + { .compatible = "ingenic,jz4780-uart" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, of_match); + +static struct platform_driver ingenic_uart_platform_driver = { + .driver = { + .name = "ingenic-uart", + .owner = THIS_MODULE, + .of_match_table = of_match, + }, + .probe = ingenic_uart_probe, + .remove = ingenic_uart_remove, +}; + +module_platform_driver(ingenic_uart_platform_driver); + +MODULE_AUTHOR("Paul Burton"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ingenic SoC UART driver"); diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index a74a8e4717d4..e1de1181b322 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -357,3 +357,12 @@ config SERIAL_8250_UNIPHIER help If you have a UniPhier based board and want to use the on-chip serial ports, say Y to this option. If unsure, say N. + +config SERIAL_8250_INGENIC + bool "Support for Ingenic SoC serial ports" + depends on SERIAL_8250_CONSOLE && OF_FLATTREE + select LIBFDT + select SERIAL_EARLYCON + help + If you have a system using an Ingenic SoC and wish to make use of + its UARTs, say Y to this option. If unsure, say N. diff --git a/drivers/tty/serial/8250/Makefile b/drivers/tty/serial/8250/Makefile index 6fa22ffad63d..706295913c34 100644 --- a/drivers/tty/serial/8250/Makefile +++ b/drivers/tty/serial/8250/Makefile @@ -25,3 +25,6 @@ obj-$(CONFIG_SERIAL_8250_FINTEK) += 8250_fintek.o obj-$(CONFIG_SERIAL_8250_LPC18XX) += 8250_lpc18xx.o obj-$(CONFIG_SERIAL_8250_MT6577) += 8250_mtk.o obj-$(CONFIG_SERIAL_8250_UNIPHIER) += 8250_uniphier.o +obj-$(CONFIG_SERIAL_8250_INGENIC) += 8250_ingenic.o + +CFLAGS_8250_ingenic.o += -I$(srctree)/scripts/dtc/libfdt diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 1c4791033b72..2847108cc8dd 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -462,6 +462,7 @@ static struct sysrq_key_op *sysrq_key_table[36] = { /* v: May be registered for frame buffer console restore */ NULL, /* v */ &sysrq_showstate_blocked_op, /* w */ + /* x: May be registered on mips for TLB dump */ /* x: May be registered on ppc/powerpc for xmon */ /* x: May be registered on sparc64 for global PMU dump */ NULL, /* x */ diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig index 547cee83400b..8afc3c1efdab 100644 --- a/drivers/usb/host/Kconfig +++ b/drivers/usb/host/Kconfig @@ -295,7 +295,7 @@ config USB_OCTEON_EHCI bool "Octeon on-chip EHCI support (DEPRECATED)" depends on CAVIUM_OCTEON_SOC default n - select USB_EHCI_BIG_ENDIAN_MMIO + select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN select USB_EHCI_HCD_PLATFORM help This option is deprecated now and the driver was removed, use @@ -568,7 +568,7 @@ config USB_OCTEON_OHCI bool "Octeon on-chip OHCI support (DEPRECATED)" depends on CAVIUM_OCTEON_SOC default USB_OCTEON_EHCI - select USB_OHCI_BIG_ENDIAN_MMIO + select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN select USB_OHCI_LITTLE_ENDIAN select USB_OHCI_HCD_PLATFORM help diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index e9851add6f4e..964ad572aaee 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1056,19 +1056,21 @@ struct vfio_devices { static int vfio_pci_get_devs(struct pci_dev *pdev, void *data) { struct vfio_devices *devs = data; - struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver); - - if (pci_drv != &vfio_pci_driver) - return -EBUSY; + struct vfio_device *device; if (devs->cur_index == devs->max_index) return -ENOSPC; - devs->devices[devs->cur_index] = vfio_device_get_from_dev(&pdev->dev); - if (!devs->devices[devs->cur_index]) + device = vfio_device_get_from_dev(&pdev->dev); + if (!device) return -EINVAL; - devs->cur_index++; + if (pci_dev_driver(pdev) != &vfio_pci_driver) { + vfio_device_put(device); + return -EBUSY; + } + + devs->devices[devs->cur_index++] = device; return 0; } diff --git a/drivers/vfio/platform/Kconfig b/drivers/vfio/platform/Kconfig index 9a4403e2a36c..bb30128782aa 100644 --- a/drivers/vfio/platform/Kconfig +++ b/drivers/vfio/platform/Kconfig @@ -1,6 +1,6 @@ config VFIO_PLATFORM tristate "VFIO support for platform devices" - depends on VFIO && EVENTFD && ARM + depends on VFIO && EVENTFD && (ARM || ARM64) select VFIO_VIRQFD help Support for platform devices with VFIO. This is required to make @@ -18,3 +18,5 @@ config VFIO_AMBA framework. If you don't know what to do here, say N. + +source "drivers/vfio/platform/reset/Kconfig" diff --git a/drivers/vfio/platform/Makefile b/drivers/vfio/platform/Makefile index 81de144c0eaa..9ce8afe28450 100644 --- a/drivers/vfio/platform/Makefile +++ b/drivers/vfio/platform/Makefile @@ -2,7 +2,9 @@ vfio-platform-y := vfio_platform.o vfio_platform_common.o vfio_platform_irq.o obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform.o +obj-$(CONFIG_VFIO_PLATFORM) += reset/ vfio-amba-y := vfio_amba.o obj-$(CONFIG_VFIO_AMBA) += vfio-amba.o +obj-$(CONFIG_VFIO_AMBA) += reset/ diff --git a/drivers/vfio/platform/reset/Kconfig b/drivers/vfio/platform/reset/Kconfig new file mode 100644 index 000000000000..746b96b0003b --- /dev/null +++ b/drivers/vfio/platform/reset/Kconfig @@ -0,0 +1,7 @@ +config VFIO_PLATFORM_CALXEDAXGMAC_RESET + tristate "VFIO support for calxeda xgmac reset" + depends on VFIO_PLATFORM + help + Enables the VFIO platform driver to handle reset for Calxeda xgmac + + If you don't know what to do here, say N. diff --git a/drivers/vfio/platform/reset/Makefile b/drivers/vfio/platform/reset/Makefile new file mode 100644 index 000000000000..2a486af9f8fa --- /dev/null +++ b/drivers/vfio/platform/reset/Makefile @@ -0,0 +1,5 @@ +vfio-platform-calxedaxgmac-y := vfio_platform_calxedaxgmac.o + +ccflags-y += -Idrivers/vfio/platform + +obj-$(CONFIG_VFIO_PLATFORM_CALXEDAXGMAC_RESET) += vfio-platform-calxedaxgmac.o diff --git a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c new file mode 100644 index 000000000000..619dc7d22082 --- /dev/null +++ b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c @@ -0,0 +1,86 @@ +/* + * VFIO platform driver specialized for Calxeda xgmac reset + * reset code is inherited from calxeda xgmac native driver + * + * Copyright 2010-2011 Calxeda, Inc. + * Copyright (c) 2015 Linaro Ltd. + * www.linaro.org + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/io.h> + +#include "vfio_platform_private.h" + +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "Eric Auger <[email protected]>" +#define DRIVER_DESC "Reset support for Calxeda xgmac vfio platform device" + +#define CALXEDAXGMAC_COMPAT "calxeda,hb-xgmac" + +/* XGMAC Register definitions */ +#define XGMAC_CONTROL 0x00000000 /* MAC Configuration */ + +/* DMA Control and Status Registers */ +#define XGMAC_DMA_CONTROL 0x00000f18 /* Ctrl (Operational Mode) */ +#define XGMAC_DMA_INTR_ENA 0x00000f1c /* Interrupt Enable */ + +/* DMA Control registe defines */ +#define DMA_CONTROL_ST 0x00002000 /* Start/Stop Transmission */ +#define DMA_CONTROL_SR 0x00000002 /* Start/Stop Receive */ + +/* Common MAC defines */ +#define MAC_ENABLE_TX 0x00000008 /* Transmitter Enable */ +#define MAC_ENABLE_RX 0x00000004 /* Receiver Enable */ + +static inline void xgmac_mac_disable(void __iomem *ioaddr) +{ + u32 value = readl(ioaddr + XGMAC_DMA_CONTROL); + + value &= ~(DMA_CONTROL_ST | DMA_CONTROL_SR); + writel(value, ioaddr + XGMAC_DMA_CONTROL); + + value = readl(ioaddr + XGMAC_CONTROL); + value &= ~(MAC_ENABLE_TX | MAC_ENABLE_RX); + writel(value, ioaddr + XGMAC_CONTROL); +} + +int vfio_platform_calxedaxgmac_reset(struct vfio_platform_device *vdev) +{ + struct vfio_platform_region reg = vdev->regions[0]; + + if (!reg.ioaddr) { + reg.ioaddr = + ioremap_nocache(reg.addr, reg.size); + if (!reg.ioaddr) + return -ENOMEM; + } + + /* disable IRQ */ + writel(0, reg.ioaddr + XGMAC_DMA_INTR_ENA); + + /* Disable the MAC core */ + xgmac_mac_disable(reg.ioaddr); + + return 0; +} +EXPORT_SYMBOL_GPL(vfio_platform_calxedaxgmac_reset); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index abcff7a1aa66..e43efb5e92bf 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -25,6 +25,44 @@ static DEFINE_MUTEX(driver_lock); +static const struct vfio_platform_reset_combo reset_lookup_table[] = { + { + .compat = "calxeda,hb-xgmac", + .reset_function_name = "vfio_platform_calxedaxgmac_reset", + .module_name = "vfio-platform-calxedaxgmac", + }, +}; + +static void vfio_platform_get_reset(struct vfio_platform_device *vdev, + struct device *dev) +{ + const char *compat; + int (*reset)(struct vfio_platform_device *); + int ret, i; + + ret = device_property_read_string(dev, "compatible", &compat); + if (ret) + return; + + for (i = 0 ; i < ARRAY_SIZE(reset_lookup_table); i++) { + if (!strcmp(reset_lookup_table[i].compat, compat)) { + request_module(reset_lookup_table[i].module_name); + reset = __symbol_get( + reset_lookup_table[i].reset_function_name); + if (reset) { + vdev->reset = reset; + return; + } + } + } +} + +static void vfio_platform_put_reset(struct vfio_platform_device *vdev) +{ + if (vdev->reset) + symbol_put_addr(vdev->reset); +} + static int vfio_platform_regions_init(struct vfio_platform_device *vdev) { int cnt = 0, i; @@ -100,6 +138,8 @@ static void vfio_platform_release(void *device_data) mutex_lock(&driver_lock); if (!(--vdev->refcnt)) { + if (vdev->reset) + vdev->reset(vdev); vfio_platform_regions_cleanup(vdev); vfio_platform_irq_cleanup(vdev); } @@ -127,6 +167,9 @@ static int vfio_platform_open(void *device_data) ret = vfio_platform_irq_init(vdev); if (ret) goto err_irq; + + if (vdev->reset) + vdev->reset(vdev); } vdev->refcnt++; @@ -159,6 +202,8 @@ static long vfio_platform_ioctl(void *device_data, if (info.argsz < minsz) return -EINVAL; + if (vdev->reset) + vdev->flags |= VFIO_DEVICE_FLAGS_RESET; info.flags = vdev->flags; info.num_regions = vdev->num_regions; info.num_irqs = vdev->num_irqs; @@ -252,8 +297,12 @@ static long vfio_platform_ioctl(void *device_data, return ret; - } else if (cmd == VFIO_DEVICE_RESET) - return -EINVAL; + } else if (cmd == VFIO_DEVICE_RESET) { + if (vdev->reset) + return vdev->reset(vdev); + else + return -EINVAL; + } return -ENOTTY; } @@ -502,6 +551,8 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, return ret; } + vfio_platform_get_reset(vdev, dev); + mutex_init(&vdev->igate); return 0; @@ -513,8 +564,11 @@ struct vfio_platform_device *vfio_platform_remove_common(struct device *dev) struct vfio_platform_device *vdev; vdev = vfio_del_group_dev(dev); - if (vdev) + + if (vdev) { + vfio_platform_put_reset(vdev); iommu_group_put(dev->iommu_group); + } return vdev; } diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index 5d31e0473406..1c9b3d59543c 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -67,6 +67,13 @@ struct vfio_platform_device { struct resource* (*get_resource)(struct vfio_platform_device *vdev, int i); int (*get_irq)(struct vfio_platform_device *vdev, int i); + int (*reset)(struct vfio_platform_device *vdev); +}; + +struct vfio_platform_reset_combo { + const char *compat; + const char *reset_function_name; + const char *module_name; }; extern int vfio_platform_probe_common(struct vfio_platform_device *vdev, diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index e1278fe04b1e..2fb29dfeffbd 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -661,18 +661,29 @@ int vfio_add_group_dev(struct device *dev, EXPORT_SYMBOL_GPL(vfio_add_group_dev); /** - * Get a reference to the vfio_device for a device that is known to - * be bound to a vfio driver. The driver implicitly holds a - * vfio_device reference between vfio_add_group_dev and - * vfio_del_group_dev. We can therefore use drvdata to increment - * that reference from the struct device. This additional - * reference must be released by calling vfio_device_put. + * Get a reference to the vfio_device for a device. Even if the + * caller thinks they own the device, they could be racing with a + * release call path, so we can't trust drvdata for the shortcut. + * Go the long way around, from the iommu_group to the vfio_group + * to the vfio_device. */ struct vfio_device *vfio_device_get_from_dev(struct device *dev) { - struct vfio_device *device = dev_get_drvdata(dev); + struct iommu_group *iommu_group; + struct vfio_group *group; + struct vfio_device *device; + + iommu_group = iommu_group_get(dev); + if (!iommu_group) + return NULL; - vfio_device_get(device); + group = vfio_group_get_from_iommu(iommu_group); + iommu_group_put(iommu_group); + if (!group) + return NULL; + + device = vfio_group_get_device(group, dev); + vfio_group_put(group); return device; } diff --git a/fs/Makefile b/fs/Makefile index cb92fd4c3172..cb20e4bf2303 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -115,7 +115,6 @@ obj-$(CONFIG_AFS_FS) += afs/ obj-$(CONFIG_NILFS2_FS) += nilfs2/ obj-$(CONFIG_BEFS_FS) += befs/ obj-$(CONFIG_HOSTFS) += hostfs/ -obj-$(CONFIG_HPPFS) += hppfs/ obj-$(CONFIG_CACHEFILES) += cachefiles/ obj-$(CONFIG_DEBUG_FS) += debugfs/ obj-$(CONFIG_TRACING) += tracefs/ diff --git a/fs/block_dev.c b/fs/block_dev.c index f839c339565e..4fe10f93db8a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -377,7 +377,7 @@ int bdev_read_page(struct block_device *bdev, sector_t sector, struct page *page) { const struct block_device_operations *ops = bdev->bd_disk->fops; - if (!ops->rw_page) + if (!ops->rw_page || bdev_get_integrity(bdev)) return -EOPNOTSUPP; return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ); } @@ -408,7 +408,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, int result; int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE; const struct block_device_operations *ops = bdev->bd_disk->fops; - if (!ops->rw_page) + if (!ops->rw_page || bdev_get_integrity(bdev)) return -EOPNOTSUPP; set_page_writeback(page); result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw); diff --git a/fs/hppfs/Makefile b/fs/hppfs/Makefile deleted file mode 100644 index 3a982bd975d2..000000000000 --- a/fs/hppfs/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# -# Copyright (C) 2002 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) -# Licensed under the GPL -# - -obj-$(CONFIG_HPPFS) += hppfs.o diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c deleted file mode 100644 index 2867837909a9..000000000000 --- a/fs/hppfs/hppfs.c +++ /dev/null @@ -1,765 +0,0 @@ -/* - * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL - */ - -#include <linux/ctype.h> -#include <linux/dcache.h> -#include <linux/file.h> -#include <linux/fs.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/list.h> -#include <linux/module.h> -#include <linux/mount.h> -#include <linux/slab.h> -#include <linux/statfs.h> -#include <linux/types.h> -#include <linux/pid_namespace.h> -#include <linux/namei.h> -#include <asm/uaccess.h> -#include <os.h> - -static struct inode *get_inode(struct super_block *, struct dentry *); - -struct hppfs_data { - struct list_head list; - char contents[PAGE_SIZE - sizeof(struct list_head)]; -}; - -struct hppfs_private { - struct file *proc_file; - int host_fd; - loff_t len; - struct hppfs_data *contents; -}; - -struct hppfs_inode_info { - struct dentry *proc_dentry; - struct inode vfs_inode; -}; - -static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode) -{ - return container_of(inode, struct hppfs_inode_info, vfs_inode); -} - -#define HPPFS_SUPER_MAGIC 0xb00000ee - -static const struct super_operations hppfs_sbops; - -static int is_pid(struct dentry *dentry) -{ - struct super_block *sb; - int i; - - sb = dentry->d_sb; - if (dentry->d_parent != sb->s_root) - return 0; - - for (i = 0; i < dentry->d_name.len; i++) { - if (!isdigit(dentry->d_name.name[i])) - return 0; - } - return 1; -} - -static char *dentry_name(struct dentry *dentry, int extra) -{ - struct dentry *parent; - char *root, *name; - const char *seg_name; - int len, seg_len, root_len; - - len = 0; - parent = dentry; - while (parent->d_parent != parent) { - if (is_pid(parent)) - len += strlen("pid") + 1; - else len += parent->d_name.len + 1; - parent = parent->d_parent; - } - - root = "proc"; - root_len = strlen(root); - len += root_len; - name = kmalloc(len + extra + 1, GFP_KERNEL); - if (name == NULL) - return NULL; - - name[len] = '\0'; - parent = dentry; - while (parent->d_parent != parent) { - if (is_pid(parent)) { - seg_name = "pid"; - seg_len = strlen(seg_name); - } - else { - seg_name = parent->d_name.name; - seg_len = parent->d_name.len; - } - - len -= seg_len + 1; - name[len] = '/'; - memcpy(&name[len + 1], seg_name, seg_len); - parent = parent->d_parent; - } - memcpy(name, root, root_len); - return name; -} - -static int file_removed(struct dentry *dentry, const char *file) -{ - char *host_file; - int extra, fd; - - extra = 0; - if (file != NULL) - extra += strlen(file) + 1; - - host_file = dentry_name(dentry, extra + strlen("/remove")); - if (host_file == NULL) { - printk(KERN_ERR "file_removed : allocation failed\n"); - return -ENOMEM; - } - - if (file != NULL) { - strcat(host_file, "/"); - strcat(host_file, file); - } - strcat(host_file, "/remove"); - - fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); - kfree(host_file); - if (fd > 0) { - os_close_file(fd); - return 1; - } - return 0; -} - -static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, - unsigned int flags) -{ - struct dentry *proc_dentry, *parent; - struct qstr *name = &dentry->d_name; - struct inode *inode; - int err, deleted; - - deleted = file_removed(dentry, NULL); - if (deleted < 0) - return ERR_PTR(deleted); - else if (deleted) - return ERR_PTR(-ENOENT); - - parent = HPPFS_I(ino)->proc_dentry; - mutex_lock(&d_inode(parent)->i_mutex); - proc_dentry = lookup_one_len(name->name, parent, name->len); - mutex_unlock(&d_inode(parent)->i_mutex); - - if (IS_ERR(proc_dentry)) - return proc_dentry; - - err = -ENOMEM; - inode = get_inode(ino->i_sb, proc_dentry); - if (!inode) - goto out; - - d_add(dentry, inode); - return NULL; - - out: - return ERR_PTR(err); -} - -static const struct inode_operations hppfs_file_iops = { -}; - -static ssize_t read_proc(struct file *file, char __user *buf, ssize_t count, - loff_t *ppos, int is_user) -{ - ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); - ssize_t n; - - read = file_inode(file)->i_fop->read; - - if (!is_user) - set_fs(KERNEL_DS); - - n = (*read)(file, buf, count, &file->f_pos); - - if (!is_user) - set_fs(USER_DS); - - if (ppos) - *ppos = file->f_pos; - return n; -} - -static ssize_t hppfs_read_file(int fd, char __user *buf, ssize_t count) -{ - ssize_t n; - int cur, err; - char *new_buf; - - n = -ENOMEM; - new_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (new_buf == NULL) { - printk(KERN_ERR "hppfs_read_file : kmalloc failed\n"); - goto out; - } - n = 0; - while (count > 0) { - cur = min_t(ssize_t, count, PAGE_SIZE); - err = os_read_file(fd, new_buf, cur); - if (err < 0) { - printk(KERN_ERR "hppfs_read : read failed, " - "errno = %d\n", err); - n = err; - goto out_free; - } else if (err == 0) - break; - - if (copy_to_user(buf, new_buf, err)) { - n = -EFAULT; - goto out_free; - } - n += err; - count -= err; - } - out_free: - kfree(new_buf); - out: - return n; -} - -static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) -{ - struct hppfs_private *hppfs = file->private_data; - struct hppfs_data *data; - loff_t off; - int err; - - if (hppfs->contents != NULL) { - int rem; - - if (*ppos >= hppfs->len) - return 0; - - data = hppfs->contents; - off = *ppos; - while (off >= sizeof(data->contents)) { - data = list_entry(data->list.next, struct hppfs_data, - list); - off -= sizeof(data->contents); - } - - if (off + count > hppfs->len) - count = hppfs->len - off; - rem = copy_to_user(buf, &data->contents[off], count); - *ppos += count - rem; - if (rem > 0) - return -EFAULT; - } else if (hppfs->host_fd != -1) { - err = os_seek_file(hppfs->host_fd, *ppos); - if (err) { - printk(KERN_ERR "hppfs_read : seek failed, " - "errno = %d\n", err); - return err; - } - err = hppfs_read_file(hppfs->host_fd, buf, count); - if (err < 0) { - printk(KERN_ERR "hppfs_read: read failed: %d\n", err); - return err; - } - count = err; - if (count > 0) - *ppos += count; - } - else count = read_proc(hppfs->proc_file, buf, count, ppos, 1); - - return count; -} - -static ssize_t hppfs_write(struct file *file, const char __user *buf, - size_t len, loff_t *ppos) -{ - struct hppfs_private *data = file->private_data; - struct file *proc_file = data->proc_file; - ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); - - write = file_inode(proc_file)->i_fop->write; - return (*write)(proc_file, buf, len, ppos); -} - -static int open_host_sock(char *host_file, int *filter_out) -{ - char *end; - int fd; - - end = &host_file[strlen(host_file)]; - strcpy(end, "/rw"); - *filter_out = 1; - fd = os_connect_socket(host_file); - if (fd > 0) - return fd; - - strcpy(end, "/r"); - *filter_out = 0; - fd = os_connect_socket(host_file); - return fd; -} - -static void free_contents(struct hppfs_data *head) -{ - struct hppfs_data *data; - struct list_head *ele, *next; - - if (head == NULL) - return; - - list_for_each_safe(ele, next, &head->list) { - data = list_entry(ele, struct hppfs_data, list); - kfree(data); - } - kfree(head); -} - -static struct hppfs_data *hppfs_get_data(int fd, int filter, - struct file *proc_file, - struct file *hppfs_file, - loff_t *size_out) -{ - struct hppfs_data *data, *new, *head; - int n, err; - - err = -ENOMEM; - data = kmalloc(sizeof(*data), GFP_KERNEL); - if (data == NULL) { - printk(KERN_ERR "hppfs_get_data : head allocation failed\n"); - goto failed; - } - - INIT_LIST_HEAD(&data->list); - - head = data; - *size_out = 0; - - if (filter) { - while ((n = read_proc(proc_file, data->contents, - sizeof(data->contents), NULL, 0)) > 0) - os_write_file(fd, data->contents, n); - err = os_shutdown_socket(fd, 0, 1); - if (err) { - printk(KERN_ERR "hppfs_get_data : failed to shut down " - "socket\n"); - goto failed_free; - } - } - while (1) { - n = os_read_file(fd, data->contents, sizeof(data->contents)); - if (n < 0) { - err = n; - printk(KERN_ERR "hppfs_get_data : read failed, " - "errno = %d\n", err); - goto failed_free; - } else if (n == 0) - break; - - *size_out += n; - - if (n < sizeof(data->contents)) - break; - - new = kmalloc(sizeof(*data), GFP_KERNEL); - if (new == 0) { - printk(KERN_ERR "hppfs_get_data : data allocation " - "failed\n"); - err = -ENOMEM; - goto failed_free; - } - - INIT_LIST_HEAD(&new->list); - list_add(&new->list, &data->list); - data = new; - } - return head; - - failed_free: - free_contents(head); - failed: - return ERR_PTR(err); -} - -static struct hppfs_private *hppfs_data(void) -{ - struct hppfs_private *data; - - data = kmalloc(sizeof(*data), GFP_KERNEL); - if (data == NULL) - return data; - - *data = ((struct hppfs_private ) { .host_fd = -1, - .len = -1, - .contents = NULL } ); - return data; -} - -static int file_mode(int fmode) -{ - if (fmode == (FMODE_READ | FMODE_WRITE)) - return O_RDWR; - if (fmode == FMODE_READ) - return O_RDONLY; - if (fmode == FMODE_WRITE) - return O_WRONLY; - return 0; -} - -static int hppfs_open(struct inode *inode, struct file *file) -{ - const struct cred *cred = file->f_cred; - struct hppfs_private *data; - struct path path; - char *host_file; - int err, fd, type, filter; - - err = -ENOMEM; - data = hppfs_data(); - if (data == NULL) - goto out; - - host_file = dentry_name(file->f_path.dentry, strlen("/rw")); - if (host_file == NULL) - goto out_free2; - - path.mnt = inode->i_sb->s_fs_info; - path.dentry = HPPFS_I(inode)->proc_dentry; - - data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred); - err = PTR_ERR(data->proc_file); - if (IS_ERR(data->proc_file)) - goto out_free1; - - type = os_file_type(host_file); - if (type == OS_TYPE_FILE) { - fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); - if (fd >= 0) - data->host_fd = fd; - else - printk(KERN_ERR "hppfs_open : failed to open '%s', " - "errno = %d\n", host_file, -fd); - - data->contents = NULL; - } else if (type == OS_TYPE_DIR) { - fd = open_host_sock(host_file, &filter); - if (fd > 0) { - data->contents = hppfs_get_data(fd, filter, - data->proc_file, - file, &data->len); - if (!IS_ERR(data->contents)) - data->host_fd = fd; - } else - printk(KERN_ERR "hppfs_open : failed to open a socket " - "in '%s', errno = %d\n", host_file, -fd); - } - kfree(host_file); - - file->private_data = data; - return 0; - - out_free1: - kfree(host_file); - out_free2: - free_contents(data->contents); - kfree(data); - out: - return err; -} - -static int hppfs_dir_open(struct inode *inode, struct file *file) -{ - const struct cred *cred = file->f_cred; - struct hppfs_private *data; - struct path path; - int err; - - err = -ENOMEM; - data = hppfs_data(); - if (data == NULL) - goto out; - - path.mnt = inode->i_sb->s_fs_info; - path.dentry = HPPFS_I(inode)->proc_dentry; - data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred); - err = PTR_ERR(data->proc_file); - if (IS_ERR(data->proc_file)) - goto out_free; - - file->private_data = data; - return 0; - - out_free: - kfree(data); - out: - return err; -} - -static loff_t hppfs_llseek(struct file *file, loff_t off, int where) -{ - struct hppfs_private *data = file->private_data; - struct file *proc_file = data->proc_file; - loff_t (*llseek)(struct file *, loff_t, int); - loff_t ret; - - llseek = file_inode(proc_file)->i_fop->llseek; - if (llseek != NULL) { - ret = (*llseek)(proc_file, off, where); - if (ret < 0) - return ret; - } - - return default_llseek(file, off, where); -} - -static int hppfs_release(struct inode *inode, struct file *file) -{ - struct hppfs_private *data = file->private_data; - struct file *proc_file = data->proc_file; - if (proc_file) - fput(proc_file); - kfree(data); - return 0; -} - -static const struct file_operations hppfs_file_fops = { - .owner = NULL, - .llseek = hppfs_llseek, - .read = hppfs_read, - .write = hppfs_write, - .open = hppfs_open, - .release = hppfs_release, -}; - -struct hppfs_dirent { - struct dir_context ctx; - struct dir_context *caller; - struct dentry *dentry; -}; - -static int hppfs_filldir(struct dir_context *ctx, const char *name, int size, - loff_t offset, u64 inode, unsigned int type) -{ - struct hppfs_dirent *dirent = - container_of(ctx, struct hppfs_dirent, ctx); - - if (file_removed(dirent->dentry, name)) - return 0; - - dirent->caller->pos = dirent->ctx.pos; - return !dir_emit(dirent->caller, name, size, inode, type); -} - -static int hppfs_readdir(struct file *file, struct dir_context *ctx) -{ - struct hppfs_private *data = file->private_data; - struct file *proc_file = data->proc_file; - struct hppfs_dirent d = { - .ctx.actor = hppfs_filldir, - .caller = ctx, - .dentry = file->f_path.dentry - }; - int err; - proc_file->f_pos = ctx->pos; - err = iterate_dir(proc_file, &d.ctx); - ctx->pos = d.ctx.pos; - return err; -} - -static const struct file_operations hppfs_dir_fops = { - .owner = NULL, - .iterate = hppfs_readdir, - .open = hppfs_dir_open, - .llseek = default_llseek, - .release = hppfs_release, -}; - -static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf) -{ - sf->f_blocks = 0; - sf->f_bfree = 0; - sf->f_bavail = 0; - sf->f_files = 0; - sf->f_ffree = 0; - sf->f_type = HPPFS_SUPER_MAGIC; - return 0; -} - -static struct inode *hppfs_alloc_inode(struct super_block *sb) -{ - struct hppfs_inode_info *hi; - - hi = kmalloc(sizeof(*hi), GFP_KERNEL); - if (!hi) - return NULL; - - hi->proc_dentry = NULL; - inode_init_once(&hi->vfs_inode); - return &hi->vfs_inode; -} - -void hppfs_evict_inode(struct inode *ino) -{ - clear_inode(ino); - dput(HPPFS_I(ino)->proc_dentry); - mntput(ino->i_sb->s_fs_info); -} - -static void hppfs_i_callback(struct rcu_head *head) -{ - struct inode *inode = container_of(head, struct inode, i_rcu); - kfree(HPPFS_I(inode)); -} - -static void hppfs_destroy_inode(struct inode *inode) -{ - call_rcu(&inode->i_rcu, hppfs_i_callback); -} - -static const struct super_operations hppfs_sbops = { - .alloc_inode = hppfs_alloc_inode, - .destroy_inode = hppfs_destroy_inode, - .evict_inode = hppfs_evict_inode, - .statfs = hppfs_statfs, -}; - -static int hppfs_readlink(struct dentry *dentry, char __user *buffer, - int buflen) -{ - struct dentry *proc_dentry = HPPFS_I(d_inode(dentry))->proc_dentry; - return d_inode(proc_dentry)->i_op->readlink(proc_dentry, buffer, - buflen); -} - -static const char *hppfs_follow_link(struct dentry *dentry, void **cookie) -{ - struct dentry *proc_dentry = HPPFS_I(d_inode(dentry))->proc_dentry; - - return d_inode(proc_dentry)->i_op->follow_link(proc_dentry, cookie); -} - -static void hppfs_put_link(struct inode *inode, void *cookie) -{ - struct inode *proc_inode = d_inode(HPPFS_I(inode)->proc_dentry); - - if (proc_inode->i_op->put_link) - proc_inode->i_op->put_link(proc_inode, cookie); -} - -static const struct inode_operations hppfs_dir_iops = { - .lookup = hppfs_lookup, -}; - -static const struct inode_operations hppfs_link_iops = { - .readlink = hppfs_readlink, - .follow_link = hppfs_follow_link, - .put_link = hppfs_put_link, -}; - -static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) -{ - struct inode *proc_ino = d_inode(dentry); - struct inode *inode = new_inode(sb); - - if (!inode) { - dput(dentry); - return NULL; - } - - if (d_is_dir(dentry)) { - inode->i_op = &hppfs_dir_iops; - inode->i_fop = &hppfs_dir_fops; - } else if (d_is_symlink(dentry)) { - inode->i_op = &hppfs_link_iops; - inode->i_fop = &hppfs_file_fops; - } else { - inode->i_op = &hppfs_file_iops; - inode->i_fop = &hppfs_file_fops; - } - - HPPFS_I(inode)->proc_dentry = dentry; - - inode->i_uid = proc_ino->i_uid; - inode->i_gid = proc_ino->i_gid; - inode->i_atime = proc_ino->i_atime; - inode->i_mtime = proc_ino->i_mtime; - inode->i_ctime = proc_ino->i_ctime; - inode->i_ino = proc_ino->i_ino; - inode->i_mode = proc_ino->i_mode; - set_nlink(inode, proc_ino->i_nlink); - inode->i_size = proc_ino->i_size; - inode->i_blocks = proc_ino->i_blocks; - - return inode; -} - -static int hppfs_fill_super(struct super_block *sb, void *d, int silent) -{ - struct inode *root_inode; - struct vfsmount *proc_mnt; - int err = -ENOENT; - - proc_mnt = mntget(task_active_pid_ns(current)->proc_mnt); - if (IS_ERR(proc_mnt)) - goto out; - - sb->s_blocksize = 1024; - sb->s_blocksize_bits = 10; - sb->s_magic = HPPFS_SUPER_MAGIC; - sb->s_op = &hppfs_sbops; - sb->s_fs_info = proc_mnt; - - err = -ENOMEM; - root_inode = get_inode(sb, dget(proc_mnt->mnt_root)); - sb->s_root = d_make_root(root_inode); - if (!sb->s_root) - goto out_mntput; - - return 0; - - out_mntput: - mntput(proc_mnt); - out: - return(err); -} - -static struct dentry *hppfs_read_super(struct file_system_type *type, - int flags, const char *dev_name, - void *data) -{ - return mount_nodev(type, flags, data, hppfs_fill_super); -} - -static struct file_system_type hppfs_type = { - .owner = THIS_MODULE, - .name = "hppfs", - .mount = hppfs_read_super, - .kill_sb = kill_anon_super, - .fs_flags = 0, -}; -MODULE_ALIAS_FS("hppfs"); - -static int __init init_hppfs(void) -{ - return register_filesystem(&hppfs_type); -} - -static void __exit exit_hppfs(void) -{ - unregister_filesystem(&hppfs_type); -} - -module_init(init_hppfs) -module_exit(exit_hppfs) -MODULE_LICENSE("GPL"); diff --git a/fs/xattr.c b/fs/xattr.c index 4ef698549e31..072fee1258dd 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -298,18 +298,18 @@ vfs_removexattr(struct dentry *dentry, const char *name) mutex_lock(&inode->i_mutex); error = security_inode_removexattr(dentry, name); - if (error) { - mutex_unlock(&inode->i_mutex); - return error; - } + if (error) + goto out; error = inode->i_op->removexattr(dentry, name); - mutex_unlock(&inode->i_mutex); if (!error) { fsnotify_xattr(dentry); evm_inode_post_removexattr(dentry, name); } + +out: + mutex_unlock(&inode->i_mutex); return error; } EXPORT_SYMBOL_GPL(vfs_removexattr); diff --git a/include/dt-bindings/clock/jz4740-cgu.h b/include/dt-bindings/clock/jz4740-cgu.h new file mode 100644 index 000000000000..43153d3e9bd2 --- /dev/null +++ b/include/dt-bindings/clock/jz4740-cgu.h @@ -0,0 +1,37 @@ +/* + * This header provides clock numbers for the ingenic,jz4740-cgu DT binding. + * + * They are roughly ordered as: + * - external clocks + * - PLLs + * - muxes/dividers in the order they appear in the jz4740 programmers manual + * - gates in order of their bit in the CLKGR* registers + */ + +#ifndef __DT_BINDINGS_CLOCK_JZ4740_CGU_H__ +#define __DT_BINDINGS_CLOCK_JZ4740_CGU_H__ + +#define JZ4740_CLK_EXT 0 +#define JZ4740_CLK_RTC 1 +#define JZ4740_CLK_PLL 2 +#define JZ4740_CLK_PLL_HALF 3 +#define JZ4740_CLK_CCLK 4 +#define JZ4740_CLK_HCLK 5 +#define JZ4740_CLK_PCLK 6 +#define JZ4740_CLK_MCLK 7 +#define JZ4740_CLK_LCD 8 +#define JZ4740_CLK_LCD_PCLK 9 +#define JZ4740_CLK_I2S 10 +#define JZ4740_CLK_SPI 11 +#define JZ4740_CLK_MMC 12 +#define JZ4740_CLK_UHC 13 +#define JZ4740_CLK_UDC 14 +#define JZ4740_CLK_UART0 15 +#define JZ4740_CLK_UART1 16 +#define JZ4740_CLK_DMA 17 +#define JZ4740_CLK_IPU 18 +#define JZ4740_CLK_ADC 19 +#define JZ4740_CLK_I2C 20 +#define JZ4740_CLK_AIC 21 + +#endif /* __DT_BINDINGS_CLOCK_JZ4740_CGU_H__ */ diff --git a/include/dt-bindings/clock/jz4780-cgu.h b/include/dt-bindings/clock/jz4780-cgu.h new file mode 100644 index 000000000000..467165e3cfee --- /dev/null +++ b/include/dt-bindings/clock/jz4780-cgu.h @@ -0,0 +1,88 @@ +/* + * This header provides clock numbers for the ingenic,jz4780-cgu DT binding. + * + * They are roughly ordered as: + * - external clocks + * - PLLs + * - muxes/dividers in the order they appear in the jz4780 programmers manual + * - gates in order of their bit in the CLKGR* registers + */ + +#ifndef __DT_BINDINGS_CLOCK_JZ4780_CGU_H__ +#define __DT_BINDINGS_CLOCK_JZ4780_CGU_H__ + +#define JZ4780_CLK_EXCLK 0 +#define JZ4780_CLK_RTCLK 1 +#define JZ4780_CLK_APLL 2 +#define JZ4780_CLK_MPLL 3 +#define JZ4780_CLK_EPLL 4 +#define JZ4780_CLK_VPLL 5 +#define JZ4780_CLK_OTGPHY 6 +#define JZ4780_CLK_SCLKA 7 +#define JZ4780_CLK_CPUMUX 8 +#define JZ4780_CLK_CPU 9 +#define JZ4780_CLK_L2CACHE 10 +#define JZ4780_CLK_AHB0 11 +#define JZ4780_CLK_AHB2PMUX 12 +#define JZ4780_CLK_AHB2 13 +#define JZ4780_CLK_PCLK 14 +#define JZ4780_CLK_DDR 15 +#define JZ4780_CLK_VPU 16 +#define JZ4780_CLK_I2SPLL 17 +#define JZ4780_CLK_I2S 18 +#define JZ4780_CLK_LCD0PIXCLK 19 +#define JZ4780_CLK_LCD1PIXCLK 20 +#define JZ4780_CLK_MSCMUX 21 +#define JZ4780_CLK_MSC0 22 +#define JZ4780_CLK_MSC1 23 +#define JZ4780_CLK_MSC2 24 +#define JZ4780_CLK_UHC 25 +#define JZ4780_CLK_SSIPLL 26 +#define JZ4780_CLK_SSI 27 +#define JZ4780_CLK_CIMMCLK 28 +#define JZ4780_CLK_PCMPLL 29 +#define JZ4780_CLK_PCM 30 +#define JZ4780_CLK_GPU 31 +#define JZ4780_CLK_HDMI 32 +#define JZ4780_CLK_BCH 33 +#define JZ4780_CLK_NEMC 34 +#define JZ4780_CLK_OTG0 35 +#define JZ4780_CLK_SSI0 36 +#define JZ4780_CLK_SMB0 37 +#define JZ4780_CLK_SMB1 38 +#define JZ4780_CLK_SCC 39 +#define JZ4780_CLK_AIC 40 +#define JZ4780_CLK_TSSI0 41 +#define JZ4780_CLK_OWI 42 +#define JZ4780_CLK_KBC 43 +#define JZ4780_CLK_SADC 44 +#define JZ4780_CLK_UART0 45 +#define JZ4780_CLK_UART1 46 +#define JZ4780_CLK_UART2 47 +#define JZ4780_CLK_UART3 48 +#define JZ4780_CLK_SSI1 49 +#define JZ4780_CLK_SSI2 50 +#define JZ4780_CLK_PDMA 51 +#define JZ4780_CLK_GPS 52 +#define JZ4780_CLK_MAC 53 +#define JZ4780_CLK_SMB2 54 +#define JZ4780_CLK_CIM 55 +#define JZ4780_CLK_LCD 56 +#define JZ4780_CLK_TVE 57 +#define JZ4780_CLK_IPU 58 +#define JZ4780_CLK_DDR0 59 +#define JZ4780_CLK_DDR1 60 +#define JZ4780_CLK_SMB3 61 +#define JZ4780_CLK_TSSI1 62 +#define JZ4780_CLK_COMPRESS 63 +#define JZ4780_CLK_AIC1 64 +#define JZ4780_CLK_GPVLC 65 +#define JZ4780_CLK_OTG1 66 +#define JZ4780_CLK_UART4 67 +#define JZ4780_CLK_AHBMON 68 +#define JZ4780_CLK_SMB4 69 +#define JZ4780_CLK_DES 70 +#define JZ4780_CLK_X2D 71 +#define JZ4780_CLK_CORE1 72 + +#endif /* __DT_BINDINGS_CLOCK_JZ4780_CGU_H__ */ diff --git a/include/dt-bindings/phy/phy-pistachio-usb.h b/include/dt-bindings/phy/phy-pistachio-usb.h new file mode 100644 index 000000000000..d1877aa0a3f5 --- /dev/null +++ b/include/dt-bindings/phy/phy-pistachio-usb.h @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2015 Google, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + */ + +#ifndef _DT_BINDINGS_PHY_PISTACHIO +#define _DT_BINDINGS_PHY_PISTACHIO + +#define REFCLK_XO_CRYSTAL 0x0 +#define REFCLK_X0_EXT_CLK 0x1 +#define REFCLK_CLK_CORE 0x2 + +#endif /* _DT_BINDINGS_PHY_PISTACHIO */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c187817471fb..1618cdfb38c7 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -261,8 +261,13 @@ extern void acpi_osi_setup(char *str); extern bool acpi_osi_is_win8(void); #ifdef CONFIG_ACPI_NUMA +int acpi_map_pxm_to_online_node(int pxm); int acpi_get_node(acpi_handle handle); #else +static inline int acpi_map_pxm_to_online_node(int pxm) +{ + return 0; +} static inline int acpi_get_node(acpi_handle handle) { return 0; diff --git a/include/linux/bcm47xx_nvram.h b/include/linux/bcm47xx_nvram.h index b12b07e75929..2793652fbf66 100644 --- a/include/linux/bcm47xx_nvram.h +++ b/include/linux/bcm47xx_nvram.h @@ -10,11 +10,17 @@ #include <linux/types.h> #include <linux/kernel.h> +#include <linux/vmalloc.h> -#ifdef CONFIG_BCM47XX +#ifdef CONFIG_BCM47XX_NVRAM int bcm47xx_nvram_init_from_mem(u32 base, u32 lim); int bcm47xx_nvram_getenv(const char *name, char *val, size_t val_len); int bcm47xx_nvram_gpio_pin(const char *name); +char *bcm47xx_nvram_get_contents(size_t *val_len); +static inline void bcm47xx_nvram_release_contents(char *nvram) +{ + vfree(nvram); +}; #else static inline int bcm47xx_nvram_init_from_mem(u32 base, u32 lim) { @@ -29,6 +35,15 @@ static inline int bcm47xx_nvram_gpio_pin(const char *name) { return -ENOTSUPP; }; + +static inline char *bcm47xx_nvram_get_contents(size_t *val_len) +{ + return NULL; +}; + +static inline void bcm47xx_nvram_release_contents(char *nvram) +{ +}; #endif #endif /* __BCM47XX_NVRAM_H */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 05be2352fef8..26fc8bc77f85 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -21,6 +21,7 @@ # define __rcu __attribute__((noderef, address_space(4))) #else # define __rcu +# define __pmem __attribute__((noderef, address_space(5))) #endif extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); @@ -42,6 +43,7 @@ extern void __chk_io_ptr(const volatile void __iomem *); # define __cond_lock(x,c) (c) # define __percpu # define __rcu +# define __pmem #endif /* Indirect macros required for expanded argument pasting, eg. __LINE__. */ diff --git a/include/linux/dma/pxa-dma.h b/include/linux/dma/pxa-dma.h new file mode 100644 index 000000000000..3edc99294bf6 --- /dev/null +++ b/include/linux/dma/pxa-dma.h @@ -0,0 +1,27 @@ +#ifndef _PXA_DMA_H_ +#define _PXA_DMA_H_ + +enum pxad_chan_prio { + PXAD_PRIO_HIGHEST = 0, + PXAD_PRIO_NORMAL, + PXAD_PRIO_LOW, + PXAD_PRIO_LOWEST, +}; + +struct pxad_param { + unsigned int drcmr; + enum pxad_chan_prio prio; +}; + +struct dma_chan; + +#ifdef CONFIG_PXA_DMA +bool pxad_filter_fn(struct dma_chan *chan, void *param); +#else +static inline bool pxad_filter_fn(struct dma_chan *chan, void *param) +{ + return false; +} +#endif + +#endif /* _PXA_DMA_H_ */ diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index ad419757241f..e2f5eb419976 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -65,6 +65,7 @@ enum dma_transaction_type { DMA_PQ, DMA_XOR_VAL, DMA_PQ_VAL, + DMA_MEMSET, DMA_INTERRUPT, DMA_SG, DMA_PRIVATE, @@ -122,10 +123,18 @@ enum dma_transfer_direction { * chunk and before first src/dst address for next chunk. * Ignored for dst(assumed 0), if dst_inc is true and dst_sgl is false. * Ignored for src(assumed 0), if src_inc is true and src_sgl is false. + * @dst_icg: Number of bytes to jump after last dst address of this + * chunk and before the first dst address for next chunk. + * Ignored if dst_inc is true and dst_sgl is false. + * @src_icg: Number of bytes to jump after last src address of this + * chunk and before the first src address for next chunk. + * Ignored if src_inc is true and src_sgl is false. */ struct data_chunk { size_t size; size_t icg; + size_t dst_icg; + size_t src_icg; }; /** @@ -222,6 +231,16 @@ struct dma_chan_percpu { }; /** + * struct dma_router - DMA router structure + * @dev: pointer to the DMA router device + * @route_free: function to be called when the route can be disconnected + */ +struct dma_router { + struct device *dev; + void (*route_free)(struct device *dev, void *route_data); +}; + +/** * struct dma_chan - devices supply DMA channels, clients use them * @device: ptr to the dma device who supplies this channel, always !%NULL * @cookie: last cookie value returned to client @@ -232,6 +251,8 @@ struct dma_chan_percpu { * @local: per-cpu pointer to a struct dma_chan_percpu * @client_count: how many clients are using this channel * @table_count: number of appearances in the mem-to-mem allocation table + * @router: pointer to the DMA router structure + * @route_data: channel specific data for the router * @private: private data for certain client-channel associations */ struct dma_chan { @@ -247,6 +268,11 @@ struct dma_chan { struct dma_chan_percpu __percpu *local; int client_count; int table_count; + + /* DMA router */ + struct dma_router *router; + void *route_data; + void *private; }; @@ -570,6 +596,7 @@ struct dma_tx_state { * @copy_align: alignment shift for memcpy operations * @xor_align: alignment shift for xor operations * @pq_align: alignment shift for pq operations + * @fill_align: alignment shift for memset operations * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @src_addr_widths: bit mask of src addr widths the device supports @@ -588,6 +615,7 @@ struct dma_tx_state { * @device_prep_dma_xor_val: prepares a xor validation operation * @device_prep_dma_pq: prepares a pq operation * @device_prep_dma_pq_val: prepares a pqzero_sum operation + * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation * @device_prep_dma_cyclic: prepare a cyclic dma operation suitable for audio. @@ -620,6 +648,7 @@ struct dma_device { u8 copy_align; u8 xor_align; u8 pq_align; + u8 fill_align; #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; @@ -650,6 +679,9 @@ struct dma_device { struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, unsigned int src_cnt, const unsigned char *scf, size_t len, enum sum_check_flags *pqres, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_memset)( + struct dma_chan *chan, dma_addr_t dest, int value, size_t len, + unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)( struct dma_chan *chan, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_sg)( @@ -745,6 +777,17 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma( return chan->device->device_prep_interleaved_dma(chan, xt, flags); } +static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memset( + struct dma_chan *chan, dma_addr_t dest, int value, size_t len, + unsigned long flags) +{ + if (!chan || !chan->device) + return NULL; + + return chan->device->device_prep_dma_memset(chan, dest, value, + len, flags); +} + static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_sg( struct dma_chan *chan, struct scatterlist *dst_sg, unsigned int dst_nents, @@ -820,6 +863,12 @@ static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1, return dmaengine_check_align(dev->pq_align, off1, off2, len); } +static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->fill_align, off1, off2, len); +} + static inline void dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) { @@ -874,6 +923,33 @@ static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags) BUG(); } +static inline size_t dmaengine_get_icg(bool inc, bool sgl, size_t icg, + size_t dir_icg) +{ + if (inc) { + if (dir_icg) + return dir_icg; + else if (sgl) + return icg; + } + + return 0; +} + +static inline size_t dmaengine_get_dst_icg(struct dma_interleaved_template *xt, + struct data_chunk *chunk) +{ + return dmaengine_get_icg(xt->dst_inc, xt->dst_sgl, + chunk->icg, chunk->dst_icg); +} + +static inline size_t dmaengine_get_src_icg(struct dma_interleaved_template *xt, + struct data_chunk *chunk) +{ + return dmaengine_get_icg(xt->src_inc, xt->src_sgl, + chunk->icg, chunk->src_icg); +} + /* --- public DMA engine API --- */ #ifdef CONFIG_DMA_ENGINE diff --git a/include/linux/efi.h b/include/linux/efi.h index 5f19efe4eb3f..85ef051ac6fb 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -85,7 +85,8 @@ typedef struct { #define EFI_MEMORY_MAPPED_IO 11 #define EFI_MEMORY_MAPPED_IO_PORT_SPACE 12 #define EFI_PAL_CODE 13 -#define EFI_MAX_MEMORY_TYPE 14 +#define EFI_PERSISTENT_MEMORY 14 +#define EFI_MAX_MEMORY_TYPE 15 /* Attribute values: */ #define EFI_MEMORY_UC ((u64)0x0000000000000001ULL) /* uncached */ diff --git a/arch/mips/jz4740/irq.h b/include/linux/irqchip/ingenic.h index 0f48720b5b63..0ee319a4029d 100644 --- a/arch/mips/jz4740/irq.h +++ b/include/linux/irqchip/ingenic.h @@ -12,12 +12,12 @@ * */ -#ifndef __MIPS_JZ4740_IRQ_H__ -#define __MIPS_JZ4740_IRQ_H__ +#ifndef __LINUX_IRQCHIP_INGENIC_H__ +#define __LINUX_IRQCHIP_INGENIC_H__ #include <linux/irq.h> -extern void jz4740_irq_suspend(struct irq_data *data); -extern void jz4740_irq_resume(struct irq_data *data); +extern void ingenic_intc_irq_suspend(struct irq_data *data); +extern void ingenic_intc_irq_resume(struct irq_data *data); #endif diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h new file mode 100644 index 000000000000..75e3af01ee32 --- /dev/null +++ b/include/linux/libnvdimm.h @@ -0,0 +1,151 @@ +/* + * libnvdimm - Non-volatile-memory Devices Subsystem + * + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __LIBNVDIMM_H__ +#define __LIBNVDIMM_H__ +#include <linux/kernel.h> +#include <linux/sizes.h> +#include <linux/types.h> + +enum { + /* when a dimm supports both PMEM and BLK access a label is required */ + NDD_ALIASING = 1 << 0, + /* unarmed memory devices may not persist writes */ + NDD_UNARMED = 1 << 1, + + /* need to set a limit somewhere, but yes, this is likely overkill */ + ND_IOCTL_MAX_BUFLEN = SZ_4M, + ND_CMD_MAX_ELEM = 4, + ND_CMD_MAX_ENVELOPE = 16, + ND_CMD_ARS_STATUS_MAX = SZ_4K, + ND_MAX_MAPPINGS = 32, + + /* mark newly adjusted resources as requiring a label update */ + DPA_RESOURCE_ADJUSTED = 1 << 0, +}; + +extern struct attribute_group nvdimm_bus_attribute_group; +extern struct attribute_group nvdimm_attribute_group; +extern struct attribute_group nd_device_attribute_group; +extern struct attribute_group nd_numa_attribute_group; +extern struct attribute_group nd_region_attribute_group; +extern struct attribute_group nd_mapping_attribute_group; + +struct nvdimm; +struct nvdimm_bus_descriptor; +typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len); + +struct nd_namespace_label; +struct nvdimm_drvdata; +struct nd_mapping { + struct nvdimm *nvdimm; + struct nd_namespace_label **labels; + u64 start; + u64 size; + /* + * @ndd is for private use at region enable / disable time for + * get_ndd() + put_ndd(), all other nd_mapping to ndd + * conversions use to_ndd() which respects enabled state of the + * nvdimm. + */ + struct nvdimm_drvdata *ndd; +}; + +struct nvdimm_bus_descriptor { + const struct attribute_group **attr_groups; + unsigned long dsm_mask; + char *provider_name; + ndctl_fn ndctl; +}; + +struct nd_cmd_desc { + int in_num; + int out_num; + u32 in_sizes[ND_CMD_MAX_ELEM]; + int out_sizes[ND_CMD_MAX_ELEM]; +}; + +struct nd_interleave_set { + u64 cookie; +}; + +struct nd_region_desc { + struct resource *res; + struct nd_mapping *nd_mapping; + u16 num_mappings; + const struct attribute_group **attr_groups; + struct nd_interleave_set *nd_set; + void *provider_data; + int num_lanes; + int numa_node; +}; + +struct nvdimm_bus; +struct module; +struct device; +struct nd_blk_region; +struct nd_blk_region_desc { + int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); + void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); + int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, + void *iobuf, u64 len, int rw); + struct nd_region_desc ndr_desc; +}; + +static inline struct nd_blk_region_desc *to_blk_region_desc( + struct nd_region_desc *ndr_desc) +{ + return container_of(ndr_desc, struct nd_blk_region_desc, ndr_desc); + +} + +struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, + struct nvdimm_bus_descriptor *nfit_desc, struct module *module); +#define nvdimm_bus_register(parent, desc) \ + __nvdimm_bus_register(parent, desc, THIS_MODULE) +void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); +struct nvdimm_bus *to_nvdimm_bus(struct device *dev); +struct nvdimm *to_nvdimm(struct device *dev); +struct nd_region *to_nd_region(struct device *dev); +struct nd_blk_region *to_nd_blk_region(struct device *dev); +struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); +const char *nvdimm_name(struct nvdimm *nvdimm); +void *nvdimm_provider_data(struct nvdimm *nvdimm); +struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, + const struct attribute_group **groups, unsigned long flags, + unsigned long *dsm_mask); +const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd); +const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd); +u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, + const struct nd_cmd_desc *desc, int idx, void *buf); +u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd, + const struct nd_cmd_desc *desc, int idx, const u32 *in_field, + const u32 *out_field); +int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count); +struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc); +struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc); +struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc); +void *nd_region_provider_data(struct nd_region *nd_region); +void *nd_blk_region_provider_data(struct nd_blk_region *ndbr); +void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data); +struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr); +unsigned int nd_region_acquire_lane(struct nd_region *nd_region); +void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); +u64 nd_fletcher64(void *addr, size_t len, bool le); +#endif /* __LIBNVDIMM_H__ */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h new file mode 100644 index 000000000000..9429f054c323 --- /dev/null +++ b/include/linux/lsm_hooks.h @@ -0,0 +1,1888 @@ +/* + * Linux Security Module interfaces + * + * Copyright (C) 2001 WireX Communications, Inc <[email protected]> + * Copyright (C) 2001 Greg Kroah-Hartman <[email protected]> + * Copyright (C) 2001 Networks Associates Technology, Inc <[email protected]> + * Copyright (C) 2001 James Morris <[email protected]> + * Copyright (C) 2001 Silicon Graphics, Inc. (Trust Technology Group) + * Copyright (C) 2015 Intel Corporation. + * Copyright (C) 2015 Casey Schaufler <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Due to this file being licensed under the GPL there is controversy over + * whether this permits you to write a module that #includes this file + * without placing your module under the GPL. Please consult a lawyer for + * advice before doing this. + * + */ + +#ifndef __LINUX_LSM_HOOKS_H +#define __LINUX_LSM_HOOKS_H + +#include <linux/security.h> +#include <linux/init.h> +#include <linux/rculist.h> + +/** + * Security hooks for program execution operations. + * + * @bprm_set_creds: + * Save security information in the bprm->security field, typically based + * on information about the bprm->file, for later use by the apply_creds + * hook. This hook may also optionally check permissions (e.g. for + * transitions between security domains). + * This hook may be called multiple times during a single execve, e.g. for + * interpreters. The hook can tell whether it has already been called by + * checking to see if @bprm->security is non-NULL. If so, then the hook + * may decide either to retain the security information saved earlier or + * to replace it. + * @bprm contains the linux_binprm structure. + * Return 0 if the hook is successful and permission is granted. + * @bprm_check_security: + * This hook mediates the point when a search for a binary handler will + * begin. It allows a check the @bprm->security value which is set in the + * preceding set_creds call. The primary difference from set_creds is + * that the argv list and envp list are reliably available in @bprm. This + * hook may be called multiple times during a single execve; and in each + * pass set_creds is called first. + * @bprm contains the linux_binprm structure. + * Return 0 if the hook is successful and permission is granted. + * @bprm_committing_creds: + * Prepare to install the new security attributes of a process being + * transformed by an execve operation, based on the old credentials + * pointed to by @current->cred and the information set in @bprm->cred by + * the bprm_set_creds hook. @bprm points to the linux_binprm structure. + * This hook is a good place to perform state changes on the process such + * as closing open file descriptors to which access will no longer be + * granted when the attributes are changed. This is called immediately + * before commit_creds(). + * @bprm_committed_creds: + * Tidy up after the installation of the new security attributes of a + * process being transformed by an execve operation. The new credentials + * have, by this point, been set to @current->cred. @bprm points to the + * linux_binprm structure. This hook is a good place to perform state + * changes on the process such as clearing out non-inheritable signal + * state. This is called immediately after commit_creds(). + * @bprm_secureexec: + * Return a boolean value (0 or 1) indicating whether a "secure exec" + * is required. The flag is passed in the auxiliary table + * on the initial stack to the ELF interpreter to indicate whether libc + * should enable secure mode. + * @bprm contains the linux_binprm structure. + * + * Security hooks for filesystem operations. + * + * @sb_alloc_security: + * Allocate and attach a security structure to the sb->s_security field. + * The s_security field is initialized to NULL when the structure is + * allocated. + * @sb contains the super_block structure to be modified. + * Return 0 if operation was successful. + * @sb_free_security: + * Deallocate and clear the sb->s_security field. + * @sb contains the super_block structure to be modified. + * @sb_statfs: + * Check permission before obtaining filesystem statistics for the @mnt + * mountpoint. + * @dentry is a handle on the superblock for the filesystem. + * Return 0 if permission is granted. + * @sb_mount: + * Check permission before an object specified by @dev_name is mounted on + * the mount point named by @nd. For an ordinary mount, @dev_name + * identifies a device if the file system type requires a device. For a + * remount (@flags & MS_REMOUNT), @dev_name is irrelevant. For a + * loopback/bind mount (@flags & MS_BIND), @dev_name identifies the + * pathname of the object being mounted. + * @dev_name contains the name for object being mounted. + * @path contains the path for mount point object. + * @type contains the filesystem type. + * @flags contains the mount flags. + * @data contains the filesystem-specific data. + * Return 0 if permission is granted. + * @sb_copy_data: + * Allow mount option data to be copied prior to parsing by the filesystem, + * so that the security module can extract security-specific mount + * options cleanly (a filesystem may modify the data e.g. with strsep()). + * This also allows the original mount data to be stripped of security- + * specific options to avoid having to make filesystems aware of them. + * @type the type of filesystem being mounted. + * @orig the original mount data copied from userspace. + * @copy copied data which will be passed to the security module. + * Returns 0 if the copy was successful. + * @sb_remount: + * Extracts security system specific mount options and verifies no changes + * are being made to those options. + * @sb superblock being remounted + * @data contains the filesystem-specific data. + * Return 0 if permission is granted. + * @sb_umount: + * Check permission before the @mnt file system is unmounted. + * @mnt contains the mounted file system. + * @flags contains the unmount flags, e.g. MNT_FORCE. + * Return 0 if permission is granted. + * @sb_pivotroot: + * Check permission before pivoting the root filesystem. + * @old_path contains the path for the new location of the + * current root (put_old). + * @new_path contains the path for the new root (new_root). + * Return 0 if permission is granted. + * @sb_set_mnt_opts: + * Set the security relevant mount options used for a superblock + * @sb the superblock to set security mount options for + * @opts binary data structure containing all lsm mount data + * @sb_clone_mnt_opts: + * Copy all security options from a given superblock to another + * @oldsb old superblock which contain information to clone + * @newsb new superblock which needs filled in + * @sb_parse_opts_str: + * Parse a string of security data filling in the opts structure + * @options string containing all mount options known by the LSM + * @opts binary data structure usable by the LSM + * @dentry_init_security: + * Compute a context for a dentry as the inode is not yet available + * since NFSv4 has no label backed by an EA anyway. + * @dentry dentry to use in calculating the context. + * @mode mode used to determine resource type. + * @name name of the last path component used to create file + * @ctx pointer to place the pointer to the resulting context in. + * @ctxlen point to place the length of the resulting context. + * + * + * Security hooks for inode operations. + * + * @inode_alloc_security: + * Allocate and attach a security structure to @inode->i_security. The + * i_security field is initialized to NULL when the inode structure is + * allocated. + * @inode contains the inode structure. + * Return 0 if operation was successful. + * @inode_free_security: + * @inode contains the inode structure. + * Deallocate the inode security structure and set @inode->i_security to + * NULL. + * @inode_init_security: + * Obtain the security attribute name suffix and value to set on a newly + * created inode and set up the incore security field for the new inode. + * This hook is called by the fs code as part of the inode creation + * transaction and provides for atomic labeling of the inode, unlike + * the post_create/mkdir/... hooks called by the VFS. The hook function + * is expected to allocate the name and value via kmalloc, with the caller + * being responsible for calling kfree after using them. + * If the security module does not use security attributes or does + * not wish to put a security attribute on this particular inode, + * then it should return -EOPNOTSUPP to skip this processing. + * @inode contains the inode structure of the newly created inode. + * @dir contains the inode structure of the parent directory. + * @qstr contains the last path component of the new object + * @name will be set to the allocated name suffix (e.g. selinux). + * @value will be set to the allocated attribute value. + * @len will be set to the length of the value. + * Returns 0 if @name and @value have been successfully set, + * -EOPNOTSUPP if no security attribute is needed, or + * -ENOMEM on memory allocation failure. + * @inode_create: + * Check permission to create a regular file. + * @dir contains inode structure of the parent of the new file. + * @dentry contains the dentry structure for the file to be created. + * @mode contains the file mode of the file to be created. + * Return 0 if permission is granted. + * @inode_link: + * Check permission before creating a new hard link to a file. + * @old_dentry contains the dentry structure for an existing + * link to the file. + * @dir contains the inode structure of the parent directory + * of the new link. + * @new_dentry contains the dentry structure for the new link. + * Return 0 if permission is granted. + * @path_link: + * Check permission before creating a new hard link to a file. + * @old_dentry contains the dentry structure for an existing link + * to the file. + * @new_dir contains the path structure of the parent directory of + * the new link. + * @new_dentry contains the dentry structure for the new link. + * Return 0 if permission is granted. + * @inode_unlink: + * Check the permission to remove a hard link to a file. + * @dir contains the inode structure of parent directory of the file. + * @dentry contains the dentry structure for file to be unlinked. + * Return 0 if permission is granted. + * @path_unlink: + * Check the permission to remove a hard link to a file. + * @dir contains the path structure of parent directory of the file. + * @dentry contains the dentry structure for file to be unlinked. + * Return 0 if permission is granted. + * @inode_symlink: + * Check the permission to create a symbolic link to a file. + * @dir contains the inode structure of parent directory of + * the symbolic link. + * @dentry contains the dentry structure of the symbolic link. + * @old_name contains the pathname of file. + * Return 0 if permission is granted. + * @path_symlink: + * Check the permission to create a symbolic link to a file. + * @dir contains the path structure of parent directory of + * the symbolic link. + * @dentry contains the dentry structure of the symbolic link. + * @old_name contains the pathname of file. + * Return 0 if permission is granted. + * @inode_mkdir: + * Check permissions to create a new directory in the existing directory + * associated with inode structure @dir. + * @dir contains the inode structure of parent of the directory + * to be created. + * @dentry contains the dentry structure of new directory. + * @mode contains the mode of new directory. + * Return 0 if permission is granted. + * @path_mkdir: + * Check permissions to create a new directory in the existing directory + * associated with path structure @path. + * @dir contains the path structure of parent of the directory + * to be created. + * @dentry contains the dentry structure of new directory. + * @mode contains the mode of new directory. + * Return 0 if permission is granted. + * @inode_rmdir: + * Check the permission to remove a directory. + * @dir contains the inode structure of parent of the directory + * to be removed. + * @dentry contains the dentry structure of directory to be removed. + * Return 0 if permission is granted. + * @path_rmdir: + * Check the permission to remove a directory. + * @dir contains the path structure of parent of the directory to be + * removed. + * @dentry contains the dentry structure of directory to be removed. + * Return 0 if permission is granted. + * @inode_mknod: + * Check permissions when creating a special file (or a socket or a fifo + * file created via the mknod system call). Note that if mknod operation + * is being done for a regular file, then the create hook will be called + * and not this hook. + * @dir contains the inode structure of parent of the new file. + * @dentry contains the dentry structure of the new file. + * @mode contains the mode of the new file. + * @dev contains the device number. + * Return 0 if permission is granted. + * @path_mknod: + * Check permissions when creating a file. Note that this hook is called + * even if mknod operation is being done for a regular file. + * @dir contains the path structure of parent of the new file. + * @dentry contains the dentry structure of the new file. + * @mode contains the mode of the new file. + * @dev contains the undecoded device number. Use new_decode_dev() to get + * the decoded device number. + * Return 0 if permission is granted. + * @inode_rename: + * Check for permission to rename a file or directory. + * @old_dir contains the inode structure for parent of the old link. + * @old_dentry contains the dentry structure of the old link. + * @new_dir contains the inode structure for parent of the new link. + * @new_dentry contains the dentry structure of the new link. + * Return 0 if permission is granted. + * @path_rename: + * Check for permission to rename a file or directory. + * @old_dir contains the path structure for parent of the old link. + * @old_dentry contains the dentry structure of the old link. + * @new_dir contains the path structure for parent of the new link. + * @new_dentry contains the dentry structure of the new link. + * Return 0 if permission is granted. + * @path_chmod: + * Check for permission to change DAC's permission of a file or directory. + * @dentry contains the dentry structure. + * @mnt contains the vfsmnt structure. + * @mode contains DAC's mode. + * Return 0 if permission is granted. + * @path_chown: + * Check for permission to change owner/group of a file or directory. + * @path contains the path structure. + * @uid contains new owner's ID. + * @gid contains new group's ID. + * Return 0 if permission is granted. + * @path_chroot: + * Check for permission to change root directory. + * @path contains the path structure. + * Return 0 if permission is granted. + * @inode_readlink: + * Check the permission to read the symbolic link. + * @dentry contains the dentry structure for the file link. + * Return 0 if permission is granted. + * @inode_follow_link: + * Check permission to follow a symbolic link when looking up a pathname. + * @dentry contains the dentry structure for the link. + * @inode contains the inode, which itself is not stable in RCU-walk + * @rcu indicates whether we are in RCU-walk mode. + * Return 0 if permission is granted. + * @inode_permission: + * Check permission before accessing an inode. This hook is called by the + * existing Linux permission function, so a security module can use it to + * provide additional checking for existing Linux permission checks. + * Notice that this hook is called when a file is opened (as well as many + * other operations), whereas the file_security_ops permission hook is + * called when the actual read/write operations are performed. + * @inode contains the inode structure to check. + * @mask contains the permission mask. + * Return 0 if permission is granted. + * @inode_setattr: + * Check permission before setting file attributes. Note that the kernel + * call to notify_change is performed from several locations, whenever + * file attributes change (such as when a file is truncated, chown/chmod + * operations, transferring disk quotas, etc). + * @dentry contains the dentry structure for the file. + * @attr is the iattr structure containing the new file attributes. + * Return 0 if permission is granted. + * @path_truncate: + * Check permission before truncating a file. + * @path contains the path structure for the file. + * Return 0 if permission is granted. + * @inode_getattr: + * Check permission before obtaining file attributes. + * @mnt is the vfsmount where the dentry was looked up + * @dentry contains the dentry structure for the file. + * Return 0 if permission is granted. + * @inode_setxattr: + * Check permission before setting the extended attributes + * @value identified by @name for @dentry. + * Return 0 if permission is granted. + * @inode_post_setxattr: + * Update inode security field after successful setxattr operation. + * @value identified by @name for @dentry. + * @inode_getxattr: + * Check permission before obtaining the extended attributes + * identified by @name for @dentry. + * Return 0 if permission is granted. + * @inode_listxattr: + * Check permission before obtaining the list of extended attribute + * names for @dentry. + * Return 0 if permission is granted. + * @inode_removexattr: + * Check permission before removing the extended attribute + * identified by @name for @dentry. + * Return 0 if permission is granted. + * @inode_getsecurity: + * Retrieve a copy of the extended attribute representation of the + * security label associated with @name for @inode via @buffer. Note that + * @name is the remainder of the attribute name after the security prefix + * has been removed. @alloc is used to specify of the call should return a + * value via the buffer or just the value length Return size of buffer on + * success. + * @inode_setsecurity: + * Set the security label associated with @name for @inode from the + * extended attribute value @value. @size indicates the size of the + * @value in bytes. @flags may be XATTR_CREATE, XATTR_REPLACE, or 0. + * Note that @name is the remainder of the attribute name after the + * security. prefix has been removed. + * Return 0 on success. + * @inode_listsecurity: + * Copy the extended attribute names for the security labels + * associated with @inode into @buffer. The maximum size of @buffer + * is specified by @buffer_size. @buffer may be NULL to request + * the size of the buffer required. + * Returns number of bytes used/required on success. + * @inode_need_killpriv: + * Called when an inode has been changed. + * @dentry is the dentry being changed. + * Return <0 on error to abort the inode change operation. + * Return 0 if inode_killpriv does not need to be called. + * Return >0 if inode_killpriv does need to be called. + * @inode_killpriv: + * The setuid bit is being removed. Remove similar security labels. + * Called with the dentry->d_inode->i_mutex held. + * @dentry is the dentry being changed. + * Return 0 on success. If error is returned, then the operation + * causing setuid bit removal is failed. + * @inode_getsecid: + * Get the secid associated with the node. + * @inode contains a pointer to the inode. + * @secid contains a pointer to the location where result will be saved. + * In case of failure, @secid will be set to zero. + * + * Security hooks for file operations + * + * @file_permission: + * Check file permissions before accessing an open file. This hook is + * called by various operations that read or write files. A security + * module can use this hook to perform additional checking on these + * operations, e.g. to revalidate permissions on use to support privilege + * bracketing or policy changes. Notice that this hook is used when the + * actual read/write operations are performed, whereas the + * inode_security_ops hook is called when a file is opened (as well as + * many other operations). + * Caveat: Although this hook can be used to revalidate permissions for + * various system call operations that read or write files, it does not + * address the revalidation of permissions for memory-mapped files. + * Security modules must handle this separately if they need such + * revalidation. + * @file contains the file structure being accessed. + * @mask contains the requested permissions. + * Return 0 if permission is granted. + * @file_alloc_security: + * Allocate and attach a security structure to the file->f_security field. + * The security field is initialized to NULL when the structure is first + * created. + * @file contains the file structure to secure. + * Return 0 if the hook is successful and permission is granted. + * @file_free_security: + * Deallocate and free any security structures stored in file->f_security. + * @file contains the file structure being modified. + * @file_ioctl: + * @file contains the file structure. + * @cmd contains the operation to perform. + * @arg contains the operational arguments. + * Check permission for an ioctl operation on @file. Note that @arg + * sometimes represents a user space pointer; in other cases, it may be a + * simple integer value. When @arg represents a user space pointer, it + * should never be used by the security module. + * Return 0 if permission is granted. + * @mmap_addr : + * Check permissions for a mmap operation at @addr. + * @addr contains virtual address that will be used for the operation. + * Return 0 if permission is granted. + * @mmap_file : + * Check permissions for a mmap operation. The @file may be NULL, e.g. + * if mapping anonymous memory. + * @file contains the file structure for file to map (may be NULL). + * @reqprot contains the protection requested by the application. + * @prot contains the protection that will be applied by the kernel. + * @flags contains the operational flags. + * Return 0 if permission is granted. + * @file_mprotect: + * Check permissions before changing memory access permissions. + * @vma contains the memory region to modify. + * @reqprot contains the protection requested by the application. + * @prot contains the protection that will be applied by the kernel. + * Return 0 if permission is granted. + * @file_lock: + * Check permission before performing file locking operations. + * Note: this hook mediates both flock and fcntl style locks. + * @file contains the file structure. + * @cmd contains the posix-translated lock operation to perform + * (e.g. F_RDLCK, F_WRLCK). + * Return 0 if permission is granted. + * @file_fcntl: + * Check permission before allowing the file operation specified by @cmd + * from being performed on the file @file. Note that @arg sometimes + * represents a user space pointer; in other cases, it may be a simple + * integer value. When @arg represents a user space pointer, it should + * never be used by the security module. + * @file contains the file structure. + * @cmd contains the operation to be performed. + * @arg contains the operational arguments. + * Return 0 if permission is granted. + * @file_set_fowner: + * Save owner security information (typically from current->security) in + * file->f_security for later use by the send_sigiotask hook. + * @file contains the file structure to update. + * Return 0 on success. + * @file_send_sigiotask: + * Check permission for the file owner @fown to send SIGIO or SIGURG to the + * process @tsk. Note that this hook is sometimes called from interrupt. + * Note that the fown_struct, @fown, is never outside the context of a + * struct file, so the file structure (and associated security information) + * can always be obtained: + * container_of(fown, struct file, f_owner) + * @tsk contains the structure of task receiving signal. + * @fown contains the file owner information. + * @sig is the signal that will be sent. When 0, kernel sends SIGIO. + * Return 0 if permission is granted. + * @file_receive: + * This hook allows security modules to control the ability of a process + * to receive an open file descriptor via socket IPC. + * @file contains the file structure being received. + * Return 0 if permission is granted. + * @file_open + * Save open-time permission checking state for later use upon + * file_permission, and recheck access if anything has changed + * since inode_permission. + * + * Security hooks for task operations. + * + * @task_create: + * Check permission before creating a child process. See the clone(2) + * manual page for definitions of the @clone_flags. + * @clone_flags contains the flags indicating what should be shared. + * Return 0 if permission is granted. + * @task_free: + * @task task being freed + * Handle release of task-related resources. (Note that this can be called + * from interrupt context.) + * @cred_alloc_blank: + * @cred points to the credentials. + * @gfp indicates the atomicity of any memory allocations. + * Only allocate sufficient memory and attach to @cred such that + * cred_transfer() will not get ENOMEM. + * @cred_free: + * @cred points to the credentials. + * Deallocate and clear the cred->security field in a set of credentials. + * @cred_prepare: + * @new points to the new credentials. + * @old points to the original credentials. + * @gfp indicates the atomicity of any memory allocations. + * Prepare a new set of credentials by copying the data from the old set. + * @cred_transfer: + * @new points to the new credentials. + * @old points to the original credentials. + * Transfer data from original creds to new creds + * @kernel_act_as: + * Set the credentials for a kernel service to act as (subjective context). + * @new points to the credentials to be modified. + * @secid specifies the security ID to be set + * The current task must be the one that nominated @secid. + * Return 0 if successful. + * @kernel_create_files_as: + * Set the file creation context in a set of credentials to be the same as + * the objective context of the specified inode. + * @new points to the credentials to be modified. + * @inode points to the inode to use as a reference. + * The current task must be the one that nominated @inode. + * Return 0 if successful. + * @kernel_fw_from_file: + * Load firmware from userspace (not called for built-in firmware). + * @file contains the file structure pointing to the file containing + * the firmware to load. This argument will be NULL if the firmware + * was loaded via the uevent-triggered blob-based interface exposed + * by CONFIG_FW_LOADER_USER_HELPER. + * @buf pointer to buffer containing firmware contents. + * @size length of the firmware contents. + * Return 0 if permission is granted. + * @kernel_module_request: + * Ability to trigger the kernel to automatically upcall to userspace for + * userspace to load a kernel module with the given name. + * @kmod_name name of the module requested by the kernel + * Return 0 if successful. + * @kernel_module_from_file: + * Load a kernel module from userspace. + * @file contains the file structure pointing to the file containing + * the kernel module to load. If the module is being loaded from a blob, + * this argument will be NULL. + * Return 0 if permission is granted. + * @task_fix_setuid: + * Update the module's state after setting one or more of the user + * identity attributes of the current process. The @flags parameter + * indicates which of the set*uid system calls invoked this hook. If + * @new is the set of credentials that will be installed. Modifications + * should be made to this rather than to @current->cred. + * @old is the set of credentials that are being replaces + * @flags contains one of the LSM_SETID_* values. + * Return 0 on success. + * @task_setpgid: + * Check permission before setting the process group identifier of the + * process @p to @pgid. + * @p contains the task_struct for process being modified. + * @pgid contains the new pgid. + * Return 0 if permission is granted. + * @task_getpgid: + * Check permission before getting the process group identifier of the + * process @p. + * @p contains the task_struct for the process. + * Return 0 if permission is granted. + * @task_getsid: + * Check permission before getting the session identifier of the process + * @p. + * @p contains the task_struct for the process. + * Return 0 if permission is granted. + * @task_getsecid: + * Retrieve the security identifier of the process @p. + * @p contains the task_struct for the process and place is into @secid. + * In case of failure, @secid will be set to zero. + * + * @task_setnice: + * Check permission before setting the nice value of @p to @nice. + * @p contains the task_struct of process. + * @nice contains the new nice value. + * Return 0 if permission is granted. + * @task_setioprio + * Check permission before setting the ioprio value of @p to @ioprio. + * @p contains the task_struct of process. + * @ioprio contains the new ioprio value + * Return 0 if permission is granted. + * @task_getioprio + * Check permission before getting the ioprio value of @p. + * @p contains the task_struct of process. + * Return 0 if permission is granted. + * @task_setrlimit: + * Check permission before setting the resource limits of the current + * process for @resource to @new_rlim. The old resource limit values can + * be examined by dereferencing (current->signal->rlim + resource). + * @resource contains the resource whose limit is being set. + * @new_rlim contains the new limits for @resource. + * Return 0 if permission is granted. + * @task_setscheduler: + * Check permission before setting scheduling policy and/or parameters of + * process @p based on @policy and @lp. + * @p contains the task_struct for process. + * @policy contains the scheduling policy. + * @lp contains the scheduling parameters. + * Return 0 if permission is granted. + * @task_getscheduler: + * Check permission before obtaining scheduling information for process + * @p. + * @p contains the task_struct for process. + * Return 0 if permission is granted. + * @task_movememory + * Check permission before moving memory owned by process @p. + * @p contains the task_struct for process. + * Return 0 if permission is granted. + * @task_kill: + * Check permission before sending signal @sig to @p. @info can be NULL, + * the constant 1, or a pointer to a siginfo structure. If @info is 1 or + * SI_FROMKERNEL(info) is true, then the signal should be viewed as coming + * from the kernel and should typically be permitted. + * SIGIO signals are handled separately by the send_sigiotask hook in + * file_security_ops. + * @p contains the task_struct for process. + * @info contains the signal information. + * @sig contains the signal value. + * @secid contains the sid of the process where the signal originated + * Return 0 if permission is granted. + * @task_wait: + * Check permission before allowing a process to reap a child process @p + * and collect its status information. + * @p contains the task_struct for process. + * Return 0 if permission is granted. + * @task_prctl: + * Check permission before performing a process control operation on the + * current process. + * @option contains the operation. + * @arg2 contains a argument. + * @arg3 contains a argument. + * @arg4 contains a argument. + * @arg5 contains a argument. + * Return -ENOSYS if no-one wanted to handle this op, any other value to + * cause prctl() to return immediately with that value. + * @task_to_inode: + * Set the security attributes for an inode based on an associated task's + * security attributes, e.g. for /proc/pid inodes. + * @p contains the task_struct for the task. + * @inode contains the inode structure for the inode. + * + * Security hooks for Netlink messaging. + * + * @netlink_send: + * Save security information for a netlink message so that permission + * checking can be performed when the message is processed. The security + * information can be saved using the eff_cap field of the + * netlink_skb_parms structure. Also may be used to provide fine + * grained control over message transmission. + * @sk associated sock of task sending the message. + * @skb contains the sk_buff structure for the netlink message. + * Return 0 if the information was successfully saved and message + * is allowed to be transmitted. + * + * Security hooks for Unix domain networking. + * + * @unix_stream_connect: + * Check permissions before establishing a Unix domain stream connection + * between @sock and @other. + * @sock contains the sock structure. + * @other contains the peer sock structure. + * @newsk contains the new sock structure. + * Return 0 if permission is granted. + * @unix_may_send: + * Check permissions before connecting or sending datagrams from @sock to + * @other. + * @sock contains the socket structure. + * @other contains the peer socket structure. + * Return 0 if permission is granted. + * + * The @unix_stream_connect and @unix_may_send hooks were necessary because + * Linux provides an alternative to the conventional file name space for Unix + * domain sockets. Whereas binding and connecting to sockets in the file name + * space is mediated by the typical file permissions (and caught by the mknod + * and permission hooks in inode_security_ops), binding and connecting to + * sockets in the abstract name space is completely unmediated. Sufficient + * control of Unix domain sockets in the abstract name space isn't possible + * using only the socket layer hooks, since we need to know the actual target + * socket, which is not looked up until we are inside the af_unix code. + * + * Security hooks for socket operations. + * + * @socket_create: + * Check permissions prior to creating a new socket. + * @family contains the requested protocol family. + * @type contains the requested communications type. + * @protocol contains the requested protocol. + * @kern set to 1 if a kernel socket. + * Return 0 if permission is granted. + * @socket_post_create: + * This hook allows a module to update or allocate a per-socket security + * structure. Note that the security field was not added directly to the + * socket structure, but rather, the socket security information is stored + * in the associated inode. Typically, the inode alloc_security hook will + * allocate and and attach security information to + * sock->inode->i_security. This hook may be used to update the + * sock->inode->i_security field with additional information that wasn't + * available when the inode was allocated. + * @sock contains the newly created socket structure. + * @family contains the requested protocol family. + * @type contains the requested communications type. + * @protocol contains the requested protocol. + * @kern set to 1 if a kernel socket. + * @socket_bind: + * Check permission before socket protocol layer bind operation is + * performed and the socket @sock is bound to the address specified in the + * @address parameter. + * @sock contains the socket structure. + * @address contains the address to bind to. + * @addrlen contains the length of address. + * Return 0 if permission is granted. + * @socket_connect: + * Check permission before socket protocol layer connect operation + * attempts to connect socket @sock to a remote address, @address. + * @sock contains the socket structure. + * @address contains the address of remote endpoint. + * @addrlen contains the length of address. + * Return 0 if permission is granted. + * @socket_listen: + * Check permission before socket protocol layer listen operation. + * @sock contains the socket structure. + * @backlog contains the maximum length for the pending connection queue. + * Return 0 if permission is granted. + * @socket_accept: + * Check permission before accepting a new connection. Note that the new + * socket, @newsock, has been created and some information copied to it, + * but the accept operation has not actually been performed. + * @sock contains the listening socket structure. + * @newsock contains the newly created server socket for connection. + * Return 0 if permission is granted. + * @socket_sendmsg: + * Check permission before transmitting a message to another socket. + * @sock contains the socket structure. + * @msg contains the message to be transmitted. + * @size contains the size of message. + * Return 0 if permission is granted. + * @socket_recvmsg: + * Check permission before receiving a message from a socket. + * @sock contains the socket structure. + * @msg contains the message structure. + * @size contains the size of message structure. + * @flags contains the operational flags. + * Return 0 if permission is granted. + * @socket_getsockname: + * Check permission before the local address (name) of the socket object + * @sock is retrieved. + * @sock contains the socket structure. + * Return 0 if permission is granted. + * @socket_getpeername: + * Check permission before the remote address (name) of a socket object + * @sock is retrieved. + * @sock contains the socket structure. + * Return 0 if permission is granted. + * @socket_getsockopt: + * Check permissions before retrieving the options associated with socket + * @sock. + * @sock contains the socket structure. + * @level contains the protocol level to retrieve option from. + * @optname contains the name of option to retrieve. + * Return 0 if permission is granted. + * @socket_setsockopt: + * Check permissions before setting the options associated with socket + * @sock. + * @sock contains the socket structure. + * @level contains the protocol level to set options for. + * @optname contains the name of the option to set. + * Return 0 if permission is granted. + * @socket_shutdown: + * Checks permission before all or part of a connection on the socket + * @sock is shut down. + * @sock contains the socket structure. + * @how contains the flag indicating how future sends and receives + * are handled. + * Return 0 if permission is granted. + * @socket_sock_rcv_skb: + * Check permissions on incoming network packets. This hook is distinct + * from Netfilter's IP input hooks since it is the first time that the + * incoming sk_buff @skb has been associated with a particular socket, @sk. + * Must not sleep inside this hook because some callers hold spinlocks. + * @sk contains the sock (not socket) associated with the incoming sk_buff. + * @skb contains the incoming network data. + * @socket_getpeersec_stream: + * This hook allows the security module to provide peer socket security + * state for unix or connected tcp sockets to userspace via getsockopt + * SO_GETPEERSEC. For tcp sockets this can be meaningful if the + * socket is associated with an ipsec SA. + * @sock is the local socket. + * @optval userspace memory where the security state is to be copied. + * @optlen userspace int where the module should copy the actual length + * of the security state. + * @len as input is the maximum length to copy to userspace provided + * by the caller. + * Return 0 if all is well, otherwise, typical getsockopt return + * values. + * @socket_getpeersec_dgram: + * This hook allows the security module to provide peer socket security + * state for udp sockets on a per-packet basis to userspace via + * getsockopt SO_GETPEERSEC. The application must first have indicated + * the IP_PASSSEC option via getsockopt. It can then retrieve the + * security state returned by this hook for a packet via the SCM_SECURITY + * ancillary message type. + * @skb is the skbuff for the packet being queried + * @secdata is a pointer to a buffer in which to copy the security data + * @seclen is the maximum length for @secdata + * Return 0 on success, error on failure. + * @sk_alloc_security: + * Allocate and attach a security structure to the sk->sk_security field, + * which is used to copy security attributes between local stream sockets. + * @sk_free_security: + * Deallocate security structure. + * @sk_clone_security: + * Clone/copy security structure. + * @sk_getsecid: + * Retrieve the LSM-specific secid for the sock to enable caching + * of network authorizations. + * @sock_graft: + * Sets the socket's isec sid to the sock's sid. + * @inet_conn_request: + * Sets the openreq's sid to socket's sid with MLS portion taken + * from peer sid. + * @inet_csk_clone: + * Sets the new child socket's sid to the openreq sid. + * @inet_conn_established: + * Sets the connection's peersid to the secmark on skb. + * @secmark_relabel_packet: + * check if the process should be allowed to relabel packets to + * the given secid + * @security_secmark_refcount_inc + * tells the LSM to increment the number of secmark labeling rules loaded + * @security_secmark_refcount_dec + * tells the LSM to decrement the number of secmark labeling rules loaded + * @req_classify_flow: + * Sets the flow's sid to the openreq sid. + * @tun_dev_alloc_security: + * This hook allows a module to allocate a security structure for a TUN + * device. + * @security pointer to a security structure pointer. + * Returns a zero on success, negative values on failure. + * @tun_dev_free_security: + * This hook allows a module to free the security structure for a TUN + * device. + * @security pointer to the TUN device's security structure + * @tun_dev_create: + * Check permissions prior to creating a new TUN device. + * @tun_dev_attach_queue: + * Check permissions prior to attaching to a TUN device queue. + * @security pointer to the TUN device's security structure. + * @tun_dev_attach: + * This hook can be used by the module to update any security state + * associated with the TUN device's sock structure. + * @sk contains the existing sock structure. + * @security pointer to the TUN device's security structure. + * @tun_dev_open: + * This hook can be used by the module to update any security state + * associated with the TUN device's security structure. + * @security pointer to the TUN devices's security structure. + * + * Security hooks for XFRM operations. + * + * @xfrm_policy_alloc_security: + * @ctxp is a pointer to the xfrm_sec_ctx being added to Security Policy + * Database used by the XFRM system. + * @sec_ctx contains the security context information being provided by + * the user-level policy update program (e.g., setkey). + * Allocate a security structure to the xp->security field; the security + * field is initialized to NULL when the xfrm_policy is allocated. + * Return 0 if operation was successful (memory to allocate, legal context) + * @gfp is to specify the context for the allocation + * @xfrm_policy_clone_security: + * @old_ctx contains an existing xfrm_sec_ctx. + * @new_ctxp contains a new xfrm_sec_ctx being cloned from old. + * Allocate a security structure in new_ctxp that contains the + * information from the old_ctx structure. + * Return 0 if operation was successful (memory to allocate). + * @xfrm_policy_free_security: + * @ctx contains the xfrm_sec_ctx + * Deallocate xp->security. + * @xfrm_policy_delete_security: + * @ctx contains the xfrm_sec_ctx. + * Authorize deletion of xp->security. + * @xfrm_state_alloc: + * @x contains the xfrm_state being added to the Security Association + * Database by the XFRM system. + * @sec_ctx contains the security context information being provided by + * the user-level SA generation program (e.g., setkey or racoon). + * Allocate a security structure to the x->security field; the security + * field is initialized to NULL when the xfrm_state is allocated. Set the + * context to correspond to sec_ctx. Return 0 if operation was successful + * (memory to allocate, legal context). + * @xfrm_state_alloc_acquire: + * @x contains the xfrm_state being added to the Security Association + * Database by the XFRM system. + * @polsec contains the policy's security context. + * @secid contains the secid from which to take the mls portion of the + * context. + * Allocate a security structure to the x->security field; the security + * field is initialized to NULL when the xfrm_state is allocated. Set the + * context to correspond to secid. Return 0 if operation was successful + * (memory to allocate, legal context). + * @xfrm_state_free_security: + * @x contains the xfrm_state. + * Deallocate x->security. + * @xfrm_state_delete_security: + * @x contains the xfrm_state. + * Authorize deletion of x->security. + * @xfrm_policy_lookup: + * @ctx contains the xfrm_sec_ctx for which the access control is being + * checked. + * @fl_secid contains the flow security label that is used to authorize + * access to the policy xp. + * @dir contains the direction of the flow (input or output). + * Check permission when a flow selects a xfrm_policy for processing + * XFRMs on a packet. The hook is called when selecting either a + * per-socket policy or a generic xfrm policy. + * Return 0 if permission is granted, -ESRCH otherwise, or -errno + * on other errors. + * @xfrm_state_pol_flow_match: + * @x contains the state to match. + * @xp contains the policy to check for a match. + * @fl contains the flow to check for a match. + * Return 1 if there is a match. + * @xfrm_decode_session: + * @skb points to skb to decode. + * @secid points to the flow key secid to set. + * @ckall says if all xfrms used should be checked for same secid. + * Return 0 if ckall is zero or all xfrms used have the same secid. + * + * Security hooks affecting all Key Management operations + * + * @key_alloc: + * Permit allocation of a key and assign security data. Note that key does + * not have a serial number assigned at this point. + * @key points to the key. + * @flags is the allocation flags + * Return 0 if permission is granted, -ve error otherwise. + * @key_free: + * Notification of destruction; free security data. + * @key points to the key. + * No return value. + * @key_permission: + * See whether a specific operational right is granted to a process on a + * key. + * @key_ref refers to the key (key pointer + possession attribute bit). + * @cred points to the credentials to provide the context against which to + * evaluate the security data on the key. + * @perm describes the combination of permissions required of this key. + * Return 0 if permission is granted, -ve error otherwise. + * @key_getsecurity: + * Get a textual representation of the security context attached to a key + * for the purposes of honouring KEYCTL_GETSECURITY. This function + * allocates the storage for the NUL-terminated string and the caller + * should free it. + * @key points to the key to be queried. + * @_buffer points to a pointer that should be set to point to the + * resulting string (if no label or an error occurs). + * Return the length of the string (including terminating NUL) or -ve if + * an error. + * May also return 0 (and a NULL buffer pointer) if there is no label. + * + * Security hooks affecting all System V IPC operations. + * + * @ipc_permission: + * Check permissions for access to IPC + * @ipcp contains the kernel IPC permission structure + * @flag contains the desired (requested) permission set + * Return 0 if permission is granted. + * @ipc_getsecid: + * Get the secid associated with the ipc object. + * @ipcp contains the kernel IPC permission structure. + * @secid contains a pointer to the location where result will be saved. + * In case of failure, @secid will be set to zero. + * + * Security hooks for individual messages held in System V IPC message queues + * @msg_msg_alloc_security: + * Allocate and attach a security structure to the msg->security field. + * The security field is initialized to NULL when the structure is first + * created. + * @msg contains the message structure to be modified. + * Return 0 if operation was successful and permission is granted. + * @msg_msg_free_security: + * Deallocate the security structure for this message. + * @msg contains the message structure to be modified. + * + * Security hooks for System V IPC Message Queues + * + * @msg_queue_alloc_security: + * Allocate and attach a security structure to the + * msq->q_perm.security field. The security field is initialized to + * NULL when the structure is first created. + * @msq contains the message queue structure to be modified. + * Return 0 if operation was successful and permission is granted. + * @msg_queue_free_security: + * Deallocate security structure for this message queue. + * @msq contains the message queue structure to be modified. + * @msg_queue_associate: + * Check permission when a message queue is requested through the + * msgget system call. This hook is only called when returning the + * message queue identifier for an existing message queue, not when a + * new message queue is created. + * @msq contains the message queue to act upon. + * @msqflg contains the operation control flags. + * Return 0 if permission is granted. + * @msg_queue_msgctl: + * Check permission when a message control operation specified by @cmd + * is to be performed on the message queue @msq. + * The @msq may be NULL, e.g. for IPC_INFO or MSG_INFO. + * @msq contains the message queue to act upon. May be NULL. + * @cmd contains the operation to be performed. + * Return 0 if permission is granted. + * @msg_queue_msgsnd: + * Check permission before a message, @msg, is enqueued on the message + * queue, @msq. + * @msq contains the message queue to send message to. + * @msg contains the message to be enqueued. + * @msqflg contains operational flags. + * Return 0 if permission is granted. + * @msg_queue_msgrcv: + * Check permission before a message, @msg, is removed from the message + * queue, @msq. The @target task structure contains a pointer to the + * process that will be receiving the message (not equal to the current + * process when inline receives are being performed). + * @msq contains the message queue to retrieve message from. + * @msg contains the message destination. + * @target contains the task structure for recipient process. + * @type contains the type of message requested. + * @mode contains the operational flags. + * Return 0 if permission is granted. + * + * Security hooks for System V Shared Memory Segments + * + * @shm_alloc_security: + * Allocate and attach a security structure to the shp->shm_perm.security + * field. The security field is initialized to NULL when the structure is + * first created. + * @shp contains the shared memory structure to be modified. + * Return 0 if operation was successful and permission is granted. + * @shm_free_security: + * Deallocate the security struct for this memory segment. + * @shp contains the shared memory structure to be modified. + * @shm_associate: + * Check permission when a shared memory region is requested through the + * shmget system call. This hook is only called when returning the shared + * memory region identifier for an existing region, not when a new shared + * memory region is created. + * @shp contains the shared memory structure to be modified. + * @shmflg contains the operation control flags. + * Return 0 if permission is granted. + * @shm_shmctl: + * Check permission when a shared memory control operation specified by + * @cmd is to be performed on the shared memory region @shp. + * The @shp may be NULL, e.g. for IPC_INFO or SHM_INFO. + * @shp contains shared memory structure to be modified. + * @cmd contains the operation to be performed. + * Return 0 if permission is granted. + * @shm_shmat: + * Check permissions prior to allowing the shmat system call to attach the + * shared memory segment @shp to the data segment of the calling process. + * The attaching address is specified by @shmaddr. + * @shp contains the shared memory structure to be modified. + * @shmaddr contains the address to attach memory region to. + * @shmflg contains the operational flags. + * Return 0 if permission is granted. + * + * Security hooks for System V Semaphores + * + * @sem_alloc_security: + * Allocate and attach a security structure to the sma->sem_perm.security + * field. The security field is initialized to NULL when the structure is + * first created. + * @sma contains the semaphore structure + * Return 0 if operation was successful and permission is granted. + * @sem_free_security: + * deallocate security struct for this semaphore + * @sma contains the semaphore structure. + * @sem_associate: + * Check permission when a semaphore is requested through the semget + * system call. This hook is only called when returning the semaphore + * identifier for an existing semaphore, not when a new one must be + * created. + * @sma contains the semaphore structure. + * @semflg contains the operation control flags. + * Return 0 if permission is granted. + * @sem_semctl: + * Check permission when a semaphore operation specified by @cmd is to be + * performed on the semaphore @sma. The @sma may be NULL, e.g. for + * IPC_INFO or SEM_INFO. + * @sma contains the semaphore structure. May be NULL. + * @cmd contains the operation to be performed. + * Return 0 if permission is granted. + * @sem_semop + * Check permissions before performing operations on members of the + * semaphore set @sma. If the @alter flag is nonzero, the semaphore set + * may be modified. + * @sma contains the semaphore structure. + * @sops contains the operations to perform. + * @nsops contains the number of operations to perform. + * @alter contains the flag indicating whether changes are to be made. + * Return 0 if permission is granted. + * + * @binder_set_context_mgr + * Check whether @mgr is allowed to be the binder context manager. + * @mgr contains the task_struct for the task being registered. + * Return 0 if permission is granted. + * @binder_transaction + * Check whether @from is allowed to invoke a binder transaction call + * to @to. + * @from contains the task_struct for the sending task. + * @to contains the task_struct for the receiving task. + * @binder_transfer_binder + * Check whether @from is allowed to transfer a binder reference to @to. + * @from contains the task_struct for the sending task. + * @to contains the task_struct for the receiving task. + * @binder_transfer_file + * Check whether @from is allowed to transfer @file to @to. + * @from contains the task_struct for the sending task. + * @file contains the struct file being transferred. + * @to contains the task_struct for the receiving task. + * + * @ptrace_access_check: + * Check permission before allowing the current process to trace the + * @child process. + * Security modules may also want to perform a process tracing check + * during an execve in the set_security or apply_creds hooks of + * tracing check during an execve in the bprm_set_creds hook of + * binprm_security_ops if the process is being traced and its security + * attributes would be changed by the execve. + * @child contains the task_struct structure for the target process. + * @mode contains the PTRACE_MODE flags indicating the form of access. + * Return 0 if permission is granted. + * @ptrace_traceme: + * Check that the @parent process has sufficient permission to trace the + * current process before allowing the current process to present itself + * to the @parent process for tracing. + * @parent contains the task_struct structure for debugger process. + * Return 0 if permission is granted. + * @capget: + * Get the @effective, @inheritable, and @permitted capability sets for + * the @target process. The hook may also perform permission checking to + * determine if the current process is allowed to see the capability sets + * of the @target process. + * @target contains the task_struct structure for target process. + * @effective contains the effective capability set. + * @inheritable contains the inheritable capability set. + * @permitted contains the permitted capability set. + * Return 0 if the capability sets were successfully obtained. + * @capset: + * Set the @effective, @inheritable, and @permitted capability sets for + * the current process. + * @new contains the new credentials structure for target process. + * @old contains the current credentials structure for target process. + * @effective contains the effective capability set. + * @inheritable contains the inheritable capability set. + * @permitted contains the permitted capability set. + * Return 0 and update @new if permission is granted. + * @capable: + * Check whether the @tsk process has the @cap capability in the indicated + * credentials. + * @cred contains the credentials to use. + * @ns contains the user namespace we want the capability in + * @cap contains the capability <include/linux/capability.h>. + * @audit: Whether to write an audit message or not + * Return 0 if the capability is granted for @tsk. + * @syslog: + * Check permission before accessing the kernel message ring or changing + * logging to the console. + * See the syslog(2) manual page for an explanation of the @type values. + * @type contains the type of action. + * @from_file indicates the context of action (if it came from /proc). + * Return 0 if permission is granted. + * @settime: + * Check permission to change the system time. + * struct timespec and timezone are defined in include/linux/time.h + * @ts contains new time + * @tz contains new timezone + * Return 0 if permission is granted. + * @vm_enough_memory: + * Check permissions for allocating a new virtual mapping. + * @mm contains the mm struct it is being added to. + * @pages contains the number of pages. + * Return 0 if permission is granted. + * + * @ismaclabel: + * Check if the extended attribute specified by @name + * represents a MAC label. Returns 1 if name is a MAC + * attribute otherwise returns 0. + * @name full extended attribute name to check against + * LSM as a MAC label. + * + * @secid_to_secctx: + * Convert secid to security context. If secdata is NULL the length of + * the result will be returned in seclen, but no secdata will be returned. + * This does mean that the length could change between calls to check the + * length and the next call which actually allocates and returns the + * secdata. + * @secid contains the security ID. + * @secdata contains the pointer that stores the converted security + * context. + * @seclen pointer which contains the length of the data + * @secctx_to_secid: + * Convert security context to secid. + * @secid contains the pointer to the generated security ID. + * @secdata contains the security context. + * + * @release_secctx: + * Release the security context. + * @secdata contains the security context. + * @seclen contains the length of the security context. + * + * Security hooks for Audit + * + * @audit_rule_init: + * Allocate and initialize an LSM audit rule structure. + * @field contains the required Audit action. + * Fields flags are defined in include/linux/audit.h + * @op contains the operator the rule uses. + * @rulestr contains the context where the rule will be applied to. + * @lsmrule contains a pointer to receive the result. + * Return 0 if @lsmrule has been successfully set, + * -EINVAL in case of an invalid rule. + * + * @audit_rule_known: + * Specifies whether given @rule contains any fields related to + * current LSM. + * @rule contains the audit rule of interest. + * Return 1 in case of relation found, 0 otherwise. + * + * @audit_rule_match: + * Determine if given @secid matches a rule previously approved + * by @audit_rule_known. + * @secid contains the security id in question. + * @field contains the field which relates to current LSM. + * @op contains the operator that will be used for matching. + * @rule points to the audit rule that will be checked against. + * @actx points to the audit context associated with the check. + * Return 1 if secid matches the rule, 0 if it does not, -ERRNO on failure. + * + * @audit_rule_free: + * Deallocate the LSM audit rule structure previously allocated by + * audit_rule_init. + * @rule contains the allocated rule + * + * @inode_notifysecctx: + * Notify the security module of what the security context of an inode + * should be. Initializes the incore security context managed by the + * security module for this inode. Example usage: NFS client invokes + * this hook to initialize the security context in its incore inode to the + * value provided by the server for the file when the server returned the + * file's attributes to the client. + * + * Must be called with inode->i_mutex locked. + * + * @inode we wish to set the security context of. + * @ctx contains the string which we wish to set in the inode. + * @ctxlen contains the length of @ctx. + * + * @inode_setsecctx: + * Change the security context of an inode. Updates the + * incore security context managed by the security module and invokes the + * fs code as needed (via __vfs_setxattr_noperm) to update any backing + * xattrs that represent the context. Example usage: NFS server invokes + * this hook to change the security context in its incore inode and on the + * backing filesystem to a value provided by the client on a SETATTR + * operation. + * + * Must be called with inode->i_mutex locked. + * + * @dentry contains the inode we wish to set the security context of. + * @ctx contains the string which we wish to set in the inode. + * @ctxlen contains the length of @ctx. + * + * @inode_getsecctx: + * On success, returns 0 and fills out @ctx and @ctxlen with the security + * context for the given @inode. + * + * @inode we wish to get the security context of. + * @ctx is a pointer in which to place the allocated security context. + * @ctxlen points to the place to put the length of @ctx. + * This is the main security structure. + */ + +union security_list_options { + int (*binder_set_context_mgr)(struct task_struct *mgr); + int (*binder_transaction)(struct task_struct *from, + struct task_struct *to); + int (*binder_transfer_binder)(struct task_struct *from, + struct task_struct *to); + int (*binder_transfer_file)(struct task_struct *from, + struct task_struct *to, + struct file *file); + + int (*ptrace_access_check)(struct task_struct *child, + unsigned int mode); + int (*ptrace_traceme)(struct task_struct *parent); + int (*capget)(struct task_struct *target, kernel_cap_t *effective, + kernel_cap_t *inheritable, kernel_cap_t *permitted); + int (*capset)(struct cred *new, const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); + int (*capable)(const struct cred *cred, struct user_namespace *ns, + int cap, int audit); + int (*quotactl)(int cmds, int type, int id, struct super_block *sb); + int (*quota_on)(struct dentry *dentry); + int (*syslog)(int type); + int (*settime)(const struct timespec *ts, const struct timezone *tz); + int (*vm_enough_memory)(struct mm_struct *mm, long pages); + + int (*bprm_set_creds)(struct linux_binprm *bprm); + int (*bprm_check_security)(struct linux_binprm *bprm); + int (*bprm_secureexec)(struct linux_binprm *bprm); + void (*bprm_committing_creds)(struct linux_binprm *bprm); + void (*bprm_committed_creds)(struct linux_binprm *bprm); + + int (*sb_alloc_security)(struct super_block *sb); + void (*sb_free_security)(struct super_block *sb); + int (*sb_copy_data)(char *orig, char *copy); + int (*sb_remount)(struct super_block *sb, void *data); + int (*sb_kern_mount)(struct super_block *sb, int flags, void *data); + int (*sb_show_options)(struct seq_file *m, struct super_block *sb); + int (*sb_statfs)(struct dentry *dentry); + int (*sb_mount)(const char *dev_name, struct path *path, + const char *type, unsigned long flags, void *data); + int (*sb_umount)(struct vfsmount *mnt, int flags); + int (*sb_pivotroot)(struct path *old_path, struct path *new_path); + int (*sb_set_mnt_opts)(struct super_block *sb, + struct security_mnt_opts *opts, + unsigned long kern_flags, + unsigned long *set_kern_flags); + int (*sb_clone_mnt_opts)(const struct super_block *oldsb, + struct super_block *newsb); + int (*sb_parse_opts_str)(char *options, struct security_mnt_opts *opts); + int (*dentry_init_security)(struct dentry *dentry, int mode, + struct qstr *name, void **ctx, + u32 *ctxlen); + + +#ifdef CONFIG_SECURITY_PATH + int (*path_unlink)(struct path *dir, struct dentry *dentry); + int (*path_mkdir)(struct path *dir, struct dentry *dentry, + umode_t mode); + int (*path_rmdir)(struct path *dir, struct dentry *dentry); + int (*path_mknod)(struct path *dir, struct dentry *dentry, + umode_t mode, unsigned int dev); + int (*path_truncate)(struct path *path); + int (*path_symlink)(struct path *dir, struct dentry *dentry, + const char *old_name); + int (*path_link)(struct dentry *old_dentry, struct path *new_dir, + struct dentry *new_dentry); + int (*path_rename)(struct path *old_dir, struct dentry *old_dentry, + struct path *new_dir, + struct dentry *new_dentry); + int (*path_chmod)(struct path *path, umode_t mode); + int (*path_chown)(struct path *path, kuid_t uid, kgid_t gid); + int (*path_chroot)(struct path *path); +#endif + + int (*inode_alloc_security)(struct inode *inode); + void (*inode_free_security)(struct inode *inode); + int (*inode_init_security)(struct inode *inode, struct inode *dir, + const struct qstr *qstr, + const char **name, void **value, + size_t *len); + int (*inode_create)(struct inode *dir, struct dentry *dentry, + umode_t mode); + int (*inode_link)(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry); + int (*inode_unlink)(struct inode *dir, struct dentry *dentry); + int (*inode_symlink)(struct inode *dir, struct dentry *dentry, + const char *old_name); + int (*inode_mkdir)(struct inode *dir, struct dentry *dentry, + umode_t mode); + int (*inode_rmdir)(struct inode *dir, struct dentry *dentry); + int (*inode_mknod)(struct inode *dir, struct dentry *dentry, + umode_t mode, dev_t dev); + int (*inode_rename)(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry); + int (*inode_readlink)(struct dentry *dentry); + int (*inode_follow_link)(struct dentry *dentry, struct inode *inode, + bool rcu); + int (*inode_permission)(struct inode *inode, int mask); + int (*inode_setattr)(struct dentry *dentry, struct iattr *attr); + int (*inode_getattr)(const struct path *path); + int (*inode_setxattr)(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); + void (*inode_post_setxattr)(struct dentry *dentry, const char *name, + const void *value, size_t size, + int flags); + int (*inode_getxattr)(struct dentry *dentry, const char *name); + int (*inode_listxattr)(struct dentry *dentry); + int (*inode_removexattr)(struct dentry *dentry, const char *name); + int (*inode_need_killpriv)(struct dentry *dentry); + int (*inode_killpriv)(struct dentry *dentry); + int (*inode_getsecurity)(const struct inode *inode, const char *name, + void **buffer, bool alloc); + int (*inode_setsecurity)(struct inode *inode, const char *name, + const void *value, size_t size, + int flags); + int (*inode_listsecurity)(struct inode *inode, char *buffer, + size_t buffer_size); + void (*inode_getsecid)(const struct inode *inode, u32 *secid); + + int (*file_permission)(struct file *file, int mask); + int (*file_alloc_security)(struct file *file); + void (*file_free_security)(struct file *file); + int (*file_ioctl)(struct file *file, unsigned int cmd, + unsigned long arg); + int (*mmap_addr)(unsigned long addr); + int (*mmap_file)(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags); + int (*file_mprotect)(struct vm_area_struct *vma, unsigned long reqprot, + unsigned long prot); + int (*file_lock)(struct file *file, unsigned int cmd); + int (*file_fcntl)(struct file *file, unsigned int cmd, + unsigned long arg); + void (*file_set_fowner)(struct file *file); + int (*file_send_sigiotask)(struct task_struct *tsk, + struct fown_struct *fown, int sig); + int (*file_receive)(struct file *file); + int (*file_open)(struct file *file, const struct cred *cred); + + int (*task_create)(unsigned long clone_flags); + void (*task_free)(struct task_struct *task); + int (*cred_alloc_blank)(struct cred *cred, gfp_t gfp); + void (*cred_free)(struct cred *cred); + int (*cred_prepare)(struct cred *new, const struct cred *old, + gfp_t gfp); + void (*cred_transfer)(struct cred *new, const struct cred *old); + int (*kernel_act_as)(struct cred *new, u32 secid); + int (*kernel_create_files_as)(struct cred *new, struct inode *inode); + int (*kernel_fw_from_file)(struct file *file, char *buf, size_t size); + int (*kernel_module_request)(char *kmod_name); + int (*kernel_module_from_file)(struct file *file); + int (*task_fix_setuid)(struct cred *new, const struct cred *old, + int flags); + int (*task_setpgid)(struct task_struct *p, pid_t pgid); + int (*task_getpgid)(struct task_struct *p); + int (*task_getsid)(struct task_struct *p); + void (*task_getsecid)(struct task_struct *p, u32 *secid); + int (*task_setnice)(struct task_struct *p, int nice); + int (*task_setioprio)(struct task_struct *p, int ioprio); + int (*task_getioprio)(struct task_struct *p); + int (*task_setrlimit)(struct task_struct *p, unsigned int resource, + struct rlimit *new_rlim); + int (*task_setscheduler)(struct task_struct *p); + int (*task_getscheduler)(struct task_struct *p); + int (*task_movememory)(struct task_struct *p); + int (*task_kill)(struct task_struct *p, struct siginfo *info, + int sig, u32 secid); + int (*task_wait)(struct task_struct *p); + int (*task_prctl)(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5); + void (*task_to_inode)(struct task_struct *p, struct inode *inode); + + int (*ipc_permission)(struct kern_ipc_perm *ipcp, short flag); + void (*ipc_getsecid)(struct kern_ipc_perm *ipcp, u32 *secid); + + int (*msg_msg_alloc_security)(struct msg_msg *msg); + void (*msg_msg_free_security)(struct msg_msg *msg); + + int (*msg_queue_alloc_security)(struct msg_queue *msq); + void (*msg_queue_free_security)(struct msg_queue *msq); + int (*msg_queue_associate)(struct msg_queue *msq, int msqflg); + int (*msg_queue_msgctl)(struct msg_queue *msq, int cmd); + int (*msg_queue_msgsnd)(struct msg_queue *msq, struct msg_msg *msg, + int msqflg); + int (*msg_queue_msgrcv)(struct msg_queue *msq, struct msg_msg *msg, + struct task_struct *target, long type, + int mode); + + int (*shm_alloc_security)(struct shmid_kernel *shp); + void (*shm_free_security)(struct shmid_kernel *shp); + int (*shm_associate)(struct shmid_kernel *shp, int shmflg); + int (*shm_shmctl)(struct shmid_kernel *shp, int cmd); + int (*shm_shmat)(struct shmid_kernel *shp, char __user *shmaddr, + int shmflg); + + int (*sem_alloc_security)(struct sem_array *sma); + void (*sem_free_security)(struct sem_array *sma); + int (*sem_associate)(struct sem_array *sma, int semflg); + int (*sem_semctl)(struct sem_array *sma, int cmd); + int (*sem_semop)(struct sem_array *sma, struct sembuf *sops, + unsigned nsops, int alter); + + int (*netlink_send)(struct sock *sk, struct sk_buff *skb); + + void (*d_instantiate)(struct dentry *dentry, struct inode *inode); + + int (*getprocattr)(struct task_struct *p, char *name, char **value); + int (*setprocattr)(struct task_struct *p, char *name, void *value, + size_t size); + int (*ismaclabel)(const char *name); + int (*secid_to_secctx)(u32 secid, char **secdata, u32 *seclen); + int (*secctx_to_secid)(const char *secdata, u32 seclen, u32 *secid); + void (*release_secctx)(char *secdata, u32 seclen); + + int (*inode_notifysecctx)(struct inode *inode, void *ctx, u32 ctxlen); + int (*inode_setsecctx)(struct dentry *dentry, void *ctx, u32 ctxlen); + int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen); + +#ifdef CONFIG_SECURITY_NETWORK + int (*unix_stream_connect)(struct sock *sock, struct sock *other, + struct sock *newsk); + int (*unix_may_send)(struct socket *sock, struct socket *other); + + int (*socket_create)(int family, int type, int protocol, int kern); + int (*socket_post_create)(struct socket *sock, int family, int type, + int protocol, int kern); + int (*socket_bind)(struct socket *sock, struct sockaddr *address, + int addrlen); + int (*socket_connect)(struct socket *sock, struct sockaddr *address, + int addrlen); + int (*socket_listen)(struct socket *sock, int backlog); + int (*socket_accept)(struct socket *sock, struct socket *newsock); + int (*socket_sendmsg)(struct socket *sock, struct msghdr *msg, + int size); + int (*socket_recvmsg)(struct socket *sock, struct msghdr *msg, + int size, int flags); + int (*socket_getsockname)(struct socket *sock); + int (*socket_getpeername)(struct socket *sock); + int (*socket_getsockopt)(struct socket *sock, int level, int optname); + int (*socket_setsockopt)(struct socket *sock, int level, int optname); + int (*socket_shutdown)(struct socket *sock, int how); + int (*socket_sock_rcv_skb)(struct sock *sk, struct sk_buff *skb); + int (*socket_getpeersec_stream)(struct socket *sock, + char __user *optval, + int __user *optlen, unsigned len); + int (*socket_getpeersec_dgram)(struct socket *sock, + struct sk_buff *skb, u32 *secid); + int (*sk_alloc_security)(struct sock *sk, int family, gfp_t priority); + void (*sk_free_security)(struct sock *sk); + void (*sk_clone_security)(const struct sock *sk, struct sock *newsk); + void (*sk_getsecid)(struct sock *sk, u32 *secid); + void (*sock_graft)(struct sock *sk, struct socket *parent); + int (*inet_conn_request)(struct sock *sk, struct sk_buff *skb, + struct request_sock *req); + void (*inet_csk_clone)(struct sock *newsk, + const struct request_sock *req); + void (*inet_conn_established)(struct sock *sk, struct sk_buff *skb); + int (*secmark_relabel_packet)(u32 secid); + void (*secmark_refcount_inc)(void); + void (*secmark_refcount_dec)(void); + void (*req_classify_flow)(const struct request_sock *req, + struct flowi *fl); + int (*tun_dev_alloc_security)(void **security); + void (*tun_dev_free_security)(void *security); + int (*tun_dev_create)(void); + int (*tun_dev_attach_queue)(void *security); + int (*tun_dev_attach)(struct sock *sk, void *security); + int (*tun_dev_open)(void *security); +#endif /* CONFIG_SECURITY_NETWORK */ + +#ifdef CONFIG_SECURITY_NETWORK_XFRM + int (*xfrm_policy_alloc_security)(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *sec_ctx, + gfp_t gfp); + int (*xfrm_policy_clone_security)(struct xfrm_sec_ctx *old_ctx, + struct xfrm_sec_ctx **new_ctx); + void (*xfrm_policy_free_security)(struct xfrm_sec_ctx *ctx); + int (*xfrm_policy_delete_security)(struct xfrm_sec_ctx *ctx); + int (*xfrm_state_alloc)(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx); + int (*xfrm_state_alloc_acquire)(struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, + u32 secid); + void (*xfrm_state_free_security)(struct xfrm_state *x); + int (*xfrm_state_delete_security)(struct xfrm_state *x); + int (*xfrm_policy_lookup)(struct xfrm_sec_ctx *ctx, u32 fl_secid, + u8 dir); + int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, + struct xfrm_policy *xp, + const struct flowi *fl); + int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall); +#endif /* CONFIG_SECURITY_NETWORK_XFRM */ + + /* key management security hooks */ +#ifdef CONFIG_KEYS + int (*key_alloc)(struct key *key, const struct cred *cred, + unsigned long flags); + void (*key_free)(struct key *key); + int (*key_permission)(key_ref_t key_ref, const struct cred *cred, + unsigned perm); + int (*key_getsecurity)(struct key *key, char **_buffer); +#endif /* CONFIG_KEYS */ + +#ifdef CONFIG_AUDIT + int (*audit_rule_init)(u32 field, u32 op, char *rulestr, + void **lsmrule); + int (*audit_rule_known)(struct audit_krule *krule); + int (*audit_rule_match)(u32 secid, u32 field, u32 op, void *lsmrule, + struct audit_context *actx); + void (*audit_rule_free)(void *lsmrule); +#endif /* CONFIG_AUDIT */ +}; + +struct security_hook_heads { + struct list_head binder_set_context_mgr; + struct list_head binder_transaction; + struct list_head binder_transfer_binder; + struct list_head binder_transfer_file; + struct list_head ptrace_access_check; + struct list_head ptrace_traceme; + struct list_head capget; + struct list_head capset; + struct list_head capable; + struct list_head quotactl; + struct list_head quota_on; + struct list_head syslog; + struct list_head settime; + struct list_head vm_enough_memory; + struct list_head bprm_set_creds; + struct list_head bprm_check_security; + struct list_head bprm_secureexec; + struct list_head bprm_committing_creds; + struct list_head bprm_committed_creds; + struct list_head sb_alloc_security; + struct list_head sb_free_security; + struct list_head sb_copy_data; + struct list_head sb_remount; + struct list_head sb_kern_mount; + struct list_head sb_show_options; + struct list_head sb_statfs; + struct list_head sb_mount; + struct list_head sb_umount; + struct list_head sb_pivotroot; + struct list_head sb_set_mnt_opts; + struct list_head sb_clone_mnt_opts; + struct list_head sb_parse_opts_str; + struct list_head dentry_init_security; +#ifdef CONFIG_SECURITY_PATH + struct list_head path_unlink; + struct list_head path_mkdir; + struct list_head path_rmdir; + struct list_head path_mknod; + struct list_head path_truncate; + struct list_head path_symlink; + struct list_head path_link; + struct list_head path_rename; + struct list_head path_chmod; + struct list_head path_chown; + struct list_head path_chroot; +#endif + struct list_head inode_alloc_security; + struct list_head inode_free_security; + struct list_head inode_init_security; + struct list_head inode_create; + struct list_head inode_link; + struct list_head inode_unlink; + struct list_head inode_symlink; + struct list_head inode_mkdir; + struct list_head inode_rmdir; + struct list_head inode_mknod; + struct list_head inode_rename; + struct list_head inode_readlink; + struct list_head inode_follow_link; + struct list_head inode_permission; + struct list_head inode_setattr; + struct list_head inode_getattr; + struct list_head inode_setxattr; + struct list_head inode_post_setxattr; + struct list_head inode_getxattr; + struct list_head inode_listxattr; + struct list_head inode_removexattr; + struct list_head inode_need_killpriv; + struct list_head inode_killpriv; + struct list_head inode_getsecurity; + struct list_head inode_setsecurity; + struct list_head inode_listsecurity; + struct list_head inode_getsecid; + struct list_head file_permission; + struct list_head file_alloc_security; + struct list_head file_free_security; + struct list_head file_ioctl; + struct list_head mmap_addr; + struct list_head mmap_file; + struct list_head file_mprotect; + struct list_head file_lock; + struct list_head file_fcntl; + struct list_head file_set_fowner; + struct list_head file_send_sigiotask; + struct list_head file_receive; + struct list_head file_open; + struct list_head task_create; + struct list_head task_free; + struct list_head cred_alloc_blank; + struct list_head cred_free; + struct list_head cred_prepare; + struct list_head cred_transfer; + struct list_head kernel_act_as; + struct list_head kernel_create_files_as; + struct list_head kernel_fw_from_file; + struct list_head kernel_module_request; + struct list_head kernel_module_from_file; + struct list_head task_fix_setuid; + struct list_head task_setpgid; + struct list_head task_getpgid; + struct list_head task_getsid; + struct list_head task_getsecid; + struct list_head task_setnice; + struct list_head task_setioprio; + struct list_head task_getioprio; + struct list_head task_setrlimit; + struct list_head task_setscheduler; + struct list_head task_getscheduler; + struct list_head task_movememory; + struct list_head task_kill; + struct list_head task_wait; + struct list_head task_prctl; + struct list_head task_to_inode; + struct list_head ipc_permission; + struct list_head ipc_getsecid; + struct list_head msg_msg_alloc_security; + struct list_head msg_msg_free_security; + struct list_head msg_queue_alloc_security; + struct list_head msg_queue_free_security; + struct list_head msg_queue_associate; + struct list_head msg_queue_msgctl; + struct list_head msg_queue_msgsnd; + struct list_head msg_queue_msgrcv; + struct list_head shm_alloc_security; + struct list_head shm_free_security; + struct list_head shm_associate; + struct list_head shm_shmctl; + struct list_head shm_shmat; + struct list_head sem_alloc_security; + struct list_head sem_free_security; + struct list_head sem_associate; + struct list_head sem_semctl; + struct list_head sem_semop; + struct list_head netlink_send; + struct list_head d_instantiate; + struct list_head getprocattr; + struct list_head setprocattr; + struct list_head ismaclabel; + struct list_head secid_to_secctx; + struct list_head secctx_to_secid; + struct list_head release_secctx; + struct list_head inode_notifysecctx; + struct list_head inode_setsecctx; + struct list_head inode_getsecctx; +#ifdef CONFIG_SECURITY_NETWORK + struct list_head unix_stream_connect; + struct list_head unix_may_send; + struct list_head socket_create; + struct list_head socket_post_create; + struct list_head socket_bind; + struct list_head socket_connect; + struct list_head socket_listen; + struct list_head socket_accept; + struct list_head socket_sendmsg; + struct list_head socket_recvmsg; + struct list_head socket_getsockname; + struct list_head socket_getpeername; + struct list_head socket_getsockopt; + struct list_head socket_setsockopt; + struct list_head socket_shutdown; + struct list_head socket_sock_rcv_skb; + struct list_head socket_getpeersec_stream; + struct list_head socket_getpeersec_dgram; + struct list_head sk_alloc_security; + struct list_head sk_free_security; + struct list_head sk_clone_security; + struct list_head sk_getsecid; + struct list_head sock_graft; + struct list_head inet_conn_request; + struct list_head inet_csk_clone; + struct list_head inet_conn_established; + struct list_head secmark_relabel_packet; + struct list_head secmark_refcount_inc; + struct list_head secmark_refcount_dec; + struct list_head req_classify_flow; + struct list_head tun_dev_alloc_security; + struct list_head tun_dev_free_security; + struct list_head tun_dev_create; + struct list_head tun_dev_attach_queue; + struct list_head tun_dev_attach; + struct list_head tun_dev_open; + struct list_head skb_owned_by; +#endif /* CONFIG_SECURITY_NETWORK */ +#ifdef CONFIG_SECURITY_NETWORK_XFRM + struct list_head xfrm_policy_alloc_security; + struct list_head xfrm_policy_clone_security; + struct list_head xfrm_policy_free_security; + struct list_head xfrm_policy_delete_security; + struct list_head xfrm_state_alloc; + struct list_head xfrm_state_alloc_acquire; + struct list_head xfrm_state_free_security; + struct list_head xfrm_state_delete_security; + struct list_head xfrm_policy_lookup; + struct list_head xfrm_state_pol_flow_match; + struct list_head xfrm_decode_session; +#endif /* CONFIG_SECURITY_NETWORK_XFRM */ +#ifdef CONFIG_KEYS + struct list_head key_alloc; + struct list_head key_free; + struct list_head key_permission; + struct list_head key_getsecurity; +#endif /* CONFIG_KEYS */ +#ifdef CONFIG_AUDIT + struct list_head audit_rule_init; + struct list_head audit_rule_known; + struct list_head audit_rule_match; + struct list_head audit_rule_free; +#endif /* CONFIG_AUDIT */ +}; + +/* + * Security module hook list structure. + * For use with generic list macros for common operations. + */ +struct security_hook_list { + struct list_head list; + struct list_head *head; + union security_list_options hook; +}; + +/* + * Initializing a security_hook_list structure takes + * up a lot of space in a source file. This macro takes + * care of the common case and reduces the amount of + * text involved. + */ +#define LSM_HOOK_INIT(HEAD, HOOK) \ + { .head = &security_hook_heads.HEAD, .hook = { .HEAD = HOOK } } + +extern struct security_hook_heads security_hook_heads; + +static inline void security_add_hooks(struct security_hook_list *hooks, + int count) +{ + int i; + + for (i = 0; i < count; i++) + list_add_tail_rcu(&hooks[i].list, hooks[i].head); +} + +#ifdef CONFIG_SECURITY_SELINUX_DISABLE +/* + * Assuring the safety of deleting a security module is up to + * the security module involved. This may entail ordering the + * module's hook list in a particular way, refusing to disable + * the module once a policy is loaded or any number of other + * actions better imagined than described. + * + * The name of the configuration option reflects the only module + * that currently uses the mechanism. Any developer who thinks + * disabling their module is a good idea needs to be at least as + * careful as the SELinux team. + */ +static inline void security_delete_hooks(struct security_hook_list *hooks, + int count) +{ + int i; + + for (i = 0; i < count; i++) + list_del_rcu(&hooks[i].list); +} +#endif /* CONFIG_SECURITY_SELINUX_DISABLE */ + +extern int __init security_module_enable(const char *module); +extern void __init capability_add_hooks(void); +#ifdef CONFIG_SECURITY_YAMA_STACKED +void __init yama_add_hooks(void); +#endif + +#endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/include/linux/nd.h b/include/linux/nd.h new file mode 100644 index 000000000000..507e47c86737 --- /dev/null +++ b/include/linux/nd.h @@ -0,0 +1,151 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __LINUX_ND_H__ +#define __LINUX_ND_H__ +#include <linux/fs.h> +#include <linux/ndctl.h> +#include <linux/device.h> + +struct nd_device_driver { + struct device_driver drv; + unsigned long type; + int (*probe)(struct device *dev); + int (*remove)(struct device *dev); +}; + +static inline struct nd_device_driver *to_nd_device_driver( + struct device_driver *drv) +{ + return container_of(drv, struct nd_device_driver, drv); +}; + +/** + * struct nd_namespace_common - core infrastructure of a namespace + * @force_raw: ignore other personalities for the namespace (e.g. btt) + * @dev: device model node + * @claim: when set a another personality has taken ownership of the namespace + * @rw_bytes: access the raw namespace capacity with byte-aligned transfers + */ +struct nd_namespace_common { + int force_raw; + struct device dev; + struct device *claim; + int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset, + void *buf, size_t size, int rw); +}; + +static inline struct nd_namespace_common *to_ndns(struct device *dev) +{ + return container_of(dev, struct nd_namespace_common, dev); +} + +/** + * struct nd_namespace_io - infrastructure for loading an nd_pmem instance + * @dev: namespace device created by the nd region driver + * @res: struct resource conversion of a NFIT SPA table + */ +struct nd_namespace_io { + struct nd_namespace_common common; + struct resource res; +}; + +/** + * struct nd_namespace_pmem - namespace device for dimm-backed interleaved memory + * @nsio: device and system physical address range to drive + * @alt_name: namespace name supplied in the dimm label + * @uuid: namespace name supplied in the dimm label + */ +struct nd_namespace_pmem { + struct nd_namespace_io nsio; + char *alt_name; + u8 *uuid; +}; + +/** + * struct nd_namespace_blk - namespace for dimm-bounded persistent memory + * @alt_name: namespace name supplied in the dimm label + * @uuid: namespace name supplied in the dimm label + * @id: ida allocated id + * @lbasize: blk namespaces have a native sector size when btt not present + * @num_resources: number of dpa extents to claim + * @res: discontiguous dpa extents for given dimm + */ +struct nd_namespace_blk { + struct nd_namespace_common common; + char *alt_name; + u8 *uuid; + int id; + unsigned long lbasize; + int num_resources; + struct resource **res; +}; + +static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev) +{ + return container_of(dev, struct nd_namespace_io, common.dev); +} + +static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + return container_of(nsio, struct nd_namespace_pmem, nsio); +} + +static inline struct nd_namespace_blk *to_nd_namespace_blk(struct device *dev) +{ + return container_of(dev, struct nd_namespace_blk, common.dev); +} + +/** + * nvdimm_read_bytes() - synchronously read bytes from an nvdimm namespace + * @ndns: device to read + * @offset: namespace-relative starting offset + * @buf: buffer to fill + * @size: transfer length + * + * @buf is up-to-date upon return from this routine. + */ +static inline int nvdimm_read_bytes(struct nd_namespace_common *ndns, + resource_size_t offset, void *buf, size_t size) +{ + return ndns->rw_bytes(ndns, offset, buf, size, READ); +} + +/** + * nvdimm_write_bytes() - synchronously write bytes to an nvdimm namespace + * @ndns: device to read + * @offset: namespace-relative starting offset + * @buf: buffer to drain + * @size: transfer length + * + * NVDIMM Namepaces disks do not implement sectors internally. Depending on + * the @ndns, the contents of @buf may be in cpu cache, platform buffers, + * or on backing memory media upon return from this routine. Flushing + * to media is handled internal to the @ndns driver, if at all. + */ +static inline int nvdimm_write_bytes(struct nd_namespace_common *ndns, + resource_size_t offset, void *buf, size_t size) +{ + return ndns->rw_bytes(ndns, offset, buf, size, WRITE); +} + +#define MODULE_ALIAS_ND_DEVICE(type) \ + MODULE_ALIAS("nd:t" __stringify(type) "*") +#define ND_DEVICE_MODALIAS_FMT "nd:t%d" + +int __must_check __nd_driver_register(struct nd_device_driver *nd_drv, + struct module *module, const char *mod_name); +#define nd_driver_register(driver) \ + __nd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) +#endif /* __LINUX_ND_H__ */ diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h index 56bc026c143f..98ba7525929e 100644 --- a/include/linux/of_dma.h +++ b/include/linux/of_dma.h @@ -23,6 +23,9 @@ struct of_dma { struct device_node *of_node; struct dma_chan *(*of_dma_xlate) (struct of_phandle_args *, struct of_dma *); + void *(*of_dma_route_allocate) + (struct of_phandle_args *, struct of_dma *); + struct dma_router *dma_router; void *of_dma_data; }; @@ -37,12 +40,20 @@ extern int of_dma_controller_register(struct device_node *np, (struct of_phandle_args *, struct of_dma *), void *data); extern void of_dma_controller_free(struct device_node *np); + +extern int of_dma_router_register(struct device_node *np, + void *(*of_dma_route_allocate) + (struct of_phandle_args *, struct of_dma *), + struct dma_router *dma_router); +#define of_dma_router_free of_dma_controller_free + extern struct dma_chan *of_dma_request_slave_channel(struct device_node *np, const char *name); extern struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec, struct of_dma *ofdma); extern struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec, struct of_dma *ofdma); + #else static inline int of_dma_controller_register(struct device_node *np, struct dma_chan *(*of_dma_xlate) @@ -56,6 +67,16 @@ static inline void of_dma_controller_free(struct device_node *np) { } +static inline int of_dma_router_register(struct device_node *np, + void *(*of_dma_route_allocate) + (struct of_phandle_args *, struct of_dma *), + struct dma_router *dma_router) +{ + return -ENODEV; +} + +#define of_dma_router_free of_dma_controller_free + static inline struct dma_chan *of_dma_request_slave_channel(struct device_node *np, const char *name) { diff --git a/include/linux/platform_data/dma-rcar-audmapp.h b/include/linux/platform_data/dma-rcar-audmapp.h deleted file mode 100644 index 471fffebbeb4..000000000000 --- a/include/linux/platform_data/dma-rcar-audmapp.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * This is for Renesas R-Car Audio-DMAC-peri-peri. - * - * Copyright (C) 2014 Renesas Electronics Corporation - * Copyright (C) 2014 Kuninori Morimoto <[email protected]> - * - * This file is based on the include/linux/sh_dma.h - * - * Header for the new SH dmaengine driver - * - * Copyright (C) 2010 Guennadi Liakhovetski <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef SH_AUDMAPP_H -#define SH_AUDMAPP_H - -#include <linux/dmaengine.h> - -struct audmapp_slave_config { - int slave_id; - dma_addr_t src; - dma_addr_t dst; - u32 chcr; -}; - -struct audmapp_pdata { - struct audmapp_slave_config *slave; - int slave_num; -}; - -#endif /* SH_AUDMAPP_H */ diff --git a/include/linux/platform_data/gpio-ath79.h b/include/linux/platform_data/gpio-ath79.h new file mode 100644 index 000000000000..88b0db7bee74 --- /dev/null +++ b/include/linux/platform_data/gpio-ath79.h @@ -0,0 +1,19 @@ +/* + * Atheros AR7XXX/AR9XXX GPIO controller platform data + * + * Copyright (C) 2015 Alban Bedel <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_PLATFORM_DATA_GPIO_ATH79_H +#define __LINUX_PLATFORM_DATA_GPIO_ATH79_H + +struct ath79_gpio_platform_data { + unsigned ngpios; + bool oe_inverted; +}; + +#endif diff --git a/include/linux/pmem.h b/include/linux/pmem.h new file mode 100644 index 000000000000..d2114045a6c4 --- /dev/null +++ b/include/linux/pmem.h @@ -0,0 +1,152 @@ +/* + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __PMEM_H__ +#define __PMEM_H__ + +#include <linux/io.h> + +#ifdef CONFIG_ARCH_HAS_PMEM_API +#include <asm/cacheflush.h> +#else +static inline void arch_wmb_pmem(void) +{ + BUG(); +} + +static inline bool __arch_has_wmb_pmem(void) +{ + return false; +} + +static inline void __pmem *arch_memremap_pmem(resource_size_t offset, + unsigned long size) +{ + return NULL; +} + +static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, + size_t n) +{ + BUG(); +} +#endif + +/* + * Architectures that define ARCH_HAS_PMEM_API must provide + * implementations for arch_memremap_pmem(), arch_memcpy_to_pmem(), + * arch_wmb_pmem(), and __arch_has_wmb_pmem(). + */ + +static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size) +{ + memcpy(dst, (void __force const *) src, size); +} + +static inline void memunmap_pmem(void __pmem *addr) +{ + iounmap((void __force __iomem *) addr); +} + +/** + * arch_has_wmb_pmem - true if wmb_pmem() ensures durability + * + * For a given cpu implementation within an architecture it is possible + * that wmb_pmem() resolves to a nop. In the case this returns + * false, pmem api users are unable to ensure durability and may want to + * fall back to a different data consistency model, or otherwise notify + * the user. + */ +static inline bool arch_has_wmb_pmem(void) +{ + if (IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API)) + return __arch_has_wmb_pmem(); + return false; +} + +static inline bool arch_has_pmem_api(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && arch_has_wmb_pmem(); +} + +/* + * These defaults seek to offer decent performance and minimize the + * window between i/o completion and writes being durable on media. + * However, it is undefined / architecture specific whether + * default_memremap_pmem + default_memcpy_to_pmem is sufficient for + * making data durable relative to i/o completion. + */ +static void default_memcpy_to_pmem(void __pmem *dst, const void *src, + size_t size) +{ + memcpy((void __force *) dst, src, size); +} + +static void __pmem *default_memremap_pmem(resource_size_t offset, + unsigned long size) +{ + return (void __pmem __force *)ioremap_wt(offset, size); +} + +/** + * memremap_pmem - map physical persistent memory for pmem api + * @offset: physical address of persistent memory + * @size: size of the mapping + * + * Establish a mapping of the architecture specific memory type expected + * by memcpy_to_pmem() and wmb_pmem(). For example, it may be + * the case that an uncacheable or writethrough mapping is sufficient, + * or a writeback mapping provided memcpy_to_pmem() and + * wmb_pmem() arrange for the data to be written through the + * cache to persistent media. + */ +static inline void __pmem *memremap_pmem(resource_size_t offset, + unsigned long size) +{ + if (arch_has_pmem_api()) + return arch_memremap_pmem(offset, size); + return default_memremap_pmem(offset, size); +} + +/** + * memcpy_to_pmem - copy data to persistent memory + * @dst: destination buffer for the copy + * @src: source buffer for the copy + * @n: length of the copy in bytes + * + * Perform a memory copy that results in the destination of the copy + * being effectively evicted from, or never written to, the processor + * cache hierarchy after the copy completes. After memcpy_to_pmem() + * data may still reside in cpu or platform buffers, so this operation + * must be followed by a wmb_pmem(). + */ +static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n) +{ + if (arch_has_pmem_api()) + arch_memcpy_to_pmem(dst, src, n); + else + default_memcpy_to_pmem(dst, src, n); +} + +/** + * wmb_pmem - synchronize writes to persistent memory + * + * After a series of memcpy_to_pmem() operations this drains data from + * cpu write buffers and any platform (memory controller) buffers to + * ensure that written data is durable on persistent memory media. + */ +static inline void wmb_pmem(void) +{ + if (arch_has_pmem_api()) + arch_wmb_pmem(); +} +#endif /* __PMEM_H__ */ diff --git a/include/linux/sched.h b/include/linux/sched.h index a09ece354c64..8aa4a251742f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2421,7 +2421,6 @@ extern void sched_dead(struct task_struct *p); extern void proc_caches_init(void); extern void flush_signals(struct task_struct *); -extern void __flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); diff --git a/include/linux/security.h b/include/linux/security.h index 52febde52479..79d85ddf8093 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -27,6 +27,7 @@ #include <linux/slab.h> #include <linux/err.h> #include <linux/string.h> +#include <linux/mm.h> struct linux_binprm; struct cred; @@ -53,9 +54,6 @@ struct xattr; struct xfrm_sec_ctx; struct mm_struct; -/* Maximum number of letters for an LSM name string */ -#define SECURITY_NAME_MAX 10 - /* If capable should audit the security request */ #define SECURITY_CAP_NOAUDIT 0 #define SECURITY_CAP_AUDIT 1 @@ -68,10 +66,7 @@ struct audit_krule; struct user_namespace; struct timezone; -/* - * These functions are in security/capability.c and are used - * as the default capabilities functions - */ +/* These functions are in security/commoncap.c */ extern int cap_capable(const struct cred *cred, struct user_namespace *ns, int cap, int audit); extern int cap_settime(const struct timespec *ts, const struct timezone *tz); @@ -113,10 +108,6 @@ struct xfrm_state; struct xfrm_user_sec_ctx; struct seq_file; -extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); - -void reset_security_ops(void); - #ifdef CONFIG_MMU extern unsigned long mmap_min_addr; extern unsigned long dac_mmap_min_addr; @@ -187,1583 +178,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) opts->num_mnt_opts = 0; } -/** - * struct security_operations - main security structure - * - * Security module identifier. - * - * @name: - * A string that acts as a unique identifier for the LSM with max number - * of characters = SECURITY_NAME_MAX. - * - * Security hooks for program execution operations. - * - * @bprm_set_creds: - * Save security information in the bprm->security field, typically based - * on information about the bprm->file, for later use by the apply_creds - * hook. This hook may also optionally check permissions (e.g. for - * transitions between security domains). - * This hook may be called multiple times during a single execve, e.g. for - * interpreters. The hook can tell whether it has already been called by - * checking to see if @bprm->security is non-NULL. If so, then the hook - * may decide either to retain the security information saved earlier or - * to replace it. - * @bprm contains the linux_binprm structure. - * Return 0 if the hook is successful and permission is granted. - * @bprm_check_security: - * This hook mediates the point when a search for a binary handler will - * begin. It allows a check the @bprm->security value which is set in the - * preceding set_creds call. The primary difference from set_creds is - * that the argv list and envp list are reliably available in @bprm. This - * hook may be called multiple times during a single execve; and in each - * pass set_creds is called first. - * @bprm contains the linux_binprm structure. - * Return 0 if the hook is successful and permission is granted. - * @bprm_committing_creds: - * Prepare to install the new security attributes of a process being - * transformed by an execve operation, based on the old credentials - * pointed to by @current->cred and the information set in @bprm->cred by - * the bprm_set_creds hook. @bprm points to the linux_binprm structure. - * This hook is a good place to perform state changes on the process such - * as closing open file descriptors to which access will no longer be - * granted when the attributes are changed. This is called immediately - * before commit_creds(). - * @bprm_committed_creds: - * Tidy up after the installation of the new security attributes of a - * process being transformed by an execve operation. The new credentials - * have, by this point, been set to @current->cred. @bprm points to the - * linux_binprm structure. This hook is a good place to perform state - * changes on the process such as clearing out non-inheritable signal - * state. This is called immediately after commit_creds(). - * @bprm_secureexec: - * Return a boolean value (0 or 1) indicating whether a "secure exec" - * is required. The flag is passed in the auxiliary table - * on the initial stack to the ELF interpreter to indicate whether libc - * should enable secure mode. - * @bprm contains the linux_binprm structure. - * - * Security hooks for filesystem operations. - * - * @sb_alloc_security: - * Allocate and attach a security structure to the sb->s_security field. - * The s_security field is initialized to NULL when the structure is - * allocated. - * @sb contains the super_block structure to be modified. - * Return 0 if operation was successful. - * @sb_free_security: - * Deallocate and clear the sb->s_security field. - * @sb contains the super_block structure to be modified. - * @sb_statfs: - * Check permission before obtaining filesystem statistics for the @mnt - * mountpoint. - * @dentry is a handle on the superblock for the filesystem. - * Return 0 if permission is granted. - * @sb_mount: - * Check permission before an object specified by @dev_name is mounted on - * the mount point named by @nd. For an ordinary mount, @dev_name - * identifies a device if the file system type requires a device. For a - * remount (@flags & MS_REMOUNT), @dev_name is irrelevant. For a - * loopback/bind mount (@flags & MS_BIND), @dev_name identifies the - * pathname of the object being mounted. - * @dev_name contains the name for object being mounted. - * @path contains the path for mount point object. - * @type contains the filesystem type. - * @flags contains the mount flags. - * @data contains the filesystem-specific data. - * Return 0 if permission is granted. - * @sb_copy_data: - * Allow mount option data to be copied prior to parsing by the filesystem, - * so that the security module can extract security-specific mount - * options cleanly (a filesystem may modify the data e.g. with strsep()). - * This also allows the original mount data to be stripped of security- - * specific options to avoid having to make filesystems aware of them. - * @type the type of filesystem being mounted. - * @orig the original mount data copied from userspace. - * @copy copied data which will be passed to the security module. - * Returns 0 if the copy was successful. - * @sb_remount: - * Extracts security system specific mount options and verifies no changes - * are being made to those options. - * @sb superblock being remounted - * @data contains the filesystem-specific data. - * Return 0 if permission is granted. - * @sb_umount: - * Check permission before the @mnt file system is unmounted. - * @mnt contains the mounted file system. - * @flags contains the unmount flags, e.g. MNT_FORCE. - * Return 0 if permission is granted. - * @sb_pivotroot: - * Check permission before pivoting the root filesystem. - * @old_path contains the path for the new location of the current root (put_old). - * @new_path contains the path for the new root (new_root). - * Return 0 if permission is granted. - * @sb_set_mnt_opts: - * Set the security relevant mount options used for a superblock - * @sb the superblock to set security mount options for - * @opts binary data structure containing all lsm mount data - * @sb_clone_mnt_opts: - * Copy all security options from a given superblock to another - * @oldsb old superblock which contain information to clone - * @newsb new superblock which needs filled in - * @sb_parse_opts_str: - * Parse a string of security data filling in the opts structure - * @options string containing all mount options known by the LSM - * @opts binary data structure usable by the LSM - * @dentry_init_security: - * Compute a context for a dentry as the inode is not yet available - * since NFSv4 has no label backed by an EA anyway. - * @dentry dentry to use in calculating the context. - * @mode mode used to determine resource type. - * @name name of the last path component used to create file - * @ctx pointer to place the pointer to the resulting context in. - * @ctxlen point to place the length of the resulting context. - * - * - * Security hooks for inode operations. - * - * @inode_alloc_security: - * Allocate and attach a security structure to @inode->i_security. The - * i_security field is initialized to NULL when the inode structure is - * allocated. - * @inode contains the inode structure. - * Return 0 if operation was successful. - * @inode_free_security: - * @inode contains the inode structure. - * Deallocate the inode security structure and set @inode->i_security to - * NULL. - * @inode_init_security: - * Obtain the security attribute name suffix and value to set on a newly - * created inode and set up the incore security field for the new inode. - * This hook is called by the fs code as part of the inode creation - * transaction and provides for atomic labeling of the inode, unlike - * the post_create/mkdir/... hooks called by the VFS. The hook function - * is expected to allocate the name and value via kmalloc, with the caller - * being responsible for calling kfree after using them. - * If the security module does not use security attributes or does - * not wish to put a security attribute on this particular inode, - * then it should return -EOPNOTSUPP to skip this processing. - * @inode contains the inode structure of the newly created inode. - * @dir contains the inode structure of the parent directory. - * @qstr contains the last path component of the new object - * @name will be set to the allocated name suffix (e.g. selinux). - * @value will be set to the allocated attribute value. - * @len will be set to the length of the value. - * Returns 0 if @name and @value have been successfully set, - * -EOPNOTSUPP if no security attribute is needed, or - * -ENOMEM on memory allocation failure. - * @inode_create: - * Check permission to create a regular file. - * @dir contains inode structure of the parent of the new file. - * @dentry contains the dentry structure for the file to be created. - * @mode contains the file mode of the file to be created. - * Return 0 if permission is granted. - * @inode_link: - * Check permission before creating a new hard link to a file. - * @old_dentry contains the dentry structure for an existing link to the file. - * @dir contains the inode structure of the parent directory of the new link. - * @new_dentry contains the dentry structure for the new link. - * Return 0 if permission is granted. - * @path_link: - * Check permission before creating a new hard link to a file. - * @old_dentry contains the dentry structure for an existing link - * to the file. - * @new_dir contains the path structure of the parent directory of - * the new link. - * @new_dentry contains the dentry structure for the new link. - * Return 0 if permission is granted. - * @inode_unlink: - * Check the permission to remove a hard link to a file. - * @dir contains the inode structure of parent directory of the file. - * @dentry contains the dentry structure for file to be unlinked. - * Return 0 if permission is granted. - * @path_unlink: - * Check the permission to remove a hard link to a file. - * @dir contains the path structure of parent directory of the file. - * @dentry contains the dentry structure for file to be unlinked. - * Return 0 if permission is granted. - * @inode_symlink: - * Check the permission to create a symbolic link to a file. - * @dir contains the inode structure of parent directory of the symbolic link. - * @dentry contains the dentry structure of the symbolic link. - * @old_name contains the pathname of file. - * Return 0 if permission is granted. - * @path_symlink: - * Check the permission to create a symbolic link to a file. - * @dir contains the path structure of parent directory of - * the symbolic link. - * @dentry contains the dentry structure of the symbolic link. - * @old_name contains the pathname of file. - * Return 0 if permission is granted. - * @inode_mkdir: - * Check permissions to create a new directory in the existing directory - * associated with inode structure @dir. - * @dir contains the inode structure of parent of the directory to be created. - * @dentry contains the dentry structure of new directory. - * @mode contains the mode of new directory. - * Return 0 if permission is granted. - * @path_mkdir: - * Check permissions to create a new directory in the existing directory - * associated with path structure @path. - * @dir contains the path structure of parent of the directory - * to be created. - * @dentry contains the dentry structure of new directory. - * @mode contains the mode of new directory. - * Return 0 if permission is granted. - * @inode_rmdir: - * Check the permission to remove a directory. - * @dir contains the inode structure of parent of the directory to be removed. - * @dentry contains the dentry structure of directory to be removed. - * Return 0 if permission is granted. - * @path_rmdir: - * Check the permission to remove a directory. - * @dir contains the path structure of parent of the directory to be - * removed. - * @dentry contains the dentry structure of directory to be removed. - * Return 0 if permission is granted. - * @inode_mknod: - * Check permissions when creating a special file (or a socket or a fifo - * file created via the mknod system call). Note that if mknod operation - * is being done for a regular file, then the create hook will be called - * and not this hook. - * @dir contains the inode structure of parent of the new file. - * @dentry contains the dentry structure of the new file. - * @mode contains the mode of the new file. - * @dev contains the device number. - * Return 0 if permission is granted. - * @path_mknod: - * Check permissions when creating a file. Note that this hook is called - * even if mknod operation is being done for a regular file. - * @dir contains the path structure of parent of the new file. - * @dentry contains the dentry structure of the new file. - * @mode contains the mode of the new file. - * @dev contains the undecoded device number. Use new_decode_dev() to get - * the decoded device number. - * Return 0 if permission is granted. - * @inode_rename: - * Check for permission to rename a file or directory. - * @old_dir contains the inode structure for parent of the old link. - * @old_dentry contains the dentry structure of the old link. - * @new_dir contains the inode structure for parent of the new link. - * @new_dentry contains the dentry structure of the new link. - * Return 0 if permission is granted. - * @path_rename: - * Check for permission to rename a file or directory. - * @old_dir contains the path structure for parent of the old link. - * @old_dentry contains the dentry structure of the old link. - * @new_dir contains the path structure for parent of the new link. - * @new_dentry contains the dentry structure of the new link. - * Return 0 if permission is granted. - * @path_chmod: - * Check for permission to change DAC's permission of a file or directory. - * @dentry contains the dentry structure. - * @mnt contains the vfsmnt structure. - * @mode contains DAC's mode. - * Return 0 if permission is granted. - * @path_chown: - * Check for permission to change owner/group of a file or directory. - * @path contains the path structure. - * @uid contains new owner's ID. - * @gid contains new group's ID. - * Return 0 if permission is granted. - * @path_chroot: - * Check for permission to change root directory. - * @path contains the path structure. - * Return 0 if permission is granted. - * @inode_readlink: - * Check the permission to read the symbolic link. - * @dentry contains the dentry structure for the file link. - * Return 0 if permission is granted. - * @inode_follow_link: - * Check permission to follow a symbolic link when looking up a pathname. - * @dentry contains the dentry structure for the link. - * @inode contains the inode, which itself is not stable in RCU-walk - * @rcu indicates whether we are in RCU-walk mode. - * Return 0 if permission is granted. - * @inode_permission: - * Check permission before accessing an inode. This hook is called by the - * existing Linux permission function, so a security module can use it to - * provide additional checking for existing Linux permission checks. - * Notice that this hook is called when a file is opened (as well as many - * other operations), whereas the file_security_ops permission hook is - * called when the actual read/write operations are performed. - * @inode contains the inode structure to check. - * @mask contains the permission mask. - * Return 0 if permission is granted. - * @inode_setattr: - * Check permission before setting file attributes. Note that the kernel - * call to notify_change is performed from several locations, whenever - * file attributes change (such as when a file is truncated, chown/chmod - * operations, transferring disk quotas, etc). - * @dentry contains the dentry structure for the file. - * @attr is the iattr structure containing the new file attributes. - * Return 0 if permission is granted. - * @path_truncate: - * Check permission before truncating a file. - * @path contains the path structure for the file. - * Return 0 if permission is granted. - * @inode_getattr: - * Check permission before obtaining file attributes. - * @mnt is the vfsmount where the dentry was looked up - * @dentry contains the dentry structure for the file. - * Return 0 if permission is granted. - * @inode_setxattr: - * Check permission before setting the extended attributes - * @value identified by @name for @dentry. - * Return 0 if permission is granted. - * @inode_post_setxattr: - * Update inode security field after successful setxattr operation. - * @value identified by @name for @dentry. - * @inode_getxattr: - * Check permission before obtaining the extended attributes - * identified by @name for @dentry. - * Return 0 if permission is granted. - * @inode_listxattr: - * Check permission before obtaining the list of extended attribute - * names for @dentry. - * Return 0 if permission is granted. - * @inode_removexattr: - * Check permission before removing the extended attribute - * identified by @name for @dentry. - * Return 0 if permission is granted. - * @inode_getsecurity: - * Retrieve a copy of the extended attribute representation of the - * security label associated with @name for @inode via @buffer. Note that - * @name is the remainder of the attribute name after the security prefix - * has been removed. @alloc is used to specify of the call should return a - * value via the buffer or just the value length Return size of buffer on - * success. - * @inode_setsecurity: - * Set the security label associated with @name for @inode from the - * extended attribute value @value. @size indicates the size of the - * @value in bytes. @flags may be XATTR_CREATE, XATTR_REPLACE, or 0. - * Note that @name is the remainder of the attribute name after the - * security. prefix has been removed. - * Return 0 on success. - * @inode_listsecurity: - * Copy the extended attribute names for the security labels - * associated with @inode into @buffer. The maximum size of @buffer - * is specified by @buffer_size. @buffer may be NULL to request - * the size of the buffer required. - * Returns number of bytes used/required on success. - * @inode_need_killpriv: - * Called when an inode has been changed. - * @dentry is the dentry being changed. - * Return <0 on error to abort the inode change operation. - * Return 0 if inode_killpriv does not need to be called. - * Return >0 if inode_killpriv does need to be called. - * @inode_killpriv: - * The setuid bit is being removed. Remove similar security labels. - * Called with the dentry->d_inode->i_mutex held. - * @dentry is the dentry being changed. - * Return 0 on success. If error is returned, then the operation - * causing setuid bit removal is failed. - * @inode_getsecid: - * Get the secid associated with the node. - * @inode contains a pointer to the inode. - * @secid contains a pointer to the location where result will be saved. - * In case of failure, @secid will be set to zero. - * - * Security hooks for file operations - * - * @file_permission: - * Check file permissions before accessing an open file. This hook is - * called by various operations that read or write files. A security - * module can use this hook to perform additional checking on these - * operations, e.g. to revalidate permissions on use to support privilege - * bracketing or policy changes. Notice that this hook is used when the - * actual read/write operations are performed, whereas the - * inode_security_ops hook is called when a file is opened (as well as - * many other operations). - * Caveat: Although this hook can be used to revalidate permissions for - * various system call operations that read or write files, it does not - * address the revalidation of permissions for memory-mapped files. - * Security modules must handle this separately if they need such - * revalidation. - * @file contains the file structure being accessed. - * @mask contains the requested permissions. - * Return 0 if permission is granted. - * @file_alloc_security: - * Allocate and attach a security structure to the file->f_security field. - * The security field is initialized to NULL when the structure is first - * created. - * @file contains the file structure to secure. - * Return 0 if the hook is successful and permission is granted. - * @file_free_security: - * Deallocate and free any security structures stored in file->f_security. - * @file contains the file structure being modified. - * @file_ioctl: - * @file contains the file structure. - * @cmd contains the operation to perform. - * @arg contains the operational arguments. - * Check permission for an ioctl operation on @file. Note that @arg - * sometimes represents a user space pointer; in other cases, it may be a - * simple integer value. When @arg represents a user space pointer, it - * should never be used by the security module. - * Return 0 if permission is granted. - * @mmap_addr : - * Check permissions for a mmap operation at @addr. - * @addr contains virtual address that will be used for the operation. - * Return 0 if permission is granted. - * @mmap_file : - * Check permissions for a mmap operation. The @file may be NULL, e.g. - * if mapping anonymous memory. - * @file contains the file structure for file to map (may be NULL). - * @reqprot contains the protection requested by the application. - * @prot contains the protection that will be applied by the kernel. - * @flags contains the operational flags. - * Return 0 if permission is granted. - * @file_mprotect: - * Check permissions before changing memory access permissions. - * @vma contains the memory region to modify. - * @reqprot contains the protection requested by the application. - * @prot contains the protection that will be applied by the kernel. - * Return 0 if permission is granted. - * @file_lock: - * Check permission before performing file locking operations. - * Note: this hook mediates both flock and fcntl style locks. - * @file contains the file structure. - * @cmd contains the posix-translated lock operation to perform - * (e.g. F_RDLCK, F_WRLCK). - * Return 0 if permission is granted. - * @file_fcntl: - * Check permission before allowing the file operation specified by @cmd - * from being performed on the file @file. Note that @arg sometimes - * represents a user space pointer; in other cases, it may be a simple - * integer value. When @arg represents a user space pointer, it should - * never be used by the security module. - * @file contains the file structure. - * @cmd contains the operation to be performed. - * @arg contains the operational arguments. - * Return 0 if permission is granted. - * @file_set_fowner: - * Save owner security information (typically from current->security) in - * file->f_security for later use by the send_sigiotask hook. - * @file contains the file structure to update. - * Return 0 on success. - * @file_send_sigiotask: - * Check permission for the file owner @fown to send SIGIO or SIGURG to the - * process @tsk. Note that this hook is sometimes called from interrupt. - * Note that the fown_struct, @fown, is never outside the context of a - * struct file, so the file structure (and associated security information) - * can always be obtained: - * container_of(fown, struct file, f_owner) - * @tsk contains the structure of task receiving signal. - * @fown contains the file owner information. - * @sig is the signal that will be sent. When 0, kernel sends SIGIO. - * Return 0 if permission is granted. - * @file_receive: - * This hook allows security modules to control the ability of a process - * to receive an open file descriptor via socket IPC. - * @file contains the file structure being received. - * Return 0 if permission is granted. - * @file_open - * Save open-time permission checking state for later use upon - * file_permission, and recheck access if anything has changed - * since inode_permission. - * - * Security hooks for task operations. - * - * @task_create: - * Check permission before creating a child process. See the clone(2) - * manual page for definitions of the @clone_flags. - * @clone_flags contains the flags indicating what should be shared. - * Return 0 if permission is granted. - * @task_free: - * @task task being freed - * Handle release of task-related resources. (Note that this can be called - * from interrupt context.) - * @cred_alloc_blank: - * @cred points to the credentials. - * @gfp indicates the atomicity of any memory allocations. - * Only allocate sufficient memory and attach to @cred such that - * cred_transfer() will not get ENOMEM. - * @cred_free: - * @cred points to the credentials. - * Deallocate and clear the cred->security field in a set of credentials. - * @cred_prepare: - * @new points to the new credentials. - * @old points to the original credentials. - * @gfp indicates the atomicity of any memory allocations. - * Prepare a new set of credentials by copying the data from the old set. - * @cred_transfer: - * @new points to the new credentials. - * @old points to the original credentials. - * Transfer data from original creds to new creds - * @kernel_act_as: - * Set the credentials for a kernel service to act as (subjective context). - * @new points to the credentials to be modified. - * @secid specifies the security ID to be set - * The current task must be the one that nominated @secid. - * Return 0 if successful. - * @kernel_create_files_as: - * Set the file creation context in a set of credentials to be the same as - * the objective context of the specified inode. - * @new points to the credentials to be modified. - * @inode points to the inode to use as a reference. - * The current task must be the one that nominated @inode. - * Return 0 if successful. - * @kernel_fw_from_file: - * Load firmware from userspace (not called for built-in firmware). - * @file contains the file structure pointing to the file containing - * the firmware to load. This argument will be NULL if the firmware - * was loaded via the uevent-triggered blob-based interface exposed - * by CONFIG_FW_LOADER_USER_HELPER. - * @buf pointer to buffer containing firmware contents. - * @size length of the firmware contents. - * Return 0 if permission is granted. - * @kernel_module_request: - * Ability to trigger the kernel to automatically upcall to userspace for - * userspace to load a kernel module with the given name. - * @kmod_name name of the module requested by the kernel - * Return 0 if successful. - * @kernel_module_from_file: - * Load a kernel module from userspace. - * @file contains the file structure pointing to the file containing - * the kernel module to load. If the module is being loaded from a blob, - * this argument will be NULL. - * Return 0 if permission is granted. - * @task_fix_setuid: - * Update the module's state after setting one or more of the user - * identity attributes of the current process. The @flags parameter - * indicates which of the set*uid system calls invoked this hook. If - * @new is the set of credentials that will be installed. Modifications - * should be made to this rather than to @current->cred. - * @old is the set of credentials that are being replaces - * @flags contains one of the LSM_SETID_* values. - * Return 0 on success. - * @task_setpgid: - * Check permission before setting the process group identifier of the - * process @p to @pgid. - * @p contains the task_struct for process being modified. - * @pgid contains the new pgid. - * Return 0 if permission is granted. - * @task_getpgid: - * Check permission before getting the process group identifier of the - * process @p. - * @p contains the task_struct for the process. - * Return 0 if permission is granted. - * @task_getsid: - * Check permission before getting the session identifier of the process - * @p. - * @p contains the task_struct for the process. - * Return 0 if permission is granted. - * @task_getsecid: - * Retrieve the security identifier of the process @p. - * @p contains the task_struct for the process and place is into @secid. - * In case of failure, @secid will be set to zero. - * - * @task_setnice: - * Check permission before setting the nice value of @p to @nice. - * @p contains the task_struct of process. - * @nice contains the new nice value. - * Return 0 if permission is granted. - * @task_setioprio - * Check permission before setting the ioprio value of @p to @ioprio. - * @p contains the task_struct of process. - * @ioprio contains the new ioprio value - * Return 0 if permission is granted. - * @task_getioprio - * Check permission before getting the ioprio value of @p. - * @p contains the task_struct of process. - * Return 0 if permission is granted. - * @task_setrlimit: - * Check permission before setting the resource limits of the current - * process for @resource to @new_rlim. The old resource limit values can - * be examined by dereferencing (current->signal->rlim + resource). - * @resource contains the resource whose limit is being set. - * @new_rlim contains the new limits for @resource. - * Return 0 if permission is granted. - * @task_setscheduler: - * Check permission before setting scheduling policy and/or parameters of - * process @p based on @policy and @lp. - * @p contains the task_struct for process. - * @policy contains the scheduling policy. - * @lp contains the scheduling parameters. - * Return 0 if permission is granted. - * @task_getscheduler: - * Check permission before obtaining scheduling information for process - * @p. - * @p contains the task_struct for process. - * Return 0 if permission is granted. - * @task_movememory - * Check permission before moving memory owned by process @p. - * @p contains the task_struct for process. - * Return 0 if permission is granted. - * @task_kill: - * Check permission before sending signal @sig to @p. @info can be NULL, - * the constant 1, or a pointer to a siginfo structure. If @info is 1 or - * SI_FROMKERNEL(info) is true, then the signal should be viewed as coming - * from the kernel and should typically be permitted. - * SIGIO signals are handled separately by the send_sigiotask hook in - * file_security_ops. - * @p contains the task_struct for process. - * @info contains the signal information. - * @sig contains the signal value. - * @secid contains the sid of the process where the signal originated - * Return 0 if permission is granted. - * @task_wait: - * Check permission before allowing a process to reap a child process @p - * and collect its status information. - * @p contains the task_struct for process. - * Return 0 if permission is granted. - * @task_prctl: - * Check permission before performing a process control operation on the - * current process. - * @option contains the operation. - * @arg2 contains a argument. - * @arg3 contains a argument. - * @arg4 contains a argument. - * @arg5 contains a argument. - * Return -ENOSYS if no-one wanted to handle this op, any other value to - * cause prctl() to return immediately with that value. - * @task_to_inode: - * Set the security attributes for an inode based on an associated task's - * security attributes, e.g. for /proc/pid inodes. - * @p contains the task_struct for the task. - * @inode contains the inode structure for the inode. - * - * Security hooks for Netlink messaging. - * - * @netlink_send: - * Save security information for a netlink message so that permission - * checking can be performed when the message is processed. The security - * information can be saved using the eff_cap field of the - * netlink_skb_parms structure. Also may be used to provide fine - * grained control over message transmission. - * @sk associated sock of task sending the message. - * @skb contains the sk_buff structure for the netlink message. - * Return 0 if the information was successfully saved and message - * is allowed to be transmitted. - * - * Security hooks for Unix domain networking. - * - * @unix_stream_connect: - * Check permissions before establishing a Unix domain stream connection - * between @sock and @other. - * @sock contains the sock structure. - * @other contains the peer sock structure. - * @newsk contains the new sock structure. - * Return 0 if permission is granted. - * @unix_may_send: - * Check permissions before connecting or sending datagrams from @sock to - * @other. - * @sock contains the socket structure. - * @other contains the peer socket structure. - * Return 0 if permission is granted. - * - * The @unix_stream_connect and @unix_may_send hooks were necessary because - * Linux provides an alternative to the conventional file name space for Unix - * domain sockets. Whereas binding and connecting to sockets in the file name - * space is mediated by the typical file permissions (and caught by the mknod - * and permission hooks in inode_security_ops), binding and connecting to - * sockets in the abstract name space is completely unmediated. Sufficient - * control of Unix domain sockets in the abstract name space isn't possible - * using only the socket layer hooks, since we need to know the actual target - * socket, which is not looked up until we are inside the af_unix code. - * - * Security hooks for socket operations. - * - * @socket_create: - * Check permissions prior to creating a new socket. - * @family contains the requested protocol family. - * @type contains the requested communications type. - * @protocol contains the requested protocol. - * @kern set to 1 if a kernel socket. - * Return 0 if permission is granted. - * @socket_post_create: - * This hook allows a module to update or allocate a per-socket security - * structure. Note that the security field was not added directly to the - * socket structure, but rather, the socket security information is stored - * in the associated inode. Typically, the inode alloc_security hook will - * allocate and and attach security information to - * sock->inode->i_security. This hook may be used to update the - * sock->inode->i_security field with additional information that wasn't - * available when the inode was allocated. - * @sock contains the newly created socket structure. - * @family contains the requested protocol family. - * @type contains the requested communications type. - * @protocol contains the requested protocol. - * @kern set to 1 if a kernel socket. - * @socket_bind: - * Check permission before socket protocol layer bind operation is - * performed and the socket @sock is bound to the address specified in the - * @address parameter. - * @sock contains the socket structure. - * @address contains the address to bind to. - * @addrlen contains the length of address. - * Return 0 if permission is granted. - * @socket_connect: - * Check permission before socket protocol layer connect operation - * attempts to connect socket @sock to a remote address, @address. - * @sock contains the socket structure. - * @address contains the address of remote endpoint. - * @addrlen contains the length of address. - * Return 0 if permission is granted. - * @socket_listen: - * Check permission before socket protocol layer listen operation. - * @sock contains the socket structure. - * @backlog contains the maximum length for the pending connection queue. - * Return 0 if permission is granted. - * @socket_accept: - * Check permission before accepting a new connection. Note that the new - * socket, @newsock, has been created and some information copied to it, - * but the accept operation has not actually been performed. - * @sock contains the listening socket structure. - * @newsock contains the newly created server socket for connection. - * Return 0 if permission is granted. - * @socket_sendmsg: - * Check permission before transmitting a message to another socket. - * @sock contains the socket structure. - * @msg contains the message to be transmitted. - * @size contains the size of message. - * Return 0 if permission is granted. - * @socket_recvmsg: - * Check permission before receiving a message from a socket. - * @sock contains the socket structure. - * @msg contains the message structure. - * @size contains the size of message structure. - * @flags contains the operational flags. - * Return 0 if permission is granted. - * @socket_getsockname: - * Check permission before the local address (name) of the socket object - * @sock is retrieved. - * @sock contains the socket structure. - * Return 0 if permission is granted. - * @socket_getpeername: - * Check permission before the remote address (name) of a socket object - * @sock is retrieved. - * @sock contains the socket structure. - * Return 0 if permission is granted. - * @socket_getsockopt: - * Check permissions before retrieving the options associated with socket - * @sock. - * @sock contains the socket structure. - * @level contains the protocol level to retrieve option from. - * @optname contains the name of option to retrieve. - * Return 0 if permission is granted. - * @socket_setsockopt: - * Check permissions before setting the options associated with socket - * @sock. - * @sock contains the socket structure. - * @level contains the protocol level to set options for. - * @optname contains the name of the option to set. - * Return 0 if permission is granted. - * @socket_shutdown: - * Checks permission before all or part of a connection on the socket - * @sock is shut down. - * @sock contains the socket structure. - * @how contains the flag indicating how future sends and receives are handled. - * Return 0 if permission is granted. - * @socket_sock_rcv_skb: - * Check permissions on incoming network packets. This hook is distinct - * from Netfilter's IP input hooks since it is the first time that the - * incoming sk_buff @skb has been associated with a particular socket, @sk. - * Must not sleep inside this hook because some callers hold spinlocks. - * @sk contains the sock (not socket) associated with the incoming sk_buff. - * @skb contains the incoming network data. - * @socket_getpeersec_stream: - * This hook allows the security module to provide peer socket security - * state for unix or connected tcp sockets to userspace via getsockopt - * SO_GETPEERSEC. For tcp sockets this can be meaningful if the - * socket is associated with an ipsec SA. - * @sock is the local socket. - * @optval userspace memory where the security state is to be copied. - * @optlen userspace int where the module should copy the actual length - * of the security state. - * @len as input is the maximum length to copy to userspace provided - * by the caller. - * Return 0 if all is well, otherwise, typical getsockopt return - * values. - * @socket_getpeersec_dgram: - * This hook allows the security module to provide peer socket security - * state for udp sockets on a per-packet basis to userspace via - * getsockopt SO_GETPEERSEC. The application must first have indicated - * the IP_PASSSEC option via getsockopt. It can then retrieve the - * security state returned by this hook for a packet via the SCM_SECURITY - * ancillary message type. - * @skb is the skbuff for the packet being queried - * @secdata is a pointer to a buffer in which to copy the security data - * @seclen is the maximum length for @secdata - * Return 0 on success, error on failure. - * @sk_alloc_security: - * Allocate and attach a security structure to the sk->sk_security field, - * which is used to copy security attributes between local stream sockets. - * @sk_free_security: - * Deallocate security structure. - * @sk_clone_security: - * Clone/copy security structure. - * @sk_getsecid: - * Retrieve the LSM-specific secid for the sock to enable caching of network - * authorizations. - * @sock_graft: - * Sets the socket's isec sid to the sock's sid. - * @inet_conn_request: - * Sets the openreq's sid to socket's sid with MLS portion taken from peer sid. - * @inet_csk_clone: - * Sets the new child socket's sid to the openreq sid. - * @inet_conn_established: - * Sets the connection's peersid to the secmark on skb. - * @secmark_relabel_packet: - * check if the process should be allowed to relabel packets to the given secid - * @security_secmark_refcount_inc - * tells the LSM to increment the number of secmark labeling rules loaded - * @security_secmark_refcount_dec - * tells the LSM to decrement the number of secmark labeling rules loaded - * @req_classify_flow: - * Sets the flow's sid to the openreq sid. - * @tun_dev_alloc_security: - * This hook allows a module to allocate a security structure for a TUN - * device. - * @security pointer to a security structure pointer. - * Returns a zero on success, negative values on failure. - * @tun_dev_free_security: - * This hook allows a module to free the security structure for a TUN - * device. - * @security pointer to the TUN device's security structure - * @tun_dev_create: - * Check permissions prior to creating a new TUN device. - * @tun_dev_attach_queue: - * Check permissions prior to attaching to a TUN device queue. - * @security pointer to the TUN device's security structure. - * @tun_dev_attach: - * This hook can be used by the module to update any security state - * associated with the TUN device's sock structure. - * @sk contains the existing sock structure. - * @security pointer to the TUN device's security structure. - * @tun_dev_open: - * This hook can be used by the module to update any security state - * associated with the TUN device's security structure. - * @security pointer to the TUN devices's security structure. - * @skb_owned_by: - * This hook sets the packet's owning sock. - * @skb is the packet. - * @sk the sock which owns the packet. - * - * Security hooks for XFRM operations. - * - * @xfrm_policy_alloc_security: - * @ctxp is a pointer to the xfrm_sec_ctx being added to Security Policy - * Database used by the XFRM system. - * @sec_ctx contains the security context information being provided by - * the user-level policy update program (e.g., setkey). - * Allocate a security structure to the xp->security field; the security - * field is initialized to NULL when the xfrm_policy is allocated. - * Return 0 if operation was successful (memory to allocate, legal context) - * @gfp is to specify the context for the allocation - * @xfrm_policy_clone_security: - * @old_ctx contains an existing xfrm_sec_ctx. - * @new_ctxp contains a new xfrm_sec_ctx being cloned from old. - * Allocate a security structure in new_ctxp that contains the - * information from the old_ctx structure. - * Return 0 if operation was successful (memory to allocate). - * @xfrm_policy_free_security: - * @ctx contains the xfrm_sec_ctx - * Deallocate xp->security. - * @xfrm_policy_delete_security: - * @ctx contains the xfrm_sec_ctx. - * Authorize deletion of xp->security. - * @xfrm_state_alloc: - * @x contains the xfrm_state being added to the Security Association - * Database by the XFRM system. - * @sec_ctx contains the security context information being provided by - * the user-level SA generation program (e.g., setkey or racoon). - * Allocate a security structure to the x->security field; the security - * field is initialized to NULL when the xfrm_state is allocated. Set the - * context to correspond to sec_ctx. Return 0 if operation was successful - * (memory to allocate, legal context). - * @xfrm_state_alloc_acquire: - * @x contains the xfrm_state being added to the Security Association - * Database by the XFRM system. - * @polsec contains the policy's security context. - * @secid contains the secid from which to take the mls portion of the - * context. - * Allocate a security structure to the x->security field; the security - * field is initialized to NULL when the xfrm_state is allocated. Set the - * context to correspond to secid. Return 0 if operation was successful - * (memory to allocate, legal context). - * @xfrm_state_free_security: - * @x contains the xfrm_state. - * Deallocate x->security. - * @xfrm_state_delete_security: - * @x contains the xfrm_state. - * Authorize deletion of x->security. - * @xfrm_policy_lookup: - * @ctx contains the xfrm_sec_ctx for which the access control is being - * checked. - * @fl_secid contains the flow security label that is used to authorize - * access to the policy xp. - * @dir contains the direction of the flow (input or output). - * Check permission when a flow selects a xfrm_policy for processing - * XFRMs on a packet. The hook is called when selecting either a - * per-socket policy or a generic xfrm policy. - * Return 0 if permission is granted, -ESRCH otherwise, or -errno - * on other errors. - * @xfrm_state_pol_flow_match: - * @x contains the state to match. - * @xp contains the policy to check for a match. - * @fl contains the flow to check for a match. - * Return 1 if there is a match. - * @xfrm_decode_session: - * @skb points to skb to decode. - * @secid points to the flow key secid to set. - * @ckall says if all xfrms used should be checked for same secid. - * Return 0 if ckall is zero or all xfrms used have the same secid. - * - * Security hooks affecting all Key Management operations - * - * @key_alloc: - * Permit allocation of a key and assign security data. Note that key does - * not have a serial number assigned at this point. - * @key points to the key. - * @flags is the allocation flags - * Return 0 if permission is granted, -ve error otherwise. - * @key_free: - * Notification of destruction; free security data. - * @key points to the key. - * No return value. - * @key_permission: - * See whether a specific operational right is granted to a process on a - * key. - * @key_ref refers to the key (key pointer + possession attribute bit). - * @cred points to the credentials to provide the context against which to - * evaluate the security data on the key. - * @perm describes the combination of permissions required of this key. - * Return 0 if permission is granted, -ve error otherwise. - * @key_getsecurity: - * Get a textual representation of the security context attached to a key - * for the purposes of honouring KEYCTL_GETSECURITY. This function - * allocates the storage for the NUL-terminated string and the caller - * should free it. - * @key points to the key to be queried. - * @_buffer points to a pointer that should be set to point to the - * resulting string (if no label or an error occurs). - * Return the length of the string (including terminating NUL) or -ve if - * an error. - * May also return 0 (and a NULL buffer pointer) if there is no label. - * - * Security hooks affecting all System V IPC operations. - * - * @ipc_permission: - * Check permissions for access to IPC - * @ipcp contains the kernel IPC permission structure - * @flag contains the desired (requested) permission set - * Return 0 if permission is granted. - * @ipc_getsecid: - * Get the secid associated with the ipc object. - * @ipcp contains the kernel IPC permission structure. - * @secid contains a pointer to the location where result will be saved. - * In case of failure, @secid will be set to zero. - * - * Security hooks for individual messages held in System V IPC message queues - * @msg_msg_alloc_security: - * Allocate and attach a security structure to the msg->security field. - * The security field is initialized to NULL when the structure is first - * created. - * @msg contains the message structure to be modified. - * Return 0 if operation was successful and permission is granted. - * @msg_msg_free_security: - * Deallocate the security structure for this message. - * @msg contains the message structure to be modified. - * - * Security hooks for System V IPC Message Queues - * - * @msg_queue_alloc_security: - * Allocate and attach a security structure to the - * msq->q_perm.security field. The security field is initialized to - * NULL when the structure is first created. - * @msq contains the message queue structure to be modified. - * Return 0 if operation was successful and permission is granted. - * @msg_queue_free_security: - * Deallocate security structure for this message queue. - * @msq contains the message queue structure to be modified. - * @msg_queue_associate: - * Check permission when a message queue is requested through the - * msgget system call. This hook is only called when returning the - * message queue identifier for an existing message queue, not when a - * new message queue is created. - * @msq contains the message queue to act upon. - * @msqflg contains the operation control flags. - * Return 0 if permission is granted. - * @msg_queue_msgctl: - * Check permission when a message control operation specified by @cmd - * is to be performed on the message queue @msq. - * The @msq may be NULL, e.g. for IPC_INFO or MSG_INFO. - * @msq contains the message queue to act upon. May be NULL. - * @cmd contains the operation to be performed. - * Return 0 if permission is granted. - * @msg_queue_msgsnd: - * Check permission before a message, @msg, is enqueued on the message - * queue, @msq. - * @msq contains the message queue to send message to. - * @msg contains the message to be enqueued. - * @msqflg contains operational flags. - * Return 0 if permission is granted. - * @msg_queue_msgrcv: - * Check permission before a message, @msg, is removed from the message - * queue, @msq. The @target task structure contains a pointer to the - * process that will be receiving the message (not equal to the current - * process when inline receives are being performed). - * @msq contains the message queue to retrieve message from. - * @msg contains the message destination. - * @target contains the task structure for recipient process. - * @type contains the type of message requested. - * @mode contains the operational flags. - * Return 0 if permission is granted. - * - * Security hooks for System V Shared Memory Segments - * - * @shm_alloc_security: - * Allocate and attach a security structure to the shp->shm_perm.security - * field. The security field is initialized to NULL when the structure is - * first created. - * @shp contains the shared memory structure to be modified. - * Return 0 if operation was successful and permission is granted. - * @shm_free_security: - * Deallocate the security struct for this memory segment. - * @shp contains the shared memory structure to be modified. - * @shm_associate: - * Check permission when a shared memory region is requested through the - * shmget system call. This hook is only called when returning the shared - * memory region identifier for an existing region, not when a new shared - * memory region is created. - * @shp contains the shared memory structure to be modified. - * @shmflg contains the operation control flags. - * Return 0 if permission is granted. - * @shm_shmctl: - * Check permission when a shared memory control operation specified by - * @cmd is to be performed on the shared memory region @shp. - * The @shp may be NULL, e.g. for IPC_INFO or SHM_INFO. - * @shp contains shared memory structure to be modified. - * @cmd contains the operation to be performed. - * Return 0 if permission is granted. - * @shm_shmat: - * Check permissions prior to allowing the shmat system call to attach the - * shared memory segment @shp to the data segment of the calling process. - * The attaching address is specified by @shmaddr. - * @shp contains the shared memory structure to be modified. - * @shmaddr contains the address to attach memory region to. - * @shmflg contains the operational flags. - * Return 0 if permission is granted. - * - * Security hooks for System V Semaphores - * - * @sem_alloc_security: - * Allocate and attach a security structure to the sma->sem_perm.security - * field. The security field is initialized to NULL when the structure is - * first created. - * @sma contains the semaphore structure - * Return 0 if operation was successful and permission is granted. - * @sem_free_security: - * deallocate security struct for this semaphore - * @sma contains the semaphore structure. - * @sem_associate: - * Check permission when a semaphore is requested through the semget - * system call. This hook is only called when returning the semaphore - * identifier for an existing semaphore, not when a new one must be - * created. - * @sma contains the semaphore structure. - * @semflg contains the operation control flags. - * Return 0 if permission is granted. - * @sem_semctl: - * Check permission when a semaphore operation specified by @cmd is to be - * performed on the semaphore @sma. The @sma may be NULL, e.g. for - * IPC_INFO or SEM_INFO. - * @sma contains the semaphore structure. May be NULL. - * @cmd contains the operation to be performed. - * Return 0 if permission is granted. - * @sem_semop - * Check permissions before performing operations on members of the - * semaphore set @sma. If the @alter flag is nonzero, the semaphore set - * may be modified. - * @sma contains the semaphore structure. - * @sops contains the operations to perform. - * @nsops contains the number of operations to perform. - * @alter contains the flag indicating whether changes are to be made. - * Return 0 if permission is granted. - * - * @binder_set_context_mgr - * Check whether @mgr is allowed to be the binder context manager. - * @mgr contains the task_struct for the task being registered. - * Return 0 if permission is granted. - * @binder_transaction - * Check whether @from is allowed to invoke a binder transaction call - * to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. - * @binder_transfer_binder - * Check whether @from is allowed to transfer a binder reference to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. - * @binder_transfer_file - * Check whether @from is allowed to transfer @file to @to. - * @from contains the task_struct for the sending task. - * @file contains the struct file being transferred. - * @to contains the task_struct for the receiving task. - * - * @ptrace_access_check: - * Check permission before allowing the current process to trace the - * @child process. - * Security modules may also want to perform a process tracing check - * during an execve in the set_security or apply_creds hooks of - * tracing check during an execve in the bprm_set_creds hook of - * binprm_security_ops if the process is being traced and its security - * attributes would be changed by the execve. - * @child contains the task_struct structure for the target process. - * @mode contains the PTRACE_MODE flags indicating the form of access. - * Return 0 if permission is granted. - * @ptrace_traceme: - * Check that the @parent process has sufficient permission to trace the - * current process before allowing the current process to present itself - * to the @parent process for tracing. - * @parent contains the task_struct structure for debugger process. - * Return 0 if permission is granted. - * @capget: - * Get the @effective, @inheritable, and @permitted capability sets for - * the @target process. The hook may also perform permission checking to - * determine if the current process is allowed to see the capability sets - * of the @target process. - * @target contains the task_struct structure for target process. - * @effective contains the effective capability set. - * @inheritable contains the inheritable capability set. - * @permitted contains the permitted capability set. - * Return 0 if the capability sets were successfully obtained. - * @capset: - * Set the @effective, @inheritable, and @permitted capability sets for - * the current process. - * @new contains the new credentials structure for target process. - * @old contains the current credentials structure for target process. - * @effective contains the effective capability set. - * @inheritable contains the inheritable capability set. - * @permitted contains the permitted capability set. - * Return 0 and update @new if permission is granted. - * @capable: - * Check whether the @tsk process has the @cap capability in the indicated - * credentials. - * @cred contains the credentials to use. - * @ns contains the user namespace we want the capability in - * @cap contains the capability <include/linux/capability.h>. - * @audit: Whether to write an audit message or not - * Return 0 if the capability is granted for @tsk. - * @syslog: - * Check permission before accessing the kernel message ring or changing - * logging to the console. - * See the syslog(2) manual page for an explanation of the @type values. - * @type contains the type of action. - * @from_file indicates the context of action (if it came from /proc). - * Return 0 if permission is granted. - * @settime: - * Check permission to change the system time. - * struct timespec and timezone are defined in include/linux/time.h - * @ts contains new time - * @tz contains new timezone - * Return 0 if permission is granted. - * @vm_enough_memory: - * Check permissions for allocating a new virtual mapping. - * @mm contains the mm struct it is being added to. - * @pages contains the number of pages. - * Return 0 if permission is granted. - * - * @ismaclabel: - * Check if the extended attribute specified by @name - * represents a MAC label. Returns 1 if name is a MAC - * attribute otherwise returns 0. - * @name full extended attribute name to check against - * LSM as a MAC label. - * - * @secid_to_secctx: - * Convert secid to security context. If secdata is NULL the length of - * the result will be returned in seclen, but no secdata will be returned. - * This does mean that the length could change between calls to check the - * length and the next call which actually allocates and returns the secdata. - * @secid contains the security ID. - * @secdata contains the pointer that stores the converted security context. - * @seclen pointer which contains the length of the data - * @secctx_to_secid: - * Convert security context to secid. - * @secid contains the pointer to the generated security ID. - * @secdata contains the security context. - * - * @release_secctx: - * Release the security context. - * @secdata contains the security context. - * @seclen contains the length of the security context. - * - * Security hooks for Audit - * - * @audit_rule_init: - * Allocate and initialize an LSM audit rule structure. - * @field contains the required Audit action. Fields flags are defined in include/linux/audit.h - * @op contains the operator the rule uses. - * @rulestr contains the context where the rule will be applied to. - * @lsmrule contains a pointer to receive the result. - * Return 0 if @lsmrule has been successfully set, - * -EINVAL in case of an invalid rule. - * - * @audit_rule_known: - * Specifies whether given @rule contains any fields related to current LSM. - * @rule contains the audit rule of interest. - * Return 1 in case of relation found, 0 otherwise. - * - * @audit_rule_match: - * Determine if given @secid matches a rule previously approved - * by @audit_rule_known. - * @secid contains the security id in question. - * @field contains the field which relates to current LSM. - * @op contains the operator that will be used for matching. - * @rule points to the audit rule that will be checked against. - * @actx points to the audit context associated with the check. - * Return 1 if secid matches the rule, 0 if it does not, -ERRNO on failure. - * - * @audit_rule_free: - * Deallocate the LSM audit rule structure previously allocated by - * audit_rule_init. - * @rule contains the allocated rule - * - * @inode_notifysecctx: - * Notify the security module of what the security context of an inode - * should be. Initializes the incore security context managed by the - * security module for this inode. Example usage: NFS client invokes - * this hook to initialize the security context in its incore inode to the - * value provided by the server for the file when the server returned the - * file's attributes to the client. - * - * Must be called with inode->i_mutex locked. - * - * @inode we wish to set the security context of. - * @ctx contains the string which we wish to set in the inode. - * @ctxlen contains the length of @ctx. - * - * @inode_setsecctx: - * Change the security context of an inode. Updates the - * incore security context managed by the security module and invokes the - * fs code as needed (via __vfs_setxattr_noperm) to update any backing - * xattrs that represent the context. Example usage: NFS server invokes - * this hook to change the security context in its incore inode and on the - * backing filesystem to a value provided by the client on a SETATTR - * operation. - * - * Must be called with inode->i_mutex locked. - * - * @dentry contains the inode we wish to set the security context of. - * @ctx contains the string which we wish to set in the inode. - * @ctxlen contains the length of @ctx. - * - * @inode_getsecctx: - * On success, returns 0 and fills out @ctx and @ctxlen with the security - * context for the given @inode. - * - * @inode we wish to get the security context of. - * @ctx is a pointer in which to place the allocated security context. - * @ctxlen points to the place to put the length of @ctx. - * This is the main security structure. - */ -struct security_operations { - char name[SECURITY_NAME_MAX + 1]; - - int (*binder_set_context_mgr) (struct task_struct *mgr); - int (*binder_transaction) (struct task_struct *from, - struct task_struct *to); - int (*binder_transfer_binder) (struct task_struct *from, - struct task_struct *to); - int (*binder_transfer_file) (struct task_struct *from, - struct task_struct *to, struct file *file); - - int (*ptrace_access_check) (struct task_struct *child, unsigned int mode); - int (*ptrace_traceme) (struct task_struct *parent); - int (*capget) (struct task_struct *target, - kernel_cap_t *effective, - kernel_cap_t *inheritable, kernel_cap_t *permitted); - int (*capset) (struct cred *new, - const struct cred *old, - const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); - int (*capable) (const struct cred *cred, struct user_namespace *ns, - int cap, int audit); - int (*quotactl) (int cmds, int type, int id, struct super_block *sb); - int (*quota_on) (struct dentry *dentry); - int (*syslog) (int type); - int (*settime) (const struct timespec *ts, const struct timezone *tz); - int (*vm_enough_memory) (struct mm_struct *mm, long pages); - - int (*bprm_set_creds) (struct linux_binprm *bprm); - int (*bprm_check_security) (struct linux_binprm *bprm); - int (*bprm_secureexec) (struct linux_binprm *bprm); - void (*bprm_committing_creds) (struct linux_binprm *bprm); - void (*bprm_committed_creds) (struct linux_binprm *bprm); - - int (*sb_alloc_security) (struct super_block *sb); - void (*sb_free_security) (struct super_block *sb); - int (*sb_copy_data) (char *orig, char *copy); - int (*sb_remount) (struct super_block *sb, void *data); - int (*sb_kern_mount) (struct super_block *sb, int flags, void *data); - int (*sb_show_options) (struct seq_file *m, struct super_block *sb); - int (*sb_statfs) (struct dentry *dentry); - int (*sb_mount) (const char *dev_name, struct path *path, - const char *type, unsigned long flags, void *data); - int (*sb_umount) (struct vfsmount *mnt, int flags); - int (*sb_pivotroot) (struct path *old_path, - struct path *new_path); - int (*sb_set_mnt_opts) (struct super_block *sb, - struct security_mnt_opts *opts, - unsigned long kern_flags, - unsigned long *set_kern_flags); - int (*sb_clone_mnt_opts) (const struct super_block *oldsb, - struct super_block *newsb); - int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts); - int (*dentry_init_security) (struct dentry *dentry, int mode, - struct qstr *name, void **ctx, - u32 *ctxlen); - - -#ifdef CONFIG_SECURITY_PATH - int (*path_unlink) (struct path *dir, struct dentry *dentry); - int (*path_mkdir) (struct path *dir, struct dentry *dentry, umode_t mode); - int (*path_rmdir) (struct path *dir, struct dentry *dentry); - int (*path_mknod) (struct path *dir, struct dentry *dentry, umode_t mode, - unsigned int dev); - int (*path_truncate) (struct path *path); - int (*path_symlink) (struct path *dir, struct dentry *dentry, - const char *old_name); - int (*path_link) (struct dentry *old_dentry, struct path *new_dir, - struct dentry *new_dentry); - int (*path_rename) (struct path *old_dir, struct dentry *old_dentry, - struct path *new_dir, struct dentry *new_dentry); - int (*path_chmod) (struct path *path, umode_t mode); - int (*path_chown) (struct path *path, kuid_t uid, kgid_t gid); - int (*path_chroot) (struct path *path); -#endif - - int (*inode_alloc_security) (struct inode *inode); - void (*inode_free_security) (struct inode *inode); - int (*inode_init_security) (struct inode *inode, struct inode *dir, - const struct qstr *qstr, const char **name, - void **value, size_t *len); - int (*inode_create) (struct inode *dir, - struct dentry *dentry, umode_t mode); - int (*inode_link) (struct dentry *old_dentry, - struct inode *dir, struct dentry *new_dentry); - int (*inode_unlink) (struct inode *dir, struct dentry *dentry); - int (*inode_symlink) (struct inode *dir, - struct dentry *dentry, const char *old_name); - int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, umode_t mode); - int (*inode_rmdir) (struct inode *dir, struct dentry *dentry); - int (*inode_mknod) (struct inode *dir, struct dentry *dentry, - umode_t mode, dev_t dev); - int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry); - int (*inode_readlink) (struct dentry *dentry); - int (*inode_follow_link) (struct dentry *dentry, struct inode *inode, - bool rcu); - int (*inode_permission) (struct inode *inode, int mask); - int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); - int (*inode_getattr) (const struct path *path); - int (*inode_setxattr) (struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); - void (*inode_post_setxattr) (struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); - int (*inode_getxattr) (struct dentry *dentry, const char *name); - int (*inode_listxattr) (struct dentry *dentry); - int (*inode_removexattr) (struct dentry *dentry, const char *name); - int (*inode_need_killpriv) (struct dentry *dentry); - int (*inode_killpriv) (struct dentry *dentry); - int (*inode_getsecurity) (const struct inode *inode, const char *name, void **buffer, bool alloc); - int (*inode_setsecurity) (struct inode *inode, const char *name, const void *value, size_t size, int flags); - int (*inode_listsecurity) (struct inode *inode, char *buffer, size_t buffer_size); - void (*inode_getsecid) (const struct inode *inode, u32 *secid); - - int (*file_permission) (struct file *file, int mask); - int (*file_alloc_security) (struct file *file); - void (*file_free_security) (struct file *file); - int (*file_ioctl) (struct file *file, unsigned int cmd, - unsigned long arg); - int (*mmap_addr) (unsigned long addr); - int (*mmap_file) (struct file *file, - unsigned long reqprot, unsigned long prot, - unsigned long flags); - int (*file_mprotect) (struct vm_area_struct *vma, - unsigned long reqprot, - unsigned long prot); - int (*file_lock) (struct file *file, unsigned int cmd); - int (*file_fcntl) (struct file *file, unsigned int cmd, - unsigned long arg); - void (*file_set_fowner) (struct file *file); - int (*file_send_sigiotask) (struct task_struct *tsk, - struct fown_struct *fown, int sig); - int (*file_receive) (struct file *file); - int (*file_open) (struct file *file, const struct cred *cred); - - int (*task_create) (unsigned long clone_flags); - void (*task_free) (struct task_struct *task); - int (*cred_alloc_blank) (struct cred *cred, gfp_t gfp); - void (*cred_free) (struct cred *cred); - int (*cred_prepare)(struct cred *new, const struct cred *old, - gfp_t gfp); - void (*cred_transfer)(struct cred *new, const struct cred *old); - int (*kernel_act_as)(struct cred *new, u32 secid); - int (*kernel_create_files_as)(struct cred *new, struct inode *inode); - int (*kernel_fw_from_file)(struct file *file, char *buf, size_t size); - int (*kernel_module_request)(char *kmod_name); - int (*kernel_module_from_file)(struct file *file); - int (*task_fix_setuid) (struct cred *new, const struct cred *old, - int flags); - int (*task_setpgid) (struct task_struct *p, pid_t pgid); - int (*task_getpgid) (struct task_struct *p); - int (*task_getsid) (struct task_struct *p); - void (*task_getsecid) (struct task_struct *p, u32 *secid); - int (*task_setnice) (struct task_struct *p, int nice); - int (*task_setioprio) (struct task_struct *p, int ioprio); - int (*task_getioprio) (struct task_struct *p); - int (*task_setrlimit) (struct task_struct *p, unsigned int resource, - struct rlimit *new_rlim); - int (*task_setscheduler) (struct task_struct *p); - int (*task_getscheduler) (struct task_struct *p); - int (*task_movememory) (struct task_struct *p); - int (*task_kill) (struct task_struct *p, - struct siginfo *info, int sig, u32 secid); - int (*task_wait) (struct task_struct *p); - int (*task_prctl) (int option, unsigned long arg2, - unsigned long arg3, unsigned long arg4, - unsigned long arg5); - void (*task_to_inode) (struct task_struct *p, struct inode *inode); - - int (*ipc_permission) (struct kern_ipc_perm *ipcp, short flag); - void (*ipc_getsecid) (struct kern_ipc_perm *ipcp, u32 *secid); - - int (*msg_msg_alloc_security) (struct msg_msg *msg); - void (*msg_msg_free_security) (struct msg_msg *msg); - - int (*msg_queue_alloc_security) (struct msg_queue *msq); - void (*msg_queue_free_security) (struct msg_queue *msq); - int (*msg_queue_associate) (struct msg_queue *msq, int msqflg); - int (*msg_queue_msgctl) (struct msg_queue *msq, int cmd); - int (*msg_queue_msgsnd) (struct msg_queue *msq, - struct msg_msg *msg, int msqflg); - int (*msg_queue_msgrcv) (struct msg_queue *msq, - struct msg_msg *msg, - struct task_struct *target, - long type, int mode); - - int (*shm_alloc_security) (struct shmid_kernel *shp); - void (*shm_free_security) (struct shmid_kernel *shp); - int (*shm_associate) (struct shmid_kernel *shp, int shmflg); - int (*shm_shmctl) (struct shmid_kernel *shp, int cmd); - int (*shm_shmat) (struct shmid_kernel *shp, - char __user *shmaddr, int shmflg); - - int (*sem_alloc_security) (struct sem_array *sma); - void (*sem_free_security) (struct sem_array *sma); - int (*sem_associate) (struct sem_array *sma, int semflg); - int (*sem_semctl) (struct sem_array *sma, int cmd); - int (*sem_semop) (struct sem_array *sma, - struct sembuf *sops, unsigned nsops, int alter); - - int (*netlink_send) (struct sock *sk, struct sk_buff *skb); - - void (*d_instantiate) (struct dentry *dentry, struct inode *inode); - - int (*getprocattr) (struct task_struct *p, char *name, char **value); - int (*setprocattr) (struct task_struct *p, char *name, void *value, size_t size); - int (*ismaclabel) (const char *name); - int (*secid_to_secctx) (u32 secid, char **secdata, u32 *seclen); - int (*secctx_to_secid) (const char *secdata, u32 seclen, u32 *secid); - void (*release_secctx) (char *secdata, u32 seclen); - - int (*inode_notifysecctx)(struct inode *inode, void *ctx, u32 ctxlen); - int (*inode_setsecctx)(struct dentry *dentry, void *ctx, u32 ctxlen); - int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen); - -#ifdef CONFIG_SECURITY_NETWORK - int (*unix_stream_connect) (struct sock *sock, struct sock *other, struct sock *newsk); - int (*unix_may_send) (struct socket *sock, struct socket *other); - - int (*socket_create) (int family, int type, int protocol, int kern); - int (*socket_post_create) (struct socket *sock, int family, - int type, int protocol, int kern); - int (*socket_bind) (struct socket *sock, - struct sockaddr *address, int addrlen); - int (*socket_connect) (struct socket *sock, - struct sockaddr *address, int addrlen); - int (*socket_listen) (struct socket *sock, int backlog); - int (*socket_accept) (struct socket *sock, struct socket *newsock); - int (*socket_sendmsg) (struct socket *sock, - struct msghdr *msg, int size); - int (*socket_recvmsg) (struct socket *sock, - struct msghdr *msg, int size, int flags); - int (*socket_getsockname) (struct socket *sock); - int (*socket_getpeername) (struct socket *sock); - int (*socket_getsockopt) (struct socket *sock, int level, int optname); - int (*socket_setsockopt) (struct socket *sock, int level, int optname); - int (*socket_shutdown) (struct socket *sock, int how); - int (*socket_sock_rcv_skb) (struct sock *sk, struct sk_buff *skb); - int (*socket_getpeersec_stream) (struct socket *sock, char __user *optval, int __user *optlen, unsigned len); - int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid); - int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority); - void (*sk_free_security) (struct sock *sk); - void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); - void (*sk_getsecid) (struct sock *sk, u32 *secid); - void (*sock_graft) (struct sock *sk, struct socket *parent); - int (*inet_conn_request) (struct sock *sk, struct sk_buff *skb, - struct request_sock *req); - void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req); - void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb); - int (*secmark_relabel_packet) (u32 secid); - void (*secmark_refcount_inc) (void); - void (*secmark_refcount_dec) (void); - void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl); - int (*tun_dev_alloc_security) (void **security); - void (*tun_dev_free_security) (void *security); - int (*tun_dev_create) (void); - int (*tun_dev_attach_queue) (void *security); - int (*tun_dev_attach) (struct sock *sk, void *security); - int (*tun_dev_open) (void *security); -#endif /* CONFIG_SECURITY_NETWORK */ - -#ifdef CONFIG_SECURITY_NETWORK_XFRM - int (*xfrm_policy_alloc_security) (struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *sec_ctx, gfp_t gfp); - int (*xfrm_policy_clone_security) (struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctx); - void (*xfrm_policy_free_security) (struct xfrm_sec_ctx *ctx); - int (*xfrm_policy_delete_security) (struct xfrm_sec_ctx *ctx); - int (*xfrm_state_alloc) (struct xfrm_state *x, - struct xfrm_user_sec_ctx *sec_ctx); - int (*xfrm_state_alloc_acquire) (struct xfrm_state *x, - struct xfrm_sec_ctx *polsec, - u32 secid); - void (*xfrm_state_free_security) (struct xfrm_state *x); - int (*xfrm_state_delete_security) (struct xfrm_state *x); - int (*xfrm_policy_lookup) (struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); - int (*xfrm_state_pol_flow_match) (struct xfrm_state *x, - struct xfrm_policy *xp, - const struct flowi *fl); - int (*xfrm_decode_session) (struct sk_buff *skb, u32 *secid, int ckall); -#endif /* CONFIG_SECURITY_NETWORK_XFRM */ - - /* key management security hooks */ -#ifdef CONFIG_KEYS - int (*key_alloc) (struct key *key, const struct cred *cred, unsigned long flags); - void (*key_free) (struct key *key); - int (*key_permission) (key_ref_t key_ref, - const struct cred *cred, - unsigned perm); - int (*key_getsecurity)(struct key *key, char **_buffer); -#endif /* CONFIG_KEYS */ - -#ifdef CONFIG_AUDIT - int (*audit_rule_init) (u32 field, u32 op, char *rulestr, void **lsmrule); - int (*audit_rule_known) (struct audit_krule *krule); - int (*audit_rule_match) (u32 secid, u32 field, u32 op, void *lsmrule, - struct audit_context *actx); - void (*audit_rule_free) (void *lsmrule); -#endif /* CONFIG_AUDIT */ -}; - /* prototypes */ extern int security_init(void); -extern int security_module_enable(struct security_operations *ops); -extern int register_security(struct security_operations *ops); -extern void __init security_fixup_ops(struct security_operations *ops); - /* Security operations */ int security_binder_set_context_mgr(struct task_struct *mgr); @@ -2049,7 +465,7 @@ static inline int security_settime(const struct timespec *ts, static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { - return cap_vm_enough_memory(mm, pages); + return __vm_enough_memory(mm, pages, cap_vm_enough_memory(mm, pages)); } static inline int security_bprm_set_creds(struct linux_binprm *bprm) @@ -2653,7 +1069,7 @@ static inline int security_setprocattr(struct task_struct *p, char *name, void * static inline int security_netlink_send(struct sock *sk, struct sk_buff *skb) { - return cap_netlink_send(sk, skb); + return 0; } static inline int security_ismaclabel(const char *name) @@ -3221,36 +1637,5 @@ static inline void free_secdata(void *secdata) { } #endif /* CONFIG_SECURITY */ -#ifdef CONFIG_SECURITY_YAMA -extern int yama_ptrace_access_check(struct task_struct *child, - unsigned int mode); -extern int yama_ptrace_traceme(struct task_struct *parent); -extern void yama_task_free(struct task_struct *task); -extern int yama_task_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5); -#else -static inline int yama_ptrace_access_check(struct task_struct *child, - unsigned int mode) -{ - return 0; -} - -static inline int yama_ptrace_traceme(struct task_struct *parent) -{ - return 0; -} - -static inline void yama_task_free(struct task_struct *task) -{ -} - -static inline int yama_task_prctl(int option, unsigned long arg2, - unsigned long arg3, unsigned long arg4, - unsigned long arg5) -{ - return -ENOSYS; -} -#endif /* CONFIG_SECURITY_YAMA */ - #endif /* ! __LINUX_SECURITY_H */ diff --git a/include/linux/slab.h b/include/linux/slab.h index 9de2fdc8b5e4..a99f0e5243e1 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -153,30 +153,8 @@ size_t ksize(const void *); #define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN #define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN #define KMALLOC_SHIFT_LOW ilog2(ARCH_DMA_MINALIGN) -/* - * The KMALLOC_LOOP_LOW is the definition for the for loop index start number - * to create the kmalloc_caches object in create_kmalloc_caches(). The first - * and the second are 96 and 192. You can see that in the kmalloc_index(), if - * the KMALLOC_MIN_SIZE <= 32, then return 1 (96). If KMALLOC_MIN_SIZE <= 64, - * then return 2 (192). If the KMALLOC_MIN_SIZE is bigger than 64, we don't - * need to initialize 96 and 192. Go directly to start the KMALLOC_SHIFT_LOW. - */ -#if KMALLOC_MIN_SIZE <= 32 -#define KMALLOC_LOOP_LOW 1 -#elif KMALLOC_MIN_SIZE <= 64 -#define KMALLOC_LOOP_LOW 2 -#else -#define KMALLOC_LOOP_LOW KMALLOC_SHIFT_LOW -#endif - #else #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) -/* - * The KMALLOC_MIN_SIZE of slub/slab/slob is 2^3/2^5/2^3. So, even slab is used. - * The KMALLOC_MIN_SIZE <= 32. The kmalloc-96 and kmalloc-192 should also be - * initialized. - */ -#define KMALLOC_LOOP_LOW 1 #endif /* diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 4568a5cc9ab8..c3d1a525bacc 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -29,10 +29,13 @@ struct ssb_sprom { u8 il0mac[6] __aligned(sizeof(u16)); /* MAC address for 802.11b/g */ u8 et0mac[6] __aligned(sizeof(u16)); /* MAC address for Ethernet */ u8 et1mac[6] __aligned(sizeof(u16)); /* MAC address for 802.11a */ + u8 et2mac[6] __aligned(sizeof(u16)); /* MAC address for extra Ethernet */ u8 et0phyaddr; /* MII address for enet0 */ u8 et1phyaddr; /* MII address for enet1 */ + u8 et2phyaddr; /* MII address for enet2 */ u8 et0mdcport; /* MDIO for enet0 */ u8 et1mdcport; /* MDIO for enet1 */ + u8 et2mdcport; /* MDIO for enet2 */ u16 dev_id; /* Device ID overriding e.g. PCI ID */ u16 board_rev; /* Board revision number from SPROM. */ u16 board_num; /* Board number from SPROM. */ @@ -88,11 +91,14 @@ struct ssb_sprom { u32 ofdm5glpo; /* 5.2GHz OFDM power offset */ u32 ofdm5gpo; /* 5.3GHz OFDM power offset */ u32 ofdm5ghpo; /* 5.8GHz OFDM power offset */ + u32 boardflags; + u32 boardflags2; + u32 boardflags3; + /* TODO: Switch all drivers to new u32 fields and drop below ones */ u16 boardflags_lo; /* Board flags (bits 0-15) */ u16 boardflags_hi; /* Board flags (bits 16-31) */ u16 boardflags2_lo; /* Board flags (bits 32-47) */ u16 boardflags2_hi; /* Board flags (bits 48-63) */ - /* TODO store board flags in a single u64 */ struct ssb_sprom_core_pwr_info core_pwr_info[4]; diff --git a/include/linux/wait.h b/include/linux/wait.h index d69ac4ecc88b..1e1bf9f963a9 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -358,6 +358,19 @@ do { \ __ret; \ }) +#define __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2) \ + (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 1, 0, \ + cmd1; schedule(); cmd2) +/* + * Just like wait_event_cmd(), except it sets exclusive flag + */ +#define wait_event_exclusive_cmd(wq, condition, cmd1, cmd2) \ +do { \ + if (condition) \ + break; \ + __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2); \ +} while (0) + #define __wait_event_cmd(wq, condition, cmd1, cmd2) \ (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ cmd1; schedule(); cmd2) diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index c1c23f19d4a2..1ff9942718fe 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -272,6 +272,7 @@ header-y += ncp_fs.h header-y += ncp.h header-y += ncp_mount.h header-y += ncp_no.h +header-y += ndctl.h header-y += neighbour.h header-y += netconf.h header-y += netdevice.h diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h new file mode 100644 index 000000000000..2b94ea2287bb --- /dev/null +++ b/include/uapi/linux/ndctl.h @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2014-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU Lesser General Public License, + * version 2.1, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + */ +#ifndef __NDCTL_H__ +#define __NDCTL_H__ + +#include <linux/types.h> + +struct nd_cmd_smart { + __u32 status; + __u8 data[128]; +} __packed; + +struct nd_cmd_smart_threshold { + __u32 status; + __u8 data[8]; +} __packed; + +struct nd_cmd_dimm_flags { + __u32 status; + __u32 flags; +} __packed; + +struct nd_cmd_get_config_size { + __u32 status; + __u32 config_size; + __u32 max_xfer; +} __packed; + +struct nd_cmd_get_config_data_hdr { + __u32 in_offset; + __u32 in_length; + __u32 status; + __u8 out_buf[0]; +} __packed; + +struct nd_cmd_set_config_hdr { + __u32 in_offset; + __u32 in_length; + __u8 in_buf[0]; +} __packed; + +struct nd_cmd_vendor_hdr { + __u32 opcode; + __u32 in_length; + __u8 in_buf[0]; +} __packed; + +struct nd_cmd_vendor_tail { + __u32 status; + __u32 out_length; + __u8 out_buf[0]; +} __packed; + +struct nd_cmd_ars_cap { + __u64 address; + __u64 length; + __u32 status; + __u32 max_ars_out; +} __packed; + +struct nd_cmd_ars_start { + __u64 address; + __u64 length; + __u16 type; + __u8 reserved[6]; + __u32 status; +} __packed; + +struct nd_cmd_ars_status { + __u32 status; + __u32 out_length; + __u64 address; + __u64 length; + __u16 type; + __u32 num_records; + struct nd_ars_record { + __u32 handle; + __u32 flags; + __u64 err_address; + __u64 mask; + } __packed records[0]; +} __packed; + +enum { + ND_CMD_IMPLEMENTED = 0, + + /* bus commands */ + ND_CMD_ARS_CAP = 1, + ND_CMD_ARS_START = 2, + ND_CMD_ARS_STATUS = 3, + + /* per-dimm commands */ + ND_CMD_SMART = 1, + ND_CMD_SMART_THRESHOLD = 2, + ND_CMD_DIMM_FLAGS = 3, + ND_CMD_GET_CONFIG_SIZE = 4, + ND_CMD_GET_CONFIG_DATA = 5, + ND_CMD_SET_CONFIG_DATA = 6, + ND_CMD_VENDOR_EFFECT_LOG_SIZE = 7, + ND_CMD_VENDOR_EFFECT_LOG = 8, + ND_CMD_VENDOR = 9, +}; + +static inline const char *nvdimm_bus_cmd_name(unsigned cmd) +{ + static const char * const names[] = { + [ND_CMD_ARS_CAP] = "ars_cap", + [ND_CMD_ARS_START] = "ars_start", + [ND_CMD_ARS_STATUS] = "ars_status", + }; + + if (cmd < ARRAY_SIZE(names) && names[cmd]) + return names[cmd]; + return "unknown"; +} + +static inline const char *nvdimm_cmd_name(unsigned cmd) +{ + static const char * const names[] = { + [ND_CMD_SMART] = "smart", + [ND_CMD_SMART_THRESHOLD] = "smart_thresh", + [ND_CMD_DIMM_FLAGS] = "flags", + [ND_CMD_GET_CONFIG_SIZE] = "get_size", + [ND_CMD_GET_CONFIG_DATA] = "get_data", + [ND_CMD_SET_CONFIG_DATA] = "set_data", + [ND_CMD_VENDOR_EFFECT_LOG_SIZE] = "effect_size", + [ND_CMD_VENDOR_EFFECT_LOG] = "effect_log", + [ND_CMD_VENDOR] = "vendor", + }; + + if (cmd < ARRAY_SIZE(names) && names[cmd]) + return names[cmd]; + return "unknown"; +} + +#define ND_IOCTL 'N' + +#define ND_IOCTL_SMART _IOWR(ND_IOCTL, ND_CMD_SMART,\ + struct nd_cmd_smart) + +#define ND_IOCTL_SMART_THRESHOLD _IOWR(ND_IOCTL, ND_CMD_SMART_THRESHOLD,\ + struct nd_cmd_smart_threshold) + +#define ND_IOCTL_DIMM_FLAGS _IOWR(ND_IOCTL, ND_CMD_DIMM_FLAGS,\ + struct nd_cmd_dimm_flags) + +#define ND_IOCTL_GET_CONFIG_SIZE _IOWR(ND_IOCTL, ND_CMD_GET_CONFIG_SIZE,\ + struct nd_cmd_get_config_size) + +#define ND_IOCTL_GET_CONFIG_DATA _IOWR(ND_IOCTL, ND_CMD_GET_CONFIG_DATA,\ + struct nd_cmd_get_config_data_hdr) + +#define ND_IOCTL_SET_CONFIG_DATA _IOWR(ND_IOCTL, ND_CMD_SET_CONFIG_DATA,\ + struct nd_cmd_set_config_hdr) + +#define ND_IOCTL_VENDOR _IOWR(ND_IOCTL, ND_CMD_VENDOR,\ + struct nd_cmd_vendor_hdr) + +#define ND_IOCTL_ARS_CAP _IOWR(ND_IOCTL, ND_CMD_ARS_CAP,\ + struct nd_cmd_ars_cap) + +#define ND_IOCTL_ARS_START _IOWR(ND_IOCTL, ND_CMD_ARS_START,\ + struct nd_cmd_ars_start) + +#define ND_IOCTL_ARS_STATUS _IOWR(ND_IOCTL, ND_CMD_ARS_STATUS,\ + struct nd_cmd_ars_status) + +#define ND_DEVICE_DIMM 1 /* nd_dimm: container for "config data" */ +#define ND_DEVICE_REGION_PMEM 2 /* nd_region: (parent of PMEM namespaces) */ +#define ND_DEVICE_REGION_BLK 3 /* nd_region: (parent of BLK namespaces) */ +#define ND_DEVICE_NAMESPACE_IO 4 /* legacy persistent memory */ +#define ND_DEVICE_NAMESPACE_PMEM 5 /* PMEM namespace (may alias with BLK) */ +#define ND_DEVICE_NAMESPACE_BLK 6 /* BLK namespace (may alias with PMEM) */ + +enum nd_driver_flags { + ND_DRIVER_DIMM = 1 << ND_DEVICE_DIMM, + ND_DRIVER_REGION_PMEM = 1 << ND_DEVICE_REGION_PMEM, + ND_DRIVER_REGION_BLK = 1 << ND_DEVICE_REGION_BLK, + ND_DRIVER_NAMESPACE_IO = 1 << ND_DEVICE_NAMESPACE_IO, + ND_DRIVER_NAMESPACE_PMEM = 1 << ND_DEVICE_NAMESPACE_PMEM, + ND_DRIVER_NAMESPACE_BLK = 1 << ND_DEVICE_NAMESPACE_BLK, +}; + +enum { + ND_MIN_NAMESPACE_SIZE = 0x00400000, +}; +#endif /* __NDCTL_H__ */ diff --git a/kernel/audit.c b/kernel/audit.c index 1c13e4267de6..f9e6065346db 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1904,7 +1904,7 @@ EXPORT_SYMBOL(audit_log_task_info); /** * audit_log_link_denied - report a link restriction denial - * @operation: specific link opreation + * @operation: specific link operation * @link: the path that triggered the restriction */ void audit_log_link_denied(const char *operation, struct path *link) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 9fb9d1cb83ce..09c65640cad6 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -599,9 +599,7 @@ static int audit_filter_rules(struct task_struct *tsk, result = match_tree_refs(ctx, rule->tree); break; case AUDIT_LOGINUID: - result = 0; - if (ctx) - result = audit_uid_comparator(tsk->loginuid, f->op, f->uid); + result = audit_uid_comparator(tsk->loginuid, f->op, f->uid); break; case AUDIT_LOGINUID_SET: result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val); @@ -1023,7 +1021,7 @@ static int audit_log_single_execve_arg(struct audit_context *context, * for strings that are too long, we should not have created * any. */ - if (unlikely((len == -1) || len > MAX_ARG_STRLEN - 1)) { + if (unlikely((len == 0) || len > MAX_ARG_STRLEN - 1)) { WARN_ON(1); send_sig(SIGKILL, current, 0); return -1; diff --git a/kernel/signal.c b/kernel/signal.c index f19833b5db3c..836df8dac6cc 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -414,21 +414,16 @@ void flush_sigqueue(struct sigpending *queue) } /* - * Flush all pending signals for a task. + * Flush all pending signals for this kthread. */ -void __flush_signals(struct task_struct *t) -{ - clear_tsk_thread_flag(t, TIF_SIGPENDING); - flush_sigqueue(&t->pending); - flush_sigqueue(&t->signal->shared_pending); -} - void flush_signals(struct task_struct *t) { unsigned long flags; spin_lock_irqsave(&t->sighand->siglock, flags); - __flush_signals(t); + clear_tsk_thread_flag(t, TIF_SIGPENDING); + flush_sigqueue(&t->pending); + flush_sigqueue(&t->signal->shared_pending); spin_unlock_irqrestore(&t->sighand->siglock, flags); } diff --git a/lib/Kconfig b/lib/Kconfig index 34e332b8d326..3a2ef67db6c7 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -528,4 +528,7 @@ source "lib/fonts/Kconfig" config ARCH_HAS_SG_CHAIN def_bool n +config ARCH_HAS_PMEM_API + bool + endmenu diff --git a/mm/slab_common.c b/mm/slab_common.c index 9f8d71f78404..983b78694c46 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -855,6 +855,12 @@ void __init setup_kmalloc_cache_index_table(void) } } +static void new_kmalloc_cache(int idx, unsigned long flags) +{ + kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name, + kmalloc_info[idx].size, flags); +} + /* * Create the kmalloc array. Some of the regular kmalloc arrays * may already have been created because they were needed to @@ -864,25 +870,19 @@ void __init create_kmalloc_caches(unsigned long flags) { int i; - for (i = KMALLOC_LOOP_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { - if (!kmalloc_caches[i]) { - kmalloc_caches[i] = create_kmalloc_cache( - kmalloc_info[i].name, - kmalloc_info[i].size, - flags); - } + for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { + if (!kmalloc_caches[i]) + new_kmalloc_cache(i, flags); /* - * "i == 2" is the "kmalloc-192" case which is the last special - * case for initialization and it's the point to jump to - * allocate the minimize size of the object. In slab allocator, - * the KMALLOC_SHIFT_LOW = 5. So, it needs to skip 2^3 and 2^4 - * and go straight to allocate 2^5. If the ARCH_DMA_MINALIGN is - * defined, it may be larger than 2^5 and here is also the - * trick to skip the empty gap. + * Caches that are not of the two-to-the-power-of size. + * These have to be created immediately after the + * earlier power of two caches */ - if (i == 2) - i = (KMALLOC_SHIFT_LOW - 1); + if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6) + new_kmalloc_cache(1, flags); + if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7) + new_kmalloc_cache(2, flags); } /* Kmalloc array is now usable */ diff --git a/security/Makefile b/security/Makefile index 05f1c934d74b..c9bfbc84ff50 100644 --- a/security/Makefile +++ b/security/Makefile @@ -14,7 +14,7 @@ obj-y += commoncap.o obj-$(CONFIG_MMU) += min_addr.o # Object file lists -obj-$(CONFIG_SECURITY) += security.o capability.o +obj-$(CONFIG_SECURITY) += security.o obj-$(CONFIG_SECURITYFS) += inode.o obj-$(CONFIG_SECURITY_SELINUX) += selinux/ obj-$(CONFIG_SECURITY_SMACK) += smack/ diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index d97cba3e3849..dc0027b28b04 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -347,9 +347,7 @@ int apparmor_bprm_set_creds(struct linux_binprm *bprm) file_inode(bprm->file)->i_mode }; const char *name = NULL, *target = NULL, *info = NULL; - int error = cap_bprm_set_creds(bprm); - if (error) - return error; + int error = 0; if (bprm->cred_prepared) return 0; @@ -531,15 +529,13 @@ cleanup: */ int apparmor_bprm_secureexec(struct linux_binprm *bprm) { - int ret = cap_bprm_secureexec(bprm); - /* the decision to use secure exec is computed in set_creds * and stored in bprm->unsafe. */ - if (!ret && (bprm->unsafe & AA_SECURE_X_NEEDED)) - ret = 1; + if (bprm->unsafe & AA_SECURE_X_NEEDED) + return 1; - return ret; + return 0; } /** diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index e5f1561439db..5696874e8062 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -12,7 +12,7 @@ * License. */ -#include <linux/security.h> +#include <linux/lsm_hooks.h> #include <linux/moduleparam.h> #include <linux/mm.h> #include <linux/mman.h> @@ -96,19 +96,11 @@ static void apparmor_cred_transfer(struct cred *new, const struct cred *old) static int apparmor_ptrace_access_check(struct task_struct *child, unsigned int mode) { - int error = cap_ptrace_access_check(child, mode); - if (error) - return error; - return aa_ptrace(current, child, mode); } static int apparmor_ptrace_traceme(struct task_struct *parent) { - int error = cap_ptrace_traceme(parent); - if (error) - return error; - return aa_ptrace(parent, current, PTRACE_MODE_ATTACH); } @@ -123,10 +115,10 @@ static int apparmor_capget(struct task_struct *target, kernel_cap_t *effective, cred = __task_cred(target); profile = aa_cred_profile(cred); - *effective = cred->cap_effective; - *inheritable = cred->cap_inheritable; - *permitted = cred->cap_permitted; - + /* + * cap_capget is stacked ahead of this and will + * initialize effective and permitted. + */ if (!unconfined(profile) && !COMPLAIN_MODE(profile)) { *effective = cap_intersect(*effective, profile->caps.allow); *permitted = cap_intersect(*permitted, profile->caps.allow); @@ -140,13 +132,11 @@ static int apparmor_capable(const struct cred *cred, struct user_namespace *ns, int cap, int audit) { struct aa_profile *profile; - /* cap_capable returns 0 on success, else -EPERM */ - int error = cap_capable(cred, ns, cap, audit); - if (!error) { - profile = aa_cred_profile(cred); - if (!unconfined(profile)) - error = aa_capable(profile, cap, audit); - } + int error = 0; + + profile = aa_cred_profile(cred); + if (!unconfined(profile)) + error = aa_capable(profile, cap, audit); return error; } @@ -615,49 +605,46 @@ static int apparmor_task_setrlimit(struct task_struct *task, return error; } -static struct security_operations apparmor_ops = { - .name = "apparmor", - - .ptrace_access_check = apparmor_ptrace_access_check, - .ptrace_traceme = apparmor_ptrace_traceme, - .capget = apparmor_capget, - .capable = apparmor_capable, - - .path_link = apparmor_path_link, - .path_unlink = apparmor_path_unlink, - .path_symlink = apparmor_path_symlink, - .path_mkdir = apparmor_path_mkdir, - .path_rmdir = apparmor_path_rmdir, - .path_mknod = apparmor_path_mknod, - .path_rename = apparmor_path_rename, - .path_chmod = apparmor_path_chmod, - .path_chown = apparmor_path_chown, - .path_truncate = apparmor_path_truncate, - .inode_getattr = apparmor_inode_getattr, - - .file_open = apparmor_file_open, - .file_permission = apparmor_file_permission, - .file_alloc_security = apparmor_file_alloc_security, - .file_free_security = apparmor_file_free_security, - .mmap_file = apparmor_mmap_file, - .mmap_addr = cap_mmap_addr, - .file_mprotect = apparmor_file_mprotect, - .file_lock = apparmor_file_lock, - - .getprocattr = apparmor_getprocattr, - .setprocattr = apparmor_setprocattr, - - .cred_alloc_blank = apparmor_cred_alloc_blank, - .cred_free = apparmor_cred_free, - .cred_prepare = apparmor_cred_prepare, - .cred_transfer = apparmor_cred_transfer, - - .bprm_set_creds = apparmor_bprm_set_creds, - .bprm_committing_creds = apparmor_bprm_committing_creds, - .bprm_committed_creds = apparmor_bprm_committed_creds, - .bprm_secureexec = apparmor_bprm_secureexec, - - .task_setrlimit = apparmor_task_setrlimit, +static struct security_hook_list apparmor_hooks[] = { + LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check), + LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme), + LSM_HOOK_INIT(capget, apparmor_capget), + LSM_HOOK_INIT(capable, apparmor_capable), + + LSM_HOOK_INIT(path_link, apparmor_path_link), + LSM_HOOK_INIT(path_unlink, apparmor_path_unlink), + LSM_HOOK_INIT(path_symlink, apparmor_path_symlink), + LSM_HOOK_INIT(path_mkdir, apparmor_path_mkdir), + LSM_HOOK_INIT(path_rmdir, apparmor_path_rmdir), + LSM_HOOK_INIT(path_mknod, apparmor_path_mknod), + LSM_HOOK_INIT(path_rename, apparmor_path_rename), + LSM_HOOK_INIT(path_chmod, apparmor_path_chmod), + LSM_HOOK_INIT(path_chown, apparmor_path_chown), + LSM_HOOK_INIT(path_truncate, apparmor_path_truncate), + LSM_HOOK_INIT(inode_getattr, apparmor_inode_getattr), + + LSM_HOOK_INIT(file_open, apparmor_file_open), + LSM_HOOK_INIT(file_permission, apparmor_file_permission), + LSM_HOOK_INIT(file_alloc_security, apparmor_file_alloc_security), + LSM_HOOK_INIT(file_free_security, apparmor_file_free_security), + LSM_HOOK_INIT(mmap_file, apparmor_mmap_file), + LSM_HOOK_INIT(file_mprotect, apparmor_file_mprotect), + LSM_HOOK_INIT(file_lock, apparmor_file_lock), + + LSM_HOOK_INIT(getprocattr, apparmor_getprocattr), + LSM_HOOK_INIT(setprocattr, apparmor_setprocattr), + + LSM_HOOK_INIT(cred_alloc_blank, apparmor_cred_alloc_blank), + LSM_HOOK_INIT(cred_free, apparmor_cred_free), + LSM_HOOK_INIT(cred_prepare, apparmor_cred_prepare), + LSM_HOOK_INIT(cred_transfer, apparmor_cred_transfer), + + LSM_HOOK_INIT(bprm_set_creds, apparmor_bprm_set_creds), + LSM_HOOK_INIT(bprm_committing_creds, apparmor_bprm_committing_creds), + LSM_HOOK_INIT(bprm_committed_creds, apparmor_bprm_committed_creds), + LSM_HOOK_INIT(bprm_secureexec, apparmor_bprm_secureexec), + + LSM_HOOK_INIT(task_setrlimit, apparmor_task_setrlimit), }; /* @@ -898,7 +885,7 @@ static int __init apparmor_init(void) { int error; - if (!apparmor_enabled || !security_module_enable(&apparmor_ops)) { + if (!apparmor_enabled || !security_module_enable("apparmor")) { aa_info_message("AppArmor disabled by boot time parameter"); apparmor_enabled = 0; return 0; @@ -913,17 +900,10 @@ static int __init apparmor_init(void) error = set_init_cxt(); if (error) { AA_ERROR("Failed to set context on init task\n"); - goto register_security_out; - } - - error = register_security(&apparmor_ops); - if (error) { - struct cred *cred = (struct cred *)current->real_cred; - aa_free_task_context(cred_cxt(cred)); - cred_cxt(cred) = NULL; - AA_ERROR("Unable to register AppArmor\n"); - goto register_security_out; + aa_free_root_ns(); + goto alloc_out; } + security_add_hooks(apparmor_hooks, ARRAY_SIZE(apparmor_hooks)); /* Report that AppArmor successfully initialized */ apparmor_initialized = 1; @@ -936,9 +916,6 @@ static int __init apparmor_init(void) return error; -register_security_out: - aa_free_root_ns(); - alloc_out: aa_destroy_aafs(); diff --git a/security/capability.c b/security/capability.c deleted file mode 100644 index 7d3f38fe02ba..000000000000 --- a/security/capability.c +++ /dev/null @@ -1,1158 +0,0 @@ -/* - * Capabilities Linux Security Module - * - * This is the default security module in case no other module is loaded. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - */ - -#include <linux/security.h> - -static int cap_binder_set_context_mgr(struct task_struct *mgr) -{ - return 0; -} - -static int cap_binder_transaction(struct task_struct *from, - struct task_struct *to) -{ - return 0; -} - -static int cap_binder_transfer_binder(struct task_struct *from, - struct task_struct *to) -{ - return 0; -} - -static int cap_binder_transfer_file(struct task_struct *from, - struct task_struct *to, struct file *file) -{ - return 0; -} - -static int cap_syslog(int type) -{ - return 0; -} - -static int cap_quotactl(int cmds, int type, int id, struct super_block *sb) -{ - return 0; -} - -static int cap_quota_on(struct dentry *dentry) -{ - return 0; -} - -static int cap_bprm_check_security(struct linux_binprm *bprm) -{ - return 0; -} - -static void cap_bprm_committing_creds(struct linux_binprm *bprm) -{ -} - -static void cap_bprm_committed_creds(struct linux_binprm *bprm) -{ -} - -static int cap_sb_alloc_security(struct super_block *sb) -{ - return 0; -} - -static void cap_sb_free_security(struct super_block *sb) -{ -} - -static int cap_sb_copy_data(char *orig, char *copy) -{ - return 0; -} - -static int cap_sb_remount(struct super_block *sb, void *data) -{ - return 0; -} - -static int cap_sb_kern_mount(struct super_block *sb, int flags, void *data) -{ - return 0; -} - -static int cap_sb_show_options(struct seq_file *m, struct super_block *sb) -{ - return 0; -} - -static int cap_sb_statfs(struct dentry *dentry) -{ - return 0; -} - -static int cap_sb_mount(const char *dev_name, struct path *path, - const char *type, unsigned long flags, void *data) -{ - return 0; -} - -static int cap_sb_umount(struct vfsmount *mnt, int flags) -{ - return 0; -} - -static int cap_sb_pivotroot(struct path *old_path, struct path *new_path) -{ - return 0; -} - -static int cap_sb_set_mnt_opts(struct super_block *sb, - struct security_mnt_opts *opts, - unsigned long kern_flags, - unsigned long *set_kern_flags) - -{ - if (unlikely(opts->num_mnt_opts)) - return -EOPNOTSUPP; - return 0; -} - -static int cap_sb_clone_mnt_opts(const struct super_block *oldsb, - struct super_block *newsb) -{ - return 0; -} - -static int cap_sb_parse_opts_str(char *options, struct security_mnt_opts *opts) -{ - return 0; -} - -static int cap_dentry_init_security(struct dentry *dentry, int mode, - struct qstr *name, void **ctx, - u32 *ctxlen) -{ - return -EOPNOTSUPP; -} - -static int cap_inode_alloc_security(struct inode *inode) -{ - return 0; -} - -static void cap_inode_free_security(struct inode *inode) -{ -} - -static int cap_inode_init_security(struct inode *inode, struct inode *dir, - const struct qstr *qstr, const char **name, - void **value, size_t *len) -{ - return -EOPNOTSUPP; -} - -static int cap_inode_create(struct inode *inode, struct dentry *dentry, - umode_t mask) -{ - return 0; -} - -static int cap_inode_link(struct dentry *old_dentry, struct inode *inode, - struct dentry *new_dentry) -{ - return 0; -} - -static int cap_inode_unlink(struct inode *inode, struct dentry *dentry) -{ - return 0; -} - -static int cap_inode_symlink(struct inode *inode, struct dentry *dentry, - const char *name) -{ - return 0; -} - -static int cap_inode_mkdir(struct inode *inode, struct dentry *dentry, - umode_t mask) -{ - return 0; -} - -static int cap_inode_rmdir(struct inode *inode, struct dentry *dentry) -{ - return 0; -} - -static int cap_inode_mknod(struct inode *inode, struct dentry *dentry, - umode_t mode, dev_t dev) -{ - return 0; -} - -static int cap_inode_rename(struct inode *old_inode, struct dentry *old_dentry, - struct inode *new_inode, struct dentry *new_dentry) -{ - return 0; -} - -static int cap_inode_readlink(struct dentry *dentry) -{ - return 0; -} - -static int cap_inode_follow_link(struct dentry *dentry, struct inode *inode, - bool rcu) -{ - return 0; -} - -static int cap_inode_permission(struct inode *inode, int mask) -{ - return 0; -} - -static int cap_inode_setattr(struct dentry *dentry, struct iattr *iattr) -{ - return 0; -} - -static int cap_inode_getattr(const struct path *path) -{ - return 0; -} - -static void cap_inode_post_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) -{ -} - -static int cap_inode_getxattr(struct dentry *dentry, const char *name) -{ - return 0; -} - -static int cap_inode_listxattr(struct dentry *dentry) -{ - return 0; -} - -static int cap_inode_getsecurity(const struct inode *inode, const char *name, - void **buffer, bool alloc) -{ - return -EOPNOTSUPP; -} - -static int cap_inode_setsecurity(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - return -EOPNOTSUPP; -} - -static int cap_inode_listsecurity(struct inode *inode, char *buffer, - size_t buffer_size) -{ - return 0; -} - -static void cap_inode_getsecid(const struct inode *inode, u32 *secid) -{ - *secid = 0; -} - -#ifdef CONFIG_SECURITY_PATH -static int cap_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode, - unsigned int dev) -{ - return 0; -} - -static int cap_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode) -{ - return 0; -} - -static int cap_path_rmdir(struct path *dir, struct dentry *dentry) -{ - return 0; -} - -static int cap_path_unlink(struct path *dir, struct dentry *dentry) -{ - return 0; -} - -static int cap_path_symlink(struct path *dir, struct dentry *dentry, - const char *old_name) -{ - return 0; -} - -static int cap_path_link(struct dentry *old_dentry, struct path *new_dir, - struct dentry *new_dentry) -{ - return 0; -} - -static int cap_path_rename(struct path *old_path, struct dentry *old_dentry, - struct path *new_path, struct dentry *new_dentry) -{ - return 0; -} - -static int cap_path_truncate(struct path *path) -{ - return 0; -} - -static int cap_path_chmod(struct path *path, umode_t mode) -{ - return 0; -} - -static int cap_path_chown(struct path *path, kuid_t uid, kgid_t gid) -{ - return 0; -} - -static int cap_path_chroot(struct path *root) -{ - return 0; -} -#endif - -static int cap_file_permission(struct file *file, int mask) -{ - return 0; -} - -static int cap_file_alloc_security(struct file *file) -{ - return 0; -} - -static void cap_file_free_security(struct file *file) -{ -} - -static int cap_file_ioctl(struct file *file, unsigned int command, - unsigned long arg) -{ - return 0; -} - -static int cap_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, - unsigned long prot) -{ - return 0; -} - -static int cap_file_lock(struct file *file, unsigned int cmd) -{ - return 0; -} - -static int cap_file_fcntl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - return 0; -} - -static void cap_file_set_fowner(struct file *file) -{ - return; -} - -static int cap_file_send_sigiotask(struct task_struct *tsk, - struct fown_struct *fown, int sig) -{ - return 0; -} - -static int cap_file_receive(struct file *file) -{ - return 0; -} - -static int cap_file_open(struct file *file, const struct cred *cred) -{ - return 0; -} - -static int cap_task_create(unsigned long clone_flags) -{ - return 0; -} - -static void cap_task_free(struct task_struct *task) -{ -} - -static int cap_cred_alloc_blank(struct cred *cred, gfp_t gfp) -{ - return 0; -} - -static void cap_cred_free(struct cred *cred) -{ -} - -static int cap_cred_prepare(struct cred *new, const struct cred *old, gfp_t gfp) -{ - return 0; -} - -static void cap_cred_transfer(struct cred *new, const struct cred *old) -{ -} - -static int cap_kernel_act_as(struct cred *new, u32 secid) -{ - return 0; -} - -static int cap_kernel_create_files_as(struct cred *new, struct inode *inode) -{ - return 0; -} - -static int cap_kernel_fw_from_file(struct file *file, char *buf, size_t size) -{ - return 0; -} - -static int cap_kernel_module_request(char *kmod_name) -{ - return 0; -} - -static int cap_kernel_module_from_file(struct file *file) -{ - return 0; -} - -static int cap_task_setpgid(struct task_struct *p, pid_t pgid) -{ - return 0; -} - -static int cap_task_getpgid(struct task_struct *p) -{ - return 0; -} - -static int cap_task_getsid(struct task_struct *p) -{ - return 0; -} - -static void cap_task_getsecid(struct task_struct *p, u32 *secid) -{ - *secid = 0; -} - -static int cap_task_getioprio(struct task_struct *p) -{ - return 0; -} - -static int cap_task_setrlimit(struct task_struct *p, unsigned int resource, - struct rlimit *new_rlim) -{ - return 0; -} - -static int cap_task_getscheduler(struct task_struct *p) -{ - return 0; -} - -static int cap_task_movememory(struct task_struct *p) -{ - return 0; -} - -static int cap_task_wait(struct task_struct *p) -{ - return 0; -} - -static int cap_task_kill(struct task_struct *p, struct siginfo *info, - int sig, u32 secid) -{ - return 0; -} - -static void cap_task_to_inode(struct task_struct *p, struct inode *inode) -{ -} - -static int cap_ipc_permission(struct kern_ipc_perm *ipcp, short flag) -{ - return 0; -} - -static void cap_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid) -{ - *secid = 0; -} - -static int cap_msg_msg_alloc_security(struct msg_msg *msg) -{ - return 0; -} - -static void cap_msg_msg_free_security(struct msg_msg *msg) -{ -} - -static int cap_msg_queue_alloc_security(struct msg_queue *msq) -{ - return 0; -} - -static void cap_msg_queue_free_security(struct msg_queue *msq) -{ -} - -static int cap_msg_queue_associate(struct msg_queue *msq, int msqflg) -{ - return 0; -} - -static int cap_msg_queue_msgctl(struct msg_queue *msq, int cmd) -{ - return 0; -} - -static int cap_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, - int msgflg) -{ - return 0; -} - -static int cap_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, - struct task_struct *target, long type, int mode) -{ - return 0; -} - -static int cap_shm_alloc_security(struct shmid_kernel *shp) -{ - return 0; -} - -static void cap_shm_free_security(struct shmid_kernel *shp) -{ -} - -static int cap_shm_associate(struct shmid_kernel *shp, int shmflg) -{ - return 0; -} - -static int cap_shm_shmctl(struct shmid_kernel *shp, int cmd) -{ - return 0; -} - -static int cap_shm_shmat(struct shmid_kernel *shp, char __user *shmaddr, - int shmflg) -{ - return 0; -} - -static int cap_sem_alloc_security(struct sem_array *sma) -{ - return 0; -} - -static void cap_sem_free_security(struct sem_array *sma) -{ -} - -static int cap_sem_associate(struct sem_array *sma, int semflg) -{ - return 0; -} - -static int cap_sem_semctl(struct sem_array *sma, int cmd) -{ - return 0; -} - -static int cap_sem_semop(struct sem_array *sma, struct sembuf *sops, - unsigned nsops, int alter) -{ - return 0; -} - -#ifdef CONFIG_SECURITY_NETWORK -static int cap_unix_stream_connect(struct sock *sock, struct sock *other, - struct sock *newsk) -{ - return 0; -} - -static int cap_unix_may_send(struct socket *sock, struct socket *other) -{ - return 0; -} - -static int cap_socket_create(int family, int type, int protocol, int kern) -{ - return 0; -} - -static int cap_socket_post_create(struct socket *sock, int family, int type, - int protocol, int kern) -{ - return 0; -} - -static int cap_socket_bind(struct socket *sock, struct sockaddr *address, - int addrlen) -{ - return 0; -} - -static int cap_socket_connect(struct socket *sock, struct sockaddr *address, - int addrlen) -{ - return 0; -} - -static int cap_socket_listen(struct socket *sock, int backlog) -{ - return 0; -} - -static int cap_socket_accept(struct socket *sock, struct socket *newsock) -{ - return 0; -} - -static int cap_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) -{ - return 0; -} - -static int cap_socket_recvmsg(struct socket *sock, struct msghdr *msg, - int size, int flags) -{ - return 0; -} - -static int cap_socket_getsockname(struct socket *sock) -{ - return 0; -} - -static int cap_socket_getpeername(struct socket *sock) -{ - return 0; -} - -static int cap_socket_setsockopt(struct socket *sock, int level, int optname) -{ - return 0; -} - -static int cap_socket_getsockopt(struct socket *sock, int level, int optname) -{ - return 0; -} - -static int cap_socket_shutdown(struct socket *sock, int how) -{ - return 0; -} - -static int cap_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) -{ - return 0; -} - -static int cap_socket_getpeersec_stream(struct socket *sock, - char __user *optval, - int __user *optlen, unsigned len) -{ - return -ENOPROTOOPT; -} - -static int cap_socket_getpeersec_dgram(struct socket *sock, - struct sk_buff *skb, u32 *secid) -{ - return -ENOPROTOOPT; -} - -static int cap_sk_alloc_security(struct sock *sk, int family, gfp_t priority) -{ - return 0; -} - -static void cap_sk_free_security(struct sock *sk) -{ -} - -static void cap_sk_clone_security(const struct sock *sk, struct sock *newsk) -{ -} - -static void cap_sk_getsecid(struct sock *sk, u32 *secid) -{ -} - -static void cap_sock_graft(struct sock *sk, struct socket *parent) -{ -} - -static int cap_inet_conn_request(struct sock *sk, struct sk_buff *skb, - struct request_sock *req) -{ - return 0; -} - -static void cap_inet_csk_clone(struct sock *newsk, - const struct request_sock *req) -{ -} - -static void cap_inet_conn_established(struct sock *sk, struct sk_buff *skb) -{ -} - -static int cap_secmark_relabel_packet(u32 secid) -{ - return 0; -} - -static void cap_secmark_refcount_inc(void) -{ -} - -static void cap_secmark_refcount_dec(void) -{ -} - -static void cap_req_classify_flow(const struct request_sock *req, - struct flowi *fl) -{ -} - -static int cap_tun_dev_alloc_security(void **security) -{ - return 0; -} - -static void cap_tun_dev_free_security(void *security) -{ -} - -static int cap_tun_dev_create(void) -{ - return 0; -} - -static int cap_tun_dev_attach_queue(void *security) -{ - return 0; -} - -static int cap_tun_dev_attach(struct sock *sk, void *security) -{ - return 0; -} - -static int cap_tun_dev_open(void *security) -{ - return 0; -} -#endif /* CONFIG_SECURITY_NETWORK */ - -#ifdef CONFIG_SECURITY_NETWORK_XFRM -static int cap_xfrm_policy_alloc_security(struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *sec_ctx, - gfp_t gfp) -{ - return 0; -} - -static int cap_xfrm_policy_clone_security(struct xfrm_sec_ctx *old_ctx, - struct xfrm_sec_ctx **new_ctxp) -{ - return 0; -} - -static void cap_xfrm_policy_free_security(struct xfrm_sec_ctx *ctx) -{ -} - -static int cap_xfrm_policy_delete_security(struct xfrm_sec_ctx *ctx) -{ - return 0; -} - -static int cap_xfrm_state_alloc(struct xfrm_state *x, - struct xfrm_user_sec_ctx *sec_ctx) -{ - return 0; -} - -static int cap_xfrm_state_alloc_acquire(struct xfrm_state *x, - struct xfrm_sec_ctx *polsec, - u32 secid) -{ - return 0; -} - -static void cap_xfrm_state_free_security(struct xfrm_state *x) -{ -} - -static int cap_xfrm_state_delete_security(struct xfrm_state *x) -{ - return 0; -} - -static int cap_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 sk_sid, u8 dir) -{ - return 0; -} - -static int cap_xfrm_state_pol_flow_match(struct xfrm_state *x, - struct xfrm_policy *xp, - const struct flowi *fl) -{ - return 1; -} - -static int cap_xfrm_decode_session(struct sk_buff *skb, u32 *fl, int ckall) -{ - return 0; -} - -#endif /* CONFIG_SECURITY_NETWORK_XFRM */ -static void cap_d_instantiate(struct dentry *dentry, struct inode *inode) -{ -} - -static int cap_getprocattr(struct task_struct *p, char *name, char **value) -{ - return -EINVAL; -} - -static int cap_setprocattr(struct task_struct *p, char *name, void *value, - size_t size) -{ - return -EINVAL; -} - -static int cap_ismaclabel(const char *name) -{ - return 0; -} - -static int cap_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) -{ - return -EOPNOTSUPP; -} - -static int cap_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) -{ - *secid = 0; - return 0; -} - -static void cap_release_secctx(char *secdata, u32 seclen) -{ -} - -static int cap_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen) -{ - return 0; -} - -static int cap_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen) -{ - return 0; -} - -static int cap_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) -{ - return -EOPNOTSUPP; -} -#ifdef CONFIG_KEYS -static int cap_key_alloc(struct key *key, const struct cred *cred, - unsigned long flags) -{ - return 0; -} - -static void cap_key_free(struct key *key) -{ -} - -static int cap_key_permission(key_ref_t key_ref, const struct cred *cred, - unsigned perm) -{ - return 0; -} - -static int cap_key_getsecurity(struct key *key, char **_buffer) -{ - *_buffer = NULL; - return 0; -} - -#endif /* CONFIG_KEYS */ - -#ifdef CONFIG_AUDIT -static int cap_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule) -{ - return 0; -} - -static int cap_audit_rule_known(struct audit_krule *krule) -{ - return 0; -} - -static int cap_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule, - struct audit_context *actx) -{ - return 0; -} - -static void cap_audit_rule_free(void *lsmrule) -{ -} -#endif /* CONFIG_AUDIT */ - -#define set_to_cap_if_null(ops, function) \ - do { \ - if (!ops->function) { \ - ops->function = cap_##function; \ - pr_debug("Had to override the " #function \ - " security operation with the default.\n");\ - } \ - } while (0) - -void __init security_fixup_ops(struct security_operations *ops) -{ - set_to_cap_if_null(ops, binder_set_context_mgr); - set_to_cap_if_null(ops, binder_transaction); - set_to_cap_if_null(ops, binder_transfer_binder); - set_to_cap_if_null(ops, binder_transfer_file); - set_to_cap_if_null(ops, ptrace_access_check); - set_to_cap_if_null(ops, ptrace_traceme); - set_to_cap_if_null(ops, capget); - set_to_cap_if_null(ops, capset); - set_to_cap_if_null(ops, capable); - set_to_cap_if_null(ops, quotactl); - set_to_cap_if_null(ops, quota_on); - set_to_cap_if_null(ops, syslog); - set_to_cap_if_null(ops, settime); - set_to_cap_if_null(ops, vm_enough_memory); - set_to_cap_if_null(ops, bprm_set_creds); - set_to_cap_if_null(ops, bprm_committing_creds); - set_to_cap_if_null(ops, bprm_committed_creds); - set_to_cap_if_null(ops, bprm_check_security); - set_to_cap_if_null(ops, bprm_secureexec); - set_to_cap_if_null(ops, sb_alloc_security); - set_to_cap_if_null(ops, sb_free_security); - set_to_cap_if_null(ops, sb_copy_data); - set_to_cap_if_null(ops, sb_remount); - set_to_cap_if_null(ops, sb_kern_mount); - set_to_cap_if_null(ops, sb_show_options); - set_to_cap_if_null(ops, sb_statfs); - set_to_cap_if_null(ops, sb_mount); - set_to_cap_if_null(ops, sb_umount); - set_to_cap_if_null(ops, sb_pivotroot); - set_to_cap_if_null(ops, sb_set_mnt_opts); - set_to_cap_if_null(ops, sb_clone_mnt_opts); - set_to_cap_if_null(ops, sb_parse_opts_str); - set_to_cap_if_null(ops, dentry_init_security); - set_to_cap_if_null(ops, inode_alloc_security); - set_to_cap_if_null(ops, inode_free_security); - set_to_cap_if_null(ops, inode_init_security); - set_to_cap_if_null(ops, inode_create); - set_to_cap_if_null(ops, inode_link); - set_to_cap_if_null(ops, inode_unlink); - set_to_cap_if_null(ops, inode_symlink); - set_to_cap_if_null(ops, inode_mkdir); - set_to_cap_if_null(ops, inode_rmdir); - set_to_cap_if_null(ops, inode_mknod); - set_to_cap_if_null(ops, inode_rename); - set_to_cap_if_null(ops, inode_readlink); - set_to_cap_if_null(ops, inode_follow_link); - set_to_cap_if_null(ops, inode_permission); - set_to_cap_if_null(ops, inode_setattr); - set_to_cap_if_null(ops, inode_getattr); - set_to_cap_if_null(ops, inode_setxattr); - set_to_cap_if_null(ops, inode_post_setxattr); - set_to_cap_if_null(ops, inode_getxattr); - set_to_cap_if_null(ops, inode_listxattr); - set_to_cap_if_null(ops, inode_removexattr); - set_to_cap_if_null(ops, inode_need_killpriv); - set_to_cap_if_null(ops, inode_killpriv); - set_to_cap_if_null(ops, inode_getsecurity); - set_to_cap_if_null(ops, inode_setsecurity); - set_to_cap_if_null(ops, inode_listsecurity); - set_to_cap_if_null(ops, inode_getsecid); -#ifdef CONFIG_SECURITY_PATH - set_to_cap_if_null(ops, path_mknod); - set_to_cap_if_null(ops, path_mkdir); - set_to_cap_if_null(ops, path_rmdir); - set_to_cap_if_null(ops, path_unlink); - set_to_cap_if_null(ops, path_symlink); - set_to_cap_if_null(ops, path_link); - set_to_cap_if_null(ops, path_rename); - set_to_cap_if_null(ops, path_truncate); - set_to_cap_if_null(ops, path_chmod); - set_to_cap_if_null(ops, path_chown); - set_to_cap_if_null(ops, path_chroot); -#endif - set_to_cap_if_null(ops, file_permission); - set_to_cap_if_null(ops, file_alloc_security); - set_to_cap_if_null(ops, file_free_security); - set_to_cap_if_null(ops, file_ioctl); - set_to_cap_if_null(ops, mmap_addr); - set_to_cap_if_null(ops, mmap_file); - set_to_cap_if_null(ops, file_mprotect); - set_to_cap_if_null(ops, file_lock); - set_to_cap_if_null(ops, file_fcntl); - set_to_cap_if_null(ops, file_set_fowner); - set_to_cap_if_null(ops, file_send_sigiotask); - set_to_cap_if_null(ops, file_receive); - set_to_cap_if_null(ops, file_open); - set_to_cap_if_null(ops, task_create); - set_to_cap_if_null(ops, task_free); - set_to_cap_if_null(ops, cred_alloc_blank); - set_to_cap_if_null(ops, cred_free); - set_to_cap_if_null(ops, cred_prepare); - set_to_cap_if_null(ops, cred_transfer); - set_to_cap_if_null(ops, kernel_act_as); - set_to_cap_if_null(ops, kernel_create_files_as); - set_to_cap_if_null(ops, kernel_fw_from_file); - set_to_cap_if_null(ops, kernel_module_request); - set_to_cap_if_null(ops, kernel_module_from_file); - set_to_cap_if_null(ops, task_fix_setuid); - set_to_cap_if_null(ops, task_setpgid); - set_to_cap_if_null(ops, task_getpgid); - set_to_cap_if_null(ops, task_getsid); - set_to_cap_if_null(ops, task_getsecid); - set_to_cap_if_null(ops, task_setnice); - set_to_cap_if_null(ops, task_setioprio); - set_to_cap_if_null(ops, task_getioprio); - set_to_cap_if_null(ops, task_setrlimit); - set_to_cap_if_null(ops, task_setscheduler); - set_to_cap_if_null(ops, task_getscheduler); - set_to_cap_if_null(ops, task_movememory); - set_to_cap_if_null(ops, task_wait); - set_to_cap_if_null(ops, task_kill); - set_to_cap_if_null(ops, task_prctl); - set_to_cap_if_null(ops, task_to_inode); - set_to_cap_if_null(ops, ipc_permission); - set_to_cap_if_null(ops, ipc_getsecid); - set_to_cap_if_null(ops, msg_msg_alloc_security); - set_to_cap_if_null(ops, msg_msg_free_security); - set_to_cap_if_null(ops, msg_queue_alloc_security); - set_to_cap_if_null(ops, msg_queue_free_security); - set_to_cap_if_null(ops, msg_queue_associate); - set_to_cap_if_null(ops, msg_queue_msgctl); - set_to_cap_if_null(ops, msg_queue_msgsnd); - set_to_cap_if_null(ops, msg_queue_msgrcv); - set_to_cap_if_null(ops, shm_alloc_security); - set_to_cap_if_null(ops, shm_free_security); - set_to_cap_if_null(ops, shm_associate); - set_to_cap_if_null(ops, shm_shmctl); - set_to_cap_if_null(ops, shm_shmat); - set_to_cap_if_null(ops, sem_alloc_security); - set_to_cap_if_null(ops, sem_free_security); - set_to_cap_if_null(ops, sem_associate); - set_to_cap_if_null(ops, sem_semctl); - set_to_cap_if_null(ops, sem_semop); - set_to_cap_if_null(ops, netlink_send); - set_to_cap_if_null(ops, d_instantiate); - set_to_cap_if_null(ops, getprocattr); - set_to_cap_if_null(ops, setprocattr); - set_to_cap_if_null(ops, ismaclabel); - set_to_cap_if_null(ops, secid_to_secctx); - set_to_cap_if_null(ops, secctx_to_secid); - set_to_cap_if_null(ops, release_secctx); - set_to_cap_if_null(ops, inode_notifysecctx); - set_to_cap_if_null(ops, inode_setsecctx); - set_to_cap_if_null(ops, inode_getsecctx); -#ifdef CONFIG_SECURITY_NETWORK - set_to_cap_if_null(ops, unix_stream_connect); - set_to_cap_if_null(ops, unix_may_send); - set_to_cap_if_null(ops, socket_create); - set_to_cap_if_null(ops, socket_post_create); - set_to_cap_if_null(ops, socket_bind); - set_to_cap_if_null(ops, socket_connect); - set_to_cap_if_null(ops, socket_listen); - set_to_cap_if_null(ops, socket_accept); - set_to_cap_if_null(ops, socket_sendmsg); - set_to_cap_if_null(ops, socket_recvmsg); - set_to_cap_if_null(ops, socket_getsockname); - set_to_cap_if_null(ops, socket_getpeername); - set_to_cap_if_null(ops, socket_setsockopt); - set_to_cap_if_null(ops, socket_getsockopt); - set_to_cap_if_null(ops, socket_shutdown); - set_to_cap_if_null(ops, socket_sock_rcv_skb); - set_to_cap_if_null(ops, socket_getpeersec_stream); - set_to_cap_if_null(ops, socket_getpeersec_dgram); - set_to_cap_if_null(ops, sk_alloc_security); - set_to_cap_if_null(ops, sk_free_security); - set_to_cap_if_null(ops, sk_clone_security); - set_to_cap_if_null(ops, sk_getsecid); - set_to_cap_if_null(ops, sock_graft); - set_to_cap_if_null(ops, inet_conn_request); - set_to_cap_if_null(ops, inet_csk_clone); - set_to_cap_if_null(ops, inet_conn_established); - set_to_cap_if_null(ops, secmark_relabel_packet); - set_to_cap_if_null(ops, secmark_refcount_inc); - set_to_cap_if_null(ops, secmark_refcount_dec); - set_to_cap_if_null(ops, req_classify_flow); - set_to_cap_if_null(ops, tun_dev_alloc_security); - set_to_cap_if_null(ops, tun_dev_free_security); - set_to_cap_if_null(ops, tun_dev_create); - set_to_cap_if_null(ops, tun_dev_open); - set_to_cap_if_null(ops, tun_dev_attach_queue); - set_to_cap_if_null(ops, tun_dev_attach); -#endif /* CONFIG_SECURITY_NETWORK */ -#ifdef CONFIG_SECURITY_NETWORK_XFRM - set_to_cap_if_null(ops, xfrm_policy_alloc_security); - set_to_cap_if_null(ops, xfrm_policy_clone_security); - set_to_cap_if_null(ops, xfrm_policy_free_security); - set_to_cap_if_null(ops, xfrm_policy_delete_security); - set_to_cap_if_null(ops, xfrm_state_alloc); - set_to_cap_if_null(ops, xfrm_state_alloc_acquire); - set_to_cap_if_null(ops, xfrm_state_free_security); - set_to_cap_if_null(ops, xfrm_state_delete_security); - set_to_cap_if_null(ops, xfrm_policy_lookup); - set_to_cap_if_null(ops, xfrm_state_pol_flow_match); - set_to_cap_if_null(ops, xfrm_decode_session); -#endif /* CONFIG_SECURITY_NETWORK_XFRM */ -#ifdef CONFIG_KEYS - set_to_cap_if_null(ops, key_alloc); - set_to_cap_if_null(ops, key_free); - set_to_cap_if_null(ops, key_permission); - set_to_cap_if_null(ops, key_getsecurity); -#endif /* CONFIG_KEYS */ -#ifdef CONFIG_AUDIT - set_to_cap_if_null(ops, audit_rule_init); - set_to_cap_if_null(ops, audit_rule_known); - set_to_cap_if_null(ops, audit_rule_match); - set_to_cap_if_null(ops, audit_rule_free); -#endif -} diff --git a/security/commoncap.c b/security/commoncap.c index f2875cd9f677..d103f5a4043d 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -12,7 +12,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> -#include <linux/security.h> +#include <linux/lsm_hooks.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/mman.h> @@ -53,11 +53,6 @@ static void warn_setuid_and_fcaps_mixed(const char *fname) } } -int cap_netlink_send(struct sock *sk, struct sk_buff *skb) -{ - return 0; -} - /** * cap_capable - Determine whether a task has a particular effective capability * @cred: The credentials to use @@ -941,7 +936,7 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, * @pages: The size of the mapping * * Determine whether the allocation of a new virtual mapping by the current - * task is permitted, returning 0 if permission is granted, -ve if not. + * task is permitted, returning 1 if permission is granted, 0 if not. */ int cap_vm_enough_memory(struct mm_struct *mm, long pages) { @@ -950,7 +945,7 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages) if (cap_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN, SECURITY_CAP_NOAUDIT) == 0) cap_sys_admin = 1; - return __vm_enough_memory(mm, pages, cap_sys_admin); + return cap_sys_admin; } /* @@ -981,3 +976,33 @@ int cap_mmap_file(struct file *file, unsigned long reqprot, { return 0; } + +#ifdef CONFIG_SECURITY + +struct security_hook_list capability_hooks[] = { + LSM_HOOK_INIT(capable, cap_capable), + LSM_HOOK_INIT(settime, cap_settime), + LSM_HOOK_INIT(ptrace_access_check, cap_ptrace_access_check), + LSM_HOOK_INIT(ptrace_traceme, cap_ptrace_traceme), + LSM_HOOK_INIT(capget, cap_capget), + LSM_HOOK_INIT(capset, cap_capset), + LSM_HOOK_INIT(bprm_set_creds, cap_bprm_set_creds), + LSM_HOOK_INIT(bprm_secureexec, cap_bprm_secureexec), + LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv), + LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv), + LSM_HOOK_INIT(mmap_addr, cap_mmap_addr), + LSM_HOOK_INIT(mmap_file, cap_mmap_file), + LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid), + LSM_HOOK_INIT(task_prctl, cap_task_prctl), + LSM_HOOK_INIT(task_setscheduler, cap_task_setscheduler), + LSM_HOOK_INIT(task_setioprio, cap_task_setioprio), + LSM_HOOK_INIT(task_setnice, cap_task_setnice), + LSM_HOOK_INIT(vm_enough_memory, cap_vm_enough_memory), +}; + +void __init capability_add_hooks(void) +{ + security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks)); +} + +#endif /* CONFIG_SECURITY */ diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c index 5e3bd72b299a..36fb6b527829 100644 --- a/security/integrity/digsig.c +++ b/security/integrity/digsig.c @@ -85,7 +85,7 @@ int __init integrity_init_keyring(const unsigned int id) return err; } -int __init integrity_load_x509(const unsigned int id, char *path) +int __init integrity_load_x509(const unsigned int id, const char *path) { key_ref_t key; char *data; diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 10f994307a04..1334e02ae8f4 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -296,6 +296,17 @@ static int evm_protect_xattr(struct dentry *dentry, const char *xattr_name, iint = integrity_iint_find(d_backing_inode(dentry)); if (iint && (iint->flags & IMA_NEW_FILE)) return 0; + + /* exception for pseudo filesystems */ + if (dentry->d_inode->i_sb->s_magic == TMPFS_MAGIC + || dentry->d_inode->i_sb->s_magic == SYSFS_MAGIC) + return 0; + + integrity_audit_msg(AUDIT_INTEGRITY_METADATA, + dentry->d_inode, dentry->d_name.name, + "update_metadata", + integrity_status_msg[evm_status], + -EPERM, 0); } out: if (evm_status != INTEGRITY_PASS) @@ -376,17 +387,16 @@ void evm_inode_post_setxattr(struct dentry *dentry, const char *xattr_name, * @xattr_name: pointer to the affected extended attribute name * * Update the HMAC stored in 'security.evm' to reflect removal of the xattr. + * + * No need to take the i_mutex lock here, as this function is called from + * vfs_removexattr() which takes the i_mutex. */ void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name) { - struct inode *inode = d_backing_inode(dentry); - if (!evm_initialized || !evm_protected_xattr(xattr_name)) return; - mutex_lock(&inode->i_mutex); evm_update_evmxattr(dentry, xattr_name, NULL, 0); - mutex_unlock(&inode->i_mutex); } /** diff --git a/security/integrity/iint.c b/security/integrity/iint.c index dbb6d141c3db..3d2f5b45c8cb 100644 --- a/security/integrity/iint.c +++ b/security/integrity/iint.c @@ -213,6 +213,9 @@ int __init integrity_read_file(const char *path, char **data) char *buf; int rc = -EINVAL; + if (!path || !*path) + return -EINVAL; + file = filp_open(path, O_RDONLY, 0); if (IS_ERR(file)) { rc = PTR_ERR(file); diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 8ee997dff139..e2a60c30df44 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -52,6 +52,16 @@ extern int ima_used_chip; extern int ima_hash_algo; extern int ima_appraise; +/* IMA event related data */ +struct ima_event_data { + struct integrity_iint_cache *iint; + struct file *file; + const unsigned char *filename; + struct evm_ima_xattr_data *xattr_value; + int xattr_len; + const char *violation; +}; + /* IMA template field data definition */ struct ima_field_data { u8 *data; @@ -61,12 +71,10 @@ struct ima_field_data { /* IMA template field definition */ struct ima_template_field { const char field_id[IMA_TEMPLATE_FIELD_ID_MAX_LEN]; - int (*field_init) (struct integrity_iint_cache *iint, struct file *file, - const unsigned char *filename, - struct evm_ima_xattr_data *xattr_value, - int xattr_len, struct ima_field_data *field_data); - void (*field_show) (struct seq_file *m, enum ima_show_type show, - struct ima_field_data *field_data); + int (*field_init)(struct ima_event_data *event_data, + struct ima_field_data *field_data); + void (*field_show)(struct seq_file *m, enum ima_show_type show, + struct ima_field_data *field_data); }; /* IMA template descriptor definition */ @@ -103,10 +111,11 @@ int ima_calc_field_array_hash(struct ima_field_data *field_data, struct ima_digest_data *hash); int __init ima_calc_boot_aggregate(struct ima_digest_data *hash); void ima_add_violation(struct file *file, const unsigned char *filename, + struct integrity_iint_cache *iint, const char *op, const char *cause); int ima_init_crypto(void); void ima_putc(struct seq_file *m, void *data, int datalen); -void ima_print_digest(struct seq_file *m, u8 *digest, int size); +void ima_print_digest(struct seq_file *m, u8 *digest, u32 size); struct ima_template_desc *ima_template_desc_current(void); int ima_init_template(void); @@ -140,10 +149,8 @@ void ima_store_measurement(struct integrity_iint_cache *iint, struct file *file, int xattr_len); void ima_audit_measurement(struct integrity_iint_cache *iint, const unsigned char *filename); -int ima_alloc_init_template(struct integrity_iint_cache *iint, - struct file *file, const unsigned char *filename, - struct evm_ima_xattr_data *xattr_value, - int xattr_len, struct ima_template_entry **entry); +int ima_alloc_init_template(struct ima_event_data *event_data, + struct ima_template_entry **entry); int ima_store_template(struct ima_template_entry *entry, int violation, struct inode *inode, const unsigned char *filename); void ima_free_template_entry(struct ima_template_entry *entry); diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index b8a27c5052d4..1d950fbb2aec 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -37,10 +37,8 @@ void ima_free_template_entry(struct ima_template_entry *entry) /* * ima_alloc_init_template - create and initialize a new template entry */ -int ima_alloc_init_template(struct integrity_iint_cache *iint, - struct file *file, const unsigned char *filename, - struct evm_ima_xattr_data *xattr_value, - int xattr_len, struct ima_template_entry **entry) +int ima_alloc_init_template(struct ima_event_data *event_data, + struct ima_template_entry **entry) { struct ima_template_desc *template_desc = ima_template_desc_current(); int i, result = 0; @@ -55,8 +53,7 @@ int ima_alloc_init_template(struct integrity_iint_cache *iint, struct ima_template_field *field = template_desc->fields[i]; u32 len; - result = field->field_init(iint, file, filename, - xattr_value, xattr_len, + result = field->field_init(event_data, &((*entry)->template_data[i])); if (result != 0) goto out; @@ -129,18 +126,20 @@ int ima_store_template(struct ima_template_entry *entry, * value is invalidated. */ void ima_add_violation(struct file *file, const unsigned char *filename, + struct integrity_iint_cache *iint, const char *op, const char *cause) { struct ima_template_entry *entry; struct inode *inode = file_inode(file); + struct ima_event_data event_data = {iint, file, filename, NULL, 0, + cause}; int violation = 1; int result; /* can overflow, only indicator */ atomic_long_inc(&ima_htable.violations); - result = ima_alloc_init_template(NULL, file, filename, - NULL, 0, &entry); + result = ima_alloc_init_template(&event_data, &entry); if (result < 0) { result = -ENOMEM; goto err_out; @@ -267,13 +266,14 @@ void ima_store_measurement(struct integrity_iint_cache *iint, int result = -ENOMEM; struct inode *inode = file_inode(file); struct ima_template_entry *entry; + struct ima_event_data event_data = {iint, file, filename, xattr_value, + xattr_len, NULL}; int violation = 0; if (iint->flags & IMA_MEASURED) return; - result = ima_alloc_init_template(iint, file, filename, - xattr_value, xattr_len, &entry); + result = ima_alloc_init_template(&event_data, &entry); if (result < 0) { integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, filename, op, audit_cause, result, 0); diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 4df493e4b3c9..1873b5536f80 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -378,10 +378,14 @@ int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name, result = ima_protect_xattr(dentry, xattr_name, xattr_value, xattr_value_len); if (result == 1) { + bool digsig; + if (!xattr_value_len || (xvalue->type >= IMA_XATTR_LAST)) return -EINVAL; - ima_reset_appraise_flags(d_backing_inode(dentry), - (xvalue->type == EVM_IMA_XATTR_DIGSIG) ? 1 : 0); + digsig = (xvalue->type == EVM_IMA_XATTR_DIGSIG); + if (!digsig && (ima_appraise & IMA_APPRAISE_ENFORCE)) + return -EPERM; + ima_reset_appraise_flags(d_backing_inode(dentry), digsig); result = 0; } return result; diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index 461215e5fd31..816d175da79a 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -190,9 +190,9 @@ static const struct file_operations ima_measurements_ops = { .release = seq_release, }; -void ima_print_digest(struct seq_file *m, u8 *digest, int size) +void ima_print_digest(struct seq_file *m, u8 *digest, u32 size) { - int i; + u32 i; for (i = 0; i < size; i++) seq_printf(m, "%02x", *(digest + i)); diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c index 5e4c29d174ee..e600cadd231c 100644 --- a/security/integrity/ima/ima_init.c +++ b/security/integrity/ima/ima_init.c @@ -24,12 +24,6 @@ #include <crypto/hash_info.h> #include "ima.h" -#ifdef CONFIG_IMA_X509_PATH -#define IMA_X509_PATH CONFIG_IMA_X509_PATH -#else -#define IMA_X509_PATH "/etc/keys/x509_ima.der" -#endif - /* name for boot aggregate entry */ static const char *boot_aggregate_name = "boot_aggregate"; int ima_used_chip; @@ -55,6 +49,8 @@ static int __init ima_add_boot_aggregate(void) const char *audit_cause = "ENOMEM"; struct ima_template_entry *entry; struct integrity_iint_cache tmp_iint, *iint = &tmp_iint; + struct ima_event_data event_data = {iint, NULL, boot_aggregate_name, + NULL, 0, NULL}; int result = -ENOMEM; int violation = 0; struct { @@ -76,8 +72,7 @@ static int __init ima_add_boot_aggregate(void) } } - result = ima_alloc_init_template(iint, NULL, boot_aggregate_name, - NULL, 0, &entry); + result = ima_alloc_init_template(&event_data, &entry); if (result < 0) { audit_cause = "alloc_entry"; goto err_out; @@ -103,7 +98,7 @@ void __init ima_load_x509(void) int unset_flags = ima_policy_flag & IMA_APPRAISE; ima_policy_flag &= ~unset_flags; - integrity_load_x509(INTEGRITY_KEYRING_IMA, IMA_X509_PATH); + integrity_load_x509(INTEGRITY_KEYRING_IMA, CONFIG_IMA_X509_PATH); ima_policy_flag |= unset_flags; } #endif diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index eeee00dce729..c21f09bf8b99 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -106,9 +106,10 @@ static void ima_rdwr_violation_check(struct file *file, *pathname = ima_d_path(&file->f_path, pathbuf); if (send_tomtou) - ima_add_violation(file, *pathname, "invalid_pcr", "ToMToU"); + ima_add_violation(file, *pathname, iint, + "invalid_pcr", "ToMToU"); if (send_writers) - ima_add_violation(file, *pathname, + ima_add_violation(file, *pathname, iint, "invalid_pcr", "open_writers"); } diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index d1eefb9d65fb..3997e206f82d 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -27,6 +27,8 @@ #define IMA_UID 0x0008 #define IMA_FOWNER 0x0010 #define IMA_FSUUID 0x0020 +#define IMA_INMASK 0x0040 +#define IMA_EUID 0x0080 #define UNKNOWN 0 #define MEASURE 0x0001 /* same as IMA_MEASURE */ @@ -42,6 +44,8 @@ enum lsm_rule_types { LSM_OBJ_USER, LSM_OBJ_ROLE, LSM_OBJ_TYPE, LSM_SUBJ_USER, LSM_SUBJ_ROLE, LSM_SUBJ_TYPE }; +enum policy_types { ORIGINAL_TCB = 1, DEFAULT_TCB }; + struct ima_rule_entry { struct list_head list; int action; @@ -70,7 +74,7 @@ struct ima_rule_entry { * normal users can easily run the machine out of memory simply building * and running executables. */ -static struct ima_rule_entry default_rules[] = { +static struct ima_rule_entry dont_measure_rules[] = { {.action = DONT_MEASURE, .fsmagic = PROC_SUPER_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = SYSFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = DEBUGFS_MAGIC, .flags = IMA_FSMAGIC}, @@ -79,12 +83,31 @@ static struct ima_rule_entry default_rules[] = { {.action = DONT_MEASURE, .fsmagic = BINFMTFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = SECURITYFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_MEASURE, .fsmagic = SELINUX_MAGIC, .flags = IMA_FSMAGIC}, + {.action = DONT_MEASURE, .fsmagic = CGROUP_SUPER_MAGIC, + .flags = IMA_FSMAGIC}, + {.action = DONT_MEASURE, .fsmagic = NSFS_MAGIC, .flags = IMA_FSMAGIC} +}; + +static struct ima_rule_entry original_measurement_rules[] = { {.action = MEASURE, .func = MMAP_CHECK, .mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, {.action = MEASURE, .func = BPRM_CHECK, .mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, - {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, .uid = GLOBAL_ROOT_UID, - .flags = IMA_FUNC | IMA_MASK | IMA_UID}, + {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, + .uid = GLOBAL_ROOT_UID, .flags = IMA_FUNC | IMA_MASK | IMA_UID}, + {.action = MEASURE, .func = MODULE_CHECK, .flags = IMA_FUNC}, + {.action = MEASURE, .func = FIRMWARE_CHECK, .flags = IMA_FUNC}, +}; + +static struct ima_rule_entry default_measurement_rules[] = { + {.action = MEASURE, .func = MMAP_CHECK, .mask = MAY_EXEC, + .flags = IMA_FUNC | IMA_MASK}, + {.action = MEASURE, .func = BPRM_CHECK, .mask = MAY_EXEC, + .flags = IMA_FUNC | IMA_MASK}, + {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, + .uid = GLOBAL_ROOT_UID, .flags = IMA_FUNC | IMA_INMASK | IMA_EUID}, + {.action = MEASURE, .func = FILE_CHECK, .mask = MAY_READ, + .uid = GLOBAL_ROOT_UID, .flags = IMA_FUNC | IMA_INMASK | IMA_UID}, {.action = MEASURE, .func = MODULE_CHECK, .flags = IMA_FUNC}, {.action = MEASURE, .func = FIRMWARE_CHECK, .flags = IMA_FUNC}, }; @@ -99,6 +122,7 @@ static struct ima_rule_entry default_appraise_rules[] = { {.action = DONT_APPRAISE, .fsmagic = BINFMTFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = SECURITYFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = SELINUX_MAGIC, .flags = IMA_FSMAGIC}, + {.action = DONT_APPRAISE, .fsmagic = NSFS_MAGIC, .flags = IMA_FSMAGIC}, {.action = DONT_APPRAISE, .fsmagic = CGROUP_SUPER_MAGIC, .flags = IMA_FSMAGIC}, #ifndef CONFIG_IMA_APPRAISE_SIGNED_INIT {.action = APPRAISE, .fowner = GLOBAL_ROOT_UID, .flags = IMA_FOWNER}, @@ -115,14 +139,29 @@ static struct list_head *ima_rules; static DEFINE_MUTEX(ima_rules_mutex); -static bool ima_use_tcb __initdata; +static int ima_policy __initdata; static int __init default_measure_policy_setup(char *str) { - ima_use_tcb = 1; + if (ima_policy) + return 1; + + ima_policy = ORIGINAL_TCB; return 1; } __setup("ima_tcb", default_measure_policy_setup); +static int __init policy_setup(char *str) +{ + if (ima_policy) + return 1; + + if (strcmp(str, "tcb") == 0) + ima_policy = DEFAULT_TCB; + + return 1; +} +__setup("ima_policy=", policy_setup); + static bool ima_use_appraise_tcb __initdata; static int __init default_appraise_policy_setup(char *str) { @@ -182,6 +221,9 @@ static bool ima_match_rules(struct ima_rule_entry *rule, if ((rule->flags & IMA_MASK) && (rule->mask != mask && func != POST_SETATTR)) return false; + if ((rule->flags & IMA_INMASK) && + (!(rule->mask & mask) && func != POST_SETATTR)) + return false; if ((rule->flags & IMA_FSMAGIC) && rule->fsmagic != inode->i_sb->s_magic) return false; @@ -190,6 +232,16 @@ static bool ima_match_rules(struct ima_rule_entry *rule, return false; if ((rule->flags & IMA_UID) && !uid_eq(rule->uid, cred->uid)) return false; + if (rule->flags & IMA_EUID) { + if (has_capability_noaudit(current, CAP_SETUID)) { + if (!uid_eq(rule->uid, cred->euid) + && !uid_eq(rule->uid, cred->suid) + && !uid_eq(rule->uid, cred->uid)) + return false; + } else if (!uid_eq(rule->uid, cred->euid)) + return false; + } + if ((rule->flags & IMA_FOWNER) && !uid_eq(rule->fowner, inode->i_uid)) return false; for (i = 0; i < MAX_LSM_RULES; i++) { @@ -333,21 +385,31 @@ void __init ima_init_policy(void) { int i, measure_entries, appraise_entries; - /* if !ima_use_tcb set entries = 0 so we load NO default rules */ - measure_entries = ima_use_tcb ? ARRAY_SIZE(default_rules) : 0; + /* if !ima_policy set entries = 0 so we load NO default rules */ + measure_entries = ima_policy ? ARRAY_SIZE(dont_measure_rules) : 0; appraise_entries = ima_use_appraise_tcb ? ARRAY_SIZE(default_appraise_rules) : 0; - for (i = 0; i < measure_entries + appraise_entries; i++) { - if (i < measure_entries) - list_add_tail(&default_rules[i].list, - &ima_default_rules); - else { - int j = i - measure_entries; + for (i = 0; i < measure_entries; i++) + list_add_tail(&dont_measure_rules[i].list, &ima_default_rules); - list_add_tail(&default_appraise_rules[j].list, + switch (ima_policy) { + case ORIGINAL_TCB: + for (i = 0; i < ARRAY_SIZE(original_measurement_rules); i++) + list_add_tail(&original_measurement_rules[i].list, &ima_default_rules); - } + break; + case DEFAULT_TCB: + for (i = 0; i < ARRAY_SIZE(default_measurement_rules); i++) + list_add_tail(&default_measurement_rules[i].list, + &ima_default_rules); + default: + break; + } + + for (i = 0; i < appraise_entries; i++) { + list_add_tail(&default_appraise_rules[i].list, + &ima_default_rules); } ima_rules = &ima_default_rules; @@ -373,7 +435,8 @@ enum { Opt_audit, Opt_obj_user, Opt_obj_role, Opt_obj_type, Opt_subj_user, Opt_subj_role, Opt_subj_type, - Opt_func, Opt_mask, Opt_fsmagic, Opt_uid, Opt_fowner, + Opt_func, Opt_mask, Opt_fsmagic, + Opt_uid, Opt_euid, Opt_fowner, Opt_appraise_type, Opt_fsuuid, Opt_permit_directio }; @@ -394,6 +457,7 @@ static match_table_t policy_tokens = { {Opt_fsmagic, "fsmagic=%s"}, {Opt_fsuuid, "fsuuid=%s"}, {Opt_uid, "uid=%s"}, + {Opt_euid, "euid=%s"}, {Opt_fowner, "fowner=%s"}, {Opt_appraise_type, "appraise_type=%s"}, {Opt_permit_directio, "permit_directio"}, @@ -435,6 +499,7 @@ static void ima_log_string(struct audit_buffer *ab, char *key, char *value) static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) { struct audit_buffer *ab; + char *from; char *p; int result = 0; @@ -525,18 +590,23 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) if (entry->mask) result = -EINVAL; - if ((strcmp(args[0].from, "MAY_EXEC")) == 0) + from = args[0].from; + if (*from == '^') + from++; + + if ((strcmp(from, "MAY_EXEC")) == 0) entry->mask = MAY_EXEC; - else if (strcmp(args[0].from, "MAY_WRITE") == 0) + else if (strcmp(from, "MAY_WRITE") == 0) entry->mask = MAY_WRITE; - else if (strcmp(args[0].from, "MAY_READ") == 0) + else if (strcmp(from, "MAY_READ") == 0) entry->mask = MAY_READ; - else if (strcmp(args[0].from, "MAY_APPEND") == 0) + else if (strcmp(from, "MAY_APPEND") == 0) entry->mask = MAY_APPEND; else result = -EINVAL; if (!result) - entry->flags |= IMA_MASK; + entry->flags |= (*args[0].from == '^') + ? IMA_INMASK : IMA_MASK; break; case Opt_fsmagic: ima_log_string(ab, "fsmagic", args[0].from); @@ -566,6 +636,9 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) break; case Opt_uid: ima_log_string(ab, "uid", args[0].from); + case Opt_euid: + if (token == Opt_euid) + ima_log_string(ab, "euid", args[0].from); if (uid_valid(entry->uid)) { result = -EINVAL; @@ -574,11 +647,14 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) result = kstrtoul(args[0].from, 10, &lnum); if (!result) { - entry->uid = make_kuid(current_user_ns(), (uid_t)lnum); - if (!uid_valid(entry->uid) || (((uid_t)lnum) != lnum)) + entry->uid = make_kuid(current_user_ns(), + (uid_t) lnum); + if (!uid_valid(entry->uid) || + (uid_t)lnum != lnum) result = -EINVAL; else - entry->flags |= IMA_UID; + entry->flags |= (token == Opt_uid) + ? IMA_UID : IMA_EUID; } break; case Opt_fowner: diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c index bcfc36cbde6a..2934e3d377f1 100644 --- a/security/integrity/ima/ima_template_lib.c +++ b/security/integrity/ima/ima_template_lib.c @@ -70,7 +70,8 @@ static void ima_show_template_data_ascii(struct seq_file *m, enum data_formats datafmt, struct ima_field_data *field_data) { - u8 *buf_ptr = field_data->data, buflen = field_data->len; + u8 *buf_ptr = field_data->data; + u32 buflen = field_data->len; switch (datafmt) { case DATA_FMT_DIGEST_WITH_ALGO: @@ -195,9 +196,7 @@ static int ima_eventdigest_init_common(u8 *digest, u32 digestsize, u8 hash_algo, /* * This function writes the digest of an event (with size limit). */ -int ima_eventdigest_init(struct integrity_iint_cache *iint, struct file *file, - const unsigned char *filename, - struct evm_ima_xattr_data *xattr_value, int xattr_len, +int ima_eventdigest_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { struct { @@ -211,25 +210,25 @@ int ima_eventdigest_init(struct integrity_iint_cache *iint, struct file *file, memset(&hash, 0, sizeof(hash)); - if (!iint) /* recording a violation. */ + if (event_data->violation) /* recording a violation. */ goto out; - if (ima_template_hash_algo_allowed(iint->ima_hash->algo)) { - cur_digest = iint->ima_hash->digest; - cur_digestsize = iint->ima_hash->length; + if (ima_template_hash_algo_allowed(event_data->iint->ima_hash->algo)) { + cur_digest = event_data->iint->ima_hash->digest; + cur_digestsize = event_data->iint->ima_hash->length; goto out; } - if (!file) /* missing info to re-calculate the digest */ + if (!event_data->file) /* missing info to re-calculate the digest */ return -EINVAL; - inode = file_inode(file); + inode = file_inode(event_data->file); hash.hdr.algo = ima_template_hash_algo_allowed(ima_hash_algo) ? ima_hash_algo : HASH_ALGO_SHA1; - result = ima_calc_file_hash(file, &hash.hdr); + result = ima_calc_file_hash(event_data->file, &hash.hdr); if (result) { integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, - filename, "collect_data", + event_data->filename, "collect_data", "failed", result, 0); return result; } @@ -243,48 +242,43 @@ out: /* * This function writes the digest of an event (without size limit). */ -int ima_eventdigest_ng_init(struct integrity_iint_cache *iint, - struct file *file, const unsigned char *filename, - struct evm_ima_xattr_data *xattr_value, - int xattr_len, struct ima_field_data *field_data) +int ima_eventdigest_ng_init(struct ima_event_data *event_data, + struct ima_field_data *field_data) { u8 *cur_digest = NULL, hash_algo = HASH_ALGO_SHA1; u32 cur_digestsize = 0; - /* If iint is NULL, we are recording a violation. */ - if (!iint) + if (event_data->violation) /* recording a violation. */ goto out; - cur_digest = iint->ima_hash->digest; - cur_digestsize = iint->ima_hash->length; + cur_digest = event_data->iint->ima_hash->digest; + cur_digestsize = event_data->iint->ima_hash->length; - hash_algo = iint->ima_hash->algo; + hash_algo = event_data->iint->ima_hash->algo; out: return ima_eventdigest_init_common(cur_digest, cur_digestsize, hash_algo, field_data); } -static int ima_eventname_init_common(struct integrity_iint_cache *iint, - struct file *file, - const unsigned char *filename, +static int ima_eventname_init_common(struct ima_event_data *event_data, struct ima_field_data *field_data, bool size_limit) { const char *cur_filename = NULL; u32 cur_filename_len = 0; - BUG_ON(filename == NULL && file == NULL); + BUG_ON(event_data->filename == NULL && event_data->file == NULL); - if (filename) { - cur_filename = filename; - cur_filename_len = strlen(filename); + if (event_data->filename) { + cur_filename = event_data->filename; + cur_filename_len = strlen(event_data->filename); if (!size_limit || cur_filename_len <= IMA_EVENT_NAME_LEN_MAX) goto out; } - if (file) { - cur_filename = file->f_path.dentry->d_name.name; + if (event_data->file) { + cur_filename = event_data->file->f_path.dentry->d_name.name; cur_filename_len = strlen(cur_filename); } else /* @@ -300,36 +294,30 @@ out: /* * This function writes the name of an event (with size limit). */ -int ima_eventname_init(struct integrity_iint_cache *iint, struct file *file, - const unsigned char *filename, - struct evm_ima_xattr_data *xattr_value, int xattr_len, +int ima_eventname_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { - return ima_eventname_init_common(iint, file, filename, - field_data, true); + return ima_eventname_init_common(event_data, field_data, true); } /* * This function writes the name of an event (without size limit). */ -int ima_eventname_ng_init(struct integrity_iint_cache *iint, struct file *file, - const unsigned char *filename, - struct evm_ima_xattr_data *xattr_value, int xattr_len, +int ima_eventname_ng_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { - return ima_eventname_init_common(iint, file, filename, - field_data, false); + return ima_eventname_init_common(event_data, field_data, false); } /* * ima_eventsig_init - include the file signature as part of the template data */ -int ima_eventsig_init(struct integrity_iint_cache *iint, struct file *file, - const unsigned char *filename, - struct evm_ima_xattr_data *xattr_value, int xattr_len, +int ima_eventsig_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { enum data_formats fmt = DATA_FMT_HEX; + struct evm_ima_xattr_data *xattr_value = event_data->xattr_value; + int xattr_len = event_data->xattr_len; int rc = 0; if ((!xattr_value) || (xattr_value->type != EVM_IMA_XATTR_DIGSIG)) diff --git a/security/integrity/ima/ima_template_lib.h b/security/integrity/ima/ima_template_lib.h index 63f6b52cb1c2..c344530c1d69 100644 --- a/security/integrity/ima/ima_template_lib.h +++ b/security/integrity/ima/ima_template_lib.h @@ -26,24 +26,14 @@ void ima_show_template_string(struct seq_file *m, enum ima_show_type show, struct ima_field_data *field_data); void ima_show_template_sig(struct seq_file *m, enum ima_show_type show, struct ima_field_data *field_data); -int ima_eventdigest_init(struct integrity_iint_cache *iint, struct file *file, - const unsigned char *filename, - struct evm_ima_xattr_data *xattr_value, int xattr_len, +int ima_eventdigest_init(struct ima_event_data *event_data, struct ima_field_data *field_data); -int ima_eventname_init(struct integrity_iint_cache *iint, struct file *file, - const unsigned char *filename, - struct evm_ima_xattr_data *xattr_value, int xattr_len, +int ima_eventname_init(struct ima_event_data *event_data, struct ima_field_data *field_data); -int ima_eventdigest_ng_init(struct integrity_iint_cache *iint, - struct file *file, const unsigned char *filename, - struct evm_ima_xattr_data *xattr_value, - int xattr_len, struct ima_field_data *field_data); -int ima_eventname_ng_init(struct integrity_iint_cache *iint, struct file *file, - const unsigned char *filename, - struct evm_ima_xattr_data *xattr_value, int xattr_len, +int ima_eventdigest_ng_init(struct ima_event_data *event_data, + struct ima_field_data *field_data); +int ima_eventname_ng_init(struct ima_event_data *event_data, struct ima_field_data *field_data); -int ima_eventsig_init(struct integrity_iint_cache *iint, struct file *file, - const unsigned char *filename, - struct evm_ima_xattr_data *xattr_value, int xattr_len, +int ima_eventsig_init(struct ima_event_data *event_data, struct ima_field_data *field_data); #endif /* __LINUX_IMA_TEMPLATE_LIB_H */ diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index 0fc9519fefa9..9c6168709d3b 100644 --- a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -135,7 +135,7 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen, const char *digest, int digestlen); int __init integrity_init_keyring(const unsigned int id); -int __init integrity_load_x509(const unsigned int id, char *path); +int __init integrity_load_x509(const unsigned int id, const char *path); #else static inline int integrity_digsig_verify(const unsigned int id, diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 1d34277dc402..4ed98107ace3 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -282,7 +282,7 @@ static void dump_common_audit_data(struct audit_buffer *ab, pid_t pid = task_pid_nr(tsk); if (pid) { char comm[sizeof(tsk->comm)]; - audit_log_format(ab, " pid=%d comm=", pid); + audit_log_format(ab, " opid=%d ocomm=", pid); audit_log_untrustedstring(ab, memcpy(comm, tsk->comm, sizeof(comm))); } diff --git a/security/security.c b/security/security.c index 04c8feca081a..595fffab48b0 100644 --- a/security/security.c +++ b/security/security.c @@ -16,7 +16,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> -#include <linux/security.h> +#include <linux/lsm_hooks.h> #include <linux/integrity.h> #include <linux/ima.h> #include <linux/evm.h> @@ -29,24 +29,13 @@ #define MAX_LSM_EVM_XATTR 2 +/* Maximum number of letters for an LSM name string */ +#define SECURITY_NAME_MAX 10 + /* Boot-time LSM user choice */ static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] = CONFIG_DEFAULT_SECURITY; -static struct security_operations *security_ops; -static struct security_operations default_security_ops = { - .name = "default", -}; - -static inline int __init verify(struct security_operations *ops) -{ - /* verify the security_operations structure exists */ - if (!ops) - return -EINVAL; - security_fixup_ops(ops); - return 0; -} - static void __init do_security_initcalls(void) { initcall_t *call; @@ -64,20 +53,27 @@ static void __init do_security_initcalls(void) */ int __init security_init(void) { - printk(KERN_INFO "Security Framework initialized\n"); + pr_info("Security Framework initialized\n"); - security_fixup_ops(&default_security_ops); - security_ops = &default_security_ops; + /* + * Always load the capability module. + */ + capability_add_hooks(); +#ifdef CONFIG_SECURITY_YAMA_STACKED + /* + * If Yama is configured for stacking load it next. + */ + yama_add_hooks(); +#endif + /* + * Load the chosen module if there is one. + * This will also find yama if it is stacking + */ do_security_initcalls(); return 0; } -void reset_security_ops(void) -{ - security_ops = &default_security_ops; -} - /* Save user chosen LSM */ static int __init choose_lsm(char *str) { @@ -88,7 +84,7 @@ __setup("security=", choose_lsm); /** * security_module_enable - Load given security module on boot ? - * @ops: a pointer to the struct security_operations that is to be checked. + * @module: the name of the module * * Each LSM must pass this method before registering its own operations * to avoid security registration races. This method may also be used @@ -100,84 +96,76 @@ __setup("security=", choose_lsm); * choose an alternate LSM at boot time. * Otherwise, return false. */ -int __init security_module_enable(struct security_operations *ops) +int __init security_module_enable(const char *module) { - return !strcmp(ops->name, chosen_lsm); + return !strcmp(module, chosen_lsm); } -/** - * register_security - registers a security framework with the kernel - * @ops: a pointer to the struct security_options that is to be registered +/* + * Hook list operation macros. * - * This function allows a security module to register itself with the - * kernel security subsystem. Some rudimentary checking is done on the @ops - * value passed to this function. You'll need to check first if your LSM - * is allowed to register its @ops by calling security_module_enable(@ops). + * call_void_hook: + * This is a hook that does not return a value. * - * If there is already a security module registered with the kernel, - * an error will be returned. Otherwise %0 is returned on success. + * call_int_hook: + * This is a hook that returns a value. */ -int __init register_security(struct security_operations *ops) -{ - if (verify(ops)) { - printk(KERN_DEBUG "%s could not verify " - "security_operations structure.\n", __func__); - return -EINVAL; - } - - if (security_ops != &default_security_ops) - return -EAGAIN; - security_ops = ops; - - return 0; -} +#define call_void_hook(FUNC, ...) \ + do { \ + struct security_hook_list *P; \ + \ + list_for_each_entry(P, &security_hook_heads.FUNC, list) \ + P->hook.FUNC(__VA_ARGS__); \ + } while (0) + +#define call_int_hook(FUNC, IRC, ...) ({ \ + int RC = IRC; \ + do { \ + struct security_hook_list *P; \ + \ + list_for_each_entry(P, &security_hook_heads.FUNC, list) { \ + RC = P->hook.FUNC(__VA_ARGS__); \ + if (RC != 0) \ + break; \ + } \ + } while (0); \ + RC; \ +}) /* Security operations */ int security_binder_set_context_mgr(struct task_struct *mgr) { - return security_ops->binder_set_context_mgr(mgr); + return call_int_hook(binder_set_context_mgr, 0, mgr); } int security_binder_transaction(struct task_struct *from, struct task_struct *to) { - return security_ops->binder_transaction(from, to); + return call_int_hook(binder_transaction, 0, from, to); } int security_binder_transfer_binder(struct task_struct *from, struct task_struct *to) { - return security_ops->binder_transfer_binder(from, to); + return call_int_hook(binder_transfer_binder, 0, from, to); } int security_binder_transfer_file(struct task_struct *from, struct task_struct *to, struct file *file) { - return security_ops->binder_transfer_file(from, to, file); + return call_int_hook(binder_transfer_file, 0, from, to, file); } int security_ptrace_access_check(struct task_struct *child, unsigned int mode) { -#ifdef CONFIG_SECURITY_YAMA_STACKED - int rc; - rc = yama_ptrace_access_check(child, mode); - if (rc) - return rc; -#endif - return security_ops->ptrace_access_check(child, mode); + return call_int_hook(ptrace_access_check, 0, child, mode); } int security_ptrace_traceme(struct task_struct *parent) { -#ifdef CONFIG_SECURITY_YAMA_STACKED - int rc; - rc = yama_ptrace_traceme(parent); - if (rc) - return rc; -#endif - return security_ops->ptrace_traceme(parent); + return call_int_hook(ptrace_traceme, 0, parent); } int security_capget(struct task_struct *target, @@ -185,7 +173,8 @@ int security_capget(struct task_struct *target, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - return security_ops->capget(target, effective, inheritable, permitted); + return call_int_hook(capget, 0, target, + effective, inheritable, permitted); } int security_capset(struct cred *new, const struct cred *old, @@ -193,57 +182,75 @@ int security_capset(struct cred *new, const struct cred *old, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) { - return security_ops->capset(new, old, - effective, inheritable, permitted); + return call_int_hook(capset, 0, new, old, + effective, inheritable, permitted); } int security_capable(const struct cred *cred, struct user_namespace *ns, int cap) { - return security_ops->capable(cred, ns, cap, SECURITY_CAP_AUDIT); + return call_int_hook(capable, 0, cred, ns, cap, SECURITY_CAP_AUDIT); } int security_capable_noaudit(const struct cred *cred, struct user_namespace *ns, int cap) { - return security_ops->capable(cred, ns, cap, SECURITY_CAP_NOAUDIT); + return call_int_hook(capable, 0, cred, ns, cap, SECURITY_CAP_NOAUDIT); } int security_quotactl(int cmds, int type, int id, struct super_block *sb) { - return security_ops->quotactl(cmds, type, id, sb); + return call_int_hook(quotactl, 0, cmds, type, id, sb); } int security_quota_on(struct dentry *dentry) { - return security_ops->quota_on(dentry); + return call_int_hook(quota_on, 0, dentry); } int security_syslog(int type) { - return security_ops->syslog(type); + return call_int_hook(syslog, 0, type); } int security_settime(const struct timespec *ts, const struct timezone *tz) { - return security_ops->settime(ts, tz); + return call_int_hook(settime, 0, ts, tz); } int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { - return security_ops->vm_enough_memory(mm, pages); + struct security_hook_list *hp; + int cap_sys_admin = 1; + int rc; + + /* + * The module will respond with a positive value if + * it thinks the __vm_enough_memory() call should be + * made with the cap_sys_admin set. If all of the modules + * agree that it should be set it will. If any module + * thinks it should not be set it won't. + */ + list_for_each_entry(hp, &security_hook_heads.vm_enough_memory, list) { + rc = hp->hook.vm_enough_memory(mm, pages); + if (rc <= 0) { + cap_sys_admin = 0; + break; + } + } + return __vm_enough_memory(mm, pages, cap_sys_admin); } int security_bprm_set_creds(struct linux_binprm *bprm) { - return security_ops->bprm_set_creds(bprm); + return call_int_hook(bprm_set_creds, 0, bprm); } int security_bprm_check(struct linux_binprm *bprm) { int ret; - ret = security_ops->bprm_check_security(bprm); + ret = call_int_hook(bprm_check_security, 0, bprm); if (ret) return ret; return ima_bprm_check(bprm); @@ -251,69 +258,69 @@ int security_bprm_check(struct linux_binprm *bprm) void security_bprm_committing_creds(struct linux_binprm *bprm) { - security_ops->bprm_committing_creds(bprm); + call_void_hook(bprm_committing_creds, bprm); } void security_bprm_committed_creds(struct linux_binprm *bprm) { - security_ops->bprm_committed_creds(bprm); + call_void_hook(bprm_committed_creds, bprm); } int security_bprm_secureexec(struct linux_binprm *bprm) { - return security_ops->bprm_secureexec(bprm); + return call_int_hook(bprm_secureexec, 0, bprm); } int security_sb_alloc(struct super_block *sb) { - return security_ops->sb_alloc_security(sb); + return call_int_hook(sb_alloc_security, 0, sb); } void security_sb_free(struct super_block *sb) { - security_ops->sb_free_security(sb); + call_void_hook(sb_free_security, sb); } int security_sb_copy_data(char *orig, char *copy) { - return security_ops->sb_copy_data(orig, copy); + return call_int_hook(sb_copy_data, 0, orig, copy); } EXPORT_SYMBOL(security_sb_copy_data); int security_sb_remount(struct super_block *sb, void *data) { - return security_ops->sb_remount(sb, data); + return call_int_hook(sb_remount, 0, sb, data); } int security_sb_kern_mount(struct super_block *sb, int flags, void *data) { - return security_ops->sb_kern_mount(sb, flags, data); + return call_int_hook(sb_kern_mount, 0, sb, flags, data); } int security_sb_show_options(struct seq_file *m, struct super_block *sb) { - return security_ops->sb_show_options(m, sb); + return call_int_hook(sb_show_options, 0, m, sb); } int security_sb_statfs(struct dentry *dentry) { - return security_ops->sb_statfs(dentry); + return call_int_hook(sb_statfs, 0, dentry); } int security_sb_mount(const char *dev_name, struct path *path, const char *type, unsigned long flags, void *data) { - return security_ops->sb_mount(dev_name, path, type, flags, data); + return call_int_hook(sb_mount, 0, dev_name, path, type, flags, data); } int security_sb_umount(struct vfsmount *mnt, int flags) { - return security_ops->sb_umount(mnt, flags); + return call_int_hook(sb_umount, 0, mnt, flags); } int security_sb_pivotroot(struct path *old_path, struct path *new_path) { - return security_ops->sb_pivotroot(old_path, new_path); + return call_int_hook(sb_pivotroot, 0, old_path, new_path); } int security_sb_set_mnt_opts(struct super_block *sb, @@ -321,42 +328,43 @@ int security_sb_set_mnt_opts(struct super_block *sb, unsigned long kern_flags, unsigned long *set_kern_flags) { - return security_ops->sb_set_mnt_opts(sb, opts, kern_flags, - set_kern_flags); + return call_int_hook(sb_set_mnt_opts, + opts->num_mnt_opts ? -EOPNOTSUPP : 0, sb, + opts, kern_flags, set_kern_flags); } EXPORT_SYMBOL(security_sb_set_mnt_opts); int security_sb_clone_mnt_opts(const struct super_block *oldsb, struct super_block *newsb) { - return security_ops->sb_clone_mnt_opts(oldsb, newsb); + return call_int_hook(sb_clone_mnt_opts, 0, oldsb, newsb); } EXPORT_SYMBOL(security_sb_clone_mnt_opts); int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts) { - return security_ops->sb_parse_opts_str(options, opts); + return call_int_hook(sb_parse_opts_str, 0, options, opts); } EXPORT_SYMBOL(security_sb_parse_opts_str); int security_inode_alloc(struct inode *inode) { inode->i_security = NULL; - return security_ops->inode_alloc_security(inode); + return call_int_hook(inode_alloc_security, 0, inode); } void security_inode_free(struct inode *inode) { integrity_inode_free(inode); - security_ops->inode_free_security(inode); + call_void_hook(inode_free_security, inode); } int security_dentry_init_security(struct dentry *dentry, int mode, struct qstr *name, void **ctx, u32 *ctxlen) { - return security_ops->dentry_init_security(dentry, mode, name, - ctx, ctxlen); + return call_int_hook(dentry_init_security, -EOPNOTSUPP, dentry, mode, + name, ctx, ctxlen); } EXPORT_SYMBOL(security_dentry_init_security); @@ -372,11 +380,11 @@ int security_inode_init_security(struct inode *inode, struct inode *dir, return 0; if (!initxattrs) - return security_ops->inode_init_security(inode, dir, qstr, + return call_int_hook(inode_init_security, 0, inode, dir, qstr, NULL, NULL, NULL); memset(new_xattrs, 0, sizeof(new_xattrs)); lsm_xattr = new_xattrs; - ret = security_ops->inode_init_security(inode, dir, qstr, + ret = call_int_hook(inode_init_security, -EOPNOTSUPP, inode, dir, qstr, &lsm_xattr->name, &lsm_xattr->value, &lsm_xattr->value_len); @@ -401,8 +409,8 @@ int security_old_inode_init_security(struct inode *inode, struct inode *dir, { if (unlikely(IS_PRIVATE(inode))) return -EOPNOTSUPP; - return security_ops->inode_init_security(inode, dir, qstr, name, value, - len); + return call_int_hook(inode_init_security, 0, inode, dir, qstr, + name, value, len); } EXPORT_SYMBOL(security_old_inode_init_security); @@ -412,7 +420,7 @@ int security_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode, { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) return 0; - return security_ops->path_mknod(dir, dentry, mode, dev); + return call_int_hook(path_mknod, 0, dir, dentry, mode, dev); } EXPORT_SYMBOL(security_path_mknod); @@ -420,7 +428,7 @@ int security_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode) { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) return 0; - return security_ops->path_mkdir(dir, dentry, mode); + return call_int_hook(path_mkdir, 0, dir, dentry, mode); } EXPORT_SYMBOL(security_path_mkdir); @@ -428,14 +436,14 @@ int security_path_rmdir(struct path *dir, struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) return 0; - return security_ops->path_rmdir(dir, dentry); + return call_int_hook(path_rmdir, 0, dir, dentry); } int security_path_unlink(struct path *dir, struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) return 0; - return security_ops->path_unlink(dir, dentry); + return call_int_hook(path_unlink, 0, dir, dentry); } EXPORT_SYMBOL(security_path_unlink); @@ -444,7 +452,7 @@ int security_path_symlink(struct path *dir, struct dentry *dentry, { if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry)))) return 0; - return security_ops->path_symlink(dir, dentry, old_name); + return call_int_hook(path_symlink, 0, dir, dentry, old_name); } int security_path_link(struct dentry *old_dentry, struct path *new_dir, @@ -452,7 +460,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir, { if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry)))) return 0; - return security_ops->path_link(old_dentry, new_dir, new_dentry); + return call_int_hook(path_link, 0, old_dentry, new_dir, new_dentry); } int security_path_rename(struct path *old_dir, struct dentry *old_dentry, @@ -464,14 +472,14 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry, return 0; if (flags & RENAME_EXCHANGE) { - int err = security_ops->path_rename(new_dir, new_dentry, - old_dir, old_dentry); + int err = call_int_hook(path_rename, 0, new_dir, new_dentry, + old_dir, old_dentry); if (err) return err; } - return security_ops->path_rename(old_dir, old_dentry, new_dir, - new_dentry); + return call_int_hook(path_rename, 0, old_dir, old_dentry, new_dir, + new_dentry); } EXPORT_SYMBOL(security_path_rename); @@ -479,26 +487,26 @@ int security_path_truncate(struct path *path) { if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))) return 0; - return security_ops->path_truncate(path); + return call_int_hook(path_truncate, 0, path); } int security_path_chmod(struct path *path, umode_t mode) { if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))) return 0; - return security_ops->path_chmod(path, mode); + return call_int_hook(path_chmod, 0, path, mode); } int security_path_chown(struct path *path, kuid_t uid, kgid_t gid) { if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))) return 0; - return security_ops->path_chown(path, uid, gid); + return call_int_hook(path_chown, 0, path, uid, gid); } int security_path_chroot(struct path *path) { - return security_ops->path_chroot(path); + return call_int_hook(path_chroot, 0, path); } #endif @@ -506,7 +514,7 @@ int security_inode_create(struct inode *dir, struct dentry *dentry, umode_t mode { if (unlikely(IS_PRIVATE(dir))) return 0; - return security_ops->inode_create(dir, dentry, mode); + return call_int_hook(inode_create, 0, dir, dentry, mode); } EXPORT_SYMBOL_GPL(security_inode_create); @@ -515,14 +523,14 @@ int security_inode_link(struct dentry *old_dentry, struct inode *dir, { if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry)))) return 0; - return security_ops->inode_link(old_dentry, dir, new_dentry); + return call_int_hook(inode_link, 0, old_dentry, dir, new_dentry); } int security_inode_unlink(struct inode *dir, struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; - return security_ops->inode_unlink(dir, dentry); + return call_int_hook(inode_unlink, 0, dir, dentry); } int security_inode_symlink(struct inode *dir, struct dentry *dentry, @@ -530,14 +538,14 @@ int security_inode_symlink(struct inode *dir, struct dentry *dentry, { if (unlikely(IS_PRIVATE(dir))) return 0; - return security_ops->inode_symlink(dir, dentry, old_name); + return call_int_hook(inode_symlink, 0, dir, dentry, old_name); } int security_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { if (unlikely(IS_PRIVATE(dir))) return 0; - return security_ops->inode_mkdir(dir, dentry, mode); + return call_int_hook(inode_mkdir, 0, dir, dentry, mode); } EXPORT_SYMBOL_GPL(security_inode_mkdir); @@ -545,14 +553,14 @@ int security_inode_rmdir(struct inode *dir, struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; - return security_ops->inode_rmdir(dir, dentry); + return call_int_hook(inode_rmdir, 0, dir, dentry); } int security_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { if (unlikely(IS_PRIVATE(dir))) return 0; - return security_ops->inode_mknod(dir, dentry, mode, dev); + return call_int_hook(inode_mknod, 0, dir, dentry, mode, dev); } int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, @@ -564,13 +572,13 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, return 0; if (flags & RENAME_EXCHANGE) { - int err = security_ops->inode_rename(new_dir, new_dentry, + int err = call_int_hook(inode_rename, 0, new_dir, new_dentry, old_dir, old_dentry); if (err) return err; } - return security_ops->inode_rename(old_dir, old_dentry, + return call_int_hook(inode_rename, 0, old_dir, old_dentry, new_dir, new_dentry); } @@ -578,7 +586,7 @@ int security_inode_readlink(struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; - return security_ops->inode_readlink(dentry); + return call_int_hook(inode_readlink, 0, dentry); } int security_inode_follow_link(struct dentry *dentry, struct inode *inode, @@ -586,14 +594,14 @@ int security_inode_follow_link(struct dentry *dentry, struct inode *inode, { if (unlikely(IS_PRIVATE(inode))) return 0; - return security_ops->inode_follow_link(dentry, inode, rcu); + return call_int_hook(inode_follow_link, 0, dentry, inode, rcu); } int security_inode_permission(struct inode *inode, int mask) { if (unlikely(IS_PRIVATE(inode))) return 0; - return security_ops->inode_permission(inode, mask); + return call_int_hook(inode_permission, 0, inode, mask); } int security_inode_setattr(struct dentry *dentry, struct iattr *attr) @@ -602,7 +610,7 @@ int security_inode_setattr(struct dentry *dentry, struct iattr *attr) if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; - ret = security_ops->inode_setattr(dentry, attr); + ret = call_int_hook(inode_setattr, 0, dentry, attr); if (ret) return ret; return evm_inode_setattr(dentry, attr); @@ -613,7 +621,7 @@ int security_inode_getattr(const struct path *path) { if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))) return 0; - return security_ops->inode_getattr(path); + return call_int_hook(inode_getattr, 0, path); } int security_inode_setxattr(struct dentry *dentry, const char *name, @@ -623,7 +631,15 @@ int security_inode_setxattr(struct dentry *dentry, const char *name, if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; - ret = security_ops->inode_setxattr(dentry, name, value, size, flags); + /* + * SELinux and Smack integrate the cap call, + * so assume that all LSMs supplying this call do so. + */ + ret = call_int_hook(inode_setxattr, 1, dentry, name, value, size, + flags); + + if (ret == 1) + ret = cap_inode_setxattr(dentry, name, value, size, flags); if (ret) return ret; ret = ima_inode_setxattr(dentry, name, value, size); @@ -637,7 +653,7 @@ void security_inode_post_setxattr(struct dentry *dentry, const char *name, { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return; - security_ops->inode_post_setxattr(dentry, name, value, size, flags); + call_void_hook(inode_post_setxattr, dentry, name, value, size, flags); evm_inode_post_setxattr(dentry, name, value, size); } @@ -645,14 +661,14 @@ int security_inode_getxattr(struct dentry *dentry, const char *name) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; - return security_ops->inode_getxattr(dentry, name); + return call_int_hook(inode_getxattr, 0, dentry, name); } int security_inode_listxattr(struct dentry *dentry) { if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; - return security_ops->inode_listxattr(dentry); + return call_int_hook(inode_listxattr, 0, dentry); } int security_inode_removexattr(struct dentry *dentry, const char *name) @@ -661,7 +677,13 @@ int security_inode_removexattr(struct dentry *dentry, const char *name) if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; - ret = security_ops->inode_removexattr(dentry, name); + /* + * SELinux and Smack integrate the cap call, + * so assume that all LSMs supplying this call do so. + */ + ret = call_int_hook(inode_removexattr, 1, dentry, name); + if (ret == 1) + ret = cap_inode_removexattr(dentry, name); if (ret) return ret; ret = ima_inode_removexattr(dentry, name); @@ -672,46 +694,48 @@ int security_inode_removexattr(struct dentry *dentry, const char *name) int security_inode_need_killpriv(struct dentry *dentry) { - return security_ops->inode_need_killpriv(dentry); + return call_int_hook(inode_need_killpriv, 0, dentry); } int security_inode_killpriv(struct dentry *dentry) { - return security_ops->inode_killpriv(dentry); + return call_int_hook(inode_killpriv, 0, dentry); } int security_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc) { if (unlikely(IS_PRIVATE(inode))) return -EOPNOTSUPP; - return security_ops->inode_getsecurity(inode, name, buffer, alloc); + return call_int_hook(inode_getsecurity, -EOPNOTSUPP, inode, name, + buffer, alloc); } int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags) { if (unlikely(IS_PRIVATE(inode))) return -EOPNOTSUPP; - return security_ops->inode_setsecurity(inode, name, value, size, flags); + return call_int_hook(inode_setsecurity, -EOPNOTSUPP, inode, name, + value, size, flags); } int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size) { if (unlikely(IS_PRIVATE(inode))) return 0; - return security_ops->inode_listsecurity(inode, buffer, buffer_size); + return call_int_hook(inode_listsecurity, 0, inode, buffer, buffer_size); } EXPORT_SYMBOL(security_inode_listsecurity); void security_inode_getsecid(const struct inode *inode, u32 *secid) { - security_ops->inode_getsecid(inode, secid); + call_void_hook(inode_getsecid, inode, secid); } int security_file_permission(struct file *file, int mask) { int ret; - ret = security_ops->file_permission(file, mask); + ret = call_int_hook(file_permission, 0, file, mask); if (ret) return ret; @@ -720,17 +744,17 @@ int security_file_permission(struct file *file, int mask) int security_file_alloc(struct file *file) { - return security_ops->file_alloc_security(file); + return call_int_hook(file_alloc_security, 0, file); } void security_file_free(struct file *file) { - security_ops->file_free_security(file); + call_void_hook(file_free_security, file); } int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - return security_ops->file_ioctl(file, cmd, arg); + return call_int_hook(file_ioctl, 0, file, cmd, arg); } static inline unsigned long mmap_prot(struct file *file, unsigned long prot) @@ -770,7 +794,7 @@ int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags) { int ret; - ret = security_ops->mmap_file(file, prot, + ret = call_int_hook(mmap_file, 0, file, prot, mmap_prot(file, prot), flags); if (ret) return ret; @@ -779,46 +803,46 @@ int security_mmap_file(struct file *file, unsigned long prot, int security_mmap_addr(unsigned long addr) { - return security_ops->mmap_addr(addr); + return call_int_hook(mmap_addr, 0, addr); } int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) { - return security_ops->file_mprotect(vma, reqprot, prot); + return call_int_hook(file_mprotect, 0, vma, reqprot, prot); } int security_file_lock(struct file *file, unsigned int cmd) { - return security_ops->file_lock(file, cmd); + return call_int_hook(file_lock, 0, file, cmd); } int security_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg) { - return security_ops->file_fcntl(file, cmd, arg); + return call_int_hook(file_fcntl, 0, file, cmd, arg); } void security_file_set_fowner(struct file *file) { - security_ops->file_set_fowner(file); + call_void_hook(file_set_fowner, file); } int security_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, int sig) { - return security_ops->file_send_sigiotask(tsk, fown, sig); + return call_int_hook(file_send_sigiotask, 0, tsk, fown, sig); } int security_file_receive(struct file *file) { - return security_ops->file_receive(file); + return call_int_hook(file_receive, 0, file); } int security_file_open(struct file *file, const struct cred *cred) { int ret; - ret = security_ops->file_open(file, cred); + ret = call_int_hook(file_open, 0, file, cred); if (ret) return ret; @@ -827,52 +851,49 @@ int security_file_open(struct file *file, const struct cred *cred) int security_task_create(unsigned long clone_flags) { - return security_ops->task_create(clone_flags); + return call_int_hook(task_create, 0, clone_flags); } void security_task_free(struct task_struct *task) { -#ifdef CONFIG_SECURITY_YAMA_STACKED - yama_task_free(task); -#endif - security_ops->task_free(task); + call_void_hook(task_free, task); } int security_cred_alloc_blank(struct cred *cred, gfp_t gfp) { - return security_ops->cred_alloc_blank(cred, gfp); + return call_int_hook(cred_alloc_blank, 0, cred, gfp); } void security_cred_free(struct cred *cred) { - security_ops->cred_free(cred); + call_void_hook(cred_free, cred); } int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp) { - return security_ops->cred_prepare(new, old, gfp); + return call_int_hook(cred_prepare, 0, new, old, gfp); } void security_transfer_creds(struct cred *new, const struct cred *old) { - security_ops->cred_transfer(new, old); + call_void_hook(cred_transfer, new, old); } int security_kernel_act_as(struct cred *new, u32 secid) { - return security_ops->kernel_act_as(new, secid); + return call_int_hook(kernel_act_as, 0, new, secid); } int security_kernel_create_files_as(struct cred *new, struct inode *inode) { - return security_ops->kernel_create_files_as(new, inode); + return call_int_hook(kernel_create_files_as, 0, new, inode); } int security_kernel_fw_from_file(struct file *file, char *buf, size_t size) { int ret; - ret = security_ops->kernel_fw_from_file(file, buf, size); + ret = call_int_hook(kernel_fw_from_file, 0, file, buf, size); if (ret) return ret; return ima_fw_from_file(file, buf, size); @@ -881,14 +902,14 @@ EXPORT_SYMBOL_GPL(security_kernel_fw_from_file); int security_kernel_module_request(char *kmod_name) { - return security_ops->kernel_module_request(kmod_name); + return call_int_hook(kernel_module_request, 0, kmod_name); } int security_kernel_module_from_file(struct file *file) { int ret; - ret = security_ops->kernel_module_from_file(file); + ret = call_int_hook(kernel_module_from_file, 0, file); if (ret) return ret; return ima_module_check(file); @@ -897,259 +918,269 @@ int security_kernel_module_from_file(struct file *file) int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags) { - return security_ops->task_fix_setuid(new, old, flags); + return call_int_hook(task_fix_setuid, 0, new, old, flags); } int security_task_setpgid(struct task_struct *p, pid_t pgid) { - return security_ops->task_setpgid(p, pgid); + return call_int_hook(task_setpgid, 0, p, pgid); } int security_task_getpgid(struct task_struct *p) { - return security_ops->task_getpgid(p); + return call_int_hook(task_getpgid, 0, p); } int security_task_getsid(struct task_struct *p) { - return security_ops->task_getsid(p); + return call_int_hook(task_getsid, 0, p); } void security_task_getsecid(struct task_struct *p, u32 *secid) { - security_ops->task_getsecid(p, secid); + *secid = 0; + call_void_hook(task_getsecid, p, secid); } EXPORT_SYMBOL(security_task_getsecid); int security_task_setnice(struct task_struct *p, int nice) { - return security_ops->task_setnice(p, nice); + return call_int_hook(task_setnice, 0, p, nice); } int security_task_setioprio(struct task_struct *p, int ioprio) { - return security_ops->task_setioprio(p, ioprio); + return call_int_hook(task_setioprio, 0, p, ioprio); } int security_task_getioprio(struct task_struct *p) { - return security_ops->task_getioprio(p); + return call_int_hook(task_getioprio, 0, p); } int security_task_setrlimit(struct task_struct *p, unsigned int resource, struct rlimit *new_rlim) { - return security_ops->task_setrlimit(p, resource, new_rlim); + return call_int_hook(task_setrlimit, 0, p, resource, new_rlim); } int security_task_setscheduler(struct task_struct *p) { - return security_ops->task_setscheduler(p); + return call_int_hook(task_setscheduler, 0, p); } int security_task_getscheduler(struct task_struct *p) { - return security_ops->task_getscheduler(p); + return call_int_hook(task_getscheduler, 0, p); } int security_task_movememory(struct task_struct *p) { - return security_ops->task_movememory(p); + return call_int_hook(task_movememory, 0, p); } int security_task_kill(struct task_struct *p, struct siginfo *info, int sig, u32 secid) { - return security_ops->task_kill(p, info, sig, secid); + return call_int_hook(task_kill, 0, p, info, sig, secid); } int security_task_wait(struct task_struct *p) { - return security_ops->task_wait(p); + return call_int_hook(task_wait, 0, p); } int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { -#ifdef CONFIG_SECURITY_YAMA_STACKED - int rc; - rc = yama_task_prctl(option, arg2, arg3, arg4, arg5); - if (rc != -ENOSYS) - return rc; -#endif - return security_ops->task_prctl(option, arg2, arg3, arg4, arg5); + int thisrc; + int rc = -ENOSYS; + struct security_hook_list *hp; + + list_for_each_entry(hp, &security_hook_heads.task_prctl, list) { + thisrc = hp->hook.task_prctl(option, arg2, arg3, arg4, arg5); + if (thisrc != -ENOSYS) { + rc = thisrc; + if (thisrc != 0) + break; + } + } + return rc; } void security_task_to_inode(struct task_struct *p, struct inode *inode) { - security_ops->task_to_inode(p, inode); + call_void_hook(task_to_inode, p, inode); } int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag) { - return security_ops->ipc_permission(ipcp, flag); + return call_int_hook(ipc_permission, 0, ipcp, flag); } void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid) { - security_ops->ipc_getsecid(ipcp, secid); + *secid = 0; + call_void_hook(ipc_getsecid, ipcp, secid); } int security_msg_msg_alloc(struct msg_msg *msg) { - return security_ops->msg_msg_alloc_security(msg); + return call_int_hook(msg_msg_alloc_security, 0, msg); } void security_msg_msg_free(struct msg_msg *msg) { - security_ops->msg_msg_free_security(msg); + call_void_hook(msg_msg_free_security, msg); } int security_msg_queue_alloc(struct msg_queue *msq) { - return security_ops->msg_queue_alloc_security(msq); + return call_int_hook(msg_queue_alloc_security, 0, msq); } void security_msg_queue_free(struct msg_queue *msq) { - security_ops->msg_queue_free_security(msq); + call_void_hook(msg_queue_free_security, msq); } int security_msg_queue_associate(struct msg_queue *msq, int msqflg) { - return security_ops->msg_queue_associate(msq, msqflg); + return call_int_hook(msg_queue_associate, 0, msq, msqflg); } int security_msg_queue_msgctl(struct msg_queue *msq, int cmd) { - return security_ops->msg_queue_msgctl(msq, cmd); + return call_int_hook(msg_queue_msgctl, 0, msq, cmd); } int security_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, int msqflg) { - return security_ops->msg_queue_msgsnd(msq, msg, msqflg); + return call_int_hook(msg_queue_msgsnd, 0, msq, msg, msqflg); } int security_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, struct task_struct *target, long type, int mode) { - return security_ops->msg_queue_msgrcv(msq, msg, target, type, mode); + return call_int_hook(msg_queue_msgrcv, 0, msq, msg, target, type, mode); } int security_shm_alloc(struct shmid_kernel *shp) { - return security_ops->shm_alloc_security(shp); + return call_int_hook(shm_alloc_security, 0, shp); } void security_shm_free(struct shmid_kernel *shp) { - security_ops->shm_free_security(shp); + call_void_hook(shm_free_security, shp); } int security_shm_associate(struct shmid_kernel *shp, int shmflg) { - return security_ops->shm_associate(shp, shmflg); + return call_int_hook(shm_associate, 0, shp, shmflg); } int security_shm_shmctl(struct shmid_kernel *shp, int cmd) { - return security_ops->shm_shmctl(shp, cmd); + return call_int_hook(shm_shmctl, 0, shp, cmd); } int security_shm_shmat(struct shmid_kernel *shp, char __user *shmaddr, int shmflg) { - return security_ops->shm_shmat(shp, shmaddr, shmflg); + return call_int_hook(shm_shmat, 0, shp, shmaddr, shmflg); } int security_sem_alloc(struct sem_array *sma) { - return security_ops->sem_alloc_security(sma); + return call_int_hook(sem_alloc_security, 0, sma); } void security_sem_free(struct sem_array *sma) { - security_ops->sem_free_security(sma); + call_void_hook(sem_free_security, sma); } int security_sem_associate(struct sem_array *sma, int semflg) { - return security_ops->sem_associate(sma, semflg); + return call_int_hook(sem_associate, 0, sma, semflg); } int security_sem_semctl(struct sem_array *sma, int cmd) { - return security_ops->sem_semctl(sma, cmd); + return call_int_hook(sem_semctl, 0, sma, cmd); } int security_sem_semop(struct sem_array *sma, struct sembuf *sops, unsigned nsops, int alter) { - return security_ops->sem_semop(sma, sops, nsops, alter); + return call_int_hook(sem_semop, 0, sma, sops, nsops, alter); } void security_d_instantiate(struct dentry *dentry, struct inode *inode) { if (unlikely(inode && IS_PRIVATE(inode))) return; - security_ops->d_instantiate(dentry, inode); + call_void_hook(d_instantiate, dentry, inode); } EXPORT_SYMBOL(security_d_instantiate); int security_getprocattr(struct task_struct *p, char *name, char **value) { - return security_ops->getprocattr(p, name, value); + return call_int_hook(getprocattr, -EINVAL, p, name, value); } int security_setprocattr(struct task_struct *p, char *name, void *value, size_t size) { - return security_ops->setprocattr(p, name, value, size); + return call_int_hook(setprocattr, -EINVAL, p, name, value, size); } int security_netlink_send(struct sock *sk, struct sk_buff *skb) { - return security_ops->netlink_send(sk, skb); + return call_int_hook(netlink_send, 0, sk, skb); } int security_ismaclabel(const char *name) { - return security_ops->ismaclabel(name); + return call_int_hook(ismaclabel, 0, name); } EXPORT_SYMBOL(security_ismaclabel); int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) { - return security_ops->secid_to_secctx(secid, secdata, seclen); + return call_int_hook(secid_to_secctx, -EOPNOTSUPP, secid, secdata, + seclen); } EXPORT_SYMBOL(security_secid_to_secctx); int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) { - return security_ops->secctx_to_secid(secdata, seclen, secid); + *secid = 0; + return call_int_hook(secctx_to_secid, 0, secdata, seclen, secid); } EXPORT_SYMBOL(security_secctx_to_secid); void security_release_secctx(char *secdata, u32 seclen) { - security_ops->release_secctx(secdata, seclen); + call_void_hook(release_secctx, secdata, seclen); } EXPORT_SYMBOL(security_release_secctx); int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen) { - return security_ops->inode_notifysecctx(inode, ctx, ctxlen); + return call_int_hook(inode_notifysecctx, 0, inode, ctx, ctxlen); } EXPORT_SYMBOL(security_inode_notifysecctx); int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen) { - return security_ops->inode_setsecctx(dentry, ctx, ctxlen); + return call_int_hook(inode_setsecctx, 0, dentry, ctx, ctxlen); } EXPORT_SYMBOL(security_inode_setsecctx); int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) { - return security_ops->inode_getsecctx(inode, ctx, ctxlen); + return call_int_hook(inode_getsecctx, -EOPNOTSUPP, inode, ctx, ctxlen); } EXPORT_SYMBOL(security_inode_getsecctx); @@ -1157,206 +1188,207 @@ EXPORT_SYMBOL(security_inode_getsecctx); int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk) { - return security_ops->unix_stream_connect(sock, other, newsk); + return call_int_hook(unix_stream_connect, 0, sock, other, newsk); } EXPORT_SYMBOL(security_unix_stream_connect); int security_unix_may_send(struct socket *sock, struct socket *other) { - return security_ops->unix_may_send(sock, other); + return call_int_hook(unix_may_send, 0, sock, other); } EXPORT_SYMBOL(security_unix_may_send); int security_socket_create(int family, int type, int protocol, int kern) { - return security_ops->socket_create(family, type, protocol, kern); + return call_int_hook(socket_create, 0, family, type, protocol, kern); } int security_socket_post_create(struct socket *sock, int family, int type, int protocol, int kern) { - return security_ops->socket_post_create(sock, family, type, + return call_int_hook(socket_post_create, 0, sock, family, type, protocol, kern); } int security_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) { - return security_ops->socket_bind(sock, address, addrlen); + return call_int_hook(socket_bind, 0, sock, address, addrlen); } int security_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen) { - return security_ops->socket_connect(sock, address, addrlen); + return call_int_hook(socket_connect, 0, sock, address, addrlen); } int security_socket_listen(struct socket *sock, int backlog) { - return security_ops->socket_listen(sock, backlog); + return call_int_hook(socket_listen, 0, sock, backlog); } int security_socket_accept(struct socket *sock, struct socket *newsock) { - return security_ops->socket_accept(sock, newsock); + return call_int_hook(socket_accept, 0, sock, newsock); } int security_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) { - return security_ops->socket_sendmsg(sock, msg, size); + return call_int_hook(socket_sendmsg, 0, sock, msg, size); } int security_socket_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags) { - return security_ops->socket_recvmsg(sock, msg, size, flags); + return call_int_hook(socket_recvmsg, 0, sock, msg, size, flags); } int security_socket_getsockname(struct socket *sock) { - return security_ops->socket_getsockname(sock); + return call_int_hook(socket_getsockname, 0, sock); } int security_socket_getpeername(struct socket *sock) { - return security_ops->socket_getpeername(sock); + return call_int_hook(socket_getpeername, 0, sock); } int security_socket_getsockopt(struct socket *sock, int level, int optname) { - return security_ops->socket_getsockopt(sock, level, optname); + return call_int_hook(socket_getsockopt, 0, sock, level, optname); } int security_socket_setsockopt(struct socket *sock, int level, int optname) { - return security_ops->socket_setsockopt(sock, level, optname); + return call_int_hook(socket_setsockopt, 0, sock, level, optname); } int security_socket_shutdown(struct socket *sock, int how) { - return security_ops->socket_shutdown(sock, how); + return call_int_hook(socket_shutdown, 0, sock, how); } int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) { - return security_ops->socket_sock_rcv_skb(sk, skb); + return call_int_hook(socket_sock_rcv_skb, 0, sk, skb); } EXPORT_SYMBOL(security_sock_rcv_skb); int security_socket_getpeersec_stream(struct socket *sock, char __user *optval, int __user *optlen, unsigned len) { - return security_ops->socket_getpeersec_stream(sock, optval, optlen, len); + return call_int_hook(socket_getpeersec_stream, -ENOPROTOOPT, sock, + optval, optlen, len); } int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) { - return security_ops->socket_getpeersec_dgram(sock, skb, secid); + return call_int_hook(socket_getpeersec_dgram, 0, sock, skb, secid); } EXPORT_SYMBOL(security_socket_getpeersec_dgram); int security_sk_alloc(struct sock *sk, int family, gfp_t priority) { - return security_ops->sk_alloc_security(sk, family, priority); + return call_int_hook(sk_alloc_security, 0, sk, family, priority); } void security_sk_free(struct sock *sk) { - security_ops->sk_free_security(sk); + call_void_hook(sk_free_security, sk); } void security_sk_clone(const struct sock *sk, struct sock *newsk) { - security_ops->sk_clone_security(sk, newsk); + call_void_hook(sk_clone_security, sk, newsk); } EXPORT_SYMBOL(security_sk_clone); void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { - security_ops->sk_getsecid(sk, &fl->flowi_secid); + call_void_hook(sk_getsecid, sk, &fl->flowi_secid); } EXPORT_SYMBOL(security_sk_classify_flow); void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) { - security_ops->req_classify_flow(req, fl); + call_void_hook(req_classify_flow, req, fl); } EXPORT_SYMBOL(security_req_classify_flow); void security_sock_graft(struct sock *sk, struct socket *parent) { - security_ops->sock_graft(sk, parent); + call_void_hook(sock_graft, sk, parent); } EXPORT_SYMBOL(security_sock_graft); int security_inet_conn_request(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - return security_ops->inet_conn_request(sk, skb, req); + return call_int_hook(inet_conn_request, 0, sk, skb, req); } EXPORT_SYMBOL(security_inet_conn_request); void security_inet_csk_clone(struct sock *newsk, const struct request_sock *req) { - security_ops->inet_csk_clone(newsk, req); + call_void_hook(inet_csk_clone, newsk, req); } void security_inet_conn_established(struct sock *sk, struct sk_buff *skb) { - security_ops->inet_conn_established(sk, skb); + call_void_hook(inet_conn_established, sk, skb); } int security_secmark_relabel_packet(u32 secid) { - return security_ops->secmark_relabel_packet(secid); + return call_int_hook(secmark_relabel_packet, 0, secid); } EXPORT_SYMBOL(security_secmark_relabel_packet); void security_secmark_refcount_inc(void) { - security_ops->secmark_refcount_inc(); + call_void_hook(secmark_refcount_inc); } EXPORT_SYMBOL(security_secmark_refcount_inc); void security_secmark_refcount_dec(void) { - security_ops->secmark_refcount_dec(); + call_void_hook(secmark_refcount_dec); } EXPORT_SYMBOL(security_secmark_refcount_dec); int security_tun_dev_alloc_security(void **security) { - return security_ops->tun_dev_alloc_security(security); + return call_int_hook(tun_dev_alloc_security, 0, security); } EXPORT_SYMBOL(security_tun_dev_alloc_security); void security_tun_dev_free_security(void *security) { - security_ops->tun_dev_free_security(security); + call_void_hook(tun_dev_free_security, security); } EXPORT_SYMBOL(security_tun_dev_free_security); int security_tun_dev_create(void) { - return security_ops->tun_dev_create(); + return call_int_hook(tun_dev_create, 0); } EXPORT_SYMBOL(security_tun_dev_create); int security_tun_dev_attach_queue(void *security) { - return security_ops->tun_dev_attach_queue(security); + return call_int_hook(tun_dev_attach_queue, 0, security); } EXPORT_SYMBOL(security_tun_dev_attach_queue); int security_tun_dev_attach(struct sock *sk, void *security) { - return security_ops->tun_dev_attach(sk, security); + return call_int_hook(tun_dev_attach, 0, sk, security); } EXPORT_SYMBOL(security_tun_dev_attach); int security_tun_dev_open(void *security) { - return security_ops->tun_dev_open(security); + return call_int_hook(tun_dev_open, 0, security); } EXPORT_SYMBOL(security_tun_dev_open); @@ -1368,71 +1400,89 @@ int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx, gfp_t gfp) { - return security_ops->xfrm_policy_alloc_security(ctxp, sec_ctx, gfp); + return call_int_hook(xfrm_policy_alloc_security, 0, ctxp, sec_ctx, gfp); } EXPORT_SYMBOL(security_xfrm_policy_alloc); int security_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctxp) { - return security_ops->xfrm_policy_clone_security(old_ctx, new_ctxp); + return call_int_hook(xfrm_policy_clone_security, 0, old_ctx, new_ctxp); } void security_xfrm_policy_free(struct xfrm_sec_ctx *ctx) { - security_ops->xfrm_policy_free_security(ctx); + call_void_hook(xfrm_policy_free_security, ctx); } EXPORT_SYMBOL(security_xfrm_policy_free); int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) { - return security_ops->xfrm_policy_delete_security(ctx); + return call_int_hook(xfrm_policy_delete_security, 0, ctx); } int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) { - return security_ops->xfrm_state_alloc(x, sec_ctx); + return call_int_hook(xfrm_state_alloc, 0, x, sec_ctx); } EXPORT_SYMBOL(security_xfrm_state_alloc); int security_xfrm_state_alloc_acquire(struct xfrm_state *x, struct xfrm_sec_ctx *polsec, u32 secid) { - return security_ops->xfrm_state_alloc_acquire(x, polsec, secid); + return call_int_hook(xfrm_state_alloc_acquire, 0, x, polsec, secid); } int security_xfrm_state_delete(struct xfrm_state *x) { - return security_ops->xfrm_state_delete_security(x); + return call_int_hook(xfrm_state_delete_security, 0, x); } EXPORT_SYMBOL(security_xfrm_state_delete); void security_xfrm_state_free(struct xfrm_state *x) { - security_ops->xfrm_state_free_security(x); + call_void_hook(xfrm_state_free_security, x); } int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) { - return security_ops->xfrm_policy_lookup(ctx, fl_secid, dir); + return call_int_hook(xfrm_policy_lookup, 0, ctx, fl_secid, dir); } int security_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, const struct flowi *fl) { - return security_ops->xfrm_state_pol_flow_match(x, xp, fl); + struct security_hook_list *hp; + int rc = 1; + + /* + * Since this function is expected to return 0 or 1, the judgment + * becomes difficult if multiple LSMs supply this call. Fortunately, + * we can use the first LSM's judgment because currently only SELinux + * supplies this call. + * + * For speed optimization, we explicitly break the loop rather than + * using the macro + */ + list_for_each_entry(hp, &security_hook_heads.xfrm_state_pol_flow_match, + list) { + rc = hp->hook.xfrm_state_pol_flow_match(x, xp, fl); + break; + } + return rc; } int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) { - return security_ops->xfrm_decode_session(skb, secid, 1); + return call_int_hook(xfrm_decode_session, 0, skb, secid, 1); } void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) { - int rc = security_ops->xfrm_decode_session(skb, &fl->flowi_secid, 0); + int rc = call_int_hook(xfrm_decode_session, 0, skb, &fl->flowi_secid, + 0); BUG_ON(rc); } @@ -1445,23 +1495,24 @@ EXPORT_SYMBOL(security_skb_classify_flow); int security_key_alloc(struct key *key, const struct cred *cred, unsigned long flags) { - return security_ops->key_alloc(key, cred, flags); + return call_int_hook(key_alloc, 0, key, cred, flags); } void security_key_free(struct key *key) { - security_ops->key_free(key); + call_void_hook(key_free, key); } int security_key_permission(key_ref_t key_ref, const struct cred *cred, unsigned perm) { - return security_ops->key_permission(key_ref, cred, perm); + return call_int_hook(key_permission, 0, key_ref, cred, perm); } int security_key_getsecurity(struct key *key, char **_buffer) { - return security_ops->key_getsecurity(key, _buffer); + *_buffer = NULL; + return call_int_hook(key_getsecurity, 0, key, _buffer); } #endif /* CONFIG_KEYS */ @@ -1470,23 +1521,369 @@ int security_key_getsecurity(struct key *key, char **_buffer) int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule) { - return security_ops->audit_rule_init(field, op, rulestr, lsmrule); + return call_int_hook(audit_rule_init, 0, field, op, rulestr, lsmrule); } int security_audit_rule_known(struct audit_krule *krule) { - return security_ops->audit_rule_known(krule); + return call_int_hook(audit_rule_known, 0, krule); } void security_audit_rule_free(void *lsmrule) { - security_ops->audit_rule_free(lsmrule); + call_void_hook(audit_rule_free, lsmrule); } int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule, struct audit_context *actx) { - return security_ops->audit_rule_match(secid, field, op, lsmrule, actx); + return call_int_hook(audit_rule_match, 0, secid, field, op, lsmrule, + actx); } +#endif /* CONFIG_AUDIT */ +struct security_hook_heads security_hook_heads = { + .binder_set_context_mgr = + LIST_HEAD_INIT(security_hook_heads.binder_set_context_mgr), + .binder_transaction = + LIST_HEAD_INIT(security_hook_heads.binder_transaction), + .binder_transfer_binder = + LIST_HEAD_INIT(security_hook_heads.binder_transfer_binder), + .binder_transfer_file = + LIST_HEAD_INIT(security_hook_heads.binder_transfer_file), + + .ptrace_access_check = + LIST_HEAD_INIT(security_hook_heads.ptrace_access_check), + .ptrace_traceme = + LIST_HEAD_INIT(security_hook_heads.ptrace_traceme), + .capget = LIST_HEAD_INIT(security_hook_heads.capget), + .capset = LIST_HEAD_INIT(security_hook_heads.capset), + .capable = LIST_HEAD_INIT(security_hook_heads.capable), + .quotactl = LIST_HEAD_INIT(security_hook_heads.quotactl), + .quota_on = LIST_HEAD_INIT(security_hook_heads.quota_on), + .syslog = LIST_HEAD_INIT(security_hook_heads.syslog), + .settime = LIST_HEAD_INIT(security_hook_heads.settime), + .vm_enough_memory = + LIST_HEAD_INIT(security_hook_heads.vm_enough_memory), + .bprm_set_creds = + LIST_HEAD_INIT(security_hook_heads.bprm_set_creds), + .bprm_check_security = + LIST_HEAD_INIT(security_hook_heads.bprm_check_security), + .bprm_secureexec = + LIST_HEAD_INIT(security_hook_heads.bprm_secureexec), + .bprm_committing_creds = + LIST_HEAD_INIT(security_hook_heads.bprm_committing_creds), + .bprm_committed_creds = + LIST_HEAD_INIT(security_hook_heads.bprm_committed_creds), + .sb_alloc_security = + LIST_HEAD_INIT(security_hook_heads.sb_alloc_security), + .sb_free_security = + LIST_HEAD_INIT(security_hook_heads.sb_free_security), + .sb_copy_data = LIST_HEAD_INIT(security_hook_heads.sb_copy_data), + .sb_remount = LIST_HEAD_INIT(security_hook_heads.sb_remount), + .sb_kern_mount = + LIST_HEAD_INIT(security_hook_heads.sb_kern_mount), + .sb_show_options = + LIST_HEAD_INIT(security_hook_heads.sb_show_options), + .sb_statfs = LIST_HEAD_INIT(security_hook_heads.sb_statfs), + .sb_mount = LIST_HEAD_INIT(security_hook_heads.sb_mount), + .sb_umount = LIST_HEAD_INIT(security_hook_heads.sb_umount), + .sb_pivotroot = LIST_HEAD_INIT(security_hook_heads.sb_pivotroot), + .sb_set_mnt_opts = + LIST_HEAD_INIT(security_hook_heads.sb_set_mnt_opts), + .sb_clone_mnt_opts = + LIST_HEAD_INIT(security_hook_heads.sb_clone_mnt_opts), + .sb_parse_opts_str = + LIST_HEAD_INIT(security_hook_heads.sb_parse_opts_str), + .dentry_init_security = + LIST_HEAD_INIT(security_hook_heads.dentry_init_security), +#ifdef CONFIG_SECURITY_PATH + .path_unlink = LIST_HEAD_INIT(security_hook_heads.path_unlink), + .path_mkdir = LIST_HEAD_INIT(security_hook_heads.path_mkdir), + .path_rmdir = LIST_HEAD_INIT(security_hook_heads.path_rmdir), + .path_mknod = LIST_HEAD_INIT(security_hook_heads.path_mknod), + .path_truncate = + LIST_HEAD_INIT(security_hook_heads.path_truncate), + .path_symlink = LIST_HEAD_INIT(security_hook_heads.path_symlink), + .path_link = LIST_HEAD_INIT(security_hook_heads.path_link), + .path_rename = LIST_HEAD_INIT(security_hook_heads.path_rename), + .path_chmod = LIST_HEAD_INIT(security_hook_heads.path_chmod), + .path_chown = LIST_HEAD_INIT(security_hook_heads.path_chown), + .path_chroot = LIST_HEAD_INIT(security_hook_heads.path_chroot), +#endif + .inode_alloc_security = + LIST_HEAD_INIT(security_hook_heads.inode_alloc_security), + .inode_free_security = + LIST_HEAD_INIT(security_hook_heads.inode_free_security), + .inode_init_security = + LIST_HEAD_INIT(security_hook_heads.inode_init_security), + .inode_create = LIST_HEAD_INIT(security_hook_heads.inode_create), + .inode_link = LIST_HEAD_INIT(security_hook_heads.inode_link), + .inode_unlink = LIST_HEAD_INIT(security_hook_heads.inode_unlink), + .inode_symlink = + LIST_HEAD_INIT(security_hook_heads.inode_symlink), + .inode_mkdir = LIST_HEAD_INIT(security_hook_heads.inode_mkdir), + .inode_rmdir = LIST_HEAD_INIT(security_hook_heads.inode_rmdir), + .inode_mknod = LIST_HEAD_INIT(security_hook_heads.inode_mknod), + .inode_rename = LIST_HEAD_INIT(security_hook_heads.inode_rename), + .inode_readlink = + LIST_HEAD_INIT(security_hook_heads.inode_readlink), + .inode_follow_link = + LIST_HEAD_INIT(security_hook_heads.inode_follow_link), + .inode_permission = + LIST_HEAD_INIT(security_hook_heads.inode_permission), + .inode_setattr = + LIST_HEAD_INIT(security_hook_heads.inode_setattr), + .inode_getattr = + LIST_HEAD_INIT(security_hook_heads.inode_getattr), + .inode_setxattr = + LIST_HEAD_INIT(security_hook_heads.inode_setxattr), + .inode_post_setxattr = + LIST_HEAD_INIT(security_hook_heads.inode_post_setxattr), + .inode_getxattr = + LIST_HEAD_INIT(security_hook_heads.inode_getxattr), + .inode_listxattr = + LIST_HEAD_INIT(security_hook_heads.inode_listxattr), + .inode_removexattr = + LIST_HEAD_INIT(security_hook_heads.inode_removexattr), + .inode_need_killpriv = + LIST_HEAD_INIT(security_hook_heads.inode_need_killpriv), + .inode_killpriv = + LIST_HEAD_INIT(security_hook_heads.inode_killpriv), + .inode_getsecurity = + LIST_HEAD_INIT(security_hook_heads.inode_getsecurity), + .inode_setsecurity = + LIST_HEAD_INIT(security_hook_heads.inode_setsecurity), + .inode_listsecurity = + LIST_HEAD_INIT(security_hook_heads.inode_listsecurity), + .inode_getsecid = + LIST_HEAD_INIT(security_hook_heads.inode_getsecid), + .file_permission = + LIST_HEAD_INIT(security_hook_heads.file_permission), + .file_alloc_security = + LIST_HEAD_INIT(security_hook_heads.file_alloc_security), + .file_free_security = + LIST_HEAD_INIT(security_hook_heads.file_free_security), + .file_ioctl = LIST_HEAD_INIT(security_hook_heads.file_ioctl), + .mmap_addr = LIST_HEAD_INIT(security_hook_heads.mmap_addr), + .mmap_file = LIST_HEAD_INIT(security_hook_heads.mmap_file), + .file_mprotect = + LIST_HEAD_INIT(security_hook_heads.file_mprotect), + .file_lock = LIST_HEAD_INIT(security_hook_heads.file_lock), + .file_fcntl = LIST_HEAD_INIT(security_hook_heads.file_fcntl), + .file_set_fowner = + LIST_HEAD_INIT(security_hook_heads.file_set_fowner), + .file_send_sigiotask = + LIST_HEAD_INIT(security_hook_heads.file_send_sigiotask), + .file_receive = LIST_HEAD_INIT(security_hook_heads.file_receive), + .file_open = LIST_HEAD_INIT(security_hook_heads.file_open), + .task_create = LIST_HEAD_INIT(security_hook_heads.task_create), + .task_free = LIST_HEAD_INIT(security_hook_heads.task_free), + .cred_alloc_blank = + LIST_HEAD_INIT(security_hook_heads.cred_alloc_blank), + .cred_free = LIST_HEAD_INIT(security_hook_heads.cred_free), + .cred_prepare = LIST_HEAD_INIT(security_hook_heads.cred_prepare), + .cred_transfer = + LIST_HEAD_INIT(security_hook_heads.cred_transfer), + .kernel_act_as = + LIST_HEAD_INIT(security_hook_heads.kernel_act_as), + .kernel_create_files_as = + LIST_HEAD_INIT(security_hook_heads.kernel_create_files_as), + .kernel_fw_from_file = + LIST_HEAD_INIT(security_hook_heads.kernel_fw_from_file), + .kernel_module_request = + LIST_HEAD_INIT(security_hook_heads.kernel_module_request), + .kernel_module_from_file = + LIST_HEAD_INIT(security_hook_heads.kernel_module_from_file), + .task_fix_setuid = + LIST_HEAD_INIT(security_hook_heads.task_fix_setuid), + .task_setpgid = LIST_HEAD_INIT(security_hook_heads.task_setpgid), + .task_getpgid = LIST_HEAD_INIT(security_hook_heads.task_getpgid), + .task_getsid = LIST_HEAD_INIT(security_hook_heads.task_getsid), + .task_getsecid = + LIST_HEAD_INIT(security_hook_heads.task_getsecid), + .task_setnice = LIST_HEAD_INIT(security_hook_heads.task_setnice), + .task_setioprio = + LIST_HEAD_INIT(security_hook_heads.task_setioprio), + .task_getioprio = + LIST_HEAD_INIT(security_hook_heads.task_getioprio), + .task_setrlimit = + LIST_HEAD_INIT(security_hook_heads.task_setrlimit), + .task_setscheduler = + LIST_HEAD_INIT(security_hook_heads.task_setscheduler), + .task_getscheduler = + LIST_HEAD_INIT(security_hook_heads.task_getscheduler), + .task_movememory = + LIST_HEAD_INIT(security_hook_heads.task_movememory), + .task_kill = LIST_HEAD_INIT(security_hook_heads.task_kill), + .task_wait = LIST_HEAD_INIT(security_hook_heads.task_wait), + .task_prctl = LIST_HEAD_INIT(security_hook_heads.task_prctl), + .task_to_inode = + LIST_HEAD_INIT(security_hook_heads.task_to_inode), + .ipc_permission = + LIST_HEAD_INIT(security_hook_heads.ipc_permission), + .ipc_getsecid = LIST_HEAD_INIT(security_hook_heads.ipc_getsecid), + .msg_msg_alloc_security = + LIST_HEAD_INIT(security_hook_heads.msg_msg_alloc_security), + .msg_msg_free_security = + LIST_HEAD_INIT(security_hook_heads.msg_msg_free_security), + .msg_queue_alloc_security = + LIST_HEAD_INIT(security_hook_heads.msg_queue_alloc_security), + .msg_queue_free_security = + LIST_HEAD_INIT(security_hook_heads.msg_queue_free_security), + .msg_queue_associate = + LIST_HEAD_INIT(security_hook_heads.msg_queue_associate), + .msg_queue_msgctl = + LIST_HEAD_INIT(security_hook_heads.msg_queue_msgctl), + .msg_queue_msgsnd = + LIST_HEAD_INIT(security_hook_heads.msg_queue_msgsnd), + .msg_queue_msgrcv = + LIST_HEAD_INIT(security_hook_heads.msg_queue_msgrcv), + .shm_alloc_security = + LIST_HEAD_INIT(security_hook_heads.shm_alloc_security), + .shm_free_security = + LIST_HEAD_INIT(security_hook_heads.shm_free_security), + .shm_associate = + LIST_HEAD_INIT(security_hook_heads.shm_associate), + .shm_shmctl = LIST_HEAD_INIT(security_hook_heads.shm_shmctl), + .shm_shmat = LIST_HEAD_INIT(security_hook_heads.shm_shmat), + .sem_alloc_security = + LIST_HEAD_INIT(security_hook_heads.sem_alloc_security), + .sem_free_security = + LIST_HEAD_INIT(security_hook_heads.sem_free_security), + .sem_associate = + LIST_HEAD_INIT(security_hook_heads.sem_associate), + .sem_semctl = LIST_HEAD_INIT(security_hook_heads.sem_semctl), + .sem_semop = LIST_HEAD_INIT(security_hook_heads.sem_semop), + .netlink_send = LIST_HEAD_INIT(security_hook_heads.netlink_send), + .d_instantiate = + LIST_HEAD_INIT(security_hook_heads.d_instantiate), + .getprocattr = LIST_HEAD_INIT(security_hook_heads.getprocattr), + .setprocattr = LIST_HEAD_INIT(security_hook_heads.setprocattr), + .ismaclabel = LIST_HEAD_INIT(security_hook_heads.ismaclabel), + .secid_to_secctx = + LIST_HEAD_INIT(security_hook_heads.secid_to_secctx), + .secctx_to_secid = + LIST_HEAD_INIT(security_hook_heads.secctx_to_secid), + .release_secctx = + LIST_HEAD_INIT(security_hook_heads.release_secctx), + .inode_notifysecctx = + LIST_HEAD_INIT(security_hook_heads.inode_notifysecctx), + .inode_setsecctx = + LIST_HEAD_INIT(security_hook_heads.inode_setsecctx), + .inode_getsecctx = + LIST_HEAD_INIT(security_hook_heads.inode_getsecctx), +#ifdef CONFIG_SECURITY_NETWORK + .unix_stream_connect = + LIST_HEAD_INIT(security_hook_heads.unix_stream_connect), + .unix_may_send = + LIST_HEAD_INIT(security_hook_heads.unix_may_send), + .socket_create = + LIST_HEAD_INIT(security_hook_heads.socket_create), + .socket_post_create = + LIST_HEAD_INIT(security_hook_heads.socket_post_create), + .socket_bind = LIST_HEAD_INIT(security_hook_heads.socket_bind), + .socket_connect = + LIST_HEAD_INIT(security_hook_heads.socket_connect), + .socket_listen = + LIST_HEAD_INIT(security_hook_heads.socket_listen), + .socket_accept = + LIST_HEAD_INIT(security_hook_heads.socket_accept), + .socket_sendmsg = + LIST_HEAD_INIT(security_hook_heads.socket_sendmsg), + .socket_recvmsg = + LIST_HEAD_INIT(security_hook_heads.socket_recvmsg), + .socket_getsockname = + LIST_HEAD_INIT(security_hook_heads.socket_getsockname), + .socket_getpeername = + LIST_HEAD_INIT(security_hook_heads.socket_getpeername), + .socket_getsockopt = + LIST_HEAD_INIT(security_hook_heads.socket_getsockopt), + .socket_setsockopt = + LIST_HEAD_INIT(security_hook_heads.socket_setsockopt), + .socket_shutdown = + LIST_HEAD_INIT(security_hook_heads.socket_shutdown), + .socket_sock_rcv_skb = + LIST_HEAD_INIT(security_hook_heads.socket_sock_rcv_skb), + .socket_getpeersec_stream = + LIST_HEAD_INIT(security_hook_heads.socket_getpeersec_stream), + .socket_getpeersec_dgram = + LIST_HEAD_INIT(security_hook_heads.socket_getpeersec_dgram), + .sk_alloc_security = + LIST_HEAD_INIT(security_hook_heads.sk_alloc_security), + .sk_free_security = + LIST_HEAD_INIT(security_hook_heads.sk_free_security), + .sk_clone_security = + LIST_HEAD_INIT(security_hook_heads.sk_clone_security), + .sk_getsecid = LIST_HEAD_INIT(security_hook_heads.sk_getsecid), + .sock_graft = LIST_HEAD_INIT(security_hook_heads.sock_graft), + .inet_conn_request = + LIST_HEAD_INIT(security_hook_heads.inet_conn_request), + .inet_csk_clone = + LIST_HEAD_INIT(security_hook_heads.inet_csk_clone), + .inet_conn_established = + LIST_HEAD_INIT(security_hook_heads.inet_conn_established), + .secmark_relabel_packet = + LIST_HEAD_INIT(security_hook_heads.secmark_relabel_packet), + .secmark_refcount_inc = + LIST_HEAD_INIT(security_hook_heads.secmark_refcount_inc), + .secmark_refcount_dec = + LIST_HEAD_INIT(security_hook_heads.secmark_refcount_dec), + .req_classify_flow = + LIST_HEAD_INIT(security_hook_heads.req_classify_flow), + .tun_dev_alloc_security = + LIST_HEAD_INIT(security_hook_heads.tun_dev_alloc_security), + .tun_dev_free_security = + LIST_HEAD_INIT(security_hook_heads.tun_dev_free_security), + .tun_dev_create = + LIST_HEAD_INIT(security_hook_heads.tun_dev_create), + .tun_dev_attach_queue = + LIST_HEAD_INIT(security_hook_heads.tun_dev_attach_queue), + .tun_dev_attach = + LIST_HEAD_INIT(security_hook_heads.tun_dev_attach), + .tun_dev_open = LIST_HEAD_INIT(security_hook_heads.tun_dev_open), + .skb_owned_by = LIST_HEAD_INIT(security_hook_heads.skb_owned_by), +#endif /* CONFIG_SECURITY_NETWORK */ +#ifdef CONFIG_SECURITY_NETWORK_XFRM + .xfrm_policy_alloc_security = + LIST_HEAD_INIT(security_hook_heads.xfrm_policy_alloc_security), + .xfrm_policy_clone_security = + LIST_HEAD_INIT(security_hook_heads.xfrm_policy_clone_security), + .xfrm_policy_free_security = + LIST_HEAD_INIT(security_hook_heads.xfrm_policy_free_security), + .xfrm_policy_delete_security = + LIST_HEAD_INIT(security_hook_heads.xfrm_policy_delete_security), + .xfrm_state_alloc = + LIST_HEAD_INIT(security_hook_heads.xfrm_state_alloc), + .xfrm_state_alloc_acquire = + LIST_HEAD_INIT(security_hook_heads.xfrm_state_alloc_acquire), + .xfrm_state_free_security = + LIST_HEAD_INIT(security_hook_heads.xfrm_state_free_security), + .xfrm_state_delete_security = + LIST_HEAD_INIT(security_hook_heads.xfrm_state_delete_security), + .xfrm_policy_lookup = + LIST_HEAD_INIT(security_hook_heads.xfrm_policy_lookup), + .xfrm_state_pol_flow_match = + LIST_HEAD_INIT(security_hook_heads.xfrm_state_pol_flow_match), + .xfrm_decode_session = + LIST_HEAD_INIT(security_hook_heads.xfrm_decode_session), +#endif /* CONFIG_SECURITY_NETWORK_XFRM */ +#ifdef CONFIG_KEYS + .key_alloc = LIST_HEAD_INIT(security_hook_heads.key_alloc), + .key_free = LIST_HEAD_INIT(security_hook_heads.key_free), + .key_permission = + LIST_HEAD_INIT(security_hook_heads.key_permission), + .key_getsecurity = + LIST_HEAD_INIT(security_hook_heads.key_getsecurity), +#endif /* CONFIG_KEYS */ +#ifdef CONFIG_AUDIT + .audit_rule_init = + LIST_HEAD_INIT(security_hook_heads.audit_rule_init), + .audit_rule_known = + LIST_HEAD_INIT(security_hook_heads.audit_rule_known), + .audit_rule_match = + LIST_HEAD_INIT(security_hook_heads.audit_rule_match), + .audit_rule_free = + LIST_HEAD_INIT(security_hook_heads.audit_rule_free), #endif /* CONFIG_AUDIT */ +}; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index ffa5a642629a..623108199641 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -29,7 +29,7 @@ #include <linux/tracehook.h> #include <linux/errno.h> #include <linux/sched.h> -#include <linux/security.h> +#include <linux/lsm_hooks.h> #include <linux/xattr.h> #include <linux/capability.h> #include <linux/unistd.h> @@ -403,6 +403,7 @@ static int selinux_is_sblabel_mnt(struct super_block *sb) return sbsec->behavior == SECURITY_FS_USE_XATTR || sbsec->behavior == SECURITY_FS_USE_TRANS || sbsec->behavior == SECURITY_FS_USE_TASK || + sbsec->behavior == SECURITY_FS_USE_NATIVE || /* Special handling. Genfs but also in-core setxattr handler */ !strcmp(sb->s_type->name, "sysfs") || !strcmp(sb->s_type->name, "pstore") || @@ -724,7 +725,12 @@ static int selinux_set_mnt_opts(struct super_block *sb, } if (strcmp(sb->s_type->name, "proc") == 0) - sbsec->flags |= SE_SBPROC; + sbsec->flags |= SE_SBPROC | SE_SBGENFS; + + if (!strcmp(sb->s_type->name, "debugfs") || + !strcmp(sb->s_type->name, "sysfs") || + !strcmp(sb->s_type->name, "pstore")) + sbsec->flags |= SE_SBGENFS; if (!sbsec->behavior) { /* @@ -1188,8 +1194,6 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc switch (protocol) { case NETLINK_ROUTE: return SECCLASS_NETLINK_ROUTE_SOCKET; - case NETLINK_FIREWALL: - return SECCLASS_NETLINK_FIREWALL_SOCKET; case NETLINK_SOCK_DIAG: return SECCLASS_NETLINK_TCPDIAG_SOCKET; case NETLINK_NFLOG: @@ -1198,14 +1202,28 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc return SECCLASS_NETLINK_XFRM_SOCKET; case NETLINK_SELINUX: return SECCLASS_NETLINK_SELINUX_SOCKET; + case NETLINK_ISCSI: + return SECCLASS_NETLINK_ISCSI_SOCKET; case NETLINK_AUDIT: return SECCLASS_NETLINK_AUDIT_SOCKET; - case NETLINK_IP6_FW: - return SECCLASS_NETLINK_IP6FW_SOCKET; + case NETLINK_FIB_LOOKUP: + return SECCLASS_NETLINK_FIB_LOOKUP_SOCKET; + case NETLINK_CONNECTOR: + return SECCLASS_NETLINK_CONNECTOR_SOCKET; + case NETLINK_NETFILTER: + return SECCLASS_NETLINK_NETFILTER_SOCKET; case NETLINK_DNRTMSG: return SECCLASS_NETLINK_DNRT_SOCKET; case NETLINK_KOBJECT_UEVENT: return SECCLASS_NETLINK_KOBJECT_UEVENT_SOCKET; + case NETLINK_GENERIC: + return SECCLASS_NETLINK_GENERIC_SOCKET; + case NETLINK_SCSITRANSPORT: + return SECCLASS_NETLINK_SCSITRANSPORT_SOCKET; + case NETLINK_RDMA: + return SECCLASS_NETLINK_RDMA_SOCKET; + case NETLINK_CRYPTO: + return SECCLASS_NETLINK_CRYPTO_SOCKET; default: return SECCLASS_NETLINK_SOCKET; } @@ -1220,12 +1238,13 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc return SECCLASS_SOCKET; } -#ifdef CONFIG_PROC_FS -static int selinux_proc_get_sid(struct dentry *dentry, - u16 tclass, - u32 *sid) +static int selinux_genfs_get_sid(struct dentry *dentry, + u16 tclass, + u16 flags, + u32 *sid) { int rc; + struct super_block *sb = dentry->d_inode->i_sb; char *buffer, *path; buffer = (char *)__get_free_page(GFP_KERNEL); @@ -1236,26 +1255,20 @@ static int selinux_proc_get_sid(struct dentry *dentry, if (IS_ERR(path)) rc = PTR_ERR(path); else { - /* each process gets a /proc/PID/ entry. Strip off the - * PID part to get a valid selinux labeling. - * e.g. /proc/1/net/rpc/nfs -> /net/rpc/nfs */ - while (path[1] >= '0' && path[1] <= '9') { - path[1] = '/'; - path++; + if (flags & SE_SBPROC) { + /* each process gets a /proc/PID/ entry. Strip off the + * PID part to get a valid selinux labeling. + * e.g. /proc/1/net/rpc/nfs -> /net/rpc/nfs */ + while (path[1] >= '0' && path[1] <= '9') { + path[1] = '/'; + path++; + } } - rc = security_genfs_sid("proc", path, tclass, sid); + rc = security_genfs_sid(sb->s_type->name, path, tclass, sid); } free_page((unsigned long)buffer); return rc; } -#else -static int selinux_proc_get_sid(struct dentry *dentry, - u16 tclass, - u32 *sid) -{ - return -EINVAL; -} -#endif /* The inode's security attributes must be initialized before first use. */ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dentry) @@ -1412,7 +1425,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent /* Default to the fs superblock SID. */ isec->sid = sbsec->sid; - if ((sbsec->flags & SE_SBPROC) && !S_ISLNK(inode->i_mode)) { + if ((sbsec->flags & SE_SBGENFS) && !S_ISLNK(inode->i_mode)) { /* We must have a dentry to determine the label on * procfs inodes */ if (opt_dentry) @@ -1435,7 +1448,8 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent if (!dentry) goto out_unlock; isec->sclass = inode_mode_to_security_class(inode->i_mode); - rc = selinux_proc_get_sid(dentry, isec->sclass, &sid); + rc = selinux_genfs_get_sid(dentry, isec->sclass, + sbsec->flags, &sid); dput(dentry); if (rc) goto out_unlock; @@ -1990,12 +2004,6 @@ static int selinux_binder_transfer_file(struct task_struct *from, static int selinux_ptrace_access_check(struct task_struct *child, unsigned int mode) { - int rc; - - rc = cap_ptrace_access_check(child, mode); - if (rc) - return rc; - if (mode & PTRACE_MODE_READ) { u32 sid = current_sid(); u32 csid = task_sid(child); @@ -2007,25 +2015,13 @@ static int selinux_ptrace_access_check(struct task_struct *child, static int selinux_ptrace_traceme(struct task_struct *parent) { - int rc; - - rc = cap_ptrace_traceme(parent); - if (rc) - return rc; - return task_has_perm(parent, current, PROCESS__PTRACE); } static int selinux_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - int error; - - error = current_has_perm(target, PROCESS__GETCAP); - if (error) - return error; - - return cap_capget(target, effective, inheritable, permitted); + return current_has_perm(target, PROCESS__GETCAP); } static int selinux_capset(struct cred *new, const struct cred *old, @@ -2033,13 +2029,6 @@ static int selinux_capset(struct cred *new, const struct cred *old, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) { - int error; - - error = cap_capset(new, old, - effective, inheritable, permitted); - if (error) - return error; - return cred_has_perm(old, new, PROCESS__SETCAP); } @@ -2056,12 +2045,6 @@ static int selinux_capset(struct cred *new, const struct cred *old, static int selinux_capable(const struct cred *cred, struct user_namespace *ns, int cap, int audit) { - int rc; - - rc = cap_capable(cred, ns, cap, audit); - if (rc) - return rc; - return cred_has_capability(cred, cap, audit); } @@ -2139,12 +2122,12 @@ static int selinux_vm_enough_memory(struct mm_struct *mm, long pages) { int rc, cap_sys_admin = 0; - rc = selinux_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN, - SECURITY_CAP_NOAUDIT); + rc = cred_has_capability(current_cred(), CAP_SYS_ADMIN, + SECURITY_CAP_NOAUDIT); if (rc == 0) cap_sys_admin = 1; - return __vm_enough_memory(mm, pages, cap_sys_admin); + return cap_sys_admin; } /* binprm security operations */ @@ -2193,10 +2176,6 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm) struct inode *inode = file_inode(bprm->file); int rc; - rc = cap_bprm_set_creds(bprm); - if (rc) - return rc; - /* SELinux context only depends on initial program or script and not * the script interpreter */ if (bprm->cred_prepared) @@ -2320,7 +2299,7 @@ static int selinux_bprm_secureexec(struct linux_binprm *bprm) PROCESS__NOATSECURE, NULL); } - return (atsecure || cap_bprm_secureexec(bprm)); + return !!atsecure; } static int match_file(const void *p, struct file *file, unsigned fd) @@ -2451,10 +2430,12 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm) for (i = 0; i < 3; i++) do_setitimer(i, &itimer, NULL); spin_lock_irq(¤t->sighand->siglock); - if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) { - __flush_signals(current); + if (!fatal_signal_pending(current)) { + flush_sigqueue(¤t->pending); + flush_sigqueue(¤t->signal->shared_pending); flush_signal_handlers(current, 1); sigemptyset(¤t->blocked); + recalc_sigpending(); } spin_unlock_irq(¤t->sighand->siglock); } @@ -3144,8 +3125,11 @@ static int selinux_inode_getsecurity(const struct inode *inode, const char *name * and lack of permission just means that we fall back to the * in-core context value, not a denial. */ - error = selinux_capable(current_cred(), &init_user_ns, CAP_MAC_ADMIN, - SECURITY_CAP_NOAUDIT); + error = cap_capable(current_cred(), &init_user_ns, CAP_MAC_ADMIN, + SECURITY_CAP_NOAUDIT); + if (!error) + error = cred_has_capability(current_cred(), CAP_MAC_ADMIN, + SECURITY_CAP_NOAUDIT); if (!error) error = security_sid_to_context_force(isec->sid, &context, &size); @@ -3330,12 +3314,7 @@ error: static int selinux_mmap_addr(unsigned long addr) { - int rc; - - /* do DAC check on address space usage */ - rc = cap_mmap_addr(addr); - if (rc) - return rc; + int rc = 0; if (addr < CONFIG_LSM_MMAP_MIN_ADDR) { u32 sid = current_sid(); @@ -3651,23 +3630,11 @@ static void selinux_task_getsecid(struct task_struct *p, u32 *secid) static int selinux_task_setnice(struct task_struct *p, int nice) { - int rc; - - rc = cap_task_setnice(p, nice); - if (rc) - return rc; - return current_has_perm(p, PROCESS__SETSCHED); } static int selinux_task_setioprio(struct task_struct *p, int ioprio) { - int rc; - - rc = cap_task_setioprio(p, ioprio); - if (rc) - return rc; - return current_has_perm(p, PROCESS__SETSCHED); } @@ -3693,12 +3660,6 @@ static int selinux_task_setrlimit(struct task_struct *p, unsigned int resource, static int selinux_task_setscheduler(struct task_struct *p) { - int rc; - - rc = cap_task_setscheduler(p); - if (rc) - return rc; - return current_has_perm(p, PROCESS__SETSCHED); } @@ -4780,8 +4741,9 @@ static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) if (err == -EINVAL) { printk(KERN_WARNING "SELinux: unrecognized netlink message:" - " protocol=%hu nlmsg_type=%hu sclass=%hu\n", - sk->sk_protocol, nlh->nlmsg_type, sksec->sclass); + " protocol=%hu nlmsg_type=%hu sclass=%s\n", + sk->sk_protocol, nlh->nlmsg_type, + secclass_map[sksec->sclass - 1].name); if (!selinux_enforcing || security_get_allow_unknown()) err = 0; } @@ -5109,12 +5071,6 @@ static unsigned int selinux_ipv6_postroute(const struct nf_hook_ops *ops, static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb) { - int err; - - err = cap_netlink_send(sk, skb); - if (err) - return err; - return selinux_nlmsg_perm(sk, skb); } @@ -5852,218 +5808,220 @@ static int selinux_key_getsecurity(struct key *key, char **_buffer) #endif -static struct security_operations selinux_ops = { - .name = "selinux", - - .binder_set_context_mgr = selinux_binder_set_context_mgr, - .binder_transaction = selinux_binder_transaction, - .binder_transfer_binder = selinux_binder_transfer_binder, - .binder_transfer_file = selinux_binder_transfer_file, - - .ptrace_access_check = selinux_ptrace_access_check, - .ptrace_traceme = selinux_ptrace_traceme, - .capget = selinux_capget, - .capset = selinux_capset, - .capable = selinux_capable, - .quotactl = selinux_quotactl, - .quota_on = selinux_quota_on, - .syslog = selinux_syslog, - .vm_enough_memory = selinux_vm_enough_memory, - - .netlink_send = selinux_netlink_send, - - .bprm_set_creds = selinux_bprm_set_creds, - .bprm_committing_creds = selinux_bprm_committing_creds, - .bprm_committed_creds = selinux_bprm_committed_creds, - .bprm_secureexec = selinux_bprm_secureexec, - - .sb_alloc_security = selinux_sb_alloc_security, - .sb_free_security = selinux_sb_free_security, - .sb_copy_data = selinux_sb_copy_data, - .sb_remount = selinux_sb_remount, - .sb_kern_mount = selinux_sb_kern_mount, - .sb_show_options = selinux_sb_show_options, - .sb_statfs = selinux_sb_statfs, - .sb_mount = selinux_mount, - .sb_umount = selinux_umount, - .sb_set_mnt_opts = selinux_set_mnt_opts, - .sb_clone_mnt_opts = selinux_sb_clone_mnt_opts, - .sb_parse_opts_str = selinux_parse_opts_str, - - .dentry_init_security = selinux_dentry_init_security, - - .inode_alloc_security = selinux_inode_alloc_security, - .inode_free_security = selinux_inode_free_security, - .inode_init_security = selinux_inode_init_security, - .inode_create = selinux_inode_create, - .inode_link = selinux_inode_link, - .inode_unlink = selinux_inode_unlink, - .inode_symlink = selinux_inode_symlink, - .inode_mkdir = selinux_inode_mkdir, - .inode_rmdir = selinux_inode_rmdir, - .inode_mknod = selinux_inode_mknod, - .inode_rename = selinux_inode_rename, - .inode_readlink = selinux_inode_readlink, - .inode_follow_link = selinux_inode_follow_link, - .inode_permission = selinux_inode_permission, - .inode_setattr = selinux_inode_setattr, - .inode_getattr = selinux_inode_getattr, - .inode_setxattr = selinux_inode_setxattr, - .inode_post_setxattr = selinux_inode_post_setxattr, - .inode_getxattr = selinux_inode_getxattr, - .inode_listxattr = selinux_inode_listxattr, - .inode_removexattr = selinux_inode_removexattr, - .inode_getsecurity = selinux_inode_getsecurity, - .inode_setsecurity = selinux_inode_setsecurity, - .inode_listsecurity = selinux_inode_listsecurity, - .inode_getsecid = selinux_inode_getsecid, - - .file_permission = selinux_file_permission, - .file_alloc_security = selinux_file_alloc_security, - .file_free_security = selinux_file_free_security, - .file_ioctl = selinux_file_ioctl, - .mmap_file = selinux_mmap_file, - .mmap_addr = selinux_mmap_addr, - .file_mprotect = selinux_file_mprotect, - .file_lock = selinux_file_lock, - .file_fcntl = selinux_file_fcntl, - .file_set_fowner = selinux_file_set_fowner, - .file_send_sigiotask = selinux_file_send_sigiotask, - .file_receive = selinux_file_receive, - - .file_open = selinux_file_open, - - .task_create = selinux_task_create, - .cred_alloc_blank = selinux_cred_alloc_blank, - .cred_free = selinux_cred_free, - .cred_prepare = selinux_cred_prepare, - .cred_transfer = selinux_cred_transfer, - .kernel_act_as = selinux_kernel_act_as, - .kernel_create_files_as = selinux_kernel_create_files_as, - .kernel_module_request = selinux_kernel_module_request, - .task_setpgid = selinux_task_setpgid, - .task_getpgid = selinux_task_getpgid, - .task_getsid = selinux_task_getsid, - .task_getsecid = selinux_task_getsecid, - .task_setnice = selinux_task_setnice, - .task_setioprio = selinux_task_setioprio, - .task_getioprio = selinux_task_getioprio, - .task_setrlimit = selinux_task_setrlimit, - .task_setscheduler = selinux_task_setscheduler, - .task_getscheduler = selinux_task_getscheduler, - .task_movememory = selinux_task_movememory, - .task_kill = selinux_task_kill, - .task_wait = selinux_task_wait, - .task_to_inode = selinux_task_to_inode, - - .ipc_permission = selinux_ipc_permission, - .ipc_getsecid = selinux_ipc_getsecid, - - .msg_msg_alloc_security = selinux_msg_msg_alloc_security, - .msg_msg_free_security = selinux_msg_msg_free_security, - - .msg_queue_alloc_security = selinux_msg_queue_alloc_security, - .msg_queue_free_security = selinux_msg_queue_free_security, - .msg_queue_associate = selinux_msg_queue_associate, - .msg_queue_msgctl = selinux_msg_queue_msgctl, - .msg_queue_msgsnd = selinux_msg_queue_msgsnd, - .msg_queue_msgrcv = selinux_msg_queue_msgrcv, - - .shm_alloc_security = selinux_shm_alloc_security, - .shm_free_security = selinux_shm_free_security, - .shm_associate = selinux_shm_associate, - .shm_shmctl = selinux_shm_shmctl, - .shm_shmat = selinux_shm_shmat, - - .sem_alloc_security = selinux_sem_alloc_security, - .sem_free_security = selinux_sem_free_security, - .sem_associate = selinux_sem_associate, - .sem_semctl = selinux_sem_semctl, - .sem_semop = selinux_sem_semop, - - .d_instantiate = selinux_d_instantiate, - - .getprocattr = selinux_getprocattr, - .setprocattr = selinux_setprocattr, - - .ismaclabel = selinux_ismaclabel, - .secid_to_secctx = selinux_secid_to_secctx, - .secctx_to_secid = selinux_secctx_to_secid, - .release_secctx = selinux_release_secctx, - .inode_notifysecctx = selinux_inode_notifysecctx, - .inode_setsecctx = selinux_inode_setsecctx, - .inode_getsecctx = selinux_inode_getsecctx, - - .unix_stream_connect = selinux_socket_unix_stream_connect, - .unix_may_send = selinux_socket_unix_may_send, - - .socket_create = selinux_socket_create, - .socket_post_create = selinux_socket_post_create, - .socket_bind = selinux_socket_bind, - .socket_connect = selinux_socket_connect, - .socket_listen = selinux_socket_listen, - .socket_accept = selinux_socket_accept, - .socket_sendmsg = selinux_socket_sendmsg, - .socket_recvmsg = selinux_socket_recvmsg, - .socket_getsockname = selinux_socket_getsockname, - .socket_getpeername = selinux_socket_getpeername, - .socket_getsockopt = selinux_socket_getsockopt, - .socket_setsockopt = selinux_socket_setsockopt, - .socket_shutdown = selinux_socket_shutdown, - .socket_sock_rcv_skb = selinux_socket_sock_rcv_skb, - .socket_getpeersec_stream = selinux_socket_getpeersec_stream, - .socket_getpeersec_dgram = selinux_socket_getpeersec_dgram, - .sk_alloc_security = selinux_sk_alloc_security, - .sk_free_security = selinux_sk_free_security, - .sk_clone_security = selinux_sk_clone_security, - .sk_getsecid = selinux_sk_getsecid, - .sock_graft = selinux_sock_graft, - .inet_conn_request = selinux_inet_conn_request, - .inet_csk_clone = selinux_inet_csk_clone, - .inet_conn_established = selinux_inet_conn_established, - .secmark_relabel_packet = selinux_secmark_relabel_packet, - .secmark_refcount_inc = selinux_secmark_refcount_inc, - .secmark_refcount_dec = selinux_secmark_refcount_dec, - .req_classify_flow = selinux_req_classify_flow, - .tun_dev_alloc_security = selinux_tun_dev_alloc_security, - .tun_dev_free_security = selinux_tun_dev_free_security, - .tun_dev_create = selinux_tun_dev_create, - .tun_dev_attach_queue = selinux_tun_dev_attach_queue, - .tun_dev_attach = selinux_tun_dev_attach, - .tun_dev_open = selinux_tun_dev_open, +static struct security_hook_list selinux_hooks[] = { + LSM_HOOK_INIT(binder_set_context_mgr, selinux_binder_set_context_mgr), + LSM_HOOK_INIT(binder_transaction, selinux_binder_transaction), + LSM_HOOK_INIT(binder_transfer_binder, selinux_binder_transfer_binder), + LSM_HOOK_INIT(binder_transfer_file, selinux_binder_transfer_file), + + LSM_HOOK_INIT(ptrace_access_check, selinux_ptrace_access_check), + LSM_HOOK_INIT(ptrace_traceme, selinux_ptrace_traceme), + LSM_HOOK_INIT(capget, selinux_capget), + LSM_HOOK_INIT(capset, selinux_capset), + LSM_HOOK_INIT(capable, selinux_capable), + LSM_HOOK_INIT(quotactl, selinux_quotactl), + LSM_HOOK_INIT(quota_on, selinux_quota_on), + LSM_HOOK_INIT(syslog, selinux_syslog), + LSM_HOOK_INIT(vm_enough_memory, selinux_vm_enough_memory), + + LSM_HOOK_INIT(netlink_send, selinux_netlink_send), + + LSM_HOOK_INIT(bprm_set_creds, selinux_bprm_set_creds), + LSM_HOOK_INIT(bprm_committing_creds, selinux_bprm_committing_creds), + LSM_HOOK_INIT(bprm_committed_creds, selinux_bprm_committed_creds), + LSM_HOOK_INIT(bprm_secureexec, selinux_bprm_secureexec), + + LSM_HOOK_INIT(sb_alloc_security, selinux_sb_alloc_security), + LSM_HOOK_INIT(sb_free_security, selinux_sb_free_security), + LSM_HOOK_INIT(sb_copy_data, selinux_sb_copy_data), + LSM_HOOK_INIT(sb_remount, selinux_sb_remount), + LSM_HOOK_INIT(sb_kern_mount, selinux_sb_kern_mount), + LSM_HOOK_INIT(sb_show_options, selinux_sb_show_options), + LSM_HOOK_INIT(sb_statfs, selinux_sb_statfs), + LSM_HOOK_INIT(sb_mount, selinux_mount), + LSM_HOOK_INIT(sb_umount, selinux_umount), + LSM_HOOK_INIT(sb_set_mnt_opts, selinux_set_mnt_opts), + LSM_HOOK_INIT(sb_clone_mnt_opts, selinux_sb_clone_mnt_opts), + LSM_HOOK_INIT(sb_parse_opts_str, selinux_parse_opts_str), + + LSM_HOOK_INIT(dentry_init_security, selinux_dentry_init_security), + + LSM_HOOK_INIT(inode_alloc_security, selinux_inode_alloc_security), + LSM_HOOK_INIT(inode_free_security, selinux_inode_free_security), + LSM_HOOK_INIT(inode_init_security, selinux_inode_init_security), + LSM_HOOK_INIT(inode_create, selinux_inode_create), + LSM_HOOK_INIT(inode_link, selinux_inode_link), + LSM_HOOK_INIT(inode_unlink, selinux_inode_unlink), + LSM_HOOK_INIT(inode_symlink, selinux_inode_symlink), + LSM_HOOK_INIT(inode_mkdir, selinux_inode_mkdir), + LSM_HOOK_INIT(inode_rmdir, selinux_inode_rmdir), + LSM_HOOK_INIT(inode_mknod, selinux_inode_mknod), + LSM_HOOK_INIT(inode_rename, selinux_inode_rename), + LSM_HOOK_INIT(inode_readlink, selinux_inode_readlink), + LSM_HOOK_INIT(inode_follow_link, selinux_inode_follow_link), + LSM_HOOK_INIT(inode_permission, selinux_inode_permission), + LSM_HOOK_INIT(inode_setattr, selinux_inode_setattr), + LSM_HOOK_INIT(inode_getattr, selinux_inode_getattr), + LSM_HOOK_INIT(inode_setxattr, selinux_inode_setxattr), + LSM_HOOK_INIT(inode_post_setxattr, selinux_inode_post_setxattr), + LSM_HOOK_INIT(inode_getxattr, selinux_inode_getxattr), + LSM_HOOK_INIT(inode_listxattr, selinux_inode_listxattr), + LSM_HOOK_INIT(inode_removexattr, selinux_inode_removexattr), + LSM_HOOK_INIT(inode_getsecurity, selinux_inode_getsecurity), + LSM_HOOK_INIT(inode_setsecurity, selinux_inode_setsecurity), + LSM_HOOK_INIT(inode_listsecurity, selinux_inode_listsecurity), + LSM_HOOK_INIT(inode_getsecid, selinux_inode_getsecid), + + LSM_HOOK_INIT(file_permission, selinux_file_permission), + LSM_HOOK_INIT(file_alloc_security, selinux_file_alloc_security), + LSM_HOOK_INIT(file_free_security, selinux_file_free_security), + LSM_HOOK_INIT(file_ioctl, selinux_file_ioctl), + LSM_HOOK_INIT(mmap_file, selinux_mmap_file), + LSM_HOOK_INIT(mmap_addr, selinux_mmap_addr), + LSM_HOOK_INIT(file_mprotect, selinux_file_mprotect), + LSM_HOOK_INIT(file_lock, selinux_file_lock), + LSM_HOOK_INIT(file_fcntl, selinux_file_fcntl), + LSM_HOOK_INIT(file_set_fowner, selinux_file_set_fowner), + LSM_HOOK_INIT(file_send_sigiotask, selinux_file_send_sigiotask), + LSM_HOOK_INIT(file_receive, selinux_file_receive), + + LSM_HOOK_INIT(file_open, selinux_file_open), + + LSM_HOOK_INIT(task_create, selinux_task_create), + LSM_HOOK_INIT(cred_alloc_blank, selinux_cred_alloc_blank), + LSM_HOOK_INIT(cred_free, selinux_cred_free), + LSM_HOOK_INIT(cred_prepare, selinux_cred_prepare), + LSM_HOOK_INIT(cred_transfer, selinux_cred_transfer), + LSM_HOOK_INIT(kernel_act_as, selinux_kernel_act_as), + LSM_HOOK_INIT(kernel_create_files_as, selinux_kernel_create_files_as), + LSM_HOOK_INIT(kernel_module_request, selinux_kernel_module_request), + LSM_HOOK_INIT(task_setpgid, selinux_task_setpgid), + LSM_HOOK_INIT(task_getpgid, selinux_task_getpgid), + LSM_HOOK_INIT(task_getsid, selinux_task_getsid), + LSM_HOOK_INIT(task_getsecid, selinux_task_getsecid), + LSM_HOOK_INIT(task_setnice, selinux_task_setnice), + LSM_HOOK_INIT(task_setioprio, selinux_task_setioprio), + LSM_HOOK_INIT(task_getioprio, selinux_task_getioprio), + LSM_HOOK_INIT(task_setrlimit, selinux_task_setrlimit), + LSM_HOOK_INIT(task_setscheduler, selinux_task_setscheduler), + LSM_HOOK_INIT(task_getscheduler, selinux_task_getscheduler), + LSM_HOOK_INIT(task_movememory, selinux_task_movememory), + LSM_HOOK_INIT(task_kill, selinux_task_kill), + LSM_HOOK_INIT(task_wait, selinux_task_wait), + LSM_HOOK_INIT(task_to_inode, selinux_task_to_inode), + + LSM_HOOK_INIT(ipc_permission, selinux_ipc_permission), + LSM_HOOK_INIT(ipc_getsecid, selinux_ipc_getsecid), + + LSM_HOOK_INIT(msg_msg_alloc_security, selinux_msg_msg_alloc_security), + LSM_HOOK_INIT(msg_msg_free_security, selinux_msg_msg_free_security), + + LSM_HOOK_INIT(msg_queue_alloc_security, + selinux_msg_queue_alloc_security), + LSM_HOOK_INIT(msg_queue_free_security, selinux_msg_queue_free_security), + LSM_HOOK_INIT(msg_queue_associate, selinux_msg_queue_associate), + LSM_HOOK_INIT(msg_queue_msgctl, selinux_msg_queue_msgctl), + LSM_HOOK_INIT(msg_queue_msgsnd, selinux_msg_queue_msgsnd), + LSM_HOOK_INIT(msg_queue_msgrcv, selinux_msg_queue_msgrcv), + + LSM_HOOK_INIT(shm_alloc_security, selinux_shm_alloc_security), + LSM_HOOK_INIT(shm_free_security, selinux_shm_free_security), + LSM_HOOK_INIT(shm_associate, selinux_shm_associate), + LSM_HOOK_INIT(shm_shmctl, selinux_shm_shmctl), + LSM_HOOK_INIT(shm_shmat, selinux_shm_shmat), + + LSM_HOOK_INIT(sem_alloc_security, selinux_sem_alloc_security), + LSM_HOOK_INIT(sem_free_security, selinux_sem_free_security), + LSM_HOOK_INIT(sem_associate, selinux_sem_associate), + LSM_HOOK_INIT(sem_semctl, selinux_sem_semctl), + LSM_HOOK_INIT(sem_semop, selinux_sem_semop), + + LSM_HOOK_INIT(d_instantiate, selinux_d_instantiate), + + LSM_HOOK_INIT(getprocattr, selinux_getprocattr), + LSM_HOOK_INIT(setprocattr, selinux_setprocattr), + + LSM_HOOK_INIT(ismaclabel, selinux_ismaclabel), + LSM_HOOK_INIT(secid_to_secctx, selinux_secid_to_secctx), + LSM_HOOK_INIT(secctx_to_secid, selinux_secctx_to_secid), + LSM_HOOK_INIT(release_secctx, selinux_release_secctx), + LSM_HOOK_INIT(inode_notifysecctx, selinux_inode_notifysecctx), + LSM_HOOK_INIT(inode_setsecctx, selinux_inode_setsecctx), + LSM_HOOK_INIT(inode_getsecctx, selinux_inode_getsecctx), + + LSM_HOOK_INIT(unix_stream_connect, selinux_socket_unix_stream_connect), + LSM_HOOK_INIT(unix_may_send, selinux_socket_unix_may_send), + + LSM_HOOK_INIT(socket_create, selinux_socket_create), + LSM_HOOK_INIT(socket_post_create, selinux_socket_post_create), + LSM_HOOK_INIT(socket_bind, selinux_socket_bind), + LSM_HOOK_INIT(socket_connect, selinux_socket_connect), + LSM_HOOK_INIT(socket_listen, selinux_socket_listen), + LSM_HOOK_INIT(socket_accept, selinux_socket_accept), + LSM_HOOK_INIT(socket_sendmsg, selinux_socket_sendmsg), + LSM_HOOK_INIT(socket_recvmsg, selinux_socket_recvmsg), + LSM_HOOK_INIT(socket_getsockname, selinux_socket_getsockname), + LSM_HOOK_INIT(socket_getpeername, selinux_socket_getpeername), + LSM_HOOK_INIT(socket_getsockopt, selinux_socket_getsockopt), + LSM_HOOK_INIT(socket_setsockopt, selinux_socket_setsockopt), + LSM_HOOK_INIT(socket_shutdown, selinux_socket_shutdown), + LSM_HOOK_INIT(socket_sock_rcv_skb, selinux_socket_sock_rcv_skb), + LSM_HOOK_INIT(socket_getpeersec_stream, + selinux_socket_getpeersec_stream), + LSM_HOOK_INIT(socket_getpeersec_dgram, selinux_socket_getpeersec_dgram), + LSM_HOOK_INIT(sk_alloc_security, selinux_sk_alloc_security), + LSM_HOOK_INIT(sk_free_security, selinux_sk_free_security), + LSM_HOOK_INIT(sk_clone_security, selinux_sk_clone_security), + LSM_HOOK_INIT(sk_getsecid, selinux_sk_getsecid), + LSM_HOOK_INIT(sock_graft, selinux_sock_graft), + LSM_HOOK_INIT(inet_conn_request, selinux_inet_conn_request), + LSM_HOOK_INIT(inet_csk_clone, selinux_inet_csk_clone), + LSM_HOOK_INIT(inet_conn_established, selinux_inet_conn_established), + LSM_HOOK_INIT(secmark_relabel_packet, selinux_secmark_relabel_packet), + LSM_HOOK_INIT(secmark_refcount_inc, selinux_secmark_refcount_inc), + LSM_HOOK_INIT(secmark_refcount_dec, selinux_secmark_refcount_dec), + LSM_HOOK_INIT(req_classify_flow, selinux_req_classify_flow), + LSM_HOOK_INIT(tun_dev_alloc_security, selinux_tun_dev_alloc_security), + LSM_HOOK_INIT(tun_dev_free_security, selinux_tun_dev_free_security), + LSM_HOOK_INIT(tun_dev_create, selinux_tun_dev_create), + LSM_HOOK_INIT(tun_dev_attach_queue, selinux_tun_dev_attach_queue), + LSM_HOOK_INIT(tun_dev_attach, selinux_tun_dev_attach), + LSM_HOOK_INIT(tun_dev_open, selinux_tun_dev_open), #ifdef CONFIG_SECURITY_NETWORK_XFRM - .xfrm_policy_alloc_security = selinux_xfrm_policy_alloc, - .xfrm_policy_clone_security = selinux_xfrm_policy_clone, - .xfrm_policy_free_security = selinux_xfrm_policy_free, - .xfrm_policy_delete_security = selinux_xfrm_policy_delete, - .xfrm_state_alloc = selinux_xfrm_state_alloc, - .xfrm_state_alloc_acquire = selinux_xfrm_state_alloc_acquire, - .xfrm_state_free_security = selinux_xfrm_state_free, - .xfrm_state_delete_security = selinux_xfrm_state_delete, - .xfrm_policy_lookup = selinux_xfrm_policy_lookup, - .xfrm_state_pol_flow_match = selinux_xfrm_state_pol_flow_match, - .xfrm_decode_session = selinux_xfrm_decode_session, + LSM_HOOK_INIT(xfrm_policy_alloc_security, selinux_xfrm_policy_alloc), + LSM_HOOK_INIT(xfrm_policy_clone_security, selinux_xfrm_policy_clone), + LSM_HOOK_INIT(xfrm_policy_free_security, selinux_xfrm_policy_free), + LSM_HOOK_INIT(xfrm_policy_delete_security, selinux_xfrm_policy_delete), + LSM_HOOK_INIT(xfrm_state_alloc, selinux_xfrm_state_alloc), + LSM_HOOK_INIT(xfrm_state_alloc_acquire, + selinux_xfrm_state_alloc_acquire), + LSM_HOOK_INIT(xfrm_state_free_security, selinux_xfrm_state_free), + LSM_HOOK_INIT(xfrm_state_delete_security, selinux_xfrm_state_delete), + LSM_HOOK_INIT(xfrm_policy_lookup, selinux_xfrm_policy_lookup), + LSM_HOOK_INIT(xfrm_state_pol_flow_match, + selinux_xfrm_state_pol_flow_match), + LSM_HOOK_INIT(xfrm_decode_session, selinux_xfrm_decode_session), #endif #ifdef CONFIG_KEYS - .key_alloc = selinux_key_alloc, - .key_free = selinux_key_free, - .key_permission = selinux_key_permission, - .key_getsecurity = selinux_key_getsecurity, + LSM_HOOK_INIT(key_alloc, selinux_key_alloc), + LSM_HOOK_INIT(key_free, selinux_key_free), + LSM_HOOK_INIT(key_permission, selinux_key_permission), + LSM_HOOK_INIT(key_getsecurity, selinux_key_getsecurity), #endif #ifdef CONFIG_AUDIT - .audit_rule_init = selinux_audit_rule_init, - .audit_rule_known = selinux_audit_rule_known, - .audit_rule_match = selinux_audit_rule_match, - .audit_rule_free = selinux_audit_rule_free, + LSM_HOOK_INIT(audit_rule_init, selinux_audit_rule_init), + LSM_HOOK_INIT(audit_rule_known, selinux_audit_rule_known), + LSM_HOOK_INIT(audit_rule_match, selinux_audit_rule_match), + LSM_HOOK_INIT(audit_rule_free, selinux_audit_rule_free), #endif }; static __init int selinux_init(void) { - if (!security_module_enable(&selinux_ops)) { + if (!security_module_enable("selinux")) { selinux_enabled = 0; return 0; } @@ -6085,8 +6043,7 @@ static __init int selinux_init(void) 0, SLAB_PANIC, NULL); avc_init(); - if (register_security(&selinux_ops)) - panic("SELinux: Unable to register with kernel.\n"); + security_add_hooks(selinux_hooks, ARRAY_SIZE(selinux_hooks)); if (avc_add_callback(selinux_netcache_avc_callback, AVC_CALLBACK_RESET)) panic("SELinux: Unable to register AVC netcache callback\n"); @@ -6214,7 +6171,7 @@ int selinux_disable(void) selinux_disabled = 1; selinux_enabled = 0; - reset_security_ops(); + security_delete_hooks(selinux_hooks, ARRAY_SIZE(selinux_hooks)); /* Try to destroy the avc node cache */ avc_disable(); diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index eccd61b3de8a..5a4eef59aeff 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -2,12 +2,12 @@ "getattr", "setattr", "lock", "relabelfrom", "relabelto", "append" #define COMMON_FILE_PERMS COMMON_FILE_SOCK_PERMS, "unlink", "link", \ - "rename", "execute", "swapon", "quotaon", "mounton", "audit_access", \ + "rename", "execute", "quotaon", "mounton", "audit_access", \ "open", "execmod" #define COMMON_SOCK_PERMS COMMON_FILE_SOCK_PERMS, "bind", "connect", \ "listen", "accept", "getopt", "setopt", "shutdown", "recvfrom", \ - "sendto", "recv_msg", "send_msg", "name_bind" + "sendto", "name_bind" #define COMMON_IPC_PERMS "create", "destroy", "getattr", "setattr", "read", \ "write", "associate", "unix_read", "unix_write" @@ -44,7 +44,7 @@ struct security_class_mapping secclass_map[] = { "audit_control", "setfcap", NULL } }, { "filesystem", { "mount", "remount", "unmount", "getattr", - "relabelfrom", "relabelto", "transition", "associate", "quotamod", + "relabelfrom", "relabelto", "associate", "quotamod", "quotaget", NULL } }, { "file", { COMMON_FILE_PERMS, @@ -67,7 +67,7 @@ struct security_class_mapping secclass_map[] = { { COMMON_SOCK_PERMS, NULL } }, { "tcp_socket", { COMMON_SOCK_PERMS, - "connectto", "newconn", "acceptfrom", "node_bind", "name_connect", + "node_bind", "name_connect", NULL } }, { "udp_socket", { COMMON_SOCK_PERMS, @@ -76,13 +76,9 @@ struct security_class_mapping secclass_map[] = { { COMMON_SOCK_PERMS, "node_bind", NULL } }, { "node", - { "tcp_recv", "tcp_send", "udp_recv", "udp_send", - "rawip_recv", "rawip_send", "enforce_dest", - "dccp_recv", "dccp_send", "recvfrom", "sendto", NULL } }, + { "recvfrom", "sendto", NULL } }, { "netif", - { "tcp_recv", "tcp_send", "udp_recv", "udp_send", - "rawip_recv", "rawip_send", "dccp_recv", "dccp_send", - "ingress", "egress", NULL } }, + { "ingress", "egress", NULL } }, { "netlink_socket", { COMMON_SOCK_PERMS, NULL } }, { "packet_socket", @@ -90,11 +86,9 @@ struct security_class_mapping secclass_map[] = { { "key_socket", { COMMON_SOCK_PERMS, NULL } }, { "unix_stream_socket", - { COMMON_SOCK_PERMS, "connectto", "newconn", "acceptfrom", NULL - } }, + { COMMON_SOCK_PERMS, "connectto", NULL } }, { "unix_dgram_socket", - { COMMON_SOCK_PERMS, NULL - } }, + { COMMON_SOCK_PERMS, NULL } }, { "sem", { COMMON_IPC_PERMS, NULL } }, { "msg", { "send", "receive", NULL } }, @@ -107,9 +101,6 @@ struct security_class_mapping secclass_map[] = { { "netlink_route_socket", { COMMON_SOCK_PERMS, "nlmsg_read", "nlmsg_write", NULL } }, - { "netlink_firewall_socket", - { COMMON_SOCK_PERMS, - "nlmsg_read", "nlmsg_write", NULL } }, { "netlink_tcpdiag_socket", { COMMON_SOCK_PERMS, "nlmsg_read", "nlmsg_write", NULL } }, @@ -120,19 +111,32 @@ struct security_class_mapping secclass_map[] = { "nlmsg_read", "nlmsg_write", NULL } }, { "netlink_selinux_socket", { COMMON_SOCK_PERMS, NULL } }, + { "netlink_iscsi_socket", + { COMMON_SOCK_PERMS, NULL } }, { "netlink_audit_socket", { COMMON_SOCK_PERMS, "nlmsg_read", "nlmsg_write", "nlmsg_relay", "nlmsg_readpriv", "nlmsg_tty_audit", NULL } }, - { "netlink_ip6fw_socket", - { COMMON_SOCK_PERMS, - "nlmsg_read", "nlmsg_write", NULL } }, + { "netlink_fib_lookup_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "netlink_connector_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "netlink_netfilter_socket", + { COMMON_SOCK_PERMS, NULL } }, { "netlink_dnrt_socket", { COMMON_SOCK_PERMS, NULL } }, { "association", { "sendto", "recvfrom", "setcontext", "polmatch", NULL } }, { "netlink_kobject_uevent_socket", { COMMON_SOCK_PERMS, NULL } }, + { "netlink_generic_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "netlink_scsitransport_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "netlink_rdma_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "netlink_crypto_socket", + { COMMON_SOCK_PERMS, NULL } }, { "appletalk_socket", { COMMON_SOCK_PERMS, NULL } }, { "packet", diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index d1e0b239b602..36993ad1c067 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -56,6 +56,7 @@ /* Non-mount related flags */ #define SE_SBINITIALIZED 0x0100 #define SE_SBPROC 0x0200 +#define SE_SBGENFS 0x0400 #define CONTEXT_STR "context=" #define FSCONTEXT_STR "fscontext=" diff --git a/security/smack/smack.h b/security/smack/smack.h index 49eada6266ec..244e035e5a99 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -15,7 +15,7 @@ #include <linux/capability.h> #include <linux/spinlock.h> -#include <linux/security.h> +#include <linux/lsm_hooks.h> #include <linux/in.h> #include <net/netlabel.h> #include <linux/list.h> @@ -138,6 +138,11 @@ struct smk_port_label { struct smack_known *smk_out; /* outgoing label */ }; +struct smack_onlycap { + struct list_head list; + struct smack_known *smk_label; +}; + /* * Mount options */ @@ -249,6 +254,7 @@ int smk_netlbl_mls(int, char *, struct netlbl_lsm_secattr *, int); struct smack_known *smk_import_entry(const char *, int); void smk_insert_entry(struct smack_known *skp); struct smack_known *smk_find_entry(const char *); +int smack_privileged(int cap); /* * Shared data. @@ -257,7 +263,6 @@ extern int smack_enabled; extern int smack_cipso_direct; extern int smack_cipso_mapped; extern struct smack_known *smack_net_ambient; -extern struct smack_known *smack_onlycap; extern struct smack_known *smack_syslog_label; #ifdef CONFIG_SECURITY_SMACK_BRINGUP extern struct smack_known *smack_unconfined; @@ -276,7 +281,8 @@ extern struct mutex smack_known_lock; extern struct list_head smack_known_list; extern struct list_head smk_netlbladdr_list; -extern struct security_operations smack_ops; +extern struct mutex smack_onlycap_lock; +extern struct list_head smack_onlycap_list; #define SMACK_HASH_SLOTS 16 extern struct hlist_head smack_known_hash[SMACK_HASH_SLOTS]; @@ -334,21 +340,6 @@ static inline struct smack_known *smk_of_current(void) } /* - * Is the task privileged and allowed to be privileged - * by the onlycap rule. - */ -static inline int smack_privileged(int cap) -{ - struct smack_known *skp = smk_of_current(); - - if (!capable(cap)) - return 0; - if (smack_onlycap == NULL || smack_onlycap == skp) - return 1; - return 0; -} - -/* * logging functions */ #define SMACK_AUDIT_DENIED 0x1 diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index 0f410fc56e33..00f6b38bffbd 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -425,7 +425,7 @@ void smk_insert_entry(struct smack_known *skp) * @string: a text string that might be a Smack label * * Returns a pointer to the entry in the label list that - * matches the passed string. + * matches the passed string or NULL if not found. */ struct smack_known *smk_find_entry(const char *string) { @@ -448,7 +448,7 @@ struct smack_known *smk_find_entry(const char *string) * @string: a text string that might contain a Smack label * @len: the maximum size, or zero if it is NULL terminated. * - * Returns a pointer to the clean label, or NULL + * Returns a pointer to the clean label or an error code. */ char *smk_parse_smack(const char *string, int len) { @@ -464,7 +464,7 @@ char *smk_parse_smack(const char *string, int len) * including /smack/cipso and /smack/cipso2 */ if (string[0] == '-') - return NULL; + return ERR_PTR(-EINVAL); for (i = 0; i < len; i++) if (string[i] > '~' || string[i] <= ' ' || string[i] == '/' || @@ -472,11 +472,13 @@ char *smk_parse_smack(const char *string, int len) break; if (i == 0 || i >= SMK_LONGLABEL) - return NULL; + return ERR_PTR(-EINVAL); smack = kzalloc(i + 1, GFP_KERNEL); - if (smack != NULL) - strncpy(smack, string, i); + if (smack == NULL) + return ERR_PTR(-ENOMEM); + + strncpy(smack, string, i); return smack; } @@ -523,7 +525,8 @@ int smk_netlbl_mls(int level, char *catset, struct netlbl_lsm_secattr *sap, * @len: the maximum size, or zero if it is NULL terminated. * * Returns a pointer to the entry in the label list that - * matches the passed string, adding it if necessary. + * matches the passed string, adding it if necessary, + * or an error code. */ struct smack_known *smk_import_entry(const char *string, int len) { @@ -533,8 +536,8 @@ struct smack_known *smk_import_entry(const char *string, int len) int rc; smack = smk_parse_smack(string, len); - if (smack == NULL) - return NULL; + if (IS_ERR(smack)) + return ERR_CAST(smack); mutex_lock(&smack_known_lock); @@ -543,8 +546,10 @@ struct smack_known *smk_import_entry(const char *string, int len) goto freeout; skp = kzalloc(sizeof(*skp), GFP_KERNEL); - if (skp == NULL) + if (skp == NULL) { + skp = ERR_PTR(-ENOMEM); goto freeout; + } skp->smk_known = smack; skp->smk_secid = smack_next_secid++; @@ -577,7 +582,7 @@ struct smack_known *smk_import_entry(const char *string, int len) * smk_netlbl_mls failed. */ kfree(skp); - skp = NULL; + skp = ERR_PTR(rc); freeout: kfree(smack); unlockout: @@ -612,3 +617,44 @@ struct smack_known *smack_from_secid(const u32 secid) rcu_read_unlock(); return &smack_known_invalid; } + +/* + * Unless a process is running with one of these labels + * even having CAP_MAC_OVERRIDE isn't enough to grant + * privilege to violate MAC policy. If no labels are + * designated (the empty list case) capabilities apply to + * everyone. + */ +LIST_HEAD(smack_onlycap_list); +DEFINE_MUTEX(smack_onlycap_lock); + +/* + * Is the task privileged and allowed to be privileged + * by the onlycap rule. + * + * Returns 1 if the task is allowed to be privileged, 0 if it's not. + */ +int smack_privileged(int cap) +{ + struct smack_known *skp = smk_of_current(); + struct smack_onlycap *sop; + + if (!capable(cap)) + return 0; + + rcu_read_lock(); + if (list_empty(&smack_onlycap_list)) { + rcu_read_unlock(); + return 1; + } + + list_for_each_entry_rcu(sop, &smack_onlycap_list, list) { + if (sop->smk_label == skp) { + rcu_read_unlock(); + return 1; + } + } + rcu_read_unlock(); + + return 0; +} diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index b644757886bc..a143328f75eb 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -245,8 +245,8 @@ static int smk_bu_credfile(const struct cred *cred, struct file *file, * @ip: a pointer to the inode * @dp: a pointer to the dentry * - * Returns a pointer to the master list entry for the Smack label - * or NULL if there was no label to fetch. + * Returns a pointer to the master list entry for the Smack label, + * NULL if there was no label to fetch, or an error code. */ static struct smack_known *smk_fetch(const char *name, struct inode *ip, struct dentry *dp) @@ -256,14 +256,18 @@ static struct smack_known *smk_fetch(const char *name, struct inode *ip, struct smack_known *skp = NULL; if (ip->i_op->getxattr == NULL) - return NULL; + return ERR_PTR(-EOPNOTSUPP); buffer = kzalloc(SMK_LONGLABEL, GFP_KERNEL); if (buffer == NULL) - return NULL; + return ERR_PTR(-ENOMEM); rc = ip->i_op->getxattr(dp, name, buffer, SMK_LONGLABEL); - if (rc > 0) + if (rc < 0) + skp = ERR_PTR(rc); + else if (rc == 0) + skp = NULL; + else skp = smk_import_entry(buffer, rc); kfree(buffer); @@ -436,17 +440,11 @@ static int smk_ptrace_rule_check(struct task_struct *tracer, */ static int smack_ptrace_access_check(struct task_struct *ctp, unsigned int mode) { - int rc; struct smack_known *skp; - rc = cap_ptrace_access_check(ctp, mode); - if (rc != 0) - return rc; - skp = smk_of_task_struct(ctp); - rc = smk_ptrace_rule_check(current, skp, mode, __func__); - return rc; + return smk_ptrace_rule_check(current, skp, mode, __func__); } /** @@ -462,10 +460,6 @@ static int smack_ptrace_traceme(struct task_struct *ptp) int rc; struct smack_known *skp; - rc = cap_ptrace_traceme(ptp); - if (rc != 0) - return rc; - skp = smk_of_task(current_security()); rc = smk_ptrace_rule_check(ptp, skp, PTRACE_MODE_ATTACH, __func__); @@ -615,40 +609,44 @@ static int smack_sb_kern_mount(struct super_block *sb, int flags, void *data) if (strncmp(op, SMK_FSHAT, strlen(SMK_FSHAT)) == 0) { op += strlen(SMK_FSHAT); skp = smk_import_entry(op, 0); - if (skp != NULL) { - sp->smk_hat = skp; - specified = 1; - } + if (IS_ERR(skp)) + return PTR_ERR(skp); + sp->smk_hat = skp; + specified = 1; + } else if (strncmp(op, SMK_FSFLOOR, strlen(SMK_FSFLOOR)) == 0) { op += strlen(SMK_FSFLOOR); skp = smk_import_entry(op, 0); - if (skp != NULL) { - sp->smk_floor = skp; - specified = 1; - } + if (IS_ERR(skp)) + return PTR_ERR(skp); + sp->smk_floor = skp; + specified = 1; + } else if (strncmp(op, SMK_FSDEFAULT, strlen(SMK_FSDEFAULT)) == 0) { op += strlen(SMK_FSDEFAULT); skp = smk_import_entry(op, 0); - if (skp != NULL) { - sp->smk_default = skp; - specified = 1; - } + if (IS_ERR(skp)) + return PTR_ERR(skp); + sp->smk_default = skp; + specified = 1; + } else if (strncmp(op, SMK_FSROOT, strlen(SMK_FSROOT)) == 0) { op += strlen(SMK_FSROOT); skp = smk_import_entry(op, 0); - if (skp != NULL) { - sp->smk_root = skp; - specified = 1; - } + if (IS_ERR(skp)) + return PTR_ERR(skp); + sp->smk_root = skp; + specified = 1; + } else if (strncmp(op, SMK_FSTRANS, strlen(SMK_FSTRANS)) == 0) { op += strlen(SMK_FSTRANS); skp = smk_import_entry(op, 0); - if (skp != NULL) { - sp->smk_root = skp; - transmute = 1; - specified = 1; - } + if (IS_ERR(skp)) + return PTR_ERR(skp); + sp->smk_root = skp; + transmute = 1; + specified = 1; } } @@ -721,10 +719,6 @@ static int smack_bprm_set_creds(struct linux_binprm *bprm) struct inode_smack *isp; int rc; - rc = cap_bprm_set_creds(bprm); - if (rc != 0) - return rc; - if (bprm->cred_prepared) return 0; @@ -779,12 +773,11 @@ static void smack_bprm_committing_creds(struct linux_binprm *bprm) static int smack_bprm_secureexec(struct linux_binprm *bprm) { struct task_smack *tsp = current_security(); - int ret = cap_bprm_secureexec(bprm); - if (!ret && (tsp->smk_task != tsp->smk_forked)) - ret = 1; + if (tsp->smk_task != tsp->smk_forked) + return 1; - return ret; + return 0; } /* @@ -1133,7 +1126,9 @@ static int smack_inode_setxattr(struct dentry *dentry, const char *name, if (rc == 0 && check_import) { skp = size ? smk_import_entry(value, size) : NULL; - if (skp == NULL || (check_star && + if (IS_ERR(skp)) + rc = PTR_ERR(skp); + else if (skp == NULL || (check_star && (skp == &smack_known_star || skp == &smack_known_web))) rc = -EINVAL; } @@ -1173,19 +1168,19 @@ static void smack_inode_post_setxattr(struct dentry *dentry, const char *name, if (strcmp(name, XATTR_NAME_SMACK) == 0) { skp = smk_import_entry(value, size); - if (skp != NULL) + if (!IS_ERR(skp)) isp->smk_inode = skp; else isp->smk_inode = &smack_known_invalid; } else if (strcmp(name, XATTR_NAME_SMACKEXEC) == 0) { skp = smk_import_entry(value, size); - if (skp != NULL) + if (!IS_ERR(skp)) isp->smk_task = skp; else isp->smk_task = &smack_known_invalid; } else if (strcmp(name, XATTR_NAME_SMACKMMAP) == 0) { skp = smk_import_entry(value, size); - if (skp != NULL) + if (!IS_ERR(skp)) isp->smk_mmap = skp; else isp->smk_mmap = &smack_known_invalid; @@ -1673,6 +1668,9 @@ static int smack_file_receive(struct file *file) struct smk_audit_info ad; struct inode *inode = file_inode(file); + if (unlikely(IS_PRIVATE(inode))) + return 0; + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path(&ad, file->f_path); /* @@ -1934,12 +1932,7 @@ static void smack_task_getsecid(struct task_struct *p, u32 *secid) */ static int smack_task_setnice(struct task_struct *p, int nice) { - int rc; - - rc = cap_task_setnice(p, nice); - if (rc == 0) - rc = smk_curacc_on_task(p, MAY_WRITE, __func__); - return rc; + return smk_curacc_on_task(p, MAY_WRITE, __func__); } /** @@ -1951,12 +1944,7 @@ static int smack_task_setnice(struct task_struct *p, int nice) */ static int smack_task_setioprio(struct task_struct *p, int ioprio) { - int rc; - - rc = cap_task_setioprio(p, ioprio); - if (rc == 0) - rc = smk_curacc_on_task(p, MAY_WRITE, __func__); - return rc; + return smk_curacc_on_task(p, MAY_WRITE, __func__); } /** @@ -1980,12 +1968,7 @@ static int smack_task_getioprio(struct task_struct *p) */ static int smack_task_setscheduler(struct task_struct *p) { - int rc; - - rc = cap_task_setscheduler(p); - if (rc == 0) - rc = smk_curacc_on_task(p, MAY_WRITE, __func__); - return rc; + return smk_curacc_on_task(p, MAY_WRITE, __func__); } /** @@ -2430,8 +2413,8 @@ static int smack_inode_setsecurity(struct inode *inode, const char *name, return -EINVAL; skp = smk_import_entry(value, size); - if (skp == NULL) - return -EINVAL; + if (IS_ERR(skp)) + return PTR_ERR(skp); if (strcmp(name, XATTR_SMACK_SUFFIX) == 0) { nsp->smk_inode = skp; @@ -3204,7 +3187,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode) */ dp = dget(opt_dentry); skp = smk_fetch(XATTR_NAME_SMACK, inode, dp); - if (skp != NULL) + if (!IS_ERR_OR_NULL(skp)) final = skp; /* @@ -3241,11 +3224,14 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode) * Don't let the exec or mmap label be "*" or "@". */ skp = smk_fetch(XATTR_NAME_SMACKEXEC, inode, dp); - if (skp == &smack_known_star || skp == &smack_known_web) + if (IS_ERR(skp) || skp == &smack_known_star || + skp == &smack_known_web) skp = NULL; isp->smk_task = skp; + skp = smk_fetch(XATTR_NAME_SMACKMMAP, inode, dp); - if (skp == &smack_known_star || skp == &smack_known_web) + if (IS_ERR(skp) || skp == &smack_known_star || + skp == &smack_known_web) skp = NULL; isp->smk_mmap = skp; @@ -3329,8 +3315,8 @@ static int smack_setprocattr(struct task_struct *p, char *name, return -EINVAL; skp = smk_import_entry(value, size); - if (skp == NULL) - return -EINVAL; + if (IS_ERR(skp)) + return PTR_ERR(skp); /* * No process is ever allowed the web ("@") label. @@ -4105,8 +4091,10 @@ static int smack_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) return -EINVAL; skp = smk_import_entry(rulestr, 0); - if (skp) - *rule = skp->smk_known; + if (IS_ERR(skp)) + return PTR_ERR(skp); + + *rule = skp->smk_known; return 0; } @@ -4266,147 +4254,145 @@ static int smack_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) return 0; } -struct security_operations smack_ops = { - .name = "smack", - - .ptrace_access_check = smack_ptrace_access_check, - .ptrace_traceme = smack_ptrace_traceme, - .syslog = smack_syslog, - - .sb_alloc_security = smack_sb_alloc_security, - .sb_free_security = smack_sb_free_security, - .sb_copy_data = smack_sb_copy_data, - .sb_kern_mount = smack_sb_kern_mount, - .sb_statfs = smack_sb_statfs, - - .bprm_set_creds = smack_bprm_set_creds, - .bprm_committing_creds = smack_bprm_committing_creds, - .bprm_secureexec = smack_bprm_secureexec, - - .inode_alloc_security = smack_inode_alloc_security, - .inode_free_security = smack_inode_free_security, - .inode_init_security = smack_inode_init_security, - .inode_link = smack_inode_link, - .inode_unlink = smack_inode_unlink, - .inode_rmdir = smack_inode_rmdir, - .inode_rename = smack_inode_rename, - .inode_permission = smack_inode_permission, - .inode_setattr = smack_inode_setattr, - .inode_getattr = smack_inode_getattr, - .inode_setxattr = smack_inode_setxattr, - .inode_post_setxattr = smack_inode_post_setxattr, - .inode_getxattr = smack_inode_getxattr, - .inode_removexattr = smack_inode_removexattr, - .inode_getsecurity = smack_inode_getsecurity, - .inode_setsecurity = smack_inode_setsecurity, - .inode_listsecurity = smack_inode_listsecurity, - .inode_getsecid = smack_inode_getsecid, - - .file_permission = smack_file_permission, - .file_alloc_security = smack_file_alloc_security, - .file_free_security = smack_file_free_security, - .file_ioctl = smack_file_ioctl, - .file_lock = smack_file_lock, - .file_fcntl = smack_file_fcntl, - .mmap_file = smack_mmap_file, - .mmap_addr = cap_mmap_addr, - .file_set_fowner = smack_file_set_fowner, - .file_send_sigiotask = smack_file_send_sigiotask, - .file_receive = smack_file_receive, - - .file_open = smack_file_open, - - .cred_alloc_blank = smack_cred_alloc_blank, - .cred_free = smack_cred_free, - .cred_prepare = smack_cred_prepare, - .cred_transfer = smack_cred_transfer, - .kernel_act_as = smack_kernel_act_as, - .kernel_create_files_as = smack_kernel_create_files_as, - .task_setpgid = smack_task_setpgid, - .task_getpgid = smack_task_getpgid, - .task_getsid = smack_task_getsid, - .task_getsecid = smack_task_getsecid, - .task_setnice = smack_task_setnice, - .task_setioprio = smack_task_setioprio, - .task_getioprio = smack_task_getioprio, - .task_setscheduler = smack_task_setscheduler, - .task_getscheduler = smack_task_getscheduler, - .task_movememory = smack_task_movememory, - .task_kill = smack_task_kill, - .task_wait = smack_task_wait, - .task_to_inode = smack_task_to_inode, - - .ipc_permission = smack_ipc_permission, - .ipc_getsecid = smack_ipc_getsecid, - - .msg_msg_alloc_security = smack_msg_msg_alloc_security, - .msg_msg_free_security = smack_msg_msg_free_security, - - .msg_queue_alloc_security = smack_msg_queue_alloc_security, - .msg_queue_free_security = smack_msg_queue_free_security, - .msg_queue_associate = smack_msg_queue_associate, - .msg_queue_msgctl = smack_msg_queue_msgctl, - .msg_queue_msgsnd = smack_msg_queue_msgsnd, - .msg_queue_msgrcv = smack_msg_queue_msgrcv, - - .shm_alloc_security = smack_shm_alloc_security, - .shm_free_security = smack_shm_free_security, - .shm_associate = smack_shm_associate, - .shm_shmctl = smack_shm_shmctl, - .shm_shmat = smack_shm_shmat, - - .sem_alloc_security = smack_sem_alloc_security, - .sem_free_security = smack_sem_free_security, - .sem_associate = smack_sem_associate, - .sem_semctl = smack_sem_semctl, - .sem_semop = smack_sem_semop, - - .d_instantiate = smack_d_instantiate, - - .getprocattr = smack_getprocattr, - .setprocattr = smack_setprocattr, - - .unix_stream_connect = smack_unix_stream_connect, - .unix_may_send = smack_unix_may_send, - - .socket_post_create = smack_socket_post_create, +struct security_hook_list smack_hooks[] = { + LSM_HOOK_INIT(ptrace_access_check, smack_ptrace_access_check), + LSM_HOOK_INIT(ptrace_traceme, smack_ptrace_traceme), + LSM_HOOK_INIT(syslog, smack_syslog), + + LSM_HOOK_INIT(sb_alloc_security, smack_sb_alloc_security), + LSM_HOOK_INIT(sb_free_security, smack_sb_free_security), + LSM_HOOK_INIT(sb_copy_data, smack_sb_copy_data), + LSM_HOOK_INIT(sb_kern_mount, smack_sb_kern_mount), + LSM_HOOK_INIT(sb_statfs, smack_sb_statfs), + + LSM_HOOK_INIT(bprm_set_creds, smack_bprm_set_creds), + LSM_HOOK_INIT(bprm_committing_creds, smack_bprm_committing_creds), + LSM_HOOK_INIT(bprm_secureexec, smack_bprm_secureexec), + + LSM_HOOK_INIT(inode_alloc_security, smack_inode_alloc_security), + LSM_HOOK_INIT(inode_free_security, smack_inode_free_security), + LSM_HOOK_INIT(inode_init_security, smack_inode_init_security), + LSM_HOOK_INIT(inode_link, smack_inode_link), + LSM_HOOK_INIT(inode_unlink, smack_inode_unlink), + LSM_HOOK_INIT(inode_rmdir, smack_inode_rmdir), + LSM_HOOK_INIT(inode_rename, smack_inode_rename), + LSM_HOOK_INIT(inode_permission, smack_inode_permission), + LSM_HOOK_INIT(inode_setattr, smack_inode_setattr), + LSM_HOOK_INIT(inode_getattr, smack_inode_getattr), + LSM_HOOK_INIT(inode_setxattr, smack_inode_setxattr), + LSM_HOOK_INIT(inode_post_setxattr, smack_inode_post_setxattr), + LSM_HOOK_INIT(inode_getxattr, smack_inode_getxattr), + LSM_HOOK_INIT(inode_removexattr, smack_inode_removexattr), + LSM_HOOK_INIT(inode_getsecurity, smack_inode_getsecurity), + LSM_HOOK_INIT(inode_setsecurity, smack_inode_setsecurity), + LSM_HOOK_INIT(inode_listsecurity, smack_inode_listsecurity), + LSM_HOOK_INIT(inode_getsecid, smack_inode_getsecid), + + LSM_HOOK_INIT(file_permission, smack_file_permission), + LSM_HOOK_INIT(file_alloc_security, smack_file_alloc_security), + LSM_HOOK_INIT(file_free_security, smack_file_free_security), + LSM_HOOK_INIT(file_ioctl, smack_file_ioctl), + LSM_HOOK_INIT(file_lock, smack_file_lock), + LSM_HOOK_INIT(file_fcntl, smack_file_fcntl), + LSM_HOOK_INIT(mmap_file, smack_mmap_file), + LSM_HOOK_INIT(mmap_addr, cap_mmap_addr), + LSM_HOOK_INIT(file_set_fowner, smack_file_set_fowner), + LSM_HOOK_INIT(file_send_sigiotask, smack_file_send_sigiotask), + LSM_HOOK_INIT(file_receive, smack_file_receive), + + LSM_HOOK_INIT(file_open, smack_file_open), + + LSM_HOOK_INIT(cred_alloc_blank, smack_cred_alloc_blank), + LSM_HOOK_INIT(cred_free, smack_cred_free), + LSM_HOOK_INIT(cred_prepare, smack_cred_prepare), + LSM_HOOK_INIT(cred_transfer, smack_cred_transfer), + LSM_HOOK_INIT(kernel_act_as, smack_kernel_act_as), + LSM_HOOK_INIT(kernel_create_files_as, smack_kernel_create_files_as), + LSM_HOOK_INIT(task_setpgid, smack_task_setpgid), + LSM_HOOK_INIT(task_getpgid, smack_task_getpgid), + LSM_HOOK_INIT(task_getsid, smack_task_getsid), + LSM_HOOK_INIT(task_getsecid, smack_task_getsecid), + LSM_HOOK_INIT(task_setnice, smack_task_setnice), + LSM_HOOK_INIT(task_setioprio, smack_task_setioprio), + LSM_HOOK_INIT(task_getioprio, smack_task_getioprio), + LSM_HOOK_INIT(task_setscheduler, smack_task_setscheduler), + LSM_HOOK_INIT(task_getscheduler, smack_task_getscheduler), + LSM_HOOK_INIT(task_movememory, smack_task_movememory), + LSM_HOOK_INIT(task_kill, smack_task_kill), + LSM_HOOK_INIT(task_wait, smack_task_wait), + LSM_HOOK_INIT(task_to_inode, smack_task_to_inode), + + LSM_HOOK_INIT(ipc_permission, smack_ipc_permission), + LSM_HOOK_INIT(ipc_getsecid, smack_ipc_getsecid), + + LSM_HOOK_INIT(msg_msg_alloc_security, smack_msg_msg_alloc_security), + LSM_HOOK_INIT(msg_msg_free_security, smack_msg_msg_free_security), + + LSM_HOOK_INIT(msg_queue_alloc_security, smack_msg_queue_alloc_security), + LSM_HOOK_INIT(msg_queue_free_security, smack_msg_queue_free_security), + LSM_HOOK_INIT(msg_queue_associate, smack_msg_queue_associate), + LSM_HOOK_INIT(msg_queue_msgctl, smack_msg_queue_msgctl), + LSM_HOOK_INIT(msg_queue_msgsnd, smack_msg_queue_msgsnd), + LSM_HOOK_INIT(msg_queue_msgrcv, smack_msg_queue_msgrcv), + + LSM_HOOK_INIT(shm_alloc_security, smack_shm_alloc_security), + LSM_HOOK_INIT(shm_free_security, smack_shm_free_security), + LSM_HOOK_INIT(shm_associate, smack_shm_associate), + LSM_HOOK_INIT(shm_shmctl, smack_shm_shmctl), + LSM_HOOK_INIT(shm_shmat, smack_shm_shmat), + + LSM_HOOK_INIT(sem_alloc_security, smack_sem_alloc_security), + LSM_HOOK_INIT(sem_free_security, smack_sem_free_security), + LSM_HOOK_INIT(sem_associate, smack_sem_associate), + LSM_HOOK_INIT(sem_semctl, smack_sem_semctl), + LSM_HOOK_INIT(sem_semop, smack_sem_semop), + + LSM_HOOK_INIT(d_instantiate, smack_d_instantiate), + + LSM_HOOK_INIT(getprocattr, smack_getprocattr), + LSM_HOOK_INIT(setprocattr, smack_setprocattr), + + LSM_HOOK_INIT(unix_stream_connect, smack_unix_stream_connect), + LSM_HOOK_INIT(unix_may_send, smack_unix_may_send), + + LSM_HOOK_INIT(socket_post_create, smack_socket_post_create), #ifndef CONFIG_SECURITY_SMACK_NETFILTER - .socket_bind = smack_socket_bind, + LSM_HOOK_INIT(socket_bind, smack_socket_bind), #endif /* CONFIG_SECURITY_SMACK_NETFILTER */ - .socket_connect = smack_socket_connect, - .socket_sendmsg = smack_socket_sendmsg, - .socket_sock_rcv_skb = smack_socket_sock_rcv_skb, - .socket_getpeersec_stream = smack_socket_getpeersec_stream, - .socket_getpeersec_dgram = smack_socket_getpeersec_dgram, - .sk_alloc_security = smack_sk_alloc_security, - .sk_free_security = smack_sk_free_security, - .sock_graft = smack_sock_graft, - .inet_conn_request = smack_inet_conn_request, - .inet_csk_clone = smack_inet_csk_clone, + LSM_HOOK_INIT(socket_connect, smack_socket_connect), + LSM_HOOK_INIT(socket_sendmsg, smack_socket_sendmsg), + LSM_HOOK_INIT(socket_sock_rcv_skb, smack_socket_sock_rcv_skb), + LSM_HOOK_INIT(socket_getpeersec_stream, smack_socket_getpeersec_stream), + LSM_HOOK_INIT(socket_getpeersec_dgram, smack_socket_getpeersec_dgram), + LSM_HOOK_INIT(sk_alloc_security, smack_sk_alloc_security), + LSM_HOOK_INIT(sk_free_security, smack_sk_free_security), + LSM_HOOK_INIT(sock_graft, smack_sock_graft), + LSM_HOOK_INIT(inet_conn_request, smack_inet_conn_request), + LSM_HOOK_INIT(inet_csk_clone, smack_inet_csk_clone), /* key management security hooks */ #ifdef CONFIG_KEYS - .key_alloc = smack_key_alloc, - .key_free = smack_key_free, - .key_permission = smack_key_permission, - .key_getsecurity = smack_key_getsecurity, + LSM_HOOK_INIT(key_alloc, smack_key_alloc), + LSM_HOOK_INIT(key_free, smack_key_free), + LSM_HOOK_INIT(key_permission, smack_key_permission), + LSM_HOOK_INIT(key_getsecurity, smack_key_getsecurity), #endif /* CONFIG_KEYS */ /* Audit hooks */ #ifdef CONFIG_AUDIT - .audit_rule_init = smack_audit_rule_init, - .audit_rule_known = smack_audit_rule_known, - .audit_rule_match = smack_audit_rule_match, - .audit_rule_free = smack_audit_rule_free, + LSM_HOOK_INIT(audit_rule_init, smack_audit_rule_init), + LSM_HOOK_INIT(audit_rule_known, smack_audit_rule_known), + LSM_HOOK_INIT(audit_rule_match, smack_audit_rule_match), + LSM_HOOK_INIT(audit_rule_free, smack_audit_rule_free), #endif /* CONFIG_AUDIT */ - .ismaclabel = smack_ismaclabel, - .secid_to_secctx = smack_secid_to_secctx, - .secctx_to_secid = smack_secctx_to_secid, - .release_secctx = smack_release_secctx, - .inode_notifysecctx = smack_inode_notifysecctx, - .inode_setsecctx = smack_inode_setsecctx, - .inode_getsecctx = smack_inode_getsecctx, + LSM_HOOK_INIT(ismaclabel, smack_ismaclabel), + LSM_HOOK_INIT(secid_to_secctx, smack_secid_to_secctx), + LSM_HOOK_INIT(secctx_to_secid, smack_secctx_to_secid), + LSM_HOOK_INIT(release_secctx, smack_release_secctx), + LSM_HOOK_INIT(inode_notifysecctx, smack_inode_notifysecctx), + LSM_HOOK_INIT(inode_setsecctx, smack_inode_setsecctx), + LSM_HOOK_INIT(inode_getsecctx, smack_inode_getsecctx), }; @@ -4451,7 +4437,7 @@ static __init int smack_init(void) struct cred *cred; struct task_smack *tsp; - if (!security_module_enable(&smack_ops)) + if (!security_module_enable("smack")) return 0; smack_enabled = 1; @@ -4481,8 +4467,7 @@ static __init int smack_init(void) /* * Register with LSM */ - if (register_security(&smack_ops)) - panic("smack: Unable to register with kernel.\n"); + security_add_hooks(smack_hooks, ARRAY_SIZE(smack_hooks)); return 0; } diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index d9682985349e..5e0a64ebdf23 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -87,16 +87,6 @@ int smack_cipso_direct = SMACK_CIPSO_DIRECT_DEFAULT; */ int smack_cipso_mapped = SMACK_CIPSO_MAPPED_DEFAULT; -/* - * Unless a process is running with this label even - * having CAP_MAC_OVERRIDE isn't enough to grant - * privilege to violate MAC policy. If no label is - * designated (the NULL case) capabilities apply to - * everyone. It is expected that the hat (^) label - * will be used if any label is used. - */ -struct smack_known *smack_onlycap; - #ifdef CONFIG_SECURITY_SMACK_BRINGUP /* * Allow one label to be unconfined. This is for @@ -338,8 +328,7 @@ static int smk_perm_from_str(const char *string) * @import: if non-zero, import labels * @len: label length limit * - * Returns 0 on success, -EINVAL on failure and -ENOENT when either subject - * or object is missing. + * Returns 0 on success, appropriate error code on failure. */ static int smk_fill_rule(const char *subject, const char *object, const char *access1, const char *access2, @@ -351,16 +340,16 @@ static int smk_fill_rule(const char *subject, const char *object, if (import) { rule->smk_subject = smk_import_entry(subject, len); - if (rule->smk_subject == NULL) - return -EINVAL; + if (IS_ERR(rule->smk_subject)) + return PTR_ERR(rule->smk_subject); rule->smk_object = smk_import_entry(object, len); - if (rule->smk_object == NULL) - return -EINVAL; + if (IS_ERR(rule->smk_object)) + return PTR_ERR(rule->smk_object); } else { cp = smk_parse_smack(subject, len); - if (cp == NULL) - return -EINVAL; + if (IS_ERR(cp)) + return PTR_ERR(cp); skp = smk_find_entry(cp); kfree(cp); if (skp == NULL) @@ -368,8 +357,8 @@ static int smk_fill_rule(const char *subject, const char *object, rule->smk_subject = skp; cp = smk_parse_smack(object, len); - if (cp == NULL) - return -EINVAL; + if (IS_ERR(cp)) + return PTR_ERR(cp); skp = smk_find_entry(cp); kfree(cp); if (skp == NULL) @@ -412,7 +401,7 @@ static int smk_parse_rule(const char *data, struct smack_parsed_rule *rule, * @import: if non-zero, import labels * @tokens: numer of substrings expected in data * - * Returns number of processed bytes on success, -1 on failure. + * Returns number of processed bytes on success, -ERRNO on failure. */ static ssize_t smk_parse_long_rule(char *data, struct smack_parsed_rule *rule, int import, int tokens) @@ -431,7 +420,7 @@ static ssize_t smk_parse_long_rule(char *data, struct smack_parsed_rule *rule, if (data[cnt] == '\0') /* Unexpected end of data */ - return -1; + return -EINVAL; tok[i] = data + cnt; @@ -529,14 +518,14 @@ static ssize_t smk_write_rules_list(struct file *file, const char __user *buf, while (cnt < count) { if (format == SMK_FIXED24_FMT) { rc = smk_parse_rule(data, &rule, 1); - if (rc != 0) { - rc = -EINVAL; + if (rc < 0) goto out; - } cnt = count; } else { rc = smk_parse_long_rule(data + cnt, &rule, 1, tokens); - if (rc <= 0) { + if (rc < 0) + goto out; + if (rc == 0) { rc = -EINVAL; goto out; } @@ -567,23 +556,17 @@ static void *smk_seq_start(struct seq_file *s, loff_t *pos, struct list_head *head) { struct list_head *list; + int i = *pos; + + rcu_read_lock(); + for (list = rcu_dereference(list_next_rcu(head)); + list != head; + list = rcu_dereference(list_next_rcu(list))) { + if (i-- == 0) + return list; + } - /* - * This is 0 the first time through. - */ - if (s->index == 0) - s->private = head; - - if (s->private == NULL) - return NULL; - - list = s->private; - if (list_empty(list)) - return NULL; - - if (s->index == 0) - return list->next; - return list; + return NULL; } static void *smk_seq_next(struct seq_file *s, void *v, loff_t *pos, @@ -591,17 +574,15 @@ static void *smk_seq_next(struct seq_file *s, void *v, loff_t *pos, { struct list_head *list = v; - if (list_is_last(list, head)) { - s->private = NULL; - return NULL; - } - s->private = list->next; - return list->next; + ++*pos; + list = rcu_dereference(list_next_rcu(list)); + + return (list == head) ? NULL : list; } static void smk_seq_stop(struct seq_file *s, void *v) { - /* No-op */ + rcu_read_unlock(); } static void smk_rule_show(struct seq_file *s, struct smack_rule *srp, int max) @@ -661,7 +642,7 @@ static int load_seq_show(struct seq_file *s, void *v) { struct list_head *list = v; struct smack_master_list *smlp = - list_entry(list, struct smack_master_list, list); + list_entry_rcu(list, struct smack_master_list, list); smk_rule_show(s, smlp->smk_rule, SMK_LABELLEN); @@ -809,7 +790,7 @@ static int cipso_seq_show(struct seq_file *s, void *v) { struct list_head *list = v; struct smack_known *skp = - list_entry(list, struct smack_known, list); + list_entry_rcu(list, struct smack_known, list); struct netlbl_lsm_catmap *cmp = skp->smk_netlabel.attr.mls.cat; char sep = '/'; int i; @@ -915,8 +896,10 @@ static ssize_t smk_set_cipso(struct file *file, const char __user *buf, mutex_lock(&smack_cipso_lock); skp = smk_import_entry(rule, 0); - if (skp == NULL) + if (IS_ERR(skp)) { + rc = PTR_ERR(skp); goto out; + } if (format == SMK_FIXED24_FMT) rule += SMK_LABELLEN; @@ -998,7 +981,7 @@ static int cipso2_seq_show(struct seq_file *s, void *v) { struct list_head *list = v; struct smack_known *skp = - list_entry(list, struct smack_known, list); + list_entry_rcu(list, struct smack_known, list); struct netlbl_lsm_catmap *cmp = skp->smk_netlabel.attr.mls.cat; char sep = '/'; int i; @@ -1082,7 +1065,7 @@ static int netlbladdr_seq_show(struct seq_file *s, void *v) { struct list_head *list = v; struct smk_netlbladdr *skp = - list_entry(list, struct smk_netlbladdr, list); + list_entry_rcu(list, struct smk_netlbladdr, list); unsigned char *hp = (char *) &skp->smk_host.sin_addr.s_addr; int maskn; u32 temp_mask = be32_to_cpu(skp->smk_mask.s_addr); @@ -1237,8 +1220,8 @@ static ssize_t smk_write_netlbladdr(struct file *file, const char __user *buf, */ if (smack[0] != '-') { skp = smk_import_entry(smack, 0); - if (skp == NULL) { - rc = -EINVAL; + if (IS_ERR(skp)) { + rc = PTR_ERR(skp); goto free_out; } } else { @@ -1619,8 +1602,8 @@ static ssize_t smk_write_ambient(struct file *file, const char __user *buf, } skp = smk_import_entry(data, count); - if (skp == NULL) { - rc = -EINVAL; + if (IS_ERR(skp)) { + rc = PTR_ERR(skp); goto out; } @@ -1643,34 +1626,79 @@ static const struct file_operations smk_ambient_ops = { .llseek = default_llseek, }; -/** - * smk_read_onlycap - read() for smackfs/onlycap - * @filp: file pointer, not actually used - * @buf: where to put the result - * @cn: maximum to send along - * @ppos: where to start - * - * Returns number of bytes read or error code, as appropriate +/* + * Seq_file operations for /smack/onlycap */ -static ssize_t smk_read_onlycap(struct file *filp, char __user *buf, - size_t cn, loff_t *ppos) +static void *onlycap_seq_start(struct seq_file *s, loff_t *pos) { - char *smack = ""; - ssize_t rc = -EINVAL; - int asize; + return smk_seq_start(s, pos, &smack_onlycap_list); +} - if (*ppos != 0) - return 0; +static void *onlycap_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + return smk_seq_next(s, v, pos, &smack_onlycap_list); +} - if (smack_onlycap != NULL) - smack = smack_onlycap->smk_known; +static int onlycap_seq_show(struct seq_file *s, void *v) +{ + struct list_head *list = v; + struct smack_onlycap *sop = + list_entry_rcu(list, struct smack_onlycap, list); - asize = strlen(smack) + 1; + seq_puts(s, sop->smk_label->smk_known); + seq_putc(s, ' '); - if (cn >= asize) - rc = simple_read_from_buffer(buf, cn, ppos, smack, asize); + return 0; +} - return rc; +static const struct seq_operations onlycap_seq_ops = { + .start = onlycap_seq_start, + .next = onlycap_seq_next, + .show = onlycap_seq_show, + .stop = smk_seq_stop, +}; + +static int smk_open_onlycap(struct inode *inode, struct file *file) +{ + return seq_open(file, &onlycap_seq_ops); +} + +/** + * smk_list_swap_rcu - swap public list with a private one in RCU-safe way + * The caller must hold appropriate mutex to prevent concurrent modifications + * to the public list. + * Private list is assumed to be not accessible to other threads yet. + * + * @public: public list + * @private: private list + */ +static void smk_list_swap_rcu(struct list_head *public, + struct list_head *private) +{ + struct list_head *first, *last; + + if (list_empty(public)) { + list_splice_init_rcu(private, public, synchronize_rcu); + } else { + /* Remember public list before replacing it */ + first = public->next; + last = public->prev; + + /* Publish private list in place of public in RCU-safe way */ + private->prev->next = public; + private->next->prev = public; + rcu_assign_pointer(public->next, private->next); + public->prev = private->prev; + + synchronize_rcu(); + + /* When all readers are done with the old public list, + * attach it in place of private */ + private->next = first; + private->prev = last; + first->prev = private; + last->next = private; + } } /** @@ -1686,47 +1714,79 @@ static ssize_t smk_write_onlycap(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { char *data; - struct smack_known *skp = smk_of_task(current->cred->security); + char *data_parse; + char *tok; + struct smack_known *skp; + struct smack_onlycap *sop; + struct smack_onlycap *sop2; + LIST_HEAD(list_tmp); int rc = count; if (!smack_privileged(CAP_MAC_ADMIN)) return -EPERM; - /* - * This can be done using smk_access() but is done - * explicitly for clarity. The smk_access() implementation - * would use smk_access(smack_onlycap, MAY_WRITE) - */ - if (smack_onlycap != NULL && smack_onlycap != skp) - return -EPERM; - data = kzalloc(count + 1, GFP_KERNEL); if (data == NULL) return -ENOMEM; + if (copy_from_user(data, buf, count) != 0) { + kfree(data); + return -EFAULT; + } + + data_parse = data; + while ((tok = strsep(&data_parse, " ")) != NULL) { + if (!*tok) + continue; + + skp = smk_import_entry(tok, 0); + if (IS_ERR(skp)) { + rc = PTR_ERR(skp); + break; + } + + sop = kzalloc(sizeof(*sop), GFP_KERNEL); + if (sop == NULL) { + rc = -ENOMEM; + break; + } + + sop->smk_label = skp; + list_add_rcu(&sop->list, &list_tmp); + } + kfree(data); + /* - * Should the null string be passed in unset the onlycap value. - * This seems like something to be careful with as usually - * smk_import only expects to return NULL for errors. It - * is usually the case that a nullstring or "\n" would be - * bad to pass to smk_import but in fact this is useful here. + * Clear the smack_onlycap on invalid label errors. This means + * that we can pass a null string to unset the onlycap value. * - * smk_import will also reject a label beginning with '-', + * Importing will also reject a label beginning with '-', * so "-usecapabilities" will also work. + * + * But do so only on invalid label, not on system errors. + * The invalid label must be first to count as clearing attempt. */ - if (copy_from_user(data, buf, count) != 0) - rc = -EFAULT; - else - smack_onlycap = smk_import_entry(data, count); + if (rc == -EINVAL && list_empty(&list_tmp)) + rc = count; + + if (rc >= 0) { + mutex_lock(&smack_onlycap_lock); + smk_list_swap_rcu(&smack_onlycap_list, &list_tmp); + mutex_unlock(&smack_onlycap_lock); + } + + list_for_each_entry_safe(sop, sop2, &list_tmp, list) + kfree(sop); - kfree(data); return rc; } static const struct file_operations smk_onlycap_ops = { - .read = smk_read_onlycap, + .open = smk_open_onlycap, + .read = seq_read, .write = smk_write_onlycap, - .llseek = default_llseek, + .llseek = seq_lseek, + .release = seq_release, }; #ifdef CONFIG_SECURITY_SMACK_BRINGUP @@ -1773,6 +1833,7 @@ static ssize_t smk_write_unconfined(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { char *data; + struct smack_known *skp; int rc = count; if (!smack_privileged(CAP_MAC_ADMIN)) @@ -1782,21 +1843,31 @@ static ssize_t smk_write_unconfined(struct file *file, const char __user *buf, if (data == NULL) return -ENOMEM; + if (copy_from_user(data, buf, count) != 0) { + rc = -EFAULT; + goto freeout; + } + /* - * Should the null string be passed in unset the unconfined value. - * This seems like something to be careful with as usually - * smk_import only expects to return NULL for errors. It - * is usually the case that a nullstring or "\n" would be - * bad to pass to smk_import but in fact this is useful here. + * Clear the smack_unconfined on invalid label errors. This means + * that we can pass a null string to unset the unconfined value. * - * smk_import will also reject a label beginning with '-', + * Importing will also reject a label beginning with '-', * so "-confine" will also work. + * + * But do so only on invalid label, not on system errors. */ - if (copy_from_user(data, buf, count) != 0) - rc = -EFAULT; - else - smack_unconfined = smk_import_entry(data, count); + skp = smk_import_entry(data, count); + if (PTR_ERR(skp) == -EINVAL) + skp = NULL; + else if (IS_ERR(skp)) { + rc = PTR_ERR(skp); + goto freeout; + } + smack_unconfined = skp; + +freeout: kfree(data); return rc; } @@ -1895,7 +1966,7 @@ static int load_self_seq_show(struct seq_file *s, void *v) { struct list_head *list = v; struct smack_rule *srp = - list_entry(list, struct smack_rule, list); + list_entry_rcu(list, struct smack_rule, list); smk_rule_show(s, srp, SMK_LABELLEN); @@ -1980,7 +2051,7 @@ static ssize_t smk_user_access(struct file *file, const char __user *buf, res = smk_access(rule.smk_subject, rule.smk_object, rule.smk_access1, NULL); else if (res != -ENOENT) - return -EINVAL; + return res; /* * smk_access() can return a value > 0 in the "bringup" case. @@ -2024,7 +2095,7 @@ static int load2_seq_show(struct seq_file *s, void *v) { struct list_head *list = v; struct smack_master_list *smlp = - list_entry(list, struct smack_master_list, list); + list_entry_rcu(list, struct smack_master_list, list); smk_rule_show(s, smlp->smk_rule, SMK_LONGLABEL); @@ -2101,7 +2172,7 @@ static int load_self2_seq_show(struct seq_file *s, void *v) { struct list_head *list = v; struct smack_rule *srp = - list_entry(list, struct smack_rule, list); + list_entry_rcu(list, struct smack_rule, list); smk_rule_show(s, srp, SMK_LONGLABEL); @@ -2182,8 +2253,8 @@ static const struct file_operations smk_access2_ops = { static ssize_t smk_write_revoke_subj(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - char *data = NULL; - const char *cp = NULL; + char *data; + const char *cp; struct smack_known *skp; struct smack_rule *sp; struct list_head *rule_list; @@ -2205,18 +2276,18 @@ static ssize_t smk_write_revoke_subj(struct file *file, const char __user *buf, if (copy_from_user(data, buf, count) != 0) { rc = -EFAULT; - goto free_out; + goto out_data; } cp = smk_parse_smack(data, count); - if (cp == NULL) { - rc = -EINVAL; - goto free_out; + if (IS_ERR(cp)) { + rc = PTR_ERR(cp); + goto out_data; } skp = smk_find_entry(cp); if (skp == NULL) - goto free_out; + goto out_cp; rule_list = &skp->smk_rules; rule_lock = &skp->smk_rules_lock; @@ -2228,9 +2299,11 @@ static ssize_t smk_write_revoke_subj(struct file *file, const char __user *buf, mutex_unlock(rule_lock); -free_out: - kfree(data); +out_cp: kfree(cp); +out_data: + kfree(data); + return rc; } @@ -2341,10 +2414,10 @@ static ssize_t smk_write_syslog(struct file *file, const char __user *buf, rc = -EFAULT; else { skp = smk_import_entry(data, count); - if (skp == NULL) - rc = -EINVAL; + if (IS_ERR(skp)) + rc = PTR_ERR(skp); else - smack_syslog_label = smk_import_entry(data, count); + smack_syslog_label = skp; } kfree(data); @@ -2547,7 +2620,7 @@ static int __init init_smk_fs(void) int err; int rc; - if (!security_module_enable(&smack_ops)) + if (!security_module_enable("smack")) return 0; err = smk_init_sysfs(); diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 57c88d52ffa5..cbf3df422c87 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -4,7 +4,7 @@ * Copyright (C) 2005-2011 NTT DATA CORPORATION */ -#include <linux/security.h> +#include <linux/lsm_hooks.h> #include "common.h" /** @@ -72,12 +72,6 @@ static void tomoyo_cred_free(struct cred *cred) */ static int tomoyo_bprm_set_creds(struct linux_binprm *bprm) { - int rc; - - rc = cap_bprm_set_creds(bprm); - if (rc) - return rc; - /* * Do only if this function is called for the first time of an execve * operation. @@ -502,36 +496,35 @@ static int tomoyo_socket_sendmsg(struct socket *sock, struct msghdr *msg, * tomoyo_security_ops is a "struct security_operations" which is used for * registering TOMOYO. */ -static struct security_operations tomoyo_security_ops = { - .name = "tomoyo", - .cred_alloc_blank = tomoyo_cred_alloc_blank, - .cred_prepare = tomoyo_cred_prepare, - .cred_transfer = tomoyo_cred_transfer, - .cred_free = tomoyo_cred_free, - .bprm_set_creds = tomoyo_bprm_set_creds, - .bprm_check_security = tomoyo_bprm_check_security, - .file_fcntl = tomoyo_file_fcntl, - .file_open = tomoyo_file_open, - .path_truncate = tomoyo_path_truncate, - .path_unlink = tomoyo_path_unlink, - .path_mkdir = tomoyo_path_mkdir, - .path_rmdir = tomoyo_path_rmdir, - .path_symlink = tomoyo_path_symlink, - .path_mknod = tomoyo_path_mknod, - .path_link = tomoyo_path_link, - .path_rename = tomoyo_path_rename, - .inode_getattr = tomoyo_inode_getattr, - .file_ioctl = tomoyo_file_ioctl, - .path_chmod = tomoyo_path_chmod, - .path_chown = tomoyo_path_chown, - .path_chroot = tomoyo_path_chroot, - .sb_mount = tomoyo_sb_mount, - .sb_umount = tomoyo_sb_umount, - .sb_pivotroot = tomoyo_sb_pivotroot, - .socket_bind = tomoyo_socket_bind, - .socket_connect = tomoyo_socket_connect, - .socket_listen = tomoyo_socket_listen, - .socket_sendmsg = tomoyo_socket_sendmsg, +static struct security_hook_list tomoyo_hooks[] = { + LSM_HOOK_INIT(cred_alloc_blank, tomoyo_cred_alloc_blank), + LSM_HOOK_INIT(cred_prepare, tomoyo_cred_prepare), + LSM_HOOK_INIT(cred_transfer, tomoyo_cred_transfer), + LSM_HOOK_INIT(cred_free, tomoyo_cred_free), + LSM_HOOK_INIT(bprm_set_creds, tomoyo_bprm_set_creds), + LSM_HOOK_INIT(bprm_check_security, tomoyo_bprm_check_security), + LSM_HOOK_INIT(file_fcntl, tomoyo_file_fcntl), + LSM_HOOK_INIT(file_open, tomoyo_file_open), + LSM_HOOK_INIT(path_truncate, tomoyo_path_truncate), + LSM_HOOK_INIT(path_unlink, tomoyo_path_unlink), + LSM_HOOK_INIT(path_mkdir, tomoyo_path_mkdir), + LSM_HOOK_INIT(path_rmdir, tomoyo_path_rmdir), + LSM_HOOK_INIT(path_symlink, tomoyo_path_symlink), + LSM_HOOK_INIT(path_mknod, tomoyo_path_mknod), + LSM_HOOK_INIT(path_link, tomoyo_path_link), + LSM_HOOK_INIT(path_rename, tomoyo_path_rename), + LSM_HOOK_INIT(inode_getattr, tomoyo_inode_getattr), + LSM_HOOK_INIT(file_ioctl, tomoyo_file_ioctl), + LSM_HOOK_INIT(path_chmod, tomoyo_path_chmod), + LSM_HOOK_INIT(path_chown, tomoyo_path_chown), + LSM_HOOK_INIT(path_chroot, tomoyo_path_chroot), + LSM_HOOK_INIT(sb_mount, tomoyo_sb_mount), + LSM_HOOK_INIT(sb_umount, tomoyo_sb_umount), + LSM_HOOK_INIT(sb_pivotroot, tomoyo_sb_pivotroot), + LSM_HOOK_INIT(socket_bind, tomoyo_socket_bind), + LSM_HOOK_INIT(socket_connect, tomoyo_socket_connect), + LSM_HOOK_INIT(socket_listen, tomoyo_socket_listen), + LSM_HOOK_INIT(socket_sendmsg, tomoyo_socket_sendmsg), }; /* Lock for GC. */ @@ -546,11 +539,10 @@ static int __init tomoyo_init(void) { struct cred *cred = (struct cred *) current_cred(); - if (!security_module_enable(&tomoyo_security_ops)) + if (!security_module_enable("tomoyo")) return 0; /* register ourselves with the security framework */ - if (register_security(&tomoyo_security_ops)) - panic("Failure registering TOMOYO Linux"); + security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks)); printk(KERN_INFO "TOMOYO Linux initialized\n"); cred->security = &tomoyo_kernel_domain; tomoyo_mm_init(); diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index 24aae2ae2b30..9ed32502470e 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -12,7 +12,7 @@ * */ -#include <linux/security.h> +#include <linux/lsm_hooks.h> #include <linux/sysctl.h> #include <linux/ptrace.h> #include <linux/prctl.h> @@ -154,13 +154,9 @@ void yama_task_free(struct task_struct *task) int yama_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { - int rc; + int rc = -ENOSYS; struct task_struct *myself = current; - rc = cap_task_prctl(option, arg2, arg3, arg4, arg5); - if (rc != -ENOSYS) - return rc; - switch (option) { case PR_SET_PTRACER: /* Since a thread can call prctl(), find the group leader @@ -279,17 +275,10 @@ static int ptracer_exception_found(struct task_struct *tracer, * * Returns 0 if following the ptrace is allowed, -ve on error. */ -int yama_ptrace_access_check(struct task_struct *child, +static int yama_ptrace_access_check(struct task_struct *child, unsigned int mode) { - int rc; - - /* If standard caps disallows it, so does Yama. We should - * only tighten restrictions further. - */ - rc = cap_ptrace_access_check(child, mode); - if (rc) - return rc; + int rc = 0; /* require ptrace target be a child of ptracer on attach */ if (mode == PTRACE_MODE_ATTACH) { @@ -335,14 +324,7 @@ int yama_ptrace_access_check(struct task_struct *child, */ int yama_ptrace_traceme(struct task_struct *parent) { - int rc; - - /* If standard caps disallows it, so does Yama. We should - * only tighten restrictions further. - */ - rc = cap_ptrace_traceme(parent); - if (rc) - return rc; + int rc = 0; /* Only disallow PTRACE_TRACEME on more aggressive settings. */ switch (ptrace_scope) { @@ -364,16 +346,17 @@ int yama_ptrace_traceme(struct task_struct *parent) return rc; } -#ifndef CONFIG_SECURITY_YAMA_STACKED -static struct security_operations yama_ops = { - .name = "yama", - - .ptrace_access_check = yama_ptrace_access_check, - .ptrace_traceme = yama_ptrace_traceme, - .task_prctl = yama_task_prctl, - .task_free = yama_task_free, +static struct security_hook_list yama_hooks[] = { + LSM_HOOK_INIT(ptrace_access_check, yama_ptrace_access_check), + LSM_HOOK_INIT(ptrace_traceme, yama_ptrace_traceme), + LSM_HOOK_INIT(task_prctl, yama_task_prctl), + LSM_HOOK_INIT(task_free, yama_task_free), }; -#endif + +void __init yama_add_hooks(void) +{ + security_add_hooks(yama_hooks, ARRAY_SIZE(yama_hooks)); +} #ifdef CONFIG_SYSCTL static int yama_dointvec_minmax(struct ctl_table *table, int write, @@ -418,16 +401,13 @@ static struct ctl_table yama_sysctl_table[] = { static __init int yama_init(void) { #ifndef CONFIG_SECURITY_YAMA_STACKED - if (!security_module_enable(&yama_ops)) + /* + * If yama is being stacked this is already taken care of. + */ + if (!security_module_enable("yama")) return 0; #endif - - printk(KERN_INFO "Yama: becoming mindful.\n"); - -#ifndef CONFIG_SECURITY_YAMA_STACKED - if (register_security(&yama_ops)) - panic("Yama: kernel registration failed.\n"); -#endif + pr_info("Yama: becoming mindful.\n"); #ifdef CONFIG_SYSCTL if (!register_sysctl_paths(yama_sysctl_path, yama_sysctl_table)) diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild new file mode 100644 index 000000000000..8e9b64520ec1 --- /dev/null +++ b/tools/testing/nvdimm/Kbuild @@ -0,0 +1,40 @@ +ldflags-y += --wrap=ioremap_cache +ldflags-y += --wrap=ioremap_nocache +ldflags-y += --wrap=iounmap +ldflags-y += --wrap=__request_region +ldflags-y += --wrap=__release_region + +DRIVERS := ../../../drivers +NVDIMM_SRC := $(DRIVERS)/nvdimm +ACPI_SRC := $(DRIVERS)/acpi + +obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o +obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o +obj-$(CONFIG_ND_BTT) += nd_btt.o +obj-$(CONFIG_ND_BLK) += nd_blk.o +obj-$(CONFIG_ACPI_NFIT) += nfit.o + +nfit-y := $(ACPI_SRC)/nfit.o +nfit-y += config_check.o + +nd_pmem-y := $(NVDIMM_SRC)/pmem.o +nd_pmem-y += config_check.o + +nd_btt-y := $(NVDIMM_SRC)/btt.o +nd_btt-y += config_check.o + +nd_blk-y := $(NVDIMM_SRC)/blk.o +nd_blk-y += config_check.o + +libnvdimm-y := $(NVDIMM_SRC)/core.o +libnvdimm-y += $(NVDIMM_SRC)/bus.o +libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o +libnvdimm-y += $(NVDIMM_SRC)/dimm.o +libnvdimm-y += $(NVDIMM_SRC)/region_devs.o +libnvdimm-y += $(NVDIMM_SRC)/region.o +libnvdimm-y += $(NVDIMM_SRC)/namespace_devs.o +libnvdimm-y += $(NVDIMM_SRC)/label.o +libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o +libnvdimm-y += config_check.o + +obj-m += test/ diff --git a/tools/testing/nvdimm/Makefile b/tools/testing/nvdimm/Makefile new file mode 100644 index 000000000000..3dfe024b4e7e --- /dev/null +++ b/tools/testing/nvdimm/Makefile @@ -0,0 +1,7 @@ +KDIR ?= ../../../ + +default: + $(MAKE) -C $(KDIR) M=$$PWD + +install: default + $(MAKE) -C $(KDIR) M=$$PWD modules_install diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c new file mode 100644 index 000000000000..f2c7615554eb --- /dev/null +++ b/tools/testing/nvdimm/config_check.c @@ -0,0 +1,15 @@ +#include <linux/kconfig.h> +#include <linux/bug.h> + +void check(void) +{ + /* + * These kconfig symbols must be set to "m" for nfit_test to + * load and operate. + */ + BUILD_BUG_ON(!IS_MODULE(CONFIG_LIBNVDIMM)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT)); +} diff --git a/tools/testing/nvdimm/test/Kbuild b/tools/testing/nvdimm/test/Kbuild new file mode 100644 index 000000000000..9241064970fe --- /dev/null +++ b/tools/testing/nvdimm/test/Kbuild @@ -0,0 +1,8 @@ +ccflags-y := -I$(src)/../../../../drivers/nvdimm/ +ccflags-y += -I$(src)/../../../../drivers/acpi/ + +obj-m += nfit_test.o +obj-m += nfit_test_iomap.o + +nfit_test-y := nfit.o +nfit_test_iomap-y := iomap.o diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c new file mode 100644 index 000000000000..c85a6f6ba559 --- /dev/null +++ b/tools/testing/nvdimm/test/iomap.c @@ -0,0 +1,151 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/rculist.h> +#include <linux/export.h> +#include <linux/ioport.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/io.h> +#include "nfit_test.h" + +static LIST_HEAD(iomap_head); + +static struct iomap_ops { + nfit_test_lookup_fn nfit_test_lookup; + struct list_head list; +} iomap_ops = { + .list = LIST_HEAD_INIT(iomap_ops.list), +}; + +void nfit_test_setup(nfit_test_lookup_fn lookup) +{ + iomap_ops.nfit_test_lookup = lookup; + list_add_rcu(&iomap_ops.list, &iomap_head); +} +EXPORT_SYMBOL(nfit_test_setup); + +void nfit_test_teardown(void) +{ + list_del_rcu(&iomap_ops.list); + synchronize_rcu(); +} +EXPORT_SYMBOL(nfit_test_teardown); + +static struct nfit_test_resource *get_nfit_res(resource_size_t resource) +{ + struct iomap_ops *ops; + + ops = list_first_or_null_rcu(&iomap_head, typeof(*ops), list); + if (ops) + return ops->nfit_test_lookup(resource); + return NULL; +} + +void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size, + void __iomem *(*fallback_fn)(resource_size_t, unsigned long)) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res(offset); + rcu_read_unlock(); + if (nfit_res) + return (void __iomem *) nfit_res->buf + offset + - nfit_res->res->start; + return fallback_fn(offset, size); +} + +void __iomem *__wrap_ioremap_cache(resource_size_t offset, unsigned long size) +{ + return __nfit_test_ioremap(offset, size, ioremap_cache); +} +EXPORT_SYMBOL(__wrap_ioremap_cache); + +void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size) +{ + return __nfit_test_ioremap(offset, size, ioremap_nocache); +} +EXPORT_SYMBOL(__wrap_ioremap_nocache); + +void __wrap_iounmap(volatile void __iomem *addr) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res((unsigned long) addr); + rcu_read_unlock(); + if (nfit_res) + return; + return iounmap(addr); +} +EXPORT_SYMBOL(__wrap_iounmap); + +struct resource *__wrap___request_region(struct resource *parent, + resource_size_t start, resource_size_t n, const char *name, + int flags) +{ + struct nfit_test_resource *nfit_res; + + if (parent == &iomem_resource) { + rcu_read_lock(); + nfit_res = get_nfit_res(start); + rcu_read_unlock(); + if (nfit_res) { + struct resource *res = nfit_res->res + 1; + + if (start + n > nfit_res->res->start + + resource_size(nfit_res->res)) { + pr_debug("%s: start: %llx n: %llx overflow: %pr\n", + __func__, start, n, + nfit_res->res); + return NULL; + } + + res->start = start; + res->end = start + n - 1; + res->name = name; + res->flags = resource_type(parent); + res->flags |= IORESOURCE_BUSY | flags; + pr_debug("%s: %pr\n", __func__, res); + return res; + } + } + return __request_region(parent, start, n, name, flags); +} +EXPORT_SYMBOL(__wrap___request_region); + +void __wrap___release_region(struct resource *parent, resource_size_t start, + resource_size_t n) +{ + struct nfit_test_resource *nfit_res; + + if (parent == &iomem_resource) { + rcu_read_lock(); + nfit_res = get_nfit_res(start); + rcu_read_unlock(); + if (nfit_res) { + struct resource *res = nfit_res->res + 1; + + if (start != res->start || resource_size(res) != n) + pr_info("%s: start: %llx n: %llx mismatch: %pr\n", + __func__, start, n, res); + else + memset(res, 0, sizeof(*res)); + return; + } + } + __release_region(parent, start, n); +} +EXPORT_SYMBOL(__wrap___release_region); + +MODULE_LICENSE("GPL v2"); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c new file mode 100644 index 000000000000..4b69b8368de0 --- /dev/null +++ b/tools/testing/nvdimm/test/nfit.c @@ -0,0 +1,1116 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/platform_device.h> +#include <linux/dma-mapping.h> +#include <linux/libnvdimm.h> +#include <linux/vmalloc.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/ndctl.h> +#include <linux/sizes.h> +#include <linux/slab.h> +#include <nfit.h> +#include <nd.h> +#include "nfit_test.h" + +/* + * Generate an NFIT table to describe the following topology: + * + * BUS0: Interleaved PMEM regions, and aliasing with BLK regions + * + * (a) (b) DIMM BLK-REGION + * +----------+--------------+----------+---------+ + * +------+ | blk2.0 | pm0.0 | blk2.1 | pm1.0 | 0 region2 + * | imc0 +--+- - - - - region0 - - - -+----------+ + + * +--+---+ | blk3.0 | pm0.0 | blk3.1 | pm1.0 | 1 region3 + * | +----------+--------------v----------v v + * +--+---+ | | + * | cpu0 | region1 + * +--+---+ | | + * | +-------------------------^----------^ ^ + * +--+---+ | blk4.0 | pm1.0 | 2 region4 + * | imc1 +--+-------------------------+----------+ + + * +------+ | blk5.0 | pm1.0 | 3 region5 + * +-------------------------+----------+-+-------+ + * + * *) In this layout we have four dimms and two memory controllers in one + * socket. Each unique interface (BLK or PMEM) to DPA space + * is identified by a region device with a dynamically assigned id. + * + * *) The first portion of dimm0 and dimm1 are interleaved as REGION0. + * A single PMEM namespace "pm0.0" is created using half of the + * REGION0 SPA-range. REGION0 spans dimm0 and dimm1. PMEM namespace + * allocate from from the bottom of a region. The unallocated + * portion of REGION0 aliases with REGION2 and REGION3. That + * unallacted capacity is reclaimed as BLK namespaces ("blk2.0" and + * "blk3.0") starting at the base of each DIMM to offset (a) in those + * DIMMs. "pm0.0", "blk2.0" and "blk3.0" are free-form readable + * names that can be assigned to a namespace. + * + * *) In the last portion of dimm0 and dimm1 we have an interleaved + * SPA range, REGION1, that spans those two dimms as well as dimm2 + * and dimm3. Some of REGION1 allocated to a PMEM namespace named + * "pm1.0" the rest is reclaimed in 4 BLK namespaces (for each + * dimm in the interleave set), "blk2.1", "blk3.1", "blk4.0", and + * "blk5.0". + * + * *) The portion of dimm2 and dimm3 that do not participate in the + * REGION1 interleaved SPA range (i.e. the DPA address below offset + * (b) are also included in the "blk4.0" and "blk5.0" namespaces. + * Note, that BLK namespaces need not be contiguous in DPA-space, and + * can consume aliased capacity from multiple interleave sets. + * + * BUS1: Legacy NVDIMM (single contiguous range) + * + * region2 + * +---------------------+ + * |---------------------| + * || pm2.0 || + * |---------------------| + * +---------------------+ + * + * *) A NFIT-table may describe a simple system-physical-address range + * with no BLK aliasing. This type of region may optionally + * reference an NVDIMM. + */ +enum { + NUM_PM = 2, + NUM_DCR = 4, + NUM_BDW = NUM_DCR, + NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW, + NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */, + DIMM_SIZE = SZ_32M, + LABEL_SIZE = SZ_128K, + SPA0_SIZE = DIMM_SIZE, + SPA1_SIZE = DIMM_SIZE*2, + SPA2_SIZE = DIMM_SIZE, + BDW_SIZE = 64 << 8, + DCR_SIZE = 12, + NUM_NFITS = 2, /* permit testing multiple NFITs per system */ +}; + +struct nfit_test_dcr { + __le64 bdw_addr; + __le32 bdw_status; + __u8 aperature[BDW_SIZE]; +}; + +#define NFIT_DIMM_HANDLE(node, socket, imc, chan, dimm) \ + (((node & 0xfff) << 16) | ((socket & 0xf) << 12) \ + | ((imc & 0xf) << 8) | ((chan & 0xf) << 4) | (dimm & 0xf)) + +static u32 handle[NUM_DCR] = { + [0] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 0), + [1] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 1), + [2] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 0), + [3] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1), +}; + +struct nfit_test { + struct acpi_nfit_desc acpi_desc; + struct platform_device pdev; + struct list_head resources; + void *nfit_buf; + dma_addr_t nfit_dma; + size_t nfit_size; + int num_dcr; + int num_pm; + void **dimm; + dma_addr_t *dimm_dma; + void **label; + dma_addr_t *label_dma; + void **spa_set; + dma_addr_t *spa_set_dma; + struct nfit_test_dcr **dcr; + dma_addr_t *dcr_dma; + int (*alloc)(struct nfit_test *t); + void (*setup)(struct nfit_test *t); +}; + +static struct nfit_test *to_nfit_test(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + + return container_of(pdev, struct nfit_test, pdev); +} + +static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc); + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + int i, rc; + + if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask)) + return -ENXIO; + + /* lookup label space for the given dimm */ + for (i = 0; i < ARRAY_SIZE(handle); i++) + if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i]) + break; + if (i >= ARRAY_SIZE(handle)) + return -ENXIO; + + switch (cmd) { + case ND_CMD_GET_CONFIG_SIZE: { + struct nd_cmd_get_config_size *nd_cmd = buf; + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + nd_cmd->status = 0; + nd_cmd->config_size = LABEL_SIZE; + nd_cmd->max_xfer = SZ_4K; + rc = 0; + break; + } + case ND_CMD_GET_CONFIG_DATA: { + struct nd_cmd_get_config_data_hdr *nd_cmd = buf; + unsigned int len, offset = nd_cmd->in_offset; + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + if (offset >= LABEL_SIZE) + return -EINVAL; + if (nd_cmd->in_length + sizeof(*nd_cmd) > buf_len) + return -EINVAL; + + nd_cmd->status = 0; + len = min(nd_cmd->in_length, LABEL_SIZE - offset); + memcpy(nd_cmd->out_buf, t->label[i] + offset, len); + rc = buf_len - sizeof(*nd_cmd) - len; + break; + } + case ND_CMD_SET_CONFIG_DATA: { + struct nd_cmd_set_config_hdr *nd_cmd = buf; + unsigned int len, offset = nd_cmd->in_offset; + u32 *status; + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + if (offset >= LABEL_SIZE) + return -EINVAL; + if (nd_cmd->in_length + sizeof(*nd_cmd) + 4 > buf_len) + return -EINVAL; + + status = buf + nd_cmd->in_length + sizeof(*nd_cmd); + *status = 0; + len = min(nd_cmd->in_length, LABEL_SIZE - offset); + memcpy(t->label[i] + offset, nd_cmd->in_buf, len); + rc = buf_len - sizeof(*nd_cmd) - (len + 4); + break; + } + default: + return -ENOTTY; + } + + return rc; +} + +static DEFINE_SPINLOCK(nfit_test_lock); +static struct nfit_test *instances[NUM_NFITS]; + +static void release_nfit_res(void *data) +{ + struct nfit_test_resource *nfit_res = data; + struct resource *res = nfit_res->res; + + spin_lock(&nfit_test_lock); + list_del(&nfit_res->list); + spin_unlock(&nfit_test_lock); + + if (is_vmalloc_addr(nfit_res->buf)) + vfree(nfit_res->buf); + else + dma_free_coherent(nfit_res->dev, resource_size(res), + nfit_res->buf, res->start); + kfree(res); + kfree(nfit_res); +} + +static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, + void *buf) +{ + struct device *dev = &t->pdev.dev; + struct resource *res = kzalloc(sizeof(*res) * 2, GFP_KERNEL); + struct nfit_test_resource *nfit_res = kzalloc(sizeof(*nfit_res), + GFP_KERNEL); + int rc; + + if (!res || !buf || !nfit_res) + goto err; + rc = devm_add_action(dev, release_nfit_res, nfit_res); + if (rc) + goto err; + INIT_LIST_HEAD(&nfit_res->list); + memset(buf, 0, size); + nfit_res->dev = dev; + nfit_res->buf = buf; + nfit_res->res = res; + res->start = *dma; + res->end = *dma + size - 1; + res->name = "NFIT"; + spin_lock(&nfit_test_lock); + list_add(&nfit_res->list, &t->resources); + spin_unlock(&nfit_test_lock); + + return nfit_res->buf; + err: + if (buf && !is_vmalloc_addr(buf)) + dma_free_coherent(dev, size, buf, *dma); + else if (buf) + vfree(buf); + kfree(res); + kfree(nfit_res); + return NULL; +} + +static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma) +{ + void *buf = vmalloc(size); + + *dma = (unsigned long) buf; + return __test_alloc(t, size, dma, buf); +} + +static void *test_alloc_coherent(struct nfit_test *t, size_t size, + dma_addr_t *dma) +{ + struct device *dev = &t->pdev.dev; + void *buf = dma_alloc_coherent(dev, size, dma, GFP_KERNEL); + + return __test_alloc(t, size, dma, buf); +} + +static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(instances); i++) { + struct nfit_test_resource *n, *nfit_res = NULL; + struct nfit_test *t = instances[i]; + + if (!t) + continue; + spin_lock(&nfit_test_lock); + list_for_each_entry(n, &t->resources, list) { + if (addr >= n->res->start && (addr < n->res->start + + resource_size(n->res))) { + nfit_res = n; + break; + } else if (addr >= (unsigned long) n->buf + && (addr < (unsigned long) n->buf + + resource_size(n->res))) { + nfit_res = n; + break; + } + } + spin_unlock(&nfit_test_lock); + if (nfit_res) + return nfit_res; + } + + return NULL; +} + +static int nfit_test0_alloc(struct nfit_test *t) +{ + size_t nfit_size = sizeof(struct acpi_table_nfit) + + sizeof(struct acpi_nfit_system_address) * NUM_SPA + + sizeof(struct acpi_nfit_memory_map) * NUM_MEM + + sizeof(struct acpi_nfit_control_region) * NUM_DCR + + sizeof(struct acpi_nfit_data_region) * NUM_BDW; + int i; + + t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); + if (!t->nfit_buf) + return -ENOMEM; + t->nfit_size = nfit_size; + + t->spa_set[0] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[0]); + if (!t->spa_set[0]) + return -ENOMEM; + + t->spa_set[1] = test_alloc_coherent(t, SPA1_SIZE, &t->spa_set_dma[1]); + if (!t->spa_set[1]) + return -ENOMEM; + + for (i = 0; i < NUM_DCR; i++) { + t->dimm[i] = test_alloc(t, DIMM_SIZE, &t->dimm_dma[i]); + if (!t->dimm[i]) + return -ENOMEM; + + t->label[i] = test_alloc(t, LABEL_SIZE, &t->label_dma[i]); + if (!t->label[i]) + return -ENOMEM; + sprintf(t->label[i], "label%d", i); + } + + for (i = 0; i < NUM_DCR; i++) { + t->dcr[i] = test_alloc(t, LABEL_SIZE, &t->dcr_dma[i]); + if (!t->dcr[i]) + return -ENOMEM; + } + + return 0; +} + +static int nfit_test1_alloc(struct nfit_test *t) +{ + size_t nfit_size = sizeof(struct acpi_table_nfit) + + sizeof(struct acpi_nfit_system_address) + + sizeof(struct acpi_nfit_memory_map) + + sizeof(struct acpi_nfit_control_region); + + t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); + if (!t->nfit_buf) + return -ENOMEM; + t->nfit_size = nfit_size; + + t->spa_set[0] = test_alloc_coherent(t, SPA2_SIZE, &t->spa_set_dma[0]); + if (!t->spa_set[0]) + return -ENOMEM; + + return 0; +} + +static void nfit_test_init_header(struct acpi_table_nfit *nfit, size_t size) +{ + memcpy(nfit->header.signature, ACPI_SIG_NFIT, 4); + nfit->header.length = size; + nfit->header.revision = 1; + memcpy(nfit->header.oem_id, "LIBND", 6); + memcpy(nfit->header.oem_table_id, "TEST", 5); + nfit->header.oem_revision = 1; + memcpy(nfit->header.asl_compiler_id, "TST", 4); + nfit->header.asl_compiler_revision = 1; +} + +static void nfit_test0_setup(struct nfit_test *t) +{ + struct nvdimm_bus_descriptor *nd_desc; + struct acpi_nfit_desc *acpi_desc; + struct acpi_nfit_memory_map *memdev; + void *nfit_buf = t->nfit_buf; + size_t size = t->nfit_size; + struct acpi_nfit_system_address *spa; + struct acpi_nfit_control_region *dcr; + struct acpi_nfit_data_region *bdw; + unsigned int offset; + + nfit_test_init_header(nfit_buf, size); + + /* + * spa0 (interleave first half of dimm0 and dimm1, note storage + * does not actually alias the related block-data-window + * regions) + */ + spa = nfit_buf + sizeof(struct acpi_table_nfit); + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); + spa->range_index = 0+1; + spa->address = t->spa_set_dma[0]; + spa->length = SPA0_SIZE; + + /* + * spa1 (interleave last half of the 4 DIMMS, note storage + * does not actually alias the related block-data-window + * regions) + */ + spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa); + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); + spa->range_index = 1+1; + spa->address = t->spa_set_dma[1]; + spa->length = SPA1_SIZE; + + /* spa2 (dcr0) dimm0 */ + spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 2; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); + spa->range_index = 2+1; + spa->address = t->dcr_dma[0]; + spa->length = DCR_SIZE; + + /* spa3 (dcr1) dimm1 */ + spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 3; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); + spa->range_index = 3+1; + spa->address = t->dcr_dma[1]; + spa->length = DCR_SIZE; + + /* spa4 (dcr2) dimm2 */ + spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 4; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); + spa->range_index = 4+1; + spa->address = t->dcr_dma[2]; + spa->length = DCR_SIZE; + + /* spa5 (dcr3) dimm3 */ + spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 5; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); + spa->range_index = 5+1; + spa->address = t->dcr_dma[3]; + spa->length = DCR_SIZE; + + /* spa6 (bdw for dcr0) dimm0 */ + spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 6; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); + spa->range_index = 6+1; + spa->address = t->dimm_dma[0]; + spa->length = DIMM_SIZE; + + /* spa7 (bdw for dcr1) dimm1 */ + spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 7; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); + spa->range_index = 7+1; + spa->address = t->dimm_dma[1]; + spa->length = DIMM_SIZE; + + /* spa8 (bdw for dcr2) dimm2 */ + spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 8; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); + spa->range_index = 8+1; + spa->address = t->dimm_dma[2]; + spa->length = DIMM_SIZE; + + /* spa9 (bdw for dcr3) dimm3 */ + spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 9; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); + spa->range_index = 9+1; + spa->address = t->dimm_dma[3]; + spa->length = DIMM_SIZE; + + offset = sizeof(struct acpi_table_nfit) + sizeof(*spa) * 10; + /* mem-region0 (spa0, dimm0) */ + memdev = nfit_buf + offset; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[0]; + memdev->physical_id = 0; + memdev->region_id = 0; + memdev->range_index = 0+1; + memdev->region_index = 0+1; + memdev->region_size = SPA0_SIZE/2; + memdev->region_offset = t->spa_set_dma[0]; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 2; + + /* mem-region1 (spa0, dimm1) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map); + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[1]; + memdev->physical_id = 1; + memdev->region_id = 0; + memdev->range_index = 0+1; + memdev->region_index = 1+1; + memdev->region_size = SPA0_SIZE/2; + memdev->region_offset = t->spa_set_dma[0] + SPA0_SIZE/2; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 2; + + /* mem-region2 (spa1, dimm0) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[0]; + memdev->physical_id = 0; + memdev->region_id = 1; + memdev->range_index = 1+1; + memdev->region_index = 0+1; + memdev->region_size = SPA1_SIZE/4; + memdev->region_offset = t->spa_set_dma[1]; + memdev->address = SPA0_SIZE/2; + memdev->interleave_index = 0; + memdev->interleave_ways = 4; + + /* mem-region3 (spa1, dimm1) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[1]; + memdev->physical_id = 1; + memdev->region_id = 1; + memdev->range_index = 1+1; + memdev->region_index = 1+1; + memdev->region_size = SPA1_SIZE/4; + memdev->region_offset = t->spa_set_dma[1] + SPA1_SIZE/4; + memdev->address = SPA0_SIZE/2; + memdev->interleave_index = 0; + memdev->interleave_ways = 4; + + /* mem-region4 (spa1, dimm2) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 4; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[2]; + memdev->physical_id = 2; + memdev->region_id = 0; + memdev->range_index = 1+1; + memdev->region_index = 2+1; + memdev->region_size = SPA1_SIZE/4; + memdev->region_offset = t->spa_set_dma[1] + 2*SPA1_SIZE/4; + memdev->address = SPA0_SIZE/2; + memdev->interleave_index = 0; + memdev->interleave_ways = 4; + + /* mem-region5 (spa1, dimm3) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[3]; + memdev->physical_id = 3; + memdev->region_id = 0; + memdev->range_index = 1+1; + memdev->region_index = 3+1; + memdev->region_size = SPA1_SIZE/4; + memdev->region_offset = t->spa_set_dma[1] + 3*SPA1_SIZE/4; + memdev->address = SPA0_SIZE/2; + memdev->interleave_index = 0; + memdev->interleave_ways = 4; + + /* mem-region6 (spa/dcr0, dimm0) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 6; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[0]; + memdev->physical_id = 0; + memdev->region_id = 0; + memdev->range_index = 2+1; + memdev->region_index = 0+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region7 (spa/dcr1, dimm1) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 7; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[1]; + memdev->physical_id = 1; + memdev->region_id = 0; + memdev->range_index = 3+1; + memdev->region_index = 1+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region8 (spa/dcr2, dimm2) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 8; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[2]; + memdev->physical_id = 2; + memdev->region_id = 0; + memdev->range_index = 4+1; + memdev->region_index = 2+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region9 (spa/dcr3, dimm3) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 9; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[3]; + memdev->physical_id = 3; + memdev->region_id = 0; + memdev->range_index = 5+1; + memdev->region_index = 3+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region10 (spa/bdw0, dimm0) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 10; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[0]; + memdev->physical_id = 0; + memdev->region_id = 0; + memdev->range_index = 6+1; + memdev->region_index = 0+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region11 (spa/bdw1, dimm1) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 11; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[1]; + memdev->physical_id = 1; + memdev->region_id = 0; + memdev->range_index = 7+1; + memdev->region_index = 1+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region12 (spa/bdw2, dimm2) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 12; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[2]; + memdev->physical_id = 2; + memdev->region_id = 0; + memdev->range_index = 8+1; + memdev->region_index = 2+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region13 (spa/dcr3, dimm3) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 13; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[3]; + memdev->physical_id = 3; + memdev->region_id = 0; + memdev->range_index = 9+1; + memdev->region_index = 3+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + offset = offset + sizeof(struct acpi_nfit_memory_map) * 14; + /* dcr-descriptor0 */ + dcr = nfit_buf + offset; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->region_index = 0+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[0]; + dcr->windows = 1; + dcr->window_size = DCR_SIZE; + dcr->command_offset = 0; + dcr->command_size = 8; + dcr->status_offset = 8; + dcr->status_size = 4; + + /* dcr-descriptor1 */ + dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region); + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->region_index = 1+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[1]; + dcr->windows = 1; + dcr->window_size = DCR_SIZE; + dcr->command_offset = 0; + dcr->command_size = 8; + dcr->status_offset = 8; + dcr->status_size = 4; + + /* dcr-descriptor2 */ + dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 2; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->region_index = 2+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[2]; + dcr->windows = 1; + dcr->window_size = DCR_SIZE; + dcr->command_offset = 0; + dcr->command_size = 8; + dcr->status_offset = 8; + dcr->status_size = 4; + + /* dcr-descriptor3 */ + dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 3; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->region_index = 3+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[3]; + dcr->windows = 1; + dcr->window_size = DCR_SIZE; + dcr->command_offset = 0; + dcr->command_size = 8; + dcr->status_offset = 8; + dcr->status_size = 4; + + offset = offset + sizeof(struct acpi_nfit_control_region) * 4; + /* bdw0 (spa/dcr0, dimm0) */ + bdw = nfit_buf + offset; + bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; + bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->region_index = 0+1; + bdw->windows = 1; + bdw->offset = 0; + bdw->size = BDW_SIZE; + bdw->capacity = DIMM_SIZE; + bdw->start_address = 0; + + /* bdw1 (spa/dcr1, dimm1) */ + bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region); + bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; + bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->region_index = 1+1; + bdw->windows = 1; + bdw->offset = 0; + bdw->size = BDW_SIZE; + bdw->capacity = DIMM_SIZE; + bdw->start_address = 0; + + /* bdw2 (spa/dcr2, dimm2) */ + bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 2; + bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; + bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->region_index = 2+1; + bdw->windows = 1; + bdw->offset = 0; + bdw->size = BDW_SIZE; + bdw->capacity = DIMM_SIZE; + bdw->start_address = 0; + + /* bdw3 (spa/dcr3, dimm3) */ + bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 3; + bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; + bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->region_index = 3+1; + bdw->windows = 1; + bdw->offset = 0; + bdw->size = BDW_SIZE; + bdw->capacity = DIMM_SIZE; + bdw->start_address = 0; + + acpi_desc = &t->acpi_desc; + set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en); + set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); + set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); + nd_desc = &acpi_desc->nd_desc; + nd_desc->ndctl = nfit_test_ctl; +} + +static void nfit_test1_setup(struct nfit_test *t) +{ + size_t size = t->nfit_size, offset; + void *nfit_buf = t->nfit_buf; + struct acpi_nfit_memory_map *memdev; + struct acpi_nfit_control_region *dcr; + struct acpi_nfit_system_address *spa; + + nfit_test_init_header(nfit_buf, size); + + offset = sizeof(struct acpi_table_nfit); + /* spa0 (flat range with no bdw aliasing) */ + spa = nfit_buf + offset; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); + spa->range_index = 0+1; + spa->address = t->spa_set_dma[0]; + spa->length = SPA2_SIZE; + + offset += sizeof(*spa); + /* mem-region0 (spa0, dimm0) */ + memdev = nfit_buf + offset; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = 0; + memdev->physical_id = 0; + memdev->region_id = 0; + memdev->range_index = 0+1; + memdev->region_index = 0+1; + memdev->region_size = SPA2_SIZE; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED + | ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED + | ACPI_NFIT_MEM_ARMED; + + offset += sizeof(*memdev); + /* dcr-descriptor0 */ + dcr = nfit_buf + offset; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->region_index = 0+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~0; + dcr->code = 0x201; + dcr->windows = 0; + dcr->window_size = 0; + dcr->command_offset = 0; + dcr->command_size = 0; + dcr->status_offset = 0; + dcr->status_size = 0; +} + +static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, + void *iobuf, u64 len, int rw) +{ + struct nfit_blk *nfit_blk = ndbr->blk_provider_data; + struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW]; + struct nd_region *nd_region = &ndbr->nd_region; + unsigned int lane; + + lane = nd_region_acquire_lane(nd_region); + if (rw) + memcpy(mmio->base + dpa, iobuf, len); + else + memcpy(iobuf, mmio->base + dpa, len); + nd_region_release_lane(nd_region, lane); + + return 0; +} + +static int nfit_test_probe(struct platform_device *pdev) +{ + struct nvdimm_bus_descriptor *nd_desc; + struct acpi_nfit_desc *acpi_desc; + struct device *dev = &pdev->dev; + struct nfit_test *nfit_test; + int rc; + + nfit_test = to_nfit_test(&pdev->dev); + + /* common alloc */ + if (nfit_test->num_dcr) { + int num = nfit_test->num_dcr; + + nfit_test->dimm = devm_kcalloc(dev, num, sizeof(void *), + GFP_KERNEL); + nfit_test->dimm_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), + GFP_KERNEL); + nfit_test->label = devm_kcalloc(dev, num, sizeof(void *), + GFP_KERNEL); + nfit_test->label_dma = devm_kcalloc(dev, num, + sizeof(dma_addr_t), GFP_KERNEL); + nfit_test->dcr = devm_kcalloc(dev, num, + sizeof(struct nfit_test_dcr *), GFP_KERNEL); + nfit_test->dcr_dma = devm_kcalloc(dev, num, + sizeof(dma_addr_t), GFP_KERNEL); + if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label + && nfit_test->label_dma && nfit_test->dcr + && nfit_test->dcr_dma) + /* pass */; + else + return -ENOMEM; + } + + if (nfit_test->num_pm) { + int num = nfit_test->num_pm; + + nfit_test->spa_set = devm_kcalloc(dev, num, sizeof(void *), + GFP_KERNEL); + nfit_test->spa_set_dma = devm_kcalloc(dev, num, + sizeof(dma_addr_t), GFP_KERNEL); + if (nfit_test->spa_set && nfit_test->spa_set_dma) + /* pass */; + else + return -ENOMEM; + } + + /* per-nfit specific alloc */ + if (nfit_test->alloc(nfit_test)) + return -ENOMEM; + + nfit_test->setup(nfit_test); + acpi_desc = &nfit_test->acpi_desc; + acpi_desc->dev = &pdev->dev; + acpi_desc->nfit = nfit_test->nfit_buf; + acpi_desc->blk_do_io = nfit_test_blk_do_io; + nd_desc = &acpi_desc->nd_desc; + nd_desc->attr_groups = acpi_nfit_attribute_groups; + acpi_desc->nvdimm_bus = nvdimm_bus_register(&pdev->dev, nd_desc); + if (!acpi_desc->nvdimm_bus) + return -ENXIO; + + rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size); + if (rc) { + nvdimm_bus_unregister(acpi_desc->nvdimm_bus); + return rc; + } + + return 0; +} + +static int nfit_test_remove(struct platform_device *pdev) +{ + struct nfit_test *nfit_test = to_nfit_test(&pdev->dev); + struct acpi_nfit_desc *acpi_desc = &nfit_test->acpi_desc; + + nvdimm_bus_unregister(acpi_desc->nvdimm_bus); + + return 0; +} + +static void nfit_test_release(struct device *dev) +{ + struct nfit_test *nfit_test = to_nfit_test(dev); + + kfree(nfit_test); +} + +static const struct platform_device_id nfit_test_id[] = { + { KBUILD_MODNAME }, + { }, +}; + +static struct platform_driver nfit_test_driver = { + .probe = nfit_test_probe, + .remove = nfit_test_remove, + .driver = { + .name = KBUILD_MODNAME, + }, + .id_table = nfit_test_id, +}; + +#ifdef CONFIG_CMA_SIZE_MBYTES +#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES +#else +#define CMA_SIZE_MBYTES 0 +#endif + +static __init int nfit_test_init(void) +{ + int rc, i; + + nfit_test_setup(nfit_test_lookup); + + for (i = 0; i < NUM_NFITS; i++) { + struct nfit_test *nfit_test; + struct platform_device *pdev; + static int once; + + nfit_test = kzalloc(sizeof(*nfit_test), GFP_KERNEL); + if (!nfit_test) { + rc = -ENOMEM; + goto err_register; + } + INIT_LIST_HEAD(&nfit_test->resources); + switch (i) { + case 0: + nfit_test->num_pm = NUM_PM; + nfit_test->num_dcr = NUM_DCR; + nfit_test->alloc = nfit_test0_alloc; + nfit_test->setup = nfit_test0_setup; + break; + case 1: + nfit_test->num_pm = 1; + nfit_test->alloc = nfit_test1_alloc; + nfit_test->setup = nfit_test1_setup; + break; + default: + rc = -EINVAL; + goto err_register; + } + pdev = &nfit_test->pdev; + pdev->name = KBUILD_MODNAME; + pdev->id = i; + pdev->dev.release = nfit_test_release; + rc = platform_device_register(pdev); + if (rc) { + put_device(&pdev->dev); + goto err_register; + } + + rc = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (rc) + goto err_register; + + instances[i] = nfit_test; + + if (!once++) { + dma_addr_t dma; + void *buf; + + buf = dma_alloc_coherent(&pdev->dev, SZ_128M, &dma, + GFP_KERNEL); + if (!buf) { + rc = -ENOMEM; + dev_warn(&pdev->dev, "need 128M of free cma\n"); + goto err_register; + } + dma_free_coherent(&pdev->dev, SZ_128M, buf, dma); + } + } + + rc = platform_driver_register(&nfit_test_driver); + if (rc) + goto err_register; + return 0; + + err_register: + for (i = 0; i < NUM_NFITS; i++) + if (instances[i]) + platform_device_unregister(&instances[i]->pdev); + nfit_test_teardown(); + return rc; +} + +static __exit void nfit_test_exit(void) +{ + int i; + + platform_driver_unregister(&nfit_test_driver); + for (i = 0; i < NUM_NFITS; i++) + platform_device_unregister(&instances[i]->pdev); + nfit_test_teardown(); +} + +module_init(nfit_test_init); +module_exit(nfit_test_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h new file mode 100644 index 000000000000..96c5e16d7db9 --- /dev/null +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -0,0 +1,29 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __NFIT_TEST_H__ +#define __NFIT_TEST_H__ + +struct nfit_test_resource { + struct list_head list; + struct resource *res; + struct device *dev; + void *buf; +}; + +typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t); +void __iomem *__wrap_ioremap_nocache(resource_size_t offset, + unsigned long size); +void __wrap_iounmap(volatile void __iomem *addr); +void nfit_test_setup(nfit_test_lookup_fn lookup); +void nfit_test_teardown(void); +#endif diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 95abddcd7839..24ae9e829e9a 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -4,6 +4,7 @@ TARGETS += efivarfs TARGETS += exec TARGETS += firmware TARGETS += ftrace +TARGETS += futex TARGETS += kcmp TARGETS += memfd TARGETS += memory-hotplug @@ -12,13 +13,18 @@ TARGETS += mqueue TARGETS += net TARGETS += powerpc TARGETS += ptrace +TARGETS += seccomp TARGETS += size TARGETS += sysctl +ifneq (1, $(quicktest)) TARGETS += timers +endif TARGETS += user TARGETS += vm TARGETS += x86 #Please keep the TARGETS list alphabetically sorted +# Run "make quicktest=1 run_tests" or +# "make quicktest=1 kselftest from top level Makefile TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug @@ -27,7 +33,7 @@ TARGETS_HOTPLUG += memory-hotplug # Makefile to avoid test build failures when test # Makefile doesn't have explicit build rules. ifeq (1,$(MAKELEVEL)) -undefine LDFLAGS +override LDFLAGS = override MAKEFLAGS = endif diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index 4edb7d0da29b..6b76bfdc847e 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -1,6 +1,6 @@ CFLAGS = -Wall BINARIES = execveat -DEPS = execveat.symlink execveat.denatured script subdir +DEPS = execveat.symlink execveat.denatured script all: $(BINARIES) $(DEPS) subdir: diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile index 346720639d1d..0acbeca47225 100644 --- a/tools/testing/selftests/ftrace/Makefile +++ b/tools/testing/selftests/ftrace/Makefile @@ -1,6 +1,7 @@ all: TEST_PROGS := ftracetest +TEST_DIRS := test.d/ include ../lib.mk diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile new file mode 100644 index 000000000000..6a1752956283 --- /dev/null +++ b/tools/testing/selftests/futex/Makefile @@ -0,0 +1,29 @@ +SUBDIRS := functional + +TEST_PROGS := run.sh + +.PHONY: all clean +all: + for DIR in $(SUBDIRS); do $(MAKE) -C $$DIR $@ ; done + +include ../lib.mk + +override define RUN_TESTS + ./run.sh +endef + +override define INSTALL_RULE + mkdir -p $(INSTALL_PATH) + install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) + + @for SUBDIR in $(SUBDIRS); do \ + $(MAKE) -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \ + done; +endef + +override define EMIT_TESTS + echo "./run.sh" +endef + +clean: + for DIR in $(SUBDIRS); do $(MAKE) -C $$DIR $@ ; done diff --git a/tools/testing/selftests/futex/README b/tools/testing/selftests/futex/README new file mode 100644 index 000000000000..3224a049b196 --- /dev/null +++ b/tools/testing/selftests/futex/README @@ -0,0 +1,62 @@ +Futex Test +========== +Futex Test is intended to thoroughly test the Linux kernel futex system call +API. + +Functional tests shall test the documented behavior of the futex operation +code under test. This includes checking for proper behavior under normal use, +odd corner cases, regression tests, and abject abuse and misuse. + +Futextest will also provide example implementation of mutual exclusion +primitives. These can be used as is in user applications or can serve as +examples for system libraries. These will likely be added to either a new lib/ +directory or purely as header files under include/, I'm leaning toward the +latter. + +Quick Start +----------- +# make +# ./run.sh + +Design and Implementation Goals +------------------------------- +o Tests should be as self contained as is practical so as to facilitate sharing + the individual tests on mailing list discussions and bug reports. +o The build system shall remain as simple as possible, avoiding any archive or + shared object building and linking. +o Where possible, any helper functions or other package-wide code shall be + implemented in header files, avoiding the need to compile intermediate object + files. +o External dependendencies shall remain as minimal as possible. Currently gcc + and glibc are the only dependencies. +o Tests return 0 for success and < 0 for failure. + +Output Formatting +----------------- +Test output shall be easily parsable by both human and machine. Title and +results are printed to stdout, while intermediate ERROR or FAIL messages are +sent to stderr. Tests shall support the -c option to print PASS, FAIL, and +ERROR strings in color for easy visual parsing. Output shall conform to the +following format: + +test_name: Description of the test + Arguments: arg1=val1 #units specified for clarity where appropriate + ERROR: Description of unexpected error + FAIL: Reason for test failure + # FIXME: Perhaps an " INFO: informational message" option would be + # useful here. Using -v to toggle it them on and off, as with -c. + # there may be multiple ERROR or FAIL messages +Result: (PASS|FAIL|ERROR) + +Naming +------ +o FIXME: decide on a sane test naming scheme. Currently the tests are named + based on the primary futex operation they test. Eventually this will become a + problem as we intend to write multiple tests which collide in this namespace. + Perhaps something like "wait-wake-1" "wait-wake-2" is adequate, leaving the + detailed description in the test source and the output. + +Coding Style +------------ +o The Futex Test project adheres to the coding standards set forth by Linux + kernel as defined in the Linux source Documentation/CodingStyle. diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore new file mode 100644 index 000000000000..a09f57061902 --- /dev/null +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -0,0 +1,7 @@ +futex_requeue_pi +futex_requeue_pi_mismatched_ops +futex_requeue_pi_signal_restart +futex_wait_private_mapped_file +futex_wait_timeout +futex_wait_uninitialized_heap +futex_wait_wouldblock diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile new file mode 100644 index 000000000000..9d6b75ef7b5d --- /dev/null +++ b/tools/testing/selftests/futex/functional/Makefile @@ -0,0 +1,25 @@ +INCLUDES := -I../include -I../../ +CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES) +LDFLAGS := $(LDFLAGS) -pthread -lrt + +HEADERS := ../include/futextest.h +TARGETS := \ + futex_wait_timeout \ + futex_wait_wouldblock \ + futex_requeue_pi \ + futex_requeue_pi_signal_restart \ + futex_requeue_pi_mismatched_ops \ + futex_wait_uninitialized_heap \ + futex_wait_private_mapped_file + +TEST_PROGS := $(TARGETS) run.sh + +.PHONY: all clean +all: $(TARGETS) + +$(TARGETS): $(HEADERS) + +include ../../lib.mk + +clean: + rm -f $(TARGETS) diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c new file mode 100644 index 000000000000..3da06ad23996 --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c @@ -0,0 +1,409 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2006-2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * This test excercises the futex syscall op codes needed for requeuing + * priority inheritance aware POSIX condition variables and mutexes. + * + * AUTHORS + * Sripathi Kodi <[email protected]> + * Darren Hart <[email protected]> + * + * HISTORY + * 2008-Jan-13: Initial version by Sripathi Kodi <[email protected]> + * 2009-Nov-6: futex test adaptation by Darren Hart <[email protected]> + * + *****************************************************************************/ + +#include <errno.h> +#include <limits.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <string.h> +#include "atomic.h" +#include "futextest.h" +#include "logging.h" + +#define MAX_WAKE_ITERS 1000 +#define THREAD_MAX 10 +#define SIGNAL_PERIOD_US 100 + +atomic_t waiters_blocked = ATOMIC_INITIALIZER; +atomic_t waiters_woken = ATOMIC_INITIALIZER; + +futex_t f1 = FUTEX_INITIALIZER; +futex_t f2 = FUTEX_INITIALIZER; +futex_t wake_complete = FUTEX_INITIALIZER; + +/* Test option defaults */ +static long timeout_ns; +static int broadcast; +static int owner; +static int locked; + +struct thread_arg { + long id; + struct timespec *timeout; + int lock; + int ret; +}; +#define THREAD_ARG_INITIALIZER { 0, NULL, 0, 0 } + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -b Broadcast wakeup (all waiters)\n"); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -l Lock the pi futex across requeue\n"); + printf(" -o Use a third party pi futex owner during requeue (cancels -l)\n"); + printf(" -t N Timeout in nanoseconds (default: 0)\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, + int policy, int prio) +{ + int ret; + struct sched_param schedp; + pthread_attr_t attr; + + pthread_attr_init(&attr); + memset(&schedp, 0, sizeof(schedp)); + + ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); + if (ret) { + error("pthread_attr_setinheritsched\n", ret); + return -1; + } + + ret = pthread_attr_setschedpolicy(&attr, policy); + if (ret) { + error("pthread_attr_setschedpolicy\n", ret); + return -1; + } + + schedp.sched_priority = prio; + ret = pthread_attr_setschedparam(&attr, &schedp); + if (ret) { + error("pthread_attr_setschedparam\n", ret); + return -1; + } + + ret = pthread_create(pth, &attr, func, arg); + if (ret) { + error("pthread_create\n", ret); + return -1; + } + return 0; +} + + +void *waiterfn(void *arg) +{ + struct thread_arg *args = (struct thread_arg *)arg; + futex_t old_val; + + info("Waiter %ld: running\n", args->id); + /* Each thread sleeps for a different amount of time + * This is to avoid races, because we don't lock the + * external mutex here */ + usleep(1000 * (long)args->id); + + old_val = f1; + atomic_inc(&waiters_blocked); + info("Calling futex_wait_requeue_pi: %p (%u) -> %p\n", + &f1, f1, &f2); + args->ret = futex_wait_requeue_pi(&f1, old_val, &f2, args->timeout, + FUTEX_PRIVATE_FLAG); + + info("waiter %ld woke with %d %s\n", args->id, args->ret, + args->ret < 0 ? strerror(errno) : ""); + atomic_inc(&waiters_woken); + if (args->ret < 0) { + if (args->timeout && errno == ETIMEDOUT) + args->ret = 0; + else { + args->ret = RET_ERROR; + error("futex_wait_requeue_pi\n", errno); + } + futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); + } + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + + info("Waiter %ld: exiting with %d\n", args->id, args->ret); + pthread_exit((void *)&args->ret); +} + +void *broadcast_wakerfn(void *arg) +{ + struct thread_arg *args = (struct thread_arg *)arg; + int nr_requeue = INT_MAX; + int task_count = 0; + futex_t old_val; + int nr_wake = 1; + int i = 0; + + info("Waker: waiting for waiters to block\n"); + while (waiters_blocked.val < THREAD_MAX) + usleep(1000); + usleep(1000); + + info("Waker: Calling broadcast\n"); + if (args->lock) { + info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2); + futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); + } + continue_requeue: + old_val = f1; + args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, nr_wake, nr_requeue, + FUTEX_PRIVATE_FLAG); + if (args->ret < 0) { + args->ret = RET_ERROR; + error("FUTEX_CMP_REQUEUE_PI failed\n", errno); + } else if (++i < MAX_WAKE_ITERS) { + task_count += args->ret; + if (task_count < THREAD_MAX - waiters_woken.val) + goto continue_requeue; + } else { + error("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n", + 0, MAX_WAKE_ITERS, task_count, THREAD_MAX); + args->ret = RET_ERROR; + } + + futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG); + + if (args->lock) + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + + if (args->ret > 0) + args->ret = task_count; + + info("Waker: exiting with %d\n", args->ret); + pthread_exit((void *)&args->ret); +} + +void *signal_wakerfn(void *arg) +{ + struct thread_arg *args = (struct thread_arg *)arg; + unsigned int old_val; + int nr_requeue = 0; + int task_count = 0; + int nr_wake = 1; + int i = 0; + + info("Waker: waiting for waiters to block\n"); + while (waiters_blocked.val < THREAD_MAX) + usleep(1000); + usleep(1000); + + while (task_count < THREAD_MAX && waiters_woken.val < THREAD_MAX) { + info("task_count: %d, waiters_woken: %d\n", + task_count, waiters_woken.val); + if (args->lock) { + info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", + f2, &f2); + futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); + } + info("Waker: Calling signal\n"); + /* cond_signal */ + old_val = f1; + args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, + nr_wake, nr_requeue, + FUTEX_PRIVATE_FLAG); + if (args->ret < 0) + args->ret = -errno; + info("futex: %x\n", f2); + if (args->lock) { + info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", + f2, &f2); + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + } + info("futex: %x\n", f2); + if (args->ret < 0) { + error("FUTEX_CMP_REQUEUE_PI failed\n", errno); + args->ret = RET_ERROR; + break; + } + + task_count += args->ret; + usleep(SIGNAL_PERIOD_US); + i++; + /* we have to loop at least THREAD_MAX times */ + if (i > MAX_WAKE_ITERS + THREAD_MAX) { + error("max signaling iterations (%d) reached, giving up on pending waiters.\n", + 0, MAX_WAKE_ITERS + THREAD_MAX); + args->ret = RET_ERROR; + break; + } + } + + futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG); + + if (args->ret >= 0) + args->ret = task_count; + + info("Waker: exiting with %d\n", args->ret); + info("Waker: waiters_woken: %d\n", waiters_woken.val); + pthread_exit((void *)&args->ret); +} + +void *third_party_blocker(void *arg) +{ + struct thread_arg *args = (struct thread_arg *)arg; + int ret2 = 0; + + args->ret = futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); + if (args->ret) + goto out; + args->ret = futex_wait(&wake_complete, wake_complete, NULL, + FUTEX_PRIVATE_FLAG); + ret2 = futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + + out: + if (args->ret || ret2) { + error("third_party_blocker() futex error", 0); + args->ret = RET_ERROR; + } + + pthread_exit((void *)&args->ret); +} + +int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns) +{ + void *(*wakerfn)(void *) = signal_wakerfn; + struct thread_arg blocker_arg = THREAD_ARG_INITIALIZER; + struct thread_arg waker_arg = THREAD_ARG_INITIALIZER; + pthread_t waiter[THREAD_MAX], waker, blocker; + struct timespec ts, *tsp = NULL; + struct thread_arg args[THREAD_MAX]; + int *waiter_ret; + int i, ret = RET_PASS; + + if (timeout_ns) { + time_t secs; + + info("timeout_ns = %ld\n", timeout_ns); + ret = clock_gettime(CLOCK_MONOTONIC, &ts); + secs = (ts.tv_nsec + timeout_ns) / 1000000000; + ts.tv_nsec = ((int64_t)ts.tv_nsec + timeout_ns) % 1000000000; + ts.tv_sec += secs; + info("ts.tv_sec = %ld\n", ts.tv_sec); + info("ts.tv_nsec = %ld\n", ts.tv_nsec); + tsp = &ts; + } + + if (broadcast) + wakerfn = broadcast_wakerfn; + + if (third_party_owner) { + if (create_rt_thread(&blocker, third_party_blocker, + (void *)&blocker_arg, SCHED_FIFO, 1)) { + error("Creating third party blocker thread failed\n", + errno); + ret = RET_ERROR; + goto out; + } + } + + atomic_set(&waiters_woken, 0); + for (i = 0; i < THREAD_MAX; i++) { + args[i].id = i; + args[i].timeout = tsp; + info("Starting thread %d\n", i); + if (create_rt_thread(&waiter[i], waiterfn, (void *)&args[i], + SCHED_FIFO, 1)) { + error("Creating waiting thread failed\n", errno); + ret = RET_ERROR; + goto out; + } + } + waker_arg.lock = lock; + if (create_rt_thread(&waker, wakerfn, (void *)&waker_arg, + SCHED_FIFO, 1)) { + error("Creating waker thread failed\n", errno); + ret = RET_ERROR; + goto out; + } + + /* Wait for threads to finish */ + /* Store the first error or failure encountered in waiter_ret */ + waiter_ret = &args[0].ret; + for (i = 0; i < THREAD_MAX; i++) + pthread_join(waiter[i], + *waiter_ret ? NULL : (void **)&waiter_ret); + + if (third_party_owner) + pthread_join(blocker, NULL); + pthread_join(waker, NULL); + +out: + if (!ret) { + if (*waiter_ret) + ret = *waiter_ret; + else if (waker_arg.ret < 0) + ret = waker_arg.ret; + else if (blocker_arg.ret) + ret = blocker_arg.ret; + } + + return ret; +} + +int main(int argc, char *argv[]) +{ + int c, ret; + + while ((c = getopt(argc, argv, "bchlot:v:")) != -1) { + switch (c) { + case 'b': + broadcast = 1; + break; + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'l': + locked = 1; + break; + case 'o': + owner = 1; + locked = 0; + break; + case 't': + timeout_ns = atoi(optarg); + break; + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Test requeue functionality\n", basename(argv[0])); + printf("\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n", + broadcast, locked, owner, timeout_ns); + + /* + * FIXME: unit_test is obsolete now that we parse options and the + * various style of runs are done by run.sh - simplify the code and move + * unit_test into main() + */ + ret = unit_test(broadcast, locked, owner, timeout_ns); + + print_result(ret); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c new file mode 100644 index 000000000000..d5e4f2c4da2a --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c @@ -0,0 +1,135 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * 1. Block a thread using FUTEX_WAIT + * 2. Attempt to use FUTEX_CMP_REQUEUE_PI on the futex from 1. + * 3. The kernel must detect the mismatch and return -EINVAL. + * + * AUTHOR + * Darren Hart <[email protected]> + * + * HISTORY + * 2009-Nov-9: Initial version by Darren Hart <[email protected]> + * + *****************************************************************************/ + +#include <errno.h> +#include <getopt.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "futextest.h" +#include "logging.h" + +futex_t f1 = FUTEX_INITIALIZER; +futex_t f2 = FUTEX_INITIALIZER; +int child_ret = 0; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +void *blocking_child(void *arg) +{ + child_ret = futex_wait(&f1, f1, NULL, FUTEX_PRIVATE_FLAG); + if (child_ret < 0) { + child_ret = -errno; + error("futex_wait\n", errno); + } + return (void *)&child_ret; +} + +int main(int argc, char *argv[]) +{ + int ret = RET_PASS; + pthread_t child; + int c; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Detect mismatched requeue_pi operations\n", + basename(argv[0])); + + if (pthread_create(&child, NULL, blocking_child, NULL)) { + error("pthread_create\n", errno); + ret = RET_ERROR; + goto out; + } + /* Allow the child to block in the kernel. */ + sleep(1); + + /* + * The kernel should detect the waiter did not setup the + * q->requeue_pi_key and return -EINVAL. If it does not, + * it likely gave the lock to the child, which is now hung + * in the kernel. + */ + ret = futex_cmp_requeue_pi(&f1, f1, &f2, 1, 0, FUTEX_PRIVATE_FLAG); + if (ret < 0) { + if (errno == EINVAL) { + /* + * The kernel correctly detected the mismatched + * requeue_pi target and aborted. Wake the child with + * FUTEX_WAKE. + */ + ret = futex_wake(&f1, 1, FUTEX_PRIVATE_FLAG); + if (ret == 1) { + ret = RET_PASS; + } else if (ret < 0) { + error("futex_wake\n", errno); + ret = RET_ERROR; + } else { + error("futex_wake did not wake the child\n", 0); + ret = RET_ERROR; + } + } else { + error("futex_cmp_requeue_pi\n", errno); + ret = RET_ERROR; + } + } else if (ret > 0) { + fail("futex_cmp_requeue_pi failed to detect the mismatch\n"); + ret = RET_FAIL; + } else { + error("futex_cmp_requeue_pi found no waiters\n", 0); + ret = RET_ERROR; + } + + pthread_join(child, NULL); + + if (!ret) + ret = child_ret; + + out: + /* If the kernel crashes, we shouldn't return at all. */ + print_result(ret); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c new file mode 100644 index 000000000000..7f0c756993af --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c @@ -0,0 +1,223 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2006-2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * This test exercises the futex_wait_requeue_pi() signal handling both + * before and after the requeue. The first should be restarted by the + * kernel. The latter should return EWOULDBLOCK to the waiter. + * + * AUTHORS + * Darren Hart <[email protected]> + * + * HISTORY + * 2008-May-5: Initial version by Darren Hart <[email protected]> + * + *****************************************************************************/ + +#include <errno.h> +#include <getopt.h> +#include <limits.h> +#include <pthread.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "atomic.h" +#include "futextest.h" +#include "logging.h" + +#define DELAY_US 100 + +futex_t f1 = FUTEX_INITIALIZER; +futex_t f2 = FUTEX_INITIALIZER; +atomic_t requeued = ATOMIC_INITIALIZER; + +int waiter_ret = 0; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, + int policy, int prio) +{ + struct sched_param schedp; + pthread_attr_t attr; + int ret; + + pthread_attr_init(&attr); + memset(&schedp, 0, sizeof(schedp)); + + ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); + if (ret) { + error("pthread_attr_setinheritsched\n", ret); + return -1; + } + + ret = pthread_attr_setschedpolicy(&attr, policy); + if (ret) { + error("pthread_attr_setschedpolicy\n", ret); + return -1; + } + + schedp.sched_priority = prio; + ret = pthread_attr_setschedparam(&attr, &schedp); + if (ret) { + error("pthread_attr_setschedparam\n", ret); + return -1; + } + + ret = pthread_create(pth, &attr, func, arg); + if (ret) { + error("pthread_create\n", ret); + return -1; + } + return 0; +} + +void handle_signal(int signo) +{ + info("signal received %s requeue\n", + requeued.val ? "after" : "prior to"); +} + +void *waiterfn(void *arg) +{ + unsigned int old_val; + int res; + + waiter_ret = RET_PASS; + + info("Waiter running\n"); + info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); + old_val = f1; + res = futex_wait_requeue_pi(&f1, old_val, &(f2), NULL, + FUTEX_PRIVATE_FLAG); + if (!requeued.val || errno != EWOULDBLOCK) { + fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n", + res, strerror(errno)); + info("w2:futex: %x\n", f2); + if (!res) + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + waiter_ret = RET_FAIL; + } + + info("Waiter exiting with %d\n", waiter_ret); + pthread_exit(NULL); +} + + +int main(int argc, char *argv[]) +{ + unsigned int old_val; + struct sigaction sa; + pthread_t waiter; + int c, res, ret = RET_PASS; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Test signal handling during requeue_pi\n", + basename(argv[0])); + printf("\tArguments: <none>\n"); + + sa.sa_handler = handle_signal; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + if (sigaction(SIGUSR1, &sa, NULL)) { + error("sigaction\n", errno); + exit(1); + } + + info("m1:f2: %x\n", f2); + info("Creating waiter\n"); + res = create_rt_thread(&waiter, waiterfn, NULL, SCHED_FIFO, 1); + if (res) { + error("Creating waiting thread failed", res); + ret = RET_ERROR; + goto out; + } + + info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); + info("m2:f2: %x\n", f2); + futex_lock_pi(&f2, 0, 0, FUTEX_PRIVATE_FLAG); + info("m3:f2: %x\n", f2); + + while (1) { + /* + * signal the waiter before requeue, waiter should automatically + * restart futex_wait_requeue_pi() in the kernel. Wait for the + * waiter to block on f1 again. + */ + info("Issuing SIGUSR1 to waiter\n"); + pthread_kill(waiter, SIGUSR1); + usleep(DELAY_US); + + info("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n"); + old_val = f1; + res = futex_cmp_requeue_pi(&f1, old_val, &(f2), 1, 0, + FUTEX_PRIVATE_FLAG); + /* + * If res is non-zero, we either requeued the waiter or hit an + * error, break out and handle it. If it is zero, then the + * signal may have hit before the the waiter was blocked on f1. + * Try again. + */ + if (res > 0) { + atomic_set(&requeued, 1); + break; + } else if (res > 0) { + error("FUTEX_CMP_REQUEUE_PI failed\n", errno); + ret = RET_ERROR; + break; + } + } + info("m4:f2: %x\n", f2); + + /* + * Signal the waiter after requeue, waiter should return from + * futex_wait_requeue_pi() with EWOULDBLOCK. Join the thread here so the + * futex_unlock_pi() can't happen before the signal wakeup is detected + * in the kernel. + */ + info("Issuing SIGUSR1 to waiter\n"); + pthread_kill(waiter, SIGUSR1); + info("Waiting for waiter to return\n"); + pthread_join(waiter, NULL); + + info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2); + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + info("m5:f2: %x\n", f2); + + out: + if (ret == RET_PASS && waiter_ret) + ret = waiter_ret; + + print_result(ret); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c new file mode 100644 index 000000000000..5f687f247454 --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c @@ -0,0 +1,125 @@ +/****************************************************************************** + * + * Copyright FUJITSU LIMITED 2010 + * Copyright KOSAKI Motohiro <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Internally, Futex has two handling mode, anon and file. The private file + * mapping is special. At first it behave as file, but after write anything + * it behave as anon. This test is intent to test such case. + * + * AUTHOR + * KOSAKI Motohiro <[email protected]> + * + * HISTORY + * 2010-Jan-6: Initial version by KOSAKI Motohiro <[email protected]> + * + *****************************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <syscall.h> +#include <unistd.h> +#include <errno.h> +#include <linux/futex.h> +#include <pthread.h> +#include <libgen.h> +#include <signal.h> + +#include "logging.h" +#include "futextest.h" + +#define PAGE_SZ 4096 + +char pad[PAGE_SZ] = {1}; +futex_t val = 1; +char pad2[PAGE_SZ] = {1}; + +#define WAKE_WAIT_US 3000000 +struct timespec wait_timeout = { .tv_sec = 5, .tv_nsec = 0}; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +void *thr_futex_wait(void *arg) +{ + int ret; + + info("futex wait\n"); + ret = futex_wait(&val, 1, &wait_timeout, 0); + if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) { + error("futex error.\n", errno); + print_result(RET_ERROR); + exit(RET_ERROR); + } + + if (ret && errno == ETIMEDOUT) + fail("waiter timedout\n"); + + info("futex_wait: ret = %d, errno = %d\n", ret, errno); + + return NULL; +} + +int main(int argc, char **argv) +{ + pthread_t thr; + int ret = RET_PASS; + int res; + int c; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Test the futex value of private file mappings in FUTEX_WAIT\n", + basename(argv[0])); + + ret = pthread_create(&thr, NULL, thr_futex_wait, NULL); + if (ret < 0) { + fprintf(stderr, "pthread_create error\n"); + ret = RET_ERROR; + goto out; + } + + info("wait a while\n"); + usleep(WAKE_WAIT_US); + val = 2; + res = futex_wake(&val, 1, 0); + info("futex_wake %d\n", res); + if (res != 1) { + fail("FUTEX_WAKE didn't find the waiting thread.\n"); + ret = RET_FAIL; + } + + info("join\n"); + pthread_join(thr, NULL); + + out: + print_result(ret); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c new file mode 100644 index 000000000000..ab428ca894de --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c @@ -0,0 +1,86 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Block on a futex and wait for timeout. + * + * AUTHOR + * Darren Hart <[email protected]> + * + * HISTORY + * 2009-Nov-6: Initial version by Darren Hart <[email protected]> + * + *****************************************************************************/ + +#include <errno.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "futextest.h" +#include "logging.h" + +static long timeout_ns = 100000; /* 100us default timeout */ + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -t N Timeout in nanoseconds (default: 100,000)\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int main(int argc, char *argv[]) +{ + futex_t f1 = FUTEX_INITIALIZER; + struct timespec to; + int res, ret = RET_PASS; + int c; + + while ((c = getopt(argc, argv, "cht:v:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 't': + timeout_ns = atoi(optarg); + break; + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Block on a futex and wait for timeout\n", + basename(argv[0])); + printf("\tArguments: timeout=%ldns\n", timeout_ns); + + /* initialize timeout */ + to.tv_sec = 0; + to.tv_nsec = timeout_ns; + + info("Calling futex_wait on f1: %u @ %p\n", f1, &f1); + res = futex_wait(&f1, f1, &to, FUTEX_PRIVATE_FLAG); + if (!res || errno != ETIMEDOUT) { + fail("futex_wait returned %d\n", ret < 0 ? errno : ret); + ret = RET_FAIL; + } + + print_result(ret); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c new file mode 100644 index 000000000000..fe7aee96844b --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c @@ -0,0 +1,124 @@ +/****************************************************************************** + * + * Copyright FUJITSU LIMITED 2010 + * Copyright KOSAKI Motohiro <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Wait on uninitialized heap. It shold be zero and FUTEX_WAIT should + * return immediately. This test is intent to test zero page handling in + * futex. + * + * AUTHOR + * KOSAKI Motohiro <[email protected]> + * + * HISTORY + * 2010-Jan-6: Initial version by KOSAKI Motohiro <[email protected]> + * + *****************************************************************************/ + +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <syscall.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <errno.h> +#include <linux/futex.h> +#include <libgen.h> + +#include "logging.h" +#include "futextest.h" + +#define WAIT_US 5000000 + +static int child_blocked = 1; +static int child_ret; +void *buf; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +void *wait_thread(void *arg) +{ + int res; + + child_ret = RET_PASS; + res = futex_wait(buf, 1, NULL, 0); + child_blocked = 0; + + if (res != 0 && errno != EWOULDBLOCK) { + error("futex failure\n", errno); + child_ret = RET_ERROR; + } + pthread_exit(NULL); +} + +int main(int argc, char **argv) +{ + int c, ret = RET_PASS; + long page_size; + pthread_t thr; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + page_size = sysconf(_SC_PAGESIZE); + + buf = mmap(NULL, page_size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); + if (buf == (void *)-1) { + error("mmap\n", errno); + exit(1); + } + + printf("%s: Test the uninitialized futex value in FUTEX_WAIT\n", + basename(argv[0])); + + + ret = pthread_create(&thr, NULL, wait_thread, NULL); + if (ret) { + error("pthread_create\n", errno); + ret = RET_ERROR; + goto out; + } + + info("waiting %dus for child to return\n", WAIT_US); + usleep(WAIT_US); + + ret = child_ret; + if (child_blocked) { + fail("child blocked in kernel\n"); + ret = RET_FAIL; + } + + out: + print_result(ret); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c new file mode 100644 index 000000000000..b6b027448825 --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c @@ -0,0 +1,79 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Test if FUTEX_WAIT op returns -EWOULDBLOCK if the futex value differs + * from the expected one. + * + * AUTHOR + * Gowrishankar <[email protected]> + * + * HISTORY + * 2009-Nov-14: Initial version by Gowrishankar <[email protected]> + * + *****************************************************************************/ + +#include <errno.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "futextest.h" +#include "logging.h" + +#define timeout_ns 100000 + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int main(int argc, char *argv[]) +{ + struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; + futex_t f1 = FUTEX_INITIALIZER; + int res, ret = RET_PASS; + int c; + + while ((c = getopt(argc, argv, "cht:v:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Test the unexpected futex value in FUTEX_WAIT\n", + basename(argv[0])); + + info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); + res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG); + if (!res || errno != EWOULDBLOCK) { + fail("futex_wait returned: %d %s\n", + res ? errno : res, res ? strerror(errno) : ""); + ret = RET_FAIL; + } + + print_result(ret); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh new file mode 100755 index 000000000000..e87dbe2a0b0d --- /dev/null +++ b/tools/testing/selftests/futex/functional/run.sh @@ -0,0 +1,79 @@ +#!/bin/sh + +############################################################################### +# +# Copyright © International Business Machines Corp., 2009 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# DESCRIPTION +# Run tests in the current directory. +# +# AUTHOR +# Darren Hart <[email protected]> +# +# HISTORY +# 2009-Nov-9: Initial version by Darren Hart <[email protected]> +# 2010-Jan-6: Add futex_wait_uninitialized_heap and futex_wait_private_mapped_file +# by KOSAKI Motohiro <[email protected]> +# +############################################################################### + +# Test for a color capable console +if [ -z "$USE_COLOR" ]; then + tput setf 7 + if [ $? -eq 0 ]; then + USE_COLOR=1 + tput sgr0 + fi +fi +if [ "$USE_COLOR" -eq 1 ]; then + COLOR="-c" +fi + + +echo +# requeue pi testing +# without timeouts +./futex_requeue_pi $COLOR +./futex_requeue_pi $COLOR -b +./futex_requeue_pi $COLOR -b -l +./futex_requeue_pi $COLOR -b -o +./futex_requeue_pi $COLOR -l +./futex_requeue_pi $COLOR -o +# with timeouts +./futex_requeue_pi $COLOR -b -l -t 5000 +./futex_requeue_pi $COLOR -l -t 5000 +./futex_requeue_pi $COLOR -b -l -t 500000 +./futex_requeue_pi $COLOR -l -t 500000 +./futex_requeue_pi $COLOR -b -t 5000 +./futex_requeue_pi $COLOR -t 5000 +./futex_requeue_pi $COLOR -b -t 500000 +./futex_requeue_pi $COLOR -t 500000 +./futex_requeue_pi $COLOR -b -o -t 5000 +./futex_requeue_pi $COLOR -l -t 5000 +./futex_requeue_pi $COLOR -b -o -t 500000 +./futex_requeue_pi $COLOR -l -t 500000 +# with long timeout +./futex_requeue_pi $COLOR -b -l -t 2000000000 +./futex_requeue_pi $COLOR -l -t 2000000000 + + +echo +./futex_requeue_pi_mismatched_ops $COLOR + +echo +./futex_requeue_pi_signal_restart $COLOR + +echo +./futex_wait_timeout $COLOR + +echo +./futex_wait_wouldblock $COLOR + +echo +./futex_wait_uninitialized_heap $COLOR +./futex_wait_private_mapped_file $COLOR diff --git a/tools/testing/selftests/futex/include/atomic.h b/tools/testing/selftests/futex/include/atomic.h new file mode 100644 index 000000000000..f861da3e31ab --- /dev/null +++ b/tools/testing/selftests/futex/include/atomic.h @@ -0,0 +1,83 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * GCC atomic builtin wrappers + * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html + * + * AUTHOR + * Darren Hart <[email protected]> + * + * HISTORY + * 2009-Nov-17: Initial version by Darren Hart <[email protected]> + * + *****************************************************************************/ + +#ifndef _ATOMIC_H +#define _ATOMIC_H + +typedef struct { + volatile int val; +} atomic_t; + +#define ATOMIC_INITIALIZER { 0 } + +/** + * atomic_cmpxchg() - Atomic compare and exchange + * @uaddr: The address of the futex to be modified + * @oldval: The expected value of the futex + * @newval: The new value to try and assign the futex + * + * Return the old value of addr->val. + */ +static inline int +atomic_cmpxchg(atomic_t *addr, int oldval, int newval) +{ + return __sync_val_compare_and_swap(&addr->val, oldval, newval); +} + +/** + * atomic_inc() - Atomic incrememnt + * @addr: Address of the variable to increment + * + * Return the new value of addr->val. + */ +static inline int +atomic_inc(atomic_t *addr) +{ + return __sync_add_and_fetch(&addr->val, 1); +} + +/** + * atomic_dec() - Atomic decrement + * @addr: Address of the variable to decrement + * + * Return the new value of addr-val. + */ +static inline int +atomic_dec(atomic_t *addr) +{ + return __sync_sub_and_fetch(&addr->val, 1); +} + +/** + * atomic_set() - Atomic set + * @addr: Address of the variable to set + * @newval: New value for the atomic_t + * + * Return the new value of addr->val. + */ +static inline int +atomic_set(atomic_t *addr, int newval) +{ + addr->val = newval; + return newval; +} + +#endif diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h new file mode 100644 index 000000000000..b98c3aba7102 --- /dev/null +++ b/tools/testing/selftests/futex/include/futextest.h @@ -0,0 +1,266 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Glibc independent futex library for testing kernel functionality. + * + * AUTHOR + * Darren Hart <[email protected]> + * + * HISTORY + * 2009-Nov-6: Initial version by Darren Hart <[email protected]> + * + *****************************************************************************/ + +#ifndef _FUTEXTEST_H +#define _FUTEXTEST_H + +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <linux/futex.h> + +typedef volatile u_int32_t futex_t; +#define FUTEX_INITIALIZER 0 + +/* Define the newer op codes if the system header file is not up to date. */ +#ifndef FUTEX_WAIT_BITSET +#define FUTEX_WAIT_BITSET 9 +#endif +#ifndef FUTEX_WAKE_BITSET +#define FUTEX_WAKE_BITSET 10 +#endif +#ifndef FUTEX_WAIT_REQUEUE_PI +#define FUTEX_WAIT_REQUEUE_PI 11 +#endif +#ifndef FUTEX_CMP_REQUEUE_PI +#define FUTEX_CMP_REQUEUE_PI 12 +#endif +#ifndef FUTEX_WAIT_REQUEUE_PI_PRIVATE +#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +#endif +#ifndef FUTEX_REQUEUE_PI_PRIVATE +#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +#endif + +/** + * futex() - SYS_futex syscall wrapper + * @uaddr: address of first futex + * @op: futex op code + * @val: typically expected value of uaddr, but varies by op + * @timeout: typically an absolute struct timespec (except where noted + * otherwise). Overloaded by some ops + * @uaddr2: address of second futex for some ops\ + * @val3: varies by op + * @opflags: flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG + * + * futex() is used by all the following futex op wrappers. It can also be + * used for misuse and abuse testing. Generally, the specific op wrappers + * should be used instead. It is a macro instead of an static inline function as + * some of the types over overloaded (timeout is used for nr_requeue for + * example). + * + * These argument descriptions are the defaults for all + * like-named arguments in the following wrappers except where noted below. + */ +#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \ + syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3) + +/** + * futex_wait() - block on uaddr with optional timeout + * @timeout: relative timeout + */ +static inline int +futex_wait(futex_t *uaddr, futex_t val, struct timespec *timeout, int opflags) +{ + return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); +} + +/** + * futex_wake() - wake one or more tasks blocked on uaddr + * @nr_wake: wake up to this many tasks + */ +static inline int +futex_wake(futex_t *uaddr, int nr_wake, int opflags) +{ + return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags); +} + +/** + * futex_wait_bitset() - block on uaddr with bitset + * @bitset: bitset to be used with futex_wake_bitset + */ +static inline int +futex_wait_bitset(futex_t *uaddr, futex_t val, struct timespec *timeout, + u_int32_t bitset, int opflags) +{ + return futex(uaddr, FUTEX_WAIT_BITSET, val, timeout, NULL, bitset, + opflags); +} + +/** + * futex_wake_bitset() - wake one or more tasks blocked on uaddr with bitset + * @bitset: bitset to compare with that used in futex_wait_bitset + */ +static inline int +futex_wake_bitset(futex_t *uaddr, int nr_wake, u_int32_t bitset, int opflags) +{ + return futex(uaddr, FUTEX_WAKE_BITSET, nr_wake, NULL, NULL, bitset, + opflags); +} + +/** + * futex_lock_pi() - block on uaddr as a PI mutex + * @detect: whether (1) or not (0) to perform deadlock detection + */ +static inline int +futex_lock_pi(futex_t *uaddr, struct timespec *timeout, int detect, + int opflags) +{ + return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags); +} + +/** + * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter + */ +static inline int +futex_unlock_pi(futex_t *uaddr, int opflags) +{ + return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags); +} + +/** + * futex_wake_op() - FIXME: COME UP WITH A GOOD ONE LINE DESCRIPTION + */ +static inline int +futex_wake_op(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_wake2, + int wake_op, int opflags) +{ + return futex(uaddr, FUTEX_WAKE_OP, nr_wake, nr_wake2, uaddr2, wake_op, + opflags); +} + +/** + * futex_requeue() - requeue without expected value comparison, deprecated + * @nr_wake: wake up to this many tasks + * @nr_requeue: requeue up to this many tasks + * + * Due to its inherently racy implementation, futex_requeue() is deprecated in + * favor of futex_cmp_requeue(). + */ +static inline int +futex_requeue(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_requeue, + int opflags) +{ + return futex(uaddr, FUTEX_REQUEUE, nr_wake, nr_requeue, uaddr2, 0, + opflags); +} + +/** + * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2 + * @nr_wake: wake up to this many tasks + * @nr_requeue: requeue up to this many tasks + */ +static inline int +futex_cmp_requeue(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake, + int nr_requeue, int opflags) +{ + return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2, + val, opflags); +} + +/** + * futex_wait_requeue_pi() - block on uaddr and prepare to requeue to uaddr2 + * @uaddr: non-PI futex source + * @uaddr2: PI futex target + * + * This is the first half of the requeue_pi mechanism. It shall always be + * paired with futex_cmp_requeue_pi(). + */ +static inline int +futex_wait_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2, + struct timespec *timeout, int opflags) +{ + return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0, + opflags); +} + +/** + * futex_cmp_requeue_pi() - requeue tasks from uaddr to uaddr2 (PI aware) + * @uaddr: non-PI futex source + * @uaddr2: PI futex target + * @nr_wake: wake up to this many tasks + * @nr_requeue: requeue up to this many tasks + */ +static inline int +futex_cmp_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake, + int nr_requeue, int opflags) +{ + return futex(uaddr, FUTEX_CMP_REQUEUE_PI, nr_wake, nr_requeue, uaddr2, + val, opflags); +} + +/** + * futex_cmpxchg() - atomic compare and exchange + * @uaddr: The address of the futex to be modified + * @oldval: The expected value of the futex + * @newval: The new value to try and assign the futex + * + * Implement cmpxchg using gcc atomic builtins. + * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html + * + * Return the old futex value. + */ +static inline u_int32_t +futex_cmpxchg(futex_t *uaddr, u_int32_t oldval, u_int32_t newval) +{ + return __sync_val_compare_and_swap(uaddr, oldval, newval); +} + +/** + * futex_dec() - atomic decrement of the futex value + * @uaddr: The address of the futex to be modified + * + * Return the new futex value. + */ +static inline u_int32_t +futex_dec(futex_t *uaddr) +{ + return __sync_sub_and_fetch(uaddr, 1); +} + +/** + * futex_inc() - atomic increment of the futex value + * @uaddr: the address of the futex to be modified + * + * Return the new futex value. + */ +static inline u_int32_t +futex_inc(futex_t *uaddr) +{ + return __sync_add_and_fetch(uaddr, 1); +} + +/** + * futex_set() - atomic decrement of the futex value + * @uaddr: the address of the futex to be modified + * @newval: New value for the atomic_t + * + * Return the new futex value. + */ +static inline u_int32_t +futex_set(futex_t *uaddr, u_int32_t newval) +{ + *uaddr = newval; + return newval; +} + +#endif diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h new file mode 100644 index 000000000000..014aa01197af --- /dev/null +++ b/tools/testing/selftests/futex/include/logging.h @@ -0,0 +1,153 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Glibc independent futex library for testing kernel functionality. + * + * AUTHOR + * Darren Hart <[email protected]> + * + * HISTORY + * 2009-Nov-6: Initial version by Darren Hart <[email protected]> + * + *****************************************************************************/ + +#ifndef _LOGGING_H +#define _LOGGING_H + +#include <string.h> +#include <unistd.h> +#include <linux/futex.h> +#include "kselftest.h" + +/* + * Define PASS, ERROR, and FAIL strings with and without color escape + * sequences, default to no color. + */ +#define ESC 0x1B, '[' +#define BRIGHT '1' +#define GREEN '3', '2' +#define YELLOW '3', '3' +#define RED '3', '1' +#define ESCEND 'm' +#define BRIGHT_GREEN ESC, BRIGHT, ';', GREEN, ESCEND +#define BRIGHT_YELLOW ESC, BRIGHT, ';', YELLOW, ESCEND +#define BRIGHT_RED ESC, BRIGHT, ';', RED, ESCEND +#define RESET_COLOR ESC, '0', 'm' +static const char PASS_COLOR[] = {BRIGHT_GREEN, ' ', 'P', 'A', 'S', 'S', + RESET_COLOR, 0}; +static const char ERROR_COLOR[] = {BRIGHT_YELLOW, 'E', 'R', 'R', 'O', 'R', + RESET_COLOR, 0}; +static const char FAIL_COLOR[] = {BRIGHT_RED, ' ', 'F', 'A', 'I', 'L', + RESET_COLOR, 0}; +static const char INFO_NORMAL[] = " INFO"; +static const char PASS_NORMAL[] = " PASS"; +static const char ERROR_NORMAL[] = "ERROR"; +static const char FAIL_NORMAL[] = " FAIL"; +const char *INFO = INFO_NORMAL; +const char *PASS = PASS_NORMAL; +const char *ERROR = ERROR_NORMAL; +const char *FAIL = FAIL_NORMAL; + +/* Verbosity setting for INFO messages */ +#define VQUIET 0 +#define VCRITICAL 1 +#define VINFO 2 +#define VMAX VINFO +int _verbose = VCRITICAL; + +/* Functional test return codes */ +#define RET_PASS 0 +#define RET_ERROR -1 +#define RET_FAIL -2 + +/** + * log_color() - Use colored output for PASS, ERROR, and FAIL strings + * @use_color: use color (1) or not (0) + */ +void log_color(int use_color) +{ + if (use_color) { + PASS = PASS_COLOR; + ERROR = ERROR_COLOR; + FAIL = FAIL_COLOR; + } else { + PASS = PASS_NORMAL; + ERROR = ERROR_NORMAL; + FAIL = FAIL_NORMAL; + } +} + +/** + * log_verbosity() - Set verbosity of test output + * @verbose: Enable (1) verbose output or not (0) + * + * Currently setting verbose=1 will enable INFO messages and 0 will disable + * them. FAIL and ERROR messages are always displayed. + */ +void log_verbosity(int level) +{ + if (level > VMAX) + level = VMAX; + else if (level < 0) + level = 0; + _verbose = level; +} + +/** + * print_result() - Print standard PASS | ERROR | FAIL results + * @ret: the return value to be considered: 0 | RET_ERROR | RET_FAIL + * + * print_result() is primarily intended for functional tests. + */ +void print_result(int ret) +{ + const char *result = "Unknown return code"; + + switch (ret) { + case RET_PASS: + ksft_inc_pass_cnt(); + result = PASS; + break; + case RET_ERROR: + result = ERROR; + break; + case RET_FAIL: + ksft_inc_fail_cnt(); + result = FAIL; + break; + } + printf("Result: %s\n", result); +} + +/* log level macros */ +#define info(message, vargs...) \ +do { \ + if (_verbose >= VINFO) \ + fprintf(stderr, "\t%s: "message, INFO, ##vargs); \ +} while (0) + +#define error(message, err, args...) \ +do { \ + if (_verbose >= VCRITICAL) {\ + if (err) \ + fprintf(stderr, "\t%s: %s: "message, \ + ERROR, strerror(err), ##args); \ + else \ + fprintf(stderr, "\t%s: "message, ERROR, ##args); \ + } \ +} while (0) + +#define fail(message, args...) \ +do { \ + if (_verbose >= VCRITICAL) \ + fprintf(stderr, "\t%s: "message, FAIL, ##args); \ +} while (0) + +#endif diff --git a/tools/testing/selftests/futex/run.sh b/tools/testing/selftests/futex/run.sh new file mode 100755 index 000000000000..4126312ad64e --- /dev/null +++ b/tools/testing/selftests/futex/run.sh @@ -0,0 +1,33 @@ +#!/bin/sh + +############################################################################### +# +# Copyright © International Business Machines Corp., 2009 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# DESCRIPTION +# Run all tests under the functional, performance, and stress directories. +# Format and summarize the results. +# +# AUTHOR +# Darren Hart <[email protected]> +# +# HISTORY +# 2009-Nov-9: Initial version by Darren Hart <[email protected]> +# +############################################################################### + +# Test for a color capable shell and pass the result to the subdir scripts +USE_COLOR=0 +tput setf 7 +if [ $? -eq 0 ]; then + USE_COLOR=1 + tput sgr0 +fi +export USE_COLOR + +(cd functional; ./run.sh) diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 572c8888167a..ef1c80d67ac7 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -13,6 +13,13 @@ #include <stdlib.h> #include <unistd.h> +/* define kselftest exit codes */ +#define KSFT_PASS 0 +#define KSFT_FAIL 1 +#define KSFT_XFAIL 2 +#define KSFT_XPASS 3 +#define KSFT_SKIP 4 + /* counters */ struct ksft_count { unsigned int ksft_pass; @@ -40,23 +47,23 @@ static inline void ksft_print_cnts(void) static inline int ksft_exit_pass(void) { - exit(0); + exit(KSFT_PASS); } static inline int ksft_exit_fail(void) { - exit(1); + exit(KSFT_FAIL); } static inline int ksft_exit_xfail(void) { - exit(2); + exit(KSFT_XFAIL); } static inline int ksft_exit_xpass(void) { - exit(3); + exit(KSFT_XPASS); } static inline int ksft_exit_skip(void) { - exit(4); + exit(KSFT_SKIP); } #endif /* __KSELFTEST_H */ diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 2194155ae62a..ee412bab7ed4 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -13,6 +13,9 @@ run_tests: all define INSTALL_RULE mkdir -p $(INSTALL_PATH) + @for TEST_DIR in $(TEST_DIRS); do\ + cp -r $$TEST_DIR $(INSTALL_PATH); \ + done; install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) endef diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile index 95580a97326e..5e35c9c50b72 100644 --- a/tools/testing/selftests/mount/Makefile +++ b/tools/testing/selftests/mount/Makefile @@ -9,7 +9,12 @@ unprivileged-remount-test: unprivileged-remount-test.c include ../lib.mk TEST_PROGS := unprivileged-remount-test -override RUN_TESTS := if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi +override RUN_TESTS := if [ -f /proc/self/uid_map ] ; \ + then \ + ./unprivileged-remount-test ; \ + else \ + echo "WARN: No /proc/self/uid_map exist, test skipped." ; \ + fi override EMIT_TESTS := echo "$(RUN_TESTS)" clean: diff --git a/tools/testing/selftests/seccomp/.gitignore b/tools/testing/selftests/seccomp/.gitignore new file mode 100644 index 000000000000..346d83ca8069 --- /dev/null +++ b/tools/testing/selftests/seccomp/.gitignore @@ -0,0 +1 @@ +seccomp_bpf diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile new file mode 100644 index 000000000000..8401e87e34e1 --- /dev/null +++ b/tools/testing/selftests/seccomp/Makefile @@ -0,0 +1,10 @@ +TEST_PROGS := seccomp_bpf +CFLAGS += -Wl,-no-as-needed -Wall +LDFLAGS += -lpthread + +all: $(TEST_PROGS) + +include ../lib.mk + +clean: + $(RM) $(TEST_PROGS) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c new file mode 100644 index 000000000000..c5abe7fd7590 --- /dev/null +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -0,0 +1,2109 @@ +/* + * Copyright (c) 2012 The Chromium OS Authors. All rights reserved. + * Use of this source code is governed by the GPLv2 license. + * + * Test code for seccomp bpf. + */ + +#include <asm/siginfo.h> +#define __have_siginfo_t 1 +#define __have_sigval_t 1 +#define __have_sigevent_t 1 + +#include <errno.h> +#include <linux/filter.h> +#include <sys/prctl.h> +#include <sys/ptrace.h> +#include <sys/user.h> +#include <linux/prctl.h> +#include <linux/ptrace.h> +#include <linux/seccomp.h> +#include <poll.h> +#include <pthread.h> +#include <semaphore.h> +#include <signal.h> +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/elf.h> +#include <sys/uio.h> + +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> + +#include "test_harness.h" + +#ifndef PR_SET_PTRACER +# define PR_SET_PTRACER 0x59616d61 +#endif + +#ifndef PR_SET_NO_NEW_PRIVS +#define PR_SET_NO_NEW_PRIVS 38 +#define PR_GET_NO_NEW_PRIVS 39 +#endif + +#ifndef PR_SECCOMP_EXT +#define PR_SECCOMP_EXT 43 +#endif + +#ifndef SECCOMP_EXT_ACT +#define SECCOMP_EXT_ACT 1 +#endif + +#ifndef SECCOMP_EXT_ACT_TSYNC +#define SECCOMP_EXT_ACT_TSYNC 1 +#endif + +#ifndef SECCOMP_MODE_STRICT +#define SECCOMP_MODE_STRICT 1 +#endif + +#ifndef SECCOMP_MODE_FILTER +#define SECCOMP_MODE_FILTER 2 +#endif + +#ifndef SECCOMP_RET_KILL +#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */ +#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ +#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ +#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ +#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ + +/* Masks for the return value sections. */ +#define SECCOMP_RET_ACTION 0x7fff0000U +#define SECCOMP_RET_DATA 0x0000ffffU + +struct seccomp_data { + int nr; + __u32 arch; + __u64 instruction_pointer; + __u64 args[6]; +}; +#endif + +#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) + +#define SIBLING_EXIT_UNKILLED 0xbadbeef +#define SIBLING_EXIT_FAILURE 0xbadface +#define SIBLING_EXIT_NEWPRIVS 0xbadfeed + +TEST(mode_strict_support) +{ + long ret; + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support CONFIG_SECCOMP"); + } + syscall(__NR_exit, 1); +} + +TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL) +{ + long ret; + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support CONFIG_SECCOMP"); + } + syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER, + NULL, NULL, NULL); + EXPECT_FALSE(true) { + TH_LOG("Unreachable!"); + } +} + +/* Note! This doesn't test no new privs behavior */ +TEST(no_new_privs_support) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + EXPECT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } +} + +/* Tests kernel support by checking for a copy_from_user() fault on * NULL. */ +TEST(mode_filter_support) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!"); + } +} + +TEST(mode_filter_without_nnp) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0); + ASSERT_LE(0, ret) { + TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS"); + } + errno = 0; + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + /* Succeeds with CAP_SYS_ADMIN, fails without */ + /* TODO(wad) check caps not euid */ + if (geteuid()) { + EXPECT_EQ(-1, ret); + EXPECT_EQ(EACCES, errno); + } else { + EXPECT_EQ(0, ret); + } +} + +#define MAX_INSNS_PER_PATH 32768 + +TEST(filter_size_limits) +{ + int i; + int count = BPF_MAXINSNS + 1; + struct sock_filter allow[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter *filter; + struct sock_fprog prog = { }; + long ret; + + filter = calloc(count, sizeof(*filter)); + ASSERT_NE(NULL, filter); + + for (i = 0; i < count; i++) + filter[i] = allow[0]; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + prog.filter = filter; + prog.len = count; + + /* Too many filter instructions in a single filter. */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_NE(0, ret) { + TH_LOG("Installing %d insn filter was allowed", prog.len); + } + + /* One less is okay, though. */ + prog.len -= 1; + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Installing %d insn filter wasn't allowed", prog.len); + } +} + +TEST(filter_chain_limits) +{ + int i; + int count = BPF_MAXINSNS; + struct sock_filter allow[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter *filter; + struct sock_fprog prog = { }; + long ret; + + filter = calloc(count, sizeof(*filter)); + ASSERT_NE(NULL, filter); + + for (i = 0; i < count; i++) + filter[i] = allow[0]; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + prog.filter = filter; + prog.len = 1; + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); + + prog.len = count; + + /* Too many total filter instructions. */ + for (i = 0; i < MAX_INSNS_PER_PATH; i++) { + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + if (ret != 0) + break; + } + ASSERT_NE(0, ret) { + TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)", + i, count, i * (count + 4)); + } +} + +TEST(mode_filter_cannot_move_to_strict) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); +} + + +TEST(mode_filter_get_seccomp) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); + EXPECT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); + EXPECT_EQ(2, ret); +} + + +TEST(ALLOW_all) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); +} + +TEST(empty_prog) +{ + struct sock_filter filter[] = { + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); +} + +TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, 0x10000000U), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + EXPECT_EQ(0, syscall(__NR_getpid)) { + TH_LOG("getpid() shouldn't ever return"); + } +} + +/* return code >= 0x80000000 is unused. */ +TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, 0x90000000U), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + EXPECT_EQ(0, syscall(__NR_getpid)) { + TH_LOG("getpid() shouldn't ever return"); + } +} + +TEST_SIGNAL(KILL_all, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); +} + +TEST_SIGNAL(KILL_one, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_SIGNAL(KILL_one_arg_one, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + /* Only both with lower 32-bit for now. */ + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + pid_t pid = getpid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(pid, syscall(__NR_getpid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid, 0x0C0FFEE)); +} + +TEST_SIGNAL(KILL_one_arg_six, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + /* Only both with lower 32-bit for now. */ + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + pid_t pid = getpid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(pid, syscall(__NR_getpid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid, 1, 2, 3, 4, 5, 0x0C0FFEE)); +} + +/* TODO(wad) add 64-bit versus 32-bit arg tests. */ +TEST(arg_out_of_range) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); +} + +TEST(ERRNO_valid) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(E2BIG, errno); +} + +TEST(ERRNO_zero) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* "errno" of 0 is ok. */ + EXPECT_EQ(0, read(0, NULL, 0)); +} + +TEST(ERRNO_capped) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(4095, errno); +} + +FIXTURE_DATA(TRAP) { + struct sock_fprog prog; +}; + +FIXTURE_SETUP(TRAP) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + + memset(&self->prog, 0, sizeof(self->prog)); + self->prog.filter = malloc(sizeof(filter)); + ASSERT_NE(NULL, self->prog.filter); + memcpy(self->prog.filter, filter, sizeof(filter)); + self->prog.len = (unsigned short)ARRAY_SIZE(filter); +} + +FIXTURE_TEARDOWN(TRAP) +{ + if (self->prog.filter) + free(self->prog.filter); +} + +TEST_F_SIGNAL(TRAP, dfl, SIGSYS) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); + ASSERT_EQ(0, ret); + syscall(__NR_getpid); +} + +/* Ensure that SIGSYS overrides SIG_IGN */ +TEST_F_SIGNAL(TRAP, ign, SIGSYS) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + signal(SIGSYS, SIG_IGN); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); + ASSERT_EQ(0, ret); + syscall(__NR_getpid); +} + +static struct siginfo TRAP_info; +static volatile int TRAP_nr; +static void TRAP_action(int nr, siginfo_t *info, void *void_context) +{ + memcpy(&TRAP_info, info, sizeof(TRAP_info)); + TRAP_nr = nr; +} + +TEST_F(TRAP, handler) +{ + int ret, test; + struct sigaction act; + sigset_t mask; + + memset(&act, 0, sizeof(act)); + sigemptyset(&mask); + sigaddset(&mask, SIGSYS); + + act.sa_sigaction = &TRAP_action; + act.sa_flags = SA_SIGINFO; + ret = sigaction(SIGSYS, &act, NULL); + ASSERT_EQ(0, ret) { + TH_LOG("sigaction failed"); + } + ret = sigprocmask(SIG_UNBLOCK, &mask, NULL); + ASSERT_EQ(0, ret) { + TH_LOG("sigprocmask failed"); + } + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); + ASSERT_EQ(0, ret); + TRAP_nr = 0; + memset(&TRAP_info, 0, sizeof(TRAP_info)); + /* Expect the registers to be rolled back. (nr = error) may vary + * based on arch. */ + ret = syscall(__NR_getpid); + /* Silence gcc warning about volatile. */ + test = TRAP_nr; + EXPECT_EQ(SIGSYS, test); + struct local_sigsys { + void *_call_addr; /* calling user insn */ + int _syscall; /* triggering system call number */ + unsigned int _arch; /* AUDIT_ARCH_* of syscall */ + } *sigsys = (struct local_sigsys *) +#ifdef si_syscall + &(TRAP_info.si_call_addr); +#else + &TRAP_info.si_pid; +#endif + EXPECT_EQ(__NR_getpid, sigsys->_syscall); + /* Make sure arch is non-zero. */ + EXPECT_NE(0, sigsys->_arch); + EXPECT_NE(0, (unsigned long)sigsys->_call_addr); +} + +FIXTURE_DATA(precedence) { + struct sock_fprog allow; + struct sock_fprog trace; + struct sock_fprog error; + struct sock_fprog trap; + struct sock_fprog kill; +}; + +FIXTURE_SETUP(precedence) +{ + struct sock_filter allow_insns[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter trace_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE), + }; + struct sock_filter error_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO), + }; + struct sock_filter trap_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), + }; + struct sock_filter kill_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + }; + + memset(self, 0, sizeof(*self)); +#define FILTER_ALLOC(_x) \ + self->_x.filter = malloc(sizeof(_x##_insns)); \ + ASSERT_NE(NULL, self->_x.filter); \ + memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \ + self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns) + FILTER_ALLOC(allow); + FILTER_ALLOC(trace); + FILTER_ALLOC(error); + FILTER_ALLOC(trap); + FILTER_ALLOC(kill); +} + +FIXTURE_TEARDOWN(precedence) +{ +#define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter) + FILTER_FREE(allow); + FILTER_FREE(trace); + FILTER_FREE(error); + FILTER_FREE(trap); + FILTER_FREE(kill); +} + +TEST_F(precedence, allow_ok) +{ + pid_t parent, res = 0; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + res = syscall(__NR_getppid); + EXPECT_EQ(parent, res); +} + +TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS) +{ + pid_t parent, res = 0; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + res = syscall(__NR_getppid); + EXPECT_EQ(parent, res); + /* getpid() should never return. */ + res = syscall(__NR_getpid); + EXPECT_EQ(0, res); +} + +TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F(precedence, errno_is_third) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F(precedence, errno_is_third_in_any_order) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F(precedence, trace_is_fourth) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* No ptracer */ + EXPECT_EQ(-1, syscall(__NR_getpid)); +} + +TEST_F(precedence, trace_is_fourth_in_any_order) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* No ptracer */ + EXPECT_EQ(-1, syscall(__NR_getpid)); +} + +#ifndef PTRACE_O_TRACESECCOMP +#define PTRACE_O_TRACESECCOMP 0x00000080 +#endif + +/* Catch the Ubuntu 12.04 value error. */ +#if PTRACE_EVENT_SECCOMP != 7 +#undef PTRACE_EVENT_SECCOMP +#endif + +#ifndef PTRACE_EVENT_SECCOMP +#define PTRACE_EVENT_SECCOMP 7 +#endif + +#define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP) +bool tracer_running; +void tracer_stop(int sig) +{ + tracer_running = false; +} + +typedef void tracer_func_t(struct __test_metadata *_metadata, + pid_t tracee, int status, void *args); + +void tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, + tracer_func_t tracer_func, void *args) +{ + int ret = -1; + struct sigaction action = { + .sa_handler = tracer_stop, + }; + + /* Allow external shutdown. */ + tracer_running = true; + ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL)); + + errno = 0; + while (ret == -1 && errno != EINVAL) + ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0); + ASSERT_EQ(0, ret) { + kill(tracee, SIGKILL); + } + /* Wait for attach stop */ + wait(NULL); + + ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, PTRACE_O_TRACESECCOMP); + ASSERT_EQ(0, ret) { + TH_LOG("Failed to set PTRACE_O_TRACESECCOMP"); + kill(tracee, SIGKILL); + } + ptrace(PTRACE_CONT, tracee, NULL, 0); + + /* Unblock the tracee */ + ASSERT_EQ(1, write(fd, "A", 1)); + ASSERT_EQ(0, close(fd)); + + /* Run until we're shut down. Must assert to stop execution. */ + while (tracer_running) { + int status; + + if (wait(&status) != tracee) + continue; + if (WIFSIGNALED(status) || WIFEXITED(status)) + /* Child is dead. Time to go. */ + return; + + /* Make sure this is a seccomp event. */ + ASSERT_EQ(true, IS_SECCOMP_EVENT(status)); + + tracer_func(_metadata, tracee, status, args); + + ret = ptrace(PTRACE_CONT, tracee, NULL, NULL); + ASSERT_EQ(0, ret); + } + /* Directly report the status of our test harness results. */ + syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); +} + +/* Common tracer setup/teardown functions. */ +void cont_handler(int num) +{ } +pid_t setup_trace_fixture(struct __test_metadata *_metadata, + tracer_func_t func, void *args) +{ + char sync; + int pipefd[2]; + pid_t tracer_pid; + pid_t tracee = getpid(); + + /* Setup a pipe for clean synchronization. */ + ASSERT_EQ(0, pipe(pipefd)); + + /* Fork a child which we'll promote to tracer */ + tracer_pid = fork(); + ASSERT_LE(0, tracer_pid); + signal(SIGALRM, cont_handler); + if (tracer_pid == 0) { + close(pipefd[0]); + tracer(_metadata, pipefd[1], tracee, func, args); + syscall(__NR_exit, 0); + } + close(pipefd[1]); + prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0); + read(pipefd[0], &sync, 1); + close(pipefd[0]); + + return tracer_pid; +} +void teardown_trace_fixture(struct __test_metadata *_metadata, + pid_t tracer) +{ + if (tracer) { + int status; + /* + * Extract the exit code from the other process and + * adopt it for ourselves in case its asserts failed. + */ + ASSERT_EQ(0, kill(tracer, SIGUSR1)); + ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); + if (WEXITSTATUS(status)) + _metadata->passed = 0; + } +} + +/* "poke" tracer arguments and function. */ +struct tracer_args_poke_t { + unsigned long poke_addr; +}; + +void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status, + void *args) +{ + int ret; + unsigned long msg; + struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args; + + ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); + EXPECT_EQ(0, ret); + /* If this fails, don't try to recover. */ + ASSERT_EQ(0x1001, msg) { + kill(tracee, SIGKILL); + } + /* + * Poke in the message. + * Registers are not touched to try to keep this relatively arch + * agnostic. + */ + ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001); + EXPECT_EQ(0, ret); +} + +FIXTURE_DATA(TRACE_poke) { + struct sock_fprog prog; + pid_t tracer; + long poked; + struct tracer_args_poke_t tracer_args; +}; + +FIXTURE_SETUP(TRACE_poke) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + + self->poked = 0; + memset(&self->prog, 0, sizeof(self->prog)); + self->prog.filter = malloc(sizeof(filter)); + ASSERT_NE(NULL, self->prog.filter); + memcpy(self->prog.filter, filter, sizeof(filter)); + self->prog.len = (unsigned short)ARRAY_SIZE(filter); + + /* Set up tracer args. */ + self->tracer_args.poke_addr = (unsigned long)&self->poked; + + /* Launch tracer. */ + self->tracer = setup_trace_fixture(_metadata, tracer_poke, + &self->tracer_args); +} + +FIXTURE_TEARDOWN(TRACE_poke) +{ + teardown_trace_fixture(_metadata, self->tracer); + if (self->prog.filter) + free(self->prog.filter); +} + +TEST_F(TRACE_poke, read_has_side_effects) +{ + ssize_t ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + EXPECT_EQ(0, self->poked); + ret = read(-1, NULL, 0); + EXPECT_EQ(-1, ret); + EXPECT_EQ(0x1001, self->poked); +} + +TEST_F(TRACE_poke, getpid_runs_normally) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + EXPECT_EQ(0, self->poked); + EXPECT_NE(0, syscall(__NR_getpid)); + EXPECT_EQ(0, self->poked); +} + +#if defined(__x86_64__) +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM orig_rax +# define SYSCALL_RET rax +#elif defined(__i386__) +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM orig_eax +# define SYSCALL_RET eax +#elif defined(__arm__) +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM ARM_r7 +# define SYSCALL_RET ARM_r0 +#elif defined(__aarch64__) +# define ARCH_REGS struct user_pt_regs +# define SYSCALL_NUM regs[8] +# define SYSCALL_RET regs[0] +#else +# error "Do not know how to find your architecture's registers and syscalls" +#endif + +/* Architecture-specific syscall fetching routine. */ +int get_syscall(struct __test_metadata *_metadata, pid_t tracee) +{ + struct iovec iov; + ARCH_REGS regs; + + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) { + TH_LOG("PTRACE_GETREGSET failed"); + return -1; + } + + return regs.SYSCALL_NUM; +} + +/* Architecture-specific syscall changing routine. */ +void change_syscall(struct __test_metadata *_metadata, + pid_t tracee, int syscall) +{ + struct iovec iov; + int ret; + ARCH_REGS regs; + + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); + EXPECT_EQ(0, ret); + +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) + { + regs.SYSCALL_NUM = syscall; + } + +#elif defined(__arm__) +# ifndef PTRACE_SET_SYSCALL +# define PTRACE_SET_SYSCALL 23 +# endif + { + ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall); + EXPECT_EQ(0, ret); + } + +#else + ASSERT_EQ(1, 0) { + TH_LOG("How is the syscall changed on this architecture?"); + } +#endif + + /* If syscall is skipped, change return value. */ + if (syscall == -1) + regs.SYSCALL_RET = 1; + + ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov); + EXPECT_EQ(0, ret); +} + +void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee, + int status, void *args) +{ + int ret; + unsigned long msg; + + /* Make sure we got the right message. */ + ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); + EXPECT_EQ(0, ret); + + switch (msg) { + case 0x1002: + /* change getpid to getppid. */ + change_syscall(_metadata, tracee, __NR_getppid); + break; + case 0x1003: + /* skip gettid. */ + change_syscall(_metadata, tracee, -1); + break; + case 0x1004: + /* do nothing (allow getppid) */ + break; + default: + EXPECT_EQ(0, msg) { + TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg); + kill(tracee, SIGKILL); + } + } + +} + +FIXTURE_DATA(TRACE_syscall) { + struct sock_fprog prog; + pid_t tracer, mytid, mypid, parent; +}; + +FIXTURE_SETUP(TRACE_syscall) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + + memset(&self->prog, 0, sizeof(self->prog)); + self->prog.filter = malloc(sizeof(filter)); + ASSERT_NE(NULL, self->prog.filter); + memcpy(self->prog.filter, filter, sizeof(filter)); + self->prog.len = (unsigned short)ARRAY_SIZE(filter); + + /* Prepare some testable syscall results. */ + self->mytid = syscall(__NR_gettid); + ASSERT_GT(self->mytid, 0); + ASSERT_NE(self->mytid, 1) { + TH_LOG("Running this test as init is not supported. :)"); + } + + self->mypid = getpid(); + ASSERT_GT(self->mypid, 0); + ASSERT_EQ(self->mytid, self->mypid); + + self->parent = getppid(); + ASSERT_GT(self->parent, 0); + ASSERT_NE(self->parent, self->mypid); + + /* Launch tracer. */ + self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL); +} + +FIXTURE_TEARDOWN(TRACE_syscall) +{ + teardown_trace_fixture(_metadata, self->tracer); + if (self->prog.filter) + free(self->prog.filter); +} + +TEST_F(TRACE_syscall, syscall_allowed) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* getppid works as expected (no changes). */ + EXPECT_EQ(self->parent, syscall(__NR_getppid)); + EXPECT_NE(self->mypid, syscall(__NR_getppid)); +} + +TEST_F(TRACE_syscall, syscall_redirected) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* getpid has been redirected to getppid as expected. */ + EXPECT_EQ(self->parent, syscall(__NR_getpid)); + EXPECT_NE(self->mypid, syscall(__NR_getpid)); +} + +TEST_F(TRACE_syscall, syscall_dropped) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* gettid has been skipped and an altered return value stored. */ + EXPECT_EQ(1, syscall(__NR_gettid)); + EXPECT_NE(self->mytid, syscall(__NR_gettid)); +} + +#ifndef __NR_seccomp +# if defined(__i386__) +# define __NR_seccomp 354 +# elif defined(__x86_64__) +# define __NR_seccomp 317 +# elif defined(__arm__) +# define __NR_seccomp 383 +# elif defined(__aarch64__) +# define __NR_seccomp 277 +# else +# warning "seccomp syscall number unknown for this architecture" +# define __NR_seccomp 0xffff +# endif +#endif + +#ifndef SECCOMP_SET_MODE_STRICT +#define SECCOMP_SET_MODE_STRICT 0 +#endif + +#ifndef SECCOMP_SET_MODE_FILTER +#define SECCOMP_SET_MODE_FILTER 1 +#endif + +#ifndef SECCOMP_FLAG_FILTER_TSYNC +#define SECCOMP_FLAG_FILTER_TSYNC 1 +#endif + +#ifndef seccomp +int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter) +{ + errno = 0; + return syscall(__NR_seccomp, op, flags, filter); +} +#endif + +TEST(seccomp_syscall) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* Reject insane operation. */ + ret = seccomp(-1, 0, &prog); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Did not reject crazy op value!"); + } + + /* Reject strict with flags or pointer. */ + ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Did not reject mode strict with flags!"); + } + ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Did not reject mode strict with uargs!"); + } + + /* Reject insane args for filter. */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Did not reject crazy filter flags!"); + } + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Did not reject NULL filter!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); + EXPECT_EQ(0, errno) { + TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s", + strerror(errno)); + } +} + +TEST(seccomp_syscall_mode_lock) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); + EXPECT_EQ(0, ret) { + TH_LOG("Could not install filter!"); + } + + /* Make sure neither entry point will switch to strict. */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Switched to mode strict!"); + } + + ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Switched to mode strict!"); + } +} + +TEST(TSYNC_first) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &prog); + EXPECT_EQ(0, ret) { + TH_LOG("Could not install initial filter with TSYNC!"); + } +} + +#define TSYNC_SIBLINGS 2 +struct tsync_sibling { + pthread_t tid; + pid_t system_tid; + sem_t *started; + pthread_cond_t *cond; + pthread_mutex_t *mutex; + int diverge; + int num_waits; + struct sock_fprog *prog; + struct __test_metadata *metadata; +}; + +FIXTURE_DATA(TSYNC) { + struct sock_fprog root_prog, apply_prog; + struct tsync_sibling sibling[TSYNC_SIBLINGS]; + sem_t started; + pthread_cond_t cond; + pthread_mutex_t mutex; + int sibling_count; +}; + +FIXTURE_SETUP(TSYNC) +{ + struct sock_filter root_filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter apply_filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + + memset(&self->root_prog, 0, sizeof(self->root_prog)); + memset(&self->apply_prog, 0, sizeof(self->apply_prog)); + memset(&self->sibling, 0, sizeof(self->sibling)); + self->root_prog.filter = malloc(sizeof(root_filter)); + ASSERT_NE(NULL, self->root_prog.filter); + memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter)); + self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter); + + self->apply_prog.filter = malloc(sizeof(apply_filter)); + ASSERT_NE(NULL, self->apply_prog.filter); + memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter)); + self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter); + + self->sibling_count = 0; + pthread_mutex_init(&self->mutex, NULL); + pthread_cond_init(&self->cond, NULL); + sem_init(&self->started, 0, 0); + self->sibling[0].tid = 0; + self->sibling[0].cond = &self->cond; + self->sibling[0].started = &self->started; + self->sibling[0].mutex = &self->mutex; + self->sibling[0].diverge = 0; + self->sibling[0].num_waits = 1; + self->sibling[0].prog = &self->root_prog; + self->sibling[0].metadata = _metadata; + self->sibling[1].tid = 0; + self->sibling[1].cond = &self->cond; + self->sibling[1].started = &self->started; + self->sibling[1].mutex = &self->mutex; + self->sibling[1].diverge = 0; + self->sibling[1].prog = &self->root_prog; + self->sibling[1].num_waits = 1; + self->sibling[1].metadata = _metadata; +} + +FIXTURE_TEARDOWN(TSYNC) +{ + int sib = 0; + + if (self->root_prog.filter) + free(self->root_prog.filter); + if (self->apply_prog.filter) + free(self->apply_prog.filter); + + for ( ; sib < self->sibling_count; ++sib) { + struct tsync_sibling *s = &self->sibling[sib]; + void *status; + + if (!s->tid) + continue; + if (pthread_kill(s->tid, 0)) { + pthread_cancel(s->tid); + pthread_join(s->tid, &status); + } + } + pthread_mutex_destroy(&self->mutex); + pthread_cond_destroy(&self->cond); + sem_destroy(&self->started); +} + +void *tsync_sibling(void *data) +{ + long ret = 0; + struct tsync_sibling *me = data; + + me->system_tid = syscall(__NR_gettid); + + pthread_mutex_lock(me->mutex); + if (me->diverge) { + /* Just re-apply the root prog to fork the tree */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, + me->prog, 0, 0); + } + sem_post(me->started); + /* Return outside of started so parent notices failures. */ + if (ret) { + pthread_mutex_unlock(me->mutex); + return (void *)SIBLING_EXIT_FAILURE; + } + do { + pthread_cond_wait(me->cond, me->mutex); + me->num_waits = me->num_waits - 1; + } while (me->num_waits); + pthread_mutex_unlock(me->mutex); + + ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); + if (!ret) + return (void *)SIBLING_EXIT_NEWPRIVS; + read(0, NULL, 0); + return (void *)SIBLING_EXIT_UNKILLED; +} + +void tsync_start_sibling(struct tsync_sibling *sibling) +{ + pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling); +} + +TEST_F(TSYNC, siblings_fail_prctl) +{ + long ret; + void *status; + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* Check prctl failure detection by requesting sib 0 diverge. */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); + ASSERT_EQ(0, ret) { + TH_LOG("setting filter failed"); + } + + self->sibling[0].diverge = 1; + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + /* Signal the threads to clean up*/ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + + /* Ensure diverging sibling failed to call prctl. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); +} + +TEST_F(TSYNC, two_siblings_with_ancestor) +{ + long ret; + void *status; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); + } + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(0, ret) { + TH_LOG("Could install filter on all threads!"); + } + /* Tell the siblings to test the policy */ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + /* Ensure they are both killed and don't exit cleanly. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(0x0, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(0x0, (long)status); +} + +TEST_F(TSYNC, two_sibling_want_nnp) +{ + void *status; + + /* start siblings before any prctl() operations */ + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + /* Tell the siblings to test no policy */ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + + /* Ensure they are both upset about lacking nnp. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); +} + +TEST_F(TSYNC, two_siblings_with_no_filter) +{ + long ret; + void *status; + + /* start siblings before any prctl() operations */ + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(0, ret) { + TH_LOG("Could install filter on all threads!"); + } + + /* Tell the siblings to test the policy */ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + + /* Ensure they are both killed and don't exit cleanly. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(0x0, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(0x0, (long)status); +} + +TEST_F(TSYNC, two_siblings_with_one_divergence) +{ + long ret; + void *status; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); + } + self->sibling[0].diverge = 1; + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(self->sibling[0].system_tid, ret) { + TH_LOG("Did not fail on diverged sibling."); + } + + /* Wake the threads */ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + + /* Ensure they are both unkilled. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); +} + +TEST_F(TSYNC, two_siblings_not_under_filter) +{ + long ret, sib; + void *status; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* + * Sibling 0 will have its own seccomp policy + * and Sibling 1 will not be under seccomp at + * all. Sibling 1 will enter seccomp and 0 + * will cause failure. + */ + self->sibling[0].diverge = 1; + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(ret, self->sibling[0].system_tid) { + TH_LOG("Did not fail on diverged sibling."); + } + sib = 1; + if (ret == self->sibling[0].system_tid) + sib = 0; + + pthread_mutex_lock(&self->mutex); + + /* Increment the other siblings num_waits so we can clean up + * the one we just saw. + */ + self->sibling[!sib].num_waits += 1; + + /* Signal the thread to clean up*/ + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + pthread_join(self->sibling[sib].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); + /* Poll for actual task death. pthread_join doesn't guarantee it. */ + while (!kill(self->sibling[sib].system_tid, 0)) + sleep(0.1); + /* Switch to the remaining sibling */ + sib = !sib; + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(0, ret) { + TH_LOG("Expected the remaining sibling to sync"); + }; + + pthread_mutex_lock(&self->mutex); + + /* If remaining sibling didn't have a chance to wake up during + * the first broadcast, manually reduce the num_waits now. + */ + if (self->sibling[sib].num_waits > 1) + self->sibling[sib].num_waits = 1; + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + pthread_join(self->sibling[sib].tid, &status); + EXPECT_EQ(0, (long)status); + /* Poll for actual task death. pthread_join doesn't guarantee it. */ + while (!kill(self->sibling[sib].system_tid, 0)) + sleep(0.1); + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(0, ret); /* just us chickens */ +} + +/* Make sure restarted syscalls are seen directly as "restart_syscall". */ +TEST(syscall_restart) +{ + long ret; + unsigned long msg; + pid_t child_pid; + int pipefd[2]; + int status; + siginfo_t info = { }; + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + +#ifdef __NR_sigreturn + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0), +#endif + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_poll, 4, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0), + + /* Allow __NR_write for easy logging. */ + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), /* poll */ + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), /* restart */ + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, pipe(pipefd)); + + child_pid = fork(); + ASSERT_LE(0, child_pid); + if (child_pid == 0) { + /* Child uses EXPECT not ASSERT to deliver status correctly. */ + char buf = ' '; + struct pollfd fds = { + .fd = pipefd[0], + .events = POLLIN, + }; + + /* Attach parent as tracer and stop. */ + EXPECT_EQ(0, ptrace(PTRACE_TRACEME)); + EXPECT_EQ(0, raise(SIGSTOP)); + + EXPECT_EQ(0, close(pipefd[1])); + + EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + EXPECT_EQ(0, ret) { + TH_LOG("Failed to install filter!"); + } + + EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { + TH_LOG("Failed to read() sync from parent"); + } + EXPECT_EQ('.', buf) { + TH_LOG("Failed to get sync data from read()"); + } + + /* Start poll to be interrupted. */ + errno = 0; + EXPECT_EQ(1, poll(&fds, 1, -1)) { + TH_LOG("Call to poll() failed (errno %d)", errno); + } + + /* Read final sync from parent. */ + EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { + TH_LOG("Failed final read() from parent"); + } + EXPECT_EQ('!', buf) { + TH_LOG("Failed to get final data from read()"); + } + + /* Directly report the status of our test harness results. */ + syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS + : EXIT_FAILURE); + } + EXPECT_EQ(0, close(pipefd[0])); + + /* Attach to child, setup options, and release. */ + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL, + PTRACE_O_TRACESECCOMP)); + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + ASSERT_EQ(1, write(pipefd[1], ".", 1)); + + /* Wait for poll() to start. */ + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); + ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); + ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); + ASSERT_EQ(0x100, msg); + EXPECT_EQ(__NR_poll, get_syscall(_metadata, child_pid)); + + /* Might as well check siginfo for sanity while we're here. */ + ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); + ASSERT_EQ(SIGTRAP, info.si_signo); + ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code); + EXPECT_EQ(0, info.si_errno); + EXPECT_EQ(getuid(), info.si_uid); + /* Verify signal delivery came from child (seccomp-triggered). */ + EXPECT_EQ(child_pid, info.si_pid); + + /* Interrupt poll with SIGSTOP (which we'll need to handle). */ + ASSERT_EQ(0, kill(child_pid, SIGSTOP)); + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(SIGSTOP, WSTOPSIG(status)); + /* Verify signal delivery came from parent now. */ + ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); + EXPECT_EQ(getpid(), info.si_pid); + + /* Restart poll with SIGCONT, which triggers restart_syscall. */ + ASSERT_EQ(0, kill(child_pid, SIGCONT)); + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(SIGCONT, WSTOPSIG(status)); + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + + /* Wait for restart_syscall() to start. */ + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); + ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); + ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); + ASSERT_EQ(0x200, msg); + ret = get_syscall(_metadata, child_pid); +#if defined(__arm__) + /* FIXME: ARM does not expose true syscall in registers. */ + EXPECT_EQ(__NR_poll, ret); +#else + EXPECT_EQ(__NR_restart_syscall, ret); +#endif + + /* Write again to end poll. */ + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + ASSERT_EQ(1, write(pipefd[1], "!", 1)); + EXPECT_EQ(0, close(pipefd[1])); + + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + if (WIFSIGNALED(status) || WEXITSTATUS(status)) + _metadata->passed = 0; +} + +/* + * TODO: + * - add microbenchmarks + * - expand NNP testing + * - better arch-specific TRACE and TRAP handlers. + * - endianness checking when appropriate + * - 64-bit arg prodding + * - arch value testing (x86 modes especially) + * - ... + */ + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/seccomp/test_harness.h b/tools/testing/selftests/seccomp/test_harness.h new file mode 100644 index 000000000000..977a6afc4489 --- /dev/null +++ b/tools/testing/selftests/seccomp/test_harness.h @@ -0,0 +1,537 @@ +/* + * Copyright (c) 2012 The Chromium OS Authors. All rights reserved. + * Use of this source code is governed by the GPLv2 license. + * + * test_harness.h: simple C unit test helper. + * + * Usage: + * #include "test_harness.h" + * TEST(standalone_test) { + * do_some_stuff; + * EXPECT_GT(10, stuff) { + * stuff_state_t state; + * enumerate_stuff_state(&state); + * TH_LOG("expectation failed with state: %s", state.msg); + * } + * more_stuff; + * ASSERT_NE(some_stuff, NULL) TH_LOG("how did it happen?!"); + * last_stuff; + * EXPECT_EQ(0, last_stuff); + * } + * + * FIXTURE(my_fixture) { + * mytype_t *data; + * int awesomeness_level; + * }; + * FIXTURE_SETUP(my_fixture) { + * self->data = mytype_new(); + * ASSERT_NE(NULL, self->data); + * } + * FIXTURE_TEARDOWN(my_fixture) { + * mytype_free(self->data); + * } + * TEST_F(my_fixture, data_is_good) { + * EXPECT_EQ(1, is_my_data_good(self->data)); + * } + * + * TEST_HARNESS_MAIN + * + * API inspired by code.google.com/p/googletest + */ +#ifndef TEST_HARNESS_H_ +#define TEST_HARNESS_H_ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +/* All exported functionality should be declared through this macro. */ +#define TEST_API(x) _##x + +/* + * Exported APIs + */ + +/* TEST(name) { implementation } + * Defines a test by name. + * Names must be unique and tests must not be run in parallel. The + * implementation containing block is a function and scoping should be treated + * as such. Returning early may be performed with a bare "return;" statement. + * + * EXPECT_* and ASSERT_* are valid in a TEST() { } context. + */ +#define TEST TEST_API(TEST) + +/* TEST_SIGNAL(name, signal) { implementation } + * Defines a test by name and the expected term signal. + * Names must be unique and tests must not be run in parallel. The + * implementation containing block is a function and scoping should be treated + * as such. Returning early may be performed with a bare "return;" statement. + * + * EXPECT_* and ASSERT_* are valid in a TEST() { } context. + */ +#define TEST_SIGNAL TEST_API(TEST_SIGNAL) + +/* FIXTURE(datatype name) { + * type property1; + * ... + * }; + * Defines the data provided to TEST_F()-defined tests as |self|. It should be + * populated and cleaned up using FIXTURE_SETUP and FIXTURE_TEARDOWN. + */ +#define FIXTURE TEST_API(FIXTURE) + +/* FIXTURE_DATA(datatype name) + * This call may be used when the type of the fixture data + * is needed. In general, this should not be needed unless + * the |self| is being passed to a helper directly. + */ +#define FIXTURE_DATA TEST_API(FIXTURE_DATA) + +/* FIXTURE_SETUP(fixture name) { implementation } + * Populates the required "setup" function for a fixture. An instance of the + * datatype defined with _FIXTURE_DATA will be exposed as |self| for the + * implementation. + * + * ASSERT_* are valid for use in this context and will prempt the execution + * of any dependent fixture tests. + * + * A bare "return;" statement may be used to return early. + */ +#define FIXTURE_SETUP TEST_API(FIXTURE_SETUP) + +/* FIXTURE_TEARDOWN(fixture name) { implementation } + * Populates the required "teardown" function for a fixture. An instance of the + * datatype defined with _FIXTURE_DATA will be exposed as |self| for the + * implementation to clean up. + * + * A bare "return;" statement may be used to return early. + */ +#define FIXTURE_TEARDOWN TEST_API(FIXTURE_TEARDOWN) + +/* TEST_F(fixture, name) { implementation } + * Defines a test that depends on a fixture (e.g., is part of a test case). + * Very similar to TEST() except that |self| is the setup instance of fixture's + * datatype exposed for use by the implementation. + */ +#define TEST_F TEST_API(TEST_F) + +#define TEST_F_SIGNAL TEST_API(TEST_F_SIGNAL) + +/* Use once to append a main() to the test file. E.g., + * TEST_HARNESS_MAIN + */ +#define TEST_HARNESS_MAIN TEST_API(TEST_HARNESS_MAIN) + +/* + * Operators for use in TEST and TEST_F. + * ASSERT_* calls will stop test execution immediately. + * EXPECT_* calls will emit a failure warning, note it, and continue. + */ + +/* ASSERT_EQ(expected, measured): expected == measured */ +#define ASSERT_EQ TEST_API(ASSERT_EQ) +/* ASSERT_NE(expected, measured): expected != measured */ +#define ASSERT_NE TEST_API(ASSERT_NE) +/* ASSERT_LT(expected, measured): expected < measured */ +#define ASSERT_LT TEST_API(ASSERT_LT) +/* ASSERT_LE(expected, measured): expected <= measured */ +#define ASSERT_LE TEST_API(ASSERT_LE) +/* ASSERT_GT(expected, measured): expected > measured */ +#define ASSERT_GT TEST_API(ASSERT_GT) +/* ASSERT_GE(expected, measured): expected >= measured */ +#define ASSERT_GE TEST_API(ASSERT_GE) +/* ASSERT_NULL(measured): NULL == measured */ +#define ASSERT_NULL TEST_API(ASSERT_NULL) +/* ASSERT_TRUE(measured): measured != 0 */ +#define ASSERT_TRUE TEST_API(ASSERT_TRUE) +/* ASSERT_FALSE(measured): measured == 0 */ +#define ASSERT_FALSE TEST_API(ASSERT_FALSE) +/* ASSERT_STREQ(expected, measured): !strcmp(expected, measured) */ +#define ASSERT_STREQ TEST_API(ASSERT_STREQ) +/* ASSERT_STRNE(expected, measured): strcmp(expected, measured) */ +#define ASSERT_STRNE TEST_API(ASSERT_STRNE) +/* EXPECT_EQ(expected, measured): expected == measured */ +#define EXPECT_EQ TEST_API(EXPECT_EQ) +/* EXPECT_NE(expected, measured): expected != measured */ +#define EXPECT_NE TEST_API(EXPECT_NE) +/* EXPECT_LT(expected, measured): expected < measured */ +#define EXPECT_LT TEST_API(EXPECT_LT) +/* EXPECT_LE(expected, measured): expected <= measured */ +#define EXPECT_LE TEST_API(EXPECT_LE) +/* EXPECT_GT(expected, measured): expected > measured */ +#define EXPECT_GT TEST_API(EXPECT_GT) +/* EXPECT_GE(expected, measured): expected >= measured */ +#define EXPECT_GE TEST_API(EXPECT_GE) +/* EXPECT_NULL(measured): NULL == measured */ +#define EXPECT_NULL TEST_API(EXPECT_NULL) +/* EXPECT_TRUE(measured): 0 != measured */ +#define EXPECT_TRUE TEST_API(EXPECT_TRUE) +/* EXPECT_FALSE(measured): 0 == measured */ +#define EXPECT_FALSE TEST_API(EXPECT_FALSE) +/* EXPECT_STREQ(expected, measured): !strcmp(expected, measured) */ +#define EXPECT_STREQ TEST_API(EXPECT_STREQ) +/* EXPECT_STRNE(expected, measured): strcmp(expected, measured) */ +#define EXPECT_STRNE TEST_API(EXPECT_STRNE) + +/* TH_LOG(format, ...) + * Optional debug logging function available for use in tests. + * Logging may be enabled or disabled by defining TH_LOG_ENABLED. + * E.g., #define TH_LOG_ENABLED 1 + * If no definition is provided, logging is enabled by default. + */ +#define TH_LOG TEST_API(TH_LOG) + +/* + * Internal implementation. + * + */ + +/* Utilities exposed to the test definitions */ +#ifndef TH_LOG_STREAM +# define TH_LOG_STREAM stderr +#endif + +#ifndef TH_LOG_ENABLED +# define TH_LOG_ENABLED 1 +#endif + +#define _TH_LOG(fmt, ...) do { \ + if (TH_LOG_ENABLED) \ + __TH_LOG(fmt, ##__VA_ARGS__); \ +} while (0) + +/* Unconditional logger for internal use. */ +#define __TH_LOG(fmt, ...) \ + fprintf(TH_LOG_STREAM, "%s:%d:%s:" fmt "\n", \ + __FILE__, __LINE__, _metadata->name, ##__VA_ARGS__) + +/* Defines the test function and creates the registration stub. */ +#define _TEST(test_name) __TEST_IMPL(test_name, -1) + +#define _TEST_SIGNAL(test_name, signal) __TEST_IMPL(test_name, signal) + +#define __TEST_IMPL(test_name, _signal) \ + static void test_name(struct __test_metadata *_metadata); \ + static struct __test_metadata _##test_name##_object = \ + { name: "global." #test_name, \ + fn: &test_name, termsig: _signal }; \ + static void __attribute__((constructor)) _register_##test_name(void) \ + { \ + __register_test(&_##test_name##_object); \ + } \ + static void test_name( \ + struct __test_metadata __attribute__((unused)) *_metadata) + +/* Wraps the struct name so we have one less argument to pass around. */ +#define _FIXTURE_DATA(fixture_name) struct _test_data_##fixture_name + +/* Called once per fixture to setup the data and register. */ +#define _FIXTURE(fixture_name) \ + static void __attribute__((constructor)) \ + _register_##fixture_name##_data(void) \ + { \ + __fixture_count++; \ + } \ + _FIXTURE_DATA(fixture_name) + +/* Prepares the setup function for the fixture. |_metadata| is included + * so that ASSERT_* work as a convenience. + */ +#define _FIXTURE_SETUP(fixture_name) \ + void fixture_name##_setup( \ + struct __test_metadata __attribute__((unused)) *_metadata, \ + _FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) +#define _FIXTURE_TEARDOWN(fixture_name) \ + void fixture_name##_teardown( \ + struct __test_metadata __attribute__((unused)) *_metadata, \ + _FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) + +/* Emits test registration and helpers for fixture-based test + * cases. + * TODO(wad) register fixtures on dedicated test lists. + */ +#define _TEST_F(fixture_name, test_name) \ + __TEST_F_IMPL(fixture_name, test_name, -1) + +#define _TEST_F_SIGNAL(fixture_name, test_name, signal) \ + __TEST_F_IMPL(fixture_name, test_name, signal) + +#define __TEST_F_IMPL(fixture_name, test_name, signal) \ + static void fixture_name##_##test_name( \ + struct __test_metadata *_metadata, \ + _FIXTURE_DATA(fixture_name) *self); \ + static inline void wrapper_##fixture_name##_##test_name( \ + struct __test_metadata *_metadata) \ + { \ + /* fixture data is alloced, setup, and torn down per call. */ \ + _FIXTURE_DATA(fixture_name) self; \ + memset(&self, 0, sizeof(_FIXTURE_DATA(fixture_name))); \ + fixture_name##_setup(_metadata, &self); \ + /* Let setup failure terminate early. */ \ + if (!_metadata->passed) \ + return; \ + fixture_name##_##test_name(_metadata, &self); \ + fixture_name##_teardown(_metadata, &self); \ + } \ + static struct __test_metadata \ + _##fixture_name##_##test_name##_object = { \ + name: #fixture_name "." #test_name, \ + fn: &wrapper_##fixture_name##_##test_name, \ + termsig: signal, \ + }; \ + static void __attribute__((constructor)) \ + _register_##fixture_name##_##test_name(void) \ + { \ + __register_test(&_##fixture_name##_##test_name##_object); \ + } \ + static void fixture_name##_##test_name( \ + struct __test_metadata __attribute__((unused)) *_metadata, \ + _FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) + +/* Exports a simple wrapper to run the test harness. */ +#define _TEST_HARNESS_MAIN \ + static void __attribute__((constructor)) \ + __constructor_order_last(void) \ + { \ + if (!__constructor_order) \ + __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; \ + } \ + int main(int argc, char **argv) { \ + return test_harness_run(argc, argv); \ + } + +#define _ASSERT_EQ(_expected, _seen) \ + __EXPECT(_expected, _seen, ==, 1) +#define _ASSERT_NE(_expected, _seen) \ + __EXPECT(_expected, _seen, !=, 1) +#define _ASSERT_LT(_expected, _seen) \ + __EXPECT(_expected, _seen, <, 1) +#define _ASSERT_LE(_expected, _seen) \ + __EXPECT(_expected, _seen, <=, 1) +#define _ASSERT_GT(_expected, _seen) \ + __EXPECT(_expected, _seen, >, 1) +#define _ASSERT_GE(_expected, _seen) \ + __EXPECT(_expected, _seen, >=, 1) +#define _ASSERT_NULL(_seen) \ + __EXPECT(NULL, _seen, ==, 1) + +#define _ASSERT_TRUE(_seen) \ + _ASSERT_NE(0, _seen) +#define _ASSERT_FALSE(_seen) \ + _ASSERT_EQ(0, _seen) +#define _ASSERT_STREQ(_expected, _seen) \ + __EXPECT_STR(_expected, _seen, ==, 1) +#define _ASSERT_STRNE(_expected, _seen) \ + __EXPECT_STR(_expected, _seen, !=, 1) + +#define _EXPECT_EQ(_expected, _seen) \ + __EXPECT(_expected, _seen, ==, 0) +#define _EXPECT_NE(_expected, _seen) \ + __EXPECT(_expected, _seen, !=, 0) +#define _EXPECT_LT(_expected, _seen) \ + __EXPECT(_expected, _seen, <, 0) +#define _EXPECT_LE(_expected, _seen) \ + __EXPECT(_expected, _seen, <=, 0) +#define _EXPECT_GT(_expected, _seen) \ + __EXPECT(_expected, _seen, >, 0) +#define _EXPECT_GE(_expected, _seen) \ + __EXPECT(_expected, _seen, >=, 0) + +#define _EXPECT_NULL(_seen) \ + __EXPECT(NULL, _seen, ==, 0) +#define _EXPECT_TRUE(_seen) \ + _EXPECT_NE(0, _seen) +#define _EXPECT_FALSE(_seen) \ + _EXPECT_EQ(0, _seen) + +#define _EXPECT_STREQ(_expected, _seen) \ + __EXPECT_STR(_expected, _seen, ==, 0) +#define _EXPECT_STRNE(_expected, _seen) \ + __EXPECT_STR(_expected, _seen, !=, 0) + +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) + +/* Support an optional handler after and ASSERT_* or EXPECT_*. The approach is + * not thread-safe, but it should be fine in most sane test scenarios. + * + * Using __bail(), which optionally abort()s, is the easiest way to early + * return while still providing an optional block to the API consumer. + */ +#define OPTIONAL_HANDLER(_assert) \ + for (; _metadata->trigger; _metadata->trigger = __bail(_assert)) + +#define __EXPECT(_expected, _seen, _t, _assert) do { \ + /* Avoid multiple evaluation of the cases */ \ + __typeof__(_expected) __exp = (_expected); \ + __typeof__(_seen) __seen = (_seen); \ + if (!(__exp _t __seen)) { \ + unsigned long long __exp_print = 0; \ + unsigned long long __seen_print = 0; \ + /* Avoid casting complaints the scariest way we can. */ \ + memcpy(&__exp_print, &__exp, sizeof(__exp)); \ + memcpy(&__seen_print, &__seen, sizeof(__seen)); \ + __TH_LOG("Expected %s (%llu) %s %s (%llu)", \ + #_expected, __exp_print, #_t, \ + #_seen, __seen_print); \ + _metadata->passed = 0; \ + /* Ensure the optional handler is triggered */ \ + _metadata->trigger = 1; \ + } \ +} while (0); OPTIONAL_HANDLER(_assert) + +#define __EXPECT_STR(_expected, _seen, _t, _assert) do { \ + const char *__exp = (_expected); \ + const char *__seen = (_seen); \ + if (!(strcmp(__exp, __seen) _t 0)) { \ + __TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \ + _metadata->passed = 0; \ + _metadata->trigger = 1; \ + } \ +} while (0); OPTIONAL_HANDLER(_assert) + +/* Contains all the information for test execution and status checking. */ +struct __test_metadata { + const char *name; + void (*fn)(struct __test_metadata *); + int termsig; + int passed; + int trigger; /* extra handler after the evaluation */ + struct __test_metadata *prev, *next; +}; + +/* Storage for the (global) tests to be run. */ +static struct __test_metadata *__test_list; +static unsigned int __test_count; +static unsigned int __fixture_count; +static int __constructor_order; + +#define _CONSTRUCTOR_ORDER_FORWARD 1 +#define _CONSTRUCTOR_ORDER_BACKWARD -1 + +/* + * Since constructors are called in reverse order, reverse the test + * list so tests are run in source declaration order. + * https://gcc.gnu.org/onlinedocs/gccint/Initialization.html + * However, it seems not all toolchains do this correctly, so use + * __constructor_order to detect which direction is called first + * and adjust list building logic to get things running in the right + * direction. + */ +static inline void __register_test(struct __test_metadata *t) +{ + __test_count++; + /* Circular linked list where only prev is circular. */ + if (__test_list == NULL) { + __test_list = t; + t->next = NULL; + t->prev = t; + return; + } + if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) { + t->next = NULL; + t->prev = __test_list->prev; + t->prev->next = t; + __test_list->prev = t; + } else { + t->next = __test_list; + t->next->prev = t; + t->prev = t; + __test_list = t; + } +} + +static inline int __bail(int for_realz) +{ + if (for_realz) + abort(); + return 0; +} + +void __run_test(struct __test_metadata *t) +{ + pid_t child_pid; + int status; + + t->passed = 1; + t->trigger = 0; + printf("[ RUN ] %s\n", t->name); + child_pid = fork(); + if (child_pid < 0) { + printf("ERROR SPAWNING TEST CHILD\n"); + t->passed = 0; + } else if (child_pid == 0) { + t->fn(t); + _exit(t->passed); + } else { + /* TODO(wad) add timeout support. */ + waitpid(child_pid, &status, 0); + if (WIFEXITED(status)) { + t->passed = t->termsig == -1 ? WEXITSTATUS(status) : 0; + if (t->termsig != -1) { + fprintf(TH_LOG_STREAM, + "%s: Test exited normally " + "instead of by signal (code: %d)\n", + t->name, + WEXITSTATUS(status)); + } + } else if (WIFSIGNALED(status)) { + t->passed = 0; + if (WTERMSIG(status) == SIGABRT) { + fprintf(TH_LOG_STREAM, + "%s: Test terminated by assertion\n", + t->name); + } else if (WTERMSIG(status) == t->termsig) { + t->passed = 1; + } else { + fprintf(TH_LOG_STREAM, + "%s: Test terminated unexpectedly " + "by signal %d\n", + t->name, + WTERMSIG(status)); + } + } else { + fprintf(TH_LOG_STREAM, + "%s: Test ended in some other way [%u]\n", + t->name, + status); + } + } + printf("[ %4s ] %s\n", (t->passed ? "OK" : "FAIL"), t->name); +} + +static int test_harness_run(int __attribute__((unused)) argc, + char __attribute__((unused)) **argv) +{ + struct __test_metadata *t; + int ret = 0; + unsigned int count = 0; + unsigned int pass_count = 0; + + /* TODO(wad) add optional arguments similar to gtest. */ + printf("[==========] Running %u tests from %u test cases.\n", + __test_count, __fixture_count + 1); + for (t = __test_list; t; t = t->next) { + count++; + __run_test(t); + if (t->passed) + pass_count++; + else + ret = 1; + } + printf("[==========] %u / %u tests passed.\n", pass_count, count); + printf("[ %s ]\n", (ret ? "FAILED" : "PASSED")); + return ret; +} + +static void __attribute__((constructor)) __constructor_order_first(void) +{ + if (!__constructor_order) + __constructor_order = _CONSTRUCTOR_ORDER_FORWARD; +} + +#endif /* TEST_HARNESS_H_ */ diff --git a/tools/testing/selftests/timers/.gitignore b/tools/testing/selftests/timers/.gitignore new file mode 100644 index 000000000000..ced998151bc4 --- /dev/null +++ b/tools/testing/selftests/timers/.gitignore @@ -0,0 +1,18 @@ +alarmtimer-suspend +change_skew +clocksource-switch +inconsistency-check +leap-a-day +leapcrash +mqueue-lat +nanosleep +nsleep-lat +posix_timers +raw_skew +rtctest +set-2038 +set-tai +set-timer-lat +skew_consistency +threadtest +valid-adjtimex diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c index aaffbde1d5ee..72cacf5383dd 100644 --- a/tools/testing/selftests/timers/alarmtimer-suspend.c +++ b/tools/testing/selftests/timers/alarmtimer-suspend.c @@ -57,7 +57,7 @@ static inline int ksft_exit_fail(void) #define NSEC_PER_SEC 1000000000ULL -#define UNREASONABLE_LAT (NSEC_PER_SEC * 4) /* hopefully we resume in 4secs */ +#define UNREASONABLE_LAT (NSEC_PER_SEC * 5) /* hopefully we resume in 5 secs */ #define SUSPEND_SECS 15 int alarmcount; @@ -152,7 +152,11 @@ int main(void) alarm_clock_id++) { alarmcount = 0; - timer_create(alarm_clock_id, &se, &tm1); + if (timer_create(alarm_clock_id, &se, &tm1) == -1) { + printf("timer_create failled, %s unspported?\n", + clockstring(alarm_clock_id)); + break; + } clock_gettime(alarm_clock_id, &start_time); printf("Start time (%s): %ld:%ld\n", clockstring(alarm_clock_id), @@ -172,7 +176,7 @@ int main(void) while (alarmcount < 10) { int ret; - sleep(1); + sleep(3); ret = system("echo mem > /sys/power/state"); if (ret) break; diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index a5ce9534eb15..231b9a031f6a 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,7 +1,12 @@ # Makefile for vm selftests CFLAGS = -Wall -BINARIES = hugepage-mmap hugepage-shm map_hugetlb thuge-gen hugetlbfstest +BINARIES = compaction_test +BINARIES += hugepage-mmap +BINARIES += hugepage-shm +BINARIES += hugetlbfstest +BINARIES += map_hugetlb +BINARIES += thuge-gen BINARIES += transhuge-stress all: $(BINARIES) diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c new file mode 100644 index 000000000000..932ff577ffc0 --- /dev/null +++ b/tools/testing/selftests/vm/compaction_test.c @@ -0,0 +1,225 @@ +/* + * + * A test for the patch "Allow compaction of unevictable pages". + * With this patch we should be able to allocate at least 1/4 + * of RAM in huge pages. Without the patch much less is + * allocated. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> + +#define MAP_SIZE 1048576 + +struct map_list { + void *map; + struct map_list *next; +}; + +int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize) +{ + char buffer[256] = {0}; + char *cmd = "cat /proc/meminfo | grep -i memfree | grep -o '[0-9]*'"; + FILE *cmdfile = popen(cmd, "r"); + + if (!(fgets(buffer, sizeof(buffer), cmdfile))) { + perror("Failed to read meminfo\n"); + return -1; + } + + pclose(cmdfile); + + *memfree = atoll(buffer); + cmd = "cat /proc/meminfo | grep -i hugepagesize | grep -o '[0-9]*'"; + cmdfile = popen(cmd, "r"); + + if (!(fgets(buffer, sizeof(buffer), cmdfile))) { + perror("Failed to read meminfo\n"); + return -1; + } + + pclose(cmdfile); + *hugepagesize = atoll(buffer); + + return 0; +} + +int prereq(void) +{ + char allowed; + int fd; + + fd = open("/proc/sys/vm/compact_unevictable_allowed", + O_RDONLY | O_NONBLOCK); + if (fd < 0) { + perror("Failed to open\n" + "/proc/sys/vm/compact_unevictable_allowed\n"); + return -1; + } + + if (read(fd, &allowed, sizeof(char)) != sizeof(char)) { + perror("Failed to read from\n" + "/proc/sys/vm/compact_unevictable_allowed\n"); + close(fd); + return -1; + } + + close(fd); + if (allowed == '1') + return 0; + + return -1; +} + +int check_compaction(unsigned long mem_free, unsigned int hugepage_size) +{ + int fd; + int compaction_index = 0; + char initial_nr_hugepages[10] = {0}; + char nr_hugepages[10] = {0}; + + /* We want to test with 80% of available memory. Else, OOM killer comes + in to play */ + mem_free = mem_free * 0.8; + + fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK); + if (fd < 0) { + perror("Failed to open /proc/sys/vm/nr_hugepages"); + return -1; + } + + if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) { + perror("Failed to read from /proc/sys/vm/nr_hugepages"); + goto close_fd; + } + + /* Start with the initial condition of 0 huge pages*/ + if (write(fd, "0", sizeof(char)) != sizeof(char)) { + perror("Failed to write to /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + lseek(fd, 0, SEEK_SET); + + /* Request a large number of huge pages. The Kernel will allocate + as much as it can */ + if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) { + perror("Failed to write to /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + lseek(fd, 0, SEEK_SET); + + if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) { + perror("Failed to read from /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + /* We should have been able to request at least 1/3 rd of the memory in + huge pages */ + compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size); + + if (compaction_index > 3) { + printf("No of huge pages allocated = %d\n", + (atoi(nr_hugepages))); + fprintf(stderr, "ERROR: Less that 1/%d of memory is available\n" + "as huge pages\n", compaction_index); + goto close_fd; + } + + printf("No of huge pages allocated = %d\n", + (atoi(nr_hugepages))); + + if (write(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) + != strlen(initial_nr_hugepages)) { + perror("Failed to write to /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + close(fd); + return 0; + + close_fd: + close(fd); + printf("Not OK. Compaction test failed."); + return -1; +} + + +int main(int argc, char **argv) +{ + struct rlimit lim; + struct map_list *list, *entry; + size_t page_size, i; + void *map = NULL; + unsigned long mem_free = 0; + unsigned long hugepage_size = 0; + unsigned long mem_fragmentable = 0; + + if (prereq() != 0) { + printf("Either the sysctl compact_unevictable_allowed is not\n" + "set to 1 or couldn't read the proc file.\n" + "Skipping the test\n"); + return 0; + } + + lim.rlim_cur = RLIM_INFINITY; + lim.rlim_max = RLIM_INFINITY; + if (setrlimit(RLIMIT_MEMLOCK, &lim)) { + perror("Failed to set rlimit:\n"); + return -1; + } + + page_size = getpagesize(); + + list = NULL; + + if (read_memory_info(&mem_free, &hugepage_size) != 0) { + printf("ERROR: Cannot read meminfo\n"); + return -1; + } + + mem_fragmentable = mem_free * 0.8 / 1024; + + while (mem_fragmentable > 0) { + map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0); + if (map == MAP_FAILED) + break; + + entry = malloc(sizeof(struct map_list)); + if (!entry) { + munmap(map, MAP_SIZE); + break; + } + entry->map = map; + entry->next = list; + list = entry; + + /* Write something (in this case the address of the map) to + * ensure that KSM can't merge the mapped pages + */ + for (i = 0; i < MAP_SIZE; i += page_size) + *(unsigned long *)(map + i) = (unsigned long)map + i; + + mem_fragmentable--; + } + + for (entry = list; entry != NULL; entry = entry->next) { + munmap(entry->map, MAP_SIZE); + if (!entry->next) + break; + entry = entry->next; + } + + if (check_compaction(mem_free, hugepage_size) == 0) + return 0; + + return -1; +} diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index c87b6812300d..49ece11ff7fd 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -90,4 +90,16 @@ fi umount $mnt rm -rf $mnt echo $nr_hugepgs > /proc/sys/vm/nr_hugepages + +echo "-----------------------" +echo "running compaction_test" +echo "-----------------------" +./compaction_test +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + exit $exitcode diff --git a/tools/testing/selftests/x86/trivial_64bit_program.c b/tools/testing/selftests/x86/trivial_64bit_program.c index b994946c40fb..05c6a41b3671 100644 --- a/tools/testing/selftests/x86/trivial_64bit_program.c +++ b/tools/testing/selftests/x86/trivial_64bit_program.c @@ -1,5 +1,5 @@ /* - * Trivial program to check that we have a valid 32-bit build environment. + * Trivial program to check that we have a valid 64-bit build environment. * Copyright (c) 2015 Andy Lutomirski * GPL v2 */ |