diff options
847 files changed, 15758 insertions, 8792 deletions
@@ -2,11 +2,16 @@ # This list is used by git-shortlog to fix a few botched name translations # in the git archive, either because the author's full name was messed up # and/or not always written the same way, making contributions from the -# same person appearing not to be so or badly displayed. +# same person appearing not to be so or badly displayed. Also allows for +# old email addresses to map to new email addresses. # +# For format details, see "MAPPING AUTHORS" in "man git-shortlog". +# +# Please keep this list dictionary sorted. +# +# This comment is parsed by git-shortlog: # repo-abbrev: /pub/scm/linux/kernel/git/ # - Aaron Durbin <[email protected]> Adam Oldham <[email protected]> Adam Radford <[email protected]> diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index fa4018afa5a4..6be43781ec7f 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1274,6 +1274,10 @@ PAGE_SIZE multiple when read back. Amount of memory used for storing in-kernel data structures. + percpu + Amount of memory used for storing per-cpu kernel + data structures. + sock Amount of memory used in network transmission buffers diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 98ea67f27809..bdc1f33fd3d1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -724,7 +724,7 @@ memory region [offset, offset + size] for that kernel image. If '@offset' is omitted, then a suitable offset is selected automatically. - [KNL, x86_64] select a region under 4G first, and + [KNL, X86-64] Select a region under 4G first, and fall back to reserve region above 4G when '@offset' hasn't been specified. See Documentation/admin-guide/kdump/kdump.rst for further details. @@ -737,14 +737,14 @@ Documentation/admin-guide/kdump/kdump.rst for an example. 
crashkernel=size[KMG],high - [KNL, x86_64] range could be above 4G. Allow kernel + [KNL, X86-64] range could be above 4G. Allow kernel to allocate physical memory region from top, so could be above 4G if system have more than 4G ram installed. Otherwise memory region will be allocated below 4G, if available. It will be ignored if crashkernel=X is specified. crashkernel=size[KMG],low - [KNL, x86_64] range under 4G. When crashkernel=X,high + [KNL, X86-64] range under 4G. When crashkernel=X,high is passed, kernel could allocate physical memory region above 4G, that cause second kernel crash on system that require some amount of low memory, e.g. swiotlb @@ -1427,7 +1427,7 @@ gamma= [HW,DRM] - gart_fix_e820= [X86_64] disable the fix e820 for K8 GART + gart_fix_e820= [X86-64] disable the fix e820 for K8 GART Format: off | on default: on @@ -1814,7 +1814,7 @@ Format: 0 | 1 Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON. - init_pkru= [x86] Specify the default memory protection keys rights + init_pkru= [X86] Specify the default memory protection keys rights register contents for all processes. 0x55555554 by default (disallow access to all but pkey 0). Can override in debugfs after boot. @@ -1822,7 +1822,7 @@ inport.irq= [HW] Inport (ATI XL and Microsoft) busmouse driver Format: <irq> - int_pln_enable [x86] Enable power limit notification interrupt + int_pln_enable [X86] Enable power limit notification interrupt integrity_audit=[IMA] Format: { "0" | "1" } @@ -1840,7 +1840,7 @@ bypassed by not enabling DMAR with this option. In this case, gfx device will use physical address for DMA. - forcedac [x86_64] + forcedac [X86-64] With this option iommu will not optimize to look for io virtual address below 32-bit forcing dual address cycle on pci bus for cards supporting greater @@ -1925,7 +1925,7 @@ strict regions from userspace. 
relaxed - iommu= [x86] + iommu= [X86] off force noforce @@ -1935,8 +1935,8 @@ merge nomerge soft - pt [x86] - nopt [x86] + pt [X86] + nopt [X86] nobypass [PPC/POWERNV] Disable IOMMU bypass, using IOMMU for PCI devices. @@ -2079,21 +2079,21 @@ iucv= [HW,NET] - ivrs_ioapic [HW,X86_64] + ivrs_ioapic [HW,X86-64] Provide an override to the IOAPIC-ID<->DEVICE-ID mapping provided in the IVRS ACPI table. For example, to map IOAPIC-ID decimal 10 to PCI device 00:14.0 write the parameter as: ivrs_ioapic[10]=00:14.0 - ivrs_hpet [HW,X86_64] + ivrs_hpet [HW,X86-64] Provide an override to the HPET-ID<->DEVICE-ID mapping provided in the IVRS ACPI table. For example, to map HPET-ID decimal 0 to PCI device 00:14.0 write the parameter as: ivrs_hpet[0]=00:14.0 - ivrs_acpihid [HW,X86_64] + ivrs_acpihid [HW,X86-64] Provide an override to the ACPI-HID:UID<->DEVICE-ID mapping provided in the IVRS ACPI table. For example, to map UART-HID:UID AMD0020:0 to @@ -2370,7 +2370,7 @@ lapic [X86-32,APIC] Enable the local APIC even if BIOS disabled it. - lapic= [x86,APIC] "notscdeadline" Do not use TSC deadline + lapic= [X86,APIC] "notscdeadline" Do not use TSC deadline value for LAPIC timer one-shot implementation. Default back to the programmable timer unit in the LAPIC. @@ -3188,12 +3188,12 @@ register save and restore. The kernel will only save legacy floating-point registers on task switch. - nohugeiomap [KNL,x86,PPC] Disable kernel huge I/O mappings. + nohugeiomap [KNL,X86,PPC] Disable kernel huge I/O mappings. nosmt [KNL,S390] Disable symmetric multithreading (SMT). Equivalent to smt=1. - [KNL,x86] Disable symmetric multithreading (SMT). + [KNL,X86] Disable symmetric multithreading (SMT). nosmt=force: Force disable SMT, cannot be undone via the sysfs control file. @@ -3955,7 +3955,7 @@ pt. [PARIDE] See Documentation/admin-guide/blockdev/paride.rst. - pti= [X86_64] Control Page Table Isolation of user and + pti= [X86-64] Control Page Table Isolation of user and kernel address spaces. 
Disabling this feature removes hardening, but improves performance of system calls and interrupts. @@ -3967,7 +3967,7 @@ Not specifying this option is equivalent to pti=auto. - nopti [X86_64] + nopti [X86-64] Equivalent to pti=off pty.legacy_count= diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 2ae9669eb22c..d4b32cc32bb7 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -164,7 +164,8 @@ core_pattern %s signal number %t UNIX time of dump %h hostname - %e executable filename (may be shortened) + %e executable filename (may be shortened, could be changed by prctl etc) + %f executable filename %E executable path %c maximum size of core file by resource limit RLIMIT_CORE %<OTHER> both are dropped diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index d997cc3c26d0..4b9d2e8e9142 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -119,6 +119,21 @@ all zones are compacted such that free memory is available in contiguous blocks where possible. This can be important for example in the allocation of huge pages although processes will also directly compact memory as required. +compaction_proactiveness +======================== + +This tunable takes a value in the range [0, 100] with a default value of +20. This tunable determines how aggressively compaction is done in the +background. Setting it to 0 disables proactive compaction. + +Note that compaction has a non-trivial system-wide impact as pages +belonging to different processes are moved around, which could also lead +to latency spikes in unsuspecting applications. The kernel employs +various heuristics to avoid wasting CPU cycles if it detects that +proactive compaction is not being effective. 
+ +Be careful when setting it to extreme values like 100, as that may +cause excessive background compaction activity. compact_unevictable_allowed =========================== diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst index 12a246fcf6cb..2df7b067ab93 100644 --- a/Documentation/bpf/bpf_design_QA.rst +++ b/Documentation/bpf/bpf_design_QA.rst @@ -246,17 +246,6 @@ program is loaded the kernel will print warning message, so this helper is only useful for experiments and prototypes. Tracing BPF programs are root only. -Q: bpf_trace_printk() helper warning ------------------------------------- -Q: When bpf_trace_printk() helper is used the kernel prints nasty -warning message. Why is that? - -A: This is done to nudge program authors into better interfaces when -programs need to pass data to user space. Like bpf_perf_event_output() -can be used to efficiently stream data via perf ring buffer. -BPF maps can be used for asynchronous data sharing between kernel -and user space. bpf_trace_printk() should only be used for debugging. - Q: New functionality via kernel modules? ---------------------------------------- Q: Can BPF functionality such as new program or map types, new diff --git a/Documentation/cdrom/cdrom-standard.rst b/Documentation/cdrom/cdrom-standard.rst index 2de905810590..70500b189cc8 100644 --- a/Documentation/cdrom/cdrom-standard.rst +++ b/Documentation/cdrom/cdrom-standard.rst @@ -557,7 +557,7 @@ phase. Currently, the capabilities are any of:: CDC_DRIVE_STATUS /* driver implements drive status */ The capability flag is declared *const*, to prevent drivers from -accidentally tampering with the contents. The capability fags actually +accidentally tampering with the contents. The capability flags actually inform `cdrom.c` of what the driver can do. If the drive found by the driver does not have the capability, is can be masked out by the *cdrom_device_info* variable *mask*. 
For instance, the SCSI CD-ROM @@ -736,7 +736,7 @@ Description of routines in `cdrom.c` Only a few routines in `cdrom.c` are exported to the drivers. In this new section we will discuss these, as well as the functions that `take -over' the CD-ROM interface to the kernel. The header file belonging +over` the CD-ROM interface to the kernel. The header file belonging to `cdrom.c` is called `cdrom.h`. Formerly, some of the contents of this file were placed in the file `ucdrom.h`, but this file has now been merged back into `cdrom.h`. diff --git a/Documentation/core-api/idr.rst b/Documentation/core-api/idr.rst index a2738050c4f0..2eb5afdb9931 100644 --- a/Documentation/core-api/idr.rst +++ b/Documentation/core-api/idr.rst @@ -20,48 +20,48 @@ only ID allocation, and as a result is much more memory-efficient. IDR usage ========= -Start by initialising an IDR, either with :c:func:`DEFINE_IDR` -for statically allocated IDRs or :c:func:`idr_init` for dynamically +Start by initialising an IDR, either with DEFINE_IDR() +for statically allocated IDRs or idr_init() for dynamically allocated IDRs. -You can call :c:func:`idr_alloc` to allocate an unused ID. Look up -the pointer you associated with the ID by calling :c:func:`idr_find` -and free the ID by calling :c:func:`idr_remove`. +You can call idr_alloc() to allocate an unused ID. Look up +the pointer you associated with the ID by calling idr_find() +and free the ID by calling idr_remove(). If you need to change the pointer associated with an ID, you can call -:c:func:`idr_replace`. One common reason to do this is to reserve an +idr_replace(). One common reason to do this is to reserve an ID by passing a ``NULL`` pointer to the allocation function; initialise the object with the reserved ID and finally insert the initialised object into the IDR. Some users need to allocate IDs larger than ``INT_MAX``. So far all of these users have been content with a ``UINT_MAX`` limit, and they use -:c:func:`idr_alloc_u32`. 
If you need IDs that will not fit in a u32, +idr_alloc_u32(). If you need IDs that will not fit in a u32, we will work with you to address your needs. If you need to allocate IDs sequentially, you can use -:c:func:`idr_alloc_cyclic`. The IDR becomes less efficient when dealing +idr_alloc_cyclic(). The IDR becomes less efficient when dealing with larger IDs, so using this function comes at a slight cost. To perform an action on all pointers used by the IDR, you can -either use the callback-based :c:func:`idr_for_each` or the -iterator-style :c:func:`idr_for_each_entry`. You may need to use -:c:func:`idr_for_each_entry_continue` to continue an iteration. You can -also use :c:func:`idr_get_next` if the iterator doesn't fit your needs. +either use the callback-based idr_for_each() or the +iterator-style idr_for_each_entry(). You may need to use +idr_for_each_entry_continue() to continue an iteration. You can +also use idr_get_next() if the iterator doesn't fit your needs. -When you have finished using an IDR, you can call :c:func:`idr_destroy` +When you have finished using an IDR, you can call idr_destroy() to release the memory used by the IDR. This will not free the objects pointed to from the IDR; if you want to do that, use one of the iterators to do it. -You can use :c:func:`idr_is_empty` to find out whether there are any +You can use idr_is_empty() to find out whether there are any IDs currently allocated. If you need to take a lock while allocating a new ID from the IDR, you may need to pass a restrictive set of GFP flags, which can lead to the IDR being unable to allocate memory. To work around this, -you can call :c:func:`idr_preload` before taking the lock, and then -:c:func:`idr_preload_end` after the allocation. +you can call idr_preload() before taking the lock, and then +idr_preload_end() after the allocation. .. 
kernel-doc:: include/linux/idr.h :doc: idr sync diff --git a/Documentation/dev-tools/coccinelle.rst b/Documentation/dev-tools/coccinelle.rst index 6c791af1c859..74c5e6aeeff5 100644 --- a/Documentation/dev-tools/coccinelle.rst +++ b/Documentation/dev-tools/coccinelle.rst @@ -175,13 +175,20 @@ For example, to check drivers/net/wireless/ one may write:: make coccicheck M=drivers/net/wireless/ To apply Coccinelle on a file basis, instead of a directory basis, the -following command may be used:: +C variable is used by the makefile to select which files to work with. +This variable can be used to run scripts for the entire kernel, a +specific directory, or for a single file. - make C=1 CHECK="scripts/coccicheck" +For example, to check drivers/bluetooth/bfusb.c, the value 1 is +passed to the C variable to check files that make considers +need to be compiled.:: -To check only newly edited code, use the value 2 for the C flag, i.e.:: + make C=1 CHECK=scripts/coccicheck drivers/bluetooth/bfusb.o - make C=2 CHECK="scripts/coccicheck" +The value 2 is passed to the C variable to check files regardless of +whether they need to be compiled or not.:: + + make C=2 CHECK=scripts/coccicheck drivers/bluetooth/bfusb.o In these modes, which work on a file basis, there is no information about semantic patches displayed, and no commit message proposed. diff --git a/Documentation/dev-tools/kgdb.rst b/Documentation/dev-tools/kgdb.rst index 0e52e966a153..c908ef4d3f04 100644 --- a/Documentation/dev-tools/kgdb.rst +++ b/Documentation/dev-tools/kgdb.rst @@ -316,7 +316,7 @@ driver as a loadable kernel module kgdbwait will not do anything. Kernel parameter: ``kgdbcon`` ----------------------------- -The ``kgdbcon`` feature allows you to see :c:func:`printk` messages inside gdb +The ``kgdbcon`` feature allows you to see printk() messages inside gdb while gdb is connected to the kernel. Kdb does not make use of the kgdbcon feature. @@ -432,7 +432,7 @@ This is a quick example of how to use kdb. 
``ps`` Displays only the active processes ``ps A`` Shows all the processes ``summary`` Shows kernel version info and memory usage - ``bt`` Get a backtrace of the current process using :c:func:`dump_stack` + ``bt`` Get a backtrace of the current process using dump_stack() ``dmesg`` View the kernel syslog buffer ``go`` Continue the system =========== ================================================================= @@ -724,7 +724,7 @@ The kernel debugger is organized into a number of components: The arch-specific portion implements: - contains an arch-specific trap catcher which invokes - :c:func:`kgdb_handle_exception` to start kgdb about doing its work + kgdb_handle_exception() to start kgdb about doing its work - translation to and from gdb specific packet format to :c:type:`pt_regs` @@ -769,7 +769,7 @@ The kernel debugger is organized into a number of components: config. Later run ``modprobe kdb_hello`` and the next time you enter the kdb shell, you can run the ``hello`` command. - - The implementation for :c:func:`kdb_printf` which emits messages directly + - The implementation for kdb_printf() which emits messages directly to I/O drivers, bypassing the kernel log. - SW / HW breakpoint management for the kdb shell @@ -875,7 +875,7 @@ kernel when ``CONFIG_KDB_KEYBOARD=y`` is set in the kernel configuration. The core polled keyboard driver for PS/2 type keyboards is in ``drivers/char/kdb_keyboard.c``. This driver is hooked into the debug core when kgdboc populates the callback in the array called -:c:type:`kdb_poll_funcs[]`. The :c:func:`kdb_get_kbd_char` is the top-level +:c:type:`kdb_poll_funcs[]`. The kdb_get_kbd_char() is the top-level function which polls hardware for single character input. kgdboc and kms @@ -887,10 +887,10 @@ that you have a video driver which has a frame buffer console and atomic kernel mode setting support. 
Every time the kernel debugger is entered it calls -:c:func:`kgdboc_pre_exp_handler` which in turn calls :c:func:`con_debug_enter` +kgdboc_pre_exp_handler() which in turn calls con_debug_enter() in the virtual console layer. On resuming kernel execution, the kernel -debugger calls :c:func:`kgdboc_post_exp_handler` which in turn calls -:c:func:`con_debug_leave`. +debugger calls kgdboc_post_exp_handler() which in turn calls +con_debug_leave(). Any video driver that wants to be compatible with the kernel debugger and the atomic kms callbacks must implement the ``mode_set_base_atomic``, diff --git a/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml b/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml index b48ed875eb8e..17e4f20c8d39 100644 --- a/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml @@ -10,6 +10,15 @@ maintainers: - Eric Anholt <[email protected]> - Stefan Wahren <[email protected]> +select: + properties: + compatible: + contains: + const: raspberrypi,bcm2835-firmware + + required: + - compatible + properties: compatible: items: diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.txt b/Documentation/devicetree/bindings/clock/idt,versaclock5.txt deleted file mode 100644 index 6165b6ddb1a9..000000000000 --- a/Documentation/devicetree/bindings/clock/idt,versaclock5.txt +++ /dev/null @@ -1,125 +0,0 @@ -Binding for IDT VersaClock 5,6 programmable i2c clock generators. - -The IDT VersaClock 5 and VersaClock 6 are programmable i2c clock -generators providing from 3 to 12 output clocks. - -==I2C device node== - -Required properties: -- compatible: shall be one of - "idt,5p49v5923" - "idt,5p49v5925" - "idt,5p49v5933" - "idt,5p49v5935" - "idt,5p49v6901" - "idt,5p49v6965" -- reg: i2c device address, shall be 0x68 or 0x6a. -- #clock-cells: from common clock binding; shall be set to 1. 
-- clocks: from common clock binding; list of parent clock handles, - - 5p49v5923 and - 5p49v5925 and - 5p49v6901: (required) either or both of XTAL or CLKIN - reference clock. - - 5p49v5933 and - - 5p49v5935: (optional) property not present (internal - Xtal used) or CLKIN reference - clock. -- clock-names: from common clock binding; clock input names, can be - - 5p49v5923 and - 5p49v5925 and - 5p49v6901: (required) either or both of "xin", "clkin". - - 5p49v5933 and - - 5p49v5935: (optional) property not present or "clkin". - -For all output ports, a corresponding, optional child node named OUT1, -OUT2, etc. can represent a each output, and the node can be used to -specify the following: - -- itd,mode: can be one of the following: - - VC5_LVPECL - - VC5_CMOS - - VC5_HCSL33 - - VC5_LVDS - - VC5_CMOS2 - - VC5_CMOSD - - VC5_HCSL25 - -- idt,voltage-microvolts: can be one of the following - - 1800000 - - 2500000 - - 3300000 -- idt,slew-percent: Percent of normal, can be one of - - 80 - - 85 - - 90 - - 100 - -==Mapping between clock specifier and physical pins== - -When referencing the provided clock in the DT using phandle and -clock specifier, the following mapping applies: - -5P49V5923: - 0 -- OUT0_SEL_I2CB - 1 -- OUT1 - 2 -- OUT2 - -5P49V5933: - 0 -- OUT0_SEL_I2CB - 1 -- OUT1 - 2 -- OUT4 - -5P49V5925 and -5P49V5935: - 0 -- OUT0_SEL_I2CB - 1 -- OUT1 - 2 -- OUT2 - 3 -- OUT3 - 4 -- OUT4 - -5P49V6901: - 0 -- OUT0_SEL_I2CB - 1 -- OUT1 - 2 -- OUT2 - 3 -- OUT3 - 4 -- OUT4 - -==Example== - -/* 25MHz reference crystal */ -ref25: ref25m { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <25000000>; -}; - -i2c-master-node { - - /* IDT 5P49V5923 i2c clock generator */ - vc5: clock-generator@6a { - compatible = "idt,5p49v5923"; - reg = <0x6a>; - #clock-cells = <1>; - - /* Connect XIN input to 25MHz reference */ - clocks = <&ref25m>; - clock-names = "xin"; - - OUT1 { - itd,mode = <VC5_CMOS>; - idt,voltage-microvolts = <1800000>; - idt,slew-percent = <80>; - 
}; - OUT2 { - ... - }; - ... - }; -}; - -/* Consumer referencing the 5P49V5923 pin OUT1 */ -consumer { - ... - clocks = <&vc5 1>; - ... -} diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml new file mode 100644 index 000000000000..3d4e1685cc55 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml @@ -0,0 +1,154 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/idt,versaclock5.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Binding for IDT VersaClock 5 and 6 programmable I2C clock generators + +description: | + The IDT VersaClock 5 and VersaClock 6 are programmable I2C + clock generators providing from 3 to 12 output clocks. + + When referencing the provided clock in the DT using phandle and clock + specifier, the following mapping applies: + + - 5P49V5923: + 0 -- OUT0_SEL_I2CB + 1 -- OUT1 + 2 -- OUT2 + + - 5P49V5933: + 0 -- OUT0_SEL_I2CB + 1 -- OUT1 + 2 -- OUT4 + + - other parts: + 0 -- OUT0_SEL_I2CB + 1 -- OUT1 + 2 -- OUT2 + 3 -- OUT3 + 4 -- OUT4 + +maintainers: + - Luca Ceresoli <[email protected]> + +properties: + compatible: + enum: + - idt,5p49v5923 + - idt,5p49v5925 + - idt,5p49v5933 + - idt,5p49v5935 + - idt,5p49v6901 + - idt,5p49v6965 + + reg: + description: I2C device address + enum: [ 0x68, 0x6a ] + + '#clock-cells': + const: 1 + +patternProperties: + "^OUT[1-4]$": + type: object + description: + Description of one of the outputs (OUT1..OUT4). See "Clock1 Output + Configuration" in the Versaclock 5/6/6E Family Register Description + and Programming Guide. + properties: + idt,mode: + description: + The output drive mode. Values defined in dt-bindings/clk/versaclock.h + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 6 + idt,voltage-microvolt: + description: The output drive voltage. 
+ enum: [ 1800000, 2500000, 3300000 ] + idt,slew-percent: + description: The Slew rate control for CMOS single-ended. + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [ 80, 85, 90, 100 ] + +required: + - compatible + - reg + - '#clock-cells' + +allOf: + - if: + properties: + compatible: + enum: + - idt,5p49v5933 + - idt,5p49v5935 + then: + # Devices with builtin crystal + optional external input + properties: + clock-names: + const: clkin + clocks: + maxItems: 1 + else: + # Devices without builtin crystal + properties: + clock-names: + minItems: 1 + maxItems: 2 + items: + enum: [ xin, clkin ] + clocks: + minItems: 1 + maxItems: 2 + required: + - clock-names + - clocks + +examples: + - | + #include <dt-bindings/clk/versaclock.h> + + /* 25MHz reference crystal */ + ref25: ref25m { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <25000000>; + }; + + i2c@0 { + reg = <0x0 0x100>; + #address-cells = <1>; + #size-cells = <0>; + + /* IDT 5P49V5923 I2C clock generator */ + vc5: clock-generator@6a { + compatible = "idt,5p49v5923"; + reg = <0x6a>; + #clock-cells = <1>; + + /* Connect XIN input to 25MHz reference */ + clocks = <&ref25m>; + clock-names = "xin"; + + OUT1 { + idt,mode = <VC5_CMOSD>; + idt,voltage-microvolt = <1800000>; + idt,slew-percent = <80>; + }; + + OUT4 { + idt,mode = <VC5_LVDS>; + }; + }; + }; + + /* Consumer referencing the 5P49V5923 pin OUT1 */ + consumer { + /* ... */ + clocks = <&vc5 1>; + /* ... */ + }; + +... 
diff --git a/Documentation/devicetree/bindings/clock/qcom,sdm845-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml index 8a0c576ba8b3..df943c4c3234 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sdm845-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml @@ -1,23 +1,31 @@ # SPDX-License-Identifier: GPL-2.0-only %YAML 1.2 --- -$id: http://devicetree.org/schemas/clock/qcom,sdm845-gpucc.yaml# +$id: http://devicetree.org/schemas/clock/qcom,gpucc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Qualcomm Graphics Clock & Reset Controller Binding for SDM845 +title: Qualcomm Graphics Clock & Reset Controller Binding maintainers: - Taniya Das <[email protected]> description: | Qualcomm graphics clock control module which supports the clocks, resets and - power domains on SDM845. + power domains on SDM845/SC7180/SM8150/SM8250. - See also dt-bindings/clock/qcom,gpucc-sdm845.h. + See also: + dt-bindings/clock/qcom,gpucc-sdm845.h + dt-bindings/clock/qcom,gpucc-sc7180.h + dt-bindings/clock/qcom,gpucc-sm8150.h + dt-bindings/clock/qcom,gpucc-sm8250.h properties: compatible: - const: qcom,sdm845-gpucc + enum: + - qcom,sdm845-gpucc + - qcom,sc7180-gpucc + - qcom,sm8150-gpucc + - qcom,sm8250-gpucc clocks: items: diff --git a/Documentation/devicetree/bindings/clock/qcom,msm8996-apcc.yaml b/Documentation/devicetree/bindings/clock/qcom,msm8996-apcc.yaml index d673edeed98d..a20cb10636dd 100644 --- a/Documentation/devicetree/bindings/clock/qcom,msm8996-apcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,msm8996-apcc.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only %YAML 1.2 --- -$id: http://devicetree.org/schemas/clock/qcom,kryocc.yaml# +$id: http://devicetree.org/schemas/clock/qcom,msm8996-apcc.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm clock controller for MSM8996 CPUs @@ -46,11 +46,9 @@ required: additionalProperties: false examples: - # Example for msm8996 - | 
kryocc: clock-controller@6400000 { compatible = "qcom,msm8996-apcc"; reg = <0x6400000 0x90000>; #clock-cells = <1>; - }; -... + }; diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7180-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7180-gpucc.yaml deleted file mode 100644 index fe08461fce05..000000000000 --- a/Documentation/devicetree/bindings/clock/qcom,sc7180-gpucc.yaml +++ /dev/null @@ -1,74 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -%YAML 1.2 ---- -$id: http://devicetree.org/schemas/clock/qcom,sc7180-gpucc.yaml# -$schema: http://devicetree.org/meta-schemas/core.yaml# - -title: Qualcomm Graphics Clock & Reset Controller Binding for SC7180 - -maintainers: - - Taniya Das <[email protected]> - -description: | - Qualcomm graphics clock control module which supports the clocks, resets and - power domains on SC7180. - - See also dt-bindings/clock/qcom,gpucc-sc7180.h. - -properties: - compatible: - const: qcom,sc7180-gpucc - - clocks: - items: - - description: Board XO source - - description: GPLL0 main branch source - - description: GPLL0 div branch source - - clock-names: - items: - - const: bi_tcxo - - const: gcc_gpu_gpll0_clk_src - - const: gcc_gpu_gpll0_div_clk_src - - '#clock-cells': - const: 1 - - '#reset-cells': - const: 1 - - '#power-domain-cells': - const: 1 - - reg: - maxItems: 1 - -required: - - compatible - - reg - - clocks - - clock-names - - '#clock-cells' - - '#reset-cells' - - '#power-domain-cells' - -additionalProperties: false - -examples: - - | - #include <dt-bindings/clock/qcom,gcc-sc7180.h> - #include <dt-bindings/clock/qcom,rpmh.h> - clock-controller@5090000 { - compatible = "qcom,sc7180-gpucc"; - reg = <0x05090000 0x9000>; - clocks = <&rpmhcc RPMH_CXO_CLK>, - <&gcc GCC_GPU_GPLL0_CLK_SRC>, - <&gcc GCC_GPU_GPLL0_DIV_CLK_SRC>; - clock-names = "bi_tcxo", - "gcc_gpu_gpll0_clk_src", - "gcc_gpu_gpll0_div_clk_src"; - #clock-cells = <1>; - #reset-cells = <1>; - #power-domain-cells = <1>; - }; -... 
diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7180-lpasscorecc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7180-lpasscorecc.yaml new file mode 100644 index 000000000000..c54172fbf29f --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,sc7180-lpasscorecc.yaml @@ -0,0 +1,108 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,sc7180-lpasscorecc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm LPASS Core Clock Controller Binding for SC7180 + +maintainers: + - Taniya Das <[email protected]> + +description: | + Qualcomm LPASS core clock control module which supports the clocks and + power domains on SC7180. + + See also: + - dt-bindings/clock/qcom,lpasscorecc-sc7180.h + +properties: + compatible: + enum: + - qcom,sc7180-lpasshm + - qcom,sc7180-lpasscorecc + + clocks: + items: + - description: gcc_lpass_sway clock from GCC + - description: Board XO source + + clock-names: + items: + - const: iface + - const: bi_tcxo + + power-domains: + maxItems: 1 + + '#clock-cells': + const: 1 + + '#power-domain-cells': + const: 1 + + reg: + minItems: 1 + items: + - description: lpass core cc register + - description: lpass audio cc register + + reg-names: + items: + - const: lpass_core_cc + - const: lpass_audio_cc + +if: + properties: + compatible: + contains: + const: qcom,sc7180-lpasshm +then: + properties: + reg: + maxItems: 1 + +else: + properties: + reg: + minItems: 2 + +required: + - compatible + - reg + - clocks + - clock-names + - '#clock-cells' + - '#power-domain-cells' + +additionalProperties: false + +examples: + - | + #include <dt-bindings/clock/qcom,rpmh.h> + #include <dt-bindings/clock/qcom,gcc-sc7180.h> + #include <dt-bindings/clock/qcom,lpasscorecc-sc7180.h> + clock-controller@63000000 { + compatible = "qcom,sc7180-lpasshm"; + reg = <0x63000000 0x28>; + clocks = <&gcc GCC_LPASS_CFG_NOC_SWAY_CLK>, <&rpmhcc RPMH_CXO_CLK>; + 
clock-names = "iface", "bi_tcxo"; + #clock-cells = <1>; + #power-domain-cells = <1>; + }; + + - | + #include <dt-bindings/clock/qcom,rpmh.h> + #include <dt-bindings/clock/qcom,gcc-sc7180.h> + #include <dt-bindings/clock/qcom,lpasscorecc-sc7180.h> + clock-controller@62d00000 { + compatible = "qcom,sc7180-lpasscorecc"; + reg = <0x62d00000 0x50000>, <0x62780000 0x30000>; + reg-names = "lpass_core_cc", "lpass_audio_cc"; + clocks = <&gcc GCC_LPASS_CFG_NOC_SWAY_CLK>, <&rpmhcc RPMH_CXO_CLK>; + clock-names = "iface", "bi_tcxo"; + power-domains = <&lpass_hm LPASS_CORE_HM_GDSCR>; + #clock-cells = <1>; + #power-domain-cells = <1>; + }; +... diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.txt index 8cb47c39ba53..bf3a9ec19241 100644 --- a/Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.txt +++ b/Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.txt @@ -4,9 +4,15 @@ The RK3288 clock controller generates and supplies clock to various controllers within the SoC and also implements a reset controller for SoC peripherals. +A revision of this SoC is available: rk3288w. The clock tree is a bit +different so another dt-compatible is available. Note that it only +sets the difference but there is no automatic revision detection. This +should be performed by bootloaders. + Required Properties: -- compatible: should be "rockchip,rk3288-cru" +- compatible: should be "rockchip,rk3288-cru" or "rockchip,rk3288w-cru" in + case of this revision of Rockchip rk3288. - reg: physical base address of the controller and length of memory mapped region. - #clock-cells: should be 1. 
diff --git a/Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt b/Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt index 88b71c1b32c9..7f0194fdd0cc 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt @@ -14,6 +14,7 @@ Required properties: "mediatek,mt7629-i2c", "mediatek,mt2712-i2c": for MediaTek MT7629 "mediatek,mt8173-i2c": for MediaTek MT8173 "mediatek,mt8183-i2c": for MediaTek MT8183 + "mediatek,mt8192-i2c": for MediaTek MT8192 "mediatek,mt8516-i2c", "mediatek,mt2712-i2c": for MediaTek MT8516 - reg: physical base address of the controller and dma base, length of memory mapped region. diff --git a/Documentation/devicetree/bindings/i2c/i2c.txt b/Documentation/devicetree/bindings/i2c/i2c.txt index 438ae123107e..a21c359b9f02 100644 --- a/Documentation/devicetree/bindings/i2c/i2c.txt +++ b/Documentation/devicetree/bindings/i2c/i2c.txt @@ -72,6 +72,16 @@ wants to support one of the below features, it should adapt these bindings. this information to adapt power management to keep the arbitration awake all the time, for example. Can not be combined with 'single-master'. +- pinctrl + add extra pinctrl to configure SCL/SDA pins to GPIO function for bus + recovery, call it "gpio" or "recovery" (deprecated) state + +- scl-gpios + specify the gpio related to SCL pin. Used for GPIO bus recovery. + +- sda-gpios + specify the gpio related to SDA pin. Optional for GPIO bus recovery. + - single-master states that there is no other master active on this bus. The OS can use this information to detect a stalled bus more reliably, for example. 
diff --git a/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml b/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml index 2ceb05ba2df5..5b5ae402f97a 100644 --- a/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml @@ -27,6 +27,9 @@ properties: - const: allwinner,sun50i-a64-i2c - const: allwinner,sun6i-a31-i2c - items: + - const: allwinner,sun50i-a100-i2c + - const: allwinner,sun6i-a31-i2c + - items: - const: allwinner,sun50i-h6-i2c - const: allwinner,sun6i-a31-i2c diff --git a/Documentation/devicetree/bindings/i2c/renesas,i2c.txt b/Documentation/devicetree/bindings/i2c/renesas,i2c.txt index a03f9f5cb378..96d869ac3839 100644 --- a/Documentation/devicetree/bindings/i2c/renesas,i2c.txt +++ b/Documentation/devicetree/bindings/i2c/renesas,i2c.txt @@ -10,6 +10,7 @@ Required properties: "renesas,i2c-r8a774a1" if the device is a part of a R8A774A1 SoC. "renesas,i2c-r8a774b1" if the device is a part of a R8A774B1 SoC. "renesas,i2c-r8a774c0" if the device is a part of a R8A774C0 SoC. + "renesas,i2c-r8a774e1" if the device is a part of a R8A774E1 SoC. "renesas,i2c-r8a7778" if the device is a part of a R8A7778 SoC. "renesas,i2c-r8a7779" if the device is a part of a R8A7779 SoC. "renesas,i2c-r8a7790" if the device is a part of a R8A7790 SoC. 
diff --git a/Documentation/devicetree/bindings/i2c/renesas,iic.txt b/Documentation/devicetree/bindings/i2c/renesas,iic.txt index 89facb09337a..93d412832e66 100644 --- a/Documentation/devicetree/bindings/i2c/renesas,iic.txt +++ b/Documentation/devicetree/bindings/i2c/renesas,iic.txt @@ -11,6 +11,7 @@ Required properties: - "renesas,iic-r8a774a1" (RZ/G2M) - "renesas,iic-r8a774b1" (RZ/G2N) - "renesas,iic-r8a774c0" (RZ/G2E) + - "renesas,iic-r8a774e1" (RZ/G2H) - "renesas,iic-r8a7790" (R-Car H2) - "renesas,iic-r8a7791" (R-Car M2-W) - "renesas,iic-r8a7792" (R-Car V2H) diff --git a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml index 18cb456752f6..c7d14de214c4 100644 --- a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml @@ -52,6 +52,8 @@ properties: - nxp,pcf2127 # Real-time clock - nxp,pcf2129 + # Real-time clock + - nxp,pca2129 # Real-time Clock Module - pericom,pt7c4338 # I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC diff --git a/Documentation/devicetree/bindings/watchdog/davinci-wdt.txt b/Documentation/devicetree/bindings/watchdog/davinci-wdt.txt index e60b9a13bdcb..aa10b8ec36e2 100644 --- a/Documentation/devicetree/bindings/watchdog/davinci-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/davinci-wdt.txt @@ -11,8 +11,8 @@ Optional properties: See clock-bindings.txt Documentation: -Davinci DM646x - http://www.ti.com/lit/ug/spruer5b/spruer5b.pdf -Keystone - http://www.ti.com/lit/ug/sprugv5a/sprugv5a.pdf +Davinci DM646x - https://www.ti.com/lit/ug/spruer5b/spruer5b.pdf +Keystone - https://www.ti.com/lit/ug/sprugv5a/sprugv5a.pdf Examples: diff --git a/Documentation/devicetree/bindings/watchdog/dw_wdt.txt b/Documentation/devicetree/bindings/watchdog/dw_wdt.txt deleted file mode 100644 index eb0914420c7c..000000000000 --- a/Documentation/devicetree/bindings/watchdog/dw_wdt.txt +++ /dev/null @@ -1,24 +0,0 @@ -Synopsys Designware Watchdog 
Timer - -Required Properties: - -- compatible : Should contain "snps,dw-wdt" -- reg : Base address and size of the watchdog timer registers. -- clocks : phandle + clock-specifier for the clock that drives the - watchdog timer. - -Optional Properties: - -- interrupts : The interrupt used for the watchdog timeout warning. -- resets : phandle pointing to the system reset controller with - line index for the watchdog. - -Example: - - watchdog0: wd@ffd02000 { - compatible = "snps,dw-wdt"; - reg = <0xffd02000 0x1000>; - interrupts = <0 171 4>; - clocks = <&per_base_clk>; - resets = <&rst WDT0_RESET>; - }; diff --git a/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt b/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt deleted file mode 100644 index 41aeaa2ff0f8..000000000000 --- a/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt +++ /dev/null @@ -1,28 +0,0 @@ -Qualcomm Krait Processor Sub-system (KPSS) Watchdog ---------------------------------------------------- - -Required properties : -- compatible : shall contain only one of the following: - - "qcom,kpss-wdt-msm8960" - "qcom,kpss-wdt-apq8064" - "qcom,kpss-wdt-ipq8064" - "qcom,kpss-wdt-ipq4019" - "qcom,kpss-timer" - "qcom,scss-timer" - "qcom,kpss-wdt" - -- reg : shall contain base register location and length -- clocks : shall contain the input clock - -Optional properties : -- timeout-sec : shall contain the default watchdog timeout in seconds, - if unset, the default timeout is 30 seconds - -Example: - watchdog@208a038 { - compatible = "qcom,kpss-wdt-ipq8064"; - reg = <0x0208a038 0x40>; - clocks = <&sleep_clk>; - timeout-sec = <10>; - }; diff --git a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml new file mode 100644 index 000000000000..0709ddf0b6a5 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: 
http://devicetree.org/schemas/watchdog/qcom-wdt.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Krait Processor Sub-system (KPSS) Watchdog timer + +maintainers: + - Sai Prakash Ranjan <[email protected]> + +allOf: + - $ref: watchdog.yaml# + +properties: + compatible: + enum: + - qcom,apss-wdt-qcs404 + - qcom,apss-wdt-sc7180 + - qcom,apss-wdt-sdm845 + - qcom,apss-wdt-sm8150 + - qcom,kpss-timer + - qcom,kpss-wdt + - qcom,kpss-wdt-apq8064 + - qcom,kpss-wdt-ipq4019 + - qcom,kpss-wdt-ipq8064 + - qcom,kpss-wdt-msm8960 + - qcom,scss-timer + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + +required: + - compatible + - reg + - clocks + +examples: + - | + watchdog@208a038 { + compatible = "qcom,kpss-wdt-ipq8064"; + reg = <0x0208a038 0x40>; + clocks = <&sleep_clk>; + timeout-sec = <10>; + }; diff --git a/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml b/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml index 572f4c912fef..6933005b52bd 100644 --- a/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml @@ -41,6 +41,7 @@ properties: - renesas,r8a774a1-wdt # RZ/G2M - renesas,r8a774b1-wdt # RZ/G2N - renesas,r8a774c0-wdt # RZ/G2E + - renesas,r8a774e1-wdt # RZ/G2H - renesas,r8a7795-wdt # R-Car H3 - renesas,r8a7796-wdt # R-Car M3-W - renesas,r8a77961-wdt # R-Car M3-W+ diff --git a/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml new file mode 100644 index 000000000000..d9fc7bb851b1 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml @@ -0,0 +1,90 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/watchdog/snps,dw-wdt.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Synopsys Designware Watchdog Timer + +allOf: + - $ref: "watchdog.yaml#" + +maintainers: + - Jamie Iles <[email protected]> + 
+properties: + compatible: + const: snps,dw-wdt + + reg: + maxItems: 1 + + interrupts: + description: DW Watchdog pre-timeout interrupt + maxItems: 1 + + clocks: + minItems: 1 + items: + - description: Watchdog timer reference clock + - description: APB3 interface clock + + clock-names: + minItems: 1 + items: + - const: tclk + - const: pclk + + resets: + description: Phandle to the DW Watchdog reset lane + maxItems: 1 + + snps,watchdog-tops: + $ref: /schemas/types.yaml#/definitions/uint32-array + description: | + DW APB Watchdog custom timer intervals - Timeout Period ranges (TOPs). + Each TOP is a number loaded into the watchdog counter at the moment of + the timer restart. The counter decrementing happens each tick of the + reference clock. Therefore the TOPs array is equivalent to an array of + the timer expiration intervals supported by the DW APB Watchdog. Note + DW APB Watchdog IP-core might be synthesized with fixed TOP values, + in which case this property is unnecessary with default TOPs utilized. + default: [0x0001000 0x0002000 0x0004000 0x0008000 + 0x0010000 0x0020000 0x0040000 0x0080000 + 0x0100000 0x0200000 0x0400000 0x0800000 + 0x1000000 0x2000000 0x4000000 0x8000000] + minItems: 16 + maxItems: 16 + +unevaluatedProperties: false + +required: + - compatible + - reg + - clocks + +examples: + - | + watchdog@ffd02000 { + compatible = "snps,dw-wdt"; + reg = <0xffd02000 0x1000>; + interrupts = <0 171 4>; + clocks = <&per_base_clk>; + resets = <&wdt_rst>; + }; + + - | + watchdog@ffd02000 { + compatible = "snps,dw-wdt"; + reg = <0xffd02000 0x1000>; + interrupts = <0 171 4>; + clocks = <&per_base_clk>; + clock-names = "tclk"; + snps,watchdog-tops = <0x000000FF 0x000001FF 0x000003FF + 0x000007FF 0x0000FFFF 0x0001FFFF + 0x0003FFFF 0x0007FFFF 0x000FFFFF + 0x001FFFFF 0x003FFFFF 0x007FFFFF + 0x00FFFFFF 0x01FFFFFF 0x03FFFFFF + 0x07FFFFFF>; + }; +... 
diff --git a/Documentation/features/debug/kmemleak/arch-support.txt b/Documentation/features/debug/kmemleak/arch-support.txt index b7e4f3608838..2db76807ec6f 100644 --- a/Documentation/features/debug/kmemleak/arch-support.txt +++ b/Documentation/features/debug/kmemleak/arch-support.txt @@ -23,7 +23,7 @@ | openrisc: | TODO | | parisc: | TODO | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | ok | | sh: | ok | | sparc: | ok | diff --git a/Documentation/features/debug/stackprotector/arch-support.txt b/Documentation/features/debug/stackprotector/arch-support.txt index 3db4763aa3f5..3329559c8207 100644 --- a/Documentation/features/debug/stackprotector/arch-support.txt +++ b/Documentation/features/debug/stackprotector/arch-support.txt @@ -23,7 +23,7 @@ | openrisc: | TODO | | parisc: | TODO | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | TODO | | sh: | ok | | sparc: | TODO | diff --git a/Documentation/features/locking/lockdep/arch-support.txt b/Documentation/features/locking/lockdep/arch-support.txt index 4f844ecd0680..940b0bd02957 100644 --- a/Documentation/features/locking/lockdep/arch-support.txt +++ b/Documentation/features/locking/lockdep/arch-support.txt @@ -23,7 +23,7 @@ | openrisc: | ok | | parisc: | TODO | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | ok | | sh: | ok | | sparc: | ok | diff --git a/Documentation/features/time/context-tracking/arch-support.txt b/Documentation/features/time/context-tracking/arch-support.txt index a71f3a945285..266c81e8a721 100644 --- a/Documentation/features/time/context-tracking/arch-support.txt +++ b/Documentation/features/time/context-tracking/arch-support.txt @@ -23,7 +23,7 @@ | openrisc: | TODO | | parisc: | TODO | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | TODO | | sh: | TODO | | sparc: | ok | diff --git a/Documentation/filesystems/journalling.rst b/Documentation/filesystems/journalling.rst index 58ce6b395206..7e2be2faf653 100644 --- 
a/Documentation/filesystems/journalling.rst +++ b/Documentation/filesystems/journalling.rst @@ -10,27 +10,27 @@ Details The journalling layer is easy to use. You need to first of all create a journal_t data structure. There are two calls to do this dependent on how you decide to allocate the physical media on which the journal -resides. The :c:func:`jbd2_journal_init_inode` call is for journals stored in -filesystem inodes, or the :c:func:`jbd2_journal_init_dev` call can be used +resides. The jbd2_journal_init_inode() call is for journals stored in +filesystem inodes, or the jbd2_journal_init_dev() call can be used for journal stored on a raw device (in a continuous range of blocks). A journal_t is a typedef for a struct pointer, so when you are finally -finished make sure you call :c:func:`jbd2_journal_destroy` on it to free up +finished make sure you call jbd2_journal_destroy() on it to free up any used kernel memory. Once you have got your journal_t object you need to 'mount' or load the journal file. The journalling layer expects the space for the journal was already allocated and initialized properly by the userspace tools. -When loading the journal you must call :c:func:`jbd2_journal_load` to process +When loading the journal you must call jbd2_journal_load() to process journal contents. If the client file system detects the journal contents does not need to be processed (or even need not have valid contents), it -may call :c:func:`jbd2_journal_wipe` to clear the journal contents before -calling :c:func:`jbd2_journal_load`. +may call jbd2_journal_wipe() to clear the journal contents before +calling jbd2_journal_load(). Note that jbd2_journal_wipe(..,0) calls -:c:func:`jbd2_journal_skip_recovery` for you if it detects any outstanding -transactions in the journal and similarly :c:func:`jbd2_journal_load` will -call :c:func:`jbd2_journal_recover` if necessary. I would advise reading -:c:func:`ext4_load_journal` in fs/ext4/super.c for examples on this stage. 
+jbd2_journal_skip_recovery() for you if it detects any outstanding +transactions in the journal and similarly jbd2_journal_load() will +call jbd2_journal_recover() if necessary. I would advise reading +ext4_load_journal() in fs/ext4/super.c for examples on this stage. Now you can go ahead and start modifying the underlying filesystem. Almost. @@ -39,57 +39,57 @@ You still need to actually journal your filesystem changes, this is done by wrapping them into transactions. Additionally you also need to wrap the modification of each of the buffers with calls to the journal layer, so it knows what the modifications you are actually making are. To do -this use :c:func:`jbd2_journal_start` which returns a transaction handle. +this use jbd2_journal_start() which returns a transaction handle. -:c:func:`jbd2_journal_start` and its counterpart :c:func:`jbd2_journal_stop`, +jbd2_journal_start() and its counterpart jbd2_journal_stop(), which indicates the end of a transaction are nestable calls, so you can reenter a transaction if necessary, but remember you must call -:c:func:`jbd2_journal_stop` the same number of times as -:c:func:`jbd2_journal_start` before the transaction is completed (or more +jbd2_journal_stop() the same number of times as +jbd2_journal_start() before the transaction is completed (or more accurately leaves the update phase). Ext4/VFS makes use of this feature to simplify handling of inode dirtying, quota support, etc. Inside each transaction you need to wrap the modifications to the individual buffers (blocks). Before you start to modify a buffer you -need to call :c:func:`jbd2_journal_get_create_access()` / -:c:func:`jbd2_journal_get_write_access()` / -:c:func:`jbd2_journal_get_undo_access()` as appropriate, this allows the +need to call jbd2_journal_get_create_access() / +jbd2_journal_get_write_access() / +jbd2_journal_get_undo_access() as appropriate, this allows the journalling layer to copy the unmodified data if it needs to. 
After all, the buffer may be part of a previously uncommitted transaction. At this point you are at last ready to modify a buffer, and once you have done so you need to call -:c:func:`jbd2_journal_dirty_metadata`. Or if you've asked for access to a +jbd2_journal_dirty_metadata(). Or if you've asked for access to a buffer you now know is no longer required to be pushed back on the -device you can call :c:func:`jbd2_journal_forget` in much the same way as you -might have used :c:func:`bforget` in the past. +device you can call jbd2_journal_forget() in much the same way as you +might have used bforget() in the past. -A :c:func:`jbd2_journal_flush` may be called at any time to commit and +A jbd2_journal_flush() may be called at any time to commit and checkpoint all your transactions. -Then at umount time , in your :c:func:`put_super` you can then call -:c:func:`jbd2_journal_destroy` to clean up your in-core journal object. +Then at umount time , in your put_super() you can then call +jbd2_journal_destroy() to clean up your in-core journal object. Unfortunately there are a couple of ways the journal layer can cause a deadlock. The first thing to note is that each task can only have a single outstanding transaction at any one time, remember nothing commits -until the outermost :c:func:`jbd2_journal_stop`. This means you must complete +until the outermost jbd2_journal_stop(). This means you must complete the transaction at the end of each file/inode/address etc. operation you perform, so that the journalling system isn't re-entered on another journal. Since transactions can't be nested/batched across differing journals, and another filesystem other than yours (say ext4) may be modified in a later syscall.
-The second case to bear in mind is that :c:func:`jbd2_journal_start` can block +The second case to bear in mind is that jbd2_journal_start() can block if there isn't enough space in the journal for your transaction (based on the passed nblocks param) - when it blocks it merely(!) needs to wait for transactions to complete and be committed from other tasks, so -essentially we are waiting for :c:func:`jbd2_journal_stop`. So to avoid -deadlocks you must treat :c:func:`jbd2_journal_start` / -:c:func:`jbd2_journal_stop` as if they were semaphores and include them in +essentially we are waiting for jbd2_journal_stop(). So to avoid +deadlocks you must treat jbd2_journal_start() / +jbd2_journal_stop() as if they were semaphores and include them in your semaphore ordering rules to prevent -deadlocks. Note that :c:func:`jbd2_journal_extend` has similar blocking -behaviour to :c:func:`jbd2_journal_start` so you can deadlock here just as -easily as on :c:func:`jbd2_journal_start`. +deadlocks. Note that jbd2_journal_extend() has similar blocking +behaviour to jbd2_journal_start() so you can deadlock here just as +easily as on jbd2_journal_start(). Try to reserve the right number of blocks the first time. ;-). This will be the maximum number of blocks you are going to touch in this @@ -116,8 +116,8 @@ called after each transaction commit. You can also use that need processing when the transaction commits. JBD2 also provides a way to block all transaction updates via -:c:func:`jbd2_journal_lock_updates()` / -:c:func:`jbd2_journal_unlock_updates()`. Ext4 uses this when it wants a +jbd2_journal_lock_updates() / +jbd2_journal_unlock_updates(). Ext4 uses this when it wants a window with a clean and stable fs for a moment. E.g. 
:: diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index e024a9efffd8..533c79e8d2cd 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -1633,9 +1633,6 @@ may allocate from based on an estimation of its current memory and swap use. For example, if a task is using all allowed memory, its badness score will be 1000. If it is using half of its allowed memory, its score will be 500. -There is an additional factor included in the badness score: the current memory -and swap usage is discounted by 3% for root processes. - The amount of "allowed" memory depends on the context in which the oom killer was called. If it is due to the memory assigned to the allocating task's cpuset being exhausted, the allowed memory represents the set of mems assigned to that @@ -1671,11 +1668,6 @@ The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last value set by a CAP_SYS_RESOURCE process. To reduce the value any lower requires CAP_SYS_RESOURCE. -Caveat: when a parent task is selected, the oom killer will sacrifice any first -generation children with separate address spaces instead, if possible. This -avoids servers and important system daemons from being killed and loses the -minimal amount of work. - 3.2 /proc/<pid>/oom_score - Display current oom-killer score ------------------------------------------------------------- @@ -1684,6 +1676,9 @@ This file can be used to check the current score used by the oom-killer for any given <pid>. Use it together with /proc/<pid>/oom_score_adj to tune which process should be killed in an out-of-memory situation. +Please note that the exported value includes oom_score_adj so it is +effectively in range [0,2000]. 
+ 3.3 /proc/<pid>/io - Display the IO accounting fields ------------------------------------------------------- diff --git a/Documentation/i2c/busses/i2c-i801.rst b/Documentation/i2c/busses/i2c-i801.rst index b83da0e94184..faf32330c335 100644 --- a/Documentation/i2c/busses/i2c-i801.rst +++ b/Documentation/i2c/busses/i2c-i801.rst @@ -43,6 +43,7 @@ Supported adapters: * Intel Elkhart Lake (PCH) * Intel Tiger Lake (PCH) * Intel Jasper Lake (SOC) + * Intel Emmitsburg (PCH) Datasheets: Publicly available at the Intel website diff --git a/Documentation/i2c/dev-interface.rst b/Documentation/i2c/dev-interface.rst index bdb247f2f11a..73ad34849f99 100644 --- a/Documentation/i2c/dev-interface.rst +++ b/Documentation/i2c/dev-interface.rst @@ -159,6 +159,8 @@ for details) through the following functions:: __s32 i2c_smbus_read_word_data(int file, __u8 command); __s32 i2c_smbus_write_word_data(int file, __u8 command, __u16 value); __s32 i2c_smbus_process_call(int file, __u8 command, __u16 value); + __s32 i2c_smbus_block_process_call(int file, __u8 command, __u8 length, + __u8 *values); __s32 i2c_smbus_read_block_data(int file, __u8 command, __u8 *values); __s32 i2c_smbus_write_block_data(int file, __u8 command, __u8 length, __u8 *values); diff --git a/Documentation/i2c/index.rst b/Documentation/i2c/index.rst index fee4744475df..8a2ad3845191 100644 --- a/Documentation/i2c/index.rst +++ b/Documentation/i2c/index.rst @@ -62,7 +62,6 @@ Legacy documentation .. toctree:: :maxdepth: 1 - upgrading-clients old-module-parameters .. 
only:: subproject and html diff --git a/Documentation/i2c/upgrading-clients.rst b/Documentation/i2c/upgrading-clients.rst deleted file mode 100644 index 1708090d7b8f..000000000000 --- a/Documentation/i2c/upgrading-clients.rst +++ /dev/null @@ -1,285 +0,0 @@ -================================================= -Upgrading I2C Drivers to the new 2.6 Driver Model -================================================= - -Ben Dooks <[email protected]> - -Introduction ------------- - -This guide outlines how to alter existing Linux 2.6 client drivers from -the old to the new binding methods. - - -Example old-style driver ------------------------- - -:: - - struct example_state { - struct i2c_client client; - .... - }; - - static struct i2c_driver example_driver; - - static unsigned short ignore[] = { I2C_CLIENT_END }; - static unsigned short normal_addr[] = { OUR_ADDR, I2C_CLIENT_END }; - - I2C_CLIENT_INSMOD; - - static int example_attach(struct i2c_adapter *adap, int addr, int kind) - { - struct example_state *state; - struct device *dev = &adap->dev; /* to use for dev_ reports */ - int ret; - - state = kzalloc(sizeof(struct example_state), GFP_KERNEL); - if (state == NULL) { - dev_err(dev, "failed to create our state\n"); - return -ENOMEM; - } - - example->client.addr = addr; - example->client.flags = 0; - example->client.adapter = adap; - - i2c_set_clientdata(&state->i2c_client, state); - strscpy(client->i2c_client.name, "example", sizeof(client->i2c_client.name)); - - ret = i2c_attach_client(&state->i2c_client); - if (ret < 0) { - dev_err(dev, "failed to attach client\n"); - kfree(state); - return ret; - } - - dev = &state->i2c_client.dev; - - /* rest of the initialisation goes here. 
*/ - - dev_info(dev, "example client created\n"); - - return 0; - } - - static int example_detach(struct i2c_client *client) - { - struct example_state *state = i2c_get_clientdata(client); - - i2c_detach_client(client); - kfree(state); - return 0; - } - - static int example_attach_adapter(struct i2c_adapter *adap) - { - return i2c_probe(adap, &addr_data, example_attach); - } - - static struct i2c_driver example_driver = { - .driver = { - .owner = THIS_MODULE, - .name = "example", - .pm = &example_pm_ops, - }, - .attach_adapter = example_attach_adapter, - .detach_client = example_detach, - }; - - -Updating the client -------------------- - -The new style binding model will check against a list of supported -devices and their associated address supplied by the code registering -the busses. This means that the driver .attach_adapter and -.detach_client methods can be removed, along with the addr_data, -as follows:: - - - static struct i2c_driver example_driver; - - - static unsigned short ignore[] = { I2C_CLIENT_END }; - - static unsigned short normal_addr[] = { OUR_ADDR, I2C_CLIENT_END }; - - - I2C_CLIENT_INSMOD; - - - static int example_attach_adapter(struct i2c_adapter *adap) - - { - - return i2c_probe(adap, &addr_data, example_attach); - - } - - static struct i2c_driver example_driver = { - - .attach_adapter = example_attach_adapter, - - .detach_client = example_detach, - } - -Add the probe and remove methods to the i2c_driver, as so:: - - static struct i2c_driver example_driver = { - + .probe = example_probe, - + .remove = example_remove, - } - -Change the example_attach method to accept the new parameters -which include the i2c_client that it will be working with:: - - - static int example_attach(struct i2c_adapter *adap, int addr, int kind) - + static int example_probe(struct i2c_client *client, - + const struct i2c_device_id *id) - -Change the name of example_attach to example_probe to align it with the -i2c_driver entry names. 
The rest of the probe routine will now need to be -changed as the i2c_client has already been setup for use. - -The necessary client fields have already been setup before -the probe function is called, so the following client setup -can be removed:: - - - example->client.addr = addr; - - example->client.flags = 0; - - example->client.adapter = adap; - - - - strscpy(client->i2c_client.name, "example", sizeof(client->i2c_client.name)); - -The i2c_set_clientdata is now:: - - - i2c_set_clientdata(&state->client, state); - + i2c_set_clientdata(client, state); - -The call to i2c_attach_client is no longer needed, if the probe -routine exits successfully, then the driver will be automatically -attached by the core. Change the probe routine as so:: - - - ret = i2c_attach_client(&state->i2c_client); - - if (ret < 0) { - - dev_err(dev, "failed to attach client\n"); - - kfree(state); - - return ret; - - } - - -Remove the storage of 'struct i2c_client' from the 'struct example_state' -as we are provided with the i2c_client in our example_probe. Instead we -store a pointer to it for when it is needed. 
- -:: - - struct example_state { - - struct i2c_client client; - + struct i2c_client *client; - -the new i2c client as so:: - - - struct device *dev = &adap->dev; /* to use for dev_ reports */ - + struct device *dev = &i2c_client->dev; /* to use for dev_ reports */ - -And remove the change after our client is attached, as the driver no -longer needs to register a new client structure with the core:: - - - dev = &state->i2c_client.dev; - -In the probe routine, ensure that the new state has the client stored -in it:: - - static int example_probe(struct i2c_client *i2c_client, - const struct i2c_device_id *id) - { - struct example_state *state; - struct device *dev = &i2c_client->dev; - int ret; - - state = kzalloc(sizeof(struct example_state), GFP_KERNEL); - if (state == NULL) { - dev_err(dev, "failed to create our state\n"); - return -ENOMEM; - } - - + state->client = i2c_client; - -Update the detach method, by changing the name to _remove and -to delete the i2c_detach_client call. It is possible that you -can also remove the ret variable as it is not needed for any -of the core functions. - -:: - - - static int example_detach(struct i2c_client *client) - + static int example_remove(struct i2c_client *client) - { - struct example_state *state = i2c_get_clientdata(client); - - - i2c_detach_client(client); - -And finally ensure that we have the correct ID table for the i2c-core -and other utilities:: - - + struct i2c_device_id example_idtable[] = { - + { "example", 0 }, - + { } - +}; - + - +MODULE_DEVICE_TABLE(i2c, example_idtable); - - static struct i2c_driver example_driver = { - .driver = { - .owner = THIS_MODULE, - .name = "example", - }, - + .id_table = example_ids, - - -Our driver should now look like this:: - - struct example_state { - struct i2c_client *client; - .... 
- }; - - static int example_probe(struct i2c_client *client, - const struct i2c_device_id *id) - { - struct example_state *state; - struct device *dev = &client->dev; - - state = kzalloc(sizeof(struct example_state), GFP_KERNEL); - if (state == NULL) { - dev_err(dev, "failed to create our state\n"); - return -ENOMEM; - } - - state->client = client; - i2c_set_clientdata(client, state); - - /* rest of the initialisation goes here. */ - - dev_info(dev, "example client created\n"); - - return 0; - } - - static int example_remove(struct i2c_client *client) - { - struct example_state *state = i2c_get_clientdata(client); - - kfree(state); - return 0; - } - - static struct i2c_device_id example_idtable[] = { - { "example", 0 }, - { } - }; - - MODULE_DEVICE_TABLE(i2c, example_idtable); - - static struct i2c_driver example_driver = { - .driver = { - .owner = THIS_MODULE, - .name = "example", - .pm = &example_pm_ops, - }, - .id_table = example_idtable, - .probe = example_probe, - .remove = example_remove, - }; diff --git a/Documentation/locking/locktypes.rst b/Documentation/locking/locktypes.rst index 1b577a8bf982..4cefed8048ca 100644 --- a/Documentation/locking/locktypes.rst +++ b/Documentation/locking/locktypes.rst @@ -10,7 +10,7 @@ Introduction ============ The kernel provides a variety of locking primitives which can be divided -into two categories: +into three categories: - Sleeping locks - CPU local locks diff --git a/Documentation/powerpc/ultravisor.rst b/Documentation/powerpc/ultravisor.rst index df136c8f91fa..ba6b1bf1cc44 100644 --- a/Documentation/powerpc/ultravisor.rst +++ b/Documentation/powerpc/ultravisor.rst @@ -895,6 +895,7 @@ Return values One of the following values: * H_SUCCESS on success. + * H_STATE if the VM is not in a position to switch to secure. Description ~~~~~~~~~~~ @@ -933,6 +934,8 @@ Return values * H_UNSUPPORTED if called from the wrong context (e.g. from an SVM or before an H_SVM_INIT_START hypercall). 
+ * H_STATE if the hypervisor could not successfully + transition the VM to Secure VM. Description ~~~~~~~~~~~ diff --git a/Documentation/process/2.Process.rst b/Documentation/process/2.Process.rst index 3588f48841eb..4ae1e0f600c1 100644 --- a/Documentation/process/2.Process.rst +++ b/Documentation/process/2.Process.rst @@ -113,7 +113,7 @@ than one development cycle past their initial release. So, for example, the 5.2 kernel's history looked like this (all dates in 2019): ============== =============================== - September 15 5.2 stable release + July 7 5.2 stable release July 14 5.2.1 July 21 5.2.2 July 26 5.2.3 diff --git a/Documentation/trace/intel_th.rst b/Documentation/trace/intel_th.rst index 70b7126eaeeb..b31818d5f6c5 100644 --- a/Documentation/trace/intel_th.rst +++ b/Documentation/trace/intel_th.rst @@ -58,7 +58,7 @@ Bus and Subdevices For each Intel TH device in the system a bus of its own is created and assigned an id number that reflects the order in which TH -devices were emumerated. All TH subdevices (devices on intel_th bus) +devices were enumerated. All TH subdevices (devices on intel_th bus) begin with this id: 0-gth, 0-msc0, 0-msc1, 0-pti, 0-sth, which is followed by device's name and an optional index. diff --git a/Documentation/translations/zh_CN/admin-guide/cpu-load.rst b/Documentation/translations/zh_CN/admin-guide/cpu-load.rst index 0116d0477799..c972731c0e57 100644 --- a/Documentation/translations/zh_CN/admin-guide/cpu-load.rst +++ b/Documentation/translations/zh_CN/admin-guide/cpu-load.rst @@ -1,6 +1,6 @@ -======= +======== CPU 负载 -======= +======== Linux通过``/proc/stat``和``/proc/uptime``导出各种信息,用户空间工具 如top(1)使用这些信息计算系统花费在某个特定状态的平均时间。 diff --git a/Documentation/translations/zh_CN/admin-guide/index.rst b/Documentation/translations/zh_CN/admin-guide/index.rst index 7d502fa5da64..ed5ab7e37f38 100644 --- a/Documentation/translations/zh_CN/admin-guide/index.rst +++ b/Documentation/translations/zh_CN/admin-guide/index.rst @@ -1,6 +1,6 @@ .. 
include:: ../disclaimer-zh_CN.rst -:Original: :ref:`Documentation/admin-guide/index.rst` +:Original: :doc:`../../../admin-guide/index` :Translator: Alex Shi <[email protected]> diff --git a/Documentation/vm/page_migration.rst b/Documentation/vm/page_migration.rst index 1d6cd7db4e43..68883ac485fa 100644 --- a/Documentation/vm/page_migration.rst +++ b/Documentation/vm/page_migration.rst @@ -253,5 +253,32 @@ which are function pointers of struct address_space_operations. PG_isolated is alias with PG_reclaim flag so driver shouldn't use the flag for own purpose. +Monitoring Migration +===================== + +The following events (counters) can be used to monitor page migration. + +1. PGMIGRATE_SUCCESS: Normal page migration success. Each count means that a + page was migrated. If the page was a non-THP page, then this counter is + increased by one. If the page was a THP, then this counter is increased by + the number of THP subpages. For example, migration of a single 2MB THP that + has 4KB-size base pages (subpages) will cause this counter to increase by + 512. + +2. PGMIGRATE_FAIL: Normal page migration failure. Same counting rules as for + _SUCCESS, above: this will be increased by the number of subpages, if it was + a THP. + +3. THP_MIGRATION_SUCCESS: A THP was migrated without being split. + +4. THP_MIGRATION_FAIL: A THP could neither be migrated nor be split. + +5. THP_MIGRATION_SPLIT: A THP was migrated, but not as such: first, the THP had + to be split. After splitting, a migration retry was used for its sub-pages. + +THP_MIGRATION_* events also update the appropriate PGMIGRATE_SUCCESS or +PGMIGRATE_FAIL events. For example, a THP migration failure will cause both +THP_MIGRATION_FAIL and PGMIGRATE_FAIL to increase. + Christoph Lameter, May 8, 2006. Minchan Kim, Mar 28, 2016. 
diff --git a/Documentation/watchdog/mlx-wdt.rst b/Documentation/watchdog/mlx-wdt.rst index bf5bafac47f0..35e690dea9db 100644 --- a/Documentation/watchdog/mlx-wdt.rst +++ b/Documentation/watchdog/mlx-wdt.rst @@ -24,10 +24,19 @@ Type 2: Maximum timeout is 255 sec. Get time-left is supported. +Type 3: + Same as Type 2 with extended maximum timeout period. + Maximum timeout is 65535 sec. + Type 1 HW watchdog implementation exist in old systems and all new systems have type 2 HW watchdog. Two types of HW implementation have also different register map. +Type 3 HW watchdog implementation can exist on all Mellanox systems +with new programmable logic device. +It's differentiated by WD capability bit. +Old systems still have only one main watchdog. + Mellanox system can have 2 watchdogs: main and auxiliary. Main and auxiliary watchdog devices can be enabled together on the same system. @@ -54,3 +63,4 @@ The driver checks during initialization if the previous system reset was done by the watchdog. If yes, it makes a notification about this event. Access to HW registers is performed through a generic regmap interface. +Programmable logic device registers have little-endian order. diff --git a/Documentation/watchdog/watchdog-api.rst b/Documentation/watchdog/watchdog-api.rst index c6c1e9fa9f73..800dcd7586f2 100644 --- a/Documentation/watchdog/watchdog-api.rst +++ b/Documentation/watchdog/watchdog-api.rst @@ -168,7 +168,7 @@ the fields returned in the ident struct are: the options field can have the following bits set, and describes what kind of information that the GET_STATUS and GET_BOOT_STATUS ioctls can -return. [FIXME -- Is this correct?] +return. 
================ ========================= WDIOF_OVERHEAT Reset due to CPU overheat diff --git a/Documentation/watchdog/watchdog-kernel-api.rst b/Documentation/watchdog/watchdog-kernel-api.rst index 068a55ee0d4a..baf44e986b07 100644 --- a/Documentation/watchdog/watchdog-kernel-api.rst +++ b/Documentation/watchdog/watchdog-kernel-api.rst @@ -336,3 +336,15 @@ an action is taken by a preconfigured pretimeout governor preassigned to the watchdog device. If watchdog pretimeout governor framework is not enabled, watchdog_notify_pretimeout() prints a notification message to the kernel log buffer. + +To set the last known HW keepalive time for a watchdog, the following function +should be used:: + + int watchdog_set_last_hw_keepalive(struct watchdog_device *wdd, + unsigned int last_ping_ms) + +This function must be called immediately after watchdog registration. It +sets the last known hardware heartbeat to have happened last_ping_ms before +current time. Calling this is only needed if the watchdog is already running +when probe is called, and the watchdog can only be pinged after the +min_hw_heartbeat_ms time has passed from the last ping. 
diff --git a/MAINTAINERS b/MAINTAINERS index e627ed60d75a..2843effadff8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1496,7 +1496,7 @@ ARM SMC WATCHDOG DRIVER M: Julius Werner <[email protected]> R: Evan Benn <[email protected]> S: Maintained -F: devicetree/bindings/watchdog/arm-smc-wdt.yaml +F: Documentation/devicetree/bindings/watchdog/arm-smc-wdt.yaml F: drivers/watchdog/arm_smc_wdt.c ARM SMMU DRIVERS @@ -1540,6 +1540,7 @@ F: drivers/mmc/host/owl-mmc.c F: drivers/pinctrl/actions/* F: drivers/soc/actions/ F: include/dt-bindings/power/owl-* +F: include/dt-bindings/reset/actions,* F: include/linux/soc/actions/ N: owl @@ -8409,8 +8410,9 @@ W: https://github.com/o2genum/ideapad-slidebar F: drivers/input/misc/ideapad_slidebar.c IDT VersaClock 5 CLOCK DRIVER -M: Marek Vasut <[email protected]> +M: Luca Ceresoli <[email protected]> S: Maintained +F: Documentation/devicetree/bindings/clock/idt,versaclock5.yaml F: drivers/clk/clk-versaclock5.c IEEE 802.15.4 SUBSYSTEM @@ -11984,7 +11986,8 @@ F: include/uapi/linux/netrom.h F: net/netrom/ NETRONOME ETHERNET DRIVERS -M: Jakub Kicinski <[email protected]> +M: Simon Horman <[email protected]> +R: Jakub Kicinski <[email protected]> S: Maintained F: drivers/net/ethernet/netronome/ @@ -893,6 +893,10 @@ KBUILD_CFLAGS += $(CC_FLAGS_SCS) export CC_FLAGS_SCS endif +ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_32B +KBUILD_CFLAGS += -falign-functions=32 +endif + # arch Makefile may override CC so keep this after arch Makefile is included NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index a4d0c19f1e79..640e1a2f57b4 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -489,10 +489,10 @@ extern inline void writeq(u64 b, volatile void __iomem *addr) } #endif -#define ioread16be(p) be16_to_cpu(ioread16(p)) -#define ioread32be(p) be32_to_cpu(ioread32(p)) -#define iowrite16be(v,p) iowrite16(cpu_to_be16(v), (p)) -#define 
iowrite32be(v,p) iowrite32(cpu_to_be32(v), (p)) +#define ioread16be(p) swab16(ioread16(p)) +#define ioread32be(p) swab32(ioread32(p)) +#define iowrite16be(v,p) iowrite16(swab16(v), (p)) +#define iowrite32be(v,p) iowrite32(swab32(v), (p)) #define inb_p inb #define inw_p inw diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h index 1fe2b56cb861..1b6f25efa247 100644 --- a/arch/alpha/include/asm/uaccess.h +++ b/arch/alpha/include/asm/uaccess.h @@ -20,7 +20,7 @@ #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) /* * Is a address valid? This does a straightforward calculation rather diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index c2303a8c2b9f..09172f017efc 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -25,6 +25,7 @@ #include <linux/interrupt.h> #include <linux/extable.h> #include <linux/uaccess.h> +#include <linux/perf_event.h> extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *); @@ -116,6 +117,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, #endif if (user_mode(regs)) flags |= FAULT_FLAG_USER; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -148,7 +150,7 @@ retry: /* If for any reason at all we couldn't handle the fault, make sure we exit gracefully rather than endlessly redo the fault. 
*/ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -164,10 +166,6 @@ retry: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/arc/include/asm/segment.h b/arch/arc/include/asm/segment.h index 6a2a5be5026d..871f8ab11bfd 100644 --- a/arch/arc/include/asm/segment.h +++ b/arch/arc/include/asm/segment.h @@ -14,8 +14,7 @@ typedef unsigned long mm_segment_t; #define KERNEL_DS MAKE_MM_SEG(0) #define USER_DS MAKE_MM_SEG(TASK_SIZE) - -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) #endif /* __ASSEMBLY__ */ #endif /* __ASMARC_SEGMENT_H */ diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index e12c80d71b78..efeba1fe7252 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -91,7 +91,7 @@ fault: goto fail; mmap_read_lock(current->mm); - ret = fixup_user_fault(current, current->mm, (unsigned long) uaddr, + ret = fixup_user_fault(current->mm, (unsigned long) uaddr, FAULT_FLAG_WRITE, NULL); mmap_read_unlock(current->mm); diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 7287c793d1c9..f5657cb68e4f 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -105,6 +105,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) if (write) flags |= FAULT_FLAG_WRITE; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); @@ -130,7 +131,7 @@ retry: goto bad_area; } - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { @@ -155,22 +156,9 @@ bad_area: * Major/minor page fault accounting * (in case of retry we only land here once) */ - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, 
address); - - if (likely(!(fault & VM_FAULT_ERROR))) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - } - + if (likely(!(fault & VM_FAULT_ERROR))) /* Normal return path: fault Handled Gracefully */ return; - } if (!user_mode(regs)) goto no_context; diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index b5fdd30252f8..a13d90206472 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -76,7 +76,7 @@ static inline void set_fs(mm_segment_t fs) modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER); } -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) /* * We use 33-bit arithmetic here. Success returns zero, failure returns @@ -267,7 +267,7 @@ extern int __put_user_8(void *, unsigned long long); */ #define USER_DS KERNEL_DS -#define segment_eq(a, b) (1) +#define uaccess_kernel() (true) #define __addr_ok(addr) ((void)(addr), 1) #define __range_ok(addr, size) ((void)(addr), 0) #define get_fs() (KERNEL_DS) diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index ab2568996ddb..c9dc912b83f0 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -713,7 +713,9 @@ struct page *get_signal_page(void) /* Defer to generic check */ asmlinkage void addr_limit_check_failed(void) { +#ifdef CONFIG_MMU addr_limit_user_check(); +#endif } #ifdef CONFIG_DEBUG_RSEQ diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index c6550eddfce1..efa402025031 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -202,7 +202,8 @@ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) static vm_fault_t __kprobes __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, - unsigned int flags, struct task_struct *tsk) + unsigned int flags, struct 
task_struct *tsk, + struct pt_regs *regs) { struct vm_area_struct *vma; vm_fault_t fault; @@ -224,7 +225,7 @@ good_area: goto out; } - return handle_mm_fault(vma, addr & PAGE_MASK, flags); + return handle_mm_fault(vma, addr & PAGE_MASK, flags, regs); check_stack: /* Don't allow expansion below FIRST_USER_ADDRESS */ @@ -266,6 +267,8 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if ((fsr & FSR_WRITE) && !(fsr & FSR_CM)) flags |= FAULT_FLAG_WRITE; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -290,7 +293,7 @@ retry: #endif } - fault = __do_page_fault(mm, addr, fsr, flags, tsk); + fault = __do_page_fault(mm, addr, fsr, flags, tsk, regs); /* If we need to retry but a fatal signal is pending, handle the * signal first. We do not need to release the mmap_lock because @@ -302,23 +305,7 @@ retry: return 0; } - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ - - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, addr); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, addr); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; goto retry; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e11b4ea06127..6d232837cbee 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1182,22 +1182,6 @@ config HARDEN_BRANCH_PREDICTOR If unsure, say Y. 
-config HARDEN_EL2_VECTORS - bool "Harden EL2 vector mapping against system register leak" if EXPERT - default y - help - Speculation attacks against some high-performance processors can - be used to leak privileged information such as the vector base - register, resulting in a potential defeat of the EL2 layout - randomization. - - This config option will map the vectors to a fixed location, - independent of the EL2 code mapping, so that revealing VBAR_EL2 - to an attacker does not give away any extra information. This - only gets enabled on affected CPUs. - - If unsure, say Y. - config ARM64_SSBD bool "Speculative Store Bypass Disable" if EXPERT default y @@ -1520,7 +1504,6 @@ menu "ARMv8.3 architectural features" config ARM64_PTR_AUTH bool "Enable support for pointer authentication" default y - depends on !KVM || ARM64_VHE depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC # Modern compilers insert a .note.gnu.property section note for PAC # which is only understood by binutils starting with version 2.33.1. @@ -1547,8 +1530,7 @@ config ARM64_PTR_AUTH The feature is detected at runtime. If the feature is not present in hardware it will not be advertised to userspace/KVM guest nor will it - be enabled. However, KVM guest also require VHE mode and hence - CONFIG_ARM64_VHE=y option to use this feature. + be enabled. If the feature is present on the boot CPU but not on a late CPU, then the late CPU will be parked. Also, if the boot CPU does not have diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 352aaebf4198..fb1a922b31ba 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -42,33 +42,81 @@ #include <linux/mm.h> -/* Translate a kernel address of @sym into its equivalent linear mapping */ -#define kvm_ksym_ref(sym) \ +/* + * Translate name of a symbol defined in nVHE hyp to the name seen + * by kernel proper. 
All nVHE symbols are prefixed by the build system + * to avoid clashes with the VHE variants. + */ +#define kvm_nvhe_sym(sym) __kvm_nvhe_##sym + +#define DECLARE_KVM_VHE_SYM(sym) extern char sym[] +#define DECLARE_KVM_NVHE_SYM(sym) extern char kvm_nvhe_sym(sym)[] + +/* + * Define a pair of symbols sharing the same name but one defined in + * VHE and the other in nVHE hyp implementations. + */ +#define DECLARE_KVM_HYP_SYM(sym) \ + DECLARE_KVM_VHE_SYM(sym); \ + DECLARE_KVM_NVHE_SYM(sym) + +#define CHOOSE_VHE_SYM(sym) sym +#define CHOOSE_NVHE_SYM(sym) kvm_nvhe_sym(sym) + +#ifndef __KVM_NVHE_HYPERVISOR__ +/* + * BIG FAT WARNINGS: + * + * - Don't be tempted to change the following is_kernel_in_hyp_mode() + * to has_vhe(). has_vhe() is implemented as a *final* capability, + * while this is used early at boot time, when the capabilities are + * not final yet.... + * + * - Don't let the nVHE hypervisor have access to this, as it will + * pick the *wrong* symbol (yes, it runs at EL2...). + */ +#define CHOOSE_HYP_SYM(sym) (is_kernel_in_hyp_mode() ? 
CHOOSE_VHE_SYM(sym) \ + : CHOOSE_NVHE_SYM(sym)) +#else +/* The nVHE hypervisor shouldn't even try to access anything */ +extern void *__nvhe_undefined_symbol; +#define CHOOSE_HYP_SYM(sym) __nvhe_undefined_symbol +#endif + +/* Translate a kernel address @ptr into its equivalent linear mapping */ +#define kvm_ksym_ref(ptr) \ ({ \ - void *val = &sym; \ + void *val = (ptr); \ if (!is_kernel_in_hyp_mode()) \ - val = lm_alias(&sym); \ + val = lm_alias((ptr)); \ val; \ }) +#define kvm_ksym_ref_nvhe(sym) kvm_ksym_ref(kvm_nvhe_sym(sym)) struct kvm; struct kvm_vcpu; +struct kvm_s2_mmu; -extern char __kvm_hyp_init[]; -extern char __kvm_hyp_init_end[]; +DECLARE_KVM_NVHE_SYM(__kvm_hyp_init); +DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); +#define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init) +#define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector) -extern char __kvm_hyp_vector[]; +#ifdef CONFIG_KVM_INDIRECT_VECTORS +extern atomic_t arm64_el2_vector_last_slot; +DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs); +#define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs) +#endif extern void __kvm_flush_vm_context(void); -extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); -extern void __kvm_tlb_flush_vmid(struct kvm *kvm); -extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); +extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, + int level); +extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu); +extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu); extern void __kvm_timer_set_cntvoff(u64 cntvoff); -extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu); - -extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu); +extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern void __kvm_enable_ssbs(void); @@ -143,7 +191,6 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; .macro get_vcpu_ptr vcpu, ctxt get_host_ctxt \ctxt, \vcpu ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] - kern_hyp_va \vcpu .endm #endif diff 
--git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h index 454373704b8a..d6bb40122fdb 100644 --- a/arch/arm64/include/asm/kvm_coproc.h +++ b/arch/arm64/include/asm/kvm_coproc.h @@ -19,14 +19,6 @@ struct kvm_sys_reg_table { size_t num; }; -struct kvm_sys_reg_target_table { - struct kvm_sys_reg_table table64; - struct kvm_sys_reg_table table32; -}; - -void kvm_register_target_sys_reg_table(unsigned int target, - struct kvm_sys_reg_target_table *table); - int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu); int kvm_handle_cp14_32(struct kvm_vcpu *vcpu); int kvm_handle_cp14_64(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 4d0f8ea600ba..49a55be2b9a2 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -124,33 +124,12 @@ static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr) static __always_inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) { - return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; -} - -static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu) -{ - return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1; -} - -static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.sysregs_loaded_on_cpu) - return read_sysreg_el1(SYS_ELR); - else - return *__vcpu_elr_el1(vcpu); -} - -static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v) -{ - if (vcpu->arch.sysregs_loaded_on_cpu) - write_sysreg_el1(v, SYS_ELR); - else - *__vcpu_elr_el1(vcpu) = v; + return (unsigned long *)&vcpu_gp_regs(vcpu)->pc; } static __always_inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) { - return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate; + return (unsigned long *)&vcpu_gp_regs(vcpu)->pstate; } static __always_inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) @@ -179,14 +158,14 @@ static inline void vcpu_set_thumb(struct 
kvm_vcpu *vcpu) static __always_inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu, u8 reg_num) { - return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs.regs[reg_num]; + return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs[reg_num]; } static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, unsigned long val) { if (reg_num != 31) - vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val; + vcpu_gp_regs(vcpu)->regs[reg_num] = val; } static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) @@ -197,7 +176,7 @@ static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) if (vcpu->arch.sysregs_loaded_on_cpu) return read_sysreg_el1(SYS_SPSR); else - return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; + return __vcpu_sys_reg(vcpu, SPSR_EL1); } static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) @@ -210,7 +189,7 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) if (vcpu->arch.sysregs_loaded_on_cpu) write_sysreg_el1(v, SYS_SPSR); else - vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; + __vcpu_sys_reg(vcpu, SPSR_EL1) = v; } /* @@ -259,14 +238,14 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) return mode != PSR_MODE_EL0t; } -static __always_inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu) +static __always_inline u32 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu) { return vcpu->arch.fault.esr_el2; } static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) { - u32 esr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); if (esr & ESR_ELx_CV) return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT; @@ -291,64 +270,64 @@ static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu) static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK; + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_xVC_IMM_MASK; } static __always_inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu 
*vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_ISV); } static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC); + return kvm_vcpu_get_esr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC); } static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_SSE); } static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_SF); } static __always_inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) { - return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; + return (kvm_vcpu_get_esr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; } static __always_inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_S1PTW); } static __always_inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR) || + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_WNR) || kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */ } static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM); + return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_CM); } static __always_inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) { - return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); + return 1 << ((kvm_vcpu_get_esr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); } /* This one is not specific to Data Abort */ static __always_inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_IL); + return !!(kvm_vcpu_get_esr(vcpu) & 
ESR_ELx_IL); } static __always_inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) { - return ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); + return ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); } static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) @@ -358,15 +337,15 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC; + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC; } static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE; + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE; } -static __always_inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu) { switch (kvm_vcpu_trap_get_fault(vcpu)) { case FSC_SEA: @@ -387,7 +366,7 @@ static __always_inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) { - u32 esr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); return ESR_ELx_SYS64_ISS_RT(esr); } @@ -516,14 +495,14 @@ static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_i * Skip an instruction which has been emulated at hyp while most guest sysregs * are live. 
*/ -static __always_inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu) +static __always_inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) { *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); - vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); + vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR); kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, SYS_SPSR); + write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR); write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f81151ad3d3c..65568b23868a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -66,19 +66,34 @@ struct kvm_vmid { u32 vmid; }; -struct kvm_arch { +struct kvm_s2_mmu { struct kvm_vmid vmid; - /* stage2 entry level table */ - pgd_t *pgd; - phys_addr_t pgd_phys; - - /* VTCR_EL2 value for this VM */ - u64 vtcr; + /* + * stage2 entry level table + * + * Two kvm_s2_mmu structures in the same VM can point to the same + * pgd here. This happens when running a guest using a + * translation regime that isn't affected by its own stage-2 + * translation, such as a non-VHE hypervisor running at vEL2, or + * for vEL1/EL0 with vHCR_EL2.VM == 0. In that case, we use the + * canonical stage-2 page tables. + */ + pgd_t *pgd; + phys_addr_t pgd_phys; /* The last vcpu id that ran on each physical CPU */ int __percpu *last_vcpu_ran; + struct kvm *kvm; +}; + +struct kvm_arch { + struct kvm_s2_mmu mmu; + + /* VTCR_EL2 value for this VM */ + u64 vtcr; + /* The maximum number of vCPUs depends on the used GIC model */ int max_vcpus; @@ -159,6 +174,16 @@ enum vcpu_sysreg { APGAKEYLO_EL1, APGAKEYHI_EL1, + ELR_EL1, + SP_EL1, + SPSR_EL1, + + CNTVOFF_EL2, + CNTV_CVAL_EL0, + CNTV_CTL_EL0, + CNTP_CVAL_EL0, + CNTP_CTL_EL0, + /* 32bit specific registers. 
Keep them at the end of the range */ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ @@ -210,7 +235,15 @@ enum vcpu_sysreg { #define NR_COPRO_REGS (NR_SYS_REGS * 2) struct kvm_cpu_context { - struct kvm_regs gp_regs; + struct user_pt_regs regs; /* sp = sp_el0 */ + + u64 spsr_abt; + u64 spsr_und; + u64 spsr_irq; + u64 spsr_fiq; + + struct user_fpsimd_state fp_regs; + union { u64 sys_regs[NR_SYS_REGS]; u32 copro[NR_COPRO_REGS]; @@ -243,6 +276,9 @@ struct kvm_vcpu_arch { void *sve_state; unsigned int sve_max_vl; + /* Stage 2 paging state used by the hardware on next switch */ + struct kvm_s2_mmu *hw_mmu; + /* HYP configuration */ u64 hcr_el2; u32 mdcr_el2; @@ -327,7 +363,7 @@ struct kvm_vcpu_arch { struct vcpu_reset_state reset_state; /* True when deferrable sysregs are loaded on the physical CPU, - * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */ + * see kvm_vcpu_load_sysregs_vhe and kvm_vcpu_put_sysregs_vhe. */ bool sysregs_loaded_on_cpu; /* Guest PV state */ @@ -378,15 +414,20 @@ struct kvm_vcpu_arch { #define vcpu_has_ptrauth(vcpu) false #endif -#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) +#define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs) /* - * Only use __vcpu_sys_reg if you know you want the memory backed version of a - * register, and not the one most recently accessed by a running VCPU. For - * example, for userspace access or for system registers that are never context - * switched, but only emulated. + * Only use __vcpu_sys_reg/ctxt_sys_reg if you know you want the + * memory backed version of a register, and not the one most recently + * accessed by a running VCPU. For example, for userspace access or + * for system registers that are never context switched, but only + * emulated. 
*/ -#define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) +#define __ctxt_sys_reg(c,r) (&(c)->sys_regs[(r)]) + +#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) + +#define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r))) u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); @@ -442,6 +483,18 @@ void kvm_arm_resume_guest(struct kvm *kvm); u64 __kvm_call_hyp(void *hypfn, ...); +#define kvm_call_hyp_nvhe(f, ...) \ + do { \ + DECLARE_KVM_NVHE_SYM(f); \ + __kvm_call_hyp(kvm_ksym_ref_nvhe(f), ##__VA_ARGS__); \ + } while(0) + +#define kvm_call_hyp_nvhe_ret(f, ...) \ + ({ \ + DECLARE_KVM_NVHE_SYM(f); \ + __kvm_call_hyp(kvm_ksym_ref_nvhe(f), ##__VA_ARGS__); \ + }) + /* * The couple of isb() below are there to guarantee the same behaviour * on VHE as on !VHE, where the eret to EL1 acts as a context @@ -453,7 +506,7 @@ u64 __kvm_call_hyp(void *hypfn, ...); f(__VA_ARGS__); \ isb(); \ } else { \ - __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__); \ + kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \ } \ } while(0) @@ -465,8 +518,7 @@ u64 __kvm_call_hyp(void *hypfn, ...); ret = f(__VA_ARGS__); \ isb(); \ } else { \ - ret = __kvm_call_hyp(kvm_ksym_ref(f), \ - ##__VA_ARGS__); \ + ret = kvm_call_hyp_nvhe_ret(f, ##__VA_ARGS__); \ } \ \ ret; \ @@ -518,7 +570,7 @@ DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data); static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ - cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr(); + ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr(); } static inline bool kvm_arch_requires_vhe(void) @@ -619,8 +671,8 @@ static inline int kvm_arm_have_ssbd(void) } } -void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu); -void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); +void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu); +void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu); int 
kvm_set_ipa_limit(void); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index ce3080834bfa..46689e7db46c 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -12,8 +12,6 @@ #include <asm/alternative.h> #include <asm/sysreg.h> -#define __hyp_text __section(.hyp.text) notrace __noscs - #define read_sysreg_elx(r,nvh,vh) \ ({ \ u64 reg; \ @@ -63,17 +61,20 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); +#ifdef __KVM_NVHE_HYPERVISOR__ void __timer_enable_traps(struct kvm_vcpu *vcpu); void __timer_disable_traps(struct kvm_vcpu *vcpu); +#endif +#ifdef __KVM_NVHE_HYPERVISOR__ void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt); void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt); +#else void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt); void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt); void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt); void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt); -void __sysreg32_save_state(struct kvm_vcpu *vcpu); -void __sysreg32_restore_state(struct kvm_vcpu *vcpu); +#endif void __debug_switch_to_guest(struct kvm_vcpu *vcpu); void __debug_switch_to_host(struct kvm_vcpu *vcpu); @@ -81,11 +82,17 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu); void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); +#ifndef __KVM_NVHE_HYPERVISOR__ void activate_traps_vhe_load(struct kvm_vcpu *vcpu); void deactivate_traps_vhe_put(void); +#endif u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); + +void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt); +#ifdef __KVM_NVHE_HYPERVISOR__ void __noreturn __hyp_do_panic(unsigned long, ...); +#endif #endif /* __ARM64_KVM_HYP_H__ */ diff --git 
a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 40be8f6c7351..189839c3706a 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -134,8 +134,8 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); -int kvm_alloc_stage2_pgd(struct kvm *kvm); -void kvm_free_stage2_pgd(struct kvm *kvm); +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu); +void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t pa, unsigned long size, bool writable); @@ -577,13 +577,13 @@ static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm) return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm)); } -static __always_inline u64 kvm_get_vttbr(struct kvm *kvm) +static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) { - struct kvm_vmid *vmid = &kvm->arch.vmid; + struct kvm_vmid *vmid = &mmu->vmid; u64 vmid_field, baddr; u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0; - baddr = kvm->arch.pgd_phys; + baddr = mmu->pgd_phys; vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT; return kvm_phys_to_vttbr(baddr) | vmid_field | cnp; } @@ -592,10 +592,10 @@ static __always_inline u64 kvm_get_vttbr(struct kvm *kvm) * Must be called from hyp code running at EL2 with an updated VTTBR * and interrupts disabled. 
*/ -static __always_inline void __load_guest_stage2(struct kvm *kvm) +static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu) { - write_sysreg(kvm->arch.vtcr, vtcr_el2); - write_sysreg(kvm_get_vttbr(kvm), vttbr_el2); + write_sysreg(kern_hyp_va(mmu->kvm)->arch.vtcr, vtcr_el2); + write_sysreg(kvm_get_vttbr(mmu), vttbr_el2); /* * ARM errata 1165522 and 1530923 require the actual execution of the diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h index 6301813dcace..0ddf98c3ba9f 100644 --- a/arch/arm64/include/asm/kvm_ptrauth.h +++ b/arch/arm64/include/asm/kvm_ptrauth.h @@ -61,44 +61,36 @@ /* * Both ptrauth_switch_to_guest and ptrauth_switch_to_host macros will - * check for the presence of one of the cpufeature flag - * ARM64_HAS_ADDRESS_AUTH_ARCH or ARM64_HAS_ADDRESS_AUTH_IMP_DEF and + * check for the presence of ARM64_HAS_ADDRESS_AUTH, which is defined as + * (ARM64_HAS_ADDRESS_AUTH_ARCH || ARM64_HAS_ADDRESS_AUTH_IMP_DEF) and * then proceed ahead with the save/restore of Pointer Authentication - * key registers. + * key registers if enabled for the guest.
*/ .macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3 -alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH - b 1000f +alternative_if_not ARM64_HAS_ADDRESS_AUTH + b .L__skip_switch\@ alternative_else_nop_endif -alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF - b 1001f -alternative_else_nop_endif -1000: - ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)] + mrs \reg1, hcr_el2 and \reg1, \reg1, #(HCR_API | HCR_APK) - cbz \reg1, 1001f + cbz \reg1, .L__skip_switch\@ add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1 ptrauth_restore_state \reg1, \reg2, \reg3 -1001: +.L__skip_switch\@: .endm .macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3 -alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH - b 2000f -alternative_else_nop_endif -alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF - b 2001f +alternative_if_not ARM64_HAS_ADDRESS_AUTH + b .L__skip_switch\@ alternative_else_nop_endif -2000: - ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)] + mrs \reg1, hcr_el2 and \reg1, \reg1, #(HCR_API | HCR_APK) - cbz \reg1, 2001f + cbz \reg1, .L__skip_switch\@ add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1 ptrauth_save_state \reg1, \reg2, \reg3 add \reg1, \h_ctxt, #CPU_APIAKEYLO_EL1 ptrauth_restore_state \reg1, \reg2, \reg3 isb -2001: +.L__skip_switch\@: .endm #else /* !CONFIG_ARM64_PTR_AUTH */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 8444df000181..a7a5ecaa2e83 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -45,13 +45,6 @@ struct bp_hardening_data { bp_hardening_cb_t fn; }; -#if (defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || \ - defined(CONFIG_HARDEN_EL2_VECTORS)) - -extern char __bp_harden_hyp_vecs[]; -extern atomic_t arm64_el2_vector_last_slot; -#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR || CONFIG_HARDEN_EL2_VECTORS */ - #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 
8d7c466f809b..991dd5f031e4 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -50,7 +50,7 @@ static inline void set_fs(mm_segment_t fs) CONFIG_ARM64_UAO)); } -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) /* * Test whether a block of memory is a valid user space address. diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 5051b388c654..09977acc007d 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -85,10 +85,17 @@ static inline bool is_kernel_in_hyp_mode(void) static __always_inline bool has_vhe(void) { - if (cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN)) + /* + * The following macros are defined for code specific to VHE/nVHE. + * If has_vhe() is inlined into those compilation units, it can + * be determined statically. Otherwise fall back to caps. + */ + if (__is_defined(__KVM_VHE_HYPERVISOR__)) return true; - - return false; + else if (__is_defined(__KVM_NVHE_HYPERVISOR__)) + return false; + else + return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN); } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 0577e2142284..7d32fc959b1a 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -102,13 +102,12 @@ int main(void) DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); - DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); + DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs)); DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1])); DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1])); DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
DEFINE(CPU_APDBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDBKEYLO_EL1])); DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1])); - DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt)); #endif diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 79728bfb5351..6bd1d3ad037a 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -632,7 +632,7 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry, return is_midr_in_range(midr, &range) && has_dic; } -#if defined(CONFIG_HARDEN_EL2_VECTORS) +#ifdef CONFIG_RANDOMIZE_BASE static const struct midr_range ca57_a72[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), @@ -891,7 +891,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = check_branch_predictor, }, -#ifdef CONFIG_HARDEN_EL2_VECTORS +#ifdef CONFIG_RANDOMIZE_BASE { .desc = "EL2 vector hardening", .capability = ARM64_HARDEN_EL2_VECTORS, diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index be0a63ffed23..9e897c500237 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -51,4 +51,58 @@ __efistub__ctype = _ctype; #endif +#ifdef CONFIG_KVM + +/* + * KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_, to + * separate it from the kernel proper. The following symbols are legally + * accessed by it, therefore provide aliases to make them linkable. + * Do not include symbols which may not be safely accessed under hypervisor + * memory mappings. + */ + +#define KVM_NVHE_ALIAS(sym) __kvm_nvhe_##sym = sym; + +/* Alternative callbacks for init-time patching of nVHE hyp code. 
*/ +KVM_NVHE_ALIAS(arm64_enable_wa2_handling); +KVM_NVHE_ALIAS(kvm_patch_vector_branch); +KVM_NVHE_ALIAS(kvm_update_va_mask); + +/* Global kernel state accessed by nVHE hyp code. */ +KVM_NVHE_ALIAS(arm64_ssbd_callback_required); +KVM_NVHE_ALIAS(kvm_host_data); +KVM_NVHE_ALIAS(kvm_vgic_global_state); + +/* Kernel constant needed to compute idmap addresses. */ +KVM_NVHE_ALIAS(kimage_voffset); + +/* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */ +KVM_NVHE_ALIAS(__hyp_panic_string); +KVM_NVHE_ALIAS(panic); + +/* Vectors installed by hyp-init on reset HVC. */ +KVM_NVHE_ALIAS(__hyp_stub_vectors); + +/* IDMAP TCR_EL1.T0SZ as computed by the EL1 init code */ +KVM_NVHE_ALIAS(idmap_t0sz); + +/* Kernel symbol used by icache_is_vpipt(). */ +KVM_NVHE_ALIAS(__icache_flags); + +/* Kernel symbols needed for cpus_have_final/const_caps checks. */ +KVM_NVHE_ALIAS(arm64_const_caps_ready); +KVM_NVHE_ALIAS(cpu_hwcap_keys); +KVM_NVHE_ALIAS(cpu_hwcaps); + +/* Static keys which are set if a vGIC trap should be handled in hyp. */ +KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); +KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); + +/* Static key checked in pmr_sync(). */ +#ifdef CONFIG_ARM64_PSEUDO_NMI +KVM_NVHE_ALIAS(gic_pmr_sync); +#endif + +#endif /* CONFIG_KVM */ + #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index dab88260b137..7689f2031c0c 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -180,7 +180,7 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs, /* * We didn't take an exception to get here, set PAN. UAO will be cleared - * by sdei_event_handler()s set_fs(USER_DS) call. + * by sdei_event_handler()s force_uaccess_begin() call. */ __uaccess_enable_hw_pan(); diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 13489aff4440..318c8f2df245 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -58,7 +58,7 @@ config KVM_ARM_PMU virtual machines. 
config KVM_INDIRECT_VECTORS - def_bool HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS + def_bool HARDEN_BRANCH_PREDICTOR || RANDOMIZE_BASE endif # KVM diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 8d3d9513cbfe..99977c1972cc 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -13,8 +13,8 @@ obj-$(CONFIG_KVM) += hyp/ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ $(KVM)/vfio.o $(KVM)/irqchip.o \ arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \ - inject_fault.o regmap.o va_layout.o hyp.o hyp-init.o handle_exit.o \ - guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o \ + inject_fault.o regmap.o va_layout.o hyp.o handle_exit.o \ + guest.o debug.o reset.o sys_regs.o \ vgic-sys-reg-v3.o fpsimd.o pmu.o \ aarch32.o arch_timer.o \ vgic/vgic.o vgic/vgic-init.o \ diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index a1fe0ea3254e..32ba6fbc3814 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -51,6 +51,93 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, struct arch_timer_context *timer, enum kvm_arch_timer_regs treg); +u32 timer_get_ctl(struct arch_timer_context *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + switch(arch_timer_ctx_index(ctxt)) { + case TIMER_VTIMER: + return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0); + case TIMER_PTIMER: + return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0); + default: + WARN_ON(1); + return 0; + } +} + +u64 timer_get_cval(struct arch_timer_context *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + switch(arch_timer_ctx_index(ctxt)) { + case TIMER_VTIMER: + return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0); + case TIMER_PTIMER: + return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0); + default: + WARN_ON(1); + return 0; + } +} + +static u64 timer_get_offset(struct arch_timer_context *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + switch(arch_timer_ctx_index(ctxt)) { + case TIMER_VTIMER: + return __vcpu_sys_reg(vcpu, CNTVOFF_EL2); + 
default: + return 0; + } +} + +static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + switch(arch_timer_ctx_index(ctxt)) { + case TIMER_VTIMER: + __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl; + break; + case TIMER_PTIMER: + __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl; + break; + default: + WARN_ON(1); + } +} + +static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + switch(arch_timer_ctx_index(ctxt)) { + case TIMER_VTIMER: + __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval; + break; + case TIMER_PTIMER: + __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval; + break; + default: + WARN_ON(1); + } +} + +static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + + switch(arch_timer_ctx_index(ctxt)) { + case TIMER_VTIMER: + __vcpu_sys_reg(vcpu, CNTVOFF_EL2) = offset; + break; + default: + WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt)); + } +} + u64 kvm_phys_timer_read(void) { return timecounter->cc->read(timecounter->cc); @@ -124,8 +211,8 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) { u64 cval, now; - cval = timer_ctx->cnt_cval; - now = kvm_phys_timer_read() - timer_ctx->cntvoff; + cval = timer_get_cval(timer_ctx); + now = kvm_phys_timer_read() - timer_get_offset(timer_ctx); if (now < cval) { u64 ns; @@ -144,8 +231,8 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) { WARN_ON(timer_ctx && timer_ctx->loaded); return timer_ctx && - !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && - (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); + ((timer_get_ctl(timer_ctx) & + (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE); } /* @@ -256,8 +343,8 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) if (!kvm_timer_irq_can_fire(timer_ctx)) return false; - cval = timer_ctx->cnt_cval; - now = kvm_phys_timer_read() - 
timer_ctx->cntvoff; + cval = timer_get_cval(timer_ctx); + now = kvm_phys_timer_read() - timer_get_offset(timer_ctx); return cval <= now; } @@ -350,8 +437,8 @@ static void timer_save_state(struct arch_timer_context *ctx) switch (index) { case TIMER_VTIMER: - ctx->cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL); - ctx->cnt_cval = read_sysreg_el0(SYS_CNTV_CVAL); + timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL)); + timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL)); /* Disable the timer */ write_sysreg_el0(0, SYS_CNTV_CTL); @@ -359,8 +446,8 @@ static void timer_save_state(struct arch_timer_context *ctx) break; case TIMER_PTIMER: - ctx->cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL); - ctx->cnt_cval = read_sysreg_el0(SYS_CNTP_CVAL); + timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL)); + timer_set_cval(ctx, read_sysreg_el0(SYS_CNTP_CVAL)); /* Disable the timer */ write_sysreg_el0(0, SYS_CNTP_CTL); @@ -429,14 +516,14 @@ static void timer_restore_state(struct arch_timer_context *ctx) switch (index) { case TIMER_VTIMER: - write_sysreg_el0(ctx->cnt_cval, SYS_CNTV_CVAL); + write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL); isb(); - write_sysreg_el0(ctx->cnt_ctl, SYS_CNTV_CTL); + write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL); break; case TIMER_PTIMER: - write_sysreg_el0(ctx->cnt_cval, SYS_CNTP_CVAL); + write_sysreg_el0(timer_get_cval(ctx), SYS_CNTP_CVAL); isb(); - write_sysreg_el0(ctx->cnt_ctl, SYS_CNTP_CTL); + write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL); break; case NR_KVM_TIMERS: BUG(); @@ -528,7 +615,7 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) kvm_timer_vcpu_load_nogic(vcpu); } - set_cntvoff(map.direct_vtimer->cntvoff); + set_cntvoff(timer_get_offset(map.direct_vtimer)); kvm_timer_unblocking(vcpu); @@ -615,7 +702,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu) } } -void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) +void kvm_timer_sync_user(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); @@ -639,8 +726,8 @@ int 
kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) * resets the timer to be disabled and unmasked and is compliant with * the ARMv7 architecture. */ - vcpu_vtimer(vcpu)->cnt_ctl = 0; - vcpu_ptimer(vcpu)->cnt_ctl = 0; + timer_set_ctl(vcpu_vtimer(vcpu), 0); + timer_set_ctl(vcpu_ptimer(vcpu), 0); if (timer->enabled) { kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu)); @@ -668,13 +755,13 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff) mutex_lock(&kvm->lock); kvm_for_each_vcpu(i, tmp, kvm) - vcpu_vtimer(tmp)->cntvoff = cntvoff; + timer_set_offset(vcpu_vtimer(tmp), cntvoff); /* * When called from the vcpu create path, the CPU being created is not * included in the loop above, so we just set it here as well. */ - vcpu_vtimer(vcpu)->cntvoff = cntvoff; + timer_set_offset(vcpu_vtimer(vcpu), cntvoff); mutex_unlock(&kvm->lock); } @@ -684,9 +771,12 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + vtimer->vcpu = vcpu; + ptimer->vcpu = vcpu; + /* Synchronize cntvoff across all vtimers of a VM. */ update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); - ptimer->cntvoff = 0; + timer_set_offset(ptimer, 0); hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); timer->bg_timer.function = kvm_bg_timer_expire; @@ -704,9 +794,6 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) vtimer->host_timer_irq_flags = host_vtimer_irq_flags; ptimer->host_timer_irq_flags = host_ptimer_irq_flags; - - vtimer->vcpu = vcpu; - ptimer->vcpu = vcpu; } static void kvm_timer_init_interrupt(void *info) @@ -756,10 +843,12 @@ static u64 read_timer_ctl(struct arch_timer_context *timer) * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit * regardless of ENABLE bit for our implementation convenience. 
*/ + u32 ctl = timer_get_ctl(timer); + if (!kvm_timer_compute_delta(timer)) - return timer->cnt_ctl | ARCH_TIMER_CTRL_IT_STAT; - else - return timer->cnt_ctl; + ctl |= ARCH_TIMER_CTRL_IT_STAT; + + return ctl; } u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) @@ -795,8 +884,8 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, switch (treg) { case TIMER_REG_TVAL: - val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff; - val &= lower_32_bits(val); + val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer); + val = lower_32_bits(val); break; case TIMER_REG_CTL: @@ -804,11 +893,11 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, break; case TIMER_REG_CVAL: - val = timer->cnt_cval; + val = timer_get_cval(timer); break; case TIMER_REG_CNT: - val = kvm_phys_timer_read() - timer->cntvoff; + val = kvm_phys_timer_read() - timer_get_offset(timer); break; default: @@ -842,15 +931,15 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, { switch (treg) { case TIMER_REG_TVAL: - timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + (s32)val; + timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val); break; case TIMER_REG_CTL: - timer->cnt_ctl = val & ~ARCH_TIMER_CTRL_IT_STAT; + timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT); break; case TIMER_REG_CVAL: - timer->cnt_cval = val; + timer_set_cval(timer, val); break; default: diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 73e12869afe3..691d21e4c717 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -106,22 +106,15 @@ static int kvm_arm_default_max_vcpus(void) */ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { - int ret, cpu; + int ret; ret = kvm_arm_setup_stage2(kvm, type); if (ret) return ret; - kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran)); - if (!kvm->arch.last_vcpu_ran) - return -ENOMEM; - - for_each_possible_cpu(cpu) - *per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1; - 
- ret = kvm_alloc_stage2_pgd(kvm); + ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu); if (ret) - goto out_fail_alloc; + return ret; ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP); if (ret) @@ -129,18 +122,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_vgic_early_init(kvm); - /* Mark the initial VMID generation invalid */ - kvm->arch.vmid.vmid_gen = 0; - /* The maximum number of VCPUs is limited by the host's GIC model */ kvm->arch.max_vcpus = kvm_arm_default_max_vcpus(); return ret; out_free_stage2_pgd: - kvm_free_stage2_pgd(kvm); -out_fail_alloc: - free_percpu(kvm->arch.last_vcpu_ran); - kvm->arch.last_vcpu_ran = NULL; + kvm_free_stage2_pgd(&kvm->arch.mmu); return ret; } @@ -160,9 +147,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_vgic_destroy(kvm); - free_percpu(kvm->arch.last_vcpu_ran); - kvm->arch.last_vcpu_ran = NULL; - for (i = 0; i < KVM_MAX_VCPUS; ++i) { if (kvm->vcpus[i]) { kvm_vcpu_destroy(kvm->vcpus[i]); @@ -281,6 +265,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) kvm_arm_pvtime_vcpu_init(&vcpu->arch); + vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu; + err = kvm_vgic_vcpu_init(vcpu); if (err) return err; @@ -336,16 +322,18 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + struct kvm_s2_mmu *mmu; int *last_ran; - last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran); + mmu = vcpu->arch.hw_mmu; + last_ran = this_cpu_ptr(mmu->last_vcpu_ran); /* * We might get preempted before the vCPU actually runs, but * over-invalidation doesn't affect correctness. 
*/ if (*last_ran != vcpu->vcpu_id) { - kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu); + kvm_call_hyp(__kvm_tlb_flush_local_vmid, mmu); *last_ran = vcpu->vcpu_id; } @@ -353,7 +341,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_vgic_load(vcpu); kvm_timer_vcpu_load(vcpu); - kvm_vcpu_load_sysregs(vcpu); + if (has_vhe()) + kvm_vcpu_load_sysregs_vhe(vcpu); kvm_arch_vcpu_load_fp(vcpu); kvm_vcpu_pmu_restore_guest(vcpu); if (kvm_arm_is_pvtime_enabled(&vcpu->arch)) @@ -371,7 +360,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvm_arch_vcpu_put_fp(vcpu); - kvm_vcpu_put_sysregs(vcpu); + if (has_vhe()) + kvm_vcpu_put_sysregs_vhe(vcpu); kvm_timer_vcpu_put(vcpu); kvm_vgic_put(vcpu); kvm_vcpu_pmu_restore_host(vcpu); @@ -468,7 +458,6 @@ static bool need_new_vmid_gen(struct kvm_vmid *vmid) /** * update_vmid - Update the vmid with a valid VMID for the current generation - * @kvm: The guest that struct vmid belongs to * @vmid: The stage-2 VMID information struct */ static void update_vmid(struct kvm_vmid *vmid) @@ -680,7 +669,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ cond_resched(); - update_vmid(&vcpu->kvm->arch.vmid); + update_vmid(&vcpu->arch.hw_mmu->vmid); check_vcpu_requests(vcpu); @@ -729,13 +718,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ smp_store_mb(vcpu->mode, IN_GUEST_MODE); - if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) || + if (ret <= 0 || need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) || kvm_request_pending(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; isb(); /* Ensure work in x_flush_hwstate is committed */ kvm_pmu_sync_hwstate(vcpu); if (static_branch_unlikely(&userspace_irqchip_in_use)) - kvm_timer_sync_hwstate(vcpu); + kvm_timer_sync_user(vcpu); kvm_vgic_sync_hwstate(vcpu); local_irq_enable(); preempt_enable(); @@ -750,11 +739,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) trace_kvm_entry(*vcpu_pc(vcpu)); guest_enter_irqoff(); - if 
(has_vhe()) { - ret = kvm_vcpu_run_vhe(vcpu); - } else { - ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu); - } + ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; @@ -784,7 +769,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * timer virtual interrupt state. */ if (static_branch_unlikely(&userspace_irqchip_in_use)) - kvm_timer_sync_hwstate(vcpu); + kvm_timer_sync_user(vcpu); kvm_arch_vcpu_ctxsync_fp(vcpu); @@ -1287,7 +1272,7 @@ static void cpu_init_hyp_mode(void) * so that we can use adr_l to access per-cpu variables in EL2. */ tpidr_el2 = ((unsigned long)this_cpu_ptr(&kvm_host_data) - - (unsigned long)kvm_ksym_ref(kvm_host_data)); + (unsigned long)kvm_ksym_ref(&kvm_host_data)); pgd_ptr = kvm_mmu_get_httbr(); hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE; @@ -1308,7 +1293,7 @@ static void cpu_init_hyp_mode(void) */ if (this_cpu_has_cap(ARM64_SSBS) && arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { - kvm_call_hyp(__kvm_enable_ssbs); + kvm_call_hyp_nvhe(__kvm_enable_ssbs); } } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index e329a36b2bee..3e081d556e81 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -85,7 +85,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) WARN_ON_ONCE(!irqs_disabled()); if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { - fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs, + fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs, vcpu->arch.sve_state, vcpu->arch.sve_max_vl); @@ -109,12 +109,10 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) local_irq_save(flags); if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { - u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1]; - fpsimd_save_and_flush_cpu_state(); if (guest_has_sve) - *guest_zcr = read_sysreg_s(SYS_ZCR_EL12); + __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_s(SYS_ZCR_EL12); } else if (host_has_sve) { /* * The FPSIMD/SVE state in the CPU has not been touched, and 
we diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index aea43ec60f37..dfb5218137ca 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -101,19 +101,69 @@ static int core_reg_size_from_offset(const struct kvm_vcpu *vcpu, u64 off) return size; } -static int validate_core_offset(const struct kvm_vcpu *vcpu, - const struct kvm_one_reg *reg) +static void *core_reg_addr(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { u64 off = core_reg_offset_from_id(reg->id); int size = core_reg_size_from_offset(vcpu, off); if (size < 0) - return -EINVAL; + return NULL; if (KVM_REG_SIZE(reg->id) != size) - return -EINVAL; + return NULL; - return 0; + switch (off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + off -= KVM_REG_ARM_CORE_REG(regs.regs[0]); + off /= 2; + return &vcpu->arch.ctxt.regs.regs[off]; + + case KVM_REG_ARM_CORE_REG(regs.sp): + return &vcpu->arch.ctxt.regs.sp; + + case KVM_REG_ARM_CORE_REG(regs.pc): + return &vcpu->arch.ctxt.regs.pc; + + case KVM_REG_ARM_CORE_REG(regs.pstate): + return &vcpu->arch.ctxt.regs.pstate; + + case KVM_REG_ARM_CORE_REG(sp_el1): + return __ctxt_sys_reg(&vcpu->arch.ctxt, SP_EL1); + + case KVM_REG_ARM_CORE_REG(elr_el1): + return __ctxt_sys_reg(&vcpu->arch.ctxt, ELR_EL1); + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_EL1]): + return __ctxt_sys_reg(&vcpu->arch.ctxt, SPSR_EL1); + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_ABT]): + return &vcpu->arch.ctxt.spsr_abt; + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_UND]): + return &vcpu->arch.ctxt.spsr_und; + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_IRQ]): + return &vcpu->arch.ctxt.spsr_irq; + + case KVM_REG_ARM_CORE_REG(spsr[KVM_SPSR_FIQ]): + return &vcpu->arch.ctxt.spsr_fiq; + + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... 
+ KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + off -= KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]); + off /= 4; + return &vcpu->arch.ctxt.fp_regs.vregs[off]; + + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + return &vcpu->arch.ctxt.fp_regs.fpsr; + + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + return &vcpu->arch.ctxt.fp_regs.fpcr; + + default: + return NULL; + } } static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) @@ -125,8 +175,8 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) * off the index in the "array". */ __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; - struct kvm_regs *regs = vcpu_gp_regs(vcpu); - int nr_regs = sizeof(*regs) / sizeof(__u32); + int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32); + void *addr; u32 off; /* Our ID is an index into the kvm_regs struct. */ @@ -135,10 +185,11 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; - if (validate_core_offset(vcpu, reg)) + addr = core_reg_addr(vcpu, reg); + if (!addr) return -EINVAL; - if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) + if (copy_to_user(uaddr, addr, KVM_REG_SIZE(reg->id))) return -EFAULT; return 0; @@ -147,10 +198,9 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr; - struct kvm_regs *regs = vcpu_gp_regs(vcpu); - int nr_regs = sizeof(*regs) / sizeof(__u32); + int nr_regs = sizeof(struct kvm_regs) / sizeof(__u32); __uint128_t tmp; - void *valp = &tmp; + void *valp = &tmp, *addr; u64 off; int err = 0; @@ -160,7 +210,8 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; - if (validate_core_offset(vcpu, reg)) + addr = 
core_reg_addr(vcpu, reg); + if (!addr) return -EINVAL; if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) @@ -198,7 +249,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } } - memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id)); + memcpy(addr, valp, KVM_REG_SIZE(reg->id)); if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) { int i; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 1df3beafd73f..fe6c7d79309d 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -89,7 +89,7 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu) */ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) { - if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) { + if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_WFx_ISS_WFE) { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); vcpu->stat.wfe_exit_stat++; kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); @@ -119,13 +119,13 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - u32 hsr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); int ret = 0; run->exit_reason = KVM_EXIT_DEBUG; - run->debug.arch.hsr = hsr; + run->debug.arch.hsr = esr; - switch (ESR_ELx_EC(hsr)) { + switch (ESR_ELx_EC(esr)) { case ESR_ELx_EC_WATCHPT_LOW: run->debug.arch.far = vcpu->arch.fault.far_el2; /* fall through */ @@ -135,8 +135,8 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu) case ESR_ELx_EC_BRK64: break; default: - kvm_err("%s: un-handled case hsr: %#08x\n", - __func__, (unsigned int) hsr); + kvm_err("%s: un-handled case esr: %#08x\n", + __func__, (unsigned int) esr); ret = -1; break; } @@ -146,10 +146,10 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu) static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu) { - u32 hsr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); - kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n", - hsr, esr_get_class_string(hsr)); + kvm_pr_unimpl("Unknown 
exception class: esr: %#08x -- %s\n", + esr, esr_get_class_string(esr)); kvm_inject_undefined(vcpu); return 1; @@ -200,10 +200,10 @@ static exit_handle_fn arm_exit_handlers[] = { static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) { - u32 hsr = kvm_vcpu_get_hsr(vcpu); - u8 hsr_ec = ESR_ELx_EC(hsr); + u32 esr = kvm_vcpu_get_esr(vcpu); + u8 esr_ec = ESR_ELx_EC(esr); - return arm_exit_handlers[hsr_ec]; + return arm_exit_handlers[esr_ec]; } /* @@ -242,15 +242,15 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) struct kvm_run *run = vcpu->run; if (ARM_SERROR_PENDING(exception_index)) { - u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); + u8 esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); /* * HVC/SMC already have an adjusted PC, which we need * to correct in order to return to after having * injected the SError. */ - if (hsr_ec == ESR_ELx_EC_HVC32 || hsr_ec == ESR_ELx_EC_HVC64 || - hsr_ec == ESR_ELx_EC_SMC32 || hsr_ec == ESR_ELx_EC_SMC64) { + if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64 || + esr_ec == ESR_ELx_EC_SMC32 || esr_ec == ESR_ELx_EC_SMC64) { u32 adj = kvm_vcpu_trap_il_is32bit(vcpu) ? 
4 : 2; *vcpu_pc(vcpu) -= adj; } @@ -307,5 +307,5 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index) exception_index = ARM_EXCEPTION_CODE(exception_index); if (exception_index == ARM_EXCEPTION_EL1_SERROR) - kvm_handle_guest_serror(vcpu, kvm_vcpu_get_hsr(vcpu)); + kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu)); } diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 8c9880783839..f54f0e89a71c 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -3,18 +3,12 @@ # Makefile for Kernel-based Virtual Machine module, HYP part # -ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \ - $(DISABLE_STACKLEAK_PLUGIN) - -obj-$(CONFIG_KVM) += hyp.o - -hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \ - debug-sr.o entry.o switch.o fpsimd.o tlb.o hyp-entry.o - -# KVM code is run at a different exception code with a different map, so -# compiler instrumentation that inserts callbacks or checks into the code may -# cause crashes. Just disable it. -GCOV_PROFILE := n -KASAN_SANITIZE := n -UBSAN_SANITIZE := n -KCOV_INSTRUMENT := n +incdir := $(srctree)/$(src)/include +subdir-asflags-y := -I$(incdir) +subdir-ccflags-y := -I$(incdir) \ + -fno-stack-protector \ + -DDISABLE_BRANCH_PROFILING \ + $(DISABLE_STACKLEAK_PLUGIN) + +obj-$(CONFIG_KVM) += vhe/ nvhe/ +obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c index 25c0e47d57cb..ae56d8a4b382 100644 --- a/arch/arm64/kvm/hyp/aarch32.c +++ b/arch/arm64/kvm/hyp/aarch32.c @@ -44,14 +44,14 @@ static const unsigned short cc_map[16] = { /* * Check if a trapped instruction should have been executed or not. */ -bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) +bool kvm_condition_valid32(const struct kvm_vcpu *vcpu) { unsigned long cpsr; u32 cpsr_cond; int cond; /* Top two bits non-zero? Unconditional. 
*/ - if (kvm_vcpu_get_hsr(vcpu) >> 30) + if (kvm_vcpu_get_esr(vcpu) >> 30) return true; /* Is condition field valid? */ @@ -93,7 +93,7 @@ bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) * * IT[7:0] -> CPSR[26:25],CPSR[15:10] */ -static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) +static void kvm_adjust_itstate(struct kvm_vcpu *vcpu) { unsigned long itbits, cond; unsigned long cpsr = *vcpu_cpsr(vcpu); @@ -123,7 +123,7 @@ static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) * kvm_skip_instr - skip a trapped instruction and proceed to the next * @vcpu: The vcpu pointer */ -void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) { u32 pc = *vcpu_pc(vcpu); bool is_thumb; diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 90186cf6473e..ee32a7743389 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -16,12 +16,10 @@ #include <asm/kvm_mmu.h> #include <asm/kvm_ptrauth.h> -#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) -#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) +#define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*x) #define CPU_SP_EL0_OFFSET (CPU_XREG_OFFSET(30) + 8) .text - .pushsection .hyp.text, "ax" /* * We treat x18 as callee-saved as the host may use it as a platform diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S index 5b8ff517ff10..01f114aa47b0 100644 --- a/arch/arm64/kvm/hyp/fpsimd.S +++ b/arch/arm64/kvm/hyp/fpsimd.S @@ -9,7 +9,6 @@ #include <asm/fpsimdmacros.h> .text - .pushsection .hyp.text, "ax" SYM_FUNC_START(__fpsimd_save_state) fpsimd_save x0, 1 diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 9c5cfb04170e..689fccbc9de7 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -16,7 +16,6 @@ #include <asm/mmu.h> .text - .pushsection .hyp.text, "ax" .macro do_el2_call /* @@ -40,6 
+39,7 @@ el1_sync: // Guest trapped into EL2 ccmp x0, #ESR_ELx_EC_HVC32, #4, ne b.ne el1_trap +#ifdef __KVM_NVHE_HYPERVISOR__ mrs x1, vttbr_el2 // If vttbr is valid, the guest cbnz x1, el1_hvc_guest // called HVC @@ -74,6 +74,7 @@ el1_sync: // Guest trapped into EL2 eret sb +#endif /* __KVM_NVHE_HYPERVISOR__ */ el1_hvc_guest: /* @@ -180,6 +181,7 @@ el2_error: eret sb +#ifdef __KVM_NVHE_HYPERVISOR__ SYM_FUNC_START(__hyp_do_panic) mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ PSR_MODE_EL1h) @@ -189,6 +191,7 @@ SYM_FUNC_START(__hyp_do_panic) eret sb SYM_FUNC_END(__hyp_do_panic) +#endif SYM_CODE_START(__hyp_panic) get_host_ctxt x0, x1 @@ -318,20 +321,4 @@ SYM_CODE_START(__bp_harden_hyp_vecs) 1: .org __bp_harden_hyp_vecs + __BP_HARDEN_HYP_VECS_SZ .org 1b SYM_CODE_END(__bp_harden_hyp_vecs) - - .popsection - -SYM_CODE_START(__smccc_workaround_1_smc) - esb - sub sp, sp, #(8 * 4) - stp x2, x3, [sp, #(8 * 0)] - stp x0, x1, [sp, #(8 * 2)] - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 - smc #0 - ldp x2, x3, [sp, #(8 * 0)] - ldp x0, x1, [sp, #(8 * 2)] - add sp, sp, #(8 * 4) -1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ - .org 1b -SYM_CODE_END(__smccc_workaround_1_smc) #endif diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index e95af204fec7..0297dc63988c 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -4,6 +4,9 @@ * Author: Marc Zyngier <[email protected]> */ +#ifndef __ARM64_KVM_HYP_DEBUG_SR_H__ +#define __ARM64_KVM_HYP_DEBUG_SR_H__ + #include <linux/compiler.h> #include <linux/kvm_host.h> @@ -85,53 +88,8 @@ default: write_debug(ptr[0], reg, 0); \ } -static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1) -{ - u64 reg; - - /* Clear pmscr in case of early return */ - *pmscr_el1 = 0; - - /* SPE present on this CPU? */ - if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), - ID_AA64DFR0_PMSVER_SHIFT)) - return; - - /* Yes; is it owned by EL3? 
*/ - reg = read_sysreg_s(SYS_PMBIDR_EL1); - if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT)) - return; - - /* No; is the host actually using the thing? */ - reg = read_sysreg_s(SYS_PMBLIMITR_EL1); - if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT))) - return; - - /* Yes; save the control register and disable data generation */ - *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1); - write_sysreg_s(0, SYS_PMSCR_EL1); - isb(); - - /* Now drain all buffered data to memory */ - psb_csync(); - dsb(nsh); -} - -static void __hyp_text __debug_restore_spe_nvhe(u64 pmscr_el1) -{ - if (!pmscr_el1) - return; - - /* The host page table is installed, but not yet synchronised */ - isb(); - - /* Re-enable data generation */ - write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); -} - -static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt) +static void __debug_save_state(struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) { u64 aa64dfr0; int brps, wrps; @@ -145,12 +103,11 @@ static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, save_debug(dbg->dbg_wcr, dbgwcr, wrps); save_debug(dbg->dbg_wvr, dbgwvr, wrps); - ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1); + ctxt_sys_reg(ctxt, MDCCINT_EL1) = read_sysreg(mdccint_el1); } -static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, - struct kvm_guest_debug_arch *dbg, - struct kvm_cpu_context *ctxt) +static void __debug_restore_state(struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) { u64 aa64dfr0; int brps, wrps; @@ -165,23 +122,16 @@ static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, restore_debug(dbg->dbg_wcr, dbgwcr, wrps); restore_debug(dbg->dbg_wvr, dbgwvr, wrps); - write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); + write_sysreg(ctxt_sys_reg(ctxt, MDCCINT_EL1), mdccint_el1); } -void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) +static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) 
{ struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; struct kvm_guest_debug_arch *host_dbg; struct kvm_guest_debug_arch *guest_dbg; - /* - * Non-VHE: Disable and flush SPE data generation - * VHE: The vcpu can run, but it can't hide. - */ - if (!has_vhe()) - __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1); - if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) return; @@ -190,20 +140,17 @@ void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); - __debug_save_state(vcpu, host_dbg, host_ctxt); - __debug_restore_state(vcpu, guest_dbg, guest_ctxt); + __debug_save_state(host_dbg, host_ctxt); + __debug_restore_state(guest_dbg, guest_ctxt); } -void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) +static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; struct kvm_guest_debug_arch *host_dbg; struct kvm_guest_debug_arch *guest_dbg; - if (!has_vhe()) - __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1); - if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) return; @@ -212,13 +159,10 @@ void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); - __debug_save_state(vcpu, guest_dbg, guest_ctxt); - __debug_restore_state(vcpu, host_dbg, host_ctxt); + __debug_save_state(guest_dbg, guest_ctxt); + __debug_restore_state(host_dbg, host_ctxt); vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY; } -u32 __hyp_text __kvm_get_mdcr_el2(void) -{ - return read_sysreg(mdcr_el2); -} +#endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */ diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h new file mode 100644 index 000000000000..426ef65601dd --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -0,0 +1,511 @@ +// 
SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <[email protected]> + */ + +#ifndef __ARM64_KVM_HYP_SWITCH_H__ +#define __ARM64_KVM_HYP_SWITCH_H__ + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> +#include <linux/types.h> +#include <linux/jump_label.h> +#include <uapi/linux/psci.h> + +#include <kvm/arm_psci.h> + +#include <asm/barrier.h> +#include <asm/cpufeature.h> +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/fpsimd.h> +#include <asm/debug-monitors.h> +#include <asm/processor.h> +#include <asm/thread_info.h> + +extern const char __hyp_panic_string[]; + +/* Check whether the FP regs were dirtied while in the host-side run loop: */ +static inline bool update_fp_enabled(struct kvm_vcpu *vcpu) +{ + /* + * When the system doesn't support FP/SIMD, we cannot rely on + * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an + * abort on the very first access to FP and thus we should never + * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always + * trap the accesses. + */ + if (!system_supports_fpsimd() || + vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) + vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | + KVM_ARM64_FP_HOST); + + return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED); +} + +/* Save the 32-bit only FPSIMD system register state */ +static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) +{ + if (!vcpu_el1_is_32bit(vcpu)) + return; + + __vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2); +} + +static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) +{ + /* + * We are about to set CPTR_EL2.TFP to trap all floating point + * register accesses to EL2, however, the ARM ARM clearly states that + * traps are only taken to EL2 if the operation would not otherwise + * trap to EL1. 
Therefore, always make sure that for 32-bit guests, + * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. + * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to + * it will cause an exception. + */ + if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) { + write_sysreg(1 << 30, fpexc32_el2); + isb(); + } +} + +static inline void __activate_traps_common(struct kvm_vcpu *vcpu) +{ + /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ + write_sysreg(1 << 15, hstr_el2); + + /* + * Make sure we trap PMU access from EL0 to EL2. Also sanitize + * PMSELR_EL0 to make sure it never contains the cycle + * counter, which could make a PMXEVCNTR_EL0 access UNDEF at + * EL1 instead of being trapped to EL2. + */ + write_sysreg(0, pmselr_el0); + write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); +} + +static inline void __deactivate_traps_common(void) +{ + write_sysreg(0, hstr_el2); + write_sysreg(0, pmuserenr_el0); +} + +static inline void ___activate_traps(struct kvm_vcpu *vcpu) +{ + u64 hcr = vcpu->arch.hcr_el2; + + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) + hcr |= HCR_TVM; + + write_sysreg(hcr, hcr_el2); + + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) + write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); +} + +static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) +{ + /* + * If we pended a virtual abort, preserve it until it gets + * cleared. See D1.14.3 (Virtual Interrupts) for details, but + * the crucial bit is "On taking a vSError interrupt, + * HCR_EL2.VSE is cleared to 0." 
+ */ + if (vcpu->arch.hcr_el2 & HCR_VSE) { + vcpu->arch.hcr_el2 &= ~HCR_VSE; + vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; + } +} + +static inline void __activate_vm(struct kvm_s2_mmu *mmu) +{ + __load_guest_stage2(mmu); +} + +static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) +{ + u64 par, tmp; + + /* + * Resolve the IPA the hard way using the guest VA. + * + * Stage-1 translation already validated the memory access + * rights. As such, we can use the EL1 translation regime, and + * don't have to distinguish between EL0 and EL1 access. + * + * We do need to save/restore PAR_EL1 though, as we haven't + * saved the guest context yet, and we may return early... + */ + par = read_sysreg(par_el1); + asm volatile("at s1e1r, %0" : : "r" (far)); + isb(); + + tmp = read_sysreg(par_el1); + write_sysreg(par, par_el1); + + if (unlikely(tmp & SYS_PAR_EL1_F)) + return false; /* Translation failed, back to guest */ + + /* Convert PAR to HPFAR format */ + *hpfar = PAR_TO_HPFAR(tmp); + return true; +} + +static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) +{ + u8 ec; + u64 esr; + u64 hpfar, far; + + esr = vcpu->arch.fault.esr_el2; + ec = ESR_ELx_EC(esr); + + if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) + return true; + + far = read_sysreg_el2(SYS_FAR); + + /* + * The HPFAR can be invalid if the stage 2 fault did not + * happen during a stage 1 page table walk (the ESR_EL2.S1PTW + * bit is clear) and one of the two following cases are true: + * 1. The fault was due to a permission fault + * 2. The processor carries errata 834220 + * + * Therefore, for all non S1PTW faults where we either have a + * permission fault or the errata workaround is enabled, we + * resolve the IPA using the AT instruction. 
+ */ + if (!(esr & ESR_ELx_S1PTW) && + (cpus_have_final_cap(ARM64_WORKAROUND_834220) || + (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { + if (!__translate_far_to_hpfar(far, &hpfar)) + return false; + } else { + hpfar = read_sysreg(hpfar_el2); + } + + vcpu->arch.fault.far_el2 = far; + vcpu->arch.fault.hpfar_el2 = hpfar; + return true; +} + +/* Check for an FPSIMD/SVE trap and handle as appropriate */ +static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) +{ + bool vhe, sve_guest, sve_host; + u8 esr_ec; + + if (!system_supports_fpsimd()) + return false; + + /* + * Currently system_supports_sve() currently implies has_vhe(), + * so the check is redundant. However, has_vhe() can be determined + * statically and helps the compiler remove dead code. + */ + if (has_vhe() && system_supports_sve()) { + sve_guest = vcpu_has_sve(vcpu); + sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE; + vhe = true; + } else { + sve_guest = false; + sve_host = false; + vhe = has_vhe(); + } + + esr_ec = kvm_vcpu_trap_get_class(vcpu); + if (esr_ec != ESR_ELx_EC_FP_ASIMD && + esr_ec != ESR_ELx_EC_SVE) + return false; + + /* Don't handle SVE traps for non-SVE vcpus here: */ + if (!sve_guest) + if (esr_ec != ESR_ELx_EC_FP_ASIMD) + return false; + + /* Valid trap. Switch the context: */ + + if (vhe) { + u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN; + + if (sve_guest) + reg |= CPACR_EL1_ZEN; + + write_sysreg(reg, cpacr_el1); + } else { + write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, + cptr_el2); + } + + isb(); + + if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { + /* + * In the SVE case, VHE is assumed: it is enforced by + * Kconfig and kvm_arch_init(). 
+ */ + if (sve_host) { + struct thread_struct *thread = container_of( + vcpu->arch.host_fpsimd_state, + struct thread_struct, uw.fpsimd_state); + + sve_save_state(sve_pffr(thread), + &vcpu->arch.host_fpsimd_state->fpsr); + } else { + __fpsimd_save_state(vcpu->arch.host_fpsimd_state); + } + + vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; + } + + if (sve_guest) { + sve_load_state(vcpu_sve_pffr(vcpu), + &vcpu->arch.ctxt.fp_regs.fpsr, + sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); + write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12); + } else { + __fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs); + } + + /* Skip restoring fpexc32 for AArch64 guests */ + if (!(read_sysreg(hcr_el2) & HCR_RW)) + write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2); + + vcpu->arch.flags |= KVM_ARM64_FP_ENABLED; + + return true; +} + +static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu) +{ + u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu)); + int rt = kvm_vcpu_sys_get_rt(vcpu); + u64 val = vcpu_get_reg(vcpu, rt); + + /* + * The normal sysreg handling code expects to see the traps, + * let's not do anything here. 
+ */ + if (vcpu->arch.hcr_el2 & HCR_TVM) + return false; + + switch (sysreg) { + case SYS_SCTLR_EL1: + write_sysreg_el1(val, SYS_SCTLR); + break; + case SYS_TTBR0_EL1: + write_sysreg_el1(val, SYS_TTBR0); + break; + case SYS_TTBR1_EL1: + write_sysreg_el1(val, SYS_TTBR1); + break; + case SYS_TCR_EL1: + write_sysreg_el1(val, SYS_TCR); + break; + case SYS_ESR_EL1: + write_sysreg_el1(val, SYS_ESR); + break; + case SYS_FAR_EL1: + write_sysreg_el1(val, SYS_FAR); + break; + case SYS_AFSR0_EL1: + write_sysreg_el1(val, SYS_AFSR0); + break; + case SYS_AFSR1_EL1: + write_sysreg_el1(val, SYS_AFSR1); + break; + case SYS_MAIR_EL1: + write_sysreg_el1(val, SYS_MAIR); + break; + case SYS_AMAIR_EL1: + write_sysreg_el1(val, SYS_AMAIR); + break; + case SYS_CONTEXTIDR_EL1: + write_sysreg_el1(val, SYS_CONTEXTIDR); + break; + default: + return false; + } + + __kvm_skip_instr(vcpu); + return true; +} + +static inline bool esr_is_ptrauth_trap(u32 esr) +{ + u32 ec = ESR_ELx_EC(esr); + + if (ec == ESR_ELx_EC_PAC) + return true; + + if (ec != ESR_ELx_EC_SYS64) + return false; + + switch (esr_sys64_to_sysreg(esr)) { + case SYS_APIAKEYLO_EL1: + case SYS_APIAKEYHI_EL1: + case SYS_APIBKEYLO_EL1: + case SYS_APIBKEYHI_EL1: + case SYS_APDAKEYLO_EL1: + case SYS_APDAKEYHI_EL1: + case SYS_APDBKEYLO_EL1: + case SYS_APDBKEYHI_EL1: + case SYS_APGAKEYLO_EL1: + case SYS_APGAKEYHI_EL1: + return true; + } + + return false; +} + +#define __ptrauth_save_key(ctxt, key) \ + do { \ + u64 __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \ +} while(0) + +static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *ctxt; + u64 val; + + if (!vcpu_has_ptrauth(vcpu) || + !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) + return false; + + ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + __ptrauth_save_key(ctxt, APIA); + 
__ptrauth_save_key(ctxt, APIB); + __ptrauth_save_key(ctxt, APDA); + __ptrauth_save_key(ctxt, APDB); + __ptrauth_save_key(ctxt, APGA); + + vcpu_ptrauth_enable(vcpu); + + val = read_sysreg(hcr_el2); + val |= (HCR_API | HCR_APK); + write_sysreg(val, hcr_el2); + + return true; +} + +/* + * Return true when we were able to fixup the guest exit and should return to + * the guest, false when we should restore the host state and return to the + * main run loop. + */ +static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) + vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); + + /* + * We're using the raw exception code in order to only process + * the trap if no SError is pending. We will come back to the + * same PC once the SError has been injected, and replay the + * trapping instruction. + */ + if (*exit_code != ARM_EXCEPTION_TRAP) + goto exit; + + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && + handle_tx2_tvm(vcpu)) + return true; + + /* + * We trap the first access to the FP/SIMD to save the host context + * and restore the guest context lazily. + * If FP/SIMD is not implemented, handle the trap and inject an + * undefined instruction exception to the guest. + * Similarly for trapped SVE accesses. 
+ */ + if (__hyp_handle_fpsimd(vcpu)) + return true; + + if (__hyp_handle_ptrauth(vcpu)) + return true; + + if (!__populate_fault_info(vcpu)) + return true; + + if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { + bool valid; + + valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && + kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && + kvm_vcpu_dabt_isvalid(vcpu) && + !kvm_vcpu_abt_issea(vcpu) && + !kvm_vcpu_dabt_iss1tw(vcpu); + + if (valid) { + int ret = __vgic_v2_perform_cpuif_access(vcpu); + + if (ret == 1) + return true; + + /* Promote an illegal access to an SError.*/ + if (ret == -1) + *exit_code = ARM_EXCEPTION_EL1_SERROR; + + goto exit; + } + } + + if (static_branch_unlikely(&vgic_v3_cpuif_trap) && + (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { + int ret = __vgic_v3_perform_cpuif_access(vcpu); + + if (ret == 1) + return true; + } + +exit: + /* Return to the host kernel and handle the exit */ + return false; +} + +static inline bool __needs_ssbd_off(struct kvm_vcpu *vcpu) +{ + if (!cpus_have_final_cap(ARM64_SSBD)) + return false; + + return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); +} + +static inline void __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ARM64_SSBD + /* + * The host runs with the workaround always present. If the + * guest wants it disabled, so be it... + */ + if (__needs_ssbd_off(vcpu) && + __hyp_this_cpu_read(arm64_ssbd_callback_required)) + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); +#endif +} + +static inline void __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ARM64_SSBD + /* + * If the guest has disabled the workaround, bring it back on. 
+ */ + if (__needs_ssbd_off(vcpu) && + __hyp_this_cpu_read(arm64_ssbd_callback_required)) + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); +#endif +} + +#endif /* __ARM64_KVM_HYP_SWITCH_H__ */ diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h new file mode 100644 index 000000000000..7a986030145f --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <[email protected]> + */ + +#ifndef __ARM64_KVM_HYP_SYSREG_SR_H__ +#define __ARM64_KVM_HYP_SYSREG_SR_H__ + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) +{ + ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1); +} + +static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt) +{ + ctxt_sys_reg(ctxt, TPIDR_EL0) = read_sysreg(tpidr_el0); + ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0); +} + +static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) +{ + ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1); + ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR); + ctxt_sys_reg(ctxt, CPACR_EL1) = read_sysreg_el1(SYS_CPACR); + ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0); + ctxt_sys_reg(ctxt, TTBR1_EL1) = read_sysreg_el1(SYS_TTBR1); + ctxt_sys_reg(ctxt, TCR_EL1) = read_sysreg_el1(SYS_TCR); + ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR); + ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0); + ctxt_sys_reg(ctxt, AFSR1_EL1) = read_sysreg_el1(SYS_AFSR1); + ctxt_sys_reg(ctxt, FAR_EL1) = read_sysreg_el1(SYS_FAR); + ctxt_sys_reg(ctxt, MAIR_EL1) = read_sysreg_el1(SYS_MAIR); + ctxt_sys_reg(ctxt, VBAR_EL1) = read_sysreg_el1(SYS_VBAR); + ctxt_sys_reg(ctxt, 
CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR); + ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR); + ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL); + ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg(par_el1); + ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); + + ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); + ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR); + ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR); +} + +static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) +{ + ctxt->regs.pc = read_sysreg_el2(SYS_ELR); + ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR); + + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2); +} + +static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1); +} + +static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL0), tpidr_el0); + write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0); +} + +static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2); + write_sysreg(ctxt_sys_reg(ctxt, CSSELR_EL1), csselr_el1); + + if (has_vhe() || + !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); + } else if (!ctxt->__hyp_running_vcpu) { + /* + * Must only be done for guest registers, hence the context + * test. We're coming from the host, so SCTLR.M is already + * set. Pairs with nVHE's __activate_traps(). 
+ */ + write_sysreg_el1((ctxt_sys_reg(ctxt, TCR_EL1) | + TCR_EPD1_MASK | TCR_EPD0_MASK), + SYS_TCR); + isb(); + } + + write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1), SYS_CPACR); + write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1), SYS_TTBR0); + write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1), SYS_TTBR1); + write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR); + write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0); + write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR1_EL1), SYS_AFSR1); + write_sysreg_el1(ctxt_sys_reg(ctxt, FAR_EL1), SYS_FAR); + write_sysreg_el1(ctxt_sys_reg(ctxt, MAIR_EL1), SYS_MAIR); + write_sysreg_el1(ctxt_sys_reg(ctxt, VBAR_EL1), SYS_VBAR); + write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR); + write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR); + write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL); + write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); + write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); + + if (!has_vhe() && + cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) && + ctxt->__hyp_running_vcpu) { + /* + * Must only be done for host registers, hence the context + * test. Pairs with nVHE's __deactivate_traps(). + */ + isb(); + /* + * At this stage, and thanks to the above isb(), S2 is + * deconfigured and disabled. We can now restore the host's + * S1 configuration: SCTLR, and only then TCR. + */ + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); + isb(); + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); + } + + write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1); + write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR); + write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR); +} + +static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) +{ + u64 pstate = ctxt->regs.pstate; + u64 mode = pstate & PSR_AA32_MODE_MASK; + + /* + * Safety check to ensure we're setting the CPU up to enter the guest + * in a less privileged mode. 
+ * + * If we are attempting a return to EL2 or higher in AArch64 state, + * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that + * we'll take an illegal exception state exception immediately after + * the ERET to the guest. Attempts to return to AArch32 Hyp will + * result in an illegal exception return because EL2's execution state + * is determined by SCR_EL3.RW. + */ + if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) + pstate = PSR_MODE_EL2h | PSR_IL_BIT; + + write_sysreg_el2(ctxt->regs.pc, SYS_ELR); + write_sysreg_el2(pstate, SYS_SPSR); + + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2); +} + +static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) +{ + if (!vcpu_el1_is_32bit(vcpu)) + return; + + vcpu->arch.ctxt.spsr_abt = read_sysreg(spsr_abt); + vcpu->arch.ctxt.spsr_und = read_sysreg(spsr_und); + vcpu->arch.ctxt.spsr_irq = read_sysreg(spsr_irq); + vcpu->arch.ctxt.spsr_fiq = read_sysreg(spsr_fiq); + + __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2); + __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2); + + if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) + __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2); +} + +static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) +{ + if (!vcpu_el1_is_32bit(vcpu)) + return; + + write_sysreg(vcpu->arch.ctxt.spsr_abt, spsr_abt); + write_sysreg(vcpu->arch.ctxt.spsr_und, spsr_und); + write_sysreg(vcpu->arch.ctxt.spsr_irq, spsr_irq); + write_sysreg(vcpu->arch.ctxt.spsr_fiq, spsr_fiq); + + write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2); + write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2); + + if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) + write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2); +} + +#endif /* __ARM64_KVM_HYP_SYSREG_SR_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile new file mode 100644 
index 000000000000..aef76487edc2 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part +# + +asflags-y := -D__KVM_NVHE_HYPERVISOR__ +ccflags-y := -D__KVM_NVHE_HYPERVISOR__ + +obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o +obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ + ../fpsimd.o ../hyp-entry.o + +obj-y := $(patsubst %.o,%.hyp.o,$(obj-y)) +extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y)) + +$(obj)/%.hyp.tmp.o: $(src)/%.c FORCE + $(call if_changed_rule,cc_o_c) +$(obj)/%.hyp.tmp.o: $(src)/%.S FORCE + $(call if_changed_rule,as_o_S) +$(obj)/%.hyp.o: $(obj)/%.hyp.tmp.o FORCE + $(call if_changed,hypcopy) + +# Disable reordering functions by GCC (enabled at -O2). +# This pass puts functions into '.text.*' sections to aid the linker +# in optimizing ELF layout. See HYPCOPY comment below for more info. +ccflags-y += $(call cc-option,-fno-reorder-functions) + +# The HYPCOPY command uses `objcopy` to prefix all ELF symbol names +# and relevant ELF section names to avoid clashes with VHE code/data. +# +# Hyp code is assumed to be in the '.text' section of the input object +# files (with the exception of specialized sections such as +# '.hyp.idmap.text'). This assumption may be broken by a compiler that +# divides code into sections like '.text.unlikely' so as to optimize +# ELF layout. HYPCOPY checks that no such sections exist in the input +# using `objdump`, otherwise they would be linked together with other +# kernel code and not memory-mapped correctly at runtime. +quiet_cmd_hypcopy = HYPCOPY $@ + cmd_hypcopy = \ + if $(OBJDUMP) -h $< | grep -F '.text.'; then \ + echo "$@: function reordering not supported in nVHE hyp code" >&2; \ + /bin/false; \ + fi; \ + $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ \ + --rename-section=.text=.hyp.text \ + $< $@ + +# Remove ftrace and Shadow Call Stack CFLAGS. 
+# This is equivalent to the 'notrace' and '__noscs' annotations. +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) + +# KVM nVHE code is run at a different exception code with a different map, so +# compiler instrumentation that inserts callbacks or checks into the code may +# cause crashes. Just disable it. +GCOV_PROFILE := n +KASAN_SANITIZE := n +UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n + +# Skip objtool checking for this directory because nVHE code is compiled with +# non-standard build rules. +OBJECT_FILES_NON_STANDARD := y diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c new file mode 100644 index 000000000000..91a711aa8382 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <[email protected]> + */ + +#include <hyp/debug-sr.h> + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/debug-monitors.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> + +static void __debug_save_spe(u64 *pmscr_el1) +{ + u64 reg; + + /* Clear pmscr in case of early return */ + *pmscr_el1 = 0; + + /* SPE present on this CPU? */ + if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), + ID_AA64DFR0_PMSVER_SHIFT)) + return; + + /* Yes; is it owned by EL3? */ + reg = read_sysreg_s(SYS_PMBIDR_EL1); + if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT)) + return; + + /* No; is the host actually using the thing? 
*/ + reg = read_sysreg_s(SYS_PMBLIMITR_EL1); + if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT))) + return; + + /* Yes; save the control register and disable data generation */ + *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1); + write_sysreg_s(0, SYS_PMSCR_EL1); + isb(); + + /* Now drain all buffered data to memory */ + psb_csync(); + dsb(nsh); +} + +static void __debug_restore_spe(u64 pmscr_el1) +{ + if (!pmscr_el1) + return; + + /* The host page table is installed, but not yet synchronised */ + isb(); + + /* Re-enable data generation */ + write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); +} + +void __debug_switch_to_guest(struct kvm_vcpu *vcpu) +{ + /* Disable and flush SPE data generation */ + __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); + __debug_switch_to_guest_common(vcpu); +} + +void __debug_switch_to_host(struct kvm_vcpu *vcpu) +{ + __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); + __debug_switch_to_host_common(vcpu); +} + +u32 __kvm_get_mdcr_el2(void) +{ + return read_sysreg(mdcr_el2); +} diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 86971fe26f3d..d9434e90c06d 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -105,6 +105,11 @@ alternative_else_nop_endif */ mov_q x4, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A)) CPU_BE( orr x4, x4, #SCTLR_ELx_EE) +alternative_if ARM64_HAS_ADDRESS_AUTH + mov_q x5, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ + SCTLR_ELx_ENDA | SCTLR_ELx_ENDB) + orr x4, x4, x5 +alternative_else_nop_endif msr sctlr_el2, x4 isb diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c new file mode 100644 index 000000000000..341be2f2f312 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <[email protected]> + */ + +#include <hyp/switch.h> +#include <hyp/sysreg-sr.h> + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> 
+#include <linux/types.h> +#include <linux/jump_label.h> +#include <uapi/linux/psci.h> + +#include <kvm/arm_psci.h> + +#include <asm/barrier.h> +#include <asm/cpufeature.h> +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/fpsimd.h> +#include <asm/debug-monitors.h> +#include <asm/processor.h> +#include <asm/thread_info.h> + +static void __activate_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + ___activate_traps(vcpu); + __activate_traps_common(vcpu); + + val = CPTR_EL2_DEFAULT; + val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM; + if (!update_fp_enabled(vcpu)) { + val |= CPTR_EL2_TFP; + __activate_traps_fpsimd32(vcpu); + } + + write_sysreg(val, cptr_el2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; + + isb(); + /* + * At this stage, and thanks to the above isb(), S2 is + * configured and enabled. We can now restore the guest's S1 + * configuration: SCTLR, and only then TCR. + */ + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); + isb(); + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); + } +} + +static void __deactivate_traps(struct kvm_vcpu *vcpu) +{ + u64 mdcr_el2; + + ___deactivate_traps(vcpu); + + mdcr_el2 = read_sysreg(mdcr_el2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + u64 val; + + /* + * Set the TCR and SCTLR registers in the exact opposite + * sequence as __activate_traps (first prevent walks, + * then force the MMU on). A generous sprinkling of isb() + * ensure that things happen in this exact order. 
+ */ + val = read_sysreg_el1(SYS_TCR); + write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR); + isb(); + val = read_sysreg_el1(SYS_SCTLR); + write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR); + isb(); + } + + __deactivate_traps_common(); + + mdcr_el2 &= MDCR_EL2_HPMN_MASK; + mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; + + write_sysreg(mdcr_el2, mdcr_el2); + write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); + write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); +} + +static void __deactivate_vm(struct kvm_vcpu *vcpu) +{ + write_sysreg(0, vttbr_el2); +} + +/* Save VGICv3 state on non-VHE systems */ +static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu) +{ + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { + __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); + __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); + } +} + +/* Restore VGICv3 state on non_VEH systems */ +static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) +{ + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { + __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); + __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); + } +} + +/** + * Disable host events, enable guest events + */ +static bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) +{ + struct kvm_host_data *host; + struct kvm_pmu_events *pmu; + + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + pmu = &host->pmu_events; + + if (pmu->events_host) + write_sysreg(pmu->events_host, pmcntenclr_el0); + + if (pmu->events_guest) + write_sysreg(pmu->events_guest, pmcntenset_el0); + + return (pmu->events_host || pmu->events_guest); +} + +/** + * Disable guest events, enable host events + */ +static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) +{ + struct kvm_host_data *host; + struct kvm_pmu_events *pmu; + + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + pmu = &host->pmu_events; + + if (pmu->events_guest) + write_sysreg(pmu->events_guest, 
pmcntenclr_el0); + + if (pmu->events_host) + write_sysreg(pmu->events_host, pmcntenset_el0); +} + +/* Switch to the guest for legacy non-VHE systems */ +int __kvm_vcpu_run(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + bool pmu_switch_needed; + u64 exit_code; + + /* + * Having IRQs masked via PMR when entering the guest means the GIC + * will not signal the CPU of interrupts of lower priority, and the + * only way to get out will be via guest exceptions. + * Naturally, we want to avoid this. + */ + if (system_uses_irq_prio_masking()) { + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + pmr_sync(); + } + + vcpu = kern_hyp_va(vcpu); + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + host_ctxt->__hyp_running_vcpu = vcpu; + guest_ctxt = &vcpu->arch.ctxt; + + pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); + + __sysreg_save_state_nvhe(host_ctxt); + + /* + * We must restore the 32-bit state before the sysregs, thanks + * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). + * + * Also, and in order to be able to deal with erratum #1319537 (A57) + * and #1319367 (A72), we must ensure that all VM-related sysreg are + * restored before we enable S2 translation. + */ + __sysreg32_restore_state(vcpu); + __sysreg_restore_state_nvhe(guest_ctxt); + + __activate_vm(kern_hyp_va(vcpu->arch.hw_mmu)); + __activate_traps(vcpu); + + __hyp_vgic_restore_state(vcpu); + __timer_enable_traps(vcpu); + + __debug_switch_to_guest(vcpu); + + __set_guest_arch_workaround_state(vcpu); + + do { + /* Jump in the fire! */ + exit_code = __guest_enter(vcpu, host_ctxt); + + /* And we're baaack! 
*/ + } while (fixup_guest_exit(vcpu, &exit_code)); + + __set_host_arch_workaround_state(vcpu); + + __sysreg_save_state_nvhe(guest_ctxt); + __sysreg32_save_state(vcpu); + __timer_disable_traps(vcpu); + __hyp_vgic_save_state(vcpu); + + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + + __sysreg_restore_state_nvhe(host_ctxt); + + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) + __fpsimd_save_fpexc32(vcpu); + + /* + * This must come after restoring the host sysregs, since a non-VHE + * system may enable SPE here and make use of the TTBRs. + */ + __debug_switch_to_host(vcpu); + + if (pmu_switch_needed) + __pmu_switch_to_host(host_ctxt); + + /* Returning to host will clear PSR.I, remask PMR if needed */ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQOFF); + + return exit_code; +} + +void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) +{ + u64 spsr = read_sysreg_el2(SYS_SPSR); + u64 elr = read_sysreg_el2(SYS_ELR); + u64 par = read_sysreg(par_el1); + struct kvm_vcpu *vcpu = host_ctxt->__hyp_running_vcpu; + unsigned long str_va; + + if (read_sysreg(vttbr_el2)) { + __timer_disable_traps(vcpu); + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + __sysreg_restore_state_nvhe(host_ctxt); + } + + /* + * Force the panic string to be loaded from the literal pool, + * making sure it is a kernel address and not a PC-relative + * reference. 
+ */ + asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string)); + + __hyp_do_panic(str_va, + spsr, elr, + read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR), + read_sysreg(hpfar_el2), par, vcpu); + unreachable(); +} diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c new file mode 100644 index 000000000000..88a25fc8fcd3 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <[email protected]> + */ + +#include <hyp/sysreg-sr.h> + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +/* + * Non-VHE: Both host and guest must save everything. + */ + +void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_save_el1_state(ctxt); + __sysreg_save_common_state(ctxt); + __sysreg_save_user_state(ctxt); + __sysreg_save_el2_return_state(ctxt); +} + +void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_restore_el1_state(ctxt); + __sysreg_restore_common_state(ctxt); + __sysreg_restore_user_state(ctxt); + __sysreg_restore_el2_return_state(ctxt); +} + +void __kvm_enable_ssbs(void) +{ + u64 tmp; + + asm volatile( + "mrs %0, sctlr_el2\n" + "orr %0, %0, %1\n" + "msr sctlr_el2, %0" + : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); +} diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c index fb5c0be33223..9072e71693ba 100644 --- a/arch/arm64/kvm/hyp/timer-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c @@ -10,7 +10,7 @@ #include <asm/kvm_hyp.h> -void __hyp_text __kvm_timer_set_cntvoff(u64 cntvoff) +void __kvm_timer_set_cntvoff(u64 cntvoff) { write_sysreg(cntvoff, cntvoff_el2); } @@ -19,7 +19,7 @@ void __hyp_text __kvm_timer_set_cntvoff(u64 cntvoff) * Should only be called on non-VHE systems. 
* VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). */ -void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) +void __timer_disable_traps(struct kvm_vcpu *vcpu) { u64 val; @@ -33,7 +33,7 @@ void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) * Should only be called on non-VHE systems. * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). */ -void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu) +void __timer_enable_traps(struct kvm_vcpu *vcpu) { u64 val; diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c new file mode 100644 index 000000000000..69eae608d670 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <[email protected]> + */ + +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/tlbflush.h> + +struct tlb_inv_context { + u64 tcr; +}; + +static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, + struct tlb_inv_context *cxt) +{ + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + u64 val; + + /* + * For CPUs that are affected by ARM 1319367, we need to + * avoid a host Stage-1 walk while we have the guest's + * VMID set in the VTTBR in order to invalidate TLBs. + * We're guaranteed that the S1 MMU is enabled, so we can + * simply set the EPD bits to avoid any further TLB fill. 
+ */ + val = cxt->tcr = read_sysreg_el1(SYS_TCR); + val |= TCR_EPD1_MASK | TCR_EPD0_MASK; + write_sysreg_el1(val, SYS_TCR); + isb(); + } + + __load_guest_stage2(mmu); +} + +static void __tlb_switch_to_host(struct tlb_inv_context *cxt) +{ + write_sysreg(0, vttbr_el2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + /* Ensure write of the host VMID */ + isb(); + /* Restore the host's TCR_EL1 */ + write_sysreg_el1(cxt->tcr, SYS_TCR); + } +} + +void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, + phys_addr_t ipa, int level) +{ + struct tlb_inv_context cxt; + + dsb(ishst); + + /* Switch to requested VMID */ + mmu = kern_hyp_va(mmu); + __tlb_switch_to_guest(mmu, &cxt); + + /* + * We could do so much better if we had the VA as well. + * Instead, we invalidate Stage-2 for this IPA, and the + * whole of Stage-1. Weep... + */ + ipa >>= 12; + __tlbi_level(ipas2e1is, ipa, level); + + /* + * We have to ensure completion of the invalidation at Stage-2, + * since a table walk on another CPU could refill a TLB with a + * complete (S1 + S2) walk based on the old Stage-2 mapping if + * the Stage-1 invalidation happened first. + */ + dsb(ish); + __tlbi(vmalle1is); + dsb(ish); + isb(); + + /* + * If the host is running at EL1 and we have a VPIPT I-cache, + * then we must perform I-cache maintenance at EL2 in order for + * it to have an effect on the guest. Since the guest cannot hit + * I-cache lines allocated with a different VMID, we don't need + * to worry about junk out of guest reset (we nuke the I-cache on + * VMID rollover), but we do need to be careful when remapping + * executable pages for the same guest. This can happen when KSM + * takes a CoW fault on an executable page, copies the page into + * a page that was previously mapped in the guest and then needs + * to invalidate the guest view of the I-cache for that page + * from EL1. 
To solve this, we invalidate the entire I-cache when + * unmapping a page from a guest if we have a VPIPT I-cache but + * the host is running at EL1. As above, we could do better if + * we had the VA. + * + * The moral of this story is: if you have a VPIPT I-cache, then + * you should be running with VHE enabled. + */ + if (icache_is_vpipt()) + __flush_icache_all(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) +{ + struct tlb_inv_context cxt; + + dsb(ishst); + + /* Switch to requested VMID */ + mmu = kern_hyp_va(mmu); + __tlb_switch_to_guest(mmu, &cxt); + + __tlbi(vmalls12e1is); + dsb(ish); + isb(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) +{ + struct tlb_inv_context cxt; + + /* Switch to requested VMID */ + mmu = kern_hyp_va(mmu); + __tlb_switch_to_guest(mmu, &cxt); + + __tlbi(vmalle1); + dsb(nsh); + isb(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_flush_vm_context(void) +{ + dsb(ishst); + __tlbi(alle1is); + + /* + * VIPT and PIPT caches are not affected by VMID, so no maintenance + * is necessary across a VMID rollover. + * + * VPIPT caches constrain lookup and maintenance to the active VMID, + * so we need to invalidate lines with a stale VMID to avoid an ABA + * race after multiple rollovers. + * + */ + if (icache_is_vpipt()) + asm volatile("ic ialluis"); + + dsb(ish); +} diff --git a/arch/arm64/kvm/hyp/smccc_wa.S b/arch/arm64/kvm/hyp/smccc_wa.S new file mode 100644 index 000000000000..b0441dbdf68b --- /dev/null +++ b/arch/arm64/kvm/hyp/smccc_wa.S @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2015-2018 - ARM Ltd + * Author: Marc Zyngier <[email protected]> + */ + +#include <linux/arm-smccc.h> +#include <linux/linkage.h> + +#include <asm/kvm_asm.h> +#include <asm/kvm_mmu.h> + + /* + * This is not executed directly and is instead copied into the vectors + * by install_bp_hardening_cb(). 
+ */ + .data + .pushsection .rodata + .global __smccc_workaround_1_smc +SYM_DATA_START(__smccc_workaround_1_smc) + esb + sub sp, sp, #(8 * 4) + stp x2, x3, [sp, #(8 * 0)] + stp x0, x1, [sp, #(8 * 2)] + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 + smc #0 + ldp x2, x3, [sp, #(8 * 0)] + ldp x0, x1, [sp, #(8 * 2)] + add sp, sp, #(8 * 4) +1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ + .org 1b +SYM_DATA_END(__smccc_workaround_1_smc) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c deleted file mode 100644 index db1c4487d95d..000000000000 --- a/arch/arm64/kvm/hyp/switch.c +++ /dev/null @@ -1,936 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2015 - ARM Ltd - * Author: Marc Zyngier <[email protected]> - */ - -#include <linux/arm-smccc.h> -#include <linux/kvm_host.h> -#include <linux/types.h> -#include <linux/jump_label.h> -#include <uapi/linux/psci.h> - -#include <kvm/arm_psci.h> - -#include <asm/barrier.h> -#include <asm/cpufeature.h> -#include <asm/kprobes.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_emulate.h> -#include <asm/kvm_hyp.h> -#include <asm/kvm_mmu.h> -#include <asm/fpsimd.h> -#include <asm/debug-monitors.h> -#include <asm/processor.h> -#include <asm/thread_info.h> - -/* Check whether the FP regs were dirtied while in the host-side run loop: */ -static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu) -{ - /* - * When the system doesn't support FP/SIMD, we cannot rely on - * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an - * abort on the very first access to FP and thus we should never - * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always - * trap the accesses. 
- */ - if (!system_supports_fpsimd() || - vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) - vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | - KVM_ARM64_FP_HOST); - - return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED); -} - -/* Save the 32-bit only FPSIMD system register state */ -static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) -{ - if (!vcpu_el1_is_32bit(vcpu)) - return; - - vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2); -} - -static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) -{ - /* - * We are about to set CPTR_EL2.TFP to trap all floating point - * register accesses to EL2, however, the ARM ARM clearly states that - * traps are only taken to EL2 if the operation would not otherwise - * trap to EL1. Therefore, always make sure that for 32-bit guests, - * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. - * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to - * it will cause an exception. - */ - if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) { - write_sysreg(1 << 30, fpexc32_el2); - isb(); - } -} - -static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu) -{ - /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ - write_sysreg(1 << 15, hstr_el2); - - /* - * Make sure we trap PMU access from EL0 to EL2. Also sanitize - * PMSELR_EL0 to make sure it never contains the cycle - * counter, which could make a PMXEVCNTR_EL0 access UNDEF at - * EL1 instead of being trapped to EL2. 
- */ - write_sysreg(0, pmselr_el0); - write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); - write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); -} - -static void __hyp_text __deactivate_traps_common(void) -{ - write_sysreg(0, hstr_el2); - write_sysreg(0, pmuserenr_el0); -} - -static void activate_traps_vhe(struct kvm_vcpu *vcpu) -{ - u64 val; - - val = read_sysreg(cpacr_el1); - val |= CPACR_EL1_TTA; - val &= ~CPACR_EL1_ZEN; - - /* - * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to - * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, - * except for some missing controls, such as TAM. - * In this case, CPTR_EL2.TAM has the same position with or without - * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM - * shift value for trapping the AMU accesses. - */ - - val |= CPTR_EL2_TAM; - - if (update_fp_enabled(vcpu)) { - if (vcpu_has_sve(vcpu)) - val |= CPACR_EL1_ZEN; - } else { - val &= ~CPACR_EL1_FPEN; - __activate_traps_fpsimd32(vcpu); - } - - write_sysreg(val, cpacr_el1); - - write_sysreg(kvm_get_hyp_vector(), vbar_el1); -} -NOKPROBE_SYMBOL(activate_traps_vhe); - -static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) -{ - u64 val; - - __activate_traps_common(vcpu); - - val = CPTR_EL2_DEFAULT; - val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM; - if (!update_fp_enabled(vcpu)) { - val |= CPTR_EL2_TFP; - __activate_traps_fpsimd32(vcpu); - } - - write_sysreg(val, cptr_el2); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; - - isb(); - /* - * At this stage, and thanks to the above isb(), S2 is - * configured and enabled. We can now restore the guest's S1 - * configuration: SCTLR, and only then TCR. 
- */ - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); - isb(); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); - } -} - -static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) -{ - u64 hcr = vcpu->arch.hcr_el2; - - if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) - hcr |= HCR_TVM; - - write_sysreg(hcr, hcr_el2); - - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) - write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); - - if (has_vhe()) - activate_traps_vhe(vcpu); - else - __activate_traps_nvhe(vcpu); -} - -static void deactivate_traps_vhe(void) -{ - extern char vectors[]; /* kernel exception vectors */ - write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); - - /* - * ARM errata 1165522 and 1530923 require the actual execution of the - * above before we can switch to the EL2/EL0 translation regime used by - * the host. - */ - asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); - - write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); - write_sysreg(vectors, vbar_el1); -} -NOKPROBE_SYMBOL(deactivate_traps_vhe); - -static void __hyp_text __deactivate_traps_nvhe(void) -{ - u64 mdcr_el2 = read_sysreg(mdcr_el2); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - u64 val; - - /* - * Set the TCR and SCTLR registers in the exact opposite - * sequence as __activate_traps_nvhe (first prevent walks, - * then force the MMU on). A generous sprinkling of isb() - * ensure that things happen in this exact order. 
- */ - val = read_sysreg_el1(SYS_TCR); - write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR); - isb(); - val = read_sysreg_el1(SYS_SCTLR); - write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR); - isb(); - } - - __deactivate_traps_common(); - - mdcr_el2 &= MDCR_EL2_HPMN_MASK; - mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; - - write_sysreg(mdcr_el2, mdcr_el2); - write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); - write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); -} - -static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) -{ - /* - * If we pended a virtual abort, preserve it until it gets - * cleared. See D1.14.3 (Virtual Interrupts) for details, but - * the crucial bit is "On taking a vSError interrupt, - * HCR_EL2.VSE is cleared to 0." - */ - if (vcpu->arch.hcr_el2 & HCR_VSE) { - vcpu->arch.hcr_el2 &= ~HCR_VSE; - vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; - } - - if (has_vhe()) - deactivate_traps_vhe(); - else - __deactivate_traps_nvhe(); -} - -void activate_traps_vhe_load(struct kvm_vcpu *vcpu) -{ - __activate_traps_common(vcpu); -} - -void deactivate_traps_vhe_put(void) -{ - u64 mdcr_el2 = read_sysreg(mdcr_el2); - - mdcr_el2 &= MDCR_EL2_HPMN_MASK | - MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | - MDCR_EL2_TPMS; - - write_sysreg(mdcr_el2, mdcr_el2); - - __deactivate_traps_common(); -} - -static void __hyp_text __activate_vm(struct kvm *kvm) -{ - __load_guest_stage2(kvm); -} - -static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) -{ - write_sysreg(0, vttbr_el2); -} - -/* Save VGICv3 state on non-VHE systems */ -static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) -{ - if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { - __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); - __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); - } -} - -/* Restore VGICv3 state on non_VEH systems */ -static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) -{ - if 
(static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { - __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); - __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); - } -} - -static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) -{ - u64 par, tmp; - - /* - * Resolve the IPA the hard way using the guest VA. - * - * Stage-1 translation already validated the memory access - * rights. As such, we can use the EL1 translation regime, and - * don't have to distinguish between EL0 and EL1 access. - * - * We do need to save/restore PAR_EL1 though, as we haven't - * saved the guest context yet, and we may return early... - */ - par = read_sysreg(par_el1); - asm volatile("at s1e1r, %0" : : "r" (far)); - isb(); - - tmp = read_sysreg(par_el1); - write_sysreg(par, par_el1); - - if (unlikely(tmp & SYS_PAR_EL1_F)) - return false; /* Translation failed, back to guest */ - - /* Convert PAR to HPFAR format */ - *hpfar = PAR_TO_HPFAR(tmp); - return true; -} - -static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) -{ - u8 ec; - u64 esr; - u64 hpfar, far; - - esr = vcpu->arch.fault.esr_el2; - ec = ESR_ELx_EC(esr); - - if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) - return true; - - far = read_sysreg_el2(SYS_FAR); - - /* - * The HPFAR can be invalid if the stage 2 fault did not - * happen during a stage 1 page table walk (the ESR_EL2.S1PTW - * bit is clear) and one of the two following cases are true: - * 1. The fault was due to a permission fault - * 2. The processor carries errata 834220 - * - * Therefore, for all non S1PTW faults where we either have a - * permission fault or the errata workaround is enabled, we - * resolve the IPA using the AT instruction. 
- */ - if (!(esr & ESR_ELx_S1PTW) && - (cpus_have_final_cap(ARM64_WORKAROUND_834220) || - (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { - if (!__translate_far_to_hpfar(far, &hpfar)) - return false; - } else { - hpfar = read_sysreg(hpfar_el2); - } - - vcpu->arch.fault.far_el2 = far; - vcpu->arch.fault.hpfar_el2 = hpfar; - return true; -} - -/* Check for an FPSIMD/SVE trap and handle as appropriate */ -static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) -{ - bool vhe, sve_guest, sve_host; - u8 hsr_ec; - - if (!system_supports_fpsimd()) - return false; - - if (system_supports_sve()) { - sve_guest = vcpu_has_sve(vcpu); - sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE; - vhe = true; - } else { - sve_guest = false; - sve_host = false; - vhe = has_vhe(); - } - - hsr_ec = kvm_vcpu_trap_get_class(vcpu); - if (hsr_ec != ESR_ELx_EC_FP_ASIMD && - hsr_ec != ESR_ELx_EC_SVE) - return false; - - /* Don't handle SVE traps for non-SVE vcpus here: */ - if (!sve_guest) - if (hsr_ec != ESR_ELx_EC_FP_ASIMD) - return false; - - /* Valid trap. Switch the context: */ - - if (vhe) { - u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN; - - if (sve_guest) - reg |= CPACR_EL1_ZEN; - - write_sysreg(reg, cpacr_el1); - } else { - write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, - cptr_el2); - } - - isb(); - - if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { - /* - * In the SVE case, VHE is assumed: it is enforced by - * Kconfig and kvm_arch_init(). 
- */ - if (sve_host) { - struct thread_struct *thread = container_of( - vcpu->arch.host_fpsimd_state, - struct thread_struct, uw.fpsimd_state); - - sve_save_state(sve_pffr(thread), - &vcpu->arch.host_fpsimd_state->fpsr); - } else { - __fpsimd_save_state(vcpu->arch.host_fpsimd_state); - } - - vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; - } - - if (sve_guest) { - sve_load_state(vcpu_sve_pffr(vcpu), - &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr, - sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); - write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12); - } else { - __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs); - } - - /* Skip restoring fpexc32 for AArch64 guests */ - if (!(read_sysreg(hcr_el2) & HCR_RW)) - write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2], - fpexc32_el2); - - vcpu->arch.flags |= KVM_ARM64_FP_ENABLED; - - return true; -} - -static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu) -{ - u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu)); - int rt = kvm_vcpu_sys_get_rt(vcpu); - u64 val = vcpu_get_reg(vcpu, rt); - - /* - * The normal sysreg handling code expects to see the traps, - * let's not do anything here. 
- */ - if (vcpu->arch.hcr_el2 & HCR_TVM) - return false; - - switch (sysreg) { - case SYS_SCTLR_EL1: - write_sysreg_el1(val, SYS_SCTLR); - break; - case SYS_TTBR0_EL1: - write_sysreg_el1(val, SYS_TTBR0); - break; - case SYS_TTBR1_EL1: - write_sysreg_el1(val, SYS_TTBR1); - break; - case SYS_TCR_EL1: - write_sysreg_el1(val, SYS_TCR); - break; - case SYS_ESR_EL1: - write_sysreg_el1(val, SYS_ESR); - break; - case SYS_FAR_EL1: - write_sysreg_el1(val, SYS_FAR); - break; - case SYS_AFSR0_EL1: - write_sysreg_el1(val, SYS_AFSR0); - break; - case SYS_AFSR1_EL1: - write_sysreg_el1(val, SYS_AFSR1); - break; - case SYS_MAIR_EL1: - write_sysreg_el1(val, SYS_MAIR); - break; - case SYS_AMAIR_EL1: - write_sysreg_el1(val, SYS_AMAIR); - break; - case SYS_CONTEXTIDR_EL1: - write_sysreg_el1(val, SYS_CONTEXTIDR); - break; - default: - return false; - } - - __kvm_skip_instr(vcpu); - return true; -} - -static bool __hyp_text esr_is_ptrauth_trap(u32 esr) -{ - u32 ec = ESR_ELx_EC(esr); - - if (ec == ESR_ELx_EC_PAC) - return true; - - if (ec != ESR_ELx_EC_SYS64) - return false; - - switch (esr_sys64_to_sysreg(esr)) { - case SYS_APIAKEYLO_EL1: - case SYS_APIAKEYHI_EL1: - case SYS_APIBKEYLO_EL1: - case SYS_APIBKEYHI_EL1: - case SYS_APDAKEYLO_EL1: - case SYS_APDAKEYHI_EL1: - case SYS_APDBKEYLO_EL1: - case SYS_APDBKEYHI_EL1: - case SYS_APGAKEYLO_EL1: - case SYS_APGAKEYHI_EL1: - return true; - } - - return false; -} - -#define __ptrauth_save_key(regs, key) \ -({ \ - regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ - regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ -}) - -static bool __hyp_text __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *ctxt; - u64 val; - - if (!vcpu_has_ptrauth(vcpu) || - !esr_is_ptrauth_trap(kvm_vcpu_get_hsr(vcpu))) - return false; - - ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - __ptrauth_save_key(ctxt->sys_regs, APIA); - __ptrauth_save_key(ctxt->sys_regs, APIB); - __ptrauth_save_key(ctxt->sys_regs, 
APDA); - __ptrauth_save_key(ctxt->sys_regs, APDB); - __ptrauth_save_key(ctxt->sys_regs, APGA); - - vcpu_ptrauth_enable(vcpu); - - val = read_sysreg(hcr_el2); - val |= (HCR_API | HCR_APK); - write_sysreg(val, hcr_el2); - - return true; -} - -/* - * Return true when we were able to fixup the guest exit and should return to - * the guest, false when we should restore the host state and return to the - * main run loop. - */ -static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) -{ - if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) - vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); - - /* - * We're using the raw exception code in order to only process - * the trap if no SError is pending. We will come back to the - * same PC once the SError has been injected, and replay the - * trapping instruction. - */ - if (*exit_code != ARM_EXCEPTION_TRAP) - goto exit; - - if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && - handle_tx2_tvm(vcpu)) - return true; - - /* - * We trap the first access to the FP/SIMD to save the host context - * and restore the guest context lazily. - * If FP/SIMD is not implemented, handle the trap and inject an - * undefined instruction exception to the guest. - * Similarly for trapped SVE accesses. 
- */ - if (__hyp_handle_fpsimd(vcpu)) - return true; - - if (__hyp_handle_ptrauth(vcpu)) - return true; - - if (!__populate_fault_info(vcpu)) - return true; - - if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { - bool valid; - - valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && - kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && - kvm_vcpu_dabt_isvalid(vcpu) && - !kvm_vcpu_dabt_isextabt(vcpu) && - !kvm_vcpu_dabt_iss1tw(vcpu); - - if (valid) { - int ret = __vgic_v2_perform_cpuif_access(vcpu); - - if (ret == 1) - return true; - - /* Promote an illegal access to an SError.*/ - if (ret == -1) - *exit_code = ARM_EXCEPTION_EL1_SERROR; - - goto exit; - } - } - - if (static_branch_unlikely(&vgic_v3_cpuif_trap) && - (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { - int ret = __vgic_v3_perform_cpuif_access(vcpu); - - if (ret == 1) - return true; - } - -exit: - /* Return to the host kernel and handle the exit */ - return false; -} - -static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu) -{ - if (!cpus_have_final_cap(ARM64_SSBD)) - return false; - - return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); -} - -static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_ARM64_SSBD - /* - * The host runs with the workaround always present. If the - * guest wants it disabled, so be it... - */ - if (__needs_ssbd_off(vcpu) && - __hyp_this_cpu_read(arm64_ssbd_callback_required)) - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); -#endif -} - -static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_ARM64_SSBD - /* - * If the guest has disabled the workaround, bring it back on. 
- */ - if (__needs_ssbd_off(vcpu) && - __hyp_this_cpu_read(arm64_ssbd_callback_required)) - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); -#endif -} - -/** - * Disable host events, enable guest events - */ -static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) -{ - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; - - if (pmu->events_host) - write_sysreg(pmu->events_host, pmcntenclr_el0); - - if (pmu->events_guest) - write_sysreg(pmu->events_guest, pmcntenset_el0); - - return (pmu->events_host || pmu->events_guest); -} - -/** - * Disable guest events, enable host events - */ -static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) -{ - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; - - if (pmu->events_guest) - write_sysreg(pmu->events_guest, pmcntenclr_el0); - - if (pmu->events_host) - write_sysreg(pmu->events_host, pmcntenset_el0); -} - -/* Switch to the guest for VHE systems running in EL2 */ -static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *host_ctxt; - struct kvm_cpu_context *guest_ctxt; - u64 exit_code; - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - host_ctxt->__hyp_running_vcpu = vcpu; - guest_ctxt = &vcpu->arch.ctxt; - - sysreg_save_host_state_vhe(host_ctxt); - - /* - * ARM erratum 1165522 requires us to configure both stage 1 and - * stage 2 translation for the guest context before we clear - * HCR_EL2.TGE. - * - * We have already configured the guest's stage 1 translation in - * kvm_vcpu_load_sysregs above. We must now call __activate_vm - * before __activate_traps, because __activate_vm configures - * stage 2 translation, and __activate_traps clear HCR_EL2.TGE - * (among other things). 
- */ - __activate_vm(vcpu->kvm); - __activate_traps(vcpu); - - sysreg_restore_guest_state_vhe(guest_ctxt); - __debug_switch_to_guest(vcpu); - - __set_guest_arch_workaround_state(vcpu); - - do { - /* Jump in the fire! */ - exit_code = __guest_enter(vcpu, host_ctxt); - - /* And we're baaack! */ - } while (fixup_guest_exit(vcpu, &exit_code)); - - __set_host_arch_workaround_state(vcpu); - - sysreg_save_guest_state_vhe(guest_ctxt); - - __deactivate_traps(vcpu); - - sysreg_restore_host_state_vhe(host_ctxt); - - if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) - __fpsimd_save_fpexc32(vcpu); - - __debug_switch_to_host(vcpu); - - return exit_code; -} -NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe); - -int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) -{ - int ret; - - local_daif_mask(); - - /* - * Having IRQs masked via PMR when entering the guest means the GIC - * will not signal the CPU of interrupts of lower priority, and the - * only way to get out will be via guest exceptions. - * Naturally, we want to avoid this. - * - * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a - * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. - */ - pmr_sync(); - - ret = __kvm_vcpu_run_vhe(vcpu); - - /* - * local_daif_restore() takes care to properly restore PSTATE.DAIF - * and the GIC PMR if the host is using IRQ priorities. - */ - local_daif_restore(DAIF_PROCCTX_NOIRQ); - - /* - * When we exit from the guest we change a number of CPU configuration - * parameters, such as traps. Make sure these changes take effect - * before running the host or additional guests. 
- */ - isb(); - - return ret; -} - -/* Switch to the guest for legacy non-VHE systems */ -int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *host_ctxt; - struct kvm_cpu_context *guest_ctxt; - bool pmu_switch_needed; - u64 exit_code; - - /* - * Having IRQs masked via PMR when entering the guest means the GIC - * will not signal the CPU of interrupts of lower priority, and the - * only way to get out will be via guest exceptions. - * Naturally, we want to avoid this. - */ - if (system_uses_irq_prio_masking()) { - gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - pmr_sync(); - } - - vcpu = kern_hyp_va(vcpu); - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - host_ctxt->__hyp_running_vcpu = vcpu; - guest_ctxt = &vcpu->arch.ctxt; - - pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); - - __sysreg_save_state_nvhe(host_ctxt); - - /* - * We must restore the 32-bit state before the sysregs, thanks - * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). - * - * Also, and in order to be able to deal with erratum #1319537 (A57) - * and #1319367 (A72), we must ensure that all VM-related sysreg are - * restored before we enable S2 translation. - */ - __sysreg32_restore_state(vcpu); - __sysreg_restore_state_nvhe(guest_ctxt); - - __activate_vm(kern_hyp_va(vcpu->kvm)); - __activate_traps(vcpu); - - __hyp_vgic_restore_state(vcpu); - __timer_enable_traps(vcpu); - - __debug_switch_to_guest(vcpu); - - __set_guest_arch_workaround_state(vcpu); - - do { - /* Jump in the fire! */ - exit_code = __guest_enter(vcpu, host_ctxt); - - /* And we're baaack! 
*/ - } while (fixup_guest_exit(vcpu, &exit_code)); - - __set_host_arch_workaround_state(vcpu); - - __sysreg_save_state_nvhe(guest_ctxt); - __sysreg32_save_state(vcpu); - __timer_disable_traps(vcpu); - __hyp_vgic_save_state(vcpu); - - __deactivate_traps(vcpu); - __deactivate_vm(vcpu); - - __sysreg_restore_state_nvhe(host_ctxt); - - if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) - __fpsimd_save_fpexc32(vcpu); - - /* - * This must come after restoring the host sysregs, since a non-VHE - * system may enable SPE here and make use of the TTBRs. - */ - __debug_switch_to_host(vcpu); - - if (pmu_switch_needed) - __pmu_switch_to_host(host_ctxt); - - /* Returning to host will clear PSR.I, remask PMR if needed */ - if (system_uses_irq_prio_masking()) - gic_write_pmr(GIC_PRIO_IRQOFF); - - return exit_code; -} - -static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; - -static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par, - struct kvm_cpu_context *__host_ctxt) -{ - struct kvm_vcpu *vcpu; - unsigned long str_va; - - vcpu = __host_ctxt->__hyp_running_vcpu; - - if (read_sysreg(vttbr_el2)) { - __timer_disable_traps(vcpu); - __deactivate_traps(vcpu); - __deactivate_vm(vcpu); - __sysreg_restore_state_nvhe(__host_ctxt); - } - - /* - * Force the panic string to be loaded from the literal pool, - * making sure it is a kernel address and not a PC-relative - * reference. 
- */ - asm volatile("ldr %0, =__hyp_panic_string" : "=r" (str_va)); - - __hyp_do_panic(str_va, - spsr, elr, - read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR), - read_sysreg(hpfar_el2), par, vcpu); -} - -static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par, - struct kvm_cpu_context *host_ctxt) -{ - struct kvm_vcpu *vcpu; - vcpu = host_ctxt->__hyp_running_vcpu; - - __deactivate_traps(vcpu); - sysreg_restore_host_state_vhe(host_ctxt); - - panic(__hyp_panic_string, - spsr, elr, - read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR), - read_sysreg(hpfar_el2), par, vcpu); -} -NOKPROBE_SYMBOL(__hyp_call_panic_vhe); - -void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) -{ - u64 spsr = read_sysreg_el2(SYS_SPSR); - u64 elr = read_sysreg_el2(SYS_ELR); - u64 par = read_sysreg(par_el1); - - if (!has_vhe()) - __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt); - else - __hyp_call_panic_vhe(spsr, elr, par, host_ctxt); - - unreachable(); -} diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c deleted file mode 100644 index cc7e957f5b2c..000000000000 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ /dev/null @@ -1,333 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2012-2015 - ARM Ltd - * Author: Marc Zyngier <[email protected]> - */ - -#include <linux/compiler.h> -#include <linux/kvm_host.h> - -#include <asm/kprobes.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_emulate.h> -#include <asm/kvm_hyp.h> - -/* - * Non-VHE: Both host and guest must save everything. - * - * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and - * pstate, which are handled as part of the el2 return state) on every - * switch (sp_el0 is being dealt with in the assembly code). - * tpidr_el0 and tpidrro_el0 only need to be switched when going - * to host userspace or a different VCPU. EL1 registers only need to be - * switched when potentially going to run a different VCPU. 
The latter two - * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put. - */ - -static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) -{ - ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); -} - -static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt) -{ - ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); - ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); -} - -static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) -{ - ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); - ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(SYS_SCTLR); - ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(SYS_CPACR); - ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(SYS_TTBR0); - ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(SYS_TTBR1); - ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(SYS_TCR); - ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(SYS_ESR); - ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(SYS_AFSR0); - ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(SYS_AFSR1); - ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(SYS_FAR); - ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(SYS_MAIR); - ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(SYS_VBAR); - ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(SYS_CONTEXTIDR); - ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(SYS_AMAIR); - ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(SYS_CNTKCTL); - ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); - ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); - - ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); - ctxt->gp_regs.elr_el1 = read_sysreg_el1(SYS_ELR); - ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(SYS_SPSR); -} - -static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) -{ - ctxt->gp_regs.regs.pc = read_sysreg_el2(SYS_ELR); - ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); - - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) - ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2); -} - -void __hyp_text 
__sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_save_el1_state(ctxt); - __sysreg_save_common_state(ctxt); - __sysreg_save_user_state(ctxt); - __sysreg_save_el2_return_state(ctxt); -} - -void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_save_common_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_save_host_state_vhe); - -void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_save_common_state(ctxt); - __sysreg_save_el2_return_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe); - -static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) -{ - write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); -} - -static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) -{ - write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); - write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); -} - -static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) -{ - write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); - write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); - - if (has_vhe() || - !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); - } else if (!ctxt->__hyp_running_vcpu) { - /* - * Must only be done for guest registers, hence the context - * test. We're coming from the host, so SCTLR.M is already - * set. Pairs with __activate_traps_nvhe(). 
- */ - write_sysreg_el1((ctxt->sys_regs[TCR_EL1] | - TCR_EPD1_MASK | TCR_EPD0_MASK), - SYS_TCR); - isb(); - } - - write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR); - write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0); - write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1); - write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR); - write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0); - write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1); - write_sysreg_el1(ctxt->sys_regs[FAR_EL1], SYS_FAR); - write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], SYS_MAIR); - write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], SYS_VBAR); - write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],SYS_CONTEXTIDR); - write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], SYS_AMAIR); - write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], SYS_CNTKCTL); - write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); - write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); - - if (!has_vhe() && - cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) && - ctxt->__hyp_running_vcpu) { - /* - * Must only be done for host registers, hence the context - * test. Pairs with __deactivate_traps_nvhe(). - */ - isb(); - /* - * At this stage, and thanks to the above isb(), S2 is - * deconfigured and disabled. We can now restore the host's - * S1 configuration: SCTLR, and only then TCR. - */ - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); - isb(); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); - } - - write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); - write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR); - write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR); -} - -static void __hyp_text -__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) -{ - u64 pstate = ctxt->gp_regs.regs.pstate; - u64 mode = pstate & PSR_AA32_MODE_MASK; - - /* - * Safety check to ensure we're setting the CPU up to enter the guest - * in a less privileged mode. 
- * - * If we are attempting a return to EL2 or higher in AArch64 state, - * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that - * we'll take an illegal exception state exception immediately after - * the ERET to the guest. Attempts to return to AArch32 Hyp will - * result in an illegal exception return because EL2's execution state - * is determined by SCR_EL3.RW. - */ - if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) - pstate = PSR_MODE_EL2h | PSR_IL_BIT; - - write_sysreg_el2(ctxt->gp_regs.regs.pc, SYS_ELR); - write_sysreg_el2(pstate, SYS_SPSR); - - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) - write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); -} - -void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_restore_el1_state(ctxt); - __sysreg_restore_common_state(ctxt); - __sysreg_restore_user_state(ctxt); - __sysreg_restore_el2_return_state(ctxt); -} - -void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_restore_common_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe); - -void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt) -{ - __sysreg_restore_common_state(ctxt); - __sysreg_restore_el2_return_state(ctxt); -} -NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe); - -void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) -{ - u64 *spsr, *sysreg; - - if (!vcpu_el1_is_32bit(vcpu)) - return; - - spsr = vcpu->arch.ctxt.gp_regs.spsr; - sysreg = vcpu->arch.ctxt.sys_regs; - - spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt); - spsr[KVM_SPSR_UND] = read_sysreg(spsr_und); - spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq); - spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq); - - sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); - sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); - - if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) - sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); -} - -void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) -{ - u64 *spsr, 
*sysreg; - - if (!vcpu_el1_is_32bit(vcpu)) - return; - - spsr = vcpu->arch.ctxt.gp_regs.spsr; - sysreg = vcpu->arch.ctxt.sys_regs; - - write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt); - write_sysreg(spsr[KVM_SPSR_UND], spsr_und); - write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq); - write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq); - - write_sysreg(sysreg[DACR32_EL2], dacr32_el2); - write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2); - - if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) - write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); -} - -/** - * kvm_vcpu_load_sysregs - Load guest system registers to the physical CPU - * - * @vcpu: The VCPU pointer - * - * Load system registers that do not affect the host's execution, for - * example EL1 system registers on a VHE system where the host kernel - * runs at EL2. This function is called from KVM's vcpu_load() function - * and loading system register state early avoids having to load them on - * every entry to the VM. - */ -void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; - struct kvm_cpu_context *host_ctxt; - - if (!has_vhe()) - return; - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - __sysreg_save_user_state(host_ctxt); - - /* - * Load guest EL1 and user state - * - * We must restore the 32-bit state before the sysregs, thanks - * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). - */ - __sysreg32_restore_state(vcpu); - __sysreg_restore_user_state(guest_ctxt); - __sysreg_restore_el1_state(guest_ctxt); - - vcpu->arch.sysregs_loaded_on_cpu = true; - - activate_traps_vhe_load(vcpu); -} - -/** - * kvm_vcpu_put_sysregs - Restore host system registers to the physical CPU - * - * @vcpu: The VCPU pointer - * - * Save guest system registers that do not affect the host's execution, for - * example EL1 system registers on a VHE system where the host kernel - * runs at EL2. 
This function is called from KVM's vcpu_put() function - * and deferring saving system register state until we're no longer running the - * VCPU avoids having to save them on every exit from the VM. - */ -void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) -{ - struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; - struct kvm_cpu_context *host_ctxt; - - if (!has_vhe()) - return; - - host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; - deactivate_traps_vhe_put(); - - __sysreg_save_el1_state(guest_ctxt); - __sysreg_save_user_state(guest_ctxt); - __sysreg32_save_state(vcpu); - - /* Restore host user state */ - __sysreg_restore_user_state(host_ctxt); - - vcpu->arch.sysregs_loaded_on_cpu = false; -} - -void __hyp_text __kvm_enable_ssbs(void) -{ - u64 tmp; - - asm volatile( - "mrs %0, sctlr_el2\n" - "orr %0, %0, %1\n" - "msr sctlr_el2, %0" - : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); -} diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c deleted file mode 100644 index d063a576d511..000000000000 --- a/arch/arm64/kvm/hyp/tlb.c +++ /dev/null @@ -1,242 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2015 - ARM Ltd - * Author: Marc Zyngier <[email protected]> - */ - -#include <linux/irqflags.h> - -#include <asm/kvm_hyp.h> -#include <asm/kvm_mmu.h> -#include <asm/tlbflush.h> - -struct tlb_inv_context { - unsigned long flags; - u64 tcr; - u64 sctlr; -}; - -static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - u64 val; - - local_irq_save(cxt->flags); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - /* - * For CPUs that are affected by ARM errata 1165522 or 1530923, - * we cannot trust stage-1 to be in a correct state at that - * point. Since we do not want to force a full load of the - * vcpu state, we prevent the EL1 page-table walker to - * allocate new TLBs. This is done by setting the EPD bits - * in the TCR_EL1 register. 
We also need to prevent it to - * allocate IPA->PA walks, so we enable the S1 MMU... - */ - val = cxt->tcr = read_sysreg_el1(SYS_TCR); - val |= TCR_EPD1_MASK | TCR_EPD0_MASK; - write_sysreg_el1(val, SYS_TCR); - val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR); - val |= SCTLR_ELx_M; - write_sysreg_el1(val, SYS_SCTLR); - } - - /* - * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and - * most TLB operations target EL2/EL0. In order to affect the - * guest TLBs (EL1/EL0), we need to change one of these two - * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so - * let's flip TGE before executing the TLB operation. - * - * ARM erratum 1165522 requires some special handling (again), - * as we need to make sure both stages of translation are in - * place before clearing TGE. __load_guest_stage2() already - * has an ISB in order to deal with this. - */ - __load_guest_stage2(kvm); - val = read_sysreg(hcr_el2); - val &= ~HCR_TGE; - write_sysreg(val, hcr_el2); - isb(); -} - -static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - u64 val; - - /* - * For CPUs that are affected by ARM 1319367, we need to - * avoid a host Stage-1 walk while we have the guest's - * VMID set in the VTTBR in order to invalidate TLBs. - * We're guaranteed that the S1 MMU is enabled, so we can - * simply set the EPD bits to avoid any further TLB fill. - */ - val = cxt->tcr = read_sysreg_el1(SYS_TCR); - val |= TCR_EPD1_MASK | TCR_EPD0_MASK; - write_sysreg_el1(val, SYS_TCR); - isb(); - } - - /* __load_guest_stage2() includes an ISB for the workaround. 
*/ - __load_guest_stage2(kvm); - asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); -} - -static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - if (has_vhe()) - __tlb_switch_to_guest_vhe(kvm, cxt); - else - __tlb_switch_to_guest_nvhe(kvm, cxt); -} - -static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - /* - * We're done with the TLB operation, let's restore the host's - * view of HCR_EL2. - */ - write_sysreg(0, vttbr_el2); - write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); - isb(); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - /* Restore the registers to what they were */ - write_sysreg_el1(cxt->tcr, SYS_TCR); - write_sysreg_el1(cxt->sctlr, SYS_SCTLR); - } - - local_irq_restore(cxt->flags); -} - -static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - write_sysreg(0, vttbr_el2); - - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - /* Ensure write of the host VMID */ - isb(); - /* Restore the host's TCR_EL1 */ - write_sysreg_el1(cxt->tcr, SYS_TCR); - } -} - -static void __hyp_text __tlb_switch_to_host(struct kvm *kvm, - struct tlb_inv_context *cxt) -{ - if (has_vhe()) - __tlb_switch_to_host_vhe(kvm, cxt); - else - __tlb_switch_to_host_nvhe(kvm, cxt); -} - -void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) -{ - struct tlb_inv_context cxt; - - dsb(ishst); - - /* Switch to requested VMID */ - kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest(kvm, &cxt); - - /* - * We could do so much better if we had the VA as well. - * Instead, we invalidate Stage-2 for this IPA, and the - * whole of Stage-1. Weep... 
- */ - ipa >>= 12; - __tlbi(ipas2e1is, ipa); - - /* - * We have to ensure completion of the invalidation at Stage-2, - * since a table walk on another CPU could refill a TLB with a - * complete (S1 + S2) walk based on the old Stage-2 mapping if - * the Stage-1 invalidation happened first. - */ - dsb(ish); - __tlbi(vmalle1is); - dsb(ish); - isb(); - - /* - * If the host is running at EL1 and we have a VPIPT I-cache, - * then we must perform I-cache maintenance at EL2 in order for - * it to have an effect on the guest. Since the guest cannot hit - * I-cache lines allocated with a different VMID, we don't need - * to worry about junk out of guest reset (we nuke the I-cache on - * VMID rollover), but we do need to be careful when remapping - * executable pages for the same guest. This can happen when KSM - * takes a CoW fault on an executable page, copies the page into - * a page that was previously mapped in the guest and then needs - * to invalidate the guest view of the I-cache for that page - * from EL1. To solve this, we invalidate the entire I-cache when - * unmapping a page from a guest if we have a VPIPT I-cache but - * the host is running at EL1. As above, we could do better if - * we had the VA. - * - * The moral of this story is: if you have a VPIPT I-cache, then - * you should be running with VHE enabled. 
- */ - if (!has_vhe() && icache_is_vpipt()) - __flush_icache_all(); - - __tlb_switch_to_host(kvm, &cxt); -} - -void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) -{ - struct tlb_inv_context cxt; - - dsb(ishst); - - /* Switch to requested VMID */ - kvm = kern_hyp_va(kvm); - __tlb_switch_to_guest(kvm, &cxt); - - __tlbi(vmalls12e1is); - dsb(ish); - isb(); - - __tlb_switch_to_host(kvm, &cxt); -} - -void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); - struct tlb_inv_context cxt; - - /* Switch to requested VMID */ - __tlb_switch_to_guest(kvm, &cxt); - - __tlbi(vmalle1); - dsb(nsh); - isb(); - - __tlb_switch_to_host(kvm, &cxt); -} - -void __hyp_text __kvm_flush_vm_context(void) -{ - dsb(ishst); - __tlbi(alle1is); - - /* - * VIPT and PIPT caches are not affected by VMID, so no maintenance - * is necessary across a VMID rollover. - * - * VPIPT caches constrain lookup and maintenance to the active VMID, - * so we need to invalidate lines with a stale VMID to avoid an ABA - * race after multiple rollovers. 
- * - */ - if (icache_is_vpipt()) - asm volatile("ic ialluis"); - - dsb(ish); -} diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index 4f3a087e36d5..bd1bab551d48 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -13,7 +13,7 @@ #include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> -static bool __hyp_text __is_be(struct kvm_vcpu *vcpu) +static bool __is_be(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) return !!(read_sysreg_el2(SYS_SPSR) & PSR_AA32_E_BIT); @@ -32,7 +32,7 @@ static bool __hyp_text __is_be(struct kvm_vcpu *vcpu) * 0: Not a GICV access * -1: Illegal GICV access successfully performed */ -int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) +int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) { struct kvm *kvm = kern_hyp_va(vcpu->kvm); struct vgic_dist *vgic = &kvm->arch.vgic; diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 10ed539835c1..5a0073511efb 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -16,7 +16,7 @@ #define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) #define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5)) -static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) +static u64 __gic_v3_get_lr(unsigned int lr) { switch (lr & 0xf) { case 0: @@ -56,7 +56,7 @@ static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) unreachable(); } -static void __hyp_text __gic_v3_set_lr(u64 val, int lr) +static void __gic_v3_set_lr(u64 val, int lr) { switch (lr & 0xf) { case 0: @@ -110,7 +110,7 @@ static void __hyp_text __gic_v3_set_lr(u64 val, int lr) } } -static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n) +static void __vgic_v3_write_ap0rn(u32 val, int n) { switch (n) { case 0: @@ -128,7 +128,7 @@ static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n) } } -static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n) +static void 
__vgic_v3_write_ap1rn(u32 val, int n) { switch (n) { case 0: @@ -146,7 +146,7 @@ static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n) } } -static u32 __hyp_text __vgic_v3_read_ap0rn(int n) +static u32 __vgic_v3_read_ap0rn(int n) { u32 val; @@ -170,7 +170,7 @@ static u32 __hyp_text __vgic_v3_read_ap0rn(int n) return val; } -static u32 __hyp_text __vgic_v3_read_ap1rn(int n) +static u32 __vgic_v3_read_ap1rn(int n) { u32 val; @@ -194,7 +194,7 @@ static u32 __hyp_text __vgic_v3_read_ap1rn(int n) return val; } -void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) { u64 used_lrs = cpu_if->used_lrs; @@ -229,7 +229,7 @@ void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) } } -void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) { u64 used_lrs = cpu_if->used_lrs; int i; @@ -255,7 +255,7 @@ void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) } } -void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) { /* * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. 
This causes a @@ -302,7 +302,7 @@ void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); } -void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) { u64 val; @@ -328,7 +328,7 @@ void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) write_gicreg(0, ICH_HCR_EL2); } -void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) { u64 val; u32 nr_pre_bits; @@ -361,7 +361,7 @@ void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) } } -void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) +void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) { u64 val; u32 nr_pre_bits; @@ -394,7 +394,7 @@ void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) } } -void __hyp_text __vgic_v3_init_lrs(void) +void __vgic_v3_init_lrs(void) { int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2)); int i; @@ -403,30 +403,30 @@ void __hyp_text __vgic_v3_init_lrs(void) __gic_v3_set_lr(0, i); } -u64 __hyp_text __vgic_v3_get_ich_vtr_el2(void) +u64 __vgic_v3_get_ich_vtr_el2(void) { return read_gicreg(ICH_VTR_EL2); } -u64 __hyp_text __vgic_v3_read_vmcr(void) +u64 __vgic_v3_read_vmcr(void) { return read_gicreg(ICH_VMCR_EL2); } -void __hyp_text __vgic_v3_write_vmcr(u32 vmcr) +void __vgic_v3_write_vmcr(u32 vmcr) { write_gicreg(vmcr, ICH_VMCR_EL2); } -static int __hyp_text __vgic_v3_bpr_min(void) +static int __vgic_v3_bpr_min(void) { /* See Pseudocode for VPriorityGroup */ return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2)); } -static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu) +static int __vgic_v3_get_group(struct kvm_vcpu *vcpu) { - u32 esr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; return crm != 8; @@ -434,9 
+434,8 @@ static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu) #define GICv3_IDLE_PRIORITY 0xff -static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, - u32 vmcr, - u64 *lr_val) +static int __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, u32 vmcr, + u64 *lr_val) { unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; u8 priority = GICv3_IDLE_PRIORITY; @@ -474,8 +473,8 @@ static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, return lr; } -static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, - int intid, u64 *lr_val) +static int __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, int intid, + u64 *lr_val) { unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; int i; @@ -494,7 +493,7 @@ static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, return -1; } -static int __hyp_text __vgic_v3_get_highest_active_priority(void) +static int __vgic_v3_get_highest_active_priority(void) { u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); u32 hap = 0; @@ -526,12 +525,12 @@ static int __hyp_text __vgic_v3_get_highest_active_priority(void) return GICv3_IDLE_PRIORITY; } -static unsigned int __hyp_text __vgic_v3_get_bpr0(u32 vmcr) +static unsigned int __vgic_v3_get_bpr0(u32 vmcr) { return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; } -static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr) +static unsigned int __vgic_v3_get_bpr1(u32 vmcr) { unsigned int bpr; @@ -550,7 +549,7 @@ static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr) * Convert a priority to a preemption level, taking the relevant BPR * into account by zeroing the sub-priority bits. 
*/ -static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) +static u8 __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) { unsigned int bpr; @@ -568,7 +567,7 @@ static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) * matter what the guest does with its BPR, we can always set/get the * same value of a priority. */ -static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) +static void __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) { u8 pre, ap; u32 val; @@ -587,7 +586,7 @@ static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) } } -static int __hyp_text __vgic_v3_clear_highest_active_priority(void) +static int __vgic_v3_clear_highest_active_priority(void) { u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); u32 hap = 0; @@ -625,7 +624,7 @@ static int __hyp_text __vgic_v3_clear_highest_active_priority(void) return GICv3_IDLE_PRIORITY; } -static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 lr_val; u8 lr_prio, pmr; @@ -661,7 +660,7 @@ spurious: vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS); } -static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val) +static void __vgic_v3_clear_active_lr(int lr, u64 lr_val) { lr_val &= ~ICH_LR_ACTIVE_BIT; if (lr_val & ICH_LR_HW) { @@ -674,7 +673,7 @@ static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val) __gic_v3_set_lr(lr_val, lr); } -static void __hyp_text __vgic_v3_bump_eoicount(void) +static void __vgic_v3_bump_eoicount(void) { u32 hcr; @@ -683,8 +682,7 @@ static void __hyp_text __vgic_v3_bump_eoicount(void) write_gicreg(hcr, ICH_HCR_EL2); } -static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 vid = vcpu_get_reg(vcpu, rt); u64 lr_val; @@ -707,7 +705,7 @@ static void __hyp_text 
__vgic_v3_write_dir(struct kvm_vcpu *vcpu, __vgic_v3_clear_active_lr(lr, lr_val); } -static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 vid = vcpu_get_reg(vcpu, rt); u64 lr_val; @@ -744,17 +742,17 @@ static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int __vgic_v3_clear_active_lr(lr, lr_val); } -static void __hyp_text __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG0_MASK)); } -static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK)); } -static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); @@ -766,7 +764,7 @@ static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, __vgic_v3_write_vmcr(vmcr); } -static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); @@ -778,17 +776,17 @@ static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, __vgic_v3_write_vmcr(vmcr); } -static void __hyp_text __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr0(vmcr)); } -static void __hyp_text __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr1(vmcr)); } -static void 
__hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); u8 bpr_min = __vgic_v3_bpr_min() - 1; @@ -805,7 +803,7 @@ static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int __vgic_v3_write_vmcr(vmcr); } -static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +static void __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); u8 bpr_min = __vgic_v3_bpr_min(); @@ -825,7 +823,7 @@ static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int __vgic_v3_write_vmcr(vmcr); } -static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n) +static void __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n) { u32 val; @@ -837,7 +835,7 @@ static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n vcpu_set_reg(vcpu, rt, val); } -static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n) +static void __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n) { u32 val = vcpu_get_reg(vcpu, rt); @@ -847,56 +845,49 @@ static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int __vgic_v3_write_ap1rn(val, n); } -static void __hyp_text __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu, +static void __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_read_apxrn(vcpu, rt, 0); } -static void __hyp_text __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu, +static void __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_read_apxrn(vcpu, rt, 1); } -static void __hyp_text __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_read_apxrn(vcpu, rt, 2); } -static void __hyp_text __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) 
+static void __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_read_apxrn(vcpu, rt, 3); } -static void __hyp_text __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_write_apxrn(vcpu, rt, 0); } -static void __hyp_text __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_write_apxrn(vcpu, rt, 1); } -static void __hyp_text __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_write_apxrn(vcpu, rt, 2); } -static void __hyp_text __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { __vgic_v3_write_apxrn(vcpu, rt, 3); } -static void __hyp_text __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 lr_val; int lr, lr_grp, grp; @@ -915,16 +906,14 @@ spurious: vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); } -static void __hyp_text __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vmcr &= ICH_VMCR_PMR_MASK; vmcr >>= ICH_VMCR_PMR_SHIFT; vcpu_set_reg(vcpu, rt, vmcr); } -static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 val = vcpu_get_reg(vcpu, rt); @@ -936,15 +925,13 @@ static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, write_gicreg(vmcr, ICH_VMCR_EL2); } -static void __hyp_text __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 val = 
__vgic_v3_get_highest_active_priority(); vcpu_set_reg(vcpu, rt, val); } -static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 vtr, val; @@ -965,8 +952,7 @@ static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, vcpu_set_reg(vcpu, rt, val); } -static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, - u32 vmcr, int rt) +static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 val = vcpu_get_reg(vcpu, rt); @@ -983,7 +969,7 @@ static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, write_gicreg(vmcr, ICH_VMCR_EL2); } -int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) +int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) { int rt; u32 esr; @@ -992,7 +978,7 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) bool is_read; u32 sysreg; - esr = kvm_vcpu_get_hsr(vcpu); + esr = kvm_vcpu_get_esr(vcpu); if (vcpu_mode_is_32bit(vcpu)) { if (!kvm_condition_valid(vcpu)) { __kvm_skip_instr(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile new file mode 100644 index 000000000000..461e97c375cc --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for Kernel-based Virtual Machine module, HYP/VHE part +# + +asflags-y := -D__KVM_VHE_HYPERVISOR__ +ccflags-y := -D__KVM_VHE_HYPERVISOR__ + +obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o +obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ + ../fpsimd.o ../hyp-entry.o diff --git a/arch/arm64/kvm/hyp/vhe/debug-sr.c b/arch/arm64/kvm/hyp/vhe/debug-sr.c new file mode 100644 index 000000000000..f1e2e5a00933 --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/debug-sr.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <[email protected]> + */
+ +#include <hyp/debug-sr.h> + +#include <linux/kvm_host.h> + +#include <asm/kvm_hyp.h> + +void __debug_switch_to_guest(struct kvm_vcpu *vcpu) +{ + __debug_switch_to_guest_common(vcpu); +} + +void __debug_switch_to_host(struct kvm_vcpu *vcpu) +{ + __debug_switch_to_host_common(vcpu); +} + +u32 __kvm_get_mdcr_el2(void) +{ + return read_sysreg(mdcr_el2); +} diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c new file mode 100644 index 000000000000..c52d714e0d75 --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <[email protected]> + */ + +#include <hyp/switch.h> + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> +#include <linux/types.h> +#include <linux/jump_label.h> +#include <uapi/linux/psci.h> + +#include <kvm/arm_psci.h> + +#include <asm/barrier.h> +#include <asm/cpufeature.h> +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/fpsimd.h> +#include <asm/debug-monitors.h> +#include <asm/processor.h> +#include <asm/thread_info.h> + +const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; + +static void __activate_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + ___activate_traps(vcpu); + + val = read_sysreg(cpacr_el1); + val |= CPACR_EL1_TTA; + val &= ~CPACR_EL1_ZEN; + + /* + * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to + * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, + * except for some missing controls, such as TAM. + * In this case, CPTR_EL2.TAM has the same position with or without + * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM + * shift value for trapping the AMU accesses. 
+ */ + + val |= CPTR_EL2_TAM; + + if (update_fp_enabled(vcpu)) { + if (vcpu_has_sve(vcpu)) + val |= CPACR_EL1_ZEN; + } else { + val &= ~CPACR_EL1_FPEN; + __activate_traps_fpsimd32(vcpu); + } + + write_sysreg(val, cpacr_el1); + + write_sysreg(kvm_get_hyp_vector(), vbar_el1); +} +NOKPROBE_SYMBOL(__activate_traps); + +static void __deactivate_traps(struct kvm_vcpu *vcpu) +{ + extern char vectors[]; /* kernel exception vectors */ + + ___deactivate_traps(vcpu); + + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); + + /* + * ARM errata 1165522 and 1530923 require the actual execution of the + * above before we can switch to the EL2/EL0 translation regime used by + * the host. + */ + asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); + + write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); + write_sysreg(vectors, vbar_el1); +} +NOKPROBE_SYMBOL(__deactivate_traps); + +void activate_traps_vhe_load(struct kvm_vcpu *vcpu) +{ + __activate_traps_common(vcpu); +} + +void deactivate_traps_vhe_put(void) +{ + u64 mdcr_el2 = read_sysreg(mdcr_el2); + + mdcr_el2 &= MDCR_EL2_HPMN_MASK | + MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | + MDCR_EL2_TPMS; + + write_sysreg(mdcr_el2, mdcr_el2); + + __deactivate_traps_common(); +} + +/* Switch to the guest for VHE systems running in EL2 */ +static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + u64 exit_code; + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + host_ctxt->__hyp_running_vcpu = vcpu; + guest_ctxt = &vcpu->arch.ctxt; + + sysreg_save_host_state_vhe(host_ctxt); + + /* + * ARM erratum 1165522 requires us to configure both stage 1 and + * stage 2 translation for the guest context before we clear + * HCR_EL2.TGE. + * + * We have already configured the guest's stage 1 translation in + * kvm_vcpu_load_sysregs_vhe above. 
We must now call __activate_vm + * before __activate_traps, because __activate_vm configures + * stage 2 translation, and __activate_traps clears HCR_EL2.TGE + * (among other things). + */ + __activate_vm(vcpu->arch.hw_mmu); + __activate_traps(vcpu); + + sysreg_restore_guest_state_vhe(guest_ctxt); + __debug_switch_to_guest(vcpu); + + __set_guest_arch_workaround_state(vcpu); + + do { + /* Jump in the fire! */ + exit_code = __guest_enter(vcpu, host_ctxt); + + /* And we're baaack! */ + } while (fixup_guest_exit(vcpu, &exit_code)); + + __set_host_arch_workaround_state(vcpu); + + sysreg_save_guest_state_vhe(guest_ctxt); + + __deactivate_traps(vcpu); + + sysreg_restore_host_state_vhe(host_ctxt); + + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) + __fpsimd_save_fpexc32(vcpu); + + __debug_switch_to_host(vcpu); + + return exit_code; +} +NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe); + +int __kvm_vcpu_run(struct kvm_vcpu *vcpu) +{ + int ret; + + local_daif_mask(); + + /* + * Having IRQs masked via PMR when entering the guest means the GIC + * will not signal the CPU of interrupts of lower priority, and the + * only way to get out will be via guest exceptions. + * Naturally, we want to avoid this. + * + * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a + * dsb to ensure the redistributor forwards EL2 IRQs to the CPU. + */ + pmr_sync(); + + ret = __kvm_vcpu_run_vhe(vcpu); + + /* + * local_daif_restore() takes care to properly restore PSTATE.DAIF + * and the GIC PMR if the host is using IRQ priorities. + */ + local_daif_restore(DAIF_PROCCTX_NOIRQ); + + /* + * When we exit from the guest we change a number of CPU configuration + * parameters, such as traps. Make sure these changes take effect + * before running the host or additional guests.
+ */ + isb(); + + return ret; +} + +static void __hyp_call_panic(u64 spsr, u64 elr, u64 par, + struct kvm_cpu_context *host_ctxt) +{ + struct kvm_vcpu *vcpu; + vcpu = host_ctxt->__hyp_running_vcpu; + + __deactivate_traps(vcpu); + sysreg_restore_host_state_vhe(host_ctxt); + + panic(__hyp_panic_string, + spsr, elr, + read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR), + read_sysreg(hpfar_el2), par, vcpu); +} +NOKPROBE_SYMBOL(__hyp_call_panic); + +void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) +{ + u64 spsr = read_sysreg_el2(SYS_SPSR); + u64 elr = read_sysreg_el2(SYS_ELR); + u64 par = read_sysreg(par_el1); + + __hyp_call_panic(spsr, elr, par, host_ctxt); + unreachable(); +} diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c new file mode 100644 index 000000000000..996471e4c138 --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <[email protected]> + */ + +#include <hyp/sysreg-sr.h> + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +/* + * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and + * pstate, which are handled as part of the el2 return state) on every + * switch (sp_el0 is being dealt with in the assembly code). + * tpidr_el0 and tpidrro_el0 only need to be switched when going + * to host userspace or a different VCPU. EL1 registers only need to be + * switched when potentially going to run a different VCPU. The latter two + * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put. 
+ */ + +void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_save_common_state(ctxt); +} +NOKPROBE_SYMBOL(sysreg_save_host_state_vhe); + +void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_save_common_state(ctxt); + __sysreg_save_el2_return_state(ctxt); +} +NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe); + +void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_restore_common_state(ctxt); +} +NOKPROBE_SYMBOL(sysreg_restore_host_state_vhe); + +void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt) +{ + __sysreg_restore_common_state(ctxt); + __sysreg_restore_el2_return_state(ctxt); +} +NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe); + +/** + * kvm_vcpu_load_sysregs_vhe - Load guest system registers to the physical CPU + * + * @vcpu: The VCPU pointer + * + * Load system registers that do not affect the host's execution, for + * example EL1 system registers on a VHE system where the host kernel + * runs at EL2. This function is called from KVM's vcpu_load() function + * and loading system register state early avoids having to load them on + * every entry to the VM. + */ +void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; + struct kvm_cpu_context *host_ctxt; + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + __sysreg_save_user_state(host_ctxt); + + /* + * Load guest EL1 and user state + * + * We must restore the 32-bit state before the sysregs, thanks + * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). 
+ */ + __sysreg32_restore_state(vcpu); + __sysreg_restore_user_state(guest_ctxt); + __sysreg_restore_el1_state(guest_ctxt); + + vcpu->arch.sysregs_loaded_on_cpu = true; + + activate_traps_vhe_load(vcpu); +} + +/** + * kvm_vcpu_put_sysregs_vhe - Restore host system registers to the physical CPU + * + * @vcpu: The VCPU pointer + * + * Save guest system registers that do not affect the host's execution, for + * example EL1 system registers on a VHE system where the host kernel + * runs at EL2. This function is called from KVM's vcpu_put() function + * and deferring saving system register state until we're no longer running the + * VCPU avoids having to save them on every exit from the VM. + */ +void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; + struct kvm_cpu_context *host_ctxt; + + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + deactivate_traps_vhe_put(); + + __sysreg_save_el1_state(guest_ctxt); + __sysreg_save_user_state(guest_ctxt); + __sysreg32_save_state(vcpu); + + /* Restore host user state */ + __sysreg_restore_user_state(host_ctxt); + + vcpu->arch.sysregs_loaded_on_cpu = false; +} diff --git a/arch/arm64/kvm/hyp/vhe/timer-sr.c b/arch/arm64/kvm/hyp/vhe/timer-sr.c new file mode 100644 index 000000000000..4cda674a8be6 --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/timer-sr.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <[email protected]> + */ + +#include <asm/kvm_hyp.h> + +void __kvm_timer_set_cntvoff(u64 cntvoff) +{ + write_sysreg(cntvoff, cntvoff_el2); +} diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c new file mode 100644 index 000000000000..fd7895945bbc --- /dev/null +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <[email protected]> + */ + +#include <linux/irqflags.h> + 
+#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/tlbflush.h> + +struct tlb_inv_context { + unsigned long flags; + u64 tcr; + u64 sctlr; +}; + +static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, + struct tlb_inv_context *cxt) +{ + u64 val; + + local_irq_save(cxt->flags); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + /* + * For CPUs that are affected by ARM errata 1165522 or 1530923, + * we cannot trust stage-1 to be in a correct state at that + * point. Since we do not want to force a full load of the + * vcpu state, we prevent the EL1 page-table walker from + * allocating new TLBs. This is done by setting the EPD bits + * in the TCR_EL1 register. We also need to prevent it from + * allocating IPA->PA walks, so we enable the S1 MMU... + */ + val = cxt->tcr = read_sysreg_el1(SYS_TCR); + val |= TCR_EPD1_MASK | TCR_EPD0_MASK; + write_sysreg_el1(val, SYS_TCR); + val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR); + val |= SCTLR_ELx_M; + write_sysreg_el1(val, SYS_SCTLR); + } + + /* + * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and + * most TLB operations target EL2/EL0. In order to affect the + * guest TLBs (EL1/EL0), we need to change one of these two + * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so + * let's flip TGE before executing the TLB operation. + * + * ARM erratum 1165522 requires some special handling (again), + * as we need to make sure both stages of translation are in + * place before clearing TGE. __load_guest_stage2() already + * has an ISB in order to deal with this. + */ + __load_guest_stage2(mmu); + val = read_sysreg(hcr_el2); + val &= ~HCR_TGE; + write_sysreg(val, hcr_el2); + isb(); +} + +static void __tlb_switch_to_host(struct tlb_inv_context *cxt) +{ + /* + * We're done with the TLB operation, let's restore the host's + * view of HCR_EL2.
+ */ + write_sysreg(0, vttbr_el2); + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); + isb(); + + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + /* Restore the registers to what they were */ + write_sysreg_el1(cxt->tcr, SYS_TCR); + write_sysreg_el1(cxt->sctlr, SYS_SCTLR); + } + + local_irq_restore(cxt->flags); +} + +void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, + phys_addr_t ipa, int level) +{ + struct tlb_inv_context cxt; + + dsb(ishst); + + /* Switch to requested VMID */ + __tlb_switch_to_guest(mmu, &cxt); + + /* + * We could do so much better if we had the VA as well. + * Instead, we invalidate Stage-2 for this IPA, and the + * whole of Stage-1. Weep... + */ + ipa >>= 12; + __tlbi_level(ipas2e1is, ipa, level); + + /* + * We have to ensure completion of the invalidation at Stage-2, + * since a table walk on another CPU could refill a TLB with a + * complete (S1 + S2) walk based on the old Stage-2 mapping if + * the Stage-1 invalidation happened first. + */ + dsb(ish); + __tlbi(vmalle1is); + dsb(ish); + isb(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) +{ + struct tlb_inv_context cxt; + + dsb(ishst); + + /* Switch to requested VMID */ + __tlb_switch_to_guest(mmu, &cxt); + + __tlbi(vmalls12e1is); + dsb(ish); + isb(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) +{ + struct tlb_inv_context cxt; + + /* Switch to requested VMID */ + __tlb_switch_to_guest(mmu, &cxt); + + __tlbi(vmalle1); + dsb(nsh); + isb(); + + __tlb_switch_to_host(&cxt); +} + +void __kvm_flush_vm_context(void) +{ + dsb(ishst); + __tlbi(alle1is); + + /* + * VIPT and PIPT caches are not affected by VMID, so no maintenance + * is necessary across a VMID rollover. + * + * VPIPT caches constrain lookup and maintenance to the active VMID, + * so we need to invalidate lines with a stale VMID to avoid an ABA + * race after multiple rollovers. 
+ * + */ + if (icache_is_vpipt()) + asm volatile("ic ialluis"); + + dsb(ish); +} diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index e21fdd93027a..ebfdfc27b2bd 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -64,7 +64,7 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, case PSR_MODE_EL1h: vbar = vcpu_read_sys_reg(vcpu, VBAR_EL1); sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); - vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); + vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1); break; default: /* Don't do that */ diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 158fbe682611..6a2826f1bf5e 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -146,12 +146,6 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) return -ENOSYS; } - /* Page table accesses IO mem: tell guest to fix its TTBR */ - if (kvm_vcpu_dabt_iss1tw(vcpu)) { - kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - return 1; - } - /* * Prepare MMIO operation. First decode the syndrome data we get * from the CPU. Then try if some in-kernel emulation feels diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 7a7ddc4558a7..0121ef2c7c8d 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -55,12 +55,13 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot) */ void kvm_flush_remote_tlbs(struct kvm *kvm) { - kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); + kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); } -static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, + int level) { - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ipa, level); } /* @@ -90,74 +91,80 @@ static bool kvm_is_device_pfn(unsigned long pfn) /** * stage2_dissolve_pmd() - clear and flush huge PMD entry - * @kvm: pointer to kvm structure. 
+ * @mmu: pointer to mmu structure to operate on * @addr: IPA * @pmd: pmd pointer for IPA * * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. */ -static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) +static void stage2_dissolve_pmd(struct kvm_s2_mmu *mmu, phys_addr_t addr, pmd_t *pmd) { if (!pmd_thp_or_huge(*pmd)) return; pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL); put_page(virt_to_page(pmd)); } /** * stage2_dissolve_pud() - clear and flush huge PUD entry - * @kvm: pointer to kvm structure. + * @mmu: pointer to mmu structure to operate on * @addr: IPA * @pud: pud pointer for IPA * * Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs. */ -static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp) +static void stage2_dissolve_pud(struct kvm_s2_mmu *mmu, phys_addr_t addr, pud_t *pudp) { + struct kvm *kvm = mmu->kvm; + if (!stage2_pud_huge(kvm, *pudp)) return; stage2_pud_clear(kvm, pudp); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL); put_page(virt_to_page(pudp)); } -static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) +static void clear_stage2_pgd_entry(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; p4d_t *p4d_table __maybe_unused = stage2_p4d_offset(kvm, pgd, 0UL); stage2_pgd_clear(kvm, pgd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); stage2_p4d_free(kvm, p4d_table); put_page(virt_to_page(pgd)); } -static void clear_stage2_p4d_entry(struct kvm *kvm, p4d_t *p4d, phys_addr_t addr) +static void clear_stage2_p4d_entry(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, p4d, 0); stage2_p4d_clear(kvm, p4d); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, 
S2_NO_LEVEL_HINT); stage2_pud_free(kvm, pud_table); put_page(virt_to_page(p4d)); } -static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) +static void clear_stage2_pud_entry(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0); + VM_BUG_ON(stage2_pud_huge(kvm, *pud)); stage2_pud_clear(kvm, pud); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); stage2_pmd_free(kvm, pmd_table); put_page(virt_to_page(pud)); } -static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) +static void clear_stage2_pmd_entry(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr_t addr) { pte_t *pte_table = pte_offset_kernel(pmd, 0); VM_BUG_ON(pmd_thp_or_huge(*pmd)); pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_NO_LEVEL_HINT); free_page((unsigned long)pte_table); put_page(virt_to_page(pmd)); } @@ -223,7 +230,7 @@ static inline void kvm_pgd_populate(pgd_t *pgdp, p4d_t *p4dp) * we then fully enforce cacheability of RAM, no matter what the guest * does. 
*/ -static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, +static void unmap_stage2_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr_t addr, phys_addr_t end) { phys_addr_t start_addr = addr; @@ -235,7 +242,7 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, pte_t old_pte = *pte; kvm_set_pte(pte, __pte(0)); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PTE_LEVEL); /* No need to invalidate the cache for device mappings */ if (!kvm_is_device_pfn(pte_pfn(old_pte))) @@ -245,13 +252,14 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, } } while (pte++, addr += PAGE_SIZE, addr != end); - if (stage2_pte_table_empty(kvm, start_pte)) - clear_stage2_pmd_entry(kvm, pmd, start_addr); + if (stage2_pte_table_empty(mmu->kvm, start_pte)) + clear_stage2_pmd_entry(mmu, pmd, start_addr); } -static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, +static void unmap_stage2_pmds(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; phys_addr_t next, start_addr = addr; pmd_t *pmd, *start_pmd; @@ -263,24 +271,25 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, pmd_t old_pmd = *pmd; pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL); kvm_flush_dcache_pmd(old_pmd); put_page(virt_to_page(pmd)); } else { - unmap_stage2_ptes(kvm, pmd, addr, next); + unmap_stage2_ptes(mmu, pmd, addr, next); } } } while (pmd++, addr = next, addr != end); if (stage2_pmd_table_empty(kvm, start_pmd)) - clear_stage2_pud_entry(kvm, pud, start_addr); + clear_stage2_pud_entry(mmu, pud, start_addr); } -static void unmap_stage2_puds(struct kvm *kvm, p4d_t *p4d, +static void unmap_stage2_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; phys_addr_t next, start_addr = addr; pud_t *pud, *start_pud; @@ -292,22 +301,23 @@ static void unmap_stage2_puds(struct kvm *kvm, p4d_t *p4d, pud_t old_pud = 
*pud; stage2_pud_clear(kvm, pud); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL); kvm_flush_dcache_pud(old_pud); put_page(virt_to_page(pud)); } else { - unmap_stage2_pmds(kvm, pud, addr, next); + unmap_stage2_pmds(mmu, pud, addr, next); } } } while (pud++, addr = next, addr != end); if (stage2_pud_table_empty(kvm, start_pud)) - clear_stage2_p4d_entry(kvm, p4d, start_addr); + clear_stage2_p4d_entry(mmu, p4d, start_addr); } -static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd, +static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; phys_addr_t next, start_addr = addr; p4d_t *p4d, *start_p4d; @@ -315,11 +325,11 @@ static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd, do { next = stage2_p4d_addr_end(kvm, addr, end); if (!stage2_p4d_none(kvm, *p4d)) - unmap_stage2_puds(kvm, p4d, addr, next); + unmap_stage2_puds(mmu, p4d, addr, next); } while (p4d++, addr = next, addr != end); if (stage2_p4d_table_empty(kvm, start_p4d)) - clear_stage2_pgd_entry(kvm, pgd, start_addr); + clear_stage2_pgd_entry(mmu, pgd, start_addr); } /** @@ -333,8 +343,9 @@ static void unmap_stage2_p4ds(struct kvm *kvm, pgd_t *pgd, * destroying the VM), otherwise another faulting VCPU may come in and mess * with things behind our backs. 
*/ -static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) +static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size) { + struct kvm *kvm = mmu->kvm; pgd_t *pgd; phys_addr_t addr = start, end = start + size; phys_addr_t next; @@ -342,18 +353,18 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) assert_spin_locked(&kvm->mmu_lock); WARN_ON(size & ~PAGE_MASK); - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); do { /* * Make sure the page table is still active, as another thread * could have possibly freed the page table, while we released * the lock. */ - if (!READ_ONCE(kvm->arch.pgd)) + if (!READ_ONCE(mmu->pgd)) break; next = stage2_pgd_addr_end(kvm, addr, end); if (!stage2_pgd_none(kvm, *pgd)) - unmap_stage2_p4ds(kvm, pgd, addr, next); + unmap_stage2_p4ds(mmu, pgd, addr, next); /* * If the range is too large, release the kvm->mmu_lock * to prevent starvation and lockup detector warnings. 
@@ -363,7 +374,7 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) } while (pgd++, addr = next, addr != end); } -static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, +static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd, phys_addr_t addr, phys_addr_t end) { pte_t *pte; @@ -375,9 +386,10 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, +static void stage2_flush_pmds(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pmd_t *pmd; phys_addr_t next; @@ -388,14 +400,15 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, if (pmd_thp_or_huge(*pmd)) kvm_flush_dcache_pmd(*pmd); else - stage2_flush_ptes(kvm, pmd, addr, next); + stage2_flush_ptes(mmu, pmd, addr, next); } } while (pmd++, addr = next, addr != end); } -static void stage2_flush_puds(struct kvm *kvm, p4d_t *p4d, +static void stage2_flush_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pud_t *pud; phys_addr_t next; @@ -406,14 +419,15 @@ static void stage2_flush_puds(struct kvm *kvm, p4d_t *p4d, if (stage2_pud_huge(kvm, *pud)) kvm_flush_dcache_pud(*pud); else - stage2_flush_pmds(kvm, pud, addr, next); + stage2_flush_pmds(mmu, pud, addr, next); } } while (pud++, addr = next, addr != end); } -static void stage2_flush_p4ds(struct kvm *kvm, pgd_t *pgd, +static void stage2_flush_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; p4d_t *p4d; phys_addr_t next; @@ -421,23 +435,24 @@ static void stage2_flush_p4ds(struct kvm *kvm, pgd_t *pgd, do { next = stage2_p4d_addr_end(kvm, addr, end); if (!stage2_p4d_none(kvm, *p4d)) - stage2_flush_puds(kvm, p4d, addr, next); + stage2_flush_puds(mmu, p4d, addr, next); } while (p4d++, addr = next, addr != end); } static void 
stage2_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; phys_addr_t end = addr + PAGE_SIZE * memslot->npages; phys_addr_t next; pgd_t *pgd; - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); do { next = stage2_pgd_addr_end(kvm, addr, end); if (!stage2_pgd_none(kvm, *pgd)) - stage2_flush_p4ds(kvm, pgd, addr, next); + stage2_flush_p4ds(mmu, pgd, addr, next); if (next != end) cond_resched_lock(&kvm->mmu_lock); @@ -964,21 +979,23 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, } /** - * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. - * @kvm: The KVM struct pointer for the VM. + * kvm_init_stage2_mmu - Initialise a S2 MMU strucrure + * @kvm: The pointer to the KVM structure + * @mmu: The pointer to the s2 MMU structure * * Allocates only the stage-2 HW PGD level table(s) of size defined by - * stage2_pgd_size(kvm). + * stage2_pgd_size(mmu->kvm). * * Note we don't need locking here as this is only called when the VM is * created, which can only be done once. 
*/ -int kvm_alloc_stage2_pgd(struct kvm *kvm) +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) { phys_addr_t pgd_phys; pgd_t *pgd; + int cpu; - if (kvm->arch.pgd != NULL) { + if (mmu->pgd != NULL) { kvm_err("kvm_arch already initialized?\n"); return -EINVAL; } @@ -992,8 +1009,20 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm))) return -EINVAL; - kvm->arch.pgd = pgd; - kvm->arch.pgd_phys = pgd_phys; + mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran)); + if (!mmu->last_vcpu_ran) { + free_pages_exact(pgd, stage2_pgd_size(kvm)); + return -ENOMEM; + } + + for_each_possible_cpu(cpu) + *per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1; + + mmu->kvm = kvm; + mmu->pgd = pgd; + mmu->pgd_phys = pgd_phys; + mmu->vmid.vmid_gen = 0; + return 0; } @@ -1032,7 +1061,7 @@ static void stage2_unmap_memslot(struct kvm *kvm, if (!(vma->vm_flags & VM_PFNMAP)) { gpa_t gpa = addr + (vm_start - memslot->userspace_addr); - unmap_stage2_range(kvm, gpa, vm_end - vm_start); + unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start); } hva = vm_end; } while (hva < reg_end); @@ -1064,39 +1093,34 @@ void stage2_unmap_vm(struct kvm *kvm) srcu_read_unlock(&kvm->srcu, idx); } -/** - * kvm_free_stage2_pgd - free all stage-2 tables - * @kvm: The KVM struct pointer for the VM. - * - * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all - * underlying level-2 and level-3 tables before freeing the actual level-1 table - * and setting the struct pointer to NULL. 
- */ -void kvm_free_stage2_pgd(struct kvm *kvm) +void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) { + struct kvm *kvm = mmu->kvm; void *pgd = NULL; spin_lock(&kvm->mmu_lock); - if (kvm->arch.pgd) { - unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); - pgd = READ_ONCE(kvm->arch.pgd); - kvm->arch.pgd = NULL; - kvm->arch.pgd_phys = 0; + if (mmu->pgd) { + unmap_stage2_range(mmu, 0, kvm_phys_size(kvm)); + pgd = READ_ONCE(mmu->pgd); + mmu->pgd = NULL; } spin_unlock(&kvm->mmu_lock); /* Free the HW pgd, one page at a time */ - if (pgd) + if (pgd) { free_pages_exact(pgd, stage2_pgd_size(kvm)); + free_percpu(mmu->last_vcpu_ran); + } } -static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static p4d_t *stage2_get_p4d(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; pgd_t *pgd; p4d_t *p4d; - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); if (stage2_pgd_none(kvm, *pgd)) { if (!cache) return NULL; @@ -1108,13 +1132,14 @@ static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache return stage2_p4d_offset(kvm, pgd, addr); } -static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static pud_t *stage2_get_pud(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; p4d_t *p4d; pud_t *pud; - p4d = stage2_get_p4d(kvm, cache, addr); + p4d = stage2_get_p4d(mmu, cache, addr); if (stage2_p4d_none(kvm, *p4d)) { if (!cache) return NULL; @@ -1126,13 +1151,14 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache return stage2_pud_offset(kvm, p4d, addr); } -static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static pmd_t *stage2_get_pmd(struct kvm_s2_mmu *mmu, struct kvm_mmu_memory_cache *cache, phys_addr_t addr) { + struct kvm *kvm = mmu->kvm; pud_t *pud; pmd_t *pmd; - pud = 
stage2_get_pud(kvm, cache, addr); + pud = stage2_get_pud(mmu, cache, addr); if (!pud || stage2_pud_huge(kvm, *pud)) return NULL; @@ -1147,13 +1173,14 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache return stage2_pmd_offset(kvm, pud, addr); } -static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache - *cache, phys_addr_t addr, const pmd_t *new_pmd) +static int stage2_set_pmd_huge(struct kvm_s2_mmu *mmu, + struct kvm_mmu_memory_cache *cache, + phys_addr_t addr, const pmd_t *new_pmd) { pmd_t *pmd, old_pmd; retry: - pmd = stage2_get_pmd(kvm, cache, addr); + pmd = stage2_get_pmd(mmu, cache, addr); VM_BUG_ON(!pmd); old_pmd = *pmd; @@ -1186,7 +1213,7 @@ retry: * get handled accordingly. */ if (!pmd_thp_or_huge(old_pmd)) { - unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE); + unmap_stage2_range(mmu, addr & S2_PMD_MASK, S2_PMD_SIZE); goto retry; } /* @@ -1202,7 +1229,7 @@ retry: */ WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); pmd_clear(pmd); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PMD_LEVEL); } else { get_page(virt_to_page(pmd)); } @@ -1211,13 +1238,15 @@ retry: return 0; } -static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static int stage2_set_pud_huge(struct kvm_s2_mmu *mmu, + struct kvm_mmu_memory_cache *cache, phys_addr_t addr, const pud_t *new_pudp) { + struct kvm *kvm = mmu->kvm; pud_t *pudp, old_pud; retry: - pudp = stage2_get_pud(kvm, cache, addr); + pudp = stage2_get_pud(mmu, cache, addr); VM_BUG_ON(!pudp); old_pud = *pudp; @@ -1236,13 +1265,13 @@ retry: * the range for this block and retry. 
*/ if (!stage2_pud_huge(kvm, old_pud)) { - unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE); + unmap_stage2_range(mmu, addr & S2_PUD_MASK, S2_PUD_SIZE); goto retry; } WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp)); stage2_pud_clear(kvm, pudp); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PUD_LEVEL); } else { get_page(virt_to_page(pudp)); } @@ -1257,9 +1286,10 @@ retry: * leaf-entry is returned in the appropriate level variable - pudpp, * pmdpp, ptepp. */ -static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr, +static bool stage2_get_leaf_entry(struct kvm_s2_mmu *mmu, phys_addr_t addr, pud_t **pudpp, pmd_t **pmdpp, pte_t **ptepp) { + struct kvm *kvm = mmu->kvm; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -1268,7 +1298,7 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr, *pmdpp = NULL; *ptepp = NULL; - pudp = stage2_get_pud(kvm, NULL, addr); + pudp = stage2_get_pud(mmu, NULL, addr); if (!pudp || stage2_pud_none(kvm, *pudp) || !stage2_pud_present(kvm, *pudp)) return false; @@ -1294,14 +1324,14 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr, return true; } -static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr, unsigned long sz) +static bool stage2_is_exec(struct kvm_s2_mmu *mmu, phys_addr_t addr, unsigned long sz) { pud_t *pudp; pmd_t *pmdp; pte_t *ptep; bool found; - found = stage2_get_leaf_entry(kvm, addr, &pudp, &pmdp, &ptep); + found = stage2_get_leaf_entry(mmu, addr, &pudp, &pmdp, &ptep); if (!found) return false; @@ -1313,10 +1343,12 @@ static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr, unsigned long sz) return sz == PAGE_SIZE && kvm_s2pte_exec(ptep); } -static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static int stage2_set_pte(struct kvm_s2_mmu *mmu, + struct kvm_mmu_memory_cache *cache, phys_addr_t addr, const pte_t *new_pte, unsigned long flags) { + struct kvm *kvm = mmu->kvm; pud_t *pud; pmd_t *pmd; 
pte_t *pte, old_pte; @@ -1326,7 +1358,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, VM_BUG_ON(logging_active && !cache); /* Create stage-2 page table mapping - Levels 0 and 1 */ - pud = stage2_get_pud(kvm, cache, addr); + pud = stage2_get_pud(mmu, cache, addr); if (!pud) { /* * Ignore calls from kvm_set_spte_hva for unallocated @@ -1340,7 +1372,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, * on to allocate page. */ if (logging_active) - stage2_dissolve_pud(kvm, addr, pud); + stage2_dissolve_pud(mmu, addr, pud); if (stage2_pud_none(kvm, *pud)) { if (!cache) @@ -1364,7 +1396,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, * allocate page. */ if (logging_active) - stage2_dissolve_pmd(kvm, addr, pmd); + stage2_dissolve_pmd(mmu, addr, pmd); /* Create stage-2 page mappings - Level 2 */ if (pmd_none(*pmd)) { @@ -1388,7 +1420,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, return 0; kvm_set_pte(pte, __pte(0)); - kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_tlb_flush_vmid_ipa(mmu, addr, S2_PTE_LEVEL); } else { get_page(virt_to_page(pte)); } @@ -1453,8 +1485,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, if (ret) goto out; spin_lock(&kvm->mmu_lock); - ret = stage2_set_pte(kvm, &cache, addr, &pte, - KVM_S2PTE_FLAG_IS_IOMAP); + ret = stage2_set_pte(&kvm->arch.mmu, &cache, addr, &pte, + KVM_S2PTE_FLAG_IS_IOMAP); spin_unlock(&kvm->mmu_lock); if (ret) goto out; @@ -1493,9 +1525,10 @@ static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) * @addr: range start address * @end: range end address */ -static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud, +static void stage2_wp_pmds(struct kvm_s2_mmu *mmu, pud_t *pud, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pmd_t *pmd; phys_addr_t next; @@ -1516,13 +1549,14 @@ static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud, /** * 
stage2_wp_puds - write protect P4D range - * @pgd: pointer to pgd entry + * @p4d: pointer to p4d entry * @addr: range start address * @end: range end address */ -static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d, +static void stage2_wp_puds(struct kvm_s2_mmu *mmu, p4d_t *p4d, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pud_t *pud; phys_addr_t next; @@ -1534,7 +1568,7 @@ static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d, if (!kvm_s2pud_readonly(pud)) kvm_set_s2pud_readonly(pud); } else { - stage2_wp_pmds(kvm, pud, addr, next); + stage2_wp_pmds(mmu, pud, addr, next); } } } while (pud++, addr = next, addr != end); @@ -1546,9 +1580,10 @@ static void stage2_wp_puds(struct kvm *kvm, p4d_t *p4d, * @addr: range start address * @end: range end address */ -static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd, +static void stage2_wp_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; p4d_t *p4d; phys_addr_t next; @@ -1556,7 +1591,7 @@ static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd, do { next = stage2_p4d_addr_end(kvm, addr, end); if (!stage2_p4d_none(kvm, *p4d)) - stage2_wp_puds(kvm, p4d, addr, next); + stage2_wp_puds(mmu, p4d, addr, next); } while (p4d++, addr = next, addr != end); } @@ -1566,12 +1601,13 @@ static void stage2_wp_p4ds(struct kvm *kvm, pgd_t *pgd, * @addr: Start address of range * @end: End address of range */ -static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) +static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) { + struct kvm *kvm = mmu->kvm; pgd_t *pgd; phys_addr_t next; - pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + pgd = mmu->pgd + stage2_pgd_index(kvm, addr); do { /* * Release kvm_mmu_lock periodically if the memory region is @@ -1583,11 +1619,11 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) * the lock. 
*/ cond_resched_lock(&kvm->mmu_lock); - if (!READ_ONCE(kvm->arch.pgd)) + if (!READ_ONCE(mmu->pgd)) break; next = stage2_pgd_addr_end(kvm, addr, end); if (stage2_pgd_present(kvm, *pgd)) - stage2_wp_p4ds(kvm, pgd, addr, next); + stage2_wp_p4ds(mmu, pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -1617,7 +1653,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; spin_lock(&kvm->mmu_lock); - stage2_wp_range(kvm, start, end); + stage2_wp_range(&kvm->arch.mmu, start, end); spin_unlock(&kvm->mmu_lock); kvm_flush_remote_tlbs(kvm); } @@ -1641,7 +1677,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT; phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT; - stage2_wp_range(kvm, start, end); + stage2_wp_range(&kvm->arch.mmu, start, end); } /* @@ -1804,6 +1840,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, pgprot_t mem_type = PAGE_S2; bool logging_active = memslot_is_logging(memslot); unsigned long vma_pagesize, flags = 0; + struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu; write_fault = kvm_is_write_fault(vcpu); exec_fault = kvm_vcpu_trap_is_iabt(vcpu); @@ -1925,7 +1962,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, */ needs_exec = exec_fault || (fault_status == FSC_PERM && - stage2_is_exec(kvm, fault_ipa, vma_pagesize)); + stage2_is_exec(mmu, fault_ipa, vma_pagesize)); if (vma_pagesize == PUD_SIZE) { pud_t new_pud = kvm_pfn_pud(pfn, mem_type); @@ -1937,7 +1974,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (needs_exec) new_pud = kvm_s2pud_mkexec(new_pud); - ret = stage2_set_pud_huge(kvm, memcache, fault_ipa, &new_pud); + ret = stage2_set_pud_huge(mmu, memcache, fault_ipa, &new_pud); } else if (vma_pagesize == PMD_SIZE) { pmd_t new_pmd = kvm_pfn_pmd(pfn, mem_type); @@ -1949,7 +1986,7 @@ static int user_mem_abort(struct 
kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (needs_exec) new_pmd = kvm_s2pmd_mkexec(new_pmd); - ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); + ret = stage2_set_pmd_huge(mmu, memcache, fault_ipa, &new_pmd); } else { pte_t new_pte = kvm_pfn_pte(pfn, mem_type); @@ -1961,7 +1998,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (needs_exec) new_pte = kvm_s2pte_mkexec(new_pte); - ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags); + ret = stage2_set_pte(mmu, memcache, fault_ipa, &new_pte, flags); } out_unlock: @@ -1990,7 +2027,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) spin_lock(&vcpu->kvm->mmu_lock); - if (!stage2_get_leaf_entry(vcpu->kvm, fault_ipa, &pud, &pmd, &pte)) + if (!stage2_get_leaf_entry(vcpu->arch.hw_mmu, fault_ipa, &pud, &pmd, &pte)) goto out; if (pud) { /* HugeTLB */ @@ -2040,21 +2077,18 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) is_iabt = kvm_vcpu_trap_is_iabt(vcpu); /* Synchronous External Abort? */ - if (kvm_vcpu_dabt_isextabt(vcpu)) { + if (kvm_vcpu_abt_issea(vcpu)) { /* * For RAS the host kernel may handle this abort. * There is no need to pass the error into the guest. */ - if (!kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu))) - return 1; - - if (unlikely(!is_iabt)) { + if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu))) kvm_inject_vabt(vcpu); - return 1; - } + + return 1; } - trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), + trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu), kvm_vcpu_get_hfar(vcpu), fault_ipa); /* Check the stage-2 fault is trans. 
fault or write fault */ @@ -2063,7 +2097,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", kvm_vcpu_trap_get_class(vcpu), (unsigned long)kvm_vcpu_trap_get_fault(vcpu), - (unsigned long)kvm_vcpu_get_hsr(vcpu)); + (unsigned long)kvm_vcpu_get_esr(vcpu)); return -EFAULT; } @@ -2074,12 +2108,23 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); write_fault = kvm_is_write_fault(vcpu); if (kvm_is_error_hva(hva) || (write_fault && !writable)) { + /* + * The guest has put either its instructions or its page-tables + * somewhere it shouldn't have. Userspace won't be able to do + * anything about this (there's no syndrome for a start), so + * re-inject the abort back into the guest. + */ if (is_iabt) { - /* Prefetch Abort on I/O address */ ret = -ENOEXEC; goto out; } + if (kvm_vcpu_dabt_iss1tw(vcpu)) { + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + ret = 1; + goto out_unlock; + } + /* * Check for a cache maintenance operation. Since we * ended-up here, we know it is outside of any memory @@ -2090,7 +2135,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) * So let's assume that the guest is just being * cautious, and skip the instruction. 
*/ - if (kvm_vcpu_dabt_is_cm(vcpu)) { + if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) { kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); ret = 1; goto out_unlock; @@ -2163,14 +2208,14 @@ static int handle_hva_to_gpa(struct kvm *kvm, static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) { - unmap_stage2_range(kvm, gpa, size); + unmap_stage2_range(&kvm->arch.mmu, gpa, size); return 0; } int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) { - if (!kvm->arch.pgd) + if (!kvm->arch.mmu.pgd) return 0; trace_kvm_unmap_hva_range(start, end); @@ -2190,7 +2235,7 @@ static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data * therefore stage2_set_pte() never needs to clear out a huge PMD * through this calling path. */ - stage2_set_pte(kvm, NULL, gpa, pte, 0); + stage2_set_pte(&kvm->arch.mmu, NULL, gpa, pte, 0); return 0; } @@ -2201,7 +2246,7 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) kvm_pfn_t pfn = pte_pfn(pte); pte_t stage2_pte; - if (!kvm->arch.pgd) + if (!kvm->arch.mmu.pgd) return 0; trace_kvm_set_spte_hva(hva); @@ -2224,7 +2269,7 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) pte_t *pte; WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); - if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte)) + if (!stage2_get_leaf_entry(&kvm->arch.mmu, gpa, &pud, &pmd, &pte)) return 0; if (pud) @@ -2242,7 +2287,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void * pte_t *pte; WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); - if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte)) + if (!stage2_get_leaf_entry(&kvm->arch.mmu, gpa, &pud, &pmd, &pte)) return 0; if (pud) @@ -2255,7 +2300,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void * int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { - if 
(!kvm->arch.pgd) + if (!kvm->arch.mmu.pgd) return 0; trace_kvm_age_hva(start, end); return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); @@ -2263,7 +2308,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) { - if (!kvm->arch.pgd) + if (!kvm->arch.mmu.pgd) return 0; trace_kvm_test_age_hva(hva); return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE, @@ -2476,7 +2521,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, spin_lock(&kvm->mmu_lock); if (ret) - unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); + unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size); else stage2_flush_memslot(kvm, memslot); spin_unlock(&kvm->mmu_lock); @@ -2495,7 +2540,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) void kvm_arch_flush_shadow_all(struct kvm *kvm) { - kvm_free_stage2_pgd(kvm); + kvm_free_stage2_pgd(&kvm->arch.mmu); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, @@ -2505,7 +2550,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, phys_addr_t size = slot->npages << PAGE_SHIFT; spin_lock(&kvm->mmu_lock); - unmap_stage2_range(kvm, gpa, size); + unmap_stage2_range(&kvm->arch.mmu, gpa, size); spin_unlock(&kvm->mmu_lock); } diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c index a900181e3867..accc1d5fba61 100644 --- a/arch/arm64/kvm/regmap.c +++ b/arch/arm64/kvm/regmap.c @@ -100,7 +100,7 @@ static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = { */ unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) { - unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs; + unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.regs; unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK; switch (mode) { @@ -147,8 +147,20 @@ unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu) { int spsr_idx = vcpu_spsr32_mode(vcpu); - if (!vcpu->arch.sysregs_loaded_on_cpu) - 
return vcpu_gp_regs(vcpu)->spsr[spsr_idx]; + if (!vcpu->arch.sysregs_loaded_on_cpu) { + switch (spsr_idx) { + case KVM_SPSR_SVC: + return __vcpu_sys_reg(vcpu, SPSR_EL1); + case KVM_SPSR_ABT: + return vcpu->arch.ctxt.spsr_abt; + case KVM_SPSR_UND: + return vcpu->arch.ctxt.spsr_und; + case KVM_SPSR_IRQ: + return vcpu->arch.ctxt.spsr_irq; + case KVM_SPSR_FIQ: + return vcpu->arch.ctxt.spsr_fiq; + } + } switch (spsr_idx) { case KVM_SPSR_SVC: @@ -171,7 +183,24 @@ void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v) int spsr_idx = vcpu_spsr32_mode(vcpu); if (!vcpu->arch.sysregs_loaded_on_cpu) { - vcpu_gp_regs(vcpu)->spsr[spsr_idx] = v; + switch (spsr_idx) { + case KVM_SPSR_SVC: + __vcpu_sys_reg(vcpu, SPSR_EL1) = v; + break; + case KVM_SPSR_ABT: + vcpu->arch.ctxt.spsr_abt = v; + break; + case KVM_SPSR_UND: + vcpu->arch.ctxt.spsr_und = v; + break; + case KVM_SPSR_IRQ: + vcpu->arch.ctxt.spsr_irq = v; + break; + case KVM_SPSR_FIQ: + vcpu->arch.ctxt.spsr_fiq = v; + break; + } + return; } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 6ed36be51b4b..ee33875c5c2a 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -42,6 +42,11 @@ static u32 kvm_ipa_limit; #define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \ PSR_AA32_I_BIT | PSR_AA32_F_BIT) +static bool system_has_full_ptr_auth(void) +{ + return system_supports_address_auth() && system_supports_generic_auth(); +} + /** * kvm_arch_vm_ioctl_check_extension * @@ -80,8 +85,7 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_ARM_PTRAUTH_ADDRESS: case KVM_CAP_ARM_PTRAUTH_GENERIC: - r = has_vhe() && system_supports_address_auth() && - system_supports_generic_auth(); + r = system_has_full_ptr_auth(); break; default: r = 0; @@ -205,19 +209,14 @@ static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu) static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) { - /* Support ptrauth only if the system supports these capabilities. 
*/ - if (!has_vhe()) - return -EINVAL; - - if (!system_supports_address_auth() || - !system_supports_generic_auth()) - return -EINVAL; /* * For now make sure that both address/generic pointer authentication - * features are requested by the userspace together. + * features are requested by the userspace together and the system + * supports these capabilities. */ if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || - !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) + !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features) || + !system_has_full_ptr_auth()) return -EINVAL; vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH; @@ -292,7 +291,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset core registers */ memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu))); - vcpu_gp_regs(vcpu)->regs.pstate = pstate; + vcpu_gp_regs(vcpu)->pstate = pstate; /* Reset system registers */ kvm_reset_sys_regs(vcpu); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 138961d7ebe3..077293b5115f 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -94,6 +94,7 @@ static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; + case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break; case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; @@ -133,6 +134,7 @@ static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; + case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; case 
DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; @@ -242,6 +244,25 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, return true; } +static bool access_actlr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1); + + if (p->is_aarch32) { + if (r->Op2 & 2) + p->regval = upper_32_bits(p->regval); + else + p->regval = lower_32_bits(p->regval); + } + + return true; +} + /* * Trap handler for the GICv3 SGI generation system register. * Forward the request to the VGIC emulation. @@ -615,6 +636,12 @@ static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) vcpu_write_sys_reg(vcpu, amair, AMAIR_EL1); } +static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + u64 actlr = read_sysreg(actlr_el1); + vcpu_write_sys_reg(vcpu, actlr, ACTLR_EL1); +} + static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 mpidr; @@ -1518,6 +1545,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_UNALLOCATED(7,7), { SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, + { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, { SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility }, { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, @@ -1957,6 +1985,8 @@ static const struct sys_reg_desc cp14_64_regs[] = { static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 0), CRm( 0), Op2( 1), access_ctr }, { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, + { Op1( 0), CRn( 1), CRm( 0), Op2( 1), access_actlr }, + { Op1( 0), CRn( 1), CRm( 0), Op2( 3), access_actlr }, { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 2), 
CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, @@ -2109,36 +2139,6 @@ static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, return 0; } -/* Target specific emulation tables */ -static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS]; - -void kvm_register_target_sys_reg_table(unsigned int target, - struct kvm_sys_reg_target_table *table) -{ - if (check_sysreg_table(table->table64.table, table->table64.num, false) || - check_sysreg_table(table->table32.table, table->table32.num, true)) - return; - - target_tables[target] = table; -} - -/* Get specific register table for this target. */ -static const struct sys_reg_desc *get_target_table(unsigned target, - bool mode_is_64, - size_t *num) -{ - struct kvm_sys_reg_target_table *table; - - table = target_tables[target]; - if (mode_is_64) { - *num = table->table64.num; - return table->table64.table; - } else { - *num = table->table32.num; - return table->table32.table; - } -} - static int match_sys_reg(const void *key, const void *elt) { const unsigned long pval = (unsigned long)key; @@ -2220,10 +2220,10 @@ static int emulate_cp(struct kvm_vcpu *vcpu, static void unhandled_cp_access(struct kvm_vcpu *vcpu, struct sys_reg_params *params) { - u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); + u8 esr_ec = kvm_vcpu_trap_get_class(vcpu); int cp = -1; - switch(hsr_ec) { + switch (esr_ec) { case ESR_ELx_EC_CP15_32: case ESR_ELx_EC_CP15_64: cp = 15; @@ -2249,22 +2249,20 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu, */ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, const struct sys_reg_desc *global, - size_t nr_global, - const struct sys_reg_desc *target_specific, - size_t nr_specific) + size_t nr_global) { struct sys_reg_params params; - u32 hsr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); - int Rt2 = (hsr >> 10) & 0x1f; + int Rt2 = (esr >> 10) & 0x1f; 
params.is_aarch32 = true; params.is_32bit = false; - params.CRm = (hsr >> 1) & 0xf; - params.is_write = ((hsr & 1) == 0); + params.CRm = (esr >> 1) & 0xf; + params.is_write = ((esr & 1) == 0); params.Op0 = 0; - params.Op1 = (hsr >> 16) & 0xf; + params.Op1 = (esr >> 16) & 0xf; params.Op2 = 0; params.CRn = 0; @@ -2278,14 +2276,11 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, } /* - * Try to emulate the coprocessor access using the target - * specific table first, and using the global table afterwards. - * If either of the tables contains a handler, handle the + * If the table contains a handler, handle the * potential register operation in the case of a read and return * with success. */ - if (!emulate_cp(vcpu, &params, target_specific, nr_specific) || - !emulate_cp(vcpu, &params, global, nr_global)) { + if (!emulate_cp(vcpu, &params, global, nr_global)) { /* Split up the value between registers for the read side */ if (!params.is_write) { vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval)); @@ -2306,26 +2301,23 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, */ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, const struct sys_reg_desc *global, - size_t nr_global, - const struct sys_reg_desc *target_specific, - size_t nr_specific) + size_t nr_global) { struct sys_reg_params params; - u32 hsr = kvm_vcpu_get_hsr(vcpu); + u32 esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); params.is_aarch32 = true; params.is_32bit = true; - params.CRm = (hsr >> 1) & 0xf; + params.CRm = (esr >> 1) & 0xf; params.regval = vcpu_get_reg(vcpu, Rt); - params.is_write = ((hsr & 1) == 0); - params.CRn = (hsr >> 10) & 0xf; + params.is_write = ((esr & 1) == 0); + params.CRn = (esr >> 10) & 0xf; params.Op0 = 0; - params.Op1 = (hsr >> 14) & 0x7; - params.Op2 = (hsr >> 17) & 0x7; + params.Op1 = (esr >> 14) & 0x7; + params.Op2 = (esr >> 17) & 0x7; - if (!emulate_cp(vcpu, &params, target_specific, nr_specific) || - !emulate_cp(vcpu, &params, global, nr_global)) { + if (!emulate_cp(vcpu, 
&params, global, nr_global)) { if (!params.is_write) vcpu_set_reg(vcpu, Rt, params.regval); return 1; @@ -2337,38 +2329,22 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, int kvm_handle_cp15_64(struct kvm_vcpu *vcpu) { - const struct sys_reg_desc *target_specific; - size_t num; - - target_specific = get_target_table(vcpu->arch.target, false, &num); - return kvm_handle_cp_64(vcpu, - cp15_64_regs, ARRAY_SIZE(cp15_64_regs), - target_specific, num); + return kvm_handle_cp_64(vcpu, cp15_64_regs, ARRAY_SIZE(cp15_64_regs)); } int kvm_handle_cp15_32(struct kvm_vcpu *vcpu) { - const struct sys_reg_desc *target_specific; - size_t num; - - target_specific = get_target_table(vcpu->arch.target, false, &num); - return kvm_handle_cp_32(vcpu, - cp15_regs, ARRAY_SIZE(cp15_regs), - target_specific, num); + return kvm_handle_cp_32(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs)); } int kvm_handle_cp14_64(struct kvm_vcpu *vcpu) { - return kvm_handle_cp_64(vcpu, - cp14_64_regs, ARRAY_SIZE(cp14_64_regs), - NULL, 0); + return kvm_handle_cp_64(vcpu, cp14_64_regs, ARRAY_SIZE(cp14_64_regs)); } int kvm_handle_cp14_32(struct kvm_vcpu *vcpu) { - return kvm_handle_cp_32(vcpu, - cp14_regs, ARRAY_SIZE(cp14_regs), - NULL, 0); + return kvm_handle_cp_32(vcpu, cp14_regs, ARRAY_SIZE(cp14_regs)); } static bool is_imp_def_sys_reg(struct sys_reg_params *params) @@ -2380,15 +2356,9 @@ static bool is_imp_def_sys_reg(struct sys_reg_params *params) static int emulate_sys_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *params) { - size_t num; - const struct sys_reg_desc *table, *r; + const struct sys_reg_desc *r; - table = get_target_table(vcpu->arch.target, true, &num); - - /* Search target-specific then generic table. 
*/ - r = find_reg(params, table, num); - if (!r) - r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); if (likely(r)) { perform_access(vcpu, params, r); @@ -2403,14 +2373,20 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu, return 1; } -static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *table, size_t num) +/** + * kvm_reset_sys_regs - sets system registers to reset value + * @vcpu: The VCPU pointer + * + * This function finds the right table above and sets the registers on the + * virtual CPU struct to their architecturally defined reset values. + */ +void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) { unsigned long i; - for (i = 0; i < num; i++) - if (table[i].reset) - table[i].reset(vcpu, &table[i]); + for (i = 0; i < ARRAY_SIZE(sys_reg_descs); i++) + if (sys_reg_descs[i].reset) + sys_reg_descs[i].reset(vcpu, &sys_reg_descs[i]); } /** @@ -2420,7 +2396,7 @@ static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, int kvm_handle_sys_reg(struct kvm_vcpu *vcpu) { struct sys_reg_params params; - unsigned long esr = kvm_vcpu_get_hsr(vcpu); + unsigned long esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); int ret; @@ -2491,8 +2467,7 @@ const struct sys_reg_desc *find_reg_by_id(u64 id, static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id) { - size_t num; - const struct sys_reg_desc *table, *r; + const struct sys_reg_desc *r; struct sys_reg_params params; /* We only do sys_reg for now. 
*/ @@ -2502,10 +2477,7 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, if (!index_to_params(id, &params)) return NULL; - table = get_target_table(vcpu->arch.target, true, &num); - r = find_reg(&params, table, num); - if (!r) - r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); + r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); /* Not saved in the sys_reg array and not otherwise accessible? */ if (r && !(r->reg || r->get_user)) @@ -2805,35 +2777,17 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu, /* Assumed ordered tables, see kvm_sys_reg_table_init. */ static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind) { - const struct sys_reg_desc *i1, *i2, *end1, *end2; + const struct sys_reg_desc *i2, *end2; unsigned int total = 0; - size_t num; int err; - /* We check for duplicates here, to allow arch-specific overrides. */ - i1 = get_target_table(vcpu->arch.target, true, &num); - end1 = i1 + num; i2 = sys_reg_descs; end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs); - BUG_ON(i1 == end1 || i2 == end2); - - /* Walk carefully, as both tables may refer to the same register. */ - while (i1 || i2) { - int cmp = cmp_sys_reg(i1, i2); - /* target-specific overrides generic entry. */ - if (cmp <= 0) - err = walk_one_sys_reg(vcpu, i1, &uind, &total); - else - err = walk_one_sys_reg(vcpu, i2, &uind, &total); - + while (i2 != end2) { + err = walk_one_sys_reg(vcpu, i2++, &uind, &total); if (err) return err; - - if (cmp <= 0 && ++i1 == end1) - i1 = NULL; - if (cmp >= 0 && ++i2 == end2) - i2 = NULL; } return total; } @@ -2900,22 +2854,3 @@ void kvm_sys_reg_table_init(void) /* Clear all higher bits. */ cache_levels &= (1 << (i*3))-1; } - -/** - * kvm_reset_sys_regs - sets system registers to reset value - * @vcpu: The VCPU pointer - * - * This function finds the right table above and sets the registers on the - * virtual CPU struct to their architecturally defined reset values. 
- */ -void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) -{ - size_t num; - const struct sys_reg_desc *table; - - /* Generic chip reset first (so target could override). */ - reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); - - table = get_target_table(vcpu->arch.target, true, &num); - reset_sys_reg_descs(vcpu, table, num); -} diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c deleted file mode 100644 index aa9d356451eb..000000000000 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ /dev/null @@ -1,96 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier <[email protected]> - * - * Based on arch/arm/kvm/coproc_a15.c: - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Authors: Rusty Russell <[email protected]> - * Christoffer Dall <[email protected]> - */ -#include <linux/kvm_host.h> -#include <asm/cputype.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_emulate.h> -#include <asm/kvm_coproc.h> -#include <asm/sysreg.h> -#include <linux/init.h> - -#include "sys_regs.h" - -static bool access_actlr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - if (p->is_write) - return ignore_write(vcpu, p); - - p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1); - - if (p->is_aarch32) { - if (r->Op2 & 2) - p->regval = upper_32_bits(p->regval); - else - p->regval = lower_32_bits(p->regval); - } - - return true; -} - -static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) -{ - __vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1); -} - -/* - * Implementation specific sys-reg registers. 
- * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 - */ -static const struct sys_reg_desc genericv8_sys_regs[] = { - { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, -}; - -static const struct sys_reg_desc genericv8_cp15_regs[] = { - /* ACTLR */ - { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001), - access_actlr }, - { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b011), - access_actlr }, -}; - -static struct kvm_sys_reg_target_table genericv8_target_table = { - .table64 = { - .table = genericv8_sys_regs, - .num = ARRAY_SIZE(genericv8_sys_regs), - }, - .table32 = { - .table = genericv8_cp15_regs, - .num = ARRAY_SIZE(genericv8_cp15_regs), - }, -}; - -static int __init sys_reg_genericv8_init(void) -{ - unsigned int i; - - for (i = 1; i < ARRAY_SIZE(genericv8_sys_regs); i++) - BUG_ON(cmp_sys_reg(&genericv8_sys_regs[i-1], - &genericv8_sys_regs[i]) >= 0); - - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_AEM_V8, - &genericv8_target_table); - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8, - &genericv8_target_table); - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53, - &genericv8_target_table); - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57, - &genericv8_target_table); - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA, - &genericv8_target_table); - kvm_register_target_sys_reg_table(KVM_ARM_TARGET_GENERIC_V8, - &genericv8_target_table); - - return 0; -} -late_initcall(sys_reg_genericv8_init); diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h index 4c71270cc097..4691053c5ee4 100644 --- a/arch/arm64/kvm/trace_arm.h +++ b/arch/arm64/kvm/trace_arm.h @@ -301,8 +301,8 @@ TRACE_EVENT(kvm_timer_save_state, ), TP_fast_assign( - __entry->ctl = ctx->cnt_ctl; - __entry->cval = ctx->cnt_cval; + __entry->ctl = timer_get_ctl(ctx); + __entry->cval = timer_get_cval(ctx); __entry->timer_idx = arch_timer_ctx_index(ctx); ), @@ -323,8 +323,8 @@ TRACE_EVENT(kvm_timer_restore_state, 
), TP_fast_assign( - __entry->ctl = ctx->cnt_ctl; - __entry->cval = ctx->cnt_cval; + __entry->ctl = timer_get_ctl(ctx); + __entry->cval = timer_get_cval(ctx); __entry->timer_idx = arch_timer_ctx_index(ctx); ), diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index a4f48c1ac28c..e0404bcab019 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -48,7 +48,7 @@ __init void kvm_compute_layout(void) va_mask = GENMASK_ULL(tag_lsb - 1, 0); tag_val = hyp_va_msb; - if (tag_lsb != (vabits_actual - 1)) { + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && tag_lsb != (vabits_actual - 1)) { /* We have some free bits to insert a random tag. */ tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); } diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c index d8cdfea5cc96..79f8899b234c 100644 --- a/arch/arm64/kvm/vgic/vgic-irqfd.c +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -100,19 +100,33 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, /** * kvm_arch_set_irq_inatomic: fast-path for irqfd injection - * - * Currently only direct MSI injection is supported. */ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) { - if (e->type == KVM_IRQ_ROUTING_MSI && vgic_has_its(kvm) && level) { + if (!level) + return -EWOULDBLOCK; + + switch (e->type) { + case KVM_IRQ_ROUTING_MSI: { struct kvm_msi msi; + if (!vgic_has_its(kvm)) + break; + kvm_populate_msi(e, &msi); - if (!vgic_its_inject_cached_translation(kvm, &msi)) - return 0; + return vgic_its_inject_cached_translation(kvm, &msi); + } + + case KVM_IRQ_ROUTING_IRQCHIP: + /* + * Injecting SPIs is always possible in atomic context + * as long as the damn vgic is initialized. 
+ */ + if (unlikely(!vgic_initialized(kvm))) + break; + return vgic_irqfd_set_irq(e, kvm, irq_source_id, 1, line_status); } return -EWOULDBLOCK; diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index c012a52b19f5..40cbaca81333 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -757,9 +757,8 @@ int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi) db = (u64)msi->address_hi << 32 | msi->address_lo; irq = vgic_its_check_cache(kvm, db, msi->devid, msi->data); - if (!irq) - return -1; + return -EWOULDBLOCK; raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = true; diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index d2339a2b9fb9..5c786b915cd3 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -389,7 +389,7 @@ u64 vgic_sanitise_outer_cacheability(u64 field) case GIC_BASER_CACHE_nC: return field; default: - return GIC_BASER_CACHE_nC; + return GIC_BASER_CACHE_SameAsInner; } } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 8afb238ff335..f07333e86c2f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -404,7 +404,8 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re #define VM_FAULT_BADACCESS 0x020000 static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr, - unsigned int mm_flags, unsigned long vm_flags) + unsigned int mm_flags, unsigned long vm_flags, + struct pt_regs *regs) { struct vm_area_struct *vma = find_vma(mm, addr); @@ -428,7 +429,7 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr, */ if (!(vma->vm_flags & vm_flags)) return VM_FAULT_BADACCESS; - return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags); + return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags, regs); } static bool is_el0_instruction_abort(unsigned int esr) @@ -450,7 +451,7 @@ static int __kprobes 
do_page_fault(unsigned long addr, unsigned int esr, { const struct fault_info *inf; struct mm_struct *mm = current->mm; - vm_fault_t fault, major = 0; + vm_fault_t fault; unsigned long vm_flags = VM_ACCESS_FLAGS; unsigned int mm_flags = FAULT_FLAG_DEFAULT; @@ -516,8 +517,7 @@ retry: #endif } - fault = __do_page_fault(mm, addr, mm_flags, vm_flags); - major |= fault & VM_FAULT_MAJOR; + fault = __do_page_fault(mm, addr, mm_flags, vm_flags, regs); /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { @@ -538,25 +538,8 @@ retry: * Handle the "normal" (no error) case first. */ if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | - VM_FAULT_BADACCESS)))) { - /* - * Major/minor page fault accounting is only done - * once. If we go through a retry, it is extremely - * likely that the page will be found in page cache at - * that point. - */ - if (major) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, - addr); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, - addr); - } - + VM_FAULT_BADACCESS)))) return 0; - } /* * If we are in kernel mode at this point, we have no context to diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index aafcee3e3f7e..73f8b49d485c 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -461,13 +461,3 @@ void __init arm64_numa_init(void) numa_init(dummy_numa_init); } - -/* - * We hope that we will be hotplugging memory on nodes we already know about, - * such that acpi_get_node() succeeds and we never fall back to this... 
- */ -int memory_add_physaddr_to_nid(u64 addr) -{ - pr_warn("Unknown node for memory at 0x%llx, assuming node 0\n", addr); - return 0; -} diff --git a/arch/csky/include/asm/segment.h b/arch/csky/include/asm/segment.h index db2640d5f575..79ede9b1a646 100644 --- a/arch/csky/include/asm/segment.h +++ b/arch/csky/include/asm/segment.h @@ -13,6 +13,6 @@ typedef struct { #define USER_DS ((mm_segment_t) { 0x80000000UL }) #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #endif /* __ASM_CSKY_SEGMENT_H */ diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c index b1dce9f2f04d..081b178b41b1 100644 --- a/arch/csky/mm/fault.c +++ b/arch/csky/mm/fault.c @@ -150,7 +150,8 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0); + fault = handle_mm_fault(vma, address, write ? 
FAULT_FLAG_WRITE : 0, + regs); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -160,16 +161,6 @@ good_area: goto bad_area; BUG(); } - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, - address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, - address); - } - mmap_read_unlock(mm); return; diff --git a/arch/h8300/include/asm/segment.h b/arch/h8300/include/asm/segment.h index a407978f9f9f..37950725d9b9 100644 --- a/arch/h8300/include/asm/segment.h +++ b/arch/h8300/include/asm/segment.h @@ -33,7 +33,7 @@ static inline mm_segment_t get_fs(void) return USER_DS; } -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #endif /* __ASSEMBLY__ */ diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index cd3808f96b93..ef32c5a84ff3 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -18,6 +18,7 @@ #include <linux/signal.h> #include <linux/extable.h> #include <linux/hardirq.h> +#include <linux/perf_event.h> /* * Decode of hardware exception sends us to one of several @@ -53,6 +54,8 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs) if (user_mode(regs)) flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -88,7 +91,7 @@ good_area: break; } - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -96,10 +99,6 @@ good_area: /* The most common case -- we are done. 
*/ if (likely(!(fault & VM_FAULT_ERROR))) { if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; goto retry; diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h index 8aa473a4b0f4..179243c3dfc7 100644 --- a/arch/ia64/include/asm/uaccess.h +++ b/arch/ia64/include/asm/uaccess.h @@ -50,7 +50,7 @@ #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) /* * When accessing user memory, we need to make sure the entire area really is in diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 3a4dec334cc5..cd9766d2b6e0 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -14,6 +14,7 @@ #include <linux/kdebug.h> #include <linux/prefetch.h> #include <linux/uaccess.h> +#include <linux/perf_event.h> #include <asm/processor.h> #include <asm/exception.h> @@ -105,6 +106,8 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re flags |= FAULT_FLAG_USER; if (mask & VM_WRITE) flags |= FAULT_FLAG_WRITE; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); @@ -143,7 +146,7 @@ retry: * sure we exit gracefully rather than endlessly redo the * fault. 
*/ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -166,10 +169,6 @@ retry: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index 5e1015eb6d0d..f34964271101 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -106,7 +106,5 @@ int memory_add_physaddr_to_nid(u64 addr) return 0; return nid; } - -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif #endif diff --git a/arch/m68k/include/asm/segment.h b/arch/m68k/include/asm/segment.h index c6686559e9b7..2b5e68a71ef7 100644 --- a/arch/m68k/include/asm/segment.h +++ b/arch/m68k/include/asm/segment.h @@ -52,7 +52,7 @@ static inline void set_fs(mm_segment_t val) #define set_fs(x) (current_thread_info()->addr_limit = (x)) #endif -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #endif /* __ASSEMBLY__ */ diff --git a/arch/m68k/include/asm/tlbflush.h b/arch/m68k/include/asm/tlbflush.h index 191e75a6bb24..5337bc2c262f 100644 --- a/arch/m68k/include/asm/tlbflush.h +++ b/arch/m68k/include/asm/tlbflush.h @@ -85,10 +85,10 @@ static inline void flush_tlb_mm(struct mm_struct *mm) static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { if (vma->vm_mm == current->active_mm) { - mm_segment_t old_fs = get_fs(); - set_fs(USER_DS); + mm_segment_t old_fs = force_uaccess_begin(); + __flush_tlb_one(addr); - set_fs(old_fs); + force_uaccess_end(old_fs); } } diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 508abb63da67..795f483b1050 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -12,6 +12,7 @@ #include <linux/interrupt.h> #include <linux/module.h> #include <linux/uaccess.h> +#include <linux/perf_event.h> #include <asm/setup.h> #include 
<asm/traps.h> @@ -84,6 +85,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, if (user_mode(regs)) flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); @@ -134,7 +137,7 @@ good_area: * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); pr_debug("handle_mm_fault returns %x\n", fault); if (fault_signal_pending(fault, regs)) @@ -150,16 +153,7 @@ good_area: BUG(); } - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 6723c56ec378..304b04ffea2f 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -41,7 +41,7 @@ # define get_fs() (current_thread_info()->addr_limit) # define set_fs(val) (current_thread_info()->addr_limit = (val)) -# define segment_eq(a, b) ((a).seg == (b).seg) +# define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #ifndef CONFIG_MMU diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index a2bfe587b491..b3fed2cecf84 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -28,6 +28,7 @@ #include <linux/mman.h> #include <linux/mm.h> #include <linux/interrupt.h> +#include <linux/perf_event.h> #include <asm/page.h> #include <asm/mmu.h> @@ -121,6 +122,8 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, if (user_mode(regs)) flags |= FAULT_FLAG_USER; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + /* When running in the kernel we expect faults to occur only to * addresses in user space. 
All other faults represent errors in the * kernel and should generate an OOPS. Unfortunately, in the case of an @@ -214,7 +217,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -230,10 +233,6 @@ good_area: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (unlikely(fault & VM_FAULT_MAJOR)) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index 62b298c50905..61fc01f177a6 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -72,7 +72,7 @@ extern u64 __ua_limit; #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) /* * eva_kernel_access() - determine whether kernel memory access on an EVA system diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index 0adce604fa44..126a5f3f4e4c 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -191,17 +191,16 @@ static void emulate_load_store_insn(struct pt_regs *regs, * memory, so we need to "switch" the address limit to * user space, so that address check can work properly. 
*/ - seg = get_fs(); - set_fs(USER_DS); + seg = force_uaccess_begin(); switch (insn.spec3_format.func) { case lhe_op: if (!access_ok(addr, 2)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } LoadHWE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } compute_return_epc(regs); @@ -209,12 +208,12 @@ static void emulate_load_store_insn(struct pt_regs *regs, break; case lwe_op: if (!access_ok(addr, 4)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } LoadWE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } compute_return_epc(regs); @@ -222,12 +221,12 @@ static void emulate_load_store_insn(struct pt_regs *regs, break; case lhue_op: if (!access_ok(addr, 2)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } LoadHWUE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } compute_return_epc(regs); @@ -235,35 +234,35 @@ static void emulate_load_store_insn(struct pt_regs *regs, break; case she_op: if (!access_ok(addr, 2)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } compute_return_epc(regs); value = regs->regs[insn.spec3_format.rt]; StoreHWE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } break; case swe_op: if (!access_ok(addr, 4)) { - set_fs(seg); + force_uaccess_end(seg); goto sigbus; } compute_return_epc(regs); value = regs->regs[insn.spec3_format.rt]; StoreWE(addr, value, res); if (res) { - set_fs(seg); + force_uaccess_end(seg); goto fault; } break; default: - set_fs(seg); + force_uaccess_end(seg); goto sigill; } - set_fs(seg); + force_uaccess_end(seg); } #endif break; diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 703782355318..d70c4f8e14e2 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1935,7 +1935,7 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, case lwu_op: vcpu->mmio_needed = 1; /* unsigned */ - /* fall through */ + 
fallthrough; #endif case lw_op: run->mmio.len = 4; diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index 3932f767e938..c299e5d6d69c 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -29,7 +29,9 @@ #include <linux/kvm_host.h> #include "interrupt.h" +#ifdef CONFIG_CPU_LOONGSON64 #include "loongson_regs.h" +#endif #include "trace.h" @@ -1142,7 +1144,6 @@ static enum emulation_result kvm_vz_gpsi_cache(union mips_instruction inst, #ifdef CONFIG_CPU_LOONGSON64 static enum emulation_result kvm_vz_gpsi_lwc2(union mips_instruction inst, u32 *opc, u32 cause, - struct kvm_run *run, struct kvm_vcpu *vcpu) { unsigned int rs, rd; @@ -1240,7 +1241,7 @@ static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc, #endif #ifdef CONFIG_CPU_LOONGSON64 case lwc2_op: - er = kvm_vz_gpsi_lwc2(inst, opc, cause, run, vcpu); + er = kvm_vz_gpsi_lwc2(inst, opc, cause, vcpu); break; #endif case spec3_op: diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 01b168a90434..7c871b14e74a 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -96,6 +96,8 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, if (user_mode(regs)) flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -152,12 +154,11 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. 
*/ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -168,15 +169,6 @@ good_area: BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - tsk->maj_flt++; - } else { - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - tsk->min_flt++; - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h index 3a9219f53ee0..010ba5f1d7dd 100644 --- a/arch/nds32/include/asm/uaccess.h +++ b/arch/nds32/include/asm/uaccess.h @@ -44,7 +44,7 @@ static inline void set_fs(mm_segment_t fs) current_thread_info()->addr_limit = fs; } -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) #define __range_ok(addr, size) (size <= get_fs() && addr <= (get_fs() -size)) diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c index e85bbbadc0e7..e01ad5d17224 100644 --- a/arch/nds32/kernel/process.c +++ b/arch/nds32/kernel/process.c @@ -121,7 +121,7 @@ void show_regs(struct pt_regs *regs) regs->uregs[3], regs->uregs[2], regs->uregs[1], regs->uregs[0]); pr_info(" IRQs o%s Segment %s\n", interrupts_enabled(regs) ? "n" : "ff", - segment_eq(get_fs(), KERNEL_DS)? "kernel" : "user"); + uaccess_kernel() ? 
"kernel" : "user"); } EXPORT_SYMBOL(show_regs); diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c index c8b9061a2ee3..1eb7ded6992b 100644 --- a/arch/nds32/mm/alignment.c +++ b/arch/nds32/mm/alignment.c @@ -512,7 +512,7 @@ int do_unaligned_access(unsigned long addr, struct pt_regs *regs) { unsigned long inst; int ret = -EFAULT; - mm_segment_t seg = get_fs(); + mm_segment_t seg; inst = get_inst(regs->ipc); @@ -520,13 +520,12 @@ int do_unaligned_access(unsigned long addr, struct pt_regs *regs) "Faulting addr: 0x%08lx, pc: 0x%08lx [inst: 0x%08lx ]\n", addr, regs->ipc, inst); - set_fs(USER_DS); - + seg = force_uaccess_begin(); if (inst & NDS32_16BIT_INSTRUCTION) ret = do_16((inst >> 16) & 0xffff, regs); else ret = do_32(inst, regs); - set_fs(seg); + force_uaccess_end(seg); return ret; } diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c index 8fb73f6401a0..f02524eb6d56 100644 --- a/arch/nds32/mm/fault.c +++ b/arch/nds32/mm/fault.c @@ -121,6 +121,8 @@ void do_page_fault(unsigned long entry, unsigned long addr, if (unlikely(faulthandler_disabled() || !mm)) goto no_context; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -206,7 +208,7 @@ good_area: * the fault. */ - fault = handle_mm_fault(vma, addr, flags); + fault = handle_mm_fault(vma, addr, flags, regs); /* * If we need to retry but a fatal signal is pending, handle the @@ -228,22 +230,7 @@ good_area: goto bad_area; } - /* - * Major/minor page fault accounting is only done on the initial - * attempt. If we go through a retry, it is extremely likely that the - * page will be found in page cache at that point. 
- */ - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, - 1, regs, addr); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, - 1, regs, addr); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h index e83f831a76f9..a741abbed6fb 100644 --- a/arch/nios2/include/asm/uaccess.h +++ b/arch/nios2/include/asm/uaccess.h @@ -30,7 +30,7 @@ #define get_fs() (current_thread_info()->addr_limit) #define set_fs(seg) (current_thread_info()->addr_limit = (seg)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define __access_ok(addr, len) \ (((signed long)(((long)get_fs().seg) & \ diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index 4112ef0e247e..9476feecf512 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -24,6 +24,7 @@ #include <linux/mm.h> #include <linux/extable.h> #include <linux/uaccess.h> +#include <linux/perf_event.h> #include <asm/mmu_context.h> #include <asm/traps.h> @@ -83,6 +84,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause, if (user_mode(regs)) flags |= FAULT_FLAG_USER; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + if (!mmap_read_trylock(mm)) { if (!user_mode(regs) && !search_exception_tables(regs->ea)) goto bad_area_nosemaphore; @@ -131,7 +134,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -146,16 +149,7 @@ good_area: BUG(); } - /* - * Major/minor page fault accounting is only done on the - * initial attempt. 
If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/openrisc/include/asm/io.h b/arch/openrisc/include/asm/io.h index db02fb2077d9..7d6b4a77b379 100644 --- a/arch/openrisc/include/asm/io.h +++ b/arch/openrisc/include/asm/io.h @@ -14,6 +14,8 @@ #ifndef __ASM_OPENRISC_IO_H #define __ASM_OPENRISC_IO_H +#include <linux/types.h> + /* * PCI: can we really do 0 here if we have no port IO? */ @@ -25,9 +27,12 @@ #define PIO_OFFSET 0 #define PIO_MASK 0 -#include <asm-generic/io.h> - +#define ioremap ioremap void __iomem *ioremap(phys_addr_t offset, unsigned long size); + +#define iounmap iounmap extern void iounmap(void *addr); +#include <asm-generic/io.h> + #endif diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index 17c24f14615f..f0390211236b 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -43,21 +43,22 @@ #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) -#define segment_eq(a, b) ((a) == (b)) +#define uaccess_kernel() (get_fs() == KERNEL_DS) /* Ensure that the range from addr to addr+size is all within the process' * address space */ -#define __range_ok(addr, size) (size <= get_fs() && addr <= (get_fs()-size)) +static inline int __range_ok(unsigned long addr, unsigned long size) +{ + const mm_segment_t fs = get_fs(); -/* Ensure that addr is below task's addr_limit */ -#define __addr_ok(addr) ((unsigned long) addr < get_fs()) + return size <= fs && addr <= (fs - size); +} #define access_ok(addr, size) \ ({ \ - unsigned long __ao_addr = (unsigned long)(addr); \ - unsigned long __ao_size = (unsigned long)(size); \ - __range_ok(__ao_addr, __ao_size); \ + __chk_user_ptr(addr); \ + 
__range_ok((unsigned long)(addr), (size)); \ }) /* @@ -100,7 +101,7 @@ extern long __put_user_bad(void); #define __put_user_check(x, ptr, size) \ ({ \ long __pu_err = -EFAULT; \ - __typeof__(*(ptr)) *__pu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ if (access_ok(__pu_addr, size)) \ __put_user_size((x), __pu_addr, (size), __pu_err); \ __pu_err; \ @@ -173,7 +174,7 @@ struct __large_struct { #define __get_user_check(x, ptr, size) \ ({ \ long __gu_err = -EFAULT, __gu_val = 0; \ - const __typeof__(*(ptr)) * __gu_addr = (ptr); \ + const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ if (access_ok(__gu_addr, size)) \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ @@ -241,17 +242,17 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long size) return __copy_tofrom_user(to, (__force const void *)from, size); } static inline unsigned long -raw_copy_to_user(void *to, const void __user *from, unsigned long size) +raw_copy_to_user(void __user *to, const void *from, unsigned long size) { return __copy_tofrom_user((__force void *)to, from, size); } #define INLINE_COPY_FROM_USER #define INLINE_COPY_TO_USER -extern unsigned long __clear_user(void *addr, unsigned long size); +extern unsigned long __clear_user(void __user *addr, unsigned long size); static inline __must_check unsigned long -clear_user(void *addr, unsigned long size) +clear_user(void __user *addr, unsigned long size) { if (likely(access_ok(addr, size))) size = __clear_user(addr, size); diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index 8aa438e1f51f..b18e775f8be3 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -292,13 +292,15 @@ void __init setup_arch(char **cmdline_p) init_mm.brk = (unsigned long)_end; #ifdef CONFIG_BLK_DEV_INITRD - initrd_start = (unsigned long)&__initrd_start; - initrd_end = (unsigned long)&__initrd_end; if (initrd_start == initrd_end) { + 
printk(KERN_INFO "Initial ramdisk not found\n"); initrd_start = 0; initrd_end = 0; + } else { + printk(KERN_INFO "Initial ramdisk at: 0x%p (%lu bytes)\n", + (void *)(initrd_start), initrd_end - initrd_start); + initrd_below_start_ok = 1; } - initrd_below_start_ok = 1; #endif /* setup memblock allocator */ diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c index 4f0754874d78..97804f21a40c 100644 --- a/arch/openrisc/kernel/signal.c +++ b/arch/openrisc/kernel/signal.c @@ -68,7 +68,7 @@ static int restore_sigcontext(struct pt_regs *regs, asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs) { - struct rt_sigframe *frame = (struct rt_sigframe __user *)regs->sp; + struct rt_sigframe __user *frame = (struct rt_sigframe __user *)regs->sp; sigset_t set; /* @@ -76,7 +76,7 @@ asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs) * then frame should be dword aligned here. If it's * not, then the user is trying to mess with us. */ - if (((long)frame) & 3) + if (((unsigned long)frame) & 3) goto badframe; if (!access_ok(frame, sizeof(*frame))) @@ -151,7 +151,7 @@ static inline void __user *get_sigframe(struct ksignal *ksig, static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { - struct rt_sigframe *frame; + struct rt_sigframe __user *frame; unsigned long return_ip; int err = 0; @@ -181,10 +181,10 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, l.ori r11,r0,__NR_sigreturn l.sys 1 */ - err |= __put_user(0xa960, (short *)(frame->retcode + 0)); - err |= __put_user(__NR_rt_sigreturn, (short *)(frame->retcode + 2)); - err |= __put_user(0x20000001, (unsigned long *)(frame->retcode + 4)); - err |= __put_user(0x15000000, (unsigned long *)(frame->retcode + 8)); + err |= __put_user(0xa960, (short __user *)(frame->retcode + 0)); + err |= __put_user(__NR_rt_sigreturn, (short __user *)(frame->retcode + 2)); + err |= __put_user(0x20000001, (unsigned long __user *)(frame->retcode + 4)); + err |= __put_user(0x15000000, 
(unsigned long __user *)(frame->retcode + 8)); if (err) return -EFAULT; diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c index bd1e660bbc89..29c82ef2e207 100644 --- a/arch/openrisc/kernel/smp.c +++ b/arch/openrisc/kernel/smp.c @@ -219,30 +219,99 @@ static inline void ipi_flush_tlb_all(void *ignored) local_flush_tlb_all(); } +static inline void ipi_flush_tlb_mm(void *info) +{ + struct mm_struct *mm = (struct mm_struct *)info; + + local_flush_tlb_mm(mm); +} + +static void smp_flush_tlb_mm(struct cpumask *cmask, struct mm_struct *mm) +{ + unsigned int cpuid; + + if (cpumask_empty(cmask)) + return; + + cpuid = get_cpu(); + + if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) { + /* local cpu is the only cpu present in cpumask */ + local_flush_tlb_mm(mm); + } else { + on_each_cpu_mask(cmask, ipi_flush_tlb_mm, mm, 1); + } + put_cpu(); +} + +struct flush_tlb_data { + unsigned long addr1; + unsigned long addr2; +}; + +static inline void ipi_flush_tlb_page(void *info) +{ + struct flush_tlb_data *fd = (struct flush_tlb_data *)info; + + local_flush_tlb_page(NULL, fd->addr1); +} + +static inline void ipi_flush_tlb_range(void *info) +{ + struct flush_tlb_data *fd = (struct flush_tlb_data *)info; + + local_flush_tlb_range(NULL, fd->addr1, fd->addr2); +} + +static void smp_flush_tlb_range(struct cpumask *cmask, unsigned long start, + unsigned long end) +{ + unsigned int cpuid; + + if (cpumask_empty(cmask)) + return; + + cpuid = get_cpu(); + + if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) { + /* local cpu is the only cpu present in cpumask */ + if ((end - start) <= PAGE_SIZE) + local_flush_tlb_page(NULL, start); + else + local_flush_tlb_range(NULL, start, end); + } else { + struct flush_tlb_data fd; + + fd.addr1 = start; + fd.addr2 = end; + + if ((end - start) <= PAGE_SIZE) + on_each_cpu_mask(cmask, ipi_flush_tlb_page, &fd, 1); + else + on_each_cpu_mask(cmask, ipi_flush_tlb_range, &fd, 1); + } + put_cpu(); +} + void flush_tlb_all(void) { 
on_each_cpu(ipi_flush_tlb_all, NULL, 1); } -/* - * FIXME: implement proper functionality instead of flush_tlb_all. - * *But*, as things currently stands, the local_tlb_flush_* functions will - * all boil down to local_tlb_flush_all anyway. - */ void flush_tlb_mm(struct mm_struct *mm) { - on_each_cpu(ipi_flush_tlb_all, NULL, 1); + smp_flush_tlb_mm(mm_cpumask(mm), mm); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { - on_each_cpu(ipi_flush_tlb_all, NULL, 1); + smp_flush_tlb_range(mm_cpumask(vma->vm_mm), uaddr, uaddr + PAGE_SIZE); } void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - on_each_cpu(ipi_flush_tlb_all, NULL, 1); + smp_flush_tlb_range(mm_cpumask(vma->vm_mm), start, end); } /* Instruction cache invalidate - performed on each cpu */ diff --git a/arch/openrisc/kernel/stacktrace.c b/arch/openrisc/kernel/stacktrace.c index 43f140a28bc7..54d38809e22c 100644 --- a/arch/openrisc/kernel/stacktrace.c +++ b/arch/openrisc/kernel/stacktrace.c @@ -13,6 +13,7 @@ #include <linux/export.h> #include <linux/sched.h> #include <linux/sched/debug.h> +#include <linux/sched/task_stack.h> #include <linux/stacktrace.h> #include <asm/processor.h> @@ -68,12 +69,25 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { unsigned long *sp = NULL; + if (!try_get_task_stack(tsk)) + return; + if (tsk == current) sp = (unsigned long *) &sp; - else - sp = (unsigned long *) KSTK_ESP(tsk); + else { + unsigned long ksp; + + /* Locate stack from kernel context */ + ksp = task_thread_info(tsk)->ksp; + ksp += STACK_FRAME_OVERHEAD; /* redzone */ + ksp += sizeof(struct pt_regs); + + sp = (unsigned long *) ksp; + } unwind_stack(trace, sp, save_stack_address_nosched); + + put_task_stack(tsk); } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S index 60449fd7f16f..22fbc5fb24b3 100644 --- a/arch/openrisc/kernel/vmlinux.lds.S +++ 
b/arch/openrisc/kernel/vmlinux.lds.S @@ -96,18 +96,6 @@ SECTIONS __init_end = .; - . = ALIGN(PAGE_SIZE); - .initrd : AT(ADDR(.initrd) - LOAD_OFFSET) - { - __initrd_start = .; - *(.initrd) - __initrd_end = .; - FILL (0); - . = ALIGN (PAGE_SIZE); - } - - __vmlinux_end = .; /* last address of the physical file */ - BSS_SECTION(0, 0, 0x20) _end = .; diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index d2224ccca294..ca97d9baab51 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -15,6 +15,7 @@ #include <linux/interrupt.h> #include <linux/extable.h> #include <linux/sched/signal.h> +#include <linux/perf_event.h> #include <linux/uaccess.h> #include <asm/siginfo.h> @@ -103,6 +104,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address, if (in_interrupt() || !mm) goto no_context; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -159,7 +162,7 @@ good_area: * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -176,10 +179,6 @@ good_area: if (flags & FAULT_FLAG_ALLOW_RETRY) { /*RGD modeled on Cris */ - if (fault & VM_FAULT_MAJOR) - tsk->maj_flt++; - else - tsk->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/openrisc/mm/tlb.c b/arch/openrisc/mm/tlb.c index 4b680aed8f5f..2b6feabf6381 100644 --- a/arch/openrisc/mm/tlb.c +++ b/arch/openrisc/mm/tlb.c @@ -137,21 +137,28 @@ void local_flush_tlb_mm(struct mm_struct *mm) void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *next_tsk) { + unsigned int cpu; + + if (unlikely(prev == next)) + return; + + cpu = smp_processor_id(); + + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + cpumask_set_cpu(cpu, mm_cpumask(next)); + /* remember the pgd for the fault handlers * this is similar to the pgd register in some other CPU's. 
* we need our own copy of it because current and active_mm * might be invalid at points where we still need to derefer * the pgd. */ - current_pgd[smp_processor_id()] = next->pgd; + current_pgd[cpu] = next->pgd; /* We don't have context support implemented, so flush all * entries belonging to previous map */ - - if (prev != next) - local_flush_tlb_mm(prev); - + local_flush_tlb_mm(prev); } /* diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 03862320f779..21b375c67e53 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -34,13 +34,13 @@ extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned; /* Can't use raw_spin_lock_irq because of #include problems, so * this is the substitute */ #define _atomic_spin_lock_irqsave(l,f) do { \ - arch_spinlock_t *s = ATOMIC_HASH(l); \ + arch_spinlock_t *s = ATOMIC_HASH(l); \ local_irq_save(f); \ arch_spin_lock(s); \ } while(0) #define _atomic_spin_unlock_irqrestore(l,f) do { \ - arch_spinlock_t *s = ATOMIC_HASH(l); \ + arch_spinlock_t *s = ATOMIC_HASH(l); \ arch_spin_unlock(s); \ local_irq_restore(f); \ } while(0) @@ -85,7 +85,7 @@ static __inline__ void atomic_##op(int i, atomic_t *v) \ _atomic_spin_lock_irqsave(v, flags); \ v->counter c_op i; \ _atomic_spin_unlock_irqrestore(v, flags); \ -} \ +} #define ATOMIC_OP_RETURN(op, c_op) \ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ @@ -148,7 +148,7 @@ static __inline__ void atomic64_##op(s64 i, atomic64_t *v) \ _atomic_spin_lock_irqsave(v, flags); \ v->counter c_op i; \ _atomic_spin_unlock_irqrestore(v, flags); \ -} \ +} #define ATOMIC64_OP_RETURN(op, c_op) \ static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v) \ diff --git a/arch/parisc/include/asm/barrier.h b/arch/parisc/include/asm/barrier.h index dbaaca84f27f..640d46edf32e 100644 --- a/arch/parisc/include/asm/barrier.h +++ b/arch/parisc/include/asm/barrier.h @@ -26,6 +26,67 @@ #define __smp_rmb() mb() #define 
__smp_wmb() mb() +#define __smp_store_release(p, v) \ +do { \ + typeof(p) __p = (p); \ + union { typeof(*p) __val; char __c[1]; } __u = \ + { .__val = (__force typeof(*p)) (v) }; \ + compiletime_assert_atomic_type(*p); \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile("stb,ma %0,0(%1)" \ + : : "r"(*(__u8 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("sth,ma %0,0(%1)" \ + : : "r"(*(__u16 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("stw,ma %0,0(%1)" \ + : : "r"(*(__u32 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + case 8: \ + if (IS_ENABLED(CONFIG_64BIT)) \ + asm volatile("std,ma %0,0(%1)" \ + : : "r"(*(__u64 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + } \ +} while (0) + +#define __smp_load_acquire(p) \ +({ \ + union { typeof(*p) __val; char __c[1]; } __u; \ + typeof(p) __p = (p); \ + compiletime_assert_atomic_type(*p); \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile("ldb,ma 0(%1),%0" \ + : "=r"(*(__u8 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("ldh,ma 0(%1),%0" \ + : "=r"(*(__u16 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("ldw,ma 0(%1),%0" \ + : "=r"(*(__u32 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + case 8: \ + if (IS_ENABLED(CONFIG_64BIT)) \ + asm volatile("ldd,ma 0(%1),%0" \ + : "=r"(*(__u64 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + } \ + __u.__val; \ +}) #include <asm-generic/barrier.h> #endif /* !__ASSEMBLY__ */ diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index ebbb9ffe038c..ed2cd4fb479b 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -14,7 +14,7 @@ #define KERNEL_DS ((mm_segment_t){0}) #define USER_DS ((mm_segment_t){1}) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define get_fs() (current_thread_info()->addr_limit) #define set_fs(x) 
(current_thread_info()->addr_limit = (x)) diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 1df0f67ed667..4bab21c71055 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -64,7 +64,8 @@ void notrace __hot ftrace_function_trampoline(unsigned long parent, function_trace_op, regs); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - if (ftrace_graph_return != (trace_func_graph_ret_t) ftrace_stub || + if (dereference_function_descriptor(ftrace_graph_return) != + dereference_function_descriptor(ftrace_stub) || ftrace_graph_entry != ftrace_graph_entry_stub) { unsigned long *parent_rp; diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 66ac0719bd49..4bfe2da9fbe3 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -18,6 +18,7 @@ #include <linux/extable.h> #include <linux/uaccess.h> #include <linux/hugetlb.h> +#include <linux/perf_event.h> #include <asm/traps.h> @@ -281,6 +282,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, acc_type = parisc_acctyp(code, regs->iir); if (acc_type & VM_WRITE) flags |= FAULT_FLAG_WRITE; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); retry: mmap_read_lock(mm); vma = find_vma_prev(mm, address, &prev_vma); @@ -302,7 +304,7 @@ good_area: * fault. 
*/ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -323,10 +325,6 @@ good_area: BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { /* * No need to mmap_read_unlock(mm) as we would diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h index 9cb7d8be2366..0a6319448cb6 100644 --- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h +++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h @@ -23,6 +23,10 @@ int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn); unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm); void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, struct kvm *kvm, bool skip_page_out); +int kvmppc_uvmem_memslot_create(struct kvm *kvm, + const struct kvm_memory_slot *new); +void kvmppc_uvmem_memslot_delete(struct kvm *kvm, + const struct kvm_memory_slot *old); #else static inline int kvmppc_uvmem_init(void) { @@ -82,5 +86,15 @@ static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn) static inline void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, struct kvm *kvm, bool skip_page_out) { } + +static inline int kvmppc_uvmem_memslot_create(struct kvm *kvm, + const struct kvm_memory_slot *new) +{ + return H_UNSUPPORTED; +} + +static inline void kvmppc_uvmem_memslot_delete(struct kvm *kvm, + const struct kvm_memory_slot *old) { } + #endif /* CONFIG_PPC_UV */ #endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index ccf66b3a4c1d..0a056c64c317 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -59,7 +59,7 @@ enum xlate_readwrite { }; extern int kvmppc_vcpu_run(struct kvm_vcpu *vcpu); -extern int __kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu 
*vcpu); +extern int __kvmppc_vcpu_run(struct kvm_vcpu *vcpu); extern void kvmppc_handler_highmem(void); extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 41419f1fc00f..88fb88491fe9 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -474,7 +474,8 @@ #ifndef SPRN_LPID #define SPRN_LPID 0x13F /* Logical Partition Identifier */ #endif -#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ +#define LPID_RSVD_POWER7 0x3ff /* Reserved LPID for partn switching */ +#define LPID_RSVD 0xfff /* Reserved LPID for partn switching */ #define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */ #define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */ #define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */ @@ -1362,6 +1363,7 @@ #define PVR_ARCH_206p 0x0f100003 #define PVR_ARCH_207 0x0f000004 #define PVR_ARCH_300 0x0f000005 +#define PVR_ARCH_31 0x0f000006 /* Macros for setting and retrieving special purpose registers */ #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 64c04ab09112..00699903f1ef 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -38,8 +38,7 @@ static inline void set_fs(mm_segment_t fs) set_thread_flag(TIF_FSCHECK); } -#define segment_eq(a, b) ((a).seg == (b).seg) - +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define user_addr_max() (get_fs().seg) #ifdef __powerpc64__ diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 7c5a1812a1c3..38ea396a23d6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -260,11 +260,15 @@ int kvmppc_mmu_hv_init(void) if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE)) return -EINVAL; - /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */ host_lpid = 0; if (cpu_has_feature(CPU_FTR_HVMODE)) host_lpid = 
mfspr(SPRN_LPID); - rsvd_lpid = LPID_RSVD; + + /* POWER8 and above have 12-bit LPIDs (10-bit in POWER7) */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + rsvd_lpid = LPID_RSVD; + else + rsvd_lpid = LPID_RSVD_POWER7; kvmppc_init_lpid(rsvd_lpid + 1); diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 777aa5625d5f..22a677b18695 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -161,7 +161,9 @@ int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr, return -EINVAL; /* Read the entry from guest memory */ addr = base + (index * sizeof(rpte)); + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte)); + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); if (ret) { if (pte_ret_p) *pte_ret_p = addr; @@ -237,7 +239,9 @@ int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr, /* Read the table to find the root of the radix tree */ ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry)); + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry)); + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); if (ret) return ret; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 0f83f39a2bd2..4ba06a2a306c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -343,13 +343,18 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) vcpu->arch.pvr = pvr; } +/* Dummy value used in computing PCR value below */ +#define PCR_ARCH_31 (PCR_ARCH_300 << 1) + static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) { unsigned long host_pcr_bit = 0, guest_pcr_bit = 0; struct kvmppc_vcore *vc = vcpu->arch.vcore; /* We can (emulate) our own architecture version and anything older */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (cpu_has_feature(CPU_FTR_ARCH_31)) + host_pcr_bit = PCR_ARCH_31; + else if 
(cpu_has_feature(CPU_FTR_ARCH_300)) host_pcr_bit = PCR_ARCH_300; else if (cpu_has_feature(CPU_FTR_ARCH_207S)) host_pcr_bit = PCR_ARCH_207; @@ -375,6 +380,9 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) case PVR_ARCH_300: guest_pcr_bit = PCR_ARCH_300; break; + case PVR_ARCH_31: + guest_pcr_bit = PCR_ARCH_31; + break; default: return -EINVAL; } @@ -2355,7 +2363,7 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) * to trap and then we emulate them. */ vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB | - HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP; + HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX; if (cpu_has_feature(CPU_FTR_HVMODE)) { vcpu->arch.hfscr &= mfspr(SPRN_HFSCR); if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) @@ -4552,16 +4560,14 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, switch (change) { case KVM_MR_CREATE: - if (kvmppc_uvmem_slot_init(kvm, new)) - return; - uv_register_mem_slot(kvm->arch.lpid, - new->base_gfn << PAGE_SHIFT, - new->npages * PAGE_SIZE, - 0, new->id); + /* + * @TODO kvmppc_uvmem_memslot_create() can fail and + * return error. Fix this. 
+ */ + kvmppc_uvmem_memslot_create(kvm, new); break; case KVM_MR_DELETE: - uv_unregister_mem_slot(kvm->arch.lpid, old->id); - kvmppc_uvmem_slot_free(kvm, old); + kvmppc_uvmem_memslot_delete(kvm, old); break; default: /* TODO: Handle KVM_MR_MOVE */ diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 2c849a65db77..6822d23a2da4 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -233,20 +233,21 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) /* copy parameters in */ hv_ptr = kvmppc_get_gpr(vcpu, 4); + regs_ptr = kvmppc_get_gpr(vcpu, 5); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv, - sizeof(struct hv_guest_state)); + sizeof(struct hv_guest_state)) || + kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs, + sizeof(struct pt_regs)); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (err) return H_PARAMETER; + if (kvmppc_need_byteswap(vcpu)) byteswap_hv_regs(&l2_hv); if (l2_hv.version != HV_GUEST_STATE_VERSION) return H_P2; - regs_ptr = kvmppc_get_gpr(vcpu, 5); - err = kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs, - sizeof(struct pt_regs)); - if (err) - return H_PARAMETER; if (kvmppc_need_byteswap(vcpu)) byteswap_pt_regs(&l2_regs); if (l2_hv.vcpu_token >= NR_CPUS) @@ -323,12 +324,12 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) byteswap_hv_regs(&l2_hv); byteswap_pt_regs(&l2_regs); } + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv, - sizeof(struct hv_guest_state)); - if (err) - return H_AUTHORITY; - err = kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs, + sizeof(struct hv_guest_state)) || + kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs, sizeof(struct pt_regs)); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (err) return H_AUTHORITY; @@ -508,12 +509,16 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu) goto not_found; /* Write what was loaded into 
our buffer back to the L1 guest */ + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (rc) goto not_found; } else { /* Load the data to be stored from the L1 guest into our buf */ + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (rc) goto not_found; @@ -548,9 +553,12 @@ static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp) ret = -EFAULT; ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4); - if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) + if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) { + int srcu_idx = srcu_read_lock(&kvm->srcu); ret = kvm_read_guest(kvm, ptbl_addr, &ptbl_entry, sizeof(ptbl_entry)); + srcu_read_unlock(&kvm->srcu, srcu_idx); + } if (ret) { gp->l1_gr_to_hr = 0; gp->process_table = 0; diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 6850bd04bcb9..7705d5557239 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -93,12 +93,133 @@ #include <asm/ultravisor.h> #include <asm/mman.h> #include <asm/kvm_ppc.h> +#include <asm/kvm_book3s_uvmem.h> static struct dev_pagemap kvmppc_uvmem_pgmap; static unsigned long *kvmppc_uvmem_bitmap; static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock); -#define KVMPPC_UVMEM_PFN (1UL << 63) +/* + * States of a GFN + * --------------- + * The GFN can be in one of the following states. + * + * (a) Secure - The GFN is secure. The GFN is associated with + * a Secure VM, the contents of the GFN is not accessible + * to the Hypervisor. This GFN can be backed by a secure-PFN, + * or can be backed by a normal-PFN with contents encrypted. + * The former is true when the GFN is paged-in into the + * ultravisor. The latter is true when the GFN is paged-out + * of the ultravisor. 
+ * + * (b) Shared - The GFN is shared. The GFN is associated with + * a secure VM. The contents of the GFN are accessible to + * the Hypervisor. This GFN is backed by a normal-PFN and its + * content is un-encrypted. + * + * (c) Normal - The GFN is normal. The GFN is associated with + * a normal VM. The contents of the GFN are accessible to + * the Hypervisor. Its content is never encrypted. + * + * States of a VM. + * --------------- + * + * Normal VM: A VM whose contents are always accessible to + * the hypervisor. All its GFNs are normal-GFNs. + * + * Secure VM: A VM whose contents are not accessible to the + * hypervisor without the VM's consent. Its GFNs are + * either Shared-GFNs or Secure-GFNs. + * + * Transient VM: A Normal VM that is transitioning to secure VM. + * The transition starts on successful return of + * H_SVM_INIT_START, and ends on successful return + * of H_SVM_INIT_DONE. This transient VM can have GFNs + * in any of the three states; i.e. Secure-GFN, Shared-GFN, + * and Normal-GFN. The VM never executes in this state + * in supervisor-mode. + * + * Memory slot State. + * ----------------------------- + * The state of a memory slot mirrors the state of the + * VM the memory slot is associated with. + * + * VM State transition. + * -------------------- + * + * A VM always starts in Normal Mode. + * + * H_SVM_INIT_START moves the VM into transient state. During this + * time the Ultravisor may request some of its GFNs to be shared or + * secured. So its GFNs can be in one of the three GFN states. + * + * H_SVM_INIT_DONE moves the VM entirely from transient state to + * secure-state. At this point any left-over normal-GFNs are + * transitioned to Secure-GFN. + * + * H_SVM_INIT_ABORT moves the transient VM back to normal VM. + * All its GFNs are moved to Normal-GFNs. + * + * UV_TERMINATE transitions the secure-VM back to normal-VM.
All + * the secure-GFN and shared-GFNs are transitioned to normal-GFN. + * Note: The contents of the normal-GFN are undefined at this point. + * + * GFN state implementation: + * ------------------------- + * + * Secure GFN is associated with a secure-PFN; also called uvmem_pfn, + * when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag + * set, and contains the value of the secure-PFN. + * It is associated with a normal-PFN; also called mem_pfn, when + * the GFN is paged-out. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set. + * The value of the normal-PFN is not tracked. + * + * Shared GFN is associated with a normal-PFN. Its pfn[] has + * KVMPPC_GFN_SHARED flag set. The value of the normal-PFN + * is not tracked. + * + * Normal GFN is associated with normal-PFN. Its pfn[] has + * no flag set. The value of the normal-PFN is not tracked. + * + * Life cycle of a GFN + * -------------------- + * + * -------------------------------------------------------------- + * | | Share | Unshare | SVM |H_SVM_INIT_DONE| + * | |operation |operation | abort/ | | + * | | | | terminate | | + * ------------------------------------------------------------- + * | | | | | | + * | Secure | Shared | Secure |Normal |Secure | + * | | | | | | + * | Shared | Shared | Secure |Normal |Shared | + * | | | | | | + * | Normal | Shared | Secure |Normal |Secure | + * -------------------------------------------------------------- + * + * Life cycle of a VM + * -------------------- + * + * -------------------------------------------------------------------- + * | | start | H_SVM_ |H_SVM_ |H_SVM_ |UV_SVM_ | + * | | VM |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE | + * | | | | | | | + * --------- ---------------------------------------------------------- + * | | | | | | | + * | Normal | Normal | Transient|Error |Error |Normal | + * | | | | | | | + * | Secure | Error | Error |Error |Error |Normal | + * | | | | | | | + * |Transient| N/A | Error |Secure |Normal |Normal | + * 
-------------------------------------------------------------------- + */ + +#define KVMPPC_GFN_UVMEM_PFN (1UL << 63) +#define KVMPPC_GFN_MEM_PFN (1UL << 62) +#define KVMPPC_GFN_SHARED (1UL << 61) +#define KVMPPC_GFN_SECURE (KVMPPC_GFN_UVMEM_PFN | KVMPPC_GFN_MEM_PFN) +#define KVMPPC_GFN_FLAG_MASK (KVMPPC_GFN_SECURE | KVMPPC_GFN_SHARED) +#define KVMPPC_GFN_PFN_MASK (~KVMPPC_GFN_FLAG_MASK) struct kvmppc_uvmem_slot { struct list_head list; @@ -106,11 +227,11 @@ struct kvmppc_uvmem_slot { unsigned long base_pfn; unsigned long *pfns; }; - struct kvmppc_uvmem_page_pvt { struct kvm *kvm; unsigned long gpa; bool skip_page_out; + bool remove_gfn; }; bool kvmppc_uvmem_available(void) @@ -163,8 +284,8 @@ void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot) mutex_unlock(&kvm->arch.uvmem_lock); } -static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn, - struct kvm *kvm) +static void kvmppc_mark_gfn(unsigned long gfn, struct kvm *kvm, + unsigned long flag, unsigned long uvmem_pfn) { struct kvmppc_uvmem_slot *p; @@ -172,24 +293,41 @@ static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn, if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { unsigned long index = gfn - p->base_pfn; - p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN; + if (flag == KVMPPC_GFN_UVMEM_PFN) + p->pfns[index] = uvmem_pfn | flag; + else + p->pfns[index] = flag; return; } } } -static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm) +/* mark the GFN as secure-GFN associated with @uvmem pfn device-PFN. 
*/ +static void kvmppc_gfn_secure_uvmem_pfn(unsigned long gfn, + unsigned long uvmem_pfn, struct kvm *kvm) { - struct kvmppc_uvmem_slot *p; + kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_UVMEM_PFN, uvmem_pfn); +} - list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) { - if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { - p->pfns[gfn - p->base_pfn] = 0; - return; - } - } +/* mark the GFN as secure-GFN associated with a memory-PFN. */ +static void kvmppc_gfn_secure_mem_pfn(unsigned long gfn, struct kvm *kvm) +{ + kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_MEM_PFN, 0); +} + +/* mark the GFN as a shared GFN. */ +static void kvmppc_gfn_shared(unsigned long gfn, struct kvm *kvm) +{ + kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_SHARED, 0); +} + +/* mark the GFN as a non-existent GFN. */ +static void kvmppc_gfn_remove(unsigned long gfn, struct kvm *kvm) +{ + kvmppc_mark_gfn(gfn, kvm, 0, 0); } +/* return true, if the GFN is a secure-GFN backed by a secure-PFN */ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, unsigned long *uvmem_pfn) { @@ -199,10 +337,10 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) { unsigned long index = gfn - p->base_pfn; - if (p->pfns[index] & KVMPPC_UVMEM_PFN) { + if (p->pfns[index] & KVMPPC_GFN_UVMEM_PFN) { if (uvmem_pfn) *uvmem_pfn = p->pfns[index] & - ~KVMPPC_UVMEM_PFN; + KVMPPC_GFN_PFN_MASK; return true; } else return false; @@ -211,10 +349,114 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, return false; } +/* + * starting from *gfn search for the next available GFN that is not yet + * transitioned to a secure GFN. return the value of that GFN in *gfn. If a + * GFN is found, return true, else return false + * + * Must be called with kvm->arch.uvmem_lock held. 
+ */ +static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot, + struct kvm *kvm, unsigned long *gfn) +{ + struct kvmppc_uvmem_slot *p; + bool ret = false; + unsigned long i; + + list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) + if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns) + break; + if (!p) + return ret; + /* + * The code below assumes, one to one correspondence between + * kvmppc_uvmem_slot and memslot. + */ + for (i = *gfn; i < p->base_pfn + p->nr_pfns; i++) { + unsigned long index = i - p->base_pfn; + + if (!(p->pfns[index] & KVMPPC_GFN_FLAG_MASK)) { + *gfn = i; + ret = true; + break; + } + } + return ret; +} + +static int kvmppc_memslot_page_merge(struct kvm *kvm, + const struct kvm_memory_slot *memslot, bool merge) +{ + unsigned long gfn = memslot->base_gfn; + unsigned long end, start = gfn_to_hva(kvm, gfn); + int ret = 0; + struct vm_area_struct *vma; + int merge_flag = (merge) ? MADV_MERGEABLE : MADV_UNMERGEABLE; + + if (kvm_is_error_hva(start)) + return H_STATE; + + end = start + (memslot->npages << PAGE_SHIFT); + + mmap_write_lock(kvm->mm); + do { + vma = find_vma_intersection(kvm->mm, start, end); + if (!vma) { + ret = H_STATE; + break; + } + ret = ksm_madvise(vma, vma->vm_start, vma->vm_end, + merge_flag, &vma->vm_flags); + if (ret) { + ret = H_STATE; + break; + } + start = vma->vm_end; + } while (end > vma->vm_end); + + mmap_write_unlock(kvm->mm); + return ret; +} + +static void __kvmppc_uvmem_memslot_delete(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + uv_unregister_mem_slot(kvm->arch.lpid, memslot->id); + kvmppc_uvmem_slot_free(kvm, memslot); + kvmppc_memslot_page_merge(kvm, memslot, true); +} + +static int __kvmppc_uvmem_memslot_create(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + int ret = H_PARAMETER; + + if (kvmppc_memslot_page_merge(kvm, memslot, false)) + return ret; + + if (kvmppc_uvmem_slot_init(kvm, memslot)) + goto out1; + + ret = 
uv_register_mem_slot(kvm->arch.lpid, + memslot->base_gfn << PAGE_SHIFT, + memslot->npages * PAGE_SIZE, + 0, memslot->id); + if (ret < 0) { + ret = H_PARAMETER; + goto out; + } + return 0; +out: + kvmppc_uvmem_slot_free(kvm, memslot); +out1: + kvmppc_memslot_page_merge(kvm, memslot, true); + return ret; +} + unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) { struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; + struct kvm_memory_slot *memslot, *m; int ret = H_SUCCESS; int srcu_idx; @@ -232,35 +474,117 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) return H_AUTHORITY; srcu_idx = srcu_read_lock(&kvm->srcu); + + /* register the memslot */ slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, slots) { - if (kvmppc_uvmem_slot_init(kvm, memslot)) { - ret = H_PARAMETER; - goto out; - } - ret = uv_register_mem_slot(kvm->arch.lpid, - memslot->base_gfn << PAGE_SHIFT, - memslot->npages * PAGE_SIZE, - 0, memslot->id); - if (ret < 0) { - kvmppc_uvmem_slot_free(kvm, memslot); - ret = H_PARAMETER; - goto out; + ret = __kvmppc_uvmem_memslot_create(kvm, memslot); + if (ret) + break; + } + + if (ret) { + slots = kvm_memslots(kvm); + kvm_for_each_memslot(m, slots) { + if (m == memslot) + break; + __kvmppc_uvmem_memslot_delete(kvm, memslot); } } -out: + srcu_read_unlock(&kvm->srcu, srcu_idx); return ret; } -unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) +/* + * Provision a new page on HV side and copy over the contents + * from secure memory using UV_PAGE_OUT uvcall. + * Caller must held kvm->arch.uvmem_lock. 
+ */ +static int __kvmppc_svm_page_out(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, unsigned long page_shift, + struct kvm *kvm, unsigned long gpa) { - if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) - return H_UNSUPPORTED; + unsigned long src_pfn, dst_pfn = 0; + struct migrate_vma mig; + struct page *dpage, *spage; + struct kvmppc_uvmem_page_pvt *pvt; + unsigned long pfn; + int ret = U_SUCCESS; - kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE; - pr_info("LPID %d went secure\n", kvm->arch.lpid); - return H_SUCCESS; + memset(&mig, 0, sizeof(mig)); + mig.vma = vma; + mig.start = start; + mig.end = end; + mig.src = &src_pfn; + mig.dst = &dst_pfn; + mig.pgmap_owner = &kvmppc_uvmem_pgmap; + mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; + + /* The requested page is already paged-out, nothing to do */ + if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL)) + return ret; + + ret = migrate_vma_setup(&mig); + if (ret) + return -1; + + spage = migrate_pfn_to_page(*mig.src); + if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE)) + goto out_finalize; + + if (!is_zone_device_page(spage)) + goto out_finalize; + + dpage = alloc_page_vma(GFP_HIGHUSER, vma, start); + if (!dpage) { + ret = -1; + goto out_finalize; + } + + lock_page(dpage); + pvt = spage->zone_device_data; + pfn = page_to_pfn(dpage); + + /* + * This function is used in two cases: + * - When HV touches a secure page, for which we do UV_PAGE_OUT + * - When a secure page is converted to shared page, we *get* + * the page to essentially unmap the device page. In this + * case we skip page-out. 
+ */ + if (!pvt->skip_page_out) + ret = uv_page_out(kvm->arch.lpid, pfn << page_shift, + gpa, 0, page_shift); + + if (ret == U_SUCCESS) + *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED; + else { + unlock_page(dpage); + __free_page(dpage); + goto out_finalize; + } + + migrate_vma_pages(&mig); + +out_finalize: + migrate_vma_finalize(&mig); + return ret; +} + +static inline int kvmppc_svm_page_out(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + unsigned long page_shift, + struct kvm *kvm, unsigned long gpa) +{ + int ret; + + mutex_lock(&kvm->arch.uvmem_lock); + ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa); + mutex_unlock(&kvm->arch.uvmem_lock); + + return ret; } /* @@ -271,33 +595,53 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) * fault on them, do fault time migration to replace the device PTEs in * QEMU page table with normal PTEs from newly allocated pages. */ -void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free, +void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot, struct kvm *kvm, bool skip_page_out) { int i; struct kvmppc_uvmem_page_pvt *pvt; - unsigned long pfn, uvmem_pfn; - unsigned long gfn = free->base_gfn; + struct page *uvmem_page; + struct vm_area_struct *vma = NULL; + unsigned long uvmem_pfn, gfn; + unsigned long addr; - for (i = free->npages; i; --i, ++gfn) { - struct page *uvmem_page; + mmap_read_lock(kvm->mm); + + addr = slot->userspace_addr; + + gfn = slot->base_gfn; + for (i = slot->npages; i; --i, ++gfn, addr += PAGE_SIZE) { + + /* Fetch the VMA if addr is not in the latest fetched one */ + if (!vma || addr >= vma->vm_end) { + vma = find_vma_intersection(kvm->mm, addr, addr+1); + if (!vma) { + pr_err("Can't find VMA for gfn:0x%lx\n", gfn); + break; + } + } mutex_lock(&kvm->arch.uvmem_lock); - if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { - mutex_unlock(&kvm->arch.uvmem_lock); - continue; + + if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) { + uvmem_page = 
pfn_to_page(uvmem_pfn); + pvt = uvmem_page->zone_device_data; + pvt->skip_page_out = skip_page_out; + pvt->remove_gfn = true; + + if (__kvmppc_svm_page_out(vma, addr, addr + PAGE_SIZE, + PAGE_SHIFT, kvm, pvt->gpa)) + pr_err("Can't page out gpa:0x%lx addr:0x%lx\n", + pvt->gpa, addr); + } else { + /* Remove the shared flag if any */ + kvmppc_gfn_remove(gfn, kvm); } - uvmem_page = pfn_to_page(uvmem_pfn); - pvt = uvmem_page->zone_device_data; - pvt->skip_page_out = skip_page_out; mutex_unlock(&kvm->arch.uvmem_lock); - - pfn = gfn_to_pfn(kvm, gfn); - if (is_error_noslot_pfn(pfn)) - continue; - kvm_release_pfn_clean(pfn); } + + mmap_read_unlock(kvm->mm); } unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm) @@ -360,7 +704,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm) goto out_clear; uvmem_pfn = bit + pfn_first; - kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm); + kvmppc_gfn_secure_uvmem_pfn(gpa >> PAGE_SHIFT, uvmem_pfn, kvm); pvt->gpa = gpa; pvt->kvm = kvm; @@ -379,13 +723,14 @@ out: } /* - * Alloc a PFN from private device memory pool and copy page from normal - * memory to secure memory using UV_PAGE_IN uvcall. + * Alloc a PFN from private device memory pool. If @pagein is true, + * copy page from normal memory to secure memory using UV_PAGE_IN uvcall. 
*/ -static int -kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, - unsigned long end, unsigned long gpa, struct kvm *kvm, - unsigned long page_shift, bool *downgrade) +static int kvmppc_svm_page_in(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, unsigned long gpa, struct kvm *kvm, + unsigned long page_shift, + bool pagein) { unsigned long src_pfn, dst_pfn = 0; struct migrate_vma mig; @@ -402,18 +747,6 @@ kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, mig.dst = &dst_pfn; mig.flags = MIGRATE_VMA_SELECT_SYSTEM; - /* - * We come here with mmap_lock write lock held just for - * ksm_madvise(), otherwise we only need read mmap_lock. - * Hence downgrade to read lock once ksm_madvise() is done. - */ - ret = ksm_madvise(vma, vma->vm_start, vma->vm_end, - MADV_UNMERGEABLE, &vma->vm_flags); - mmap_write_downgrade(kvm->mm); - *downgrade = true; - if (ret) - return ret; - ret = migrate_vma_setup(&mig); if (ret) return ret; @@ -429,11 +762,16 @@ kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start, goto out_finalize; } - pfn = *mig.src >> MIGRATE_PFN_SHIFT; - spage = migrate_pfn_to_page(*mig.src); - if (spage) - uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, - page_shift); + if (pagein) { + pfn = *mig.src >> MIGRATE_PFN_SHIFT; + spage = migrate_pfn_to_page(*mig.src); + if (spage) { + ret = uv_page_in(kvm->arch.lpid, pfn << page_shift, + gpa, 0, page_shift); + if (ret) + goto out_finalize; + } + } *mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; migrate_vma_pages(&mig); @@ -442,6 +780,80 @@ out_finalize: return ret; } +static int kvmppc_uv_migrate_mem_slot(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + unsigned long gfn = memslot->base_gfn; + struct vm_area_struct *vma; + unsigned long start, end; + int ret = 0; + + mmap_read_lock(kvm->mm); + mutex_lock(&kvm->arch.uvmem_lock); + while (kvmppc_next_nontransitioned_gfn(memslot, kvm, &gfn)) { + ret = H_STATE; + start = 
gfn_to_hva(kvm, gfn); + if (kvm_is_error_hva(start)) + break; + + end = start + (1UL << PAGE_SHIFT); + vma = find_vma_intersection(kvm->mm, start, end); + if (!vma || vma->vm_start > start || vma->vm_end < end) + break; + + ret = kvmppc_svm_page_in(vma, start, end, + (gfn << PAGE_SHIFT), kvm, PAGE_SHIFT, false); + if (ret) { + ret = H_STATE; + break; + } + + /* relinquish the cpu if needed */ + cond_resched(); + } + mutex_unlock(&kvm->arch.uvmem_lock); + mmap_read_unlock(kvm->mm); + return ret; +} + +unsigned long kvmppc_h_svm_init_done(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int srcu_idx; + long ret = H_SUCCESS; + + if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)) + return H_UNSUPPORTED; + + /* migrate any unmoved normal pfn to device pfns*/ + srcu_idx = srcu_read_lock(&kvm->srcu); + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) { + ret = kvmppc_uv_migrate_mem_slot(kvm, memslot); + if (ret) { + /* + * The pages will remain transitioned. + * Its the callers responsibility to + * terminate the VM, which will undo + * all state of the VM. Till then + * this VM is in a erroneous state. + * Its KVMPPC_SECURE_INIT_DONE will + * remain unset. + */ + ret = H_STATE; + goto out; + } + } + + kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE; + pr_info("LPID %d went secure\n", kvm->arch.lpid); + +out: + srcu_read_unlock(&kvm->srcu, srcu_idx); + return ret; +} + /* * Shares the page with HV, thus making it a normal page. * @@ -451,8 +863,8 @@ out_finalize: * In the former case, uses dev_pagemap_ops.migrate_to_ram handler * to unmap the device page from QEMU's page tables. 
*/ -static unsigned long -kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift) +static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa, + unsigned long page_shift) { int ret = H_PARAMETER; @@ -469,6 +881,11 @@ kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift) uvmem_page = pfn_to_page(uvmem_pfn); pvt = uvmem_page->zone_device_data; pvt->skip_page_out = true; + /* + * do not drop the GFN. It is a valid GFN + * that is transitioned to a shared GFN. + */ + pvt->remove_gfn = false; } retry: @@ -482,12 +899,16 @@ retry: uvmem_page = pfn_to_page(uvmem_pfn); pvt = uvmem_page->zone_device_data; pvt->skip_page_out = true; + pvt->remove_gfn = false; /* it continues to be a valid GFN */ kvm_release_pfn_clean(pfn); goto retry; } - if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift)) + if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, + page_shift)) { + kvmppc_gfn_shared(gfn, kvm); ret = H_SUCCESS; + } kvm_release_pfn_clean(pfn); mutex_unlock(&kvm->arch.uvmem_lock); out: @@ -501,11 +922,10 @@ out: * H_PAGE_IN_SHARED flag makes the page shared which means that the same * memory in is visible from both UV and HV. 
*/ -unsigned long -kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, - unsigned long flags, unsigned long page_shift) +unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, + unsigned long flags, + unsigned long page_shift) { - bool downgrade = false; unsigned long start, end; struct vm_area_struct *vma; int srcu_idx; @@ -526,7 +946,7 @@ kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, ret = H_PARAMETER; srcu_idx = srcu_read_lock(&kvm->srcu); - mmap_write_lock(kvm->mm); + mmap_read_lock(kvm->mm); start = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(start)) @@ -542,97 +962,20 @@ kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa, if (!vma || vma->vm_start > start || vma->vm_end < end) goto out_unlock; - if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift, - &downgrade)) - ret = H_SUCCESS; + if (kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift, + true)) + goto out_unlock; + + ret = H_SUCCESS; + out_unlock: mutex_unlock(&kvm->arch.uvmem_lock); out: - if (downgrade) - mmap_read_unlock(kvm->mm); - else - mmap_write_unlock(kvm->mm); + mmap_read_unlock(kvm->mm); srcu_read_unlock(&kvm->srcu, srcu_idx); return ret; } -/* - * Provision a new page on HV side and copy over the contents - * from secure memory using UV_PAGE_OUT uvcall. 
- */ -static int -kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, - unsigned long end, unsigned long page_shift, - struct kvm *kvm, unsigned long gpa) -{ - unsigned long src_pfn, dst_pfn = 0; - struct migrate_vma mig; - struct page *dpage, *spage; - struct kvmppc_uvmem_page_pvt *pvt; - unsigned long pfn; - int ret = U_SUCCESS; - - memset(&mig, 0, sizeof(mig)); - mig.vma = vma; - mig.start = start; - mig.end = end; - mig.src = &src_pfn; - mig.dst = &dst_pfn; - mig.pgmap_owner = &kvmppc_uvmem_pgmap; - mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; - - mutex_lock(&kvm->arch.uvmem_lock); - /* The requested page is already paged-out, nothing to do */ - if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL)) - goto out; - - ret = migrate_vma_setup(&mig); - if (ret) - goto out; - - spage = migrate_pfn_to_page(*mig.src); - if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE)) - goto out_finalize; - - if (!is_zone_device_page(spage)) - goto out_finalize; - - dpage = alloc_page_vma(GFP_HIGHUSER, vma, start); - if (!dpage) { - ret = -1; - goto out_finalize; - } - - lock_page(dpage); - pvt = spage->zone_device_data; - pfn = page_to_pfn(dpage); - - /* - * This function is used in two cases: - * - When HV touches a secure page, for which we do UV_PAGE_OUT - * - When a secure page is converted to shared page, we *get* - * the page to essentially unmap the device page. In this - * case we skip page-out. 
- */ - if (!pvt->skip_page_out) - ret = uv_page_out(kvm->arch.lpid, pfn << page_shift, - gpa, 0, page_shift); - - if (ret == U_SUCCESS) - *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED; - else { - unlock_page(dpage); - __free_page(dpage); - goto out_finalize; - } - - migrate_vma_pages(&mig); -out_finalize: - migrate_vma_finalize(&mig); -out: - mutex_unlock(&kvm->arch.uvmem_lock); - return ret; -} /* * Fault handler callback that gets called when HV touches any page that @@ -657,7 +1000,8 @@ static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf) /* * Release the device PFN back to the pool * - * Gets called when secure page becomes a normal page during H_SVM_PAGE_OUT. + * Gets called when secure GFN tranistions from a secure-PFN + * to a normal PFN during H_SVM_PAGE_OUT. * Gets called with kvm->arch.uvmem_lock held. */ static void kvmppc_uvmem_page_free(struct page *page) @@ -672,7 +1016,10 @@ static void kvmppc_uvmem_page_free(struct page *page) pvt = page->zone_device_data; page->zone_device_data = NULL; - kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm); + if (pvt->remove_gfn) + kvmppc_gfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm); + else + kvmppc_gfn_secure_mem_pfn(pvt->gpa >> PAGE_SHIFT, pvt->kvm); kfree(pvt); } @@ -744,6 +1091,21 @@ out: return (ret == U_SUCCESS) ? 
RESUME_GUEST : -EFAULT; } +int kvmppc_uvmem_memslot_create(struct kvm *kvm, const struct kvm_memory_slot *new) +{ + int ret = __kvmppc_uvmem_memslot_create(kvm, new); + + if (!ret) + ret = kvmppc_uv_migrate_mem_slot(kvm, new); + + return ret; +} + +void kvmppc_uvmem_memslot_delete(struct kvm *kvm, const struct kvm_memory_slot *old) +{ + __kvmppc_uvmem_memslot_delete(kvm, old); +} + static u64 kvmppc_get_secmem_size(void) { struct device_node *np; diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index 607a9b99c334..25a3679fb590 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -55,8 +55,7 @@ ****************************************************************************/ /* Registers: - * r3: kvm_run pointer - * r4: vcpu pointer + * r3: vcpu pointer */ _GLOBAL(__kvmppc_vcpu_run) @@ -68,8 +67,8 @@ kvm_start_entry: /* Save host state to the stack */ PPC_STLU r1, -SWITCH_FRAME_SIZE(r1) - /* Save r3 (kvm_run) and r4 (vcpu) */ - SAVE_2GPRS(3, r1) + /* Save r3 (vcpu) */ + SAVE_GPR(3, r1) /* Save non-volatile registers (r14 - r31) */ SAVE_NVGPRS(r1) @@ -82,47 +81,46 @@ kvm_start_entry: PPC_STL r0, _LINK(r1) /* Load non-volatile guest state from the vcpu */ - VCPU_LOAD_NVGPRS(r4) + VCPU_LOAD_NVGPRS(r3) kvm_start_lightweight: /* Copy registers into shadow vcpu so we can access them in real mode */ - mr r3, r4 bl FUNC(kvmppc_copy_to_svcpu) nop - REST_GPR(4, r1) + REST_GPR(3, r1) #ifdef CONFIG_PPC_BOOK3S_64 /* Get the dcbz32 flag */ - PPC_LL r3, VCPU_HFLAGS(r4) - rldicl r3, r3, 0, 63 /* r3 &= 1 */ - stb r3, HSTATE_RESTORE_HID5(r13) + PPC_LL r0, VCPU_HFLAGS(r3) + rldicl r0, r0, 0, 63 /* r3 &= 1 */ + stb r0, HSTATE_RESTORE_HID5(r13) /* Load up guest SPRG3 value, since it's user readable */ - lwz r3, VCPU_SHAREDBE(r4) - cmpwi r3, 0 - ld r5, VCPU_SHARED(r4) + lbz r4, VCPU_SHAREDBE(r3) + cmpwi r4, 0 + ld r5, VCPU_SHARED(r3) beq sprg3_little_endian sprg3_big_endian: #ifdef __BIG_ENDIAN__ - ld r3, 
VCPU_SHARED_SPRG3(r5) + ld r4, VCPU_SHARED_SPRG3(r5) #else addi r5, r5, VCPU_SHARED_SPRG3 - ldbrx r3, 0, r5 + ldbrx r4, 0, r5 #endif b after_sprg3_load sprg3_little_endian: #ifdef __LITTLE_ENDIAN__ - ld r3, VCPU_SHARED_SPRG3(r5) + ld r4, VCPU_SHARED_SPRG3(r5) #else addi r5, r5, VCPU_SHARED_SPRG3 - ldbrx r3, 0, r5 + ldbrx r4, 0, r5 #endif after_sprg3_load: - mtspr SPRN_SPRG3, r3 + mtspr SPRN_SPRG3, r4 #endif /* CONFIG_PPC_BOOK3S_64 */ - PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */ + PPC_LL r4, VCPU_SHADOW_MSR(r3) /* get shadow_msr */ /* Jump to segment patching handler and into our guest */ bl FUNC(kvmppc_entry_trampoline) @@ -146,7 +144,7 @@ after_sprg3_load: * */ - PPC_LL r3, GPR4(r1) /* vcpu pointer */ + PPC_LL r3, GPR3(r1) /* vcpu pointer */ /* * kvmppc_copy_from_svcpu can clobber volatile registers, save @@ -169,7 +167,7 @@ after_sprg3_load: #endif /* CONFIG_PPC_BOOK3S_64 */ /* R7 = vcpu */ - PPC_LL r7, GPR4(r1) + PPC_LL r7, GPR3(r1) PPC_STL r14, VCPU_GPR(R14)(r7) PPC_STL r15, VCPU_GPR(R15)(r7) @@ -190,11 +188,11 @@ after_sprg3_load: PPC_STL r30, VCPU_GPR(R30)(r7) PPC_STL r31, VCPU_GPR(R31)(r7) - /* Pass the exit number as 3rd argument to kvmppc_handle_exit */ - lwz r5, VCPU_TRAP(r7) + /* Pass the exit number as 2nd argument to kvmppc_handle_exit */ + lwz r4, VCPU_TRAP(r7) - /* Restore r3 (kvm_run) and r4 (vcpu) */ - REST_2GPRS(3, r1) + /* Restore r3 (vcpu) */ + REST_GPR(3, r1) bl FUNC(kvmppc_handle_exit_pr) /* If RESUME_GUEST, get back in the loop */ @@ -223,11 +221,11 @@ kvm_loop_heavyweight: PPC_LL r4, _LINK(r1) PPC_STL r4, (PPC_LR_STKOFF + SWITCH_FRAME_SIZE)(r1) - /* Load vcpu and cpu_run */ - REST_2GPRS(3, r1) + /* Load vcpu */ + REST_GPR(3, r1) /* Load non-volatile guest state from the vcpu */ - VCPU_LOAD_NVGPRS(r4) + VCPU_LOAD_NVGPRS(r3) /* Jump back into the beginning of this function */ b kvm_start_lightweight @@ -235,7 +233,7 @@ kvm_loop_heavyweight: kvm_loop_lightweight: /* We'll need the vcpu pointer */ - REST_GPR(4, r1) + REST_GPR(3, r1) /* 
Jump back into the beginning of this function */ b kvm_start_lightweight diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index ed12dfbf9bb5..88fac22fbf09 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1151,9 +1151,9 @@ static int kvmppc_exit_pr_progint(struct kvm_vcpu *vcpu, unsigned int exit_nr) return r; } -int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int exit_nr) +int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr) { + struct kvm_run *run = vcpu->run; int r = RESUME_HOST; int s; @@ -1826,12 +1826,11 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu) static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu) { - struct kvm_run *run = vcpu->run; int ret; /* Check if we can run the vcpu at all */ if (!vcpu->arch.sane) { - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = -EINVAL; goto out; } @@ -1858,7 +1857,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu) kvmppc_fix_ee_before_entry(); - ret = __kvmppc_vcpu_run(run, vcpu); + ret = __kvmppc_vcpu_run(vcpu); kvmppc_clear_debug(vcpu); diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 26b25994c969..c5e677508d3b 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -229,7 +229,9 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) */ args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM; + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args)); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (rc) goto fail; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index c0d62a917e20..3e1c9f08e302 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -731,12 +731,11 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) int kvmppc_vcpu_run(struct kvm_vcpu *vcpu) { - struct kvm_run *run = vcpu->run; int 
ret, s; struct debug_reg debug; if (!vcpu->arch.sane) { - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; return -EINVAL; } @@ -778,7 +777,7 @@ int kvmppc_vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.pgdir = vcpu->kvm->mm->pgd; kvmppc_fix_ee_before_entry(); - ret = __kvmppc_vcpu_run(run, vcpu); + ret = __kvmppc_vcpu_run(vcpu); /* No need for guest_exit. It's done in handle_exit. We also get here with interrupts enabled. */ @@ -982,9 +981,9 @@ static int kvmppc_resume_inst_load(struct kvm_vcpu *vcpu, * * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV) */ -int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int exit_nr) +int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr) { + struct kvm_run *run = vcpu->run; int r = RESUME_HOST; int s; int idx; diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 2e56ab5a5f55..6fa82efe833b 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -237,7 +237,7 @@ _GLOBAL(kvmppc_resume_host) /* Switch to kernel stack and jump to handler. */ LOAD_REG_ADDR(r3, kvmppc_handle_exit) mtctr r3 - lwz r3, HOST_RUN(r1) + mr r3, r4 lwz r2, HOST_R2(r1) mr r14, r4 /* Save vcpu pointer. */ @@ -337,15 +337,14 @@ heavyweight_exit: /* Registers: - * r3: kvm_run pointer - * r4: vcpu pointer + * r3: vcpu pointer */ _GLOBAL(__kvmppc_vcpu_run) stwu r1, -HOST_STACK_SIZE(r1) - stw r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */ + stw r1, VCPU_HOST_STACK(r3) /* Save stack pointer to vcpu. */ /* Save host state to stack. 
*/ - stw r3, HOST_RUN(r1) + mr r4, r3 mflr r3 stw r3, HOST_STACK_LR(r1) mfcr r5 diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index c577ba4b3169..8262c14fc9e6 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -434,9 +434,10 @@ _GLOBAL(kvmppc_resume_host) #endif /* Switch to kernel stack and jump to handler. */ - PPC_LL r3, HOST_RUN(r1) + mr r3, r4 mr r5, r14 /* intno */ mr r14, r4 /* Save vcpu pointer. */ + mr r4, r5 bl kvmppc_handle_exit /* Restore vcpu pointer and the nonvolatiles we used. */ @@ -525,15 +526,14 @@ heavyweight_exit: blr /* Registers: - * r3: kvm_run pointer - * r4: vcpu pointer + * r3: vcpu pointer */ _GLOBAL(__kvmppc_vcpu_run) stwu r1, -HOST_STACK_SIZE(r1) - PPC_STL r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */ + PPC_STL r1, VCPU_HOST_STACK(r3) /* Save stack pointer to vcpu. */ /* Save host state to stack. */ - PPC_STL r3, HOST_RUN(r1) + mr r4, r3 mflr r3 mfcr r5 PPC_STL r3, HOST_STACK_LR(r1) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index aaa7b62f2f82..13999123b735 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -403,7 +403,10 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, return EMULATE_DONE; } - if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size)) + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + rc = kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + if (rc) return EMULATE_DO_MMIO; return EMULATE_DONE; diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 1478fceeb683..1da9dbba9217 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1115,9 +1115,8 @@ void hash__early_init_mmu_secondary(void) && cpu_has_feature(CPU_FTR_HVMODE)) tlbiel_all(); -#ifdef CONFIG_PPC_MEM_KEYS - mtspr(SPRN_UAMOR, default_uamor); -#endif 
+ if (IS_ENABLED(CONFIG_PPC_MEM_KEYS) && mmu_has_feature(MMU_FTR_PKEY)) + mtspr(SPRN_UAMOR, default_uamor); } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 69a6b87f2bb4..b1d091a97611 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -73,12 +73,6 @@ static int scan_pkey_feature(void) if (early_radix_enabled()) return 0; - /* - * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1 - */ - if (!early_cpu_has_feature(CPU_FTR_ARCH_206)) - return 0; - ret = of_scan_flat_dt(dt_scan_storage_keys, &pkeys_total); if (ret == 0) { /* @@ -124,6 +118,12 @@ void __init pkey_early_init_devtree(void) __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) != (sizeof(u64) * BITS_PER_BYTE)); + /* + * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1 + */ + if (!early_cpu_has_feature(CPU_FTR_ARCH_206)) + return; + /* scan the device tree for pkey feature */ pkeys_total = scan_pkey_feature(); if (!pkeys_total) diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index b83abbead4a2..8acd00178956 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -64,7 +64,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, } ret = 0; - *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0); + *flt = handle_mm_fault(vma, ea, is_write ? 
FAULT_FLAG_WRITE : 0, NULL); if (unlikely(*flt & VM_FAULT_ERROR)) { if (*flt & VM_FAULT_OOM) { ret = -ENOMEM; @@ -76,11 +76,6 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, BUG(); } - if (*flt & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; - out_unlock: mmap_read_unlock(mm); return ret; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 925a7231abb3..0add963a849b 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -511,7 +511,7 @@ retry: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); major |= fault & VM_FAULT_MAJOR; @@ -537,14 +537,9 @@ retry: /* * Major/minor page fault accounting. */ - if (major) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); + if (major) cmo_account_page_fault(); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); - } + return 0; } NOKPROBE_SYMBOL(__do_page_fault); diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 8ce9d607b53d..f56c66b3f5fe 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -8,6 +8,8 @@ #ifndef _ASM_RISCV_UACCESS_H #define _ASM_RISCV_UACCESS_H +#include <asm/pgtable.h> /* for TASK_SIZE */ + /* * User space memory access functions */ @@ -62,11 +64,9 @@ static inline void set_fs(mm_segment_t fs) current_thread_info()->addr_limit = fs; } -#define segment_eq(a, b) ((a).seg == (b).seg) - +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define user_addr_max() (get_fs().seg) - /** * access_ok: - Checks if a user space pointer is valid * @addr: User space pointer to start of block to check diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 5873835a3e6b..716d64e36f83 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -109,7 +109,7 @@ 
good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, addr, flags); + fault = handle_mm_fault(vma, addr, flags, regs); /* * If we need to retry but a fatal signal is pending, handle the @@ -127,21 +127,7 @@ good_area: BUG(); } - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, - 1, regs, addr); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, - 1, regs, addr); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild index e63940bb57cd..8b98c501142d 100644 --- a/arch/s390/Kbuild +++ b/arch/s390/Kbuild @@ -7,5 +7,4 @@ obj-$(CONFIG_S390_HYPFS_FS) += hypfs/ obj-$(CONFIG_APPLDATA_BASE) += appldata/ obj-y += net/ obj-$(CONFIG_PCI) += pci/ -obj-$(CONFIG_NUMA) += numa/ obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8c0b52940165..3d86e12e8e3c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -126,7 +126,6 @@ config S390 select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_VMALLOC - select CLOCKSOURCE_VALIDATE_LAST_CYCLE select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY @@ -766,6 +765,7 @@ config VFIO_AP def_tristate n prompt "VFIO support for AP devices" depends on S390_AP_IOMMU && VFIO_MDEV_DEVICE && KVM + depends on ZCRYPT help This driver grants access to Adjunct Processor (AP) devices via the VFIO mediated device interface. 
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index cae473a7b6f7..11c5952e1afa 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -45,7 +45,11 @@ static inline int atomic_fetch_add(int i, atomic_t *v) static inline void atomic_add(int i, atomic_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { + /* + * Order of conditions is important to circumvent gcc 10 bug: + * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html + */ + if ((i > -129) && (i < 128) && __builtin_constant_p(i)) { __atomic_add_const(i, &v->counter); return; } @@ -112,7 +116,11 @@ static inline s64 atomic64_fetch_add(s64 i, atomic64_t *v) static inline void atomic64_add(s64 i, atomic64_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { + /* + * Order of conditions is important to circumvent gcc 10 bug: + * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html + */ + if ((i > -129) && (i < 128) && __builtin_constant_p(i)) { __atomic64_add_const(i, (long *)&v->counter); return; } diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 17a26261f288..c1b82bcc017c 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -2,7 +2,7 @@ /* * S/390 debug facility * - * Copyright IBM Corp. 1999, 2000 + * Copyright IBM Corp. 
1999, 2020 */ #ifndef DEBUG_H #define DEBUG_H @@ -26,19 +26,14 @@ #define DEBUG_DATA(entry) (char *)(entry + 1) /* data is stored behind */ /* the entry information */ -#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */ +#define __DEBUG_FEATURE_VERSION 3 /* version of debug feature */ struct __debug_entry { - union { - struct { - unsigned long clock : 52; - unsigned long exception : 1; - unsigned long level : 3; - unsigned long cpuid : 8; - } fields; - unsigned long stck; - } id; + unsigned long clock : 60; + unsigned long exception : 1; + unsigned long level : 3; void *caller; + unsigned short cpu; } __packed; typedef struct __debug_entry debug_entry_t; diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index fbb507504a3b..3a0ac0c7a9a3 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -86,12 +86,6 @@ static inline const struct cpumask *cpumask_of_node(int node) #define pcibus_to_node(bus) __pcibus_to_node(bus) -#define node_distance(a, b) __node_distance(a, b) -static inline int __node_distance(int a, int b) -{ - return 0; -} - #else /* !CONFIG_NUMA */ #define numa_node_id numa_node_id diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 324438889fe1..f09444d6aeab 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -32,7 +32,7 @@ #define USER_DS_SACF (3) #define get_fs() (current->thread.mm_segment) -#define segment_eq(a,b) (((a) & 2) == ((b) & 2)) +#define uaccess_kernel() ((get_fs() & 2) == KERNEL_DS) void set_fs(mm_segment_t fs); diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index a8f136943deb..efca70970761 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -49,6 +49,7 @@ CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o +obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_AUDIT) += audit.o 
compat-obj-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index beb4b44a11d1..b6619ae9a3e0 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -2,7 +2,7 @@ /* * S/390 debug facility * - * Copyright IBM Corp. 1999, 2012 + * Copyright IBM Corp. 1999, 2020 * * Author(s): Michael Holzheu ([email protected]), * Holger Smolinski ([email protected]) @@ -433,7 +433,7 @@ static int debug_format_entry(file_private_info_t *p_info) act_entry = (debug_entry_t *) ((char *)id_snap->areas[p_info->act_area] [p_info->act_page] + p_info->act_entry); - if (act_entry->id.stck == 0LL) + if (act_entry->clock == 0LL) goto out; /* empty entry */ if (view->header_proc) len += view->header_proc(id_snap, view, p_info->act_area, @@ -829,12 +829,17 @@ static inline debug_entry_t *get_active_entry(debug_info_t *id) static inline void debug_finish_entry(debug_info_t *id, debug_entry_t *active, int level, int exception) { - active->id.stck = get_tod_clock_fast() - - *(unsigned long long *) &tod_clock_base[1]; - active->id.fields.cpuid = smp_processor_id(); + unsigned char clk[STORE_CLOCK_EXT_SIZE]; + unsigned long timestamp; + + get_tod_clock_ext(clk); + timestamp = *(unsigned long *) &clk[0] >> 4; + timestamp -= TOD_UNIX_EPOCH >> 12; + active->clock = timestamp; + active->cpu = smp_processor_id(); active->caller = __builtin_return_address(0); - active->id.fields.exception = exception; - active->id.fields.level = level; + active->exception = exception; + active->level = level; proceed_active_entry(id); if (exception) proceed_active_area(id); @@ -1398,25 +1403,24 @@ static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, int area, debug_entry_t *entry, char *out_buf) { - unsigned long base, sec, usec; + unsigned long sec, usec; unsigned long caller; unsigned int level; char 
*except_str; int rc = 0; - level = entry->id.fields.level; - base = (*(unsigned long *) &tod_clock_base[0]) >> 4; - sec = (entry->id.stck >> 12) + base - (TOD_UNIX_EPOCH >> 12); + level = entry->level; + sec = entry->clock; usec = do_div(sec, USEC_PER_SEC); - if (entry->id.fields.exception) + if (entry->exception) except_str = "*"; else except_str = "-"; caller = (unsigned long) entry->caller; - rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %pK ", + rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %pK ", area, sec, usec, level, except_str, - entry->id.fields.cpuid, (void *)caller); + entry->cpu, (void *)caller); return rc; } EXPORT_SYMBOL(debug_dflt_header_fn); diff --git a/arch/s390/numa/numa.c b/arch/s390/kernel/numa.c index 51c5a9f6e525..51c5a9f6e525 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/kernel/numa.c diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 1608fd99bbee..2f177298c663 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2768,7 +2768,7 @@ static struct page *get_map_page(struct kvm *kvm, u64 uaddr) struct page *page = NULL; mmap_read_lock(kvm->mm); - get_user_pages_remote(NULL, kvm->mm, uaddr, 1, FOLL_WRITE, + get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE, &page, NULL, NULL); mmap_read_unlock(kvm->mm); return page; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 66da278a67fb..6b74b92c1a58 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1892,7 +1892,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) r = set_guest_storage_key(current->mm, hva, keys[i], 0); if (r) { - r = fixup_user_fault(current, current->mm, hva, + r = fixup_user_fault(current->mm, hva, FAULT_FLAG_WRITE, &unlocked); if (r) break; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 2f721a923b54..cd74989ce0b0 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -273,7 +273,7 @@ retry: rc = 
get_guest_storage_key(current->mm, vmaddr, &key); if (rc) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); if (!rc) { mmap_read_unlock(current->mm); @@ -319,7 +319,7 @@ retry: mmap_read_lock(current->mm); rc = reset_guest_reference_bit(current->mm, vmaddr); if (rc < 0) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); if (!rc) { mmap_read_unlock(current->mm); @@ -390,7 +390,7 @@ static int handle_sske(struct kvm_vcpu *vcpu) m3 & SSKE_MC); if (rc < 0) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); rc = !rc ? -EAGAIN : rc; } @@ -1094,7 +1094,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) rc = cond_set_guest_storage_key(current->mm, vmaddr, key, NULL, nq, mr, mc); if (rc < 0) { - rc = fixup_user_fault(current, current->mm, vmaddr, + rc = fixup_user_fault(current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); rc = !rc ? -EAGAIN : rc; } diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index eb382ceaa116..7c988994931f 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -64,6 +64,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, break; if (state.reliable && !addr) { pr_err("unwind state reliable but addr is 0\n"); + kfree(bt); return -EINVAL; } sprint_symbol(sym, addr); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index aebf9183bedd..4c8c063bce5b 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -476,7 +476,7 @@ retry: * make sure we exit gracefully rather than endlessly redo * the fault. 
*/ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) { fault = VM_FAULT_SIGNAL; if (flags & FAULT_FLAG_RETRY_NOWAIT) @@ -486,21 +486,7 @@ retry: if (unlikely(fault & VM_FAULT_ERROR)) goto out_up; - /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. - */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - } if (fault & VM_FAULT_RETRY) { if (IS_ENABLED(CONFIG_PGSTE) && gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) { diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 190357ff86b3..373542ca1113 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -649,7 +649,7 @@ retry: rc = vmaddr; goto out_up; } - if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags, + if (fixup_user_fault(gmap->mm, vmaddr, fault_flags, &unlocked)) { rc = -EFAULT; goto out_up; @@ -879,7 +879,7 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr, BUG_ON(gmap_is_shadow(gmap)); fault_flags = (prot == PROT_WRITE) ? 
FAULT_FLAG_WRITE : 0; - if (fixup_user_fault(current, mm, vmaddr, fault_flags, &unlocked)) + if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked)) return -EFAULT; if (unlocked) /* lost mmap_lock, caller has to retry __gmap_translate */ @@ -2485,23 +2485,36 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4], } EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + struct vm_area_struct *vma = walk->vma; + + split_huge_pmd(vma, pmd, addr); + return 0; +} + +static const struct mm_walk_ops thp_split_walk_ops = { + .pmd_entry = thp_split_walk_pmd_entry, +}; + static inline void thp_split_mm(struct mm_struct *mm) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE struct vm_area_struct *vma; - unsigned long addr; for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { - for (addr = vma->vm_start; - addr < vma->vm_end; - addr += PAGE_SIZE) - follow_page(vma, addr, FOLL_SPLIT); vma->vm_flags &= ~VM_HUGEPAGE; vma->vm_flags |= VM_NOHUGEPAGE; + walk_page_vma(vma, &thp_split_walk_ops, NULL); } mm->def_flags |= VM_NOHUGEPAGE; -#endif } +#else +static inline void thp_split_mm(struct mm_struct *mm) +{ +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* * Remove all empty zero pages from the mapping for lazy refaulting diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile deleted file mode 100644 index c89d26f4f77d..000000000000 --- a/arch/s390/numa/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -obj-y += numa.o diff --git a/arch/sh/include/asm/segment.h b/arch/sh/include/asm/segment.h index 33d1d28057cb..02e54a3335d6 100644 --- a/arch/sh/include/asm/segment.h +++ b/arch/sh/include/asm/segment.h @@ -24,8 +24,7 @@ typedef struct { #define USER_DS KERNEL_DS #endif -#define segment_eq(a, b) ((a).seg == (b).seg) - +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define get_fs() 
(current_thread_info()->addr_limit) #define set_fs(x) (current_thread_info()->addr_limit = (x)) diff --git a/arch/sh/include/asm/sparsemem.h b/arch/sh/include/asm/sparsemem.h index 4eb899751e45..084706bb8cca 100644 --- a/arch/sh/include/asm/sparsemem.h +++ b/arch/sh/include/asm/sparsemem.h @@ -5,11 +5,9 @@ #ifdef __KERNEL__ /* * SECTION_SIZE_BITS 2^N: how big each section will be - * MAX_PHYSADDR_BITS 2^N: how much physical address space we have - * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space + * MAX_PHYSMEM_BITS 2^N: how much physical address space we have */ #define SECTION_SIZE_BITS 26 -#define MAX_PHYSADDR_BITS 32 #define MAX_PHYSMEM_BITS 32 #endif diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index 058c6181bb30..b62ad0ba2395 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -482,8 +482,6 @@ asmlinkage void do_address_error(struct pt_regs *regs, error_code = lookup_exception_vector(); #endif - oldfs = get_fs(); - if (user_mode(regs)) { int si_code = BUS_ADRERR; unsigned int user_action; @@ -491,13 +489,13 @@ asmlinkage void do_address_error(struct pt_regs *regs, local_irq_enable(); inc_unaligned_user_access(); - set_fs(USER_DS); + oldfs = force_uaccess_begin(); if (copy_from_user(&instruction, (insn_size_t *)(regs->pc & ~1), sizeof(instruction))) { - set_fs(oldfs); + force_uaccess_end(oldfs); goto uspace_segv; } - set_fs(oldfs); + force_uaccess_end(oldfs); /* shout about userspace fixups */ unaligned_fixups_notify(current, instruction, regs); @@ -520,11 +518,11 @@ fixup: goto uspace_segv; } - set_fs(USER_DS); + oldfs = force_uaccess_begin(); tmp = handle_unaligned_access(instruction, regs, &user_mem_access, 0, address); - set_fs(oldfs); + force_uaccess_end(oldfs); if (tmp == 0) return; /* sorted */ diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index fbe1f2fe9a8c..482668a2f6d3 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -482,22 +482,13 @@ good_area: * make sure we exit 
gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR))) if (mm_fault_error(regs, error_code, address, fault)) return; if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 613de8096335..cd1379360f08 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -425,15 +425,6 @@ int arch_add_memory(int nid, u64 start, u64 size, return ret; } -#ifdef CONFIG_NUMA -int memory_add_physaddr_to_nid(u64 addr) -{ - /* Node 0 for now.. */ - return 0; -} -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); -#endif - void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { diff --git a/arch/sparc/include/asm/sparsemem.h b/arch/sparc/include/asm/sparsemem.h index 1dd1b61432db..aa9a676bc341 100644 --- a/arch/sparc/include/asm/sparsemem.h +++ b/arch/sparc/include/asm/sparsemem.h @@ -7,7 +7,6 @@ #include <asm/page.h> #define SECTION_SIZE_BITS 30 -#define MAX_PHYSADDR_BITS MAX_PHYS_ADDRESS_BITS #define MAX_PHYSMEM_BITS MAX_PHYS_ADDRESS_BITS #endif /* !(__KERNEL__) */ diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index d6d8413eca83..0a2d3ebc4bb8 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -28,7 +28,7 @@ #define get_fs() (current->thread.current_ds) #define set_fs(val) ((current->thread.current_ds) = (val)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) /* We have there a nice not-mapped page at PAGE_OFFSET - PAGE_SIZE, so that this test * can be fairly 
lightweight. diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index bf9d330073b2..698cf69f74e9 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -32,7 +32,7 @@ #define get_fs() ((mm_segment_t){(current_thread_info()->current_ds)}) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define set_fs(val) \ do { \ diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index cfef656eda0f..8071bfd72349 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -234,7 +234,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -250,15 +250,6 @@ good_area: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, - 1, regs, address); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, - 1, regs, address); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; @@ -410,7 +401,7 @@ good_area: if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } - switch (handle_mm_fault(vma, address, flags)) { + switch (handle_mm_fault(vma, address, flags, NULL)) { case VM_FAULT_SIGBUS: case VM_FAULT_OOM: goto do_sigbus; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index a3806614e4dc..0a6bcc85fba7 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -422,7 +422,7 @@ good_area: goto bad_area; } - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) goto exit_exception; @@ -438,15 +438,6 @@ good_area: } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - current->maj_flt++; - 
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, - 1, regs, address); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, - 1, regs, address); - } if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 2b3afa354a90..ad12f78bda7e 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -71,7 +71,7 @@ good_area: do { vm_fault_t fault; - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, NULL); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) goto out_nosemaphore; @@ -88,10 +88,6 @@ good_area: BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 29b7d52143e9..df8c017e6161 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -449,8 +449,6 @@ .macro SWITCH_TO_KERNEL_STACK - ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV - BUG_IF_WRONG_CR3 SWITCH_TO_KERNEL_CR3 scratch_reg=%eax @@ -599,8 +597,6 @@ */ .macro SWITCH_TO_ENTRY_STACK - ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV - /* Bytes to copy */ movl $PTREGS_SIZE, %ecx @@ -872,17 +868,6 @@ SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE) * will ignore all of the single-step traps generated in this range. */ -#ifdef CONFIG_XEN_PV -/* - * Xen doesn't set %esp to be precisely what the normal SYSENTER - * entry point expects, so fix it up before using the normal path. - */ -SYM_CODE_START(xen_sysenter_target) - addl $5*4, %esp /* remove xen-provided frame */ - jmp .Lsysenter_past_esp -SYM_CODE_END(xen_sysenter_target) -#endif - /* * 32-bit SYSENTER entry. 
* @@ -965,9 +950,8 @@ SYM_FUNC_START(entry_SYSENTER_32) movl %esp, %eax call do_SYSENTER_32 - /* XEN PV guests always use IRET path */ - ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ - "jmp .Lsyscall_32_done", X86_FEATURE_XENPV + testl %eax, %eax + jz .Lsyscall_32_done STACKLEAK_ERASE @@ -1165,95 +1149,6 @@ SYM_FUNC_END(entry_INT80_32) #endif .endm -#ifdef CONFIG_PARAVIRT -SYM_CODE_START(native_iret) - iret - _ASM_EXTABLE(native_iret, asm_iret_error) -SYM_CODE_END(native_iret) -#endif - -#ifdef CONFIG_XEN_PV -/* - * See comment in entry_64.S for further explanation - * - * Note: This is not an actual IDT entry point. It's a XEN specific entry - * point and therefore named to match the 64-bit trampoline counterpart. - */ -SYM_FUNC_START(xen_asm_exc_xen_hypervisor_callback) - /* - * Check to see if we got the event in the critical - * region in xen_iret_direct, after we've reenabled - * events and checked for pending events. This simulates - * iret instruction's behaviour where it delivers a - * pending interrupt when enabling interrupts: - */ - cmpl $xen_iret_start_crit, (%esp) - jb 1f - cmpl $xen_iret_end_crit, (%esp) - jae 1f - call xen_iret_crit_fixup -1: - pushl $-1 /* orig_ax = -1 => not a system call */ - SAVE_ALL - ENCODE_FRAME_POINTER - - mov %esp, %eax - call xen_pv_evtchn_do_upcall - jmp handle_exception_return -SYM_FUNC_END(xen_asm_exc_xen_hypervisor_callback) - -/* - * Hypervisor uses this for application faults while it executes. - * We get here for two reasons: - * 1. Fault while reloading DS, ES, FS or GS - * 2. Fault while executing IRET - * Category 1 we fix up by reattempting the load, and zeroing the segment - * register if the load fails. - * Category 2 we fix up by jumping to do_iret_error. We cannot use the - * normal Linux return path in this case because if we use the IRET hypercall - * to pop the stack frame we end up in an infinite loop of failsafe callbacks. 
- * We distinguish between categories by maintaining a status value in EAX. - */ -SYM_FUNC_START(xen_failsafe_callback) - pushl %eax - movl $1, %eax -1: mov 4(%esp), %ds -2: mov 8(%esp), %es -3: mov 12(%esp), %fs -4: mov 16(%esp), %gs - /* EAX == 0 => Category 1 (Bad segment) - EAX != 0 => Category 2 (Bad IRET) */ - testl %eax, %eax - popl %eax - lea 16(%esp), %esp - jz 5f - jmp asm_iret_error -5: pushl $-1 /* orig_ax = -1 => not a system call */ - SAVE_ALL - ENCODE_FRAME_POINTER - jmp handle_exception_return - -.section .fixup, "ax" -6: xorl %eax, %eax - movl %eax, 4(%esp) - jmp 1b -7: xorl %eax, %eax - movl %eax, 8(%esp) - jmp 2b -8: xorl %eax, %eax - movl %eax, 12(%esp) - jmp 3b -9: xorl %eax, %eax - movl %eax, 16(%esp) - jmp 4b -.previous - _ASM_EXTABLE(1b, 6b) - _ASM_EXTABLE(2b, 7b) - _ASM_EXTABLE(3b, 8b) - _ASM_EXTABLE(4b, 9b) -SYM_FUNC_END(xen_failsafe_callback) -#endif /* CONFIG_XEN_PV */ - SYM_CODE_START_LOCAL_NOALIGN(handle_exception) /* the function address is in %gs's slot on the stack */ SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 diff --git a/arch/x86/entry/vdso/vdso32/note.S b/arch/x86/entry/vdso/vdso32/note.S index e78047d119f6..2cbd39939dc6 100644 --- a/arch/x86/entry/vdso/vdso32/note.S +++ b/arch/x86/entry/vdso/vdso32/note.S @@ -16,33 +16,3 @@ ELFNOTE_START(Linux, 0, "a") ELFNOTE_END BUILD_SALT - -#ifdef CONFIG_XEN -/* - * Add a special note telling glibc's dynamic linker a fake hardware - * flavor that it will use to choose the search path for libraries in the - * same way it uses real hardware capabilities like "mmx". - * We supply "nosegneg" as the fake capability, to indicate that we - * do not like negative offsets in instructions using segment overrides, - * since we implement those inefficiently. This makes it possible to - * install libraries optimized to avoid those access patterns in someplace - * like /lib/i686/tls/nosegneg. 
Note that an /etc/ld.so.conf.d/file - * corresponding to the bits here is needed to make ldconfig work right. - * It should contain: - * hwcap 1 nosegneg - * to match the mapping of bit to name that we give here. - * - * At runtime, the fake hardware feature will be considered to be present - * if its bit is set in the mask word. So, we start with the mask 0, and - * at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen. - */ - -#include "../../xen/vdso.h" /* Defines VDSO_NOTE_NONEGSEG_BIT. */ - -ELFNOTE_START(GNU, 2, "a") - .long 1 /* ncaps */ -VDSO32_NOTE_MASK: /* Symbol used by arch/x86/xen/setup.c */ - .long 0 /* mask */ - .byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */ -ELFNOTE_END -#endif diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 60b944dd2df1..4f77b8f22e54 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -8,6 +8,7 @@ #include <asm/io.h> #include <asm/hyperv-tlfs.h> #include <asm/nospec-branch.h> +#include <asm/paravirt.h> typedef int (*hyperv_fill_flush_list_func)( struct hv_guest_mapping_flush_list *flush, @@ -54,6 +55,17 @@ typedef int (*hyperv_fill_flush_list_func)( vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK); #define hv_get_raw_timer() rdtsc_ordered() +/* + * Reference to pv_ops must be inline so objtool + * detection of noinstr violations can work correctly. 
+ */ +static __always_inline void hv_setup_sched_clock(void *sched_clock) +{ +#ifdef CONFIG_PARAVIRT + pv_ops.time.sched_clock = sched_clock; +#endif +} + void hyperv_vector_handler(struct pt_regs *regs); static inline void hv_enable_stimer0_percpu_irq(int irq) {} diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 6e81788a30c1..28996fe19301 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -25,7 +25,7 @@ void entry_SYSENTER_compat(void); void __end_entry_SYSENTER_compat(void); void entry_SYSCALL_compat(void); void entry_INT80_compat(void); -#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) +#ifdef CONFIG_XEN_PV void xen_entry_INT80_compat(void); #endif #endif diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 6669164abadc..9646c300f128 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -301,7 +301,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; extern void early_ignore_irq(void); -#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) +#ifdef CONFIG_XEN_PV extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE]; #endif diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 2f3e8f2a958f..ecefaffd15d4 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -33,7 +33,7 @@ static inline void set_fs(mm_segment_t fs) set_thread_flag(TIF_FSCHECK); } -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define user_addr_max() (current->thread.addr_limit.seg) /* diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index af94f05a5c66..31125448b174 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -361,13 +361,6 @@ static void __init 
ms_hyperv_init_platform(void) #endif } -void hv_setup_sched_clock(void *sched_clock) -{ -#ifdef CONFIG_PARAVIRT - pv_ops.time.sched_clock = sched_clock; -#endif -} - const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .name = "Microsoft Hyper-V", .detect = ms_hyperv_platform, diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index f66a6b90f954..7ed84c282233 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -134,38 +134,7 @@ SYM_CODE_START(startup_32) movl %eax,pa(initial_page_table+0xffc) #endif -#ifdef CONFIG_PARAVIRT - /* This is can only trip for a broken bootloader... */ - cmpw $0x207, pa(boot_params + BP_version) - jb .Ldefault_entry - - /* Paravirt-compatible boot parameters. Look to see what architecture - we're booting under. */ - movl pa(boot_params + BP_hardware_subarch), %eax - cmpl $num_subarch_entries, %eax - jae .Lbad_subarch - - movl pa(subarch_entries)(,%eax,4), %eax - subl $__PAGE_OFFSET, %eax - jmp *%eax - -.Lbad_subarch: -SYM_INNER_LABEL_ALIGN(xen_entry, SYM_L_WEAK) - /* Unknown implementation; there's really - nothing we can do at this point. */ - ud2a - - __INITDATA - -subarch_entries: - .long .Ldefault_entry /* normal x86/PC */ - .long xen_entry /* Xen hypervisor */ - .long .Ldefault_entry /* Moorestown MID */ -num_subarch_entries = (. 
- subarch_entries) / 4 -.previous -#else jmp .Ldefault_entry -#endif /* CONFIG_PARAVIRT */ SYM_CODE_END(startup_32) #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index d6f946707270..9afefe325acb 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -390,7 +390,7 @@ unsigned long x86_fsgsbase_read_task(struct task_struct *task, */ mutex_lock(&task->mm->context.lock); ldt = task->mm->context.ldt; - if (unlikely(idx >= ldt->nr_entries)) + if (unlikely(!ldt || idx >= ldt->nr_entries)) base = 0; else base = get_desc_base(ldt->entries + idx); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index fa873e3e6e90..3fd6eec202d7 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -370,7 +370,8 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_mask(CPUID_7_EDX, F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | - F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) + F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) | + F(SERIALIZE) ); /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. 
*/ diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index af9cdb426dd2..814d3aee5cef 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -900,6 +900,7 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages) kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_HYPERV); synic->active = true; synic->dont_zero_synic_pages = dont_zero_synic_pages; + synic->control = HV_SYNIC_CONTROL_ENABLE; return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 232dd2f9a081..599d73206299 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -820,22 +820,22 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) return 1; - if (cr0 & X86_CR0_PG) { #ifdef CONFIG_X86_64 - if (!is_paging(vcpu) && (vcpu->arch.efer & EFER_LME)) { - int cs_db, cs_l; + if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) && + (cr0 & X86_CR0_PG)) { + int cs_db, cs_l; - if (!is_pae(vcpu)) - return 1; - kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l); - if (cs_l) - return 1; - } else -#endif - if (is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) && - !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu))) + if (!is_pae(vcpu)) + return 1; + kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l); + if (cs_l) return 1; } +#endif + if (!(vcpu->arch.efer & EFER_LME) && (cr0 & X86_CR0_PG) && + is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) && + !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu))) + return 1; if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) return 1; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 0c7643d9f7cb..35f1498e9832 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1139,7 +1139,7 @@ void do_user_addr_fault(struct pt_regs *regs, struct vm_area_struct *vma; struct task_struct *tsk; struct mm_struct *mm; - vm_fault_t fault, major = 0; + vm_fault_t fault; unsigned int flags = FAULT_FLAG_DEFAULT; tsk = current; @@ 
-1291,8 +1291,7 @@ good_area: * userland). The return to userland is identified whenever * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags. */ - fault = handle_mm_fault(vma, address, flags); - major |= fault & VM_FAULT_MAJOR; + fault = handle_mm_fault(vma, address, flags, regs); /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { @@ -1319,18 +1318,6 @@ good_area: return; } - /* - * Major/minor page fault accounting. If any of the events - * returned VM_FAULT_MAJOR, we account it as a major fault. - */ - if (major) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); - } - check_v8086_mode(regs, address, tsk); } NOKPROBE_SYMBOL(do_user_addr_fault); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3b246ae40c8f..a4ac13cc3fdc 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1452,6 +1452,15 @@ static unsigned long probe_memory_block_size(void) goto done; } + /* + * Use max block size to minimize overhead on bare metal, where + * alignment for memory hotplug isn't a concern. 
+ */ + if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + bz = MAX_BLOCK_SIZE; + goto done; + } + /* Find the largest allowed block size that aligns to memory end */ for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) { if (IS_ALIGNED(boot_mem_end, bz)) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index b05f45e5e8e2..aa76ec2d359b 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -929,5 +929,4 @@ int memory_add_physaddr_to_nid(u64 start) nid = numa_meminfo.blk[0].nid; return nid; } -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 1aded63a95cb..218acbd5c7a0 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -19,6 +19,7 @@ config XEN_PV bool "Xen PV guest support" default y depends on XEN + depends on X86_64 select PARAVIRT_XXL select XEN_HAVE_PVMMU select XEN_HAVE_VPMU @@ -50,7 +51,7 @@ config XEN_PVHVM_SMP config XEN_512GB bool "Limit Xen pv-domain memory to 512GB" - depends on XEN_PV && X86_64 + depends on XEN_PV default y help Limit paravirtualized user domains to 512GB of RAM. 
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 5f1db522d06b..fc5c5ba4aacb 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y +OBJECT_FILES_NON_STANDARD_xen-asm.o := y ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities @@ -33,7 +33,6 @@ obj-$(CONFIG_XEN_PV) += mmu_pv.o obj-$(CONFIG_XEN_PV) += irq.o obj-$(CONFIG_XEN_PV) += multicalls.o obj-$(CONFIG_XEN_PV) += xen-asm.o -obj-$(CONFIG_XEN_PV) += xen-asm_$(BITS).o obj-$(CONFIG_XEN_PVH) += enlighten_pvh.o diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 1aff4ae65655..e82fd1910dae 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -60,10 +60,6 @@ static u32 xen_apic_read(u32 reg) if (reg == APIC_LVR) return 0x14; -#ifdef CONFIG_X86_32 - if (reg == APIC_LDR) - return SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); -#endif if (reg != APIC_ID) return 0; @@ -129,14 +125,6 @@ static int xen_phys_pkg_id(int initial_apic_id, int index_msb) return initial_apic_id >> index_msb; } -#ifdef CONFIG_X86_32 -static int xen_x86_32_early_logical_apicid(int cpu) -{ - /* Match with APIC_LDR read. Otherwise setup_local_APIC complains. */ - return 1 << cpu; -} -#endif - static void xen_noop(void) { } @@ -199,11 +187,6 @@ static struct apic xen_pv_apic = { .icr_write = xen_apic_icr_write, .wait_icr_idle = xen_noop, .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle, - -#ifdef CONFIG_X86_32 - /* generic_processor_info and setup_local_APIC. 
*/ - .x86_32_early_logical_apicid = xen_x86_32_early_logical_apicid, -#endif }; static void __init xen_apic_check(void) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 2aab43a13a8c..22e741e0b10c 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -119,14 +119,6 @@ static void __init xen_banner(void) printk(KERN_INFO "Xen version: %d.%d%s%s\n", version >> 16, version & 0xffff, extra.extraversion, xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); - -#ifdef CONFIG_X86_32 - pr_warn("WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n" - "Support for running as 32-bit PV-guest under Xen will soon be removed\n" - "from the Linux kernel!\n" - "Please use either a 64-bit kernel or switch to HVM or PVH mode!\n" - "WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n"); -#endif } static void __init xen_pv_init_platform(void) @@ -353,15 +345,13 @@ static void set_aliased_prot(void *v, pgprot_t prot) pte_t *ptep; pte_t pte; unsigned long pfn; - struct page *page; unsigned char dummy; + void *va; ptep = lookup_address((unsigned long)v, &level); BUG_ON(ptep == NULL); pfn = pte_pfn(*ptep); - page = pfn_to_page(pfn); - pte = pfn_pte(pfn, prot); /* @@ -391,14 +381,10 @@ static void set_aliased_prot(void *v, pgprot_t prot) if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); - if (!PageHighMem(page)) { - void *av = __va(PFN_PHYS(pfn)); + va = __va(PFN_PHYS(pfn)); - if (av != v) - if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0)) - BUG(); - } else - kmap_flush_unused(); + if (va != v && HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0)) + BUG(); preempt_enable(); } @@ -538,30 +524,12 @@ static void load_TLS_descriptor(struct thread_struct *t, static void xen_load_tls(struct thread_struct *t, unsigned int cpu) { /* - * XXX sleazy hack: If we're being called in a lazy-cpu zone - * and lazy gs handling is enabled, it means we're in a - * context switch, 
and %gs has just been saved. This means we - * can zero it out to prevent faults on exit from the - * hypervisor if the next process has no %gs. Either way, it - * has been saved, and the new value will get loaded properly. - * This will go away as soon as Xen has been modified to not - * save/restore %gs for normal hypercalls. - * - * On x86_64, this hack is not used for %gs, because gs points - * to KERNEL_GS_BASE (and uses it for PDA references), so we - * must not zero %gs on x86_64 - * - * For x86_64, we need to zero %fs, otherwise we may get an + * In lazy mode we need to zero %fs, otherwise we may get an * exception between the new %fs descriptor being loaded and * %fs being effectively cleared at __switch_to(). */ - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { -#ifdef CONFIG_X86_32 - lazy_load_gs(0); -#else + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) loadsegment(fs, 0); -#endif - } xen_mc_batch(); @@ -572,13 +540,11 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) xen_mc_issue(PARAVIRT_LAZY_CPU); } -#ifdef CONFIG_X86_64 static void xen_load_gs_index(unsigned int idx) { if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx)) BUG(); } -#endif static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, const void *ptr) @@ -597,7 +563,6 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, preempt_enable(); } -#ifdef CONFIG_X86_64 void noist_exc_debug(struct pt_regs *regs); DEFINE_IDTENTRY_RAW(xenpv_exc_nmi) @@ -697,7 +662,6 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist) return true; } -#endif static int cvt_gate_to_trap(int vector, const gate_desc *val, struct trap_info *info) @@ -710,10 +674,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, info->vector = vector; addr = gate_offset(val); -#ifdef CONFIG_X86_64 if (!get_trap_addr((void **)&addr, val->bits.ist)) return 0; -#endif /* CONFIG_X86_64 */ info->address = addr; info->cs = gate_segment(val); @@ 
-958,15 +920,12 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; -#ifdef CONFIG_X86_64 unsigned int which; u64 base; -#endif ret = 0; switch (msr) { -#ifdef CONFIG_X86_64 case MSR_FS_BASE: which = SEGBASE_FS; goto set; case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; @@ -976,7 +935,6 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) if (HYPERVISOR_set_segment_base(which, base) != 0) ret = -EIO; break; -#endif case MSR_STAR: case MSR_CSTAR: @@ -1058,9 +1016,7 @@ void __init xen_setup_vcpu_info_placement(void) static const struct pv_info xen_info __initconst = { .shared_kernel_pmd = 0, -#ifdef CONFIG_X86_64 .extra_user_64bit_cs = FLAT_USER_CS64, -#endif .name = "Xen", }; @@ -1086,18 +1042,14 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_pmc = xen_read_pmc, .iret = xen_iret, -#ifdef CONFIG_X86_64 .usergs_sysret64 = xen_sysret64, -#endif .load_tr_desc = paravirt_nop, .set_ldt = xen_set_ldt, .load_gdt = xen_load_gdt, .load_idt = xen_load_idt, .load_tls = xen_load_tls, -#ifdef CONFIG_X86_64 .load_gs_index = xen_load_gs_index, -#endif .alloc_ldt = xen_alloc_ldt, .free_ldt = xen_free_ldt, @@ -1364,15 +1316,7 @@ asmlinkage __visible void __init xen_start_kernel(void) /* keep using Xen gdt for now; no urgent need to change it */ -#ifdef CONFIG_X86_32 - pv_info.kernel_rpl = 1; - if (xen_feature(XENFEAT_supervisor_mode_kernel)) - pv_info.kernel_rpl = 0; -#else pv_info.kernel_rpl = 0; -#endif - /* set the limit of our address space */ - xen_reserve_top(); /* * We used to do this in xen_arch_setup, but that is too late @@ -1384,12 +1328,6 @@ asmlinkage __visible void __init xen_start_kernel(void) if (rc != 0) xen_raw_printk("physdev_op failed %d\n", rc); -#ifdef CONFIG_X86_32 - /* set up basic CPUID stuff */ - cpu_detect(&new_cpu_data); - set_cpu_cap(&new_cpu_data, 
X86_FEATURE_FPU); - new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1); -#endif if (xen_start_info->mod_start) { if (xen_start_info->flags & SIF_MOD_START_PFN) @@ -1458,12 +1396,8 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_efi_init(&boot_params); /* Start the world */ -#ifdef CONFIG_X86_32 - i386_start_kernel(); -#else cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */ x86_64_start_reservations((char *)__pa_symbol(&boot_params)); -#endif } static int xen_cpu_up_prepare_pv(unsigned int cpu) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index a58d9c69807a..3273c985d3dd 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -86,19 +86,8 @@ #include "mmu.h" #include "debugfs.h" -#ifdef CONFIG_X86_32 -/* - * Identity map, in addition to plain kernel map. This needs to be - * large enough to allocate page table pages to allocate the rest. - * Each page can map 2MB. - */ -#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) -static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); -#endif -#ifdef CONFIG_X86_64 /* l3 pud for userspace vsyscall mapping */ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; -#endif /* CONFIG_X86_64 */ /* * Protects atomic reservation decrease/increase against concurrent increases. @@ -280,10 +269,7 @@ static inline void __xen_set_pte(pte_t *ptep, pte_t pteval) if (!xen_batched_set_pte(ptep, pteval)) { /* * Could call native_set_pte() here and trap and - * emulate the PTE write but with 32-bit guests this - * needs two traps (one for each of the two 32-bit - * words in the PTE) so do one hypercall directly - * instead. + * emulate the PTE write, but a hypercall is much cheaper. 
*/ struct mmu_update u; @@ -439,26 +425,6 @@ static void xen_set_pud(pud_t *ptr, pud_t val) xen_set_pud_hyper(ptr, val); } -#ifdef CONFIG_X86_PAE -static void xen_set_pte_atomic(pte_t *ptep, pte_t pte) -{ - trace_xen_mmu_set_pte_atomic(ptep, pte); - __xen_set_pte(ptep, pte); -} - -static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -{ - trace_xen_mmu_pte_clear(mm, addr, ptep); - __xen_set_pte(ptep, native_make_pte(0)); -} - -static void xen_pmd_clear(pmd_t *pmdp) -{ - trace_xen_mmu_pmd_clear(pmdp); - set_pmd(pmdp, __pmd(0)); -} -#endif /* CONFIG_X86_PAE */ - __visible pmd_t xen_make_pmd(pmdval_t pmd) { pmd = pte_pfn_to_mfn(pmd); @@ -466,7 +432,6 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd) } PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); -#ifdef CONFIG_X86_64 __visible pudval_t xen_pud_val(pud_t pud) { return pte_mfn_to_pfn(pud.pud); @@ -571,27 +536,27 @@ __visible p4d_t xen_make_p4d(p4dval_t p4d) } PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d); #endif /* CONFIG_PGTABLE_LEVELS >= 5 */ -#endif /* CONFIG_X86_64 */ -static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, - int (*func)(struct mm_struct *mm, struct page *, enum pt_level), - bool last, unsigned long limit) +static void xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, + void (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + bool last, unsigned long limit) { - int i, nr, flush = 0; + int i, nr; nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD; for (i = 0; i < nr; i++) { if (!pmd_none(pmd[i])) - flush |= (*func)(mm, pmd_page(pmd[i]), PT_PTE); + (*func)(mm, pmd_page(pmd[i]), PT_PTE); } - return flush; } -static int xen_pud_walk(struct mm_struct *mm, pud_t *pud, - int (*func)(struct mm_struct *mm, struct page *, enum pt_level), - bool last, unsigned long limit) +static void xen_pud_walk(struct mm_struct *mm, pud_t *pud, + void (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + bool last, unsigned long limit) { - int i, nr, flush = 0; + int i, nr; nr = last ? 
pud_index(limit) + 1 : PTRS_PER_PUD; for (i = 0; i < nr; i++) { @@ -602,29 +567,26 @@ static int xen_pud_walk(struct mm_struct *mm, pud_t *pud, pmd = pmd_offset(&pud[i], 0); if (PTRS_PER_PMD > 1) - flush |= (*func)(mm, virt_to_page(pmd), PT_PMD); - flush |= xen_pmd_walk(mm, pmd, func, - last && i == nr - 1, limit); + (*func)(mm, virt_to_page(pmd), PT_PMD); + xen_pmd_walk(mm, pmd, func, last && i == nr - 1, limit); } - return flush; } -static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d, - int (*func)(struct mm_struct *mm, struct page *, enum pt_level), - bool last, unsigned long limit) +static void xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d, + void (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + bool last, unsigned long limit) { - int flush = 0; pud_t *pud; if (p4d_none(*p4d)) - return flush; + return; pud = pud_offset(p4d, 0); if (PTRS_PER_PUD > 1) - flush |= (*func)(mm, virt_to_page(pud), PT_PUD); - flush |= xen_pud_walk(mm, pud, func, last, limit); - return flush; + (*func)(mm, virt_to_page(pud), PT_PUD); + xen_pud_walk(mm, pud, func, last, limit); } /* @@ -636,32 +598,27 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d, * will be STACK_TOP_MAX, but at boot we need to pin up to * FIXADDR_TOP. * - * For 32-bit the important bit is that we don't pin beyond there, - * because then we start getting into Xen's ptes. - * - * For 64-bit, we must skip the Xen hole in the middle of the address - * space, just after the big x86-64 virtual hole. + * We must skip the Xen hole in the middle of the address space, just after + * the big x86-64 virtual hole. 
*/ -static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, - int (*func)(struct mm_struct *mm, struct page *, - enum pt_level), - unsigned long limit) +static void __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, + void (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + unsigned long limit) { - int i, nr, flush = 0; + int i, nr; unsigned hole_low = 0, hole_high = 0; /* The limit is the last byte to be touched */ limit--; BUG_ON(limit >= FIXADDR_TOP); -#ifdef CONFIG_X86_64 /* * 64-bit has a great big hole in the middle of the address * space, which contains the Xen mappings. */ hole_low = pgd_index(GUARD_HOLE_BASE_ADDR); hole_high = pgd_index(GUARD_HOLE_END_ADDR); -#endif nr = pgd_index(limit) + 1; for (i = 0; i < nr; i++) { @@ -674,22 +631,20 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, continue; p4d = p4d_offset(&pgd[i], 0); - flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit); + xen_p4d_walk(mm, p4d, func, i == nr - 1, limit); } /* Do the top level last, so that the callbacks can use it as a cue to do final things like tlb flushes. 
*/ - flush |= (*func)(mm, virt_to_page(pgd), PT_PGD); - - return flush; + (*func)(mm, virt_to_page(pgd), PT_PGD); } -static int xen_pgd_walk(struct mm_struct *mm, - int (*func)(struct mm_struct *mm, struct page *, - enum pt_level), - unsigned long limit) +static void xen_pgd_walk(struct mm_struct *mm, + void (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + unsigned long limit) { - return __xen_pgd_walk(mm, mm->pgd, func, limit); + __xen_pgd_walk(mm, mm->pgd, func, limit); } /* If we're using split pte locks, then take the page's lock and @@ -722,26 +677,17 @@ static void xen_do_pin(unsigned level, unsigned long pfn) xen_extend_mmuext_op(&op); } -static int xen_pin_page(struct mm_struct *mm, struct page *page, - enum pt_level level) +static void xen_pin_page(struct mm_struct *mm, struct page *page, + enum pt_level level) { unsigned pgfl = TestSetPagePinned(page); - int flush; - - if (pgfl) - flush = 0; /* already pinned */ - else if (PageHighMem(page)) - /* kmaps need flushing if we found an unpinned - highpage */ - flush = 1; - else { + + if (!pgfl) { void *pt = lowmem_page_address(page); unsigned long pfn = page_to_pfn(page); struct multicall_space mcs = __xen_mc_entry(0); spinlock_t *ptl; - flush = 0; - /* * We need to hold the pagetable lock between the time * we make the pagetable RO and when we actually pin @@ -778,8 +724,6 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page, xen_mc_callback(xen_pte_unlock, ptl); } } - - return flush; } /* This is called just after a mm has been created, but it has not @@ -787,39 +731,22 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page, read-only, and can be pinned. 
*/ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) { + pgd_t *user_pgd = xen_get_user_pgd(pgd); + trace_xen_mmu_pgd_pin(mm, pgd); xen_mc_batch(); - if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) { - /* re-enable interrupts for flushing */ - xen_mc_issue(0); + __xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT); - kmap_flush_unused(); + xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); - xen_mc_batch(); + if (user_pgd) { + xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD); + xen_do_pin(MMUEXT_PIN_L4_TABLE, + PFN_DOWN(__pa(user_pgd))); } -#ifdef CONFIG_X86_64 - { - pgd_t *user_pgd = xen_get_user_pgd(pgd); - - xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); - - if (user_pgd) { - xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD); - xen_do_pin(MMUEXT_PIN_L4_TABLE, - PFN_DOWN(__pa(user_pgd))); - } - } -#else /* CONFIG_X86_32 */ -#ifdef CONFIG_X86_PAE - /* Need to make sure unshared kernel PMD is pinnable */ - xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]), - PT_PMD); -#endif - xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); -#endif /* CONFIG_X86_64 */ xen_mc_issue(0); } @@ -854,11 +781,10 @@ void xen_mm_pin_all(void) spin_unlock(&pgd_lock); } -static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, - enum pt_level level) +static void __init xen_mark_pinned(struct mm_struct *mm, struct page *page, + enum pt_level level) { SetPagePinned(page); - return 0; } /* @@ -870,18 +796,16 @@ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, static void __init xen_after_bootmem(void) { static_branch_enable(&xen_struct_pages_ready); -#ifdef CONFIG_X86_64 SetPagePinned(virt_to_page(level3_user_vsyscall)); -#endif xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); } -static int xen_unpin_page(struct mm_struct *mm, struct page *page, - enum pt_level level) +static void xen_unpin_page(struct mm_struct *mm, struct page *page, + enum pt_level level) { unsigned pgfl = TestClearPagePinned(page); - if (pgfl && 
!PageHighMem(page)) { + if (pgfl) { void *pt = lowmem_page_address(page); unsigned long pfn = page_to_pfn(page); spinlock_t *ptl = NULL; @@ -912,36 +836,24 @@ static int xen_unpin_page(struct mm_struct *mm, struct page *page, xen_mc_callback(xen_pte_unlock, ptl); } } - - return 0; /* never need to flush on unpin */ } /* Release a pagetables pages back as normal RW */ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) { + pgd_t *user_pgd = xen_get_user_pgd(pgd); + trace_xen_mmu_pgd_unpin(mm, pgd); xen_mc_batch(); xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); -#ifdef CONFIG_X86_64 - { - pgd_t *user_pgd = xen_get_user_pgd(pgd); - - if (user_pgd) { - xen_do_pin(MMUEXT_UNPIN_TABLE, - PFN_DOWN(__pa(user_pgd))); - xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD); - } + if (user_pgd) { + xen_do_pin(MMUEXT_UNPIN_TABLE, + PFN_DOWN(__pa(user_pgd))); + xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD); } -#endif - -#ifdef CONFIG_X86_PAE - /* Need to make sure unshared kernel PMD is unpinned */ - xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]), - PT_PMD); -#endif __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT); @@ -1089,7 +1001,6 @@ static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn) BUG(); } -#ifdef CONFIG_X86_64 static void __init xen_cleanhighmap(unsigned long vaddr, unsigned long vaddr_end) { @@ -1273,17 +1184,15 @@ static void __init xen_pagetable_cleanhighmap(void) xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2)); xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base)); } -#endif static void __init xen_pagetable_p2m_setup(void) { xen_vmalloc_p2m_tree(); -#ifdef CONFIG_X86_64 xen_pagetable_p2m_free(); xen_pagetable_cleanhighmap(); -#endif + /* And revector! 
Bye bye old array */ xen_start_info->mfn_list = (unsigned long)xen_p2m_addr; } @@ -1420,6 +1329,8 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3) } static void xen_write_cr3(unsigned long cr3) { + pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); + BUG_ON(preemptible()); xen_mc_batch(); /* disables interrupts */ @@ -1430,20 +1341,14 @@ static void xen_write_cr3(unsigned long cr3) __xen_write_cr3(true, cr3); -#ifdef CONFIG_X86_64 - { - pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); - if (user_pgd) - __xen_write_cr3(false, __pa(user_pgd)); - else - __xen_write_cr3(false, 0); - } -#endif + if (user_pgd) + __xen_write_cr3(false, __pa(user_pgd)); + else + __xen_write_cr3(false, 0); xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ } -#ifdef CONFIG_X86_64 /* * At the start of the day - when Xen launches a guest, it has already * built pagetables for the guest. We diligently look over them @@ -1478,49 +1383,39 @@ static void __init xen_write_cr3_init(unsigned long cr3) xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ } -#endif static int xen_pgd_alloc(struct mm_struct *mm) { pgd_t *pgd = mm->pgd; - int ret = 0; + struct page *page = virt_to_page(pgd); + pgd_t *user_pgd; + int ret = -ENOMEM; BUG_ON(PagePinned(virt_to_page(pgd))); + BUG_ON(page->private != 0); -#ifdef CONFIG_X86_64 - { - struct page *page = virt_to_page(pgd); - pgd_t *user_pgd; + user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + page->private = (unsigned long)user_pgd; - BUG_ON(page->private != 0); - - ret = -ENOMEM; - - user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - page->private = (unsigned long)user_pgd; - - if (user_pgd != NULL) { + if (user_pgd != NULL) { #ifdef CONFIG_X86_VSYSCALL_EMULATION - user_pgd[pgd_index(VSYSCALL_ADDR)] = - __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); + user_pgd[pgd_index(VSYSCALL_ADDR)] = + __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); #endif - ret = 0; - } - - 
BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); + ret = 0; } -#endif + + BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); + return ret; } static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) { -#ifdef CONFIG_X86_64 pgd_t *user_pgd = xen_get_user_pgd(pgd); if (user_pgd) free_page((unsigned long)user_pgd); -#endif } /* @@ -1539,7 +1434,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) */ __visible pte_t xen_make_pte_init(pteval_t pte) { -#ifdef CONFIG_X86_64 unsigned long pfn; /* @@ -1553,7 +1447,7 @@ __visible pte_t xen_make_pte_init(pteval_t pte) pfn >= xen_start_info->first_p2m_pfn && pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames) pte &= ~_PAGE_RW; -#endif + pte = pte_pfn_to_mfn(pte); return native_make_pte(pte); } @@ -1561,13 +1455,6 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init); static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) { -#ifdef CONFIG_X86_32 - /* If there's an existing pte, then don't allow _PAGE_RW to be set */ - if (pte_mfn(pte) != INVALID_P2M_ENTRY - && pte_val_ma(*ptep) & _PAGE_PRESENT) - pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & - pte_val_ma(pte)); -#endif __xen_set_pte(ptep, pte); } @@ -1642,20 +1529,14 @@ static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, if (static_branch_likely(&xen_struct_pages_ready)) SetPagePinned(page); - if (!PageHighMem(page)) { - xen_mc_batch(); + xen_mc_batch(); - __set_pfn_prot(pfn, PAGE_KERNEL_RO); + __set_pfn_prot(pfn, PAGE_KERNEL_RO); - if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) - __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); + if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) + __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); - xen_mc_issue(PARAVIRT_LAZY_MMU); - } else { - /* make sure there are no stray mappings of - this page */ - kmap_flush_unused(); - } + xen_mc_issue(PARAVIRT_LAZY_MMU); } } @@ -1678,16 +1559,15 @@ static inline void xen_release_ptpage(unsigned long pfn, unsigned level) 
trace_xen_mmu_release_ptpage(pfn, level, pinned); if (pinned) { - if (!PageHighMem(page)) { - xen_mc_batch(); + xen_mc_batch(); - if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) - __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); + if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) + __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); - __set_pfn_prot(pfn, PAGE_KERNEL); + __set_pfn_prot(pfn, PAGE_KERNEL); + + xen_mc_issue(PARAVIRT_LAZY_MMU); - xen_mc_issue(PARAVIRT_LAZY_MMU); - } ClearPagePinned(page); } } @@ -1702,7 +1582,6 @@ static void xen_release_pmd(unsigned long pfn) xen_release_ptpage(pfn, PT_PMD); } -#ifdef CONFIG_X86_64 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) { xen_alloc_ptpage(mm, pfn, PT_PUD); @@ -1712,20 +1591,6 @@ static void xen_release_pud(unsigned long pfn) { xen_release_ptpage(pfn, PT_PUD); } -#endif - -void __init xen_reserve_top(void) -{ -#ifdef CONFIG_X86_32 - unsigned long top = HYPERVISOR_VIRT_START; - struct xen_platform_parameters pp; - - if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) - top = pp.virt_start; - - reserve_top_address(-top); -#endif /* CONFIG_X86_32 */ -} /* * Like __va(), but returns address in the kernel mapping (which is @@ -1733,11 +1598,7 @@ void __init xen_reserve_top(void) */ static void * __init __ka(phys_addr_t paddr) { -#ifdef CONFIG_X86_64 return (void *)(paddr + __START_KERNEL_map); -#else - return __va(paddr); -#endif } /* Convert a machine address to physical address */ @@ -1771,56 +1632,7 @@ static void __init set_page_prot(void *addr, pgprot_t prot) { return set_page_prot_flags(addr, prot, UVMF_NONE); } -#ifdef CONFIG_X86_32 -static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) -{ - unsigned pmdidx, pteidx; - unsigned ident_pte; - unsigned long pfn; - - level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES, - PAGE_SIZE); - - ident_pte = 0; - pfn = 0; - for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { - pte_t *pte_page; - - /* 
Reuse or allocate a page of ptes */ - if (pmd_present(pmd[pmdidx])) - pte_page = m2v(pmd[pmdidx].pmd); - else { - /* Check for free pte pages */ - if (ident_pte == LEVEL1_IDENT_ENTRIES) - break; - - pte_page = &level1_ident_pgt[ident_pte]; - ident_pte += PTRS_PER_PTE; - - pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); - } - - /* Install mappings */ - for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { - pte_t pte; - if (pfn > max_pfn_mapped) - max_pfn_mapped = pfn; - - if (!pte_none(pte_page[pteidx])) - continue; - - pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); - pte_page[pteidx] = pte; - } - } - - for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) - set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); - - set_page_prot(pmd, PAGE_KERNEL_RO); -} -#endif void __init xen_setup_machphys_mapping(void) { struct xen_machphys_mapping mapping; @@ -1831,13 +1643,8 @@ void __init xen_setup_machphys_mapping(void) } else { machine_to_phys_nr = MACH2PHYS_NR_ENTRIES; } -#ifdef CONFIG_X86_32 - WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1)) - < machine_to_phys_mapping); -#endif } -#ifdef CONFIG_X86_64 static void __init convert_pfn_mfn(void *v) { pte_t *pte = v; @@ -2168,105 +1975,6 @@ void __init xen_relocate_p2m(void) xen_start_info->nr_p2m_frames = n_frames; } -#else /* !CONFIG_X86_64 */ -static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); -static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); -RESERVE_BRK(fixup_kernel_pmd, PAGE_SIZE); -RESERVE_BRK(fixup_kernel_pte, PAGE_SIZE); - -static void __init xen_write_cr3_init(unsigned long cr3) -{ - unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); - - BUG_ON(read_cr3_pa() != __pa(initial_page_table)); - BUG_ON(cr3 != __pa(swapper_pg_dir)); - - /* - * We are switching to swapper_pg_dir for the first time (from - * initial_page_table) and therefore need to mark that page - * read-only and then pin it. - * - * Xen disallows sharing of kernel PMDs for PAE - * guests. 
Therefore we must copy the kernel PMD from - * initial_page_table into a new kernel PMD to be used in - * swapper_pg_dir. - */ - swapper_kernel_pmd = - extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - copy_page(swapper_kernel_pmd, initial_kernel_pmd); - swapper_pg_dir[KERNEL_PGD_BOUNDARY] = - __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); - set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); - - set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); - xen_write_cr3(cr3); - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn); - - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, - PFN_DOWN(__pa(initial_page_table))); - set_page_prot(initial_page_table, PAGE_KERNEL); - set_page_prot(initial_kernel_pmd, PAGE_KERNEL); - - pv_ops.mmu.write_cr3 = &xen_write_cr3; -} - -/* - * For 32 bit domains xen_start_info->pt_base is the pgd address which might be - * not the first page table in the page table pool. - * Iterate through the initial page tables to find the real page table base. - */ -static phys_addr_t __init xen_find_pt_base(pmd_t *pmd) -{ - phys_addr_t pt_base, paddr; - unsigned pmdidx; - - pt_base = min(__pa(xen_start_info->pt_base), __pa(pmd)); - - for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) - if (pmd_present(pmd[pmdidx]) && !pmd_large(pmd[pmdidx])) { - paddr = m2p(pmd[pmdidx].pmd); - pt_base = min(pt_base, paddr); - } - - return pt_base; -} - -void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) -{ - pmd_t *kernel_pmd; - - kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); - - xen_pt_base = xen_find_pt_base(kernel_pmd); - xen_pt_size = xen_start_info->nr_pt_frames * PAGE_SIZE; - - initial_kernel_pmd = - extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - - max_pfn_mapped = PFN_DOWN(xen_pt_base + xen_pt_size + 512 * 1024); - - copy_page(initial_kernel_pmd, kernel_pmd); - - xen_map_identity_early(initial_kernel_pmd, max_pfn); - - copy_page(initial_page_table, pgd); - initial_page_table[KERNEL_PGD_BOUNDARY] = - __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); - 
- set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO); - set_page_prot(initial_page_table, PAGE_KERNEL_RO); - set_page_prot(empty_zero_page, PAGE_KERNEL_RO); - - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, - PFN_DOWN(__pa(initial_page_table))); - xen_write_cr3(__pa(initial_page_table)); - - memblock_reserve(xen_pt_base, xen_pt_size); -} -#endif /* CONFIG_X86_64 */ - void __init xen_reserve_special_pages(void) { phys_addr_t paddr; @@ -2300,12 +2008,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) switch (idx) { case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: -#ifdef CONFIG_X86_32 - case FIX_WP_TEST: -# ifdef CONFIG_HIGHMEM - case FIX_KMAP_BEGIN ... FIX_KMAP_END: -# endif -#elif defined(CONFIG_X86_VSYSCALL_EMULATION) +#ifdef CONFIG_X86_VSYSCALL_EMULATION case VSYSCALL_PAGE: #endif /* All local page mappings */ @@ -2357,9 +2060,7 @@ static void __init xen_post_allocator_init(void) pv_ops.mmu.set_pte = xen_set_pte; pv_ops.mmu.set_pmd = xen_set_pmd; pv_ops.mmu.set_pud = xen_set_pud; -#ifdef CONFIG_X86_64 pv_ops.mmu.set_p4d = xen_set_p4d; -#endif /* This will work as long as patching hasn't happened yet (which it hasn't) */ @@ -2367,15 +2068,11 @@ static void __init xen_post_allocator_init(void) pv_ops.mmu.alloc_pmd = xen_alloc_pmd; pv_ops.mmu.release_pte = xen_release_pte; pv_ops.mmu.release_pmd = xen_release_pmd; -#ifdef CONFIG_X86_64 pv_ops.mmu.alloc_pud = xen_alloc_pud; pv_ops.mmu.release_pud = xen_release_pud; -#endif pv_ops.mmu.make_pte = PV_CALLEE_SAVE(xen_make_pte); -#ifdef CONFIG_X86_64 pv_ops.mmu.write_cr3 = &xen_write_cr3; -#endif } static void xen_leave_lazy_mmu(void) @@ -2420,17 +2117,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .make_pte = PV_CALLEE_SAVE(xen_make_pte_init), .make_pgd = PV_CALLEE_SAVE(xen_make_pgd), -#ifdef CONFIG_X86_PAE - .set_pte_atomic = xen_set_pte_atomic, - .pte_clear = xen_pte_clear, - .pmd_clear = xen_pmd_clear, -#endif /* 
CONFIG_X86_PAE */ .set_pud = xen_set_pud_hyper, .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), -#ifdef CONFIG_X86_64 .pud_val = PV_CALLEE_SAVE(xen_pud_val), .make_pud = PV_CALLEE_SAVE(xen_make_pud), .set_p4d = xen_set_p4d_hyper, @@ -2442,7 +2133,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .p4d_val = PV_CALLEE_SAVE(xen_p4d_val), .make_p4d = PV_CALLEE_SAVE(xen_make_p4d), #endif -#endif /* CONFIG_X86_64 */ .activate_mm = xen_activate_mm, .dup_mmap = xen_dup_mmap, diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 0acba2c712ab..be4151f42611 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -379,12 +379,8 @@ static void __init xen_rebuild_p2m_list(unsigned long *p2m) if (type == P2M_TYPE_PFN || i < chunk) { /* Use initial p2m page contents. */ -#ifdef CONFIG_X86_64 mfns = alloc_p2m_page(); copy_page(mfns, xen_p2m_addr + pfn); -#else - mfns = xen_p2m_addr + pfn; -#endif ptep = populate_extra_pte((unsigned long)(p2m + pfn)); set_pte(ptep, pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL)); @@ -467,7 +463,7 @@ EXPORT_SYMBOL_GPL(get_phys_to_machine); * Allocate new pmd(s). It is checked whether the old pmd is still in place. * If not, nothing is changed. This is okay as the only reason for allocating * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual - * pmd. In case of PAE/x86-32 there are multiple pmds to allocate! + * pmd. 
*/ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg) { diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 3566e37241d7..7eab14d56369 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -32,7 +32,6 @@ #include <xen/features.h> #include <xen/hvc-console.h> #include "xen-ops.h" -#include "vdso.h" #include "mmu.h" #define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024) @@ -545,13 +544,10 @@ static unsigned long __init xen_get_pages_limit(void) { unsigned long limit; -#ifdef CONFIG_X86_32 - limit = GB(64) / PAGE_SIZE; -#else limit = MAXMEM / PAGE_SIZE; if (!xen_initial_domain() && xen_512gb_limit) limit = GB(512) / PAGE_SIZE; -#endif + return limit; } @@ -722,17 +718,8 @@ static void __init xen_reserve_xen_mfnlist(void) if (!xen_is_e820_reserved(start, size)) return; -#ifdef CONFIG_X86_32 - /* - * Relocating the p2m on 32 bit system to an arbitrary virtual address - * is not supported, so just give up. - */ - xen_raw_console_write("Xen hypervisor allocated p2m list conflicts with E820 map\n"); - BUG(); -#else xen_relocate_p2m(); memblock_free(start, size); -#endif } /** @@ -921,20 +908,6 @@ char * __init xen_memory_setup(void) return "Xen"; } -/* - * Set the bit indicating "nosegneg" library variants should be used. - * We only need to bother in pure 32-bit mode; compat 32-bit processes - * can have un-truncated segments, so wrapping around is allowed. 
- */ -static void __init fiddle_vdso(void) -{ -#ifdef CONFIG_X86_32 - u32 *mask = vdso_image_32.data + - vdso_image_32.sym_VDSO32_NOTE_MASK; - *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; -#endif -} - static int register_callback(unsigned type, const void *func) { struct callback_register callback = { @@ -951,11 +924,7 @@ void xen_enable_sysenter(void) int ret; unsigned sysenter_feature; -#ifdef CONFIG_X86_32 - sysenter_feature = X86_FEATURE_SEP; -#else sysenter_feature = X86_FEATURE_SYSENTER32; -#endif if (!boot_cpu_has(sysenter_feature)) return; @@ -967,7 +936,6 @@ void xen_enable_sysenter(void) void xen_enable_syscall(void) { -#ifdef CONFIG_X86_64 int ret; ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); @@ -983,7 +951,6 @@ void xen_enable_syscall(void) if (ret != 0) setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); } -#endif /* CONFIG_X86_64 */ } static void __init xen_pvmmu_arch_setup(void) @@ -1024,7 +991,6 @@ void __init xen_arch_setup(void) disable_cpuidle(); disable_cpufreq(); WARN_ON(xen_set_default_idle()); - fiddle_vdso(); #ifdef CONFIG_NUMA numa_off = 1; #endif diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 47c8f4b444c9..c2ac319f11a4 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -211,15 +211,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void) * sure the old memory can be recycled. 
*/ make_lowmem_page_readwrite(xen_initial_gdt); -#ifdef CONFIG_X86_32 - /* - * Xen starts us with XEN_FLAT_RING1_DS, but linux code - * expects __USER_DS - */ - loadsegment(ds, __USER_DS); - loadsegment(es, __USER_DS); -#endif - xen_filter_cpu_maps(); xen_setup_vcpu_info_placement(); @@ -300,10 +291,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) gdt = get_cpu_gdt_rw(cpu); -#ifdef CONFIG_X86_32 - ctxt->user_regs.fs = __KERNEL_PERCPU; - ctxt->user_regs.gs = __KERNEL_STACK_CANARY; -#endif memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); /* @@ -341,12 +328,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->kernel_ss = __KERNEL_DS; ctxt->kernel_sp = task_top_of_stack(idle); -#ifdef CONFIG_X86_32 - ctxt->event_callback_cs = __KERNEL_CS; - ctxt->failsafe_callback_cs = __KERNEL_CS; -#else ctxt->gs_base_kernel = per_cpu_offset(cpu); -#endif ctxt->event_callback_eip = (unsigned long)xen_asm_exc_xen_hypervisor_callback; ctxt->failsafe_callback_eip = diff --git a/arch/x86/xen/vdso.h b/arch/x86/xen/vdso.h deleted file mode 100644 index 873c54c488fe..000000000000 --- a/arch/x86/xen/vdso.h +++ /dev/null @@ -1,6 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* Bit used for the pseudo-hwcap for non-negative segments. We use - bit 1 to avoid bugs in some versions of glibc when bit 0 is - used; the choice is otherwise arbitrary. */ -#define VDSO_NOTE_NONEGSEG_BIT 1 diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 508fe204520b..1cb0e84b9161 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -6,12 +6,18 @@ * operations here; the indirect forms are better handled in C. 
*/ +#include <asm/errno.h> #include <asm/asm-offsets.h> #include <asm/percpu.h> #include <asm/processor-flags.h> -#include <asm/frame.h> +#include <asm/segment.h> +#include <asm/thread_info.h> #include <asm/asm.h> +#include <asm/frame.h> +#include <xen/interface/xen.h> + +#include <linux/init.h> #include <linux/linkage.h> /* @@ -76,11 +82,7 @@ SYM_FUNC_END(xen_save_fl_direct) */ SYM_FUNC_START(xen_restore_fl_direct) FRAME_BEGIN -#ifdef CONFIG_X86_64 testw $X86_EFLAGS_IF, %di -#else - testb $X86_EFLAGS_IF>>8, %ah -#endif setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask /* * Preempt here doesn't matter because that will deal with any @@ -104,15 +106,6 @@ SYM_FUNC_END(xen_restore_fl_direct) */ SYM_FUNC_START(check_events) FRAME_BEGIN -#ifdef CONFIG_X86_32 - push %eax - push %ecx - push %edx - call xen_force_evtchn_callback - pop %edx - pop %ecx - pop %eax -#else push %rax push %rcx push %rdx @@ -132,7 +125,6 @@ SYM_FUNC_START(check_events) pop %rdx pop %rcx pop %rax -#endif FRAME_END ret SYM_FUNC_END(check_events) @@ -151,3 +143,175 @@ SYM_FUNC_START(xen_read_cr2_direct) FRAME_END ret SYM_FUNC_END(xen_read_cr2_direct); + +.macro xen_pv_trap name +SYM_CODE_START(xen_\name) + pop %rcx + pop %r11 + jmp \name +SYM_CODE_END(xen_\name) +_ASM_NOKPROBE(xen_\name) +.endm + +xen_pv_trap asm_exc_divide_error +xen_pv_trap asm_xenpv_exc_debug +xen_pv_trap asm_exc_int3 +xen_pv_trap asm_xenpv_exc_nmi +xen_pv_trap asm_exc_overflow +xen_pv_trap asm_exc_bounds +xen_pv_trap asm_exc_invalid_op +xen_pv_trap asm_exc_device_not_available +xen_pv_trap asm_exc_double_fault +xen_pv_trap asm_exc_coproc_segment_overrun +xen_pv_trap asm_exc_invalid_tss +xen_pv_trap asm_exc_segment_not_present +xen_pv_trap asm_exc_stack_segment +xen_pv_trap asm_exc_general_protection +xen_pv_trap asm_exc_page_fault +xen_pv_trap asm_exc_spurious_interrupt_bug +xen_pv_trap asm_exc_coprocessor_error +xen_pv_trap asm_exc_alignment_check +#ifdef CONFIG_X86_MCE +xen_pv_trap asm_exc_machine_check +#endif /* 
CONFIG_X86_MCE */ +xen_pv_trap asm_exc_simd_coprocessor_error +#ifdef CONFIG_IA32_EMULATION +xen_pv_trap entry_INT80_compat +#endif +xen_pv_trap asm_exc_xen_hypervisor_callback + + __INIT +SYM_CODE_START(xen_early_idt_handler_array) + i = 0 + .rept NUM_EXCEPTION_VECTORS + pop %rcx + pop %r11 + jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE + i = i + 1 + .fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc + .endr +SYM_CODE_END(xen_early_idt_handler_array) + __FINIT + +hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 +/* + * Xen64 iret frame: + * + * ss + * rsp + * rflags + * cs + * rip <-- standard iret frame + * + * flags + * + * rcx } + * r11 }<-- pushed by hypercall page + * rsp->rax } + */ +SYM_CODE_START(xen_iret) + pushq $0 + jmp hypercall_iret +SYM_CODE_END(xen_iret) + +SYM_CODE_START(xen_sysret64) + /* + * We're already on the usermode stack at this point, but + * still with the kernel gs, so we can easily switch back. + * + * tss.sp2 is scratch space. + */ + movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2) + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + pushq $__USER_DS + pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2) + pushq %r11 + pushq $__USER_CS + pushq %rcx + + pushq $VGCF_in_syscall + jmp hypercall_iret +SYM_CODE_END(xen_sysret64) + +/* + * Xen handles syscall callbacks much like ordinary exceptions, which + * means we have: + * - kernel gs + * - kernel rsp + * - an iret-like stack frame on the stack (including rcx and r11): + * ss + * rsp + * rflags + * cs + * rip + * r11 + * rsp->rcx + */ + +/* Normal 64-bit system call target */ +SYM_FUNC_START(xen_syscall_target) + popq %rcx + popq %r11 + + /* + * Neither Xen nor the kernel really knows what the old SS and + * CS were. The kernel expects __USER_DS and __USER_CS, so + * report those values even though Xen will guess its own values. 
+ */ + movq $__USER_DS, 4*8(%rsp) + movq $__USER_CS, 1*8(%rsp) + + jmp entry_SYSCALL_64_after_hwframe +SYM_FUNC_END(xen_syscall_target) + +#ifdef CONFIG_IA32_EMULATION + +/* 32-bit compat syscall target */ +SYM_FUNC_START(xen_syscall32_target) + popq %rcx + popq %r11 + + /* + * Neither Xen nor the kernel really knows what the old SS and + * CS were. The kernel expects __USER32_DS and __USER32_CS, so + * report those values even though Xen will guess its own values. + */ + movq $__USER32_DS, 4*8(%rsp) + movq $__USER32_CS, 1*8(%rsp) + + jmp entry_SYSCALL_compat_after_hwframe +SYM_FUNC_END(xen_syscall32_target) + +/* 32-bit compat sysenter target */ +SYM_FUNC_START(xen_sysenter_target) + /* + * NB: Xen is polite and clears TF from EFLAGS for us. This means + * that we don't need to guard against single step exceptions here. + */ + popq %rcx + popq %r11 + + /* + * Neither Xen nor the kernel really knows what the old SS and + * CS were. The kernel expects __USER32_DS and __USER32_CS, so + * report those values even though Xen will guess its own values. + */ + movq $__USER32_DS, 4*8(%rsp) + movq $__USER32_CS, 1*8(%rsp) + + jmp entry_SYSENTER_compat_after_hwframe +SYM_FUNC_END(xen_sysenter_target) + +#else /* !CONFIG_IA32_EMULATION */ + +SYM_FUNC_START_ALIAS(xen_syscall32_target) +SYM_FUNC_START(xen_sysenter_target) + lea 16(%rsp), %rsp /* strip %rcx, %r11 */ + mov $-ENOSYS, %rax + pushq $0 + jmp hypercall_iret +SYM_FUNC_END(xen_sysenter_target) +SYM_FUNC_END_ALIAS(xen_syscall32_target) + +#endif /* CONFIG_IA32_EMULATION */ diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S deleted file mode 100644 index 4757cec33abe..000000000000 --- a/arch/x86/xen/xen-asm_32.S +++ /dev/null @@ -1,185 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Asm versions of Xen pv-ops, suitable for direct use. - * - * We only bother with direct forms (ie, vcpu in pda) of the - * operations here; the indirect forms are better handled in C. 
- */ - -#include <asm/thread_info.h> -#include <asm/processor-flags.h> -#include <asm/segment.h> -#include <asm/asm.h> - -#include <xen/interface/xen.h> - -#include <linux/linkage.h> - -/* Pseudo-flag used for virtual NMI, which we don't implement yet */ -#define XEN_EFLAGS_NMI 0x80000000 - -/* - * This is run where a normal iret would be run, with the same stack setup: - * 8: eflags - * 4: cs - * esp-> 0: eip - * - * This attempts to make sure that any pending events are dealt with - * on return to usermode, but there is a small window in which an - * event can happen just before entering usermode. If the nested - * interrupt ends up setting one of the TIF_WORK_MASK pending work - * flags, they will not be tested again before returning to - * usermode. This means that a process can end up with pending work, - * which will be unprocessed until the process enters and leaves the - * kernel again, which could be an unbounded amount of time. This - * means that a pending signal or reschedule event could be - * indefinitely delayed. - * - * The fix is to notice a nested interrupt in the critical window, and - * if one occurs, then fold the nested interrupt into the current - * interrupt stack frame, and re-process it iteratively rather than - * recursively. This means that it will exit via the normal path, and - * all pending work will be dealt with appropriately. - * - * Because the nested interrupt handler needs to deal with the current - * stack state in whatever form its in, we keep things simple by only - * using a single register which is pushed/popped on the stack. 
- */ - -.macro POP_FS -1: - popw %fs -.pushsection .fixup, "ax" -2: movw $0, (%esp) - jmp 1b -.popsection - _ASM_EXTABLE(1b,2b) -.endm - -SYM_CODE_START(xen_iret) - /* test eflags for special cases */ - testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp) - jnz hyper_iret - - push %eax - ESP_OFFSET=4 # bytes pushed onto stack - - /* Store vcpu_info pointer for easy access */ -#ifdef CONFIG_SMP - pushw %fs - movl $(__KERNEL_PERCPU), %eax - movl %eax, %fs - movl %fs:xen_vcpu, %eax - POP_FS -#else - movl %ss:xen_vcpu, %eax -#endif - - /* check IF state we're restoring */ - testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) - - /* - * Maybe enable events. Once this happens we could get a - * recursive event, so the critical region starts immediately - * afterwards. However, if that happens we don't end up - * resuming the code, so we don't have to be worried about - * being preempted to another CPU. - */ - setz %ss:XEN_vcpu_info_mask(%eax) -xen_iret_start_crit: - - /* check for unmasked and pending */ - cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax) - - /* - * If there's something pending, mask events again so we can - * jump back into exc_xen_hypervisor_callback. Otherwise do not - * touch XEN_vcpu_info_mask. - */ - jne 1f - movb $1, %ss:XEN_vcpu_info_mask(%eax) - -1: popl %eax - - /* - * From this point on the registers are restored and the stack - * updated, so we don't need to worry about it if we're - * preempted - */ -iret_restore_end: - - /* - * Jump to hypervisor_callback after fixing up the stack. - * Events are masked, so jumping out of the critical region is - * OK. 
- */ - je xen_asm_exc_xen_hypervisor_callback - -1: iret -xen_iret_end_crit: - _ASM_EXTABLE(1b, asm_iret_error) - -hyper_iret: - /* put this out of line since its very rarely used */ - jmp hypercall_page + __HYPERVISOR_iret * 32 -SYM_CODE_END(xen_iret) - - .globl xen_iret_start_crit, xen_iret_end_crit - -/* - * This is called by xen_asm_exc_xen_hypervisor_callback in entry_32.S when it sees - * that the EIP at the time of interrupt was between - * xen_iret_start_crit and xen_iret_end_crit. - * - * The stack format at this point is: - * ---------------- - * ss : (ss/esp may be present if we came from usermode) - * esp : - * eflags } outer exception info - * cs } - * eip } - * ---------------- - * eax : outer eax if it hasn't been restored - * ---------------- - * eflags } - * cs } nested exception info - * eip } - * return address : (into xen_asm_exc_xen_hypervisor_callback) - * - * In order to deliver the nested exception properly, we need to discard the - * nested exception frame such that when we handle the exception, we do it - * in the context of the outer exception rather than starting a new one. - * - * The only caveat is that if the outer eax hasn't been restored yet (i.e. - * it's still on stack), we need to restore its value here. -*/ -.pushsection .noinstr.text, "ax" -SYM_CODE_START(xen_iret_crit_fixup) - /* - * Paranoia: Make sure we're really coming from kernel space. - * One could imagine a case where userspace jumps into the - * critical range address, but just before the CPU delivers a - * PF, it decides to deliver an interrupt instead. Unlikely? - * Definitely. Easy to avoid? Yes. - */ - testb $2, 2*4(%esp) /* nested CS */ - jnz 2f - - /* - * If eip is before iret_restore_end then stack - * hasn't been restored yet. 
- */ - cmpl $iret_restore_end, 1*4(%esp) - jae 1f - - movl 4*4(%esp), %eax /* load outer EAX */ - ret $4*4 /* discard nested EIP, CS, and EFLAGS as - * well as the just restored EAX */ - -1: - ret $3*4 /* discard nested EIP, CS, and EFLAGS */ - -2: - ret -SYM_CODE_END(xen_iret_crit_fixup) -.popsection diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S deleted file mode 100644 index aab1d99b2b48..000000000000 --- a/arch/x86/xen/xen-asm_64.S +++ /dev/null @@ -1,192 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Asm versions of Xen pv-ops, suitable for direct use. - * - * We only bother with direct forms (ie, vcpu in pda) of the - * operations here; the indirect forms are better handled in C. - */ - -#include <asm/errno.h> -#include <asm/percpu.h> -#include <asm/processor-flags.h> -#include <asm/segment.h> -#include <asm/asm-offsets.h> -#include <asm/thread_info.h> -#include <asm/asm.h> - -#include <xen/interface/xen.h> - -#include <linux/init.h> -#include <linux/linkage.h> - -.macro xen_pv_trap name -SYM_CODE_START(xen_\name) - pop %rcx - pop %r11 - jmp \name -SYM_CODE_END(xen_\name) -_ASM_NOKPROBE(xen_\name) -.endm - -xen_pv_trap asm_exc_divide_error -xen_pv_trap asm_xenpv_exc_debug -xen_pv_trap asm_exc_int3 -xen_pv_trap asm_xenpv_exc_nmi -xen_pv_trap asm_exc_overflow -xen_pv_trap asm_exc_bounds -xen_pv_trap asm_exc_invalid_op -xen_pv_trap asm_exc_device_not_available -xen_pv_trap asm_exc_double_fault -xen_pv_trap asm_exc_coproc_segment_overrun -xen_pv_trap asm_exc_invalid_tss -xen_pv_trap asm_exc_segment_not_present -xen_pv_trap asm_exc_stack_segment -xen_pv_trap asm_exc_general_protection -xen_pv_trap asm_exc_page_fault -xen_pv_trap asm_exc_spurious_interrupt_bug -xen_pv_trap asm_exc_coprocessor_error -xen_pv_trap asm_exc_alignment_check -#ifdef CONFIG_X86_MCE -xen_pv_trap asm_exc_machine_check -#endif /* CONFIG_X86_MCE */ -xen_pv_trap asm_exc_simd_coprocessor_error -#ifdef CONFIG_IA32_EMULATION -xen_pv_trap entry_INT80_compat -#endif 
-xen_pv_trap asm_exc_xen_hypervisor_callback - - __INIT -SYM_CODE_START(xen_early_idt_handler_array) - i = 0 - .rept NUM_EXCEPTION_VECTORS - pop %rcx - pop %r11 - jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - i = i + 1 - .fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc - .endr -SYM_CODE_END(xen_early_idt_handler_array) - __FINIT - -hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 -/* - * Xen64 iret frame: - * - * ss - * rsp - * rflags - * cs - * rip <-- standard iret frame - * - * flags - * - * rcx } - * r11 }<-- pushed by hypercall page - * rsp->rax } - */ -SYM_CODE_START(xen_iret) - pushq $0 - jmp hypercall_iret -SYM_CODE_END(xen_iret) - -SYM_CODE_START(xen_sysret64) - /* - * We're already on the usermode stack at this point, but - * still with the kernel gs, so we can easily switch back. - * - * tss.sp2 is scratch space. - */ - movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2) - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - - pushq $__USER_DS - pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2) - pushq %r11 - pushq $__USER_CS - pushq %rcx - - pushq $VGCF_in_syscall - jmp hypercall_iret -SYM_CODE_END(xen_sysret64) - -/* - * Xen handles syscall callbacks much like ordinary exceptions, which - * means we have: - * - kernel gs - * - kernel rsp - * - an iret-like stack frame on the stack (including rcx and r11): - * ss - * rsp - * rflags - * cs - * rip - * r11 - * rsp->rcx - */ - -/* Normal 64-bit system call target */ -SYM_FUNC_START(xen_syscall_target) - popq %rcx - popq %r11 - - /* - * Neither Xen nor the kernel really knows what the old SS and - * CS were. The kernel expects __USER_DS and __USER_CS, so - * report those values even though Xen will guess its own values. 
- */ - movq $__USER_DS, 4*8(%rsp) - movq $__USER_CS, 1*8(%rsp) - - jmp entry_SYSCALL_64_after_hwframe -SYM_FUNC_END(xen_syscall_target) - -#ifdef CONFIG_IA32_EMULATION - -/* 32-bit compat syscall target */ -SYM_FUNC_START(xen_syscall32_target) - popq %rcx - popq %r11 - - /* - * Neither Xen nor the kernel really knows what the old SS and - * CS were. The kernel expects __USER32_DS and __USER32_CS, so - * report those values even though Xen will guess its own values. - */ - movq $__USER32_DS, 4*8(%rsp) - movq $__USER32_CS, 1*8(%rsp) - - jmp entry_SYSCALL_compat_after_hwframe -SYM_FUNC_END(xen_syscall32_target) - -/* 32-bit compat sysenter target */ -SYM_FUNC_START(xen_sysenter_target) - /* - * NB: Xen is polite and clears TF from EFLAGS for us. This means - * that we don't need to guard against single step exceptions here. - */ - popq %rcx - popq %r11 - - /* - * Neither Xen nor the kernel really knows what the old SS and - * CS were. The kernel expects __USER32_DS and __USER32_CS, so - * report those values even though Xen will guess its own values. - */ - movq $__USER32_DS, 4*8(%rsp) - movq $__USER32_CS, 1*8(%rsp) - - jmp entry_SYSENTER_compat_after_hwframe -SYM_FUNC_END(xen_sysenter_target) - -#else /* !CONFIG_IA32_EMULATION */ - -SYM_FUNC_START_ALIAS(xen_syscall32_target) -SYM_FUNC_START(xen_sysenter_target) - lea 16(%rsp), %rsp /* strip %rcx, %r11 */ - mov $-ENOSYS, %rax - pushq $0 - jmp hypercall_iret -SYM_FUNC_END(xen_sysenter_target) -SYM_FUNC_END_ALIAS(xen_syscall32_target) - -#endif /* CONFIG_IA32_EMULATION */ diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 1ba601df3a37..2d7c8f34f56c 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -35,13 +35,8 @@ SYM_CODE_START(startup_xen) rep __ASM_SIZE(stos) mov %_ASM_SI, xen_start_info -#ifdef CONFIG_X86_64 mov initial_stack(%rip), %rsp -#else - mov initial_stack, %esp -#endif -#ifdef CONFIG_X86_64 /* Set up %gs. * * The base of %gs always points to fixed_percpu_data. 
If the @@ -53,7 +48,6 @@ SYM_CODE_START(startup_xen) movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax cdq wrmsr -#endif call xen_start_kernel SYM_CODE_END(startup_xen) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 53b224fd6177..45d556f71858 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -33,7 +33,6 @@ void xen_setup_mfn_list_list(void); void xen_build_mfn_list_list(void); void xen_setup_machphys_mapping(void); void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); -void xen_reserve_top(void); void __init xen_reserve_special_pages(void); void __init xen_pt_check_e820(void); diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index e57f0d0a88d8..b9758119feca 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -35,7 +35,7 @@ #define get_fs() (current->thread.current_ds) #define set_fs(val) (current->thread.current_ds = (val)) -#define segment_eq(a, b) ((a).seg == (b).seg) +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #define __kernel_ok (uaccess_kernel()) #define __user_ok(addr, size) \ diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index c128dcc7c85b..7666408ce12a 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -72,6 +72,9 @@ void do_page_fault(struct pt_regs *regs) if (user_mode(regs)) flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + retry: mmap_read_lock(mm); vma = find_vma(mm, address); @@ -107,7 +110,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. 
*/ - fault = handle_mm_fault(vma, address, flags); + fault = handle_mm_fault(vma, address, flags, regs); if (fault_signal_pending(fault, regs)) return; @@ -122,10 +125,6 @@ good_area: BUG(); } if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; if (fault & VM_FAULT_RETRY) { flags |= FAULT_FLAG_TRIED; @@ -139,12 +138,6 @@ good_area: } mmap_read_unlock(mm); - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); - if (flags & VM_FAULT_MAJOR) - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); - else - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); - return; /* Something tried to access memory that isn't in our memory map.. diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index c17678447937..21efa786f09c 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -554,12 +554,6 @@ static int aead_accept_parent_nokey(void *private, struct sock *sk) INIT_LIST_HEAD(&ctx->tsgl_list); ctx->len = len; - ctx->used = 0; - atomic_set(&ctx->rcvused, 0); - ctx->more = 0; - ctx->merge = 0; - ctx->enc = 0; - ctx->aead_assoclen = 0; crypto_init_wait(&ctx->wait); ask->private = ctx; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 5c112b24f678..478f3b8f5bd5 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -329,6 +329,7 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk) ctx = sock_kmalloc(sk, len, GFP_KERNEL); if (!ctx) return -ENOMEM; + memset(ctx, 0, len); ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(tfm), GFP_KERNEL); @@ -336,16 +337,10 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk) sock_kfree_s(sk, ctx, len); return -ENOMEM; } - memset(ctx->iv, 0, crypto_skcipher_ivsize(tfm)); INIT_LIST_HEAD(&ctx->tsgl_list); ctx->len = len; - ctx->used = 0; - atomic_set(&ctx->rcvused, 0); - ctx->more = 0; - ctx->merge = 0; - ctx->enc = 0; crypto_init_wait(&ctx->wait); ask->private = ctx; 
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 690a2587e0c5..4026fac9fac3 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -50,7 +50,7 @@ source "drivers/clk/versatile/Kconfig" config CLK_HSDK bool "PLL Driver for HSDK platform" depends on OF || COMPILE_TEST - depends on IOMEM + depends on HAS_IOMEM help This driver supports the HSDK core, system, ddr, tunnel and hdmi PLLs control. diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index ca9af11d3391..da8fcf147eb1 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_COMMON_CLK_CDCE925) += clk-cdce925.o obj-$(CONFIG_ARCH_CLPS711X) += clk-clps711x.o obj-$(CONFIG_COMMON_CLK_CS2000_CP) += clk-cs2000-cp.o obj-$(CONFIG_ARCH_EFM32) += clk-efm32gg.o +obj-$(CONFIG_ARCH_SPARX5) += clk-sparx5.o obj-$(CONFIG_COMMON_CLK_FIXED_MMIO) += clk-fixed-mmio.o obj-$(CONFIG_COMMON_CLK_FSL_SAI) += clk-fsl-sai.o obj-$(CONFIG_COMMON_CLK_GEMINI) += clk-gemini.o diff --git a/drivers/clk/actions/owl-s500.c b/drivers/clk/actions/owl-s500.c index e2007ac4d235..61bb224f6330 100644 --- a/drivers/clk/actions/owl-s500.c +++ b/drivers/clk/actions/owl-s500.c @@ -23,8 +23,10 @@ #include "owl-gate.h" #include "owl-mux.h" #include "owl-pll.h" +#include "owl-reset.h" #include <dt-bindings/clock/actions,s500-cmu.h> +#include <dt-bindings/reset/actions,s500-reset.h> #define CMU_COREPLL (0x0000) #define CMU_DEVPLL (0x0004) @@ -175,6 +177,8 @@ static OWL_MUX(dev_clk, "dev_clk", dev_clk_mux_p, CMU_DEVPLL, 12, 1, CLK_SET_RAT static OWL_MUX(ahbprediv_clk, "ahbprediv_clk", ahbprediv_clk_mux_p, CMU_BUSCLK1, 8, 3, CLK_SET_RATE_PARENT); /* gate clocks */ +static OWL_GATE(gpio_clk, "gpio_clk", "apb_clk", CMU_DEVCLKEN0, 18, 0, 0); +static OWL_GATE(dmac_clk, "dmac_clk", "h_clk", CMU_DEVCLKEN0, 1, 0, 0); static OWL_GATE(spi0_clk, "spi0_clk", "ahb_clk", CMU_DEVCLKEN1, 10, 0, CLK_IGNORE_UNUSED); static OWL_GATE(spi1_clk, "spi1_clk", "ahb_clk", CMU_DEVCLKEN1, 11, 0, CLK_IGNORE_UNUSED); static 
OWL_GATE(spi2_clk, "spi2_clk", "ahb_clk", CMU_DEVCLKEN1, 12, 0, CLK_IGNORE_UNUSED); @@ -183,7 +187,8 @@ static OWL_GATE(timer_clk, "timer_clk", "hosc", CMU_DEVCLKEN1, 27, 0, 0); static OWL_GATE(hdmi_clk, "hdmi_clk", "hosc", CMU_DEVCLKEN1, 3, 0, 0); /* divider clocks */ -static OWL_DIVIDER(h_clk, "h_clk", "ahbprevdiv_clk", CMU_BUSCLK1, 12, 2, NULL, 0, 0); +static OWL_DIVIDER(h_clk, "h_clk", "ahbprediv_clk", CMU_BUSCLK1, 12, 2, NULL, 0, 0); +static OWL_DIVIDER(apb_clk, "apb_clk", "ahb_clk", CMU_BUSCLK1, 14, 2, NULL, 0, 0); static OWL_DIVIDER(rmii_ref_clk, "rmii_ref_clk", "ethernet_pll_clk", CMU_ETHERNETPLL, 1, 1, rmii_ref_div_table, 0, 0); /* factor clocks */ @@ -428,6 +433,9 @@ static struct owl_clk_common *s500_clks[] = { &spdif_clk.common, &nand_clk.common, &ecc_clk.common, + &apb_clk.common, + &dmac_clk.common, + &gpio_clk.common, }; static struct clk_hw_onecell_data s500_hw_clks = { @@ -484,24 +492,103 @@ static struct clk_hw_onecell_data s500_hw_clks = { [CLK_SPDIF] = &spdif_clk.common.hw, [CLK_NAND] = &nand_clk.common.hw, [CLK_ECC] = &ecc_clk.common.hw, + [CLK_APB] = &apb_clk.common.hw, + [CLK_DMAC] = &dmac_clk.common.hw, + [CLK_GPIO] = &gpio_clk.common.hw, }, .num = CLK_NR_CLKS, }; +static const struct owl_reset_map s500_resets[] = { + [RESET_DMAC] = { CMU_DEVRST0, BIT(0) }, + [RESET_NORIF] = { CMU_DEVRST0, BIT(1) }, + [RESET_DDR] = { CMU_DEVRST0, BIT(2) }, + [RESET_NANDC] = { CMU_DEVRST0, BIT(3) }, + [RESET_SD0] = { CMU_DEVRST0, BIT(4) }, + [RESET_SD1] = { CMU_DEVRST0, BIT(5) }, + [RESET_PCM1] = { CMU_DEVRST0, BIT(6) }, + [RESET_DE] = { CMU_DEVRST0, BIT(7) }, + [RESET_LCD] = { CMU_DEVRST0, BIT(8) }, + [RESET_SD2] = { CMU_DEVRST0, BIT(9) }, + [RESET_DSI] = { CMU_DEVRST0, BIT(10) }, + [RESET_CSI] = { CMU_DEVRST0, BIT(11) }, + [RESET_BISP] = { CMU_DEVRST0, BIT(12) }, + [RESET_KEY] = { CMU_DEVRST0, BIT(14) }, + [RESET_GPIO] = { CMU_DEVRST0, BIT(15) }, + [RESET_AUDIO] = { CMU_DEVRST0, BIT(17) }, + [RESET_PCM0] = { CMU_DEVRST0, BIT(18) }, + [RESET_VDE] = { 
CMU_DEVRST0, BIT(19) }, + [RESET_VCE] = { CMU_DEVRST0, BIT(20) }, + [RESET_GPU3D] = { CMU_DEVRST0, BIT(22) }, + [RESET_NIC301] = { CMU_DEVRST0, BIT(23) }, + [RESET_LENS] = { CMU_DEVRST0, BIT(26) }, + [RESET_PERIPHRESET] = { CMU_DEVRST0, BIT(27) }, + [RESET_USB2_0] = { CMU_DEVRST1, BIT(0) }, + [RESET_TVOUT] = { CMU_DEVRST1, BIT(1) }, + [RESET_HDMI] = { CMU_DEVRST1, BIT(2) }, + [RESET_HDCP2TX] = { CMU_DEVRST1, BIT(3) }, + [RESET_UART6] = { CMU_DEVRST1, BIT(4) }, + [RESET_UART0] = { CMU_DEVRST1, BIT(5) }, + [RESET_UART1] = { CMU_DEVRST1, BIT(6) }, + [RESET_UART2] = { CMU_DEVRST1, BIT(7) }, + [RESET_SPI0] = { CMU_DEVRST1, BIT(8) }, + [RESET_SPI1] = { CMU_DEVRST1, BIT(9) }, + [RESET_SPI2] = { CMU_DEVRST1, BIT(10) }, + [RESET_SPI3] = { CMU_DEVRST1, BIT(11) }, + [RESET_I2C0] = { CMU_DEVRST1, BIT(12) }, + [RESET_I2C1] = { CMU_DEVRST1, BIT(13) }, + [RESET_USB3] = { CMU_DEVRST1, BIT(14) }, + [RESET_UART3] = { CMU_DEVRST1, BIT(15) }, + [RESET_UART4] = { CMU_DEVRST1, BIT(16) }, + [RESET_UART5] = { CMU_DEVRST1, BIT(17) }, + [RESET_I2C2] = { CMU_DEVRST1, BIT(18) }, + [RESET_I2C3] = { CMU_DEVRST1, BIT(19) }, + [RESET_ETHERNET] = { CMU_DEVRST1, BIT(20) }, + [RESET_CHIPID] = { CMU_DEVRST1, BIT(21) }, + [RESET_USB2_1] = { CMU_DEVRST1, BIT(22) }, + [RESET_WD0RESET] = { CMU_DEVRST1, BIT(24) }, + [RESET_WD1RESET] = { CMU_DEVRST1, BIT(25) }, + [RESET_WD2RESET] = { CMU_DEVRST1, BIT(26) }, + [RESET_WD3RESET] = { CMU_DEVRST1, BIT(27) }, + [RESET_DBG0RESET] = { CMU_DEVRST1, BIT(28) }, + [RESET_DBG1RESET] = { CMU_DEVRST1, BIT(29) }, + [RESET_DBG2RESET] = { CMU_DEVRST1, BIT(30) }, + [RESET_DBG3RESET] = { CMU_DEVRST1, BIT(31) }, +}; + static struct owl_clk_desc s500_clk_desc = { .clks = s500_clks, .num_clks = ARRAY_SIZE(s500_clks), .hw_clks = &s500_hw_clks, + + .resets = s500_resets, + .num_resets = ARRAY_SIZE(s500_resets), }; static int s500_clk_probe(struct platform_device *pdev) { struct owl_clk_desc *desc; + struct owl_reset *reset; + int ret; desc = &s500_clk_desc; 
owl_clk_regmap_init(pdev, desc); + reset = devm_kzalloc(&pdev->dev, sizeof(*reset), GFP_KERNEL); + if (!reset) + return -ENOMEM; + + reset->rcdev.of_node = pdev->dev.of_node; + reset->rcdev.ops = &owl_reset_ops; + reset->rcdev.nr_resets = desc->num_resets; + reset->reset_map = desc->resets; + reset->regmap = desc->regmap; + + ret = devm_reset_controller_register(&pdev->dev, &reset->rcdev); + if (ret) + dev_err(&pdev->dev, "Failed to register reset controller\n"); + return owl_clk_probe(&pdev->dev, desc->hw_clks); } diff --git a/drivers/clk/at91/Makefile b/drivers/clk/at91/Makefile index 8b90357f2a93..79301e1c1c36 100644 --- a/drivers/clk/at91/Makefile +++ b/drivers/clk/at91/Makefile @@ -23,3 +23,4 @@ obj-$(CONFIG_SOC_SAM9X60) += sam9x60.o obj-$(CONFIG_SOC_SAMA5D3) += sama5d3.o obj-$(CONFIG_SOC_SAMA5D4) += sama5d4.o obj-$(CONFIG_SOC_SAMA5D2) += sama5d2.o +obj-$(CONFIG_SOC_SAMA7G5) += sama7g5.o diff --git a/drivers/clk/at91/at91rm9200.c b/drivers/clk/at91/at91rm9200.c index 38bdb4981315..2c3d8e6ca63c 100644 --- a/drivers/clk/at91/at91rm9200.c +++ b/drivers/clk/at91/at91rm9200.c @@ -160,7 +160,8 @@ static void __init at91rm9200_pmc_setup(struct device_node *np) hw = at91_clk_register_programmable(regmap, name, parent_names, 4, i, - &at91rm9200_programmable_layout); + &at91rm9200_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/at91sam9260.c b/drivers/clk/at91/at91sam9260.c index 6d0723aa8b13..bb81ff731ad8 100644 --- a/drivers/clk/at91/at91sam9260.c +++ b/drivers/clk/at91/at91sam9260.c @@ -436,7 +436,8 @@ static void __init at91sam926x_pmc_setup(struct device_node *np, hw = at91_clk_register_programmable(regmap, name, parent_names, 4, i, - &at91rm9200_programmable_layout); + &at91rm9200_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/at91sam9g45.c b/drivers/clk/at91/at91sam9g45.c index 9873b583c260..c88ee20bee31 100644 --- a/drivers/clk/at91/at91sam9g45.c +++ 
b/drivers/clk/at91/at91sam9g45.c @@ -111,7 +111,7 @@ static void __init at91sam9g45_pmc_setup(struct device_node *np) return; mainxtal_name = of_clk_get_parent_name(np, i); - regmap = syscon_node_to_regmap(np); + regmap = device_node_to_regmap(np); if (IS_ERR(regmap)) return; @@ -181,7 +181,8 @@ static void __init at91sam9g45_pmc_setup(struct device_node *np) hw = at91_clk_register_programmable(regmap, name, parent_names, 5, i, - &at91sam9g45_programmable_layout); + &at91sam9g45_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/at91sam9n12.c b/drivers/clk/at91/at91sam9n12.c index 630dc5d87171..93f7eb216122 100644 --- a/drivers/clk/at91/at91sam9n12.c +++ b/drivers/clk/at91/at91sam9n12.c @@ -124,7 +124,7 @@ static void __init at91sam9n12_pmc_setup(struct device_node *np) return; mainxtal_name = of_clk_get_parent_name(np, i); - regmap = syscon_node_to_regmap(np); + regmap = device_node_to_regmap(np); if (IS_ERR(regmap)) return; @@ -199,7 +199,8 @@ static void __init at91sam9n12_pmc_setup(struct device_node *np) hw = at91_clk_register_programmable(regmap, name, parent_names, 5, i, - &at91sam9x5_programmable_layout); + &at91sam9x5_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; @@ -222,7 +223,7 @@ static void __init at91sam9n12_pmc_setup(struct device_node *np) at91sam9n12_periphck[i].n, "masterck", at91sam9n12_periphck[i].id, - &range); + &range, INT_MIN); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/at91sam9rl.c b/drivers/clk/at91/at91sam9rl.c index 0d1cc44b056f..a343eb69bb35 100644 --- a/drivers/clk/at91/at91sam9rl.c +++ b/drivers/clk/at91/at91sam9rl.c @@ -137,7 +137,8 @@ static void __init at91sam9rl_pmc_setup(struct device_node *np) hw = at91_clk_register_programmable(regmap, name, parent_names, 5, i, - &at91rm9200_programmable_layout); + &at91rm9200_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/at91sam9x5.c b/drivers/clk/at91/at91sam9x5.c index 
0ce3da080287..22b9aad9efb8 100644 --- a/drivers/clk/at91/at91sam9x5.c +++ b/drivers/clk/at91/at91sam9x5.c @@ -226,7 +226,8 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np, hw = at91_clk_register_programmable(regmap, name, parent_names, 5, i, - &at91sam9x5_programmable_layout); + &at91sam9x5_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; @@ -257,7 +258,7 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np, at91sam9x5_periphck[i].n, "masterck", at91sam9x5_periphck[i].id, - &range); + &range, INT_MIN); if (IS_ERR(hw)) goto err_free; @@ -270,7 +271,7 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np, extra_pcks[i].n, "masterck", extra_pcks[i].id, - &range); + &range, INT_MIN); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c index 44a46dcc0518..b4fc8d71daf2 100644 --- a/drivers/clk/at91/clk-generated.c +++ b/drivers/clk/at91/clk-generated.c @@ -18,18 +18,17 @@ #define GENERATED_MAX_DIV 255 -#define GCK_INDEX_DT_AUDIO_PLL 5 - struct clk_generated { struct clk_hw hw; struct regmap *regmap; struct clk_range range; spinlock_t *lock; + u32 *mux_table; u32 id; u32 gckdiv; const struct clk_pcr_layout *layout; u8 parent_id; - bool audio_pll_allowed; + int chg_pid; }; #define to_clk_generated(hw) \ @@ -83,7 +82,7 @@ static int clk_generated_is_enabled(struct clk_hw *hw) regmap_read(gck->regmap, gck->layout->offset, &status); spin_unlock_irqrestore(gck->lock, flags); - return status & AT91_PMC_PCR_GCKEN ? 
1 : 0; + return !!(status & AT91_PMC_PCR_GCKEN); } static unsigned long @@ -109,7 +108,7 @@ static void clk_generated_best_diff(struct clk_rate_request *req, tmp_rate = parent_rate / div; tmp_diff = abs(req->rate - tmp_rate); - if (*best_diff < 0 || *best_diff > tmp_diff) { + if (*best_diff < 0 || *best_diff >= tmp_diff) { *best_rate = tmp_rate; *best_diff = tmp_diff; req->best_parent_rate = parent_rate; @@ -129,7 +128,10 @@ static int clk_generated_determine_rate(struct clk_hw *hw, int i; u32 div; - for (i = 0; i < clk_hw_get_num_parents(hw) - 1; i++) { + for (i = 0; i < clk_hw_get_num_parents(hw); i++) { + if (gck->chg_pid == i) + continue; + parent = clk_hw_get_parent_by_index(hw, i); if (!parent) continue; @@ -161,16 +163,17 @@ static int clk_generated_determine_rate(struct clk_hw *hw, * that the only clks able to modify gck rate are those of audio IPs. */ - if (!gck->audio_pll_allowed) + if (gck->chg_pid < 0) goto end; - parent = clk_hw_get_parent_by_index(hw, GCK_INDEX_DT_AUDIO_PLL); + parent = clk_hw_get_parent_by_index(hw, gck->chg_pid); if (!parent) goto end; for (div = 1; div < GENERATED_MAX_DIV + 2; div++) { req_parent.rate = req->rate * div; - __clk_determine_rate(parent, &req_parent); + if (__clk_determine_rate(parent, &req_parent)) + continue; clk_generated_best_diff(req, parent, req_parent.rate, div, &best_diff, &best_rate); @@ -184,8 +187,8 @@ end: __clk_get_name((req->best_parent_hw)->clk), req->best_parent_rate); - if (best_rate < 0) - return best_rate; + if (best_rate < 0 || (gck->range.max && best_rate > gck->range.max)) + return -EINVAL; req->rate = best_rate; return 0; @@ -199,7 +202,11 @@ static int clk_generated_set_parent(struct clk_hw *hw, u8 index) if (index >= clk_hw_get_num_parents(hw)) return -EINVAL; - gck->parent_id = index; + if (gck->mux_table) + gck->parent_id = clk_mux_index_to_val(gck->mux_table, 0, index); + else + gck->parent_id = index; + return 0; } @@ -271,8 +278,9 @@ struct clk_hw * __init 
at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, const struct clk_pcr_layout *layout, const char *name, const char **parent_names, - u8 num_parents, u8 id, bool pll_audio, - const struct clk_range *range) + u32 *mux_table, u8 num_parents, u8 id, + const struct clk_range *range, + int chg_pid) { struct clk_generated *gck; struct clk_init_data init; @@ -287,16 +295,18 @@ at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, init.ops = &generated_ops; init.parent_names = parent_names; init.num_parents = num_parents; - init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE | - CLK_SET_RATE_PARENT; + init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE; + if (chg_pid >= 0) + init.flags |= CLK_SET_RATE_PARENT; gck->id = id; gck->hw.init = &init; gck->regmap = regmap; gck->lock = lock; gck->range = *range; - gck->audio_pll_allowed = pll_audio; + gck->chg_pid = chg_pid; gck->layout = layout; + gck->mux_table = mux_table; clk_generated_startup(gck); hw = &gck->hw; diff --git a/drivers/clk/at91/clk-main.c b/drivers/clk/at91/clk-main.c index 37c22667e831..5c83e899084f 100644 --- a/drivers/clk/at91/clk-main.c +++ b/drivers/clk/at91/clk-main.c @@ -175,7 +175,7 @@ static bool clk_main_rc_osc_ready(struct regmap *regmap) regmap_read(regmap, AT91_PMC_SR, &status); - return status & AT91_PMC_MOSCRCS; + return !!(status & AT91_PMC_MOSCRCS); } static int clk_main_rc_osc_prepare(struct clk_hw *hw) @@ -336,7 +336,7 @@ static int clk_rm9200_main_is_prepared(struct clk_hw *hw) regmap_read(clkmain->regmap, AT91_CKGR_MCFR, &status); - return status & AT91_PMC_MAINRDY ? 1 : 0; + return !!(status & AT91_PMC_MAINRDY); } static unsigned long clk_rm9200_main_recalc_rate(struct clk_hw *hw, @@ -398,7 +398,7 @@ static inline bool clk_sam9x5_main_ready(struct regmap *regmap) regmap_read(regmap, AT91_PMC_SR, &status); - return status & AT91_PMC_MOSCSELS ? 
1 : 0; + return !!(status & AT91_PMC_MOSCSELS); } static int clk_sam9x5_main_prepare(struct clk_hw *hw) diff --git a/drivers/clk/at91/clk-master.c b/drivers/clk/at91/clk-master.c index e7e0ba652de1..bd0d8a69a2cf 100644 --- a/drivers/clk/at91/clk-master.c +++ b/drivers/clk/at91/clk-master.c @@ -17,30 +17,49 @@ #define MASTER_DIV_SHIFT 8 #define MASTER_DIV_MASK 0x3 +#define PMC_MCR 0x30 +#define PMC_MCR_ID_MSK GENMASK(3, 0) +#define PMC_MCR_CMD BIT(7) +#define PMC_MCR_DIV GENMASK(10, 8) +#define PMC_MCR_CSS GENMASK(20, 16) +#define PMC_MCR_CSS_SHIFT (16) +#define PMC_MCR_EN BIT(28) + +#define PMC_MCR_ID(x) ((x) & PMC_MCR_ID_MSK) + +#define MASTER_MAX_ID 4 + #define to_clk_master(hw) container_of(hw, struct clk_master, hw) struct clk_master { struct clk_hw hw; struct regmap *regmap; + spinlock_t *lock; const struct clk_master_layout *layout; const struct clk_master_characteristics *characteristics; + u32 *mux_table; u32 mckr; + int chg_pid; + u8 id; + u8 parent; + u8 div; }; -static inline bool clk_master_ready(struct regmap *regmap) +static inline bool clk_master_ready(struct clk_master *master) { + unsigned int bit = master->id ? AT91_PMC_MCKXRDY : AT91_PMC_MCKRDY; unsigned int status; - regmap_read(regmap, AT91_PMC_SR, &status); + regmap_read(master->regmap, AT91_PMC_SR, &status); - return status & AT91_PMC_MCKRDY ? 
1 : 0; + return !!(status & bit); } static int clk_master_prepare(struct clk_hw *hw) { struct clk_master *master = to_clk_master(hw); - while (!clk_master_ready(master->regmap)) + while (!clk_master_ready(master)) cpu_relax(); return 0; @@ -50,7 +69,7 @@ static int clk_master_is_prepared(struct clk_hw *hw) { struct clk_master *master = to_clk_master(hw); - return clk_master_ready(master->regmap); + return clk_master_ready(master); } static unsigned long clk_master_recalc_rate(struct clk_hw *hw, @@ -143,6 +162,287 @@ at91_clk_register_master(struct regmap *regmap, return hw; } +static unsigned long +clk_sama7g5_master_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_master *master = to_clk_master(hw); + + return DIV_ROUND_CLOSEST_ULL(parent_rate, (1 << master->div)); +} + +static void clk_sama7g5_master_best_diff(struct clk_rate_request *req, + struct clk_hw *parent, + unsigned long parent_rate, + long *best_rate, + long *best_diff, + u32 div) +{ + unsigned long tmp_rate, tmp_diff; + + if (div == MASTER_PRES_MAX) + tmp_rate = parent_rate / 3; + else + tmp_rate = parent_rate >> div; + + tmp_diff = abs(req->rate - tmp_rate); + + if (*best_diff < 0 || *best_diff >= tmp_diff) { + *best_rate = tmp_rate; + *best_diff = tmp_diff; + req->best_parent_rate = parent_rate; + req->best_parent_hw = parent; + } +} + +static int clk_sama7g5_master_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) +{ + struct clk_master *master = to_clk_master(hw); + struct clk_rate_request req_parent = *req; + struct clk_hw *parent; + long best_rate = LONG_MIN, best_diff = LONG_MIN; + unsigned long parent_rate; + unsigned int div, i; + + /* First: check the dividers of MCR. 
*/ + for (i = 0; i < clk_hw_get_num_parents(hw); i++) { + parent = clk_hw_get_parent_by_index(hw, i); + if (!parent) + continue; + + parent_rate = clk_hw_get_rate(parent); + if (!parent_rate) + continue; + + for (div = 0; div < MASTER_PRES_MAX + 1; div++) { + clk_sama7g5_master_best_diff(req, parent, parent_rate, + &best_rate, &best_diff, + div); + if (!best_diff) + break; + } + + if (!best_diff) + break; + } + + /* Second: try to request rate form changeable parent. */ + if (master->chg_pid < 0) + goto end; + + parent = clk_hw_get_parent_by_index(hw, master->chg_pid); + if (!parent) + goto end; + + for (div = 0; div < MASTER_PRES_MAX + 1; div++) { + if (div == MASTER_PRES_MAX) + req_parent.rate = req->rate * 3; + else + req_parent.rate = req->rate << div; + + if (__clk_determine_rate(parent, &req_parent)) + continue; + + clk_sama7g5_master_best_diff(req, parent, req_parent.rate, + &best_rate, &best_diff, div); + + if (!best_diff) + break; + } + +end: + pr_debug("MCK: %s, best_rate = %ld, parent clk: %s @ %ld\n", + __func__, best_rate, + __clk_get_name((req->best_parent_hw)->clk), + req->best_parent_rate); + + if (best_rate < 0) + return -EINVAL; + + req->rate = best_rate; + + return 0; +} + +static u8 clk_sama7g5_master_get_parent(struct clk_hw *hw) +{ + struct clk_master *master = to_clk_master(hw); + unsigned long flags; + u8 index; + + spin_lock_irqsave(master->lock, flags); + index = clk_mux_val_to_index(&master->hw, master->mux_table, 0, + master->parent); + spin_unlock_irqrestore(master->lock, flags); + + return index; +} + +static int clk_sama7g5_master_set_parent(struct clk_hw *hw, u8 index) +{ + struct clk_master *master = to_clk_master(hw); + unsigned long flags; + + if (index >= clk_hw_get_num_parents(hw)) + return -EINVAL; + + spin_lock_irqsave(master->lock, flags); + master->parent = clk_mux_index_to_val(master->mux_table, 0, index); + spin_unlock_irqrestore(master->lock, flags); + + return 0; +} + +static int clk_sama7g5_master_enable(struct clk_hw 
*hw) +{ + struct clk_master *master = to_clk_master(hw); + unsigned long flags; + unsigned int val, cparent; + + spin_lock_irqsave(master->lock, flags); + + regmap_write(master->regmap, PMC_MCR, PMC_MCR_ID(master->id)); + regmap_read(master->regmap, PMC_MCR, &val); + regmap_update_bits(master->regmap, PMC_MCR, + PMC_MCR_EN | PMC_MCR_CSS | PMC_MCR_DIV | + PMC_MCR_CMD | PMC_MCR_ID_MSK, + PMC_MCR_EN | (master->parent << PMC_MCR_CSS_SHIFT) | + (master->div << MASTER_DIV_SHIFT) | + PMC_MCR_CMD | PMC_MCR_ID(master->id)); + + cparent = (val & PMC_MCR_CSS) >> PMC_MCR_CSS_SHIFT; + + /* Wait here only if parent is being changed. */ + while ((cparent != master->parent) && !clk_master_ready(master)) + cpu_relax(); + + spin_unlock_irqrestore(master->lock, flags); + + return 0; +} + +static void clk_sama7g5_master_disable(struct clk_hw *hw) +{ + struct clk_master *master = to_clk_master(hw); + unsigned long flags; + + spin_lock_irqsave(master->lock, flags); + + regmap_write(master->regmap, PMC_MCR, master->id); + regmap_update_bits(master->regmap, PMC_MCR, + PMC_MCR_EN | PMC_MCR_CMD | PMC_MCR_ID_MSK, + PMC_MCR_CMD | PMC_MCR_ID(master->id)); + + spin_unlock_irqrestore(master->lock, flags); +} + +static int clk_sama7g5_master_is_enabled(struct clk_hw *hw) +{ + struct clk_master *master = to_clk_master(hw); + unsigned long flags; + unsigned int val; + + spin_lock_irqsave(master->lock, flags); + + regmap_write(master->regmap, PMC_MCR, master->id); + regmap_read(master->regmap, PMC_MCR, &val); + + spin_unlock_irqrestore(master->lock, flags); + + return !!(val & PMC_MCR_EN); +} + +static int clk_sama7g5_master_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_master *master = to_clk_master(hw); + unsigned long div, flags; + + div = DIV_ROUND_CLOSEST(parent_rate, rate); + if ((div > (1 << (MASTER_PRES_MAX - 1))) || (div & (div - 1))) + return -EINVAL; + + if (div == 3) + div = MASTER_PRES_MAX; + else + div = ffs(div) - 1; + + 
spin_lock_irqsave(master->lock, flags); + master->div = div; + spin_unlock_irqrestore(master->lock, flags); + + return 0; +} + +static const struct clk_ops sama7g5_master_ops = { + .enable = clk_sama7g5_master_enable, + .disable = clk_sama7g5_master_disable, + .is_enabled = clk_sama7g5_master_is_enabled, + .recalc_rate = clk_sama7g5_master_recalc_rate, + .determine_rate = clk_sama7g5_master_determine_rate, + .set_rate = clk_sama7g5_master_set_rate, + .get_parent = clk_sama7g5_master_get_parent, + .set_parent = clk_sama7g5_master_set_parent, +}; + +struct clk_hw * __init +at91_clk_sama7g5_register_master(struct regmap *regmap, + const char *name, int num_parents, + const char **parent_names, + u32 *mux_table, + spinlock_t *lock, u8 id, + bool critical, int chg_pid) +{ + struct clk_master *master; + struct clk_hw *hw; + struct clk_init_data init; + unsigned long flags; + unsigned int val; + int ret; + + if (!name || !num_parents || !parent_names || !mux_table || + !lock || id > MASTER_MAX_ID) + return ERR_PTR(-EINVAL); + + master = kzalloc(sizeof(*master), GFP_KERNEL); + if (!master) + return ERR_PTR(-ENOMEM); + + init.name = name; + init.ops = &sama7g5_master_ops; + init.parent_names = parent_names; + init.num_parents = num_parents; + init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE; + if (chg_pid >= 0) + init.flags |= CLK_SET_RATE_PARENT; + if (critical) + init.flags |= CLK_IS_CRITICAL; + + master->hw.init = &init; + master->regmap = regmap; + master->id = id; + master->chg_pid = chg_pid; + master->lock = lock; + master->mux_table = mux_table; + + spin_lock_irqsave(master->lock, flags); + regmap_write(master->regmap, PMC_MCR, master->id); + regmap_read(master->regmap, PMC_MCR, &val); + master->parent = (val & PMC_MCR_CSS) >> PMC_MCR_CSS_SHIFT; + master->div = (val & PMC_MCR_DIV) >> MASTER_DIV_SHIFT; + spin_unlock_irqrestore(master->lock, flags); + + hw = &master->hw; + ret = clk_hw_register(NULL, &master->hw); + if (ret) { + kfree(master); + hw = ERR_PTR(ret); 
+ } + + return hw; +} + const struct clk_master_layout at91rm9200_master_layout = { .mask = 0x31F, .pres_shift = 2, diff --git a/drivers/clk/at91/clk-peripheral.c b/drivers/clk/at91/clk-peripheral.c index c2ab4860a2bf..7867eaf0447f 100644 --- a/drivers/clk/at91/clk-peripheral.c +++ b/drivers/clk/at91/clk-peripheral.c @@ -38,6 +38,7 @@ struct clk_sam9x5_peripheral { u32 div; const struct clk_pcr_layout *layout; bool auto_div; + int chg_pid; }; #define to_clk_sam9x5_peripheral(hw) \ @@ -208,7 +209,7 @@ static int clk_sam9x5_peripheral_is_enabled(struct clk_hw *hw) regmap_read(periph->regmap, periph->layout->offset, &status); spin_unlock_irqrestore(periph->lock, flags); - return status & AT91_PMC_PCR_EN ? 1 : 0; + return !!(status & AT91_PMC_PCR_EN); } static unsigned long @@ -238,6 +239,87 @@ clk_sam9x5_peripheral_recalc_rate(struct clk_hw *hw, return parent_rate >> periph->div; } +static void clk_sam9x5_peripheral_best_diff(struct clk_rate_request *req, + struct clk_hw *parent, + unsigned long parent_rate, + u32 shift, long *best_diff, + long *best_rate) +{ + unsigned long tmp_rate = parent_rate >> shift; + unsigned long tmp_diff = abs(req->rate - tmp_rate); + + if (*best_diff < 0 || *best_diff >= tmp_diff) { + *best_rate = tmp_rate; + *best_diff = tmp_diff; + req->best_parent_rate = parent_rate; + req->best_parent_hw = parent; + } +} + +static int clk_sam9x5_peripheral_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) +{ + struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw); + struct clk_hw *parent = clk_hw_get_parent(hw); + struct clk_rate_request req_parent = *req; + unsigned long parent_rate = clk_hw_get_rate(parent); + unsigned long tmp_rate; + long best_rate = LONG_MIN; + long best_diff = LONG_MIN; + u32 shift; + + if (periph->id < PERIPHERAL_ID_MIN || !periph->range.max) + return parent_rate; + + /* Fist step: check the available dividers. 
*/ + for (shift = 0; shift <= PERIPHERAL_MAX_SHIFT; shift++) { + tmp_rate = parent_rate >> shift; + + if (periph->range.max && tmp_rate > periph->range.max) + continue; + + clk_sam9x5_peripheral_best_diff(req, parent, parent_rate, + shift, &best_diff, &best_rate); + + if (!best_diff || best_rate <= req->rate) + break; + } + + if (periph->chg_pid < 0) + goto end; + + /* Step two: try to request rate from parent. */ + parent = clk_hw_get_parent_by_index(hw, periph->chg_pid); + if (!parent) + goto end; + + for (shift = 0; shift <= PERIPHERAL_MAX_SHIFT; shift++) { + req_parent.rate = req->rate << shift; + + if (__clk_determine_rate(parent, &req_parent)) + continue; + + clk_sam9x5_peripheral_best_diff(req, parent, req_parent.rate, + shift, &best_diff, &best_rate); + + if (!best_diff) + break; + } +end: + if (best_rate < 0 || + (periph->range.max && best_rate > periph->range.max)) + return -EINVAL; + + pr_debug("PCK: %s, best_rate = %ld, parent clk: %s @ %ld\n", + __func__, best_rate, + __clk_get_name((req->best_parent_hw)->clk), + req->best_parent_rate); + + req->rate = best_rate; + + return 0; +} + static long clk_sam9x5_peripheral_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *parent_rate) @@ -320,11 +402,21 @@ static const struct clk_ops sam9x5_peripheral_ops = { .set_rate = clk_sam9x5_peripheral_set_rate, }; +static const struct clk_ops sam9x5_peripheral_chg_ops = { + .enable = clk_sam9x5_peripheral_enable, + .disable = clk_sam9x5_peripheral_disable, + .is_enabled = clk_sam9x5_peripheral_is_enabled, + .recalc_rate = clk_sam9x5_peripheral_recalc_rate, + .determine_rate = clk_sam9x5_peripheral_determine_rate, + .set_rate = clk_sam9x5_peripheral_set_rate, +}; + struct clk_hw * __init at91_clk_register_sam9x5_peripheral(struct regmap *regmap, spinlock_t *lock, const struct clk_pcr_layout *layout, const char *name, const char *parent_name, - u32 id, const struct clk_range *range) + u32 id, const struct clk_range *range, + int chg_pid) { struct 
clk_sam9x5_peripheral *periph; struct clk_init_data init; @@ -339,10 +431,16 @@ at91_clk_register_sam9x5_peripheral(struct regmap *regmap, spinlock_t *lock, return ERR_PTR(-ENOMEM); init.name = name; - init.ops = &sam9x5_peripheral_ops; - init.parent_names = (parent_name ? &parent_name : NULL); - init.num_parents = (parent_name ? 1 : 0); - init.flags = 0; + init.parent_names = &parent_name; + init.num_parents = 1; + if (chg_pid < 0) { + init.flags = 0; + init.ops = &sam9x5_peripheral_ops; + } else { + init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE | + CLK_SET_RATE_PARENT; + init.ops = &sam9x5_peripheral_chg_ops; + } periph->id = id; periph->hw.init = &init; @@ -353,6 +451,7 @@ at91_clk_register_sam9x5_peripheral(struct regmap *regmap, spinlock_t *lock, periph->auto_div = true; periph->layout = layout; periph->range = *range; + periph->chg_pid = chg_pid; hw = &periph->hw; ret = clk_hw_register(NULL, &periph->hw); diff --git a/drivers/clk/at91/clk-programmable.c b/drivers/clk/at91/clk-programmable.c index 8ee66fbee3d9..fcf8f6a1c2c6 100644 --- a/drivers/clk/at91/clk-programmable.c +++ b/drivers/clk/at91/clk-programmable.c @@ -21,6 +21,7 @@ struct clk_programmable { struct clk_hw hw; struct regmap *regmap; + u32 *mux_table; u8 id; const struct clk_programmable_layout *layout; }; @@ -108,6 +109,9 @@ static int clk_programmable_set_parent(struct clk_hw *hw, u8 index) if (layout->have_slck_mck) mask |= AT91_PMC_CSSMCK_MCK; + if (prog->mux_table) + pckr = clk_mux_index_to_val(prog->mux_table, 0, index); + if (index > layout->css_mask) { if (index > PROG_MAX_RM9200_CSS && !layout->have_slck_mck) return -EINVAL; @@ -134,6 +138,9 @@ static u8 clk_programmable_get_parent(struct clk_hw *hw) if (layout->have_slck_mck && (pckr & AT91_PMC_CSSMCK_MCK) && !ret) ret = PROG_MAX_RM9200_CSS + 1; + if (prog->mux_table) + ret = clk_mux_val_to_index(&prog->hw, prog->mux_table, 0, ret); + return ret; } @@ -182,7 +189,8 @@ struct clk_hw * __init at91_clk_register_programmable(struct 
regmap *regmap, const char *name, const char **parent_names, u8 num_parents, u8 id, - const struct clk_programmable_layout *layout) + const struct clk_programmable_layout *layout, + u32 *mux_table) { struct clk_programmable *prog; struct clk_hw *hw; @@ -206,6 +214,7 @@ at91_clk_register_programmable(struct regmap *regmap, prog->layout = layout; prog->hw.init = &init; prog->regmap = regmap; + prog->mux_table = mux_table; hw = &prog->hw; ret = clk_hw_register(NULL, &prog->hw); diff --git a/drivers/clk/at91/clk-sam9x60-pll.c b/drivers/clk/at91/clk-sam9x60-pll.c index e699803986e5..b473298ef7e6 100644 --- a/drivers/clk/at91/clk-sam9x60-pll.c +++ b/drivers/clk/at91/clk-sam9x60-pll.c @@ -15,26 +15,41 @@ #include "pmc.h" #define PMC_PLL_CTRL0_DIV_MSK GENMASK(7, 0) -#define PMC_PLL_CTRL1_MUL_MSK GENMASK(30, 24) +#define PMC_PLL_CTRL1_MUL_MSK GENMASK(31, 24) +#define PMC_PLL_CTRL1_FRACR_MSK GENMASK(21, 0) #define PLL_DIV_MAX (FIELD_GET(PMC_PLL_CTRL0_DIV_MSK, UINT_MAX) + 1) #define UPLL_DIV 2 #define PLL_MUL_MAX (FIELD_GET(PMC_PLL_CTRL1_MUL_MSK, UINT_MAX) + 1) -#define PLL_MAX_ID 1 +#define FCORE_MIN (600000000) +#define FCORE_MAX (1200000000) -struct sam9x60_pll { - struct clk_hw hw; +#define PLL_MAX_ID 7 + +struct sam9x60_pll_core { struct regmap *regmap; spinlock_t *lock; const struct clk_pll_characteristics *characteristics; - u32 frac; + const struct clk_pll_layout *layout; + struct clk_hw hw; u8 id; - u8 div; +}; + +struct sam9x60_frac { + struct sam9x60_pll_core core; + u32 frac; u16 mul; }; -#define to_sam9x60_pll(hw) container_of(hw, struct sam9x60_pll, hw) +struct sam9x60_div { + struct sam9x60_pll_core core; + u8 div; +}; + +#define to_sam9x60_pll_core(hw) container_of(hw, struct sam9x60_pll_core, hw) +#define to_sam9x60_frac(core) container_of(core, struct sam9x60_frac, core) +#define to_sam9x60_div(core) container_of(core, struct sam9x60_div, core) static inline bool sam9x60_pll_ready(struct regmap *regmap, int id) { @@ -45,41 +60,53 @@ static inline bool 
sam9x60_pll_ready(struct regmap *regmap, int id) return !!(status & BIT(id)); } -static int sam9x60_pll_prepare(struct clk_hw *hw) +static bool sam9x60_frac_pll_ready(struct regmap *regmap, u8 id) { - struct sam9x60_pll *pll = to_sam9x60_pll(hw); - struct regmap *regmap = pll->regmap; - unsigned long flags; - u8 div; - u16 mul; - u32 val; + return sam9x60_pll_ready(regmap, id); +} - spin_lock_irqsave(pll->lock, flags); - regmap_write(regmap, AT91_PMC_PLL_UPDT, pll->id); +static unsigned long sam9x60_frac_pll_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + struct sam9x60_frac *frac = to_sam9x60_frac(core); - regmap_read(regmap, AT91_PMC_PLL_CTRL0, &val); - div = FIELD_GET(PMC_PLL_CTRL0_DIV_MSK, val); + return (parent_rate * (frac->mul + 1) + + ((u64)parent_rate * frac->frac >> 22)); +} +static int sam9x60_frac_pll_prepare(struct clk_hw *hw) +{ + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + struct sam9x60_frac *frac = to_sam9x60_frac(core); + struct regmap *regmap = core->regmap; + unsigned int val, cfrac, cmul; + unsigned long flags; + + spin_lock_irqsave(core->lock, flags); + + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_ID_MSK, core->id); regmap_read(regmap, AT91_PMC_PLL_CTRL1, &val); - mul = FIELD_GET(PMC_PLL_CTRL1_MUL_MSK, val); + cmul = (val & core->layout->mul_mask) >> core->layout->mul_shift; + cfrac = (val & core->layout->frac_mask) >> core->layout->frac_shift; - if (sam9x60_pll_ready(regmap, pll->id) && - (div == pll->div && mul == pll->mul)) { - spin_unlock_irqrestore(pll->lock, flags); - return 0; - } + if (sam9x60_frac_pll_ready(regmap, core->id) && + (cmul == frac->mul && cfrac == frac->frac)) + goto unlock; - /* Recommended value for AT91_PMC_PLL_ACR */ - if (pll->characteristics->upll) + /* Recommended value for PMC_PLL_ACR */ + if (core->characteristics->upll) val = AT91_PMC_PLL_ACR_DEFAULT_UPLL; else val = AT91_PMC_PLL_ACR_DEFAULT_PLLA; 
regmap_write(regmap, AT91_PMC_PLL_ACR, val); regmap_write(regmap, AT91_PMC_PLL_CTRL1, - FIELD_PREP(PMC_PLL_CTRL1_MUL_MSK, pll->mul)); + (frac->mul << core->layout->mul_shift) | + (frac->frac << core->layout->frac_shift)); - if (pll->characteristics->upll) { + if (core->characteristics->upll) { /* Enable the UTMI internal bandgap */ val |= AT91_PMC_PLL_ACR_UTMIBG; regmap_write(regmap, AT91_PMC_PLL_ACR, val); @@ -94,221 +121,409 @@ static int sam9x60_pll_prepare(struct clk_hw *hw) } regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, - AT91_PMC_PLL_UPDT_UPDATE, AT91_PMC_PLL_UPDT_UPDATE); + AT91_PMC_PLL_UPDT_UPDATE | AT91_PMC_PLL_UPDT_ID_MSK, + AT91_PMC_PLL_UPDT_UPDATE | core->id); - regmap_write(regmap, AT91_PMC_PLL_CTRL0, - AT91_PMC_PLL_CTRL0_ENLOCK | AT91_PMC_PLL_CTRL0_ENPLL | - AT91_PMC_PLL_CTRL0_ENPLLCK | pll->div); + regmap_update_bits(regmap, AT91_PMC_PLL_CTRL0, + AT91_PMC_PLL_CTRL0_ENLOCK | AT91_PMC_PLL_CTRL0_ENPLL, + AT91_PMC_PLL_CTRL0_ENLOCK | AT91_PMC_PLL_CTRL0_ENPLL); regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, - AT91_PMC_PLL_UPDT_UPDATE, AT91_PMC_PLL_UPDT_UPDATE); + AT91_PMC_PLL_UPDT_UPDATE | AT91_PMC_PLL_UPDT_ID_MSK, + AT91_PMC_PLL_UPDT_UPDATE | core->id); - while (!sam9x60_pll_ready(regmap, pll->id)) + while (!sam9x60_pll_ready(regmap, core->id)) cpu_relax(); - spin_unlock_irqrestore(pll->lock, flags); +unlock: + spin_unlock_irqrestore(core->lock, flags); return 0; } -static int sam9x60_pll_is_prepared(struct clk_hw *hw) +static void sam9x60_frac_pll_unprepare(struct clk_hw *hw) { - struct sam9x60_pll *pll = to_sam9x60_pll(hw); + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + struct regmap *regmap = core->regmap; + unsigned long flags; + + spin_lock_irqsave(core->lock, flags); - return sam9x60_pll_ready(pll->regmap, pll->id); + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_ID_MSK, core->id); + + regmap_update_bits(regmap, AT91_PMC_PLL_CTRL0, AT91_PMC_PLL_CTRL0_ENPLL, 0); + + if (core->characteristics->upll) + 
regmap_update_bits(regmap, AT91_PMC_PLL_ACR, + AT91_PMC_PLL_ACR_UTMIBG | AT91_PMC_PLL_ACR_UTMIVR, 0); + + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_UPDATE | AT91_PMC_PLL_UPDT_ID_MSK, + AT91_PMC_PLL_UPDT_UPDATE | core->id); + + spin_unlock_irqrestore(core->lock, flags); } -static void sam9x60_pll_unprepare(struct clk_hw *hw) +static int sam9x60_frac_pll_is_prepared(struct clk_hw *hw) { - struct sam9x60_pll *pll = to_sam9x60_pll(hw); - unsigned long flags; + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); - spin_lock_irqsave(pll->lock, flags); + return sam9x60_pll_ready(core->regmap, core->id); +} - regmap_write(pll->regmap, AT91_PMC_PLL_UPDT, pll->id); +static long sam9x60_frac_pll_compute_mul_frac(struct sam9x60_pll_core *core, + unsigned long rate, + unsigned long parent_rate, + bool update) +{ + struct sam9x60_frac *frac = to_sam9x60_frac(core); + unsigned long tmprate, remainder; + unsigned long nmul = 0; + unsigned long nfrac = 0; - regmap_update_bits(pll->regmap, AT91_PMC_PLL_CTRL0, - AT91_PMC_PLL_CTRL0_ENPLLCK, 0); + if (rate < FCORE_MIN || rate > FCORE_MAX) + return -ERANGE; - regmap_update_bits(pll->regmap, AT91_PMC_PLL_UPDT, - AT91_PMC_PLL_UPDT_UPDATE, AT91_PMC_PLL_UPDT_UPDATE); + /* + * Calculate the multiplier associated with the current + * divider that provide the closest rate to the requested one. 
+ */ + nmul = mult_frac(rate, 1, parent_rate); + tmprate = mult_frac(parent_rate, nmul, 1); + remainder = rate - tmprate; - regmap_update_bits(pll->regmap, AT91_PMC_PLL_CTRL0, - AT91_PMC_PLL_CTRL0_ENPLL, 0); + if (remainder) { + nfrac = DIV_ROUND_CLOSEST_ULL((u64)remainder * (1 << 22), + parent_rate); - if (pll->characteristics->upll) - regmap_update_bits(pll->regmap, AT91_PMC_PLL_ACR, - AT91_PMC_PLL_ACR_UTMIBG | - AT91_PMC_PLL_ACR_UTMIVR, 0); + tmprate += DIV_ROUND_CLOSEST_ULL((u64)nfrac * parent_rate, + (1 << 22)); + } - regmap_update_bits(pll->regmap, AT91_PMC_PLL_UPDT, - AT91_PMC_PLL_UPDT_UPDATE, AT91_PMC_PLL_UPDT_UPDATE); + /* Check if resulted rate is a valid. */ + if (tmprate < FCORE_MIN || tmprate > FCORE_MAX) + return -ERANGE; - spin_unlock_irqrestore(pll->lock, flags); + if (update) { + frac->mul = nmul - 1; + frac->frac = nfrac; + } + + return tmprate; } -static unsigned long sam9x60_pll_recalc_rate(struct clk_hw *hw, - unsigned long parent_rate) +static long sam9x60_frac_pll_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) { - struct sam9x60_pll *pll = to_sam9x60_pll(hw); + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); - return (parent_rate * (pll->mul + 1)) / (pll->div + 1); + return sam9x60_frac_pll_compute_mul_frac(core, rate, *parent_rate, false); } -static long sam9x60_pll_get_best_div_mul(struct sam9x60_pll *pll, - unsigned long rate, - unsigned long parent_rate, - bool update) +static int sam9x60_frac_pll_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) { - const struct clk_pll_characteristics *characteristics = - pll->characteristics; - unsigned long bestremainder = ULONG_MAX; - unsigned long maxdiv, mindiv, tmpdiv; - long bestrate = -ERANGE; - unsigned long bestdiv = 0; - unsigned long bestmul = 0; - unsigned long bestfrac = 0; + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); - if (rate < characteristics->output[0].min || - rate > characteristics->output[0].max) - 
return -ERANGE; + return sam9x60_frac_pll_compute_mul_frac(core, rate, parent_rate, true); +} - if (!pll->characteristics->upll) { - mindiv = parent_rate / rate; - if (mindiv < 2) - mindiv = 2; +static const struct clk_ops sam9x60_frac_pll_ops = { + .prepare = sam9x60_frac_pll_prepare, + .unprepare = sam9x60_frac_pll_unprepare, + .is_prepared = sam9x60_frac_pll_is_prepared, + .recalc_rate = sam9x60_frac_pll_recalc_rate, + .round_rate = sam9x60_frac_pll_round_rate, + .set_rate = sam9x60_frac_pll_set_rate, +}; - maxdiv = DIV_ROUND_UP(parent_rate * PLL_MUL_MAX, rate); - if (maxdiv > PLL_DIV_MAX) - maxdiv = PLL_DIV_MAX; - } else { - mindiv = maxdiv = UPLL_DIV; - } +static int sam9x60_div_pll_prepare(struct clk_hw *hw) +{ + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + struct sam9x60_div *div = to_sam9x60_div(core); + struct regmap *regmap = core->regmap; + unsigned long flags; + unsigned int val, cdiv; - for (tmpdiv = mindiv; tmpdiv <= maxdiv; tmpdiv++) { - unsigned long remainder; - unsigned long tmprate; - unsigned long tmpmul; - unsigned long tmpfrac = 0; + spin_lock_irqsave(core->lock, flags); + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_ID_MSK, core->id); + regmap_read(regmap, AT91_PMC_PLL_CTRL0, &val); + cdiv = (val & core->layout->div_mask) >> core->layout->div_shift; - /* - * Calculate the multiplier associated with the current - * divider that provide the closest rate to the requested one. - */ - tmpmul = mult_frac(rate, tmpdiv, parent_rate); - tmprate = mult_frac(parent_rate, tmpmul, tmpdiv); - remainder = rate - tmprate; + /* Stop if enabled an nothing changed. 
*/ + if (!!(val & core->layout->endiv_mask) && cdiv == div->div) + goto unlock; - if (remainder) { - tmpfrac = DIV_ROUND_CLOSEST_ULL((u64)remainder * tmpdiv * (1 << 22), - parent_rate); + regmap_update_bits(regmap, AT91_PMC_PLL_CTRL0, + core->layout->div_mask | core->layout->endiv_mask, + (div->div << core->layout->div_shift) | + (1 << core->layout->endiv_shift)); - tmprate += DIV_ROUND_CLOSEST_ULL((u64)tmpfrac * parent_rate, - tmpdiv * (1 << 22)); + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_UPDATE | AT91_PMC_PLL_UPDT_ID_MSK, + AT91_PMC_PLL_UPDT_UPDATE | core->id); - if (tmprate > rate) - remainder = tmprate - rate; - else - remainder = rate - tmprate; - } + while (!sam9x60_pll_ready(regmap, core->id)) + cpu_relax(); - /* - * Compare the remainder with the best remainder found until - * now and elect a new best multiplier/divider pair if the - * current remainder is smaller than the best one. - */ - if (remainder < bestremainder) { - bestremainder = remainder; - bestdiv = tmpdiv; - bestmul = tmpmul; - bestrate = tmprate; - bestfrac = tmpfrac; +unlock: + spin_unlock_irqrestore(core->lock, flags); + + return 0; +} + +static void sam9x60_div_pll_unprepare(struct clk_hw *hw) +{ + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + struct regmap *regmap = core->regmap; + unsigned long flags; + + spin_lock_irqsave(core->lock, flags); + + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_ID_MSK, core->id); + + regmap_update_bits(regmap, AT91_PMC_PLL_CTRL0, + core->layout->endiv_mask, 0); + + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_UPDATE | AT91_PMC_PLL_UPDT_ID_MSK, + AT91_PMC_PLL_UPDT_UPDATE | core->id); + + spin_unlock_irqrestore(core->lock, flags); +} + +static int sam9x60_div_pll_is_prepared(struct clk_hw *hw) +{ + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + struct regmap *regmap = core->regmap; + unsigned long flags; + unsigned int val; + + spin_lock_irqsave(core->lock, flags); + + 
regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_ID_MSK, core->id); + regmap_read(regmap, AT91_PMC_PLL_CTRL0, &val); + + spin_unlock_irqrestore(core->lock, flags); + + return !!(val & core->layout->endiv_mask); +} + +static unsigned long sam9x60_div_pll_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + struct sam9x60_div *div = to_sam9x60_div(core); + + return DIV_ROUND_CLOSEST_ULL(parent_rate, (div->div + 1)); +} + +static long sam9x60_div_pll_compute_div(struct sam9x60_pll_core *core, + unsigned long *parent_rate, + unsigned long rate) +{ + const struct clk_pll_characteristics *characteristics = + core->characteristics; + struct clk_hw *parent = clk_hw_get_parent(&core->hw); + unsigned long tmp_rate, tmp_parent_rate, tmp_diff; + long best_diff = -1, best_rate = -EINVAL; + u32 divid, best_div; + + if (!rate) + return 0; + + if (rate < characteristics->output[0].min || + rate > characteristics->output[0].max) + return -ERANGE; + + for (divid = 1; divid < core->layout->div_mask; divid++) { + tmp_parent_rate = clk_hw_round_rate(parent, rate * divid); + if (!tmp_parent_rate) + continue; + + tmp_rate = DIV_ROUND_CLOSEST_ULL(tmp_parent_rate, divid); + tmp_diff = abs(rate - tmp_rate); + + if (best_diff < 0 || best_diff > tmp_diff) { + *parent_rate = tmp_parent_rate; + best_rate = tmp_rate; + best_diff = tmp_diff; + best_div = divid; } - /* We've found a perfect match! 
*/ - if (!remainder) + if (!best_diff) break; } - /* Check if bestrate is a valid output rate */ - if (bestrate < characteristics->output[0].min && - bestrate > characteristics->output[0].max) + if (best_rate < characteristics->output[0].min || + best_rate > characteristics->output[0].max) return -ERANGE; - if (update) { - pll->div = bestdiv - 1; - pll->mul = bestmul - 1; - pll->frac = bestfrac; - } - - return bestrate; + return best_rate; } -static long sam9x60_pll_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate) +static long sam9x60_div_pll_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) { - struct sam9x60_pll *pll = to_sam9x60_pll(hw); + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); - return sam9x60_pll_get_best_div_mul(pll, rate, *parent_rate, false); + return sam9x60_div_pll_compute_div(core, parent_rate, rate); } -static int sam9x60_pll_set_rate(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate) +static int sam9x60_div_pll_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) { - struct sam9x60_pll *pll = to_sam9x60_pll(hw); + struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); + struct sam9x60_div *div = to_sam9x60_div(core); + + div->div = DIV_ROUND_CLOSEST(parent_rate, rate) - 1; - return sam9x60_pll_get_best_div_mul(pll, rate, parent_rate, true); + return 0; } -static const struct clk_ops pll_ops = { - .prepare = sam9x60_pll_prepare, - .unprepare = sam9x60_pll_unprepare, - .is_prepared = sam9x60_pll_is_prepared, - .recalc_rate = sam9x60_pll_recalc_rate, - .round_rate = sam9x60_pll_round_rate, - .set_rate = sam9x60_pll_set_rate, +static const struct clk_ops sam9x60_div_pll_ops = { + .prepare = sam9x60_div_pll_prepare, + .unprepare = sam9x60_div_pll_unprepare, + .is_prepared = sam9x60_div_pll_is_prepared, + .recalc_rate = sam9x60_div_pll_recalc_rate, + .round_rate = sam9x60_div_pll_round_rate, + .set_rate = sam9x60_div_pll_set_rate, }; 
struct clk_hw * __init -sam9x60_clk_register_pll(struct regmap *regmap, spinlock_t *lock, - const char *name, const char *parent_name, u8 id, - const struct clk_pll_characteristics *characteristics) +sam9x60_clk_register_frac_pll(struct regmap *regmap, spinlock_t *lock, + const char *name, const char *parent_name, + struct clk_hw *parent_hw, u8 id, + const struct clk_pll_characteristics *characteristics, + const struct clk_pll_layout *layout, bool critical) { - struct sam9x60_pll *pll; + struct sam9x60_frac *frac; struct clk_hw *hw; struct clk_init_data init; - unsigned int pllr; + unsigned long parent_rate, flags; + unsigned int val; int ret; - if (id > PLL_MAX_ID) + if (id > PLL_MAX_ID || !lock || !parent_hw) return ERR_PTR(-EINVAL); - pll = kzalloc(sizeof(*pll), GFP_KERNEL); - if (!pll) + frac = kzalloc(sizeof(*frac), GFP_KERNEL); + if (!frac) return ERR_PTR(-ENOMEM); init.name = name; - init.ops = &pll_ops; init.parent_names = &parent_name; init.num_parents = 1; + init.ops = &sam9x60_frac_pll_ops; init.flags = CLK_SET_RATE_GATE; + if (critical) + init.flags |= CLK_IS_CRITICAL; + + frac->core.id = id; + frac->core.hw.init = &init; + frac->core.characteristics = characteristics; + frac->core.layout = layout; + frac->core.regmap = regmap; + frac->core.lock = lock; + + spin_lock_irqsave(frac->core.lock, flags); + if (sam9x60_pll_ready(regmap, id)) { + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_ID_MSK, id); + regmap_read(regmap, AT91_PMC_PLL_CTRL1, &val); + frac->mul = FIELD_GET(PMC_PLL_CTRL1_MUL_MSK, val); + frac->frac = FIELD_GET(PMC_PLL_CTRL1_FRACR_MSK, val); + } else { + /* + * This means the PLL is not setup by bootloaders. In this + * case we need to set the minimum rate for it. Otherwise + * a clock child of this PLL may be enabled before setting + * its rate leading to enabling this PLL with unsupported + * rate. This will lead to PLL not being locked at all. 
+ */ + parent_rate = clk_hw_get_rate(parent_hw); + if (!parent_rate) { + hw = ERR_PTR(-EINVAL); + goto free; + } + + ret = sam9x60_frac_pll_compute_mul_frac(&frac->core, FCORE_MIN, + parent_rate, true); + if (ret <= 0) { + hw = ERR_PTR(ret); + goto free; + } + } + spin_unlock_irqrestore(frac->core.lock, flags); + + hw = &frac->core.hw; + ret = clk_hw_register(NULL, hw); + if (ret) { + kfree(frac); + hw = ERR_PTR(ret); + } - pll->id = id; - pll->hw.init = &init; - pll->characteristics = characteristics; - pll->regmap = regmap; - pll->lock = lock; + return hw; + +free: + spin_unlock_irqrestore(frac->core.lock, flags); + kfree(frac); + return hw; +} + +struct clk_hw * __init +sam9x60_clk_register_div_pll(struct regmap *regmap, spinlock_t *lock, + const char *name, const char *parent_name, u8 id, + const struct clk_pll_characteristics *characteristics, + const struct clk_pll_layout *layout, bool critical) +{ + struct sam9x60_div *div; + struct clk_hw *hw; + struct clk_init_data init; + unsigned long flags; + unsigned int val; + int ret; + + if (id > PLL_MAX_ID || !lock) + return ERR_PTR(-EINVAL); + + div = kzalloc(sizeof(*div), GFP_KERNEL); + if (!div) + return ERR_PTR(-ENOMEM); + + init.name = name; + init.parent_names = &parent_name; + init.num_parents = 1; + init.ops = &sam9x60_div_pll_ops; + init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE | + CLK_SET_RATE_PARENT; + if (critical) + init.flags |= CLK_IS_CRITICAL; + + div->core.id = id; + div->core.hw.init = &init; + div->core.characteristics = characteristics; + div->core.layout = layout; + div->core.regmap = regmap; + div->core.lock = lock; + + spin_lock_irqsave(div->core.lock, flags); + + regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, + AT91_PMC_PLL_UPDT_ID_MSK, id); + regmap_read(regmap, AT91_PMC_PLL_CTRL0, &val); + div->div = FIELD_GET(PMC_PLL_CTRL0_DIV_MSK, val); - regmap_write(regmap, AT91_PMC_PLL_UPDT, id); - regmap_read(regmap, AT91_PMC_PLL_CTRL0, &pllr); - pll->div = FIELD_GET(PMC_PLL_CTRL0_DIV_MSK, 
pllr); - regmap_read(regmap, AT91_PMC_PLL_CTRL1, &pllr); - pll->mul = FIELD_GET(PMC_PLL_CTRL1_MUL_MSK, pllr); + spin_unlock_irqrestore(div->core.lock, flags); - hw = &pll->hw; + hw = &div->core.hw; ret = clk_hw_register(NULL, hw); if (ret) { - kfree(pll); + kfree(div); hw = ERR_PTR(ret); } diff --git a/drivers/clk/at91/clk-system.c b/drivers/clk/at91/clk-system.c index c4b3877aa445..f83ec0de86c3 100644 --- a/drivers/clk/at91/clk-system.c +++ b/drivers/clk/at91/clk-system.c @@ -34,7 +34,7 @@ static inline bool clk_system_ready(struct regmap *regmap, int id) regmap_read(regmap, AT91_PMC_SR, &status); - return status & (1 << id) ? 1 : 0; + return !!(status & (1 << id)); } static int clk_system_prepare(struct clk_hw *hw) @@ -74,7 +74,7 @@ static int clk_system_is_prepared(struct clk_hw *hw) regmap_read(sys->regmap, AT91_PMC_SR, &status); - return status & (1 << sys->id) ? 1 : 0; + return !!(status & (1 << sys->id)); } static const struct clk_ops system_ops = { diff --git a/drivers/clk/at91/clk-utmi.c b/drivers/clk/at91/clk-utmi.c index f1ef4e1f41a9..df9f3fc3b6a6 100644 --- a/drivers/clk/at91/clk-utmi.c +++ b/drivers/clk/at91/clk-utmi.c @@ -120,9 +120,11 @@ static const struct clk_ops utmi_ops = { .recalc_rate = clk_utmi_recalc_rate, }; -struct clk_hw * __init -at91_clk_register_utmi(struct regmap *regmap_pmc, struct regmap *regmap_sfr, - const char *name, const char *parent_name) +static struct clk_hw * __init +at91_clk_register_utmi_internal(struct regmap *regmap_pmc, + struct regmap *regmap_sfr, + const char *name, const char *parent_name, + const struct clk_ops *ops, unsigned long flags) { struct clk_utmi *utmi; struct clk_hw *hw; @@ -134,10 +136,10 @@ at91_clk_register_utmi(struct regmap *regmap_pmc, struct regmap *regmap_sfr, return ERR_PTR(-ENOMEM); init.name = name; - init.ops = &utmi_ops; + init.ops = ops; init.parent_names = parent_name ? &parent_name : NULL; init.num_parents = parent_name ? 
1 : 0; - init.flags = CLK_SET_RATE_GATE; + init.flags = flags; utmi->hw.init = &init; utmi->regmap_pmc = regmap_pmc; @@ -152,3 +154,94 @@ at91_clk_register_utmi(struct regmap *regmap_pmc, struct regmap *regmap_sfr, return hw; } + +struct clk_hw * __init +at91_clk_register_utmi(struct regmap *regmap_pmc, struct regmap *regmap_sfr, + const char *name, const char *parent_name) +{ + return at91_clk_register_utmi_internal(regmap_pmc, regmap_sfr, name, + parent_name, &utmi_ops, CLK_SET_RATE_GATE); +} + +static int clk_utmi_sama7g5_prepare(struct clk_hw *hw) +{ + struct clk_utmi *utmi = to_clk_utmi(hw); + struct clk_hw *hw_parent; + unsigned long parent_rate; + unsigned int val; + + hw_parent = clk_hw_get_parent(hw); + parent_rate = clk_hw_get_rate(hw_parent); + + switch (parent_rate) { + case 16000000: + val = 0; + break; + case 20000000: + val = 2; + break; + case 24000000: + val = 3; + break; + case 32000000: + val = 5; + break; + default: + pr_err("UTMICK: unsupported main_xtal rate\n"); + return -EINVAL; + } + + regmap_write(utmi->regmap_pmc, AT91_PMC_XTALF, val); + + return 0; + +} + +static int clk_utmi_sama7g5_is_prepared(struct clk_hw *hw) +{ + struct clk_utmi *utmi = to_clk_utmi(hw); + struct clk_hw *hw_parent; + unsigned long parent_rate; + unsigned int val; + + hw_parent = clk_hw_get_parent(hw); + parent_rate = clk_hw_get_rate(hw_parent); + + regmap_read(utmi->regmap_pmc, AT91_PMC_XTALF, &val); + switch (val & 0x7) { + case 0: + if (parent_rate == 16000000) + return 1; + break; + case 2: + if (parent_rate == 20000000) + return 1; + break; + case 3: + if (parent_rate == 24000000) + return 1; + break; + case 5: + if (parent_rate == 32000000) + return 1; + break; + default: + break; + } + + return 0; +} + +static const struct clk_ops sama7g5_utmi_ops = { + .prepare = clk_utmi_sama7g5_prepare, + .is_prepared = clk_utmi_sama7g5_is_prepared, + .recalc_rate = clk_utmi_recalc_rate, +}; + +struct clk_hw * __init +at91_clk_sama7g5_register_utmi(struct regmap 
*regmap_pmc, const char *name, + const char *parent_name) +{ + return at91_clk_register_utmi_internal(regmap_pmc, NULL, name, + parent_name, &sama7g5_utmi_ops, 0); +} diff --git a/drivers/clk/at91/dt-compat.c b/drivers/clk/at91/dt-compat.c index aa1754eac59f..a50084de97d4 100644 --- a/drivers/clk/at91/dt-compat.c +++ b/drivers/clk/at91/dt-compat.c @@ -22,6 +22,8 @@ #define SYSTEM_MAX_ID 31 +#define GCK_INDEX_DT_AUDIO_PLL 5 + #ifdef CONFIG_HAVE_AT91_AUDIO_PLL static void __init of_sama5d2_clk_audio_pll_frac_setup(struct device_node *np) { @@ -135,7 +137,7 @@ static void __init of_sama5d2_clk_generated_setup(struct device_node *np) return; for_each_child_of_node(np, gcknp) { - bool pll_audio = false; + int chg_pid = INT_MIN; if (of_property_read_u32(gcknp, "reg", &id)) continue; @@ -152,12 +154,13 @@ static void __init of_sama5d2_clk_generated_setup(struct device_node *np) if (of_device_is_compatible(np, "atmel,sama5d2-clk-generated") && (id == GCK_ID_I2S0 || id == GCK_ID_I2S1 || id == GCK_ID_CLASSD)) - pll_audio = true; + chg_pid = GCK_INDEX_DT_AUDIO_PLL; hw = at91_clk_register_generated(regmap, &pmc_pcr_lock, &dt_pcr_layout, name, - parent_names, num_parents, - id, pll_audio, &range); + parent_names, NULL, + num_parents, id, &range, + chg_pid); if (IS_ERR(hw)) continue; @@ -460,7 +463,8 @@ of_at91_clk_periph_setup(struct device_node *np, u8 type) &dt_pcr_layout, name, parent_name, - id, &range); + id, &range, + INT_MIN); } if (IS_ERR(hw)) @@ -673,7 +677,8 @@ CLK_OF_DECLARE(at91sam9x5_clk_plldiv, "atmel,at91sam9x5-clk-plldiv", static void __init of_at91_clk_prog_setup(struct device_node *np, - const struct clk_programmable_layout *layout) + const struct clk_programmable_layout *layout, + u32 *mux_table) { int num; u32 id; @@ -707,7 +712,7 @@ of_at91_clk_prog_setup(struct device_node *np, hw = at91_clk_register_programmable(regmap, name, parent_names, num_parents, - id, layout); + id, layout, mux_table); if (IS_ERR(hw)) continue; @@ -717,21 +722,21 @@ 
of_at91_clk_prog_setup(struct device_node *np, static void __init of_at91rm9200_clk_prog_setup(struct device_node *np) { - of_at91_clk_prog_setup(np, &at91rm9200_programmable_layout); + of_at91_clk_prog_setup(np, &at91rm9200_programmable_layout, NULL); } CLK_OF_DECLARE(at91rm9200_clk_prog, "atmel,at91rm9200-clk-programmable", of_at91rm9200_clk_prog_setup); static void __init of_at91sam9g45_clk_prog_setup(struct device_node *np) { - of_at91_clk_prog_setup(np, &at91sam9g45_programmable_layout); + of_at91_clk_prog_setup(np, &at91sam9g45_programmable_layout, NULL); } CLK_OF_DECLARE(at91sam9g45_clk_prog, "atmel,at91sam9g45-clk-programmable", of_at91sam9g45_clk_prog_setup); static void __init of_at91sam9x5_clk_prog_setup(struct device_node *np) { - of_at91_clk_prog_setup(np, &at91sam9x5_programmable_layout); + of_at91_clk_prog_setup(np, &at91sam9x5_programmable_layout, NULL); } CLK_OF_DECLARE(at91sam9x5_clk_prog, "atmel,at91sam9x5-clk-programmable", of_at91sam9x5_clk_prog_setup); diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h index df616f2937e7..7b86affc6d7c 100644 --- a/drivers/clk/at91/pmc.h +++ b/drivers/clk/at91/pmc.h @@ -54,8 +54,14 @@ struct clk_master_characteristics { struct clk_pll_layout { u32 pllr_mask; - u16 mul_mask; + u32 mul_mask; + u32 frac_mask; + u32 div_mask; + u32 endiv_mask; u8 mul_shift; + u8 frac_shift; + u8 div_shift; + u8 endiv_shift; }; extern const struct clk_pll_layout at91rm9200_pll_layout; @@ -122,8 +128,8 @@ struct clk_hw * __init at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, const struct clk_pcr_layout *layout, const char *name, const char **parent_names, - u8 num_parents, u8 id, bool pll_audio, - const struct clk_range *range); + u32 *mux_table, u8 num_parents, u8 id, + const struct clk_range *range, int chg_pid); struct clk_hw * __init at91_clk_register_h32mx(struct regmap *regmap, const char *name, @@ -155,13 +161,21 @@ at91_clk_register_master(struct regmap *regmap, const char *name, const struct 
clk_master_characteristics *characteristics); struct clk_hw * __init +at91_clk_sama7g5_register_master(struct regmap *regmap, + const char *name, int num_parents, + const char **parent_names, u32 *mux_table, + spinlock_t *lock, u8 id, bool critical, + int chg_pid); + +struct clk_hw * __init at91_clk_register_peripheral(struct regmap *regmap, const char *name, const char *parent_name, u32 id); struct clk_hw * __init at91_clk_register_sam9x5_peripheral(struct regmap *regmap, spinlock_t *lock, const struct clk_pcr_layout *layout, const char *name, const char *parent_name, - u32 id, const struct clk_range *range); + u32 id, const struct clk_range *range, + int chg_pid); struct clk_hw * __init at91_clk_register_pll(struct regmap *regmap, const char *name, @@ -173,14 +187,23 @@ at91_clk_register_plldiv(struct regmap *regmap, const char *name, const char *parent_name); struct clk_hw * __init -sam9x60_clk_register_pll(struct regmap *regmap, spinlock_t *lock, - const char *name, const char *parent_name, u8 id, - const struct clk_pll_characteristics *characteristics); +sam9x60_clk_register_div_pll(struct regmap *regmap, spinlock_t *lock, + const char *name, const char *parent_name, u8 id, + const struct clk_pll_characteristics *characteristics, + const struct clk_pll_layout *layout, bool critical); + +struct clk_hw * __init +sam9x60_clk_register_frac_pll(struct regmap *regmap, spinlock_t *lock, + const char *name, const char *parent_name, + struct clk_hw *parent_hw, u8 id, + const struct clk_pll_characteristics *characteristics, + const struct clk_pll_layout *layout, bool critical); struct clk_hw * __init at91_clk_register_programmable(struct regmap *regmap, const char *name, const char **parent_names, u8 num_parents, u8 id, - const struct clk_programmable_layout *layout); + const struct clk_programmable_layout *layout, + u32 *mux_table); struct clk_hw * __init at91_clk_register_sam9260_slow(struct regmap *regmap, @@ -213,6 +236,10 @@ struct clk_hw * __init 
at91_clk_register_utmi(struct regmap *regmap_pmc, struct regmap *regmap_sfr, const char *name, const char *parent_name); +struct clk_hw * __init +at91_clk_sama7g5_register_utmi(struct regmap *regmap, const char *name, + const char *parent_name); + #ifdef CONFIG_PM void pmc_register_id(u8 id); void pmc_register_pck(u8 pck); diff --git a/drivers/clk/at91/sam9x60.c b/drivers/clk/at91/sam9x60.c index 3e20aa68259f..ab6318c0589e 100644 --- a/drivers/clk/at91/sam9x60.c +++ b/drivers/clk/at91/sam9x60.c @@ -22,7 +22,7 @@ static const struct clk_master_layout sam9x60_master_layout = { }; static const struct clk_range plla_outputs[] = { - { .min = 300000000, .max = 600000000 }, + { .min = 2343750, .max = 1200000000 }, }; static const struct clk_pll_characteristics plla_characteristics = { @@ -42,6 +42,20 @@ static const struct clk_pll_characteristics upll_characteristics = { .upll = true, }; +static const struct clk_pll_layout pll_frac_layout = { + .mul_mask = GENMASK(31, 24), + .frac_mask = GENMASK(21, 0), + .mul_shift = 24, + .frac_shift = 0, +}; + +static const struct clk_pll_layout pll_div_layout = { + .div_mask = GENMASK(7, 0), + .endiv_mask = BIT(29), + .div_shift = 0, + .endiv_shift = 29, +}; + static const struct clk_programmable_layout sam9x60_programmable_layout = { .pres_mask = 0xff, .pres_shift = 8, @@ -156,6 +170,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) const char *td_slck_name, *md_slck_name, *mainxtal_name; struct pmc_data *sam9x60_pmc; const char *parent_names[6]; + struct clk_hw *main_osc_hw; struct regmap *regmap; struct clk_hw *hw; int i; @@ -178,7 +193,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) return; mainxtal_name = of_clk_get_parent_name(np, i); - regmap = syscon_node_to_regmap(np); + regmap = device_node_to_regmap(np); if (IS_ERR(regmap)) return; @@ -189,7 +204,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) if (!sam9x60_pmc) return; - hw = at91_clk_register_main_rc_osc(regmap, 
"main_rc_osc", 24000000, + hw = at91_clk_register_main_rc_osc(regmap, "main_rc_osc", 12000000, 50000000); if (IS_ERR(hw)) goto err_free; @@ -200,6 +215,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) bypass); if (IS_ERR(hw)) goto err_free; + main_osc_hw = hw; parent_names[0] = "main_rc_osc"; parent_names[1] = "main_osc"; @@ -209,15 +225,31 @@ static void __init sam9x60_pmc_setup(struct device_node *np) sam9x60_pmc->chws[PMC_MAIN] = hw; - hw = sam9x60_clk_register_pll(regmap, &pmc_pll_lock, "pllack", - "mainck", 0, &plla_characteristics); + hw = sam9x60_clk_register_frac_pll(regmap, &pmc_pll_lock, "pllack_fracck", + "mainck", sam9x60_pmc->chws[PMC_MAIN], + 0, &plla_characteristics, + &pll_frac_layout, true); + if (IS_ERR(hw)) + goto err_free; + + hw = sam9x60_clk_register_div_pll(regmap, &pmc_pll_lock, "pllack_divck", + "pllack_fracck", 0, &plla_characteristics, + &pll_div_layout, true); if (IS_ERR(hw)) goto err_free; sam9x60_pmc->chws[PMC_PLLACK] = hw; - hw = sam9x60_clk_register_pll(regmap, &pmc_pll_lock, "upllck", - "main_osc", 1, &upll_characteristics); + hw = sam9x60_clk_register_frac_pll(regmap, &pmc_pll_lock, "upllck_fracck", + "main_osc", main_osc_hw, 1, + &upll_characteristics, + &pll_frac_layout, false); + if (IS_ERR(hw)) + goto err_free; + + hw = sam9x60_clk_register_div_pll(regmap, &pmc_pll_lock, "upllck_divck", + "upllck_fracck", 1, &upll_characteristics, + &pll_div_layout, false); if (IS_ERR(hw)) goto err_free; @@ -225,7 +257,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) parent_names[0] = md_slck_name; parent_names[1] = "mainck"; - parent_names[2] = "pllack"; + parent_names[2] = "pllack_divck"; hw = at91_clk_register_master(regmap, "masterck", 3, parent_names, &sam9x60_master_layout, &mck_characteristics); @@ -234,8 +266,8 @@ static void __init sam9x60_pmc_setup(struct device_node *np) sam9x60_pmc->chws[PMC_MCK] = hw; - parent_names[0] = "pllack"; - parent_names[1] = "upllck"; + parent_names[0] = "pllack_divck"; + 
parent_names[1] = "upllck_divck"; parent_names[2] = "main_osc"; hw = sam9x60_clk_register_usb(regmap, "usbck", parent_names, 3); if (IS_ERR(hw)) @@ -245,8 +277,8 @@ static void __init sam9x60_pmc_setup(struct device_node *np) parent_names[1] = td_slck_name; parent_names[2] = "mainck"; parent_names[3] = "masterck"; - parent_names[4] = "pllack"; - parent_names[5] = "upllck"; + parent_names[4] = "pllack_divck"; + parent_names[5] = "upllck_divck"; for (i = 0; i < 8; i++) { char name[6]; @@ -254,7 +286,8 @@ static void __init sam9x60_pmc_setup(struct device_node *np) hw = at91_clk_register_programmable(regmap, name, parent_names, 6, i, - &sam9x60_programmable_layout); + &sam9x60_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; @@ -277,7 +310,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np) sam9x60_periphck[i].n, "masterck", sam9x60_periphck[i].id, - &range); + &range, INT_MIN); if (IS_ERR(hw)) goto err_free; @@ -288,10 +321,9 @@ static void __init sam9x60_pmc_setup(struct device_node *np) hw = at91_clk_register_generated(regmap, &pmc_pcr_lock, &sam9x60_pcr_layout, sam9x60_gck[i].n, - parent_names, 6, + parent_names, NULL, 6, sam9x60_gck[i].id, - false, - &sam9x60_gck[i].r); + &sam9x60_gck[i].r, INT_MIN); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/sama5d2.c b/drivers/clk/at91/sama5d2.c index d69421d71daf..8b220762941a 100644 --- a/drivers/clk/at91/sama5d2.c +++ b/drivers/clk/at91/sama5d2.c @@ -116,21 +116,20 @@ static const struct { char *n; u8 id; struct clk_range r; - bool pll; + int chg_pid; } sama5d2_gck[] = { - { .n = "sdmmc0_gclk", .id = 31, }, - { .n = "sdmmc1_gclk", .id = 32, }, - { .n = "tcb0_gclk", .id = 35, .r = { .min = 0, .max = 83000000 }, }, - { .n = "tcb1_gclk", .id = 36, .r = { .min = 0, .max = 83000000 }, }, - { .n = "pwm_gclk", .id = 38, .r = { .min = 0, .max = 83000000 }, }, - { .n = "isc_gclk", .id = 46, }, - { .n = "pdmic_gclk", .id = 48, }, - { .n = "i2s0_gclk", .id = 54, .pll = true }, - { .n = 
"i2s1_gclk", .id = 55, .pll = true }, - { .n = "can0_gclk", .id = 56, .r = { .min = 0, .max = 80000000 }, }, - { .n = "can1_gclk", .id = 57, .r = { .min = 0, .max = 80000000 }, }, - { .n = "classd_gclk", .id = 59, .r = { .min = 0, .max = 100000000 }, - .pll = true }, + { .n = "sdmmc0_gclk", .id = 31, .chg_pid = INT_MIN, }, + { .n = "sdmmc1_gclk", .id = 32, .chg_pid = INT_MIN, }, + { .n = "tcb0_gclk", .id = 35, .chg_pid = INT_MIN, .r = { .min = 0, .max = 83000000 }, }, + { .n = "tcb1_gclk", .id = 36, .chg_pid = INT_MIN, .r = { .min = 0, .max = 83000000 }, }, + { .n = "pwm_gclk", .id = 38, .chg_pid = INT_MIN, .r = { .min = 0, .max = 83000000 }, }, + { .n = "isc_gclk", .id = 46, .chg_pid = INT_MIN, }, + { .n = "pdmic_gclk", .id = 48, .chg_pid = INT_MIN, }, + { .n = "i2s0_gclk", .id = 54, .chg_pid = 5, }, + { .n = "i2s1_gclk", .id = 55, .chg_pid = 5, }, + { .n = "can0_gclk", .id = 56, .chg_pid = INT_MIN, .r = { .min = 0, .max = 80000000 }, }, + { .n = "can1_gclk", .id = 57, .chg_pid = INT_MIN, .r = { .min = 0, .max = 80000000 }, }, + { .n = "classd_gclk", .id = 59, .chg_pid = 5, .r = { .min = 0, .max = 100000000 }, }, }; static const struct clk_programmable_layout sama5d2_programmable_layout = { @@ -269,7 +268,8 @@ static void __init sama5d2_pmc_setup(struct device_node *np) hw = at91_clk_register_programmable(regmap, name, parent_names, 6, i, - &sama5d2_programmable_layout); + &sama5d2_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; @@ -292,7 +292,7 @@ static void __init sama5d2_pmc_setup(struct device_node *np) sama5d2_periphck[i].n, "masterck", sama5d2_periphck[i].id, - &range); + &range, INT_MIN); if (IS_ERR(hw)) goto err_free; @@ -305,7 +305,8 @@ static void __init sama5d2_pmc_setup(struct device_node *np) sama5d2_periph32ck[i].n, "h32mxck", sama5d2_periph32ck[i].id, - &sama5d2_periph32ck[i].r); + &sama5d2_periph32ck[i].r, + INT_MIN); if (IS_ERR(hw)) goto err_free; @@ -322,10 +323,10 @@ static void __init sama5d2_pmc_setup(struct device_node *np) hw = 
at91_clk_register_generated(regmap, &pmc_pcr_lock, &sama5d2_pcr_layout, sama5d2_gck[i].n, - parent_names, 6, + parent_names, NULL, 6, sama5d2_gck[i].id, - sama5d2_gck[i].pll, - &sama5d2_gck[i].r); + &sama5d2_gck[i].r, + sama5d2_gck[i].chg_pid); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/sama5d3.c b/drivers/clk/at91/sama5d3.c index 5e4e44dd4c37..7c6e0a5b9dc8 100644 --- a/drivers/clk/at91/sama5d3.c +++ b/drivers/clk/at91/sama5d3.c @@ -121,7 +121,7 @@ static void __init sama5d3_pmc_setup(struct device_node *np) return; mainxtal_name = of_clk_get_parent_name(np, i); - regmap = syscon_node_to_regmap(np); + regmap = device_node_to_regmap(np); if (IS_ERR(regmap)) return; @@ -200,7 +200,8 @@ static void __init sama5d3_pmc_setup(struct device_node *np) hw = at91_clk_register_programmable(regmap, name, parent_names, 5, i, - &at91sam9x5_programmable_layout); + &at91sam9x5_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; @@ -223,7 +224,8 @@ static void __init sama5d3_pmc_setup(struct device_node *np) sama5d3_periphck[i].n, "masterck", sama5d3_periphck[i].id, - &sama5d3_periphck[i].r); + &sama5d3_periphck[i].r, + INT_MIN); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/sama5d4.c b/drivers/clk/at91/sama5d4.c index 662ff5fa6e98..92d8d4141b43 100644 --- a/drivers/clk/at91/sama5d4.c +++ b/drivers/clk/at91/sama5d4.c @@ -223,7 +223,8 @@ static void __init sama5d4_pmc_setup(struct device_node *np) hw = at91_clk_register_programmable(regmap, name, parent_names, 5, i, - &at91sam9x5_programmable_layout); + &at91sam9x5_programmable_layout, + NULL); if (IS_ERR(hw)) goto err_free; @@ -246,7 +247,7 @@ static void __init sama5d4_pmc_setup(struct device_node *np) sama5d4_periphck[i].n, "masterck", sama5d4_periphck[i].id, - &range); + &range, INT_MIN); if (IS_ERR(hw)) goto err_free; @@ -259,7 +260,7 @@ static void __init sama5d4_pmc_setup(struct device_node *np) sama5d4_periph32ck[i].n, "h32mxck", sama5d4_periph32ck[i].id, - &range); + &range, 
INT_MIN); if (IS_ERR(hw)) goto err_free; diff --git a/drivers/clk/at91/sama7g5.c b/drivers/clk/at91/sama7g5.c new file mode 100644 index 000000000000..0db2ab3eca14 --- /dev/null +++ b/drivers/clk/at91/sama7g5.c @@ -0,0 +1,1059 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * SAMA7G5 PMC code. + * + * Copyright (C) 2020 Microchip Technology Inc. and its subsidiaries + * + * Author: Claudiu Beznea <[email protected]> + * + */ +#include <linux/clk.h> +#include <linux/clk-provider.h> +#include <linux/mfd/syscon.h> +#include <linux/slab.h> + +#include <dt-bindings/clock/at91.h> + +#include "pmc.h" + +#define SAMA7G5_INIT_TABLE(_table, _count) \ + do { \ + u8 _i; \ + for (_i = 0; _i < (_count); _i++) \ + (_table)[_i] = _i; \ + } while (0) + +#define SAMA7G5_FILL_TABLE(_to, _from, _count) \ + do { \ + u8 _i; \ + for (_i = 0; _i < (_count); _i++) { \ + (_to)[_i] = (_from)[_i]; \ + } \ + } while (0) + +static DEFINE_SPINLOCK(pmc_pll_lock); +static DEFINE_SPINLOCK(pmc_mckX_lock); + +/** + * PLL clocks identifiers + * @PLL_ID_CPU: CPU PLL identifier + * @PLL_ID_SYS: System PLL identifier + * @PLL_ID_DDR: DDR PLL identifier + * @PLL_ID_IMG: Image subsystem PLL identifier + * @PLL_ID_BAUD: Baud PLL identifier + * @PLL_ID_AUDIO: Audio PLL identifier + * @PLL_ID_ETH: Ethernet PLL identifier + */ +enum pll_ids { + PLL_ID_CPU, + PLL_ID_SYS, + PLL_ID_DDR, + PLL_ID_IMG, + PLL_ID_BAUD, + PLL_ID_AUDIO, + PLL_ID_ETH, + PLL_ID_MAX, +}; + +/** + * PLL type identifiers + * @PLL_TYPE_FRAC: fractional PLL identifier + * @PLL_TYPE_DIV: divider PLL identifier + */ +enum pll_type { + PLL_TYPE_FRAC, + PLL_TYPE_DIV, +}; + +/* Layout for fractional PLLs. */ +static const struct clk_pll_layout pll_layout_frac = { + .mul_mask = GENMASK(31, 24), + .frac_mask = GENMASK(21, 0), + .mul_shift = 24, + .frac_shift = 0, +}; + +/* Layout for DIVPMC dividers. 
*/ +static const struct clk_pll_layout pll_layout_divpmc = { + .div_mask = GENMASK(7, 0), + .endiv_mask = BIT(29), + .div_shift = 0, + .endiv_shift = 29, +}; + +/* Layout for DIVIO dividers. */ +static const struct clk_pll_layout pll_layout_divio = { + .div_mask = GENMASK(19, 12), + .endiv_mask = BIT(30), + .div_shift = 12, + .endiv_shift = 30, +}; + +/** + * PLL clocks description + * @n: clock name + * @p: clock parent + * @l: clock layout + * @t: clock type + * @c: true if clock is critical and cannot be disabled + * @eid: export index in sama7g5->chws[] array + */ +static const struct { + const char *n; + const char *p; + const struct clk_pll_layout *l; + u8 t; + u8 c; + u8 eid; +} sama7g5_plls[][PLL_ID_MAX] = { + [PLL_ID_CPU] = { + { .n = "cpupll_fracck", + .p = "mainck", + .l = &pll_layout_frac, + .t = PLL_TYPE_FRAC, + .c = 1, }, + + { .n = "cpupll_divpmcck", + .p = "cpupll_fracck", + .l = &pll_layout_divpmc, + .t = PLL_TYPE_DIV, + .c = 1, }, + }, + + [PLL_ID_SYS] = { + { .n = "syspll_fracck", + .p = "mainck", + .l = &pll_layout_frac, + .t = PLL_TYPE_FRAC, + .c = 1, }, + + { .n = "syspll_divpmcck", + .p = "syspll_fracck", + .l = &pll_layout_divpmc, + .t = PLL_TYPE_DIV, + .c = 1, }, + }, + + [PLL_ID_DDR] = { + { .n = "ddrpll_fracck", + .p = "mainck", + .l = &pll_layout_frac, + .t = PLL_TYPE_FRAC, + .c = 1, }, + + { .n = "ddrpll_divpmcck", + .p = "ddrpll_fracck", + .l = &pll_layout_divpmc, + .t = PLL_TYPE_DIV, + .c = 1, }, + }, + + [PLL_ID_IMG] = { + { .n = "imgpll_fracck", + .p = "mainck", + .l = &pll_layout_frac, + .t = PLL_TYPE_FRAC, }, + + { .n = "imgpll_divpmcck", + .p = "imgpll_fracck", + .l = &pll_layout_divpmc, + .t = PLL_TYPE_DIV, }, + }, + + [PLL_ID_BAUD] = { + { .n = "baudpll_fracck", + .p = "mainck", + .l = &pll_layout_frac, + .t = PLL_TYPE_FRAC, }, + + { .n = "baudpll_divpmcck", + .p = "baudpll_fracck", + .l = &pll_layout_divpmc, + .t = PLL_TYPE_DIV, }, + }, + + [PLL_ID_AUDIO] = { + { .n = "audiopll_fracck", + .p = "main_xtal", + .l =
&pll_layout_frac, + .t = PLL_TYPE_FRAC, }, + + { .n = "audiopll_divpmcck", + .p = "audiopll_fracck", + .l = &pll_layout_divpmc, + .t = PLL_TYPE_DIV, + .eid = PMC_I2S0_MUX, }, + + { .n = "audiopll_diviock", + .p = "audiopll_fracck", + .l = &pll_layout_divio, + .t = PLL_TYPE_DIV, + .eid = PMC_I2S1_MUX, }, + }, + + [PLL_ID_ETH] = { + { .n = "ethpll_fracck", + .p = "main_xtal", + .l = &pll_layout_frac, + .t = PLL_TYPE_FRAC, }, + + { .n = "ethpll_divpmcck", + .p = "ethpll_fracck", + .l = &pll_layout_divpmc, + .t = PLL_TYPE_DIV, }, + }, +}; + +/** + * Master clock (MCK[1..4]) description + * @n: clock name + * @ep: extra parents names array + * @ep_chg_id: index in parents array that specifies the changeable + * parent + * @ep_count: extra parents count + * @ep_mux_table: mux table for extra parents + * @id: clock id + * @c: true if clock is critical and cannot be disabled + */ +static const struct { + const char *n; + const char *ep[4]; + int ep_chg_id; + u8 ep_count; + u8 ep_mux_table[4]; + u8 id; + u8 c; +} sama7g5_mckx[] = { + { .n = "mck1", + .id = 1, + .ep = { "syspll_divpmcck", }, + .ep_mux_table = { 5, }, + .ep_count = 1, + .ep_chg_id = INT_MIN, + .c = 1, }, + + { .n = "mck2", + .id = 2, + .ep = { "ddrpll_divpmcck", }, + .ep_mux_table = { 6, }, + .ep_count = 1, + .ep_chg_id = INT_MIN, + .c = 1, }, + + { .n = "mck3", + .id = 3, + .ep = { "syspll_divpmcck", "ddrpll_divpmcck", "imgpll_divpmcck", }, + .ep_mux_table = { 5, 6, 7, }, + .ep_count = 3, + .ep_chg_id = 6, }, + + { .n = "mck4", + .id = 4, + .ep = { "syspll_divpmcck", }, + .ep_mux_table = { 5, }, + .ep_count = 1, + .ep_chg_id = INT_MIN, + .c = 1, }, +}; + +/** + * System clock description + * @n: clock name + * @p: clock parent name + * @id: clock id + */ +static const struct { + const char *n; + const char *p; + u8 id; +} sama7g5_systemck[] = { + { .n = "pck0", .p = "prog0", .id = 8, }, + { .n = "pck1", .p = "prog1", .id = 9, }, + { .n = "pck2", .p = "prog2", .id = 10, }, + { .n = "pck3", .p = "prog3",
.id = 11, }, + { .n = "pck4", .p = "prog4", .id = 12, }, + { .n = "pck5", .p = "prog5", .id = 13, }, + { .n = "pck6", .p = "prog6", .id = 14, }, + { .n = "pck7", .p = "prog7", .id = 15, }, +}; + +/* Mux table for programmable clocks. */ +static u32 sama7g5_prog_mux_table[] = { 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, }; + +/** + * Peripheral clock description + * @n: clock name + * @p: clock parent name + * @r: clock range values + * @id: clock id + * @chgp: index in parent array of the changeable parent + */ +static const struct { + const char *n; + const char *p; + struct clk_range r; + u8 chgp; + u8 id; +} sama7g5_periphck[] = { + { .n = "pioA_clk", .p = "mck0", .id = 11, }, + { .n = "sfr_clk", .p = "mck1", .id = 19, }, + { .n = "hsmc_clk", .p = "mck1", .id = 21, }, + { .n = "xdmac0_clk", .p = "mck1", .id = 22, }, + { .n = "xdmac1_clk", .p = "mck1", .id = 23, }, + { .n = "xdmac2_clk", .p = "mck1", .id = 24, }, + { .n = "acc_clk", .p = "mck1", .id = 25, }, + { .n = "aes_clk", .p = "mck1", .id = 27, }, + { .n = "tzaesbasc_clk", .p = "mck1", .id = 28, }, + { .n = "asrc_clk", .p = "mck1", .id = 30, .r = { .max = 200000000, }, }, + { .n = "cpkcc_clk", .p = "mck0", .id = 32, }, + { .n = "csi_clk", .p = "mck3", .id = 33, .r = { .max = 266000000, }, .chgp = 1, }, + { .n = "csi2dc_clk", .p = "mck3", .id = 34, .r = { .max = 266000000, }, .chgp = 1, }, + { .n = "eic_clk", .p = "mck1", .id = 37, }, + { .n = "flex0_clk", .p = "mck1", .id = 38, }, + { .n = "flex1_clk", .p = "mck1", .id = 39, }, + { .n = "flex2_clk", .p = "mck1", .id = 40, }, + { .n = "flex3_clk", .p = "mck1", .id = 41, }, + { .n = "flex4_clk", .p = "mck1", .id = 42, }, + { .n = "flex5_clk", .p = "mck1", .id = 43, }, + { .n = "flex6_clk", .p = "mck1", .id = 44, }, + { .n = "flex7_clk", .p = "mck1", .id = 45, }, + { .n = "flex8_clk", .p = "mck1", .id = 46, }, + { .n = "flex9_clk", .p = "mck1", .id = 47, }, + { .n = "flex10_clk", .p = "mck1", .id = 48, }, + { .n = "flex11_clk", .p = "mck1", .id = 49, }, + { .n = 
"gmac0_clk", .p = "mck1", .id = 51, }, + { .n = "gmac1_clk", .p = "mck1", .id = 52, }, + { .n = "icm_clk", .p = "mck1", .id = 55, }, + { .n = "isc_clk", .p = "mck3", .id = 56, .r = { .max = 266000000, }, .chgp = 1, }, + { .n = "i2smcc0_clk", .p = "mck1", .id = 57, .r = { .max = 200000000, }, }, + { .n = "i2smcc1_clk", .p = "mck1", .id = 58, .r = { .max = 200000000, }, }, + { .n = "matrix_clk", .p = "mck1", .id = 60, }, + { .n = "mcan0_clk", .p = "mck1", .id = 61, .r = { .max = 200000000, }, }, + { .n = "mcan1_clk", .p = "mck1", .id = 62, .r = { .max = 200000000, }, }, + { .n = "mcan2_clk", .p = "mck1", .id = 63, .r = { .max = 200000000, }, }, + { .n = "mcan3_clk", .p = "mck1", .id = 64, .r = { .max = 200000000, }, }, + { .n = "mcan4_clk", .p = "mck1", .id = 65, .r = { .max = 200000000, }, }, + { .n = "mcan5_clk", .p = "mck1", .id = 66, .r = { .max = 200000000, }, }, + { .n = "pdmc0_clk", .p = "mck1", .id = 68, .r = { .max = 200000000, }, }, + { .n = "pdmc1_clk", .p = "mck1", .id = 69, .r = { .max = 200000000, }, }, + { .n = "pit64b0_clk", .p = "mck1", .id = 70, }, + { .n = "pit64b1_clk", .p = "mck1", .id = 71, }, + { .n = "pit64b2_clk", .p = "mck1", .id = 72, }, + { .n = "pit64b3_clk", .p = "mck1", .id = 73, }, + { .n = "pit64b4_clk", .p = "mck1", .id = 74, }, + { .n = "pit64b5_clk", .p = "mck1", .id = 75, }, + { .n = "pwm_clk", .p = "mck1", .id = 77, }, + { .n = "qspi0_clk", .p = "mck1", .id = 78, }, + { .n = "qspi1_clk", .p = "mck1", .id = 79, }, + { .n = "sdmmc0_clk", .p = "mck1", .id = 80, }, + { .n = "sdmmc1_clk", .p = "mck1", .id = 81, }, + { .n = "sdmmc2_clk", .p = "mck1", .id = 82, }, + { .n = "sha_clk", .p = "mck1", .id = 83, }, + { .n = "spdifrx_clk", .p = "mck1", .id = 84, .r = { .max = 200000000, }, }, + { .n = "spdiftx_clk", .p = "mck1", .id = 85, .r = { .max = 200000000, }, }, + { .n = "ssc0_clk", .p = "mck1", .id = 86, .r = { .max = 200000000, }, }, + { .n = "ssc1_clk", .p = "mck1", .id = 87, .r = { .max = 200000000, }, }, + { .n = "tcb0_ch0_clk", .p 
= "mck1", .id = 88, .r = { .max = 200000000, }, }, + { .n = "tcb0_ch1_clk", .p = "mck1", .id = 89, .r = { .max = 200000000, }, }, + { .n = "tcb0_ch2_clk", .p = "mck1", .id = 90, .r = { .max = 200000000, }, }, + { .n = "tcb1_ch0_clk", .p = "mck1", .id = 91, .r = { .max = 200000000, }, }, + { .n = "tcb1_ch1_clk", .p = "mck1", .id = 92, .r = { .max = 200000000, }, }, + { .n = "tcb1_ch2_clk", .p = "mck1", .id = 93, .r = { .max = 200000000, }, }, + { .n = "tcpca_clk", .p = "mck1", .id = 94, }, + { .n = "tcpcb_clk", .p = "mck1", .id = 95, }, + { .n = "tdes_clk", .p = "mck1", .id = 96, }, + { .n = "trng_clk", .p = "mck1", .id = 97, }, + { .n = "udphsa_clk", .p = "mck1", .id = 104, }, + { .n = "udphsb_clk", .p = "mck1", .id = 105, }, + { .n = "uhphs_clk", .p = "mck1", .id = 106, }, +}; + +/** + * Generic clock description + * @n: clock name + * @pp: PLL parents + * @pp_mux_table: PLL parents mux table + * @r: clock output range + * @pp_chg_id: id in parent array of changeable PLL parent + * @pp_count: PLL parents count + * @id: clock id + */ +static const struct { + const char *n; + const char *pp[8]; + const char pp_mux_table[8]; + struct clk_range r; + int pp_chg_id; + u8 pp_count; + u8 id; +} sama7g5_gck[] = { + { .n = "adc_gclk", + .id = 26, + .r = { .max = 100000000, }, + .pp = { "syspll_divpmcck", "imgpll_divpmcck", "audiopll_divpmcck", }, + .pp_mux_table = { 5, 7, 9, }, + .pp_count = 3, + .pp_chg_id = INT_MIN, }, + + { .n = "asrc_gclk", + .id = 30, + .r = { .max = 200000000 }, + .pp = { "audiopll_divpmcck", }, + .pp_mux_table = { 9, }, + .pp_count = 1, + .pp_chg_id = 4, }, + + { .n = "csi_gclk", + .id = 33, + .r = { .max = 27000000 }, + .pp = { "ddrpll_divpmcck", "imgpll_divpmcck", }, + .pp_mux_table = { 6, 7, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex0_gclk", + .id = 38, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n =
"flex1_gclk", + .id = 39, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex2_gclk", + .id = 40, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex3_gclk", + .id = 41, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex4_gclk", + .id = 42, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex5_gclk", + .id = 43, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex6_gclk", + .id = 44, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex7_gclk", + .id = 45, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex8_gclk", + .id = 46, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex9_gclk", + .id = 47, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex10_gclk", + .id = 48, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "flex11_gclk", + .id = 49, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", 
"baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "gmac0_gclk", + .id = 51, + .r = { .max = 125000000 }, + .pp = { "ethpll_divpmcck", }, + .pp_mux_table = { 10, }, + .pp_count = 1, + .pp_chg_id = 4, }, + + { .n = "gmac1_gclk", + .id = 52, + .r = { .max = 50000000 }, + .pp = { "ethpll_divpmcck", }, + .pp_mux_table = { 10, }, + .pp_count = 1, + .pp_chg_id = INT_MIN, }, + + { .n = "gmac0_tsu_gclk", + .id = 53, + .r = { .max = 300000000 }, + .pp = { "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 9, 10, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "gmac1_tsu_gclk", + .id = 54, + .r = { .max = 300000000 }, + .pp = { "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 9, 10, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "i2smcc0_gclk", + .id = 57, + .r = { .max = 100000000 }, + .pp = { "syspll_divpmcck", "audiopll_divpmcck", }, + .pp_mux_table = { 5, 9, }, + .pp_count = 2, + .pp_chg_id = 5, }, + + { .n = "i2smcc1_gclk", + .id = 58, + .r = { .max = 100000000 }, + .pp = { "syspll_divpmcck", "audiopll_divpmcck", }, + .pp_mux_table = { 5, 9, }, + .pp_count = 2, + .pp_chg_id = 5, }, + + { .n = "mcan0_gclk", + .id = 61, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "mcan1_gclk", + .id = 62, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "mcan2_gclk", + .id = 63, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "mcan3_gclk", + .id = 64, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "mcan4_gclk", + .id = 65, + 
.r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "mcan5_gclk", + .id = 66, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "pdmc0_gclk", + .id = 68, + .r = { .max = 50000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "pdmc1_gclk", + .id = 69, + .r = { .max = 50000000, }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "pit64b0_gclk", + .id = 70, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "imgpll_divpmcck", "baudpll_divpmcck", + "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 5, 7, 8, 9, 10, }, + .pp_count = 5, + .pp_chg_id = INT_MIN, }, + + { .n = "pit64b1_gclk", + .id = 71, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "imgpll_divpmcck", "baudpll_divpmcck", + "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 5, 7, 8, 9, 10, }, + .pp_count = 5, + .pp_chg_id = INT_MIN, }, + + { .n = "pit64b2_gclk", + .id = 72, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "imgpll_divpmcck", "baudpll_divpmcck", + "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 5, 7, 8, 9, 10, }, + .pp_count = 5, + .pp_chg_id = INT_MIN, }, + + { .n = "pit64b3_gclk", + .id = 73, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "imgpll_divpmcck", "baudpll_divpmcck", + "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 5, 7, 8, 9, 10, }, + .pp_count = 5, + .pp_chg_id = INT_MIN, }, + + { .n = "pit64b4_gclk", + .id = 74, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "imgpll_divpmcck", "baudpll_divpmcck", + "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 5, 7, 8, 9, 10, }, 
+ .pp_count = 5, + .pp_chg_id = INT_MIN, }, + + { .n = "pit64b5_gclk", + .id = 75, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "imgpll_divpmcck", "baudpll_divpmcck", + "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 5, 7, 8, 9, 10, }, + .pp_count = 5, + .pp_chg_id = INT_MIN, }, + + { .n = "qspi0_gclk", + .id = 78, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "qspi1_gclk", + .id = 79, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = INT_MIN, }, + + { .n = "sdmmc0_gclk", + .id = 80, + .r = { .max = 208000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = 5, }, + + { .n = "sdmmc1_gclk", + .id = 81, + .r = { .max = 208000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = 5, }, + + { .n = "sdmmc2_gclk", + .id = 82, + .r = { .max = 208000000 }, + .pp = { "syspll_divpmcck", "baudpll_divpmcck", }, + .pp_mux_table = { 5, 8, }, + .pp_count = 2, + .pp_chg_id = 5, }, + + { .n = "spdifrx_gclk", + .id = 84, + .r = { .max = 150000000 }, + .pp = { "syspll_divpmcck", "audiopll_divpmcck", }, + .pp_mux_table = { 5, 9, }, + .pp_count = 2, + .pp_chg_id = 5, }, + + { .n = "spdiftx_gclk", + .id = 85, + .r = { .max = 25000000 }, + .pp = { "syspll_divpmcck", "audiopll_divpmcck", }, + .pp_mux_table = { 5, 9, }, + .pp_count = 2, + .pp_chg_id = 5, }, + + { .n = "tcb0_ch0_gclk", + .id = 88, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", "imgpll_divpmcck", "baudpll_divpmcck", + "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 5, 7, 8, 9, 10, }, + .pp_count = 5, + .pp_chg_id = INT_MIN, }, + + { .n = "tcb1_ch0_gclk", + .id = 91, + .r = { .max = 200000000 }, + .pp = { "syspll_divpmcck", 
"imgpll_divpmcck", "baudpll_divpmcck", + "audiopll_divpmcck", "ethpll_divpmcck", }, + .pp_mux_table = { 5, 7, 8, 9, 10, }, + .pp_count = 5, + .pp_chg_id = INT_MIN, }, + + { .n = "tcpca_gclk", + .id = 94, + .r = { .max = 32768, }, + .pp_chg_id = INT_MIN, }, + + { .n = "tcpcb_gclk", + .id = 95, + .r = { .max = 32768, }, + .pp_chg_id = INT_MIN, }, +}; + +/* PLL output range. */ +static const struct clk_range pll_outputs[] = { + { .min = 2343750, .max = 1200000000 }, +}; + +/* PLL characteristics. */ +static const struct clk_pll_characteristics pll_characteristics = { + .input = { .min = 12000000, .max = 50000000 }, + .num_output = ARRAY_SIZE(pll_outputs), + .output = pll_outputs, +}; + +/* MCK0 characteristics. */ +static const struct clk_master_characteristics mck0_characteristics = { + .output = { .min = 140000000, .max = 200000000 }, + .divisors = { 1, 2, 4, 3 }, + .have_div3_pres = 1, +}; + +/* MCK0 layout. */ +static const struct clk_master_layout mck0_layout = { + .mask = 0x373, + .pres_shift = 4, + .offset = 0x28, +}; + +/* Programmable clock layout. */ +static const struct clk_programmable_layout programmable_layout = { + .pres_mask = 0xff, + .pres_shift = 8, + .css_mask = 0x1f, + .have_slck_mck = 0, + .is_pres_direct = 1, +}; + +/* Peripheral clock layout. 
*/ +static const struct clk_pcr_layout sama7g5_pcr_layout = { + .offset = 0x88, + .cmd = BIT(31), + .gckcss_mask = GENMASK(12, 8), + .pid_mask = GENMASK(6, 0), +}; + +static void __init sama7g5_pmc_setup(struct device_node *np) +{ + const char *td_slck_name, *md_slck_name, *mainxtal_name; + struct pmc_data *sama7g5_pmc; + const char *parent_names[10]; + void **alloc_mem = NULL; + int alloc_mem_size = 0; + struct regmap *regmap; + struct clk_hw *hw; + bool bypass; + int i, j; + + i = of_property_match_string(np, "clock-names", "td_slck"); + if (i < 0) + return; + + td_slck_name = of_clk_get_parent_name(np, i); + + i = of_property_match_string(np, "clock-names", "md_slck"); + if (i < 0) + return; + + md_slck_name = of_clk_get_parent_name(np, i); + + i = of_property_match_string(np, "clock-names", "main_xtal"); + if (i < 0) + return; + + mainxtal_name = of_clk_get_parent_name(np, i); + + regmap = device_node_to_regmap(np); + if (IS_ERR(regmap)) + return; + + sama7g5_pmc = pmc_data_allocate(PMC_I2S1_MUX + 1, + nck(sama7g5_systemck), + nck(sama7g5_periphck), + nck(sama7g5_gck)); + if (!sama7g5_pmc) + return; + + alloc_mem = kmalloc(sizeof(void *) * + (ARRAY_SIZE(sama7g5_mckx) + ARRAY_SIZE(sama7g5_gck)), + GFP_KERNEL); + if (!alloc_mem) + goto err_free; + + hw = at91_clk_register_main_rc_osc(regmap, "main_rc_osc", 12000000, + 50000000); + if (IS_ERR(hw)) + goto err_free; + + bypass = of_property_read_bool(np, "atmel,osc-bypass"); + + hw = at91_clk_register_main_osc(regmap, "main_osc", mainxtal_name, + bypass); + if (IS_ERR(hw)) + goto err_free; + + parent_names[0] = "main_rc_osc"; + parent_names[1] = "main_osc"; + hw = at91_clk_register_sam9x5_main(regmap, "mainck", parent_names, 2); + if (IS_ERR(hw)) + goto err_free; + + sama7g5_pmc->chws[PMC_MAIN] = hw; + + for (i = 0; i < PLL_ID_MAX; i++) { + for (j = 0; j < 3; j++) { + struct clk_hw *parent_hw; + + if (!sama7g5_plls[i][j].n) + continue; + + switch (sama7g5_plls[i][j].t) { + case PLL_TYPE_FRAC: + if 
(!strcmp(sama7g5_plls[i][j].p, "mainck")) + parent_hw = sama7g5_pmc->chws[PMC_MAIN]; + else + parent_hw = __clk_get_hw(of_clk_get_by_name(np, + sama7g5_plls[i][j].p)); + + hw = sam9x60_clk_register_frac_pll(regmap, + &pmc_pll_lock, sama7g5_plls[i][j].n, + sama7g5_plls[i][j].p, parent_hw, i, + &pll_characteristics, + sama7g5_plls[i][j].l, + sama7g5_plls[i][j].c); + break; + + case PLL_TYPE_DIV: + hw = sam9x60_clk_register_div_pll(regmap, + &pmc_pll_lock, sama7g5_plls[i][j].n, + sama7g5_plls[i][j].p, i, + &pll_characteristics, + sama7g5_plls[i][j].l, + sama7g5_plls[i][j].c); + break; + + default: + continue; + } + + if (IS_ERR(hw)) + goto err_free; + + if (sama7g5_plls[i][j].eid) + sama7g5_pmc->chws[sama7g5_plls[i][j].eid] = hw; + } + } + + parent_names[0] = md_slck_name; + parent_names[1] = "mainck"; + parent_names[2] = "cpupll_divpmcck"; + parent_names[3] = "syspll_divpmcck"; + hw = at91_clk_register_master(regmap, "mck0", 4, parent_names, + &mck0_layout, &mck0_characteristics); + if (IS_ERR(hw)) + goto err_free; + + sama7g5_pmc->chws[PMC_MCK] = hw; + + parent_names[0] = md_slck_name; + parent_names[1] = td_slck_name; + parent_names[2] = "mainck"; + parent_names[3] = "mck0"; + for (i = 0; i < ARRAY_SIZE(sama7g5_mckx); i++) { + u8 num_parents = 4 + sama7g5_mckx[i].ep_count; + u32 *mux_table; + + mux_table = kmalloc_array(num_parents, sizeof(*mux_table), + GFP_KERNEL); + if (!mux_table) + goto err_free; + + SAMA7G5_INIT_TABLE(mux_table, 4); + SAMA7G5_FILL_TABLE(&mux_table[4], sama7g5_mckx[i].ep_mux_table, + sama7g5_mckx[i].ep_count); + SAMA7G5_FILL_TABLE(&parent_names[4], sama7g5_mckx[i].ep, + sama7g5_mckx[i].ep_count); + + hw = at91_clk_sama7g5_register_master(regmap, sama7g5_mckx[i].n, + num_parents, parent_names, mux_table, + &pmc_mckX_lock, sama7g5_mckx[i].id, + sama7g5_mckx[i].c, + sama7g5_mckx[i].ep_chg_id); + if (IS_ERR(hw)) + goto err_free; + + alloc_mem[alloc_mem_size++] = mux_table; + } + + hw = at91_clk_sama7g5_register_utmi(regmap, "utmick", "main_xtal"); 
+ if (IS_ERR(hw)) + goto err_free; + + sama7g5_pmc->chws[PMC_UTMI] = hw; + + parent_names[0] = md_slck_name; + parent_names[1] = td_slck_name; + parent_names[2] = "mainck"; + parent_names[3] = "mck0"; + parent_names[4] = "syspll_divpmcck"; + parent_names[5] = "ddrpll_divpmcck"; + parent_names[6] = "imgpll_divpmcck"; + parent_names[7] = "baudpll_divpmcck"; + parent_names[8] = "audiopll_divpmcck"; + parent_names[9] = "ethpll_divpmcck"; + for (i = 0; i < 8; i++) { + char name[6]; + + snprintf(name, sizeof(name), "prog%d", i); + + hw = at91_clk_register_programmable(regmap, name, parent_names, + 10, i, + &programmable_layout, + sama7g5_prog_mux_table); + if (IS_ERR(hw)) + goto err_free; + } + + for (i = 0; i < ARRAY_SIZE(sama7g5_systemck); i++) { + hw = at91_clk_register_system(regmap, sama7g5_systemck[i].n, + sama7g5_systemck[i].p, + sama7g5_systemck[i].id); + if (IS_ERR(hw)) + goto err_free; + + sama7g5_pmc->shws[sama7g5_systemck[i].id] = hw; + } + + for (i = 0; i < ARRAY_SIZE(sama7g5_periphck); i++) { + hw = at91_clk_register_sam9x5_peripheral(regmap, &pmc_pcr_lock, + &sama7g5_pcr_layout, + sama7g5_periphck[i].n, + sama7g5_periphck[i].p, + sama7g5_periphck[i].id, + &sama7g5_periphck[i].r, + sama7g5_periphck[i].chgp ? 
0 : + INT_MIN); + if (IS_ERR(hw)) + goto err_free; + + sama7g5_pmc->phws[sama7g5_periphck[i].id] = hw; + } + + parent_names[0] = md_slck_name; + parent_names[1] = td_slck_name; + parent_names[2] = "mainck"; + parent_names[3] = "mck0"; + for (i = 0; i < ARRAY_SIZE(sama7g5_gck); i++) { + u8 num_parents = 4 + sama7g5_gck[i].pp_count; + u32 *mux_table; + + mux_table = kmalloc_array(num_parents, sizeof(*mux_table), + GFP_KERNEL); + if (!mux_table) + goto err_free; + + SAMA7G5_INIT_TABLE(mux_table, 4); + SAMA7G5_FILL_TABLE(&mux_table[4], sama7g5_gck[i].pp_mux_table, + sama7g5_gck[i].pp_count); + SAMA7G5_FILL_TABLE(&parent_names[4], sama7g5_gck[i].pp, + sama7g5_gck[i].pp_count); + + hw = at91_clk_register_generated(regmap, &pmc_pcr_lock, + &sama7g5_pcr_layout, + sama7g5_gck[i].n, + parent_names, mux_table, + num_parents, + sama7g5_gck[i].id, + &sama7g5_gck[i].r, + sama7g5_gck[i].pp_chg_id); + if (IS_ERR(hw)) + goto err_free; + + sama7g5_pmc->ghws[sama7g5_gck[i].id] = hw; + alloc_mem[alloc_mem_size++] = mux_table; + } + + of_clk_add_hw_provider(np, of_clk_hw_pmc_get, sama7g5_pmc); + + return; + +err_free: + if (alloc_mem) { + for (i = 0; i < alloc_mem_size; i++) + kfree(alloc_mem[i]); + kfree(alloc_mem); + } + + pmc_data_free(sama7g5_pmc); +} + +/* Some clks are used for a clocksource */ +CLK_OF_DECLARE(sama7g5_pmc, "microchip,sama7g5-pmc", sama7g5_pmc_setup); diff --git a/drivers/clk/at91/sckc.c b/drivers/clk/at91/sckc.c index 15dc4cd86d76..2d65770d8665 100644 --- a/drivers/clk/at91/sckc.c +++ b/drivers/clk/at91/sckc.c @@ -471,8 +471,9 @@ static void __init of_sam9x60_sckc_setup(struct device_node *np) if (!regbase) return; - slow_rc = clk_hw_register_fixed_rate(NULL, parent_names[0], NULL, 0, - 32768); + slow_rc = clk_hw_register_fixed_rate_with_accuracy(NULL, parent_names[0], + NULL, 0, 32768, + 93750000); if (IS_ERR(slow_rc)) return; diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index 027eba31f793..3439bc65bb4e 100644 --- 
a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -314,6 +314,7 @@ struct bcm2835_cprman { struct device *dev; void __iomem *regs; spinlock_t regs_lock; /* spinlock for all clocks */ + unsigned int soc; /* * Real names of cprman clock parents looked up through @@ -526,6 +527,20 @@ static int bcm2835_pll_is_on(struct clk_hw *hw) A2W_PLL_CTRL_PRST_DISABLE; } +static u32 bcm2835_pll_get_prediv_mask(struct bcm2835_cprman *cprman, + const struct bcm2835_pll_data *data) +{ + /* + * On BCM2711 there isn't a pre-divisor available in the PLL feedback + * loop. Bits 13:14 of ANA1 (PLLA,PLLB,PLLC,PLLD) have been re-purposed + * for to for VCO RANGE bits. + */ + if (cprman->soc & SOC_BCM2711) + return 0; + + return data->ana->fb_prediv_mask; +} + static void bcm2835_pll_choose_ndiv_and_fdiv(unsigned long rate, unsigned long parent_rate, u32 *ndiv, u32 *fdiv) @@ -583,7 +598,7 @@ static unsigned long bcm2835_pll_get_rate(struct clk_hw *hw, ndiv = (a2wctrl & A2W_PLL_CTRL_NDIV_MASK) >> A2W_PLL_CTRL_NDIV_SHIFT; pdiv = (a2wctrl & A2W_PLL_CTRL_PDIV_MASK) >> A2W_PLL_CTRL_PDIV_SHIFT; using_prediv = cprman_read(cprman, data->ana_reg_base + 4) & - data->ana->fb_prediv_mask; + bcm2835_pll_get_prediv_mask(cprman, data); if (using_prediv) { ndiv *= 2; @@ -666,6 +681,7 @@ static int bcm2835_pll_set_rate(struct clk_hw *hw, struct bcm2835_pll *pll = container_of(hw, struct bcm2835_pll, hw); struct bcm2835_cprman *cprman = pll->cprman; const struct bcm2835_pll_data *data = pll->data; + u32 prediv_mask = bcm2835_pll_get_prediv_mask(cprman, data); bool was_using_prediv, use_fb_prediv, do_ana_setup_first; u32 ndiv, fdiv, a2w_ctl; u32 ana[4]; @@ -683,7 +699,7 @@ static int bcm2835_pll_set_rate(struct clk_hw *hw, for (i = 3; i >= 0; i--) ana[i] = cprman_read(cprman, data->ana_reg_base + i * 4); - was_using_prediv = ana[1] & data->ana->fb_prediv_mask; + was_using_prediv = ana[1] & prediv_mask; ana[0] &= ~data->ana->mask0; ana[0] |= data->ana->set0; @@ -693,10 +709,10 @@ static 
int bcm2835_pll_set_rate(struct clk_hw *hw, ana[3] |= data->ana->set3; if (was_using_prediv && !use_fb_prediv) { - ana[1] &= ~data->ana->fb_prediv_mask; + ana[1] &= ~prediv_mask; do_ana_setup_first = true; } else if (!was_using_prediv && use_fb_prediv) { - ana[1] |= data->ana->fb_prediv_mask; + ana[1] |= prediv_mask; do_ana_setup_first = false; } else { do_ana_setup_first = true; @@ -2262,6 +2278,7 @@ static int bcm2835_clk_probe(struct platform_device *pdev) platform_set_drvdata(pdev, cprman); cprman->onecell.num = asize; + cprman->soc = pdata->soc; hws = cprman->onecell.hws; for (i = 0; i < asize; i++) { diff --git a/drivers/clk/bcm/clk-iproc-asiu.c b/drivers/clk/bcm/clk-iproc-asiu.c index 6fb8af506777..e062dd4992ea 100644 --- a/drivers/clk/bcm/clk-iproc-asiu.c +++ b/drivers/clk/bcm/clk-iproc-asiu.c @@ -119,7 +119,7 @@ static long iproc_asiu_clk_round_rate(struct clk_hw *hw, unsigned long rate, if (rate == *parent_rate) return *parent_rate; - div = DIV_ROUND_UP(*parent_rate, rate); + div = DIV_ROUND_CLOSEST(*parent_rate, rate); if (div < 2) return *parent_rate; @@ -145,7 +145,7 @@ static int iproc_asiu_clk_set_rate(struct clk_hw *hw, unsigned long rate, return 0; } - div = DIV_ROUND_UP(parent_rate, rate); + div = DIV_ROUND_CLOSEST(parent_rate, rate); if (div < 2) return -EINVAL; diff --git a/drivers/clk/clk-qoriq.c b/drivers/clk/clk-qoriq.c index 374afcab89af..5942e9874bc0 100644 --- a/drivers/clk/clk-qoriq.c +++ b/drivers/clk/clk-qoriq.c @@ -244,6 +244,14 @@ static const struct clockgen_muxinfo clockgen2_cmux_cgb = { }, }; +static const struct clockgen_muxinfo ls1021a_cmux = { + { + { CLKSEL_VALID, CGA_PLL1, PLL_DIV1 }, + { CLKSEL_VALID, CGA_PLL1, PLL_DIV2 }, + { CLKSEL_VALID, CGA_PLL1, PLL_DIV4 }, + } +}; + static const struct clockgen_muxinfo ls1028a_hwa1 = { { { CLKSEL_VALID, PLATFORM_PLL, PLL_DIV1 }, @@ -577,7 +585,7 @@ static const struct clockgen_chipinfo chipinfo[] = { { .compat = "fsl,ls1021a-clockgen", .cmux_groups = { - &t1023_cmux + &ls1021a_cmux }, 
.cmux_to_group = { 0, -1 diff --git a/drivers/clk/clk-sparx5.c b/drivers/clk/clk-sparx5.c new file mode 100644 index 000000000000..0fad0c1a0186 --- /dev/null +++ b/drivers/clk/clk-sparx5.c @@ -0,0 +1,295 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Microchip Sparx5 SoC Clock driver. + * + * Copyright (c) 2019 Microchip Inc. + * + * Author: Lars Povlsen <[email protected]> + */ + +#include <linux/io.h> +#include <linux/module.h> +#include <linux/clk-provider.h> +#include <linux/bitfield.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/platform_device.h> +#include <dt-bindings/clock/microchip,sparx5.h> + +#define PLL_DIV GENMASK(7, 0) +#define PLL_PRE_DIV GENMASK(10, 8) +#define PLL_ROT_DIR BIT(11) +#define PLL_ROT_SEL GENMASK(13, 12) +#define PLL_ROT_ENA BIT(14) +#define PLL_CLK_ENA BIT(15) + +#define MAX_SEL 4 +#define MAX_PRE BIT(3) + +static const u8 sel_rates[MAX_SEL] = { 0, 2*8, 2*4, 2*2 }; + +static const char *clk_names[N_CLOCKS] = { + "core", "ddr", "cpu2", "arm2", + "aux1", "aux2", "aux3", "aux4", + "synce", +}; + +struct s5_hw_clk { + struct clk_hw hw; + void __iomem *reg; +}; + +struct s5_clk_data { + void __iomem *base; + struct s5_hw_clk s5_hw[N_CLOCKS]; +}; + +struct s5_pll_conf { + unsigned long freq; + u8 div; + bool rot_ena; + u8 rot_sel; + u8 rot_dir; + u8 pre_div; +}; + +#define to_s5_pll(hw) container_of(hw, struct s5_hw_clk, hw) + +static unsigned long s5_calc_freq(unsigned long parent_rate, + const struct s5_pll_conf *conf) +{ + unsigned long rate = parent_rate / conf->div; + + if (conf->rot_ena) { + int sign = conf->rot_dir ? 
-1 : 1; + int divt = sel_rates[conf->rot_sel] * (1 + conf->pre_div); + int divb = divt + sign; + + rate = mult_frac(rate, divt, divb); + rate = roundup(rate, 1000); + } + + return rate; +} + +static void s5_search_fractional(unsigned long rate, + unsigned long parent_rate, + int div, + struct s5_pll_conf *conf) +{ + struct s5_pll_conf best; + ulong cur_offset, best_offset = rate; + int d, i, j; + + memset(conf, 0, sizeof(*conf)); + conf->div = div; + conf->rot_ena = 1; /* Fractional rate */ + + for (d = 0; best_offset > 0 && d <= 1 ; d++) { + conf->rot_dir = !!d; + for (i = 0; best_offset > 0 && i < MAX_PRE; i++) { + conf->pre_div = i; + for (j = 1; best_offset > 0 && j < MAX_SEL; j++) { + conf->rot_sel = j; + conf->freq = s5_calc_freq(parent_rate, conf); + cur_offset = abs(rate - conf->freq); + if (cur_offset < best_offset) { + best_offset = cur_offset; + best = *conf; + } + } + } + } + + /* Best match */ + *conf = best; +} + +static unsigned long s5_calc_params(unsigned long rate, + unsigned long parent_rate, + struct s5_pll_conf *conf) +{ + if (parent_rate % rate) { + struct s5_pll_conf alt1, alt2; + int div; + + div = DIV_ROUND_CLOSEST_ULL(parent_rate, rate); + s5_search_fractional(rate, parent_rate, div, &alt1); + + /* Straight match? 
*/ + if (alt1.freq == rate) { + *conf = alt1; + } else { + /* Try without rounding divider */ + div = parent_rate / rate; + if (div != alt1.div) { + s5_search_fractional(rate, parent_rate, div, + &alt2); + /* Select the better match */ + if (abs(rate - alt1.freq) < + abs(rate - alt2.freq)) + *conf = alt1; + else + *conf = alt2; + } + } + } else { + /* Straight fit */ + memset(conf, 0, sizeof(*conf)); + conf->div = parent_rate / rate; + } + + return conf->freq; +} + +static int s5_pll_enable(struct clk_hw *hw) +{ + struct s5_hw_clk *pll = to_s5_pll(hw); + u32 val = readl(pll->reg); + + val |= PLL_CLK_ENA; + writel(val, pll->reg); + + return 0; +} + +static void s5_pll_disable(struct clk_hw *hw) +{ + struct s5_hw_clk *pll = to_s5_pll(hw); + u32 val = readl(pll->reg); + + val &= ~PLL_CLK_ENA; + writel(val, pll->reg); +} + +static int s5_pll_set_rate(struct clk_hw *hw, + unsigned long rate, + unsigned long parent_rate) +{ + struct s5_hw_clk *pll = to_s5_pll(hw); + struct s5_pll_conf conf; + unsigned long eff_rate; + u32 val; + + eff_rate = s5_calc_params(rate, parent_rate, &conf); + if (eff_rate != rate) + return -EOPNOTSUPP; + + val = readl(pll->reg) & PLL_CLK_ENA; + val |= FIELD_PREP(PLL_DIV, conf.div); + if (conf.rot_ena) { + val |= PLL_ROT_ENA; + val |= FIELD_PREP(PLL_ROT_SEL, conf.rot_sel); + val |= FIELD_PREP(PLL_PRE_DIV, conf.pre_div); + if (conf.rot_dir) + val |= PLL_ROT_DIR; + } + writel(val, pll->reg); + + return 0; +} + +static unsigned long s5_pll_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct s5_hw_clk *pll = to_s5_pll(hw); + struct s5_pll_conf conf; + u32 val; + + val = readl(pll->reg); + + if (val & PLL_CLK_ENA) { + conf.div = FIELD_GET(PLL_DIV, val); + conf.pre_div = FIELD_GET(PLL_PRE_DIV, val); + conf.rot_ena = FIELD_GET(PLL_ROT_ENA, val); + conf.rot_dir = FIELD_GET(PLL_ROT_DIR, val); + conf.rot_sel = FIELD_GET(PLL_ROT_SEL, val); + + conf.freq = s5_calc_freq(parent_rate, &conf); + } else { + conf.freq = 0; + } + + return 
conf.freq; +} + +static long s5_pll_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) +{ + struct s5_pll_conf conf; + + return s5_calc_params(rate, *parent_rate, &conf); +} + +static const struct clk_ops s5_pll_ops = { + .enable = s5_pll_enable, + .disable = s5_pll_disable, + .set_rate = s5_pll_set_rate, + .round_rate = s5_pll_round_rate, + .recalc_rate = s5_pll_recalc_rate, +}; + +static struct clk_hw *s5_clk_hw_get(struct of_phandle_args *clkspec, void *data) +{ + struct s5_clk_data *s5_clk = data; + unsigned int idx = clkspec->args[0]; + + if (idx >= N_CLOCKS) { + pr_err("%s: invalid index %u\n", __func__, idx); + return ERR_PTR(-EINVAL); + } + + return &s5_clk->s5_hw[idx].hw; +} + +static int s5_clk_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + int i, ret; + struct s5_clk_data *s5_clk; + struct clk_parent_data pdata = { .index = 0 }; + struct clk_init_data init = { + .ops = &s5_pll_ops, + .num_parents = 1, + .parent_data = &pdata, + }; + + s5_clk = devm_kzalloc(dev, sizeof(*s5_clk), GFP_KERNEL); + if (!s5_clk) + return -ENOMEM; + + s5_clk->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(s5_clk->base)) + return PTR_ERR(s5_clk->base); + + for (i = 0; i < N_CLOCKS; i++) { + struct s5_hw_clk *s5_hw = &s5_clk->s5_hw[i]; + + init.name = clk_names[i]; + s5_hw->reg = s5_clk->base + (i * 4); + s5_hw->hw.init = &init; + ret = devm_clk_hw_register(dev, &s5_hw->hw); + if (ret) { + dev_err(dev, "failed to register %s clock\n", + init.name); + return ret; + } + } + + return devm_of_clk_add_hw_provider(dev, s5_clk_hw_get, s5_clk); +} + +static const struct of_device_id s5_clk_dt_ids[] = { + { .compatible = "microchip,sparx5-dpll", }, + { } +}; +MODULE_DEVICE_TABLE(of, s5_clk_dt_ids); + +static struct platform_driver s5_clk_driver = { + .probe = s5_clk_probe, + .driver = { + .name = "sparx5-clk", + .of_match_table = s5_clk_dt_ids, + }, +}; +builtin_platform_driver(s5_clk_driver); diff --git 
a/drivers/clk/clk-versaclock5.c b/drivers/clk/clk-versaclock5.c index 9a5fb3834b9a..c90460e7ef21 100644 --- a/drivers/clk/clk-versaclock5.c +++ b/drivers/clk/clk-versaclock5.c @@ -167,6 +167,12 @@ struct vc5_hw_data { u32 div_int; u32 div_frc; unsigned int num; +}; + +struct vc5_out_data { + struct clk_hw hw; + struct vc5_driver_data *vc5; + unsigned int num; unsigned int clk_output_cfg0; unsigned int clk_output_cfg0_mask; }; @@ -184,7 +190,7 @@ struct vc5_driver_data { struct clk_hw clk_pfd; struct vc5_hw_data clk_pll; struct vc5_hw_data clk_fod[VC5_MAX_FOD_NUM]; - struct vc5_hw_data clk_out[VC5_MAX_CLK_OUT_NUM]; + struct vc5_out_data clk_out[VC5_MAX_CLK_OUT_NUM]; }; /* @@ -567,7 +573,7 @@ static const struct clk_ops vc5_fod_ops = { static int vc5_clk_out_prepare(struct clk_hw *hw) { - struct vc5_hw_data *hwdata = container_of(hw, struct vc5_hw_data, hw); + struct vc5_out_data *hwdata = container_of(hw, struct vc5_out_data, hw); struct vc5_driver_data *vc5 = hwdata->vc5; const u8 mask = VC5_OUT_DIV_CONTROL_SELB_NORM | VC5_OUT_DIV_CONTROL_SEL_EXT | @@ -609,7 +615,7 @@ static int vc5_clk_out_prepare(struct clk_hw *hw) static void vc5_clk_out_unprepare(struct clk_hw *hw) { - struct vc5_hw_data *hwdata = container_of(hw, struct vc5_hw_data, hw); + struct vc5_out_data *hwdata = container_of(hw, struct vc5_out_data, hw); struct vc5_driver_data *vc5 = hwdata->vc5; /* Disable the clock buffer */ @@ -619,7 +625,7 @@ static void vc5_clk_out_unprepare(struct clk_hw *hw) static unsigned char vc5_clk_out_get_parent(struct clk_hw *hw) { - struct vc5_hw_data *hwdata = container_of(hw, struct vc5_hw_data, hw); + struct vc5_out_data *hwdata = container_of(hw, struct vc5_out_data, hw); struct vc5_driver_data *vc5 = hwdata->vc5; const u8 mask = VC5_OUT_DIV_CONTROL_SELB_NORM | VC5_OUT_DIV_CONTROL_SEL_EXT | @@ -649,7 +655,7 @@ static unsigned char vc5_clk_out_get_parent(struct clk_hw *hw) static int vc5_clk_out_set_parent(struct clk_hw *hw, u8 index) { - struct vc5_hw_data *hwdata = 
container_of(hw, struct vc5_hw_data, hw); + struct vc5_out_data *hwdata = container_of(hw, struct vc5_out_data, hw); struct vc5_driver_data *vc5 = hwdata->vc5; const u8 mask = VC5_OUT_DIV_CONTROL_RESET | VC5_OUT_DIV_CONTROL_SELB_NORM | @@ -704,7 +710,7 @@ static int vc5_map_index_to_output(const enum vc5_model model, } static int vc5_update_mode(struct device_node *np_output, - struct vc5_hw_data *clk_out) + struct vc5_out_data *clk_out) { u32 value; @@ -729,7 +735,7 @@ static int vc5_update_mode(struct device_node *np_output, } static int vc5_update_power(struct device_node *np_output, - struct vc5_hw_data *clk_out) + struct vc5_out_data *clk_out) { u32 value; @@ -754,7 +760,7 @@ static int vc5_update_power(struct device_node *np_output, } static int vc5_update_slew(struct device_node *np_output, - struct vc5_hw_data *clk_out) + struct vc5_out_data *clk_out) { u32 value; @@ -782,17 +788,20 @@ static int vc5_update_slew(struct device_node *np_output, } static int vc5_get_output_config(struct i2c_client *client, - struct vc5_hw_data *clk_out) + struct vc5_out_data *clk_out) { struct device_node *np_output; char *child_name; int ret = 0; child_name = kasprintf(GFP_KERNEL, "OUT%d", clk_out->num + 1); + if (!child_name) + return -ENOMEM; + np_output = of_get_child_by_name(client->dev.of_node, child_name); kfree(child_name); if (!np_output) - goto output_done; + return 0; ret = vc5_update_mode(np_output, clk_out); if (ret) @@ -813,7 +822,6 @@ output_error: of_node_put(np_output); -output_done: return ret; } @@ -828,7 +836,7 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) int ret; vc5 = devm_kzalloc(&client->dev, sizeof(*vc5), GFP_KERNEL); - if (vc5 == NULL) + if (!vc5) return -ENOMEM; i2c_set_clientdata(client, vc5); @@ -882,11 +890,9 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) init.parent_names = parent_names; vc5->clk_mux.init = &init; ret = devm_clk_hw_register(&client->dev, &vc5->clk_mux); + 
if (ret) + goto err_clk_register; kfree(init.name); /* clock framework made a copy of the name */ - if (ret) { - dev_err(&client->dev, "unable to register %s\n", init.name); - goto err_clk; - } if (vc5->chip_info->flags & VC5_HAS_PFD_FREQ_DBL) { /* Register frequency doubler */ @@ -900,12 +906,9 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) init.num_parents = 1; vc5->clk_mul.init = &init; ret = devm_clk_hw_register(&client->dev, &vc5->clk_mul); + if (ret) + goto err_clk_register; kfree(init.name); /* clock framework made a copy of the name */ - if (ret) { - dev_err(&client->dev, "unable to register %s\n", - init.name); - goto err_clk; - } } /* Register PFD */ @@ -921,11 +924,9 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) init.num_parents = 1; vc5->clk_pfd.init = &init; ret = devm_clk_hw_register(&client->dev, &vc5->clk_pfd); + if (ret) + goto err_clk_register; kfree(init.name); /* clock framework made a copy of the name */ - if (ret) { - dev_err(&client->dev, "unable to register %s\n", init.name); - goto err_clk; - } /* Register PLL */ memset(&init, 0, sizeof(init)); @@ -939,11 +940,9 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) vc5->clk_pll.vc5 = vc5; vc5->clk_pll.hw.init = &init; ret = devm_clk_hw_register(&client->dev, &vc5->clk_pll.hw); + if (ret) + goto err_clk_register; kfree(init.name); /* clock framework made a copy of the name */ - if (ret) { - dev_err(&client->dev, "unable to register %s\n", init.name); - goto err_clk; - } /* Register FODs */ for (n = 0; n < vc5->chip_info->clk_fod_cnt; n++) { @@ -960,12 +959,9 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) vc5->clk_fod[n].vc5 = vc5; vc5->clk_fod[n].hw.init = &init; ret = devm_clk_hw_register(&client->dev, &vc5->clk_fod[n].hw); + if (ret) + goto err_clk_register; kfree(init.name); /* clock framework made a copy of the name */ - if (ret) { - 
dev_err(&client->dev, "unable to register %s\n", - init.name); - goto err_clk; - } } /* Register MUX-connected OUT0_I2C_SELB output */ @@ -981,11 +977,9 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) vc5->clk_out[0].vc5 = vc5; vc5->clk_out[0].hw.init = &init; ret = devm_clk_hw_register(&client->dev, &vc5->clk_out[0].hw); - kfree(init.name); /* clock framework made a copy of the name */ - if (ret) { - dev_err(&client->dev, "unable to register %s\n", init.name); - goto err_clk; - } + if (ret) + goto err_clk_register; + kfree(init.name); /* clock framework made a copy of the name */ /* Register FOD-connected OUTx outputs */ for (n = 1; n < vc5->chip_info->clk_out_cnt; n++) { @@ -1008,12 +1002,9 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) vc5->clk_out[n].vc5 = vc5; vc5->clk_out[n].hw.init = &init; ret = devm_clk_hw_register(&client->dev, &vc5->clk_out[n].hw); + if (ret) + goto err_clk_register; kfree(init.name); /* clock framework made a copy of the name */ - if (ret) { - dev_err(&client->dev, "unable to register %s\n", - init.name); - goto err_clk; - } /* Fetch Clock Output configuration from DT (if specified) */ ret = vc5_get_output_config(client, &vc5->clk_out[n]); @@ -1029,6 +1020,9 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) return 0; +err_clk_register: + dev_err(&client->dev, "unable to register %s\n", init.name); + kfree(init.name); /* clock framework made a copy of the name */ err_clk: if (vc5->chip_info->flags & VC5_HAS_INTERNAL_XTAL) clk_unregister_fixed_rate(vc5->pin_xin); diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 236923b25543..0a9261a099bd 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -500,12 +500,6 @@ static unsigned long clk_core_get_accuracy_no_lock(struct clk_core *core) return core->accuracy; } -unsigned long __clk_get_flags(struct clk *clk) -{ - return !clk ? 
0 : clk->core->flags; -} -EXPORT_SYMBOL_GPL(__clk_get_flags); - unsigned long clk_hw_get_flags(const struct clk_hw *hw) { return hw->core->flags; @@ -3054,6 +3048,31 @@ static int clk_rate_set(void *data, u64 val) } #define clk_rate_mode 0644 + +static int clk_prepare_enable_set(void *data, u64 val) +{ + struct clk_core *core = data; + int ret = 0; + + if (val) + ret = clk_prepare_enable(core->hw->clk); + else + clk_disable_unprepare(core->hw->clk); + + return ret; +} + +static int clk_prepare_enable_get(void *data, u64 *val) +{ + struct clk_core *core = data; + + *val = core->enable_count && core->prepare_count; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(clk_prepare_enable_fops, clk_prepare_enable_get, + clk_prepare_enable_set, "%llu\n"); + #else #define clk_rate_set NULL #define clk_rate_mode 0444 @@ -3231,6 +3250,10 @@ static void clk_debug_create_one(struct clk_core *core, struct dentry *pdentry) debugfs_create_u32("clk_notifier_count", 0444, root, &core->notifier_count); debugfs_create_file("clk_duty_cycle", 0444, root, core, &clk_duty_cycle_fops); +#ifdef CLOCK_ALLOW_WRITE_DEBUGFS + debugfs_create_file("clk_prepare_enable", 0644, root, core, + &clk_prepare_enable_fops); +#endif if (core->num_parents > 0) debugfs_create_file("clk_parent", 0444, root, core, @@ -4135,6 +4158,7 @@ static int devm_clk_hw_match(struct device *dev, void *res, void *data) /** * devm_clk_unregister - resource managed clk_unregister() + * @dev: device that is unregistering the clock data * @clk: clock to unregister * * Deallocate a clock allocated with devm_clk_register(). Normally @@ -4324,6 +4348,8 @@ static void clk_core_reparent_orphans(void) * @node: Pointer to device tree node of clock provider * @get: Get clock callback. Returns NULL or a struct clk for the * given clock specifier + * @get_hw: Get clk_hw callback. 
Returns NULL, ERR_PTR or a + * struct clk_hw for the given clock specifier * @data: context pointer to be passed into @get callback */ struct of_clk_provider { diff --git a/drivers/clk/davinci/pll.c b/drivers/clk/davinci/pll.c index 8a23d5dfd1f8..6c35e4bb7940 100644 --- a/drivers/clk/davinci/pll.c +++ b/drivers/clk/davinci/pll.c @@ -651,7 +651,7 @@ static int davinci_pll_sysclk_rate_change(struct notifier_block *nb, pllcmd = readl(pll->base + PLLCMD); pllcmd |= PLLCMD_GOSET; writel(pllcmd, pll->base + PLLCMD); - /* fallthrough */ + fallthrough; case PRE_RATE_CHANGE: /* Wait until for outstanding changes to take effect */ do { diff --git a/drivers/clk/imx/clk-pllv3.c b/drivers/clk/imx/clk-pllv3.c index a7db93030e02..b20cdea3e9cc 100644 --- a/drivers/clk/imx/clk-pllv3.c +++ b/drivers/clk/imx/clk-pllv3.c @@ -433,7 +433,7 @@ struct clk_hw *imx_clk_hw_pllv3(enum imx_pllv3_type type, const char *name, break; case IMX_PLLV3_USB_VF610: pll->div_shift = 1; - /* fall through */ + fallthrough; case IMX_PLLV3_USB: ops = &clk_pllv3_ops; pll->powerup_set = true; @@ -441,7 +441,7 @@ struct clk_hw *imx_clk_hw_pllv3(enum imx_pllv3_type type, const char *name, case IMX_PLLV3_AV_IMX7: pll->num_offset = PLL_IMX7_NUM_OFFSET; pll->denom_offset = PLL_IMX7_DENOM_OFFSET; - /* fall through */ + fallthrough; case IMX_PLLV3_AV: ops = &clk_pllv3_av_ops; break; diff --git a/drivers/clk/ingenic/jz4780-cgu.c b/drivers/clk/ingenic/jz4780-cgu.c index 6c5b8029cc8a..0268d23ebe2e 100644 --- a/drivers/clk/ingenic/jz4780-cgu.c +++ b/drivers/clk/ingenic/jz4780-cgu.c @@ -4,6 +4,7 @@ * * Copyright (c) 2013-2015 Imagination Technologies * Author: Paul Burton <[email protected]> + * Copyright (c) 2020 周琰杰 (Zhou Yanjie) <[email protected]> */ #include <linux/clk-provider.h> @@ -19,49 +20,50 @@ /* CGU register offsets */ #define CGU_REG_CLOCKCONTROL 0x00 -#define CGU_REG_LCR 0x04 -#define CGU_REG_APLL 0x10 -#define CGU_REG_MPLL 0x14 -#define CGU_REG_EPLL 0x18 -#define CGU_REG_VPLL 0x1c -#define CGU_REG_CLKGR0 
0x20 -#define CGU_REG_OPCR 0x24 -#define CGU_REG_CLKGR1 0x28 -#define CGU_REG_DDRCDR 0x2c -#define CGU_REG_VPUCDR 0x30 -#define CGU_REG_USBPCR 0x3c -#define CGU_REG_USBRDT 0x40 -#define CGU_REG_USBVBFIL 0x44 -#define CGU_REG_USBPCR1 0x48 -#define CGU_REG_LP0CDR 0x54 -#define CGU_REG_I2SCDR 0x60 -#define CGU_REG_LP1CDR 0x64 -#define CGU_REG_MSC0CDR 0x68 -#define CGU_REG_UHCCDR 0x6c -#define CGU_REG_SSICDR 0x74 -#define CGU_REG_CIMCDR 0x7c -#define CGU_REG_PCMCDR 0x84 -#define CGU_REG_GPUCDR 0x88 -#define CGU_REG_HDMICDR 0x8c -#define CGU_REG_MSC1CDR 0xa4 -#define CGU_REG_MSC2CDR 0xa8 -#define CGU_REG_BCHCDR 0xac -#define CGU_REG_CLOCKSTATUS 0xd4 +#define CGU_REG_LCR 0x04 +#define CGU_REG_APLL 0x10 +#define CGU_REG_MPLL 0x14 +#define CGU_REG_EPLL 0x18 +#define CGU_REG_VPLL 0x1c +#define CGU_REG_CLKGR0 0x20 +#define CGU_REG_OPCR 0x24 +#define CGU_REG_CLKGR1 0x28 +#define CGU_REG_DDRCDR 0x2c +#define CGU_REG_VPUCDR 0x30 +#define CGU_REG_USBPCR 0x3c +#define CGU_REG_USBRDT 0x40 +#define CGU_REG_USBVBFIL 0x44 +#define CGU_REG_USBPCR1 0x48 +#define CGU_REG_LP0CDR 0x54 +#define CGU_REG_I2SCDR 0x60 +#define CGU_REG_LP1CDR 0x64 +#define CGU_REG_MSC0CDR 0x68 +#define CGU_REG_UHCCDR 0x6c +#define CGU_REG_SSICDR 0x74 +#define CGU_REG_CIMCDR 0x7c +#define CGU_REG_PCMCDR 0x84 +#define CGU_REG_GPUCDR 0x88 +#define CGU_REG_HDMICDR 0x8c +#define CGU_REG_MSC1CDR 0xa4 +#define CGU_REG_MSC2CDR 0xa8 +#define CGU_REG_BCHCDR 0xac +#define CGU_REG_CLOCKSTATUS 0xd4 /* bits within the OPCR register */ -#define OPCR_SPENDN0 BIT(7) -#define OPCR_SPENDN1 BIT(6) +#define OPCR_SPENDN0 BIT(7) +#define OPCR_SPENDN1 BIT(6) /* bits within the USBPCR register */ -#define USBPCR_USB_MODE BIT(31) +#define USBPCR_USB_MODE BIT(31) #define USBPCR_IDPULLUP_MASK (0x3 << 28) -#define USBPCR_COMMONONN BIT(25) -#define USBPCR_VBUSVLDEXT BIT(24) +#define USBPCR_COMMONONN BIT(25) +#define USBPCR_VBUSVLDEXT BIT(24) #define USBPCR_VBUSVLDEXTSEL BIT(23) -#define USBPCR_POR BIT(22) -#define USBPCR_OTG_DISABLE BIT(20) 
+#define USBPCR_POR BIT(22) +#define USBPCR_SIDDQ BIT(21) +#define USBPCR_OTG_DISABLE BIT(20) #define USBPCR_COMPDISTUNE_MASK (0x7 << 17) -#define USBPCR_OTGTUNE_MASK (0x7 << 14) +#define USBPCR_OTGTUNE_MASK (0x7 << 14) #define USBPCR_SQRXTUNE_MASK (0x7 << 11) #define USBPCR_TXFSLSTUNE_MASK (0xf << 7) #define USBPCR_TXPREEMPHTUNE BIT(6) @@ -78,13 +80,13 @@ #define USBPCR1_REFCLKDIV_48 (0x2 << USBPCR1_REFCLKDIV_SHIFT) #define USBPCR1_REFCLKDIV_24 (0x1 << USBPCR1_REFCLKDIV_SHIFT) #define USBPCR1_REFCLKDIV_12 (0x0 << USBPCR1_REFCLKDIV_SHIFT) -#define USBPCR1_USB_SEL BIT(28) -#define USBPCR1_WORD_IF0 BIT(19) -#define USBPCR1_WORD_IF1 BIT(18) +#define USBPCR1_USB_SEL BIT(28) +#define USBPCR1_WORD_IF0 BIT(19) +#define USBPCR1_WORD_IF1 BIT(18) /* bits within the USBRDT register */ -#define USBRDT_VBFIL_LD_EN BIT(25) -#define USBRDT_USBRDT_MASK 0x7fffff +#define USBRDT_VBFIL_LD_EN BIT(25) +#define USBRDT_USBRDT_MASK 0x7fffff /* bits within the USBVBFIL register */ #define USBVBFIL_IDDIGFIL_SHIFT 16 @@ -92,40 +94,14 @@ #define USBVBFIL_USBVBFIL_MASK (0xffff) /* bits within the LCR register */ -#define LCR_PD_SCPU BIT(31) -#define LCR_SCPUS BIT(27) +#define LCR_PD_SCPU BIT(31) +#define LCR_SCPUS BIT(27) /* bits within the CLKGR1 register */ -#define CLKGR1_CORE1 BIT(15) +#define CLKGR1_CORE1 BIT(15) static struct ingenic_cgu *cgu; -static u8 jz4780_otg_phy_get_parent(struct clk_hw *hw) -{ - /* we only use CLKCORE, revisit if that ever changes */ - return 0; -} - -static int jz4780_otg_phy_set_parent(struct clk_hw *hw, u8 idx) -{ - unsigned long flags; - u32 usbpcr1; - - if (idx > 0) - return -EINVAL; - - spin_lock_irqsave(&cgu->lock, flags); - - usbpcr1 = readl(cgu->base + CGU_REG_USBPCR1); - usbpcr1 &= ~USBPCR1_REFCLKSEL_MASK; - /* we only use CLKCORE */ - usbpcr1 |= USBPCR1_REFCLKSEL_CORE; - writel(usbpcr1, cgu->base + CGU_REG_USBPCR1); - - spin_unlock_irqrestore(&cgu->lock, flags); - return 0; -} - static unsigned long jz4780_otg_phy_recalc_rate(struct clk_hw *hw, 
unsigned long parent_rate) { @@ -149,7 +125,6 @@ static unsigned long jz4780_otg_phy_recalc_rate(struct clk_hw *hw, return 19200000; } - BUG(); return parent_rate; } @@ -206,13 +181,43 @@ static int jz4780_otg_phy_set_rate(struct clk_hw *hw, unsigned long req_rate, return 0; } -static const struct clk_ops jz4780_otg_phy_ops = { - .get_parent = jz4780_otg_phy_get_parent, - .set_parent = jz4780_otg_phy_set_parent, +static int jz4780_otg_phy_enable(struct clk_hw *hw) +{ + void __iomem *reg_opcr = cgu->base + CGU_REG_OPCR; + void __iomem *reg_usbpcr = cgu->base + CGU_REG_USBPCR; + + writel(readl(reg_opcr) | OPCR_SPENDN0, reg_opcr); + writel(readl(reg_usbpcr) & ~USBPCR_OTG_DISABLE & ~USBPCR_SIDDQ, reg_usbpcr); + return 0; +} +static void jz4780_otg_phy_disable(struct clk_hw *hw) +{ + void __iomem *reg_opcr = cgu->base + CGU_REG_OPCR; + void __iomem *reg_usbpcr = cgu->base + CGU_REG_USBPCR; + + writel(readl(reg_opcr) & ~OPCR_SPENDN0, reg_opcr); + writel(readl(reg_usbpcr) | USBPCR_OTG_DISABLE | USBPCR_SIDDQ, reg_usbpcr); +} + +static int jz4780_otg_phy_is_enabled(struct clk_hw *hw) +{ + void __iomem *reg_opcr = cgu->base + CGU_REG_OPCR; + void __iomem *reg_usbpcr = cgu->base + CGU_REG_USBPCR; + + return (readl(reg_opcr) & OPCR_SPENDN0) && + !(readl(reg_usbpcr) & USBPCR_SIDDQ) && + !(readl(reg_usbpcr) & USBPCR_OTG_DISABLE); +} + +static const struct clk_ops jz4780_otg_phy_ops = { .recalc_rate = jz4780_otg_phy_recalc_rate, .round_rate = jz4780_otg_phy_round_rate, .set_rate = jz4780_otg_phy_set_rate, + + .enable = jz4780_otg_phy_enable, + .disable = jz4780_otg_phy_disable, + .is_enabled = jz4780_otg_phy_is_enabled, }; static int jz4780_core1_enable(struct clk_hw *hw) @@ -516,6 +521,18 @@ static const struct ingenic_cgu_clk_info jz4780_cgu_clocks[] = { .gate = { CGU_REG_CLKGR0, 1 }, }, + [JZ4780_CLK_EXCLK_DIV512] = { + "exclk_div512", CGU_CLK_FIXDIV, + .parents = { JZ4780_CLK_EXCLK }, + .fixdiv = { 512 }, + }, + + [JZ4780_CLK_RTC] = { + "rtc_ercs", CGU_CLK_MUX | CGU_CLK_GATE, 
+ .parents = { JZ4780_CLK_EXCLK_DIV512, JZ4780_CLK_RTCLK }, + .mux = { CGU_REG_OPCR, 2, 1}, + }, + /* Gate-only clocks */ [JZ4780_CLK_NEMC] = { diff --git a/drivers/clk/ingenic/x1000-cgu.c b/drivers/clk/ingenic/x1000-cgu.c index 453f3323cb99..9aa20b52e1c3 100644 --- a/drivers/clk/ingenic/x1000-cgu.c +++ b/drivers/clk/ingenic/x1000-cgu.c @@ -48,8 +48,87 @@ #define USBPCR_SIDDQ BIT(21) #define USBPCR_OTG_DISABLE BIT(20) +/* bits within the USBPCR1 register */ +#define USBPCR1_REFCLKSEL_SHIFT 26 +#define USBPCR1_REFCLKSEL_MASK (0x3 << USBPCR1_REFCLKSEL_SHIFT) +#define USBPCR1_REFCLKSEL_CORE (0x2 << USBPCR1_REFCLKSEL_SHIFT) +#define USBPCR1_REFCLKDIV_SHIFT 24 +#define USBPCR1_REFCLKDIV_MASK (0x3 << USBPCR1_REFCLKDIV_SHIFT) +#define USBPCR1_REFCLKDIV_48 (0x2 << USBPCR1_REFCLKDIV_SHIFT) +#define USBPCR1_REFCLKDIV_24 (0x1 << USBPCR1_REFCLKDIV_SHIFT) +#define USBPCR1_REFCLKDIV_12 (0x0 << USBPCR1_REFCLKDIV_SHIFT) + static struct ingenic_cgu *cgu; +static unsigned long x1000_otg_phy_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + u32 usbpcr1; + unsigned refclk_div; + + usbpcr1 = readl(cgu->base + CGU_REG_USBPCR1); + refclk_div = usbpcr1 & USBPCR1_REFCLKDIV_MASK; + + switch (refclk_div) { + case USBPCR1_REFCLKDIV_12: + return 12000000; + + case USBPCR1_REFCLKDIV_24: + return 24000000; + + case USBPCR1_REFCLKDIV_48: + return 48000000; + } + + return parent_rate; +} + +static long x1000_otg_phy_round_rate(struct clk_hw *hw, unsigned long req_rate, + unsigned long *parent_rate) +{ + if (req_rate < 18000000) + return 12000000; + + if (req_rate < 36000000) + return 24000000; + + return 48000000; +} + +static int x1000_otg_phy_set_rate(struct clk_hw *hw, unsigned long req_rate, + unsigned long parent_rate) +{ + unsigned long flags; + u32 usbpcr1, div_bits; + + switch (req_rate) { + case 12000000: + div_bits = USBPCR1_REFCLKDIV_12; + break; + + case 24000000: + div_bits = USBPCR1_REFCLKDIV_24; + break; + + case 48000000: + div_bits = USBPCR1_REFCLKDIV_48; + break; + 
+ default: + return -EINVAL; + } + + spin_lock_irqsave(&cgu->lock, flags); + + usbpcr1 = readl(cgu->base + CGU_REG_USBPCR1); + usbpcr1 &= ~USBPCR1_REFCLKDIV_MASK; + usbpcr1 |= div_bits; + writel(usbpcr1, cgu->base + CGU_REG_USBPCR1); + + spin_unlock_irqrestore(&cgu->lock, flags); + return 0; +} + static int x1000_usb_phy_enable(struct clk_hw *hw) { void __iomem *reg_opcr = cgu->base + CGU_REG_OPCR; @@ -80,6 +159,10 @@ static int x1000_usb_phy_is_enabled(struct clk_hw *hw) } static const struct clk_ops x1000_otg_phy_ops = { + .recalc_rate = x1000_otg_phy_recalc_rate, + .round_rate = x1000_otg_phy_round_rate, + .set_rate = x1000_otg_phy_set_rate, + .enable = x1000_usb_phy_enable, .disable = x1000_usb_phy_disable, .is_enabled = x1000_usb_phy_is_enabled, @@ -144,7 +227,6 @@ static const struct ingenic_cgu_clk_info x1000_cgu_clocks[] = { }, }, - /* Custom (SoC-specific) OTG PHY */ [X1000_CLK_OTGPHY] = { @@ -278,6 +360,19 @@ static const struct ingenic_cgu_clk_info x1000_cgu_clocks[] = { .mux = { CGU_REG_SSICDR, 30, 1 }, }, + [X1000_CLK_EXCLK_DIV512] = { + "exclk_div512", CGU_CLK_FIXDIV, + .parents = { X1000_CLK_EXCLK }, + .fixdiv = { 512 }, + }, + + [X1000_CLK_RTC] = { + "rtc_ercs", CGU_CLK_MUX | CGU_CLK_GATE, + .parents = { X1000_CLK_EXCLK_DIV512, X1000_CLK_RTCLK }, + .mux = { CGU_REG_OPCR, 2, 1}, + .gate = { CGU_REG_CLKGR, 27 }, + }, + /* Gate-only clocks */ [X1000_CLK_EMC] = { diff --git a/drivers/clk/ingenic/x1830-cgu.c b/drivers/clk/ingenic/x1830-cgu.c index a1b2ff0ee487..950aee243364 100644 --- a/drivers/clk/ingenic/x1830-cgu.c +++ b/drivers/clk/ingenic/x1830-cgu.c @@ -329,6 +329,19 @@ static const struct ingenic_cgu_clk_info x1830_cgu_clocks[] = { .mux = { CGU_REG_SSICDR, 29, 1 }, }, + [X1830_CLK_EXCLK_DIV512] = { + "exclk_div512", CGU_CLK_FIXDIV, + .parents = { X1830_CLK_EXCLK }, + .fixdiv = { 512 }, + }, + + [X1830_CLK_RTC] = { + "rtc_ercs", CGU_CLK_MUX | CGU_CLK_GATE, + .parents = { X1830_CLK_EXCLK_DIV512, X1830_CLK_RTCLK }, + .mux = { CGU_REG_OPCR, 2, 1}, + 
.gate = { CGU_REG_CLKGR0, 29 }, + }, + /* Gate-only clocks */ [X1830_CLK_EMC] = { diff --git a/drivers/clk/mmp/clk-pxa168.c b/drivers/clk/mmp/clk-pxa168.c index 8e2551ab8462..b351039cac09 100644 --- a/drivers/clk/mmp/clk-pxa168.c +++ b/drivers/clk/mmp/clk-pxa168.c @@ -10,6 +10,7 @@ */ #include <linux/clk.h> +#include <linux/clk/mmp.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/spinlock.h> diff --git a/drivers/clk/mmp/clk-pxa910.c b/drivers/clk/mmp/clk-pxa910.c index 7a7965141918..f254ceff3ea7 100644 --- a/drivers/clk/mmp/clk-pxa910.c +++ b/drivers/clk/mmp/clk-pxa910.c @@ -10,6 +10,7 @@ */ #include <linux/clk.h> +#include <linux/clk/mmp.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/spinlock.h> diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig index 318c0adfaae1..058327310c25 100644 --- a/drivers/clk/qcom/Kconfig +++ b/drivers/clk/qcom/Kconfig @@ -308,6 +308,15 @@ config SC_GCC_7180 Say Y if you want to use peripheral devices such as UART, SPI, I2C, USB, UFS, SDCC, etc. +config SC_LPASS_CORECC_7180 + tristate "SC7180 LPASS Core Clock Controller" + select SC_GCC_7180 + help + Support for the LPASS(Low Power Audio Subsystem) core clock controller + on SC7180 devices. + Say Y if you want to use LPASS clocks and power domains of the LPASS + core clock controller. + config SC_GPUCC_7180 tristate "SC7180 Graphics Clock Controller" select SC_GCC_7180 @@ -419,6 +428,22 @@ config SM_GCC_8250 Say Y if you want to use peripheral devices such as UART, SPI, I2C, USB, SD/UFS, PCIe etc. +config SM_GPUCC_8150 + tristate "SM8150 Graphics Clock Controller" + select SM_GCC_8150 + help + Support for the graphics clock controller on SM8150 devices. + Say Y if you want to support graphics controller devices and + functionality such as 3D graphics. + +config SM_GPUCC_8250 + tristate "SM8250 Graphics Clock Controller" + select SM_GCC_8250 + help + Support for the graphics clock controller on SM8250 devices. 
+ Say Y if you want to support graphics controller devices and + functionality such as 3D graphics. + config SPMI_PMIC_CLKDIV tristate "SPMI PMIC clkdiv Support" depends on SPMI || COMPILE_TEST diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile index ae0979bebe18..9677e769e7e9 100644 --- a/drivers/clk/qcom/Makefile +++ b/drivers/clk/qcom/Makefile @@ -54,6 +54,7 @@ obj-$(CONFIG_QCS_TURING_404) += turingcc-qcs404.o obj-$(CONFIG_SC_DISPCC_7180) += dispcc-sc7180.o obj-$(CONFIG_SC_GCC_7180) += gcc-sc7180.o obj-$(CONFIG_SC_GPUCC_7180) += gpucc-sc7180.o +obj-$(CONFIG_SC_LPASS_CORECC_7180) += lpasscorecc-sc7180.o obj-$(CONFIG_SC_MSS_7180) += mss-sc7180.o obj-$(CONFIG_SC_VIDEOCC_7180) += videocc-sc7180.o obj-$(CONFIG_SDM_CAMCC_845) += camcc-sdm845.o @@ -65,6 +66,8 @@ obj-$(CONFIG_SDM_LPASSCC_845) += lpasscc-sdm845.o obj-$(CONFIG_SDM_VIDEOCC_845) += videocc-sdm845.o obj-$(CONFIG_SM_GCC_8150) += gcc-sm8150.o obj-$(CONFIG_SM_GCC_8250) += gcc-sm8250.o +obj-$(CONFIG_SM_GPUCC_8150) += gpucc-sm8150.o +obj-$(CONFIG_SM_GPUCC_8250) += gpucc-sm8250.o obj-$(CONFIG_SPMI_PMIC_CLKDIV) += clk-spmi-pmic-div.o obj-$(CONFIG_KPSS_XCC) += kpss-xcc.o obj-$(CONFIG_QCOM_HFPLL) += hfpll.o diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c index 9b2dfa08acb2..26139ef005e4 100644 --- a/drivers/clk/qcom/clk-alpha-pll.c +++ b/drivers/clk/qcom/clk-alpha-pll.c @@ -56,7 +56,6 @@ #define PLL_STATUS(p) ((p)->offset + (p)->regs[PLL_OFF_STATUS]) #define PLL_OPMODE(p) ((p)->offset + (p)->regs[PLL_OFF_OPMODE]) #define PLL_FRAC(p) ((p)->offset + (p)->regs[PLL_OFF_FRAC]) -#define PLL_CAL_VAL(p) ((p)->offset + (p)->regs[PLL_OFF_CAL_VAL]) const u8 clk_alpha_pll_regs[][PLL_OFF_MAX_REGS] = { [CLK_ALPHA_PLL_TYPE_DEFAULT] = { @@ -112,22 +111,6 @@ const u8 clk_alpha_pll_regs[][PLL_OFF_MAX_REGS] = { [PLL_OFF_CONFIG_CTL_U1] = 0x20, [PLL_OFF_TEST_CTL] = 0x24, [PLL_OFF_TEST_CTL_U] = 0x28, - [PLL_OFF_STATUS] = 0x30, - [PLL_OFF_OPMODE] = 0x38, - [PLL_OFF_ALPHA_VAL] = 0x40, - 
[PLL_OFF_CAL_VAL] = 0x44, - }, - [CLK_ALPHA_PLL_TYPE_LUCID] = { - [PLL_OFF_L_VAL] = 0x04, - [PLL_OFF_CAL_L_VAL] = 0x08, - [PLL_OFF_USER_CTL] = 0x0c, - [PLL_OFF_USER_CTL_U] = 0x10, - [PLL_OFF_USER_CTL_U1] = 0x14, - [PLL_OFF_CONFIG_CTL] = 0x18, - [PLL_OFF_CONFIG_CTL_U] = 0x1c, - [PLL_OFF_CONFIG_CTL_U1] = 0x20, - [PLL_OFF_TEST_CTL] = 0x24, - [PLL_OFF_TEST_CTL_U] = 0x28, [PLL_OFF_TEST_CTL_U1] = 0x2c, [PLL_OFF_STATUS] = 0x30, [PLL_OFF_OPMODE] = 0x38, @@ -156,9 +139,12 @@ EXPORT_SYMBOL_GPL(clk_alpha_pll_regs); #define PLL_OUT_MASK 0x7 #define PLL_RATE_MARGIN 500 +/* TRION PLL specific settings and offsets */ +#define TRION_PLL_CAL_VAL 0x44 +#define TRION_PCAL_DONE BIT(26) + /* LUCID PLL specific settings and offsets */ -#define LUCID_PLL_CAL_VAL 0x44 -#define LUCID_PCAL_DONE BIT(26) +#define LUCID_PCAL_DONE BIT(27) #define pll_alpha_width(p) \ ((PLL_ALPHA_VAL_U(p) - PLL_ALPHA_VAL(p) == 4) ? \ @@ -912,14 +898,14 @@ const struct clk_ops clk_alpha_pll_hwfsm_ops = { }; EXPORT_SYMBOL_GPL(clk_alpha_pll_hwfsm_ops); -const struct clk_ops clk_trion_fixed_pll_ops = { +const struct clk_ops clk_alpha_pll_fixed_trion_ops = { .enable = clk_trion_pll_enable, .disable = clk_trion_pll_disable, .is_enabled = clk_trion_pll_is_enabled, .recalc_rate = clk_trion_pll_recalc_rate, .round_rate = clk_alpha_pll_round_rate, }; -EXPORT_SYMBOL_GPL(clk_trion_fixed_pll_ops); +EXPORT_SYMBOL_GPL(clk_alpha_pll_fixed_trion_ops); static unsigned long clk_alpha_pll_postdiv_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) @@ -1339,12 +1325,12 @@ clk_trion_pll_postdiv_set_rate(struct clk_hw *hw, unsigned long rate, val << PLL_POST_DIV_SHIFT); } -const struct clk_ops clk_trion_pll_postdiv_ops = { +const struct clk_ops clk_alpha_pll_postdiv_trion_ops = { .recalc_rate = clk_trion_pll_postdiv_recalc_rate, .round_rate = clk_trion_pll_postdiv_round_rate, .set_rate = clk_trion_pll_postdiv_set_rate, }; -EXPORT_SYMBOL_GPL(clk_trion_pll_postdiv_ops); +EXPORT_SYMBOL_GPL(clk_alpha_pll_postdiv_trion_ops); static 
long clk_alpha_pll_postdiv_fabia_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *prate) @@ -1399,13 +1385,13 @@ EXPORT_SYMBOL_GPL(clk_alpha_pll_postdiv_fabia_ops); * @regmap: register map * @config: configuration to apply for pll */ -void clk_lucid_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, +void clk_trion_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, const struct alpha_pll_config *config) { if (config->l) regmap_write(regmap, PLL_L_VAL(pll), config->l); - regmap_write(regmap, PLL_CAL_L_VAL(pll), LUCID_PLL_CAL_VAL); + regmap_write(regmap, PLL_CAL_L_VAL(pll), TRION_PLL_CAL_VAL); if (config->alpha) regmap_write(regmap, PLL_ALPHA_VAL(pll), config->alpha); @@ -1458,13 +1444,13 @@ void clk_lucid_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, /* Place the PLL in STANDBY mode */ regmap_update_bits(regmap, PLL_MODE(pll), PLL_RESET_N, PLL_RESET_N); } -EXPORT_SYMBOL_GPL(clk_lucid_pll_configure); +EXPORT_SYMBOL_GPL(clk_trion_pll_configure); /* - * The Lucid PLL requires a power-on self-calibration which happens when the + * The TRION PLL requires a power-on self-calibration which happens when the * PLL comes out of reset. Calibrate in case it is not completed. */ -static int alpha_pll_lucid_prepare(struct clk_hw *hw) +static int __alpha_pll_trion_prepare(struct clk_hw *hw, u32 pcal_done) { struct clk_alpha_pll *pll = to_clk_alpha_pll(hw); u32 regval; @@ -1472,7 +1458,7 @@ static int alpha_pll_lucid_prepare(struct clk_hw *hw) /* Return early if calibration is not needed. 
*/ regmap_read(pll->clkr.regmap, PLL_STATUS(pll), &regval); - if (regval & LUCID_PCAL_DONE) + if (regval & pcal_done) return 0; /* On/off to calibrate */ @@ -1483,7 +1469,17 @@ static int alpha_pll_lucid_prepare(struct clk_hw *hw) return ret; } -static int alpha_pll_lucid_set_rate(struct clk_hw *hw, unsigned long rate, +static int alpha_pll_trion_prepare(struct clk_hw *hw) +{ + return __alpha_pll_trion_prepare(hw, TRION_PCAL_DONE); +} + +static int alpha_pll_lucid_prepare(struct clk_hw *hw) +{ + return __alpha_pll_trion_prepare(hw, LUCID_PCAL_DONE); +} + +static int alpha_pll_trion_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long prate) { struct clk_alpha_pll *pll = to_clk_alpha_pll(hw); @@ -1537,25 +1533,27 @@ static int alpha_pll_lucid_set_rate(struct clk_hw *hw, unsigned long rate, return 0; } -const struct clk_ops clk_alpha_pll_lucid_ops = { - .prepare = alpha_pll_lucid_prepare, +const struct clk_ops clk_alpha_pll_trion_ops = { + .prepare = alpha_pll_trion_prepare, .enable = clk_trion_pll_enable, .disable = clk_trion_pll_disable, .is_enabled = clk_trion_pll_is_enabled, .recalc_rate = clk_trion_pll_recalc_rate, .round_rate = clk_alpha_pll_round_rate, - .set_rate = alpha_pll_lucid_set_rate, + .set_rate = alpha_pll_trion_set_rate, }; -EXPORT_SYMBOL_GPL(clk_alpha_pll_lucid_ops); +EXPORT_SYMBOL_GPL(clk_alpha_pll_trion_ops); -const struct clk_ops clk_alpha_pll_fixed_lucid_ops = { +const struct clk_ops clk_alpha_pll_lucid_ops = { + .prepare = alpha_pll_lucid_prepare, .enable = clk_trion_pll_enable, .disable = clk_trion_pll_disable, .is_enabled = clk_trion_pll_is_enabled, .recalc_rate = clk_trion_pll_recalc_rate, .round_rate = clk_alpha_pll_round_rate, + .set_rate = alpha_pll_trion_set_rate, }; -EXPORT_SYMBOL_GPL(clk_alpha_pll_fixed_lucid_ops); +EXPORT_SYMBOL_GPL(clk_alpha_pll_lucid_ops); const struct clk_ops clk_alpha_pll_postdiv_lucid_ops = { .recalc_rate = clk_alpha_pll_postdiv_fabia_recalc_rate, diff --git a/drivers/clk/qcom/clk-alpha-pll.h 
b/drivers/clk/qcom/clk-alpha-pll.h index 1ba82be93dd5..d3201b87c0cd 100644 --- a/drivers/clk/qcom/clk-alpha-pll.h +++ b/drivers/clk/qcom/clk-alpha-pll.h @@ -14,7 +14,7 @@ enum { CLK_ALPHA_PLL_TYPE_BRAMMO, CLK_ALPHA_PLL_TYPE_FABIA, CLK_ALPHA_PLL_TYPE_TRION, - CLK_ALPHA_PLL_TYPE_LUCID, + CLK_ALPHA_PLL_TYPE_LUCID = CLK_ALPHA_PLL_TYPE_TRION, CLK_ALPHA_PLL_TYPE_MAX, }; @@ -134,18 +134,23 @@ extern const struct clk_ops clk_alpha_pll_fabia_ops; extern const struct clk_ops clk_alpha_pll_fixed_fabia_ops; extern const struct clk_ops clk_alpha_pll_postdiv_fabia_ops; +extern const struct clk_ops clk_alpha_pll_trion_ops; +extern const struct clk_ops clk_alpha_pll_fixed_trion_ops; +extern const struct clk_ops clk_alpha_pll_postdiv_trion_ops; + extern const struct clk_ops clk_alpha_pll_lucid_ops; -extern const struct clk_ops clk_alpha_pll_fixed_lucid_ops; +#define clk_alpha_pll_fixed_lucid_ops clk_alpha_pll_fixed_trion_ops extern const struct clk_ops clk_alpha_pll_postdiv_lucid_ops; void clk_alpha_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, const struct alpha_pll_config *config); void clk_fabia_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, const struct alpha_pll_config *config); -void clk_lucid_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, +void clk_trion_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, const struct alpha_pll_config *config); +#define clk_lucid_pll_configure(pll, regmap, config) \ + clk_trion_pll_configure(pll, regmap, config) + -extern const struct clk_ops clk_trion_fixed_pll_ops; -extern const struct clk_ops clk_trion_pll_postdiv_ops; #endif diff --git a/drivers/clk/qcom/gcc-sc7180.c b/drivers/clk/qcom/gcc-sc7180.c index 538677befb86..68d8f7aaf64e 100644 --- a/drivers/clk/qcom/gcc-sc7180.c +++ b/drivers/clk/qcom/gcc-sc7180.c @@ -2251,6 +2251,19 @@ static struct clk_branch gcc_mss_q6_memnoc_axi_clk = { }, }; +static struct clk_branch gcc_lpass_cfg_noc_sway_clk = { + .halt_reg = 0x47018, + 
.halt_check = BRANCH_HALT_DELAY, + .clkr = { + .enable_reg = 0x47018, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_lpass_cfg_noc_sway_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + static struct gdsc ufs_phy_gdsc = { .gdscr = 0x77004, .pd = { @@ -2428,6 +2441,7 @@ static struct clk_regmap *gcc_sc7180_clocks[] = { [GCC_MSS_Q6_MEMNOC_AXI_CLK] = &gcc_mss_q6_memnoc_axi_clk.clkr, [GCC_MSS_SNOC_AXI_CLK] = &gcc_mss_snoc_axi_clk.clkr, [GCC_SEC_CTRL_CLK_SRC] = &gcc_sec_ctrl_clk_src.clkr, + [GCC_LPASS_CFG_NOC_SWAY_CLK] = &gcc_lpass_cfg_noc_sway_clk.clkr, }; static const struct qcom_reset_map gcc_sc7180_resets[] = { diff --git a/drivers/clk/qcom/gcc-sdm660.c b/drivers/clk/qcom/gcc-sdm660.c index bf5730832ef3..f0b47b7d50ca 100644 --- a/drivers/clk/qcom/gcc-sdm660.c +++ b/drivers/clk/qcom/gcc-sdm660.c @@ -1715,6 +1715,9 @@ static struct clk_branch gcc_mss_cfg_ahb_clk = { static struct clk_branch gcc_mss_mnoc_bimc_axi_clk = { .halt_reg = 0x8a004, + .halt_check = BRANCH_HALT, + .hwcg_reg = 0x8a004, + .hwcg_bit = 1, .clkr = { .enable_reg = 0x8a004, .enable_mask = BIT(0), @@ -2402,6 +2405,7 @@ static const struct qcom_reset_map gcc_sdm660_resets[] = { [GCC_USB_20_BCR] = { 0x2f000 }, [GCC_USB_30_BCR] = { 0xf000 }, [GCC_USB_PHY_CFG_AHB2PHY_BCR] = { 0x6a000 }, + [GCC_MSS_RESTART] = { 0x79000 }, }; static const struct regmap_config gcc_sdm660_regmap_config = { diff --git a/drivers/clk/qcom/gcc-sm8150.c b/drivers/clk/qcom/gcc-sm8150.c index 72524cf11048..8e9b5b3cceaf 100644 --- a/drivers/clk/qcom/gcc-sm8150.c +++ b/drivers/clk/qcom/gcc-sm8150.c @@ -34,14 +34,8 @@ enum { P_SLEEP_CLK, }; -static const struct pll_vco trion_vco[] = { - { 249600000, 2000000000, 0 }, -}; - static struct clk_alpha_pll gpll0 = { .offset = 0x0, - .vco_table = trion_vco, - .num_vco = ARRAY_SIZE(trion_vco), .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_TRION], .clkr = { .enable_reg = 0x52000, @@ -53,7 +47,7 @@ static struct clk_alpha_pll gpll0 = { .name = "bi_tcxo", }, 
.num_parents = 1, - .ops = &clk_trion_fixed_pll_ops, + .ops = &clk_alpha_pll_fixed_trion_ops, }, }, }; @@ -79,14 +73,12 @@ static struct clk_alpha_pll_postdiv gpll0_out_even = { .hw = &gpll0.clkr.hw, }, .num_parents = 1, - .ops = &clk_trion_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_trion_ops, }, }; static struct clk_alpha_pll gpll7 = { .offset = 0x1a000, - .vco_table = trion_vco, - .num_vco = ARRAY_SIZE(trion_vco), .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_TRION], .clkr = { .enable_reg = 0x52000, @@ -98,15 +90,13 @@ static struct clk_alpha_pll gpll7 = { .name = "bi_tcxo", }, .num_parents = 1, - .ops = &clk_trion_fixed_pll_ops, + .ops = &clk_alpha_pll_fixed_trion_ops, }, }, }; static struct clk_alpha_pll gpll9 = { .offset = 0x1c000, - .vco_table = trion_vco, - .num_vco = ARRAY_SIZE(trion_vco), .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_TRION], .clkr = { .enable_reg = 0x52000, @@ -118,7 +108,7 @@ static struct clk_alpha_pll gpll9 = { .name = "bi_tcxo", }, .num_parents = 1, - .ops = &clk_trion_fixed_pll_ops, + .ops = &clk_alpha_pll_fixed_trion_ops, }, }, }; @@ -1617,6 +1607,7 @@ static struct clk_branch gcc_gpu_cfg_ahb_clk = { }; static struct clk_branch gcc_gpu_gpll0_clk_src = { + .halt_check = BRANCH_HALT_SKIP, .clkr = { .enable_reg = 0x52004, .enable_mask = BIT(15), @@ -1632,13 +1623,14 @@ static struct clk_branch gcc_gpu_gpll0_clk_src = { }; static struct clk_branch gcc_gpu_gpll0_div_clk_src = { + .halt_check = BRANCH_HALT_SKIP, .clkr = { .enable_reg = 0x52004, .enable_mask = BIT(16), .hw.init = &(struct clk_init_data){ .name = "gcc_gpu_gpll0_div_clk_src", .parent_hws = (const struct clk_hw *[]){ - &gcc_gpu_gpll0_clk_src.clkr.hw }, + &gpll0_out_even.clkr.hw }, .num_parents = 1, .flags = CLK_SET_RATE_PARENT, .ops = &clk_branch2_ops, @@ -1729,6 +1721,7 @@ static struct clk_branch gcc_npu_cfg_ahb_clk = { }; static struct clk_branch gcc_npu_gpll0_clk_src = { + .halt_check = BRANCH_HALT_SKIP, .clkr = { .enable_reg = 0x52004, .enable_mask = BIT(18), @@ 
-1744,13 +1737,14 @@ static struct clk_branch gcc_npu_gpll0_clk_src = { }; static struct clk_branch gcc_npu_gpll0_div_clk_src = { + .halt_check = BRANCH_HALT_SKIP, .clkr = { .enable_reg = 0x52004, .enable_mask = BIT(19), .hw.init = &(struct clk_init_data){ .name = "gcc_npu_gpll0_div_clk_src", .parent_hws = (const struct clk_hw *[]){ - &gcc_npu_gpll0_clk_src.clkr.hw }, + &gpll0_out_even.clkr.hw }, .num_parents = 1, .flags = CLK_SET_RATE_PARENT, .ops = &clk_branch2_ops, diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c index 04944f11659b..bfc4ac02f9ea 100644 --- a/drivers/clk/qcom/gdsc.c +++ b/drivers/clk/qcom/gdsc.c @@ -6,6 +6,7 @@ #include <linux/bitops.h> #include <linux/delay.h> #include <linux/err.h> +#include <linux/export.h> #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/ktime.h> @@ -29,6 +30,7 @@ /* CFG_GDSCR */ #define GDSC_POWER_UP_COMPLETE BIT(16) #define GDSC_POWER_DOWN_COMPLETE BIT(15) +#define GDSC_RETAIN_FF_ENABLE BIT(11) #define CFG_GDSCR_OFFSET 0x4 /* Wait 2^n CXO cycles between all states. Here, n=2 (4 cycles). */ @@ -216,6 +218,14 @@ static inline void gdsc_assert_reset_aon(struct gdsc *sc) regmap_update_bits(sc->regmap, sc->clamp_io_ctrl, GMEM_RESET_MASK, 0); } + +static void gdsc_retain_ff_on(struct gdsc *sc) +{ + u32 mask = GDSC_RETAIN_FF_ENABLE; + + regmap_update_bits(sc->regmap, sc->gdscr, mask, mask); +} + static int gdsc_enable(struct generic_pm_domain *domain) { struct gdsc *sc = domain_to_gdsc(domain); @@ -268,6 +278,9 @@ static int gdsc_enable(struct generic_pm_domain *domain) udelay(1); } + if (sc->flags & RETAIN_FF_ENABLE) + gdsc_retain_ff_on(sc); + return 0; } @@ -433,3 +446,29 @@ void gdsc_unregister(struct gdsc_desc *desc) } of_genpd_del_provider(dev->of_node); } + +/* + * On SDM845+ the GPU GX domain is *almost* entirely controlled by the GMU + * running in the CX domain so the CPU doesn't need to know anything about the + * GX domain EXCEPT.... 
+ * + * Hardware constraints dictate that the GX be powered down before the CX. If + * the GMU crashes it could leave the GX on. In order to successfully bring back + * the device the CPU needs to disable the GX headswitch. There being no sane + * way to reach in and touch that register from deep inside the GPU driver we + * need to set up the infrastructure to be able to ensure that the GPU can + * ensure that the GX is off during this super special case. We do this by + * defining a GX gdsc with a dummy enable function and a "default" disable + * function. + * + * This allows us to attach with genpd_dev_pm_attach_by_name() in the GPU + * driver. During power up, nothing will happen from the CPU (and the GMU will + * power up normally but during power down this will ensure that the GX domain + * is *really* off - this gives us a semi standard way of doing what we need. + */ +int gdsc_gx_do_nothing_enable(struct generic_pm_domain *domain) +{ + /* Do nothing but give genpd the impression that we were successful */ + return 0; +} +EXPORT_SYMBOL_GPL(gdsc_gx_do_nothing_enable); diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h index c36fc26dcdff..bd537438c793 100644 --- a/drivers/clk/qcom/gdsc.h +++ b/drivers/clk/qcom/gdsc.h @@ -50,6 +50,7 @@ struct gdsc { #define AON_RESET BIT(4) #define POLL_CFG_GDSCR BIT(5) #define ALWAYS_ON BIT(6) +#define RETAIN_FF_ENABLE BIT(7) struct reset_controller_dev *rcdev; unsigned int *resets; unsigned int reset_count; @@ -68,6 +69,7 @@ struct gdsc_desc { int gdsc_register(struct gdsc_desc *desc, struct reset_controller_dev *, struct regmap *); void gdsc_unregister(struct gdsc_desc *desc); +int gdsc_gx_do_nothing_enable(struct generic_pm_domain *domain); #else static inline int gdsc_register(struct gdsc_desc *desc, struct reset_controller_dev *rcdev, diff --git a/drivers/clk/qcom/gpucc-sc7180.c b/drivers/clk/qcom/gpucc-sc7180.c index 7b656b6aeced..88a739b6fec3 100644 --- a/drivers/clk/qcom/gpucc-sc7180.c +++ 
b/drivers/clk/qcom/gpucc-sc7180.c @@ -170,37 +170,12 @@ static struct gdsc cx_gdsc = { .flags = VOTABLE, }; -/* - * On SC7180 the GPU GX domain is *almost* entirely controlled by the GMU - * running in the CX domain so the CPU doesn't need to know anything about the - * GX domain EXCEPT.... - * - * Hardware constraints dictate that the GX be powered down before the CX. If - * the GMU crashes it could leave the GX on. In order to successfully bring back - * the device the CPU needs to disable the GX headswitch. There being no sane - * way to reach in and touch that register from deep inside the GPU driver we - * need to set up the infrastructure to be able to ensure that the GPU can - * ensure that the GX is off during this super special case. We do this by - * defining a GX gdsc with a dummy enable function and a "default" disable - * function. - * - * This allows us to attach with genpd_dev_pm_attach_by_name() in the GPU - * driver. During power up, nothing will happen from the CPU (and the GMU will - * power up normally but during power down this will ensure that the GX domain - * is *really* off - this gives us a semi standard way of doing what we need. 
- */ -static int gx_gdsc_enable(struct generic_pm_domain *domain) -{ - /* Do nothing but give genpd the impression that we were successful */ - return 0; -} - static struct gdsc gx_gdsc = { .gdscr = 0x100c, .clamp_io_ctrl = 0x1508, .pd = { .name = "gx_gdsc", - .power_on = gx_gdsc_enable, + .power_on = gdsc_gx_do_nothing_enable, }, .pwrsts = PWRSTS_OFF_ON, .flags = CLAMP_IO, diff --git a/drivers/clk/qcom/gpucc-sdm845.c b/drivers/clk/qcom/gpucc-sdm845.c index e40efba1bf7d..5663698b306b 100644 --- a/drivers/clk/qcom/gpucc-sdm845.c +++ b/drivers/clk/qcom/gpucc-sdm845.c @@ -131,37 +131,12 @@ static struct gdsc gpu_cx_gdsc = { .flags = VOTABLE, }; -/* - * On SDM845 the GPU GX domain is *almost* entirely controlled by the GMU - * running in the CX domain so the CPU doesn't need to know anything about the - * GX domain EXCEPT.... - * - * Hardware constraints dictate that the GX be powered down before the CX. If - * the GMU crashes it could leave the GX on. In order to successfully bring back - * the device the CPU needs to disable the GX headswitch. There being no sane - * way to reach in and touch that register from deep inside the GPU driver we - * need to set up the infrastructure to be able to ensure that the GPU can - * ensure that the GX is off during this super special case. We do this by - * defining a GX gdsc with a dummy enable function and a "default" disable - * function. - * - * This allows us to attach with genpd_dev_pm_attach_by_name() in the GPU - * driver. During power up, nothing will happen from the CPU (and the GMU will - * power up normally but during power down this will ensure that the GX domain - * is *really* off - this gives us a semi standard way of doing what we need. 
- */ -static int gx_gdsc_enable(struct generic_pm_domain *domain) -{ - /* Do nothing but give genpd the impression that we were successful */ - return 0; -} - static struct gdsc gpu_gx_gdsc = { .gdscr = 0x100c, .clamp_io_ctrl = 0x1508, .pd = { .name = "gpu_gx_gdsc", - .power_on = gx_gdsc_enable, + .power_on = gdsc_gx_do_nothing_enable, }, .pwrsts = PWRSTS_OFF_ON, .flags = CLAMP_IO | AON_RESET | POLL_CFG_GDSCR, diff --git a/drivers/clk/qcom/gpucc-sm8150.c b/drivers/clk/qcom/gpucc-sm8150.c new file mode 100644 index 000000000000..27c40754b2c7 --- /dev/null +++ b/drivers/clk/qcom/gpucc-sm8150.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved. + */ + +#include <linux/clk-provider.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> + +#include <dt-bindings/clock/qcom,gpucc-sm8150.h> + +#include "common.h" +#include "clk-alpha-pll.h" +#include "clk-branch.h" +#include "clk-pll.h" +#include "clk-rcg.h" +#include "clk-regmap.h" +#include "reset.h" +#include "gdsc.h" + +enum { + P_BI_TCXO, + P_CORE_BI_PLL_TEST_SE, + P_GPLL0_OUT_MAIN, + P_GPLL0_OUT_MAIN_DIV, + P_GPU_CC_PLL1_OUT_MAIN, +}; + +static const struct pll_vco trion_vco[] = { + { 249600000, 2000000000, 0 }, +}; + +static struct alpha_pll_config gpu_cc_pll1_config = { + .l = 0x1a, + .alpha = 0xaaa, + .config_ctl_val = 0x20485699, + .config_ctl_hi_val = 0x00002267, + .config_ctl_hi1_val = 0x00000024, + .test_ctl_val = 0x00000000, + .test_ctl_hi_val = 0x00000002, + .test_ctl_hi1_val = 0x00000000, + .user_ctl_val = 0x00000000, + .user_ctl_hi_val = 0x00000805, + .user_ctl_hi1_val = 0x000000d0, +}; + +static struct clk_alpha_pll gpu_cc_pll1 = { + .offset = 0x100, + .vco_table = trion_vco, + .num_vco = ARRAY_SIZE(trion_vco), + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_TRION], + .clkr = { + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_pll1", + .parent_data = &(const struct clk_parent_data){ 
+ .fw_name = "bi_tcxo", + }, + .num_parents = 1, + .ops = &clk_alpha_pll_trion_ops, + }, + }, +}; + +static const struct parent_map gpu_cc_parent_map_0[] = { + { P_BI_TCXO, 0 }, + { P_GPU_CC_PLL1_OUT_MAIN, 3 }, + { P_GPLL0_OUT_MAIN, 5 }, + { P_GPLL0_OUT_MAIN_DIV, 6 }, +}; + +static const struct clk_parent_data gpu_cc_parent_data_0[] = { + { .fw_name = "bi_tcxo" }, + { .hw = &gpu_cc_pll1.clkr.hw }, + { .fw_name = "gcc_gpu_gpll0_clk_src" }, + { .fw_name = "gcc_gpu_gpll0_div_clk_src" }, +}; + +static const struct freq_tbl ftbl_gpu_cc_gmu_clk_src[] = { + F(19200000, P_BI_TCXO, 1, 0, 0), + F(200000000, P_GPLL0_OUT_MAIN_DIV, 1.5, 0, 0), + F(500000000, P_GPU_CC_PLL1_OUT_MAIN, 1, 0, 0), + { } +}; + +static struct clk_rcg2 gpu_cc_gmu_clk_src = { + .cmd_rcgr = 0x1120, + .mnd_width = 0, + .hid_width = 5, + .parent_map = gpu_cc_parent_map_0, + .freq_tbl = ftbl_gpu_cc_gmu_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gpu_cc_gmu_clk_src", + .parent_data = gpu_cc_parent_data_0, + .num_parents = ARRAY_SIZE(gpu_cc_parent_data_0), + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_branch gpu_cc_ahb_clk = { + .halt_reg = 0x1078, + .halt_check = BRANCH_HALT_DELAY, + .clkr = { + .enable_reg = 0x1078, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_ahb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_crc_ahb_clk = { + .halt_reg = 0x107c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x107c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_crc_ahb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cx_apb_clk = { + .halt_reg = 0x1088, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x1088, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cx_apb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cx_gmu_clk = { + .halt_reg = 
0x1098, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x1098, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cx_gmu_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &gpu_cc_gmu_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cx_snoc_dvm_clk = { + .halt_reg = 0x108c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x108c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cx_snoc_dvm_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cxo_aon_clk = { + .halt_reg = 0x1004, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x1004, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cxo_aon_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cxo_clk = { + .halt_reg = 0x109c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x109c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cxo_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_gx_gmu_clk = { + .halt_reg = 0x1064, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x1064, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_gx_gmu_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &gpu_cc_gmu_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct gdsc gpu_cx_gdsc = { + .gdscr = 0x106c, + .gds_hw_ctrl = 0x1540, + .pd = { + .name = "gpu_cx_gdsc", + }, + .pwrsts = PWRSTS_OFF_ON, + .flags = VOTABLE, +}; + +static struct gdsc gpu_gx_gdsc = { + .gdscr = 0x100c, + .clamp_io_ctrl = 0x1508, + .pd = { + .name = "gpu_gx_gdsc", + .power_on = gdsc_gx_do_nothing_enable, + }, + .pwrsts = PWRSTS_OFF_ON, + .flags = CLAMP_IO | AON_RESET | 
POLL_CFG_GDSCR, +}; + +static struct clk_regmap *gpu_cc_sm8150_clocks[] = { + [GPU_CC_AHB_CLK] = &gpu_cc_ahb_clk.clkr, + [GPU_CC_CRC_AHB_CLK] = &gpu_cc_crc_ahb_clk.clkr, + [GPU_CC_CX_APB_CLK] = &gpu_cc_cx_apb_clk.clkr, + [GPU_CC_CX_GMU_CLK] = &gpu_cc_cx_gmu_clk.clkr, + [GPU_CC_CX_SNOC_DVM_CLK] = &gpu_cc_cx_snoc_dvm_clk.clkr, + [GPU_CC_CXO_AON_CLK] = &gpu_cc_cxo_aon_clk.clkr, + [GPU_CC_CXO_CLK] = &gpu_cc_cxo_clk.clkr, + [GPU_CC_GMU_CLK_SRC] = &gpu_cc_gmu_clk_src.clkr, + [GPU_CC_GX_GMU_CLK] = &gpu_cc_gx_gmu_clk.clkr, + [GPU_CC_PLL1] = &gpu_cc_pll1.clkr, +}; + +static const struct qcom_reset_map gpu_cc_sm8150_resets[] = { + [GPUCC_GPU_CC_CX_BCR] = { 0x1068 }, + [GPUCC_GPU_CC_GMU_BCR] = { 0x111c }, + [GPUCC_GPU_CC_GX_BCR] = { 0x1008 }, + [GPUCC_GPU_CC_SPDM_BCR] = { 0x1110 }, + [GPUCC_GPU_CC_XO_BCR] = { 0x1000 }, +}; + +static struct gdsc *gpu_cc_sm8150_gdscs[] = { + [GPU_CX_GDSC] = &gpu_cx_gdsc, + [GPU_GX_GDSC] = &gpu_gx_gdsc, +}; + +static const struct regmap_config gpu_cc_sm8150_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .max_register = 0x8008, + .fast_io = true, +}; + +static const struct qcom_cc_desc gpu_cc_sm8150_desc = { + .config = &gpu_cc_sm8150_regmap_config, + .clks = gpu_cc_sm8150_clocks, + .num_clks = ARRAY_SIZE(gpu_cc_sm8150_clocks), + .resets = gpu_cc_sm8150_resets, + .num_resets = ARRAY_SIZE(gpu_cc_sm8150_resets), + .gdscs = gpu_cc_sm8150_gdscs, + .num_gdscs = ARRAY_SIZE(gpu_cc_sm8150_gdscs), +}; + +static const struct of_device_id gpu_cc_sm8150_match_table[] = { + { .compatible = "qcom,sm8150-gpucc" }, + { } +}; +MODULE_DEVICE_TABLE(of, gpu_cc_sm8150_match_table); + +static int gpu_cc_sm8150_probe(struct platform_device *pdev) +{ + struct regmap *regmap; + + regmap = qcom_cc_map(pdev, &gpu_cc_sm8150_desc); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + clk_trion_pll_configure(&gpu_cc_pll1, regmap, &gpu_cc_pll1_config); + + return qcom_cc_really_probe(pdev, &gpu_cc_sm8150_desc, regmap); +} + +static struct 
platform_driver gpu_cc_sm8150_driver = { + .probe = gpu_cc_sm8150_probe, + .driver = { + .name = "sm8150-gpucc", + .of_match_table = gpu_cc_sm8150_match_table, + }, +}; + +static int __init gpu_cc_sm8150_init(void) +{ + return platform_driver_register(&gpu_cc_sm8150_driver); +} +subsys_initcall(gpu_cc_sm8150_init); + +static void __exit gpu_cc_sm8150_exit(void) +{ + platform_driver_unregister(&gpu_cc_sm8150_driver); +} +module_exit(gpu_cc_sm8150_exit); + +MODULE_DESCRIPTION("QTI GPUCC SM8150 Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/qcom/gpucc-sm8250.c b/drivers/clk/qcom/gpucc-sm8250.c new file mode 100644 index 000000000000..3fa7d1f9ff98 --- /dev/null +++ b/drivers/clk/qcom/gpucc-sm8250.c @@ -0,0 +1,348 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved. + */ + +#include <linux/clk-provider.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> + +#include <dt-bindings/clock/qcom,gpucc-sm8250.h> + +#include "common.h" +#include "clk-alpha-pll.h" +#include "clk-branch.h" +#include "clk-pll.h" +#include "clk-rcg.h" +#include "clk-regmap.h" +#include "reset.h" +#include "gdsc.h" + +#define CX_GMU_CBCR_SLEEP_MASK 0xf +#define CX_GMU_CBCR_SLEEP_SHIFT 4 +#define CX_GMU_CBCR_WAKE_MASK 0xf +#define CX_GMU_CBCR_WAKE_SHIFT 8 + +enum { + P_BI_TCXO, + P_CORE_BI_PLL_TEST_SE, + P_GPLL0_OUT_MAIN, + P_GPLL0_OUT_MAIN_DIV, + P_GPU_CC_PLL0_OUT_MAIN, + P_GPU_CC_PLL1_OUT_MAIN, +}; + +static struct pll_vco lucid_vco[] = { + { 249600000, 2000000000, 0 }, +}; + +static const struct alpha_pll_config gpu_cc_pll1_config = { + .l = 0x1a, + .alpha = 0xaaa, + .config_ctl_val = 0x20485699, + .config_ctl_hi_val = 0x00002261, + .config_ctl_hi1_val = 0x029a699c, + .user_ctl_val = 0x00000000, + .user_ctl_hi_val = 0x00000805, + .user_ctl_hi1_val = 0x00000000, +}; + +static struct clk_alpha_pll gpu_cc_pll1 = { + .offset = 0x100, + .vco_table = lucid_vco, + .num_vco = 
ARRAY_SIZE(lucid_vco), + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID], + .clkr = { + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_pll1", + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", + }, + .num_parents = 1, + .ops = &clk_alpha_pll_lucid_ops, + }, + }, +}; + +static const struct parent_map gpu_cc_parent_map_0[] = { + { P_BI_TCXO, 0 }, + { P_GPU_CC_PLL1_OUT_MAIN, 3 }, + { P_GPLL0_OUT_MAIN, 5 }, + { P_GPLL0_OUT_MAIN_DIV, 6 }, +}; + +static const struct clk_parent_data gpu_cc_parent_data_0[] = { + { .fw_name = "bi_tcxo" }, + { .hw = &gpu_cc_pll1.clkr.hw }, + { .fw_name = "gcc_gpu_gpll0_clk_src" }, + { .fw_name = "gcc_gpu_gpll0_div_clk_src" }, +}; + +static const struct freq_tbl ftbl_gpu_cc_gmu_clk_src[] = { + F(19200000, P_BI_TCXO, 1, 0, 0), + F(200000000, P_GPLL0_OUT_MAIN_DIV, 1.5, 0, 0), + F(500000000, P_GPU_CC_PLL1_OUT_MAIN, 1, 0, 0), + { } +}; + +static struct clk_rcg2 gpu_cc_gmu_clk_src = { + .cmd_rcgr = 0x1120, + .mnd_width = 0, + .hid_width = 5, + .parent_map = gpu_cc_parent_map_0, + .freq_tbl = ftbl_gpu_cc_gmu_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "gpu_cc_gmu_clk_src", + .parent_data = gpu_cc_parent_data_0, + .num_parents = ARRAY_SIZE(gpu_cc_parent_data_0), + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_branch gpu_cc_ahb_clk = { + .halt_reg = 0x1078, + .halt_check = BRANCH_HALT_DELAY, + .clkr = { + .enable_reg = 0x1078, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_ahb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_crc_ahb_clk = { + .halt_reg = 0x107c, + .halt_check = BRANCH_HALT_VOTED, + .clkr = { + .enable_reg = 0x107c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_crc_ahb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cx_apb_clk = { + .halt_reg = 0x1088, + .halt_check = BRANCH_HALT_VOTED, + .clkr = { + .enable_reg 
= 0x1088, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cx_apb_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cx_gmu_clk = { + .halt_reg = 0x1098, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x1098, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cx_gmu_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &gpu_cc_gmu_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cx_snoc_dvm_clk = { + .halt_reg = 0x108c, + .halt_check = BRANCH_HALT_VOTED, + .clkr = { + .enable_reg = 0x108c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cx_snoc_dvm_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cxo_aon_clk = { + .halt_reg = 0x1004, + .halt_check = BRANCH_HALT_VOTED, + .clkr = { + .enable_reg = 0x1004, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cxo_aon_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_cxo_clk = { + .halt_reg = 0x109c, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x109c, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_cxo_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_gx_gmu_clk = { + .halt_reg = 0x1064, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x1064, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gpu_cc_gx_gmu_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &gpu_cc_gmu_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch gpu_cc_hlos1_vote_gpu_smmu_clk = { + .halt_reg = 0x5000, + .halt_check = BRANCH_VOTED, + .clkr = { + .enable_reg = 0x5000, + .enable_mask = BIT(0), + .hw.init = 
&(struct clk_init_data){ + .name = "gpu_cc_hlos1_vote_gpu_smmu_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct gdsc gpu_cx_gdsc = { + .gdscr = 0x106c, + .gds_hw_ctrl = 0x1540, + .pd = { + .name = "gpu_cx_gdsc", + }, + .pwrsts = PWRSTS_OFF_ON, + .flags = VOTABLE, +}; + +static struct gdsc gpu_gx_gdsc = { + .gdscr = 0x100c, + .clamp_io_ctrl = 0x1508, + .pd = { + .name = "gpu_gx_gdsc", + .power_on = gdsc_gx_do_nothing_enable, + }, + .pwrsts = PWRSTS_OFF_ON, + .flags = CLAMP_IO | AON_RESET | POLL_CFG_GDSCR, +}; + +static struct clk_regmap *gpu_cc_sm8250_clocks[] = { + [GPU_CC_AHB_CLK] = &gpu_cc_ahb_clk.clkr, + [GPU_CC_CRC_AHB_CLK] = &gpu_cc_crc_ahb_clk.clkr, + [GPU_CC_CX_APB_CLK] = &gpu_cc_cx_apb_clk.clkr, + [GPU_CC_CX_GMU_CLK] = &gpu_cc_cx_gmu_clk.clkr, + [GPU_CC_CX_SNOC_DVM_CLK] = &gpu_cc_cx_snoc_dvm_clk.clkr, + [GPU_CC_CXO_AON_CLK] = &gpu_cc_cxo_aon_clk.clkr, + [GPU_CC_CXO_CLK] = &gpu_cc_cxo_clk.clkr, + [GPU_CC_GMU_CLK_SRC] = &gpu_cc_gmu_clk_src.clkr, + [GPU_CC_GX_GMU_CLK] = &gpu_cc_gx_gmu_clk.clkr, + [GPU_CC_PLL1] = &gpu_cc_pll1.clkr, + [GPU_CC_HLOS1_VOTE_GPU_SMMU_CLK] = &gpu_cc_hlos1_vote_gpu_smmu_clk.clkr, +}; + +static const struct qcom_reset_map gpu_cc_sm8250_resets[] = { + [GPUCC_GPU_CC_ACD_BCR] = { 0x1160 }, + [GPUCC_GPU_CC_CX_BCR] = { 0x1068 }, + [GPUCC_GPU_CC_GFX3D_AON_BCR] = { 0x10a0 }, + [GPUCC_GPU_CC_GMU_BCR] = { 0x111c }, + [GPUCC_GPU_CC_GX_BCR] = { 0x1008 }, + [GPUCC_GPU_CC_XO_BCR] = { 0x1000 }, +}; + +static struct gdsc *gpu_cc_sm8250_gdscs[] = { + [GPU_CX_GDSC] = &gpu_cx_gdsc, + [GPU_GX_GDSC] = &gpu_gx_gdsc, +}; + +static const struct regmap_config gpu_cc_sm8250_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .max_register = 0x8008, + .fast_io = true, +}; + +static const struct qcom_cc_desc gpu_cc_sm8250_desc = { + .config = &gpu_cc_sm8250_regmap_config, + .clks = gpu_cc_sm8250_clocks, + .num_clks = ARRAY_SIZE(gpu_cc_sm8250_clocks), + .resets = gpu_cc_sm8250_resets, + .num_resets = 
ARRAY_SIZE(gpu_cc_sm8250_resets), + .gdscs = gpu_cc_sm8250_gdscs, + .num_gdscs = ARRAY_SIZE(gpu_cc_sm8250_gdscs), +}; + +static const struct of_device_id gpu_cc_sm8250_match_table[] = { + { .compatible = "qcom,sm8250-gpucc" }, + { } +}; +MODULE_DEVICE_TABLE(of, gpu_cc_sm8250_match_table); + +static int gpu_cc_sm8250_probe(struct platform_device *pdev) +{ + struct regmap *regmap; + unsigned int value, mask; + + regmap = qcom_cc_map(pdev, &gpu_cc_sm8250_desc); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + clk_lucid_pll_configure(&gpu_cc_pll1, regmap, &gpu_cc_pll1_config); + + /* + * Configure gpu_cc_cx_gmu_clk with recommended + * wakeup/sleep settings + */ + mask = CX_GMU_CBCR_WAKE_MASK << CX_GMU_CBCR_WAKE_SHIFT; + mask |= CX_GMU_CBCR_SLEEP_MASK << CX_GMU_CBCR_SLEEP_SHIFT; + value = 0xf << CX_GMU_CBCR_WAKE_SHIFT | 0xf << CX_GMU_CBCR_SLEEP_SHIFT; + regmap_update_bits(regmap, 0x1098, mask, value); + + return qcom_cc_really_probe(pdev, &gpu_cc_sm8250_desc, regmap); +} + +static struct platform_driver gpu_cc_sm8250_driver = { + .probe = gpu_cc_sm8250_probe, + .driver = { + .name = "sm8250-gpucc", + .of_match_table = gpu_cc_sm8250_match_table, + }, +}; + +static int __init gpu_cc_sm8250_init(void) +{ + return platform_driver_register(&gpu_cc_sm8250_driver); +} +subsys_initcall(gpu_cc_sm8250_init); + +static void __exit gpu_cc_sm8250_exit(void) +{ + platform_driver_unregister(&gpu_cc_sm8250_driver); +} +module_exit(gpu_cc_sm8250_exit); + +MODULE_DESCRIPTION("QTI GPU_CC SM8250 Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/qcom/lpasscorecc-sc7180.c b/drivers/clk/qcom/lpasscorecc-sc7180.c new file mode 100644 index 000000000000..d4c1864e1ee9 --- /dev/null +++ b/drivers/clk/qcom/lpasscorecc-sc7180.c @@ -0,0 +1,476 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020, The Linux Foundation. All rights reserved. 
+ */ + +#include <linux/clk-provider.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/pm_clock.h> +#include <linux/pm_runtime.h> +#include <linux/of.h> +#include <linux/regmap.h> + +#include <dt-bindings/clock/qcom,lpasscorecc-sc7180.h> + +#include "clk-alpha-pll.h" +#include "clk-branch.h" +#include "clk-rcg.h" +#include "clk-regmap.h" +#include "common.h" +#include "gdsc.h" + +enum { + P_BI_TCXO, + P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, + P_SLEEP_CLK, +}; + +static struct pll_vco fabia_vco[] = { + { 249600000, 2000000000, 0 }, +}; + +static const struct alpha_pll_config lpass_lpaaudio_dig_pll_config = { + .l = 0x20, + .alpha = 0x0, + .config_ctl_val = 0x20485699, + .config_ctl_hi_val = 0x00002067, + .test_ctl_val = 0x40000000, + .test_ctl_hi_val = 0x00000000, + .user_ctl_val = 0x00005105, + .user_ctl_hi_val = 0x00004805, +}; + +static const u8 clk_alpha_pll_regs_offset[][PLL_OFF_MAX_REGS] = { + [CLK_ALPHA_PLL_TYPE_FABIA] = { + [PLL_OFF_L_VAL] = 0x04, + [PLL_OFF_CAL_L_VAL] = 0x8, + [PLL_OFF_USER_CTL] = 0x0c, + [PLL_OFF_USER_CTL_U] = 0x10, + [PLL_OFF_USER_CTL_U1] = 0x14, + [PLL_OFF_CONFIG_CTL] = 0x18, + [PLL_OFF_CONFIG_CTL_U] = 0x1C, + [PLL_OFF_CONFIG_CTL_U1] = 0x20, + [PLL_OFF_TEST_CTL] = 0x24, + [PLL_OFF_TEST_CTL_U] = 0x28, + [PLL_OFF_STATUS] = 0x30, + [PLL_OFF_OPMODE] = 0x38, + [PLL_OFF_FRAC] = 0x40, + }, +}; + +static struct clk_alpha_pll lpass_lpaaudio_dig_pll = { + .offset = 0x1000, + .vco_table = fabia_vco, + .num_vco = ARRAY_SIZE(fabia_vco), + .regs = clk_alpha_pll_regs_offset[CLK_ALPHA_PLL_TYPE_FABIA], + .clkr = { + .hw.init = &(struct clk_init_data){ + .name = "lpass_lpaaudio_dig_pll", + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", + }, + .num_parents = 1, + .ops = &clk_alpha_pll_fabia_ops, + }, + }, +}; + +static const struct clk_div_table + post_div_table_lpass_lpaaudio_dig_pll_out_odd[] = { + { 0x5, 5 }, + { } +}; + +static struct clk_alpha_pll_postdiv 
lpass_lpaaudio_dig_pll_out_odd = { + .offset = 0x1000, + .post_div_shift = 12, + .post_div_table = post_div_table_lpass_lpaaudio_dig_pll_out_odd, + .num_post_div = + ARRAY_SIZE(post_div_table_lpass_lpaaudio_dig_pll_out_odd), + .width = 4, + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], + .clkr.hw.init = &(struct clk_init_data){ + .name = "lpass_lpaaudio_dig_pll_out_odd", + .parent_data = &(const struct clk_parent_data){ + .hw = &lpass_lpaaudio_dig_pll.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_alpha_pll_postdiv_fabia_ops, + }, +}; + +static const struct parent_map lpass_core_cc_parent_map_0[] = { + { P_BI_TCXO, 0 }, + { P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 5 }, +}; + +static const struct clk_parent_data lpass_core_cc_parent_data_0[] = { + { .fw_name = "bi_tcxo" }, + { .hw = &lpass_lpaaudio_dig_pll_out_odd.clkr.hw }, +}; + +static const struct parent_map lpass_core_cc_parent_map_2[] = { + { P_BI_TCXO, 0 }, +}; + +static struct clk_rcg2 core_clk_src = { + .cmd_rcgr = 0x1d000, + .mnd_width = 8, + .hid_width = 5, + .parent_map = lpass_core_cc_parent_map_2, + .clkr.hw.init = &(struct clk_init_data){ + .name = "core_clk_src", + .parent_data = &(const struct clk_parent_data){ + .fw_name = "bi_tcxo", + }, + .num_parents = 1, + .ops = &clk_rcg2_ops, + }, +}; + +static const struct freq_tbl ftbl_ext_mclk0_clk_src[] = { + F(9600000, P_BI_TCXO, 2, 0, 0), + F(19200000, P_BI_TCXO, 1, 0, 0), + { } +}; + +static const struct freq_tbl ftbl_ext_lpaif_clk_src[] = { + F(256000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 15, 1, 32), + F(512000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 15, 1, 16), + F(768000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 10, 1, 16), + F(1024000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 15, 1, 8), + F(1536000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 10, 1, 8), + F(2048000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 15, 1, 4), + F(3072000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 10, 1, 4), + F(4096000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 15, 1, 2), + F(6144000, 
P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 10, 1, 2), + F(8192000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 15, 0, 0), + F(9600000, P_BI_TCXO, 2, 0, 0), + F(12288000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 10, 0, 0), + F(19200000, P_BI_TCXO, 1, 0, 0), + F(24576000, P_LPASS_LPAAUDIO_DIG_PLL_OUT_ODD, 5, 0, 0), + { } +}; + +static struct clk_rcg2 ext_mclk0_clk_src = { + .cmd_rcgr = 0x20000, + .mnd_width = 8, + .hid_width = 5, + .parent_map = lpass_core_cc_parent_map_0, + .freq_tbl = ftbl_ext_mclk0_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "ext_mclk0_clk_src", + .parent_data = lpass_core_cc_parent_data_0, + .num_parents = 2, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 lpaif_pri_clk_src = { + .cmd_rcgr = 0x10000, + .mnd_width = 16, + .hid_width = 5, + .parent_map = lpass_core_cc_parent_map_0, + .freq_tbl = ftbl_ext_lpaif_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "lpaif_pri_clk_src", + .parent_data = lpass_core_cc_parent_data_0, + .num_parents = 2, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_rcg2 lpaif_sec_clk_src = { + .cmd_rcgr = 0x11000, + .mnd_width = 16, + .hid_width = 5, + .parent_map = lpass_core_cc_parent_map_0, + .freq_tbl = ftbl_ext_lpaif_clk_src, + .clkr.hw.init = &(struct clk_init_data){ + .name = "lpaif_sec_clk_src", + .parent_data = lpass_core_cc_parent_data_0, + .num_parents = 2, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_rcg2_ops, + }, +}; + +static struct clk_branch lpass_audio_core_ext_mclk0_clk = { + .halt_reg = 0x20014, + .halt_check = BRANCH_HALT, + .hwcg_reg = 0x20014, + .hwcg_bit = 1, + .clkr = { + .enable_reg = 0x20014, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "lpass_audio_core_ext_mclk0_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &ext_mclk0_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch 
lpass_audio_core_lpaif_pri_ibit_clk = { + .halt_reg = 0x10018, + .halt_check = BRANCH_HALT, + .hwcg_reg = 0x10018, + .hwcg_bit = 1, + .clkr = { + .enable_reg = 0x10018, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "lpass_audio_core_lpaif_pri_ibit_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &lpaif_pri_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch lpass_audio_core_lpaif_sec_ibit_clk = { + .halt_reg = 0x11018, + .halt_check = BRANCH_HALT, + .hwcg_reg = 0x11018, + .hwcg_bit = 1, + .clkr = { + .enable_reg = 0x11018, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "lpass_audio_core_lpaif_sec_ibit_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &lpaif_sec_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_branch lpass_audio_core_sysnoc_mport_core_clk = { + .halt_reg = 0x23000, + .halt_check = BRANCH_HALT, + .hwcg_reg = 0x23000, + .hwcg_bit = 1, + .clkr = { + .enable_reg = 0x23000, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "lpass_audio_core_sysnoc_mport_core_clk", + .parent_data = &(const struct clk_parent_data){ + .hw = &core_clk_src.clkr.hw, + }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + .ops = &clk_branch2_ops, + }, + }, +}; + +static struct clk_regmap *lpass_core_cc_sc7180_clocks[] = { + [EXT_MCLK0_CLK_SRC] = &ext_mclk0_clk_src.clkr, + [LPAIF_PRI_CLK_SRC] = &lpaif_pri_clk_src.clkr, + [LPAIF_SEC_CLK_SRC] = &lpaif_sec_clk_src.clkr, + [CORE_CLK_SRC] = &core_clk_src.clkr, + [LPASS_AUDIO_CORE_EXT_MCLK0_CLK] = &lpass_audio_core_ext_mclk0_clk.clkr, + [LPASS_AUDIO_CORE_LPAIF_PRI_IBIT_CLK] = + &lpass_audio_core_lpaif_pri_ibit_clk.clkr, + [LPASS_AUDIO_CORE_LPAIF_SEC_IBIT_CLK] = + &lpass_audio_core_lpaif_sec_ibit_clk.clkr, + [LPASS_AUDIO_CORE_SYSNOC_MPORT_CORE_CLK] = + 
&lpass_audio_core_sysnoc_mport_core_clk.clkr, + [LPASS_LPAAUDIO_DIG_PLL] = &lpass_lpaaudio_dig_pll.clkr, + [LPASS_LPAAUDIO_DIG_PLL_OUT_ODD] = &lpass_lpaaudio_dig_pll_out_odd.clkr, +}; + +static struct gdsc lpass_pdc_hm_gdsc = { + .gdscr = 0x3090, + .pd = { + .name = "lpass_pdc_hm_gdsc", + }, + .pwrsts = PWRSTS_OFF_ON, + .flags = VOTABLE, +}; + +static struct gdsc lpass_audio_hm_gdsc = { + .gdscr = 0x9090, + .pd = { + .name = "lpass_audio_hm_gdsc", + }, + .pwrsts = PWRSTS_OFF_ON, +}; + +static struct gdsc lpass_core_hm_gdsc = { + .gdscr = 0x0, + .pd = { + .name = "lpass_core_hm_gdsc", + }, + .pwrsts = PWRSTS_OFF_ON, + .flags = RETAIN_FF_ENABLE, +}; + +static struct gdsc *lpass_core_hm_sc7180_gdscs[] = { + [LPASS_CORE_HM_GDSCR] = &lpass_core_hm_gdsc, +}; + +static struct gdsc *lpass_audio_hm_sc7180_gdscs[] = { + [LPASS_PDC_HM_GDSCR] = &lpass_pdc_hm_gdsc, + [LPASS_AUDIO_HM_GDSCR] = &lpass_audio_hm_gdsc, +}; + +static struct regmap_config lpass_core_cc_sc7180_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .fast_io = true, +}; + +static const struct qcom_cc_desc lpass_core_hm_sc7180_desc = { + .config = &lpass_core_cc_sc7180_regmap_config, + .gdscs = lpass_core_hm_sc7180_gdscs, + .num_gdscs = ARRAY_SIZE(lpass_core_hm_sc7180_gdscs), +}; + +static const struct qcom_cc_desc lpass_core_cc_sc7180_desc = { + .config = &lpass_core_cc_sc7180_regmap_config, + .clks = lpass_core_cc_sc7180_clocks, + .num_clks = ARRAY_SIZE(lpass_core_cc_sc7180_clocks), +}; + +static const struct qcom_cc_desc lpass_audio_hm_sc7180_desc = { + .config = &lpass_core_cc_sc7180_regmap_config, + .gdscs = lpass_audio_hm_sc7180_gdscs, + .num_gdscs = ARRAY_SIZE(lpass_audio_hm_sc7180_gdscs), +}; + +static int lpass_core_cc_sc7180_probe(struct platform_device *pdev) +{ + const struct qcom_cc_desc *desc; + struct regmap *regmap; + int ret; + + lpass_core_cc_sc7180_regmap_config.name = "lpass_audio_cc"; + desc = &lpass_audio_hm_sc7180_desc; + ret = qcom_cc_probe_by_index(pdev, 1, 
desc); + if (ret) + return ret; + + lpass_core_cc_sc7180_regmap_config.name = "lpass_core_cc"; + regmap = qcom_cc_map(pdev, &lpass_core_cc_sc7180_desc); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + /* + * Keep the CLK always-ON + * LPASS_AUDIO_CORE_SYSNOC_SWAY_CORE_CLK + */ + regmap_update_bits(regmap, 0x24000, BIT(0), BIT(0)); + + /* PLL settings */ + regmap_write(regmap, 0x1008, 0x20); + regmap_update_bits(regmap, 0x1014, BIT(0), BIT(0)); + + clk_fabia_pll_configure(&lpass_lpaaudio_dig_pll, regmap, + &lpass_lpaaudio_dig_pll_config); + + return qcom_cc_really_probe(pdev, &lpass_core_cc_sc7180_desc, regmap); +} + +static int lpass_hm_core_probe(struct platform_device *pdev) +{ + const struct qcom_cc_desc *desc; + + lpass_core_cc_sc7180_regmap_config.name = "lpass_hm_core"; + desc = &lpass_core_hm_sc7180_desc; + + return qcom_cc_probe_by_index(pdev, 0, desc); +} + +static const struct of_device_id lpass_core_cc_sc7180_match_table[] = { + { + .compatible = "qcom,sc7180-lpasshm", + .data = lpass_hm_core_probe, + }, + { + .compatible = "qcom,sc7180-lpasscorecc", + .data = lpass_core_cc_sc7180_probe, + }, + { } +}; +MODULE_DEVICE_TABLE(of, lpass_core_cc_sc7180_match_table); + +static int lpass_core_sc7180_probe(struct platform_device *pdev) +{ + int (*clk_probe)(struct platform_device *p); + int ret; + + pm_runtime_enable(&pdev->dev); + ret = pm_clk_create(&pdev->dev); + if (ret) + return ret; + + ret = pm_clk_add(&pdev->dev, "iface"); + if (ret < 0) { + dev_err(&pdev->dev, "failed to acquire iface clock\n"); + goto disable_pm_runtime; + } + + clk_probe = of_device_get_match_data(&pdev->dev); + if (!clk_probe) + return -EINVAL; + + ret = clk_probe(pdev); + if (ret) + goto destroy_pm_clk; + + return 0; + +destroy_pm_clk: + pm_clk_destroy(&pdev->dev); + +disable_pm_runtime: + pm_runtime_disable(&pdev->dev); + + return ret; +} + +static const struct dev_pm_ops lpass_core_cc_pm_ops = { + SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL) +}; + +static struct 
platform_driver lpass_core_cc_sc7180_driver = { + .probe = lpass_core_sc7180_probe, + .driver = { + .name = "lpass_core_cc-sc7180", + .of_match_table = lpass_core_cc_sc7180_match_table, + .pm = &lpass_core_cc_pm_ops, + }, +}; + +static int __init lpass_core_cc_sc7180_init(void) +{ + return platform_driver_register(&lpass_core_cc_sc7180_driver); +} +subsys_initcall(lpass_core_cc_sc7180_init); + +static void __exit lpass_core_cc_sc7180_exit(void) +{ + platform_driver_unregister(&lpass_core_cc_sc7180_driver); +} +module_exit(lpass_core_cc_sc7180_exit); + +MODULE_DESCRIPTION("QTI LPASS_CORE_CC SC7180 Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/rockchip/clk-pll.c b/drivers/clk/rockchip/clk-pll.c index 10560d963baf..4c6c9167ef50 100644 --- a/drivers/clk/rockchip/clk-pll.c +++ b/drivers/clk/rockchip/clk-pll.c @@ -12,6 +12,7 @@ #include <linux/io.h> #include <linux/delay.h> #include <linux/clk-provider.h> +#include <linux/iopoll.h> #include <linux/regmap.h> #include <linux/clk.h> #include "clk.h" @@ -86,23 +87,14 @@ static int rockchip_pll_wait_lock(struct rockchip_clk_pll *pll) { struct regmap *grf = pll->ctx->grf; unsigned int val; - int delay = 24000000, ret; - - while (delay > 0) { - ret = regmap_read(grf, pll->lock_offset, &val); - if (ret) { - pr_err("%s: failed to read pll lock status: %d\n", - __func__, ret); - return ret; - } + int ret; - if (val & BIT(pll->lock_shift)) - return 0; - delay--; - } + ret = regmap_read_poll_timeout(grf, pll->lock_offset, val, + val & BIT(pll->lock_shift), 0, 1000); + if (ret) + pr_err("%s: timeout waiting for pll to lock\n", __func__); - pr_err("%s: timeout waiting for pll to lock\n", __func__); - return -ETIMEDOUT; + return ret; } /** @@ -118,12 +110,31 @@ static int rockchip_pll_wait_lock(struct rockchip_clk_pll *pll) #define RK3036_PLLCON1_REFDIV_SHIFT 0 #define RK3036_PLLCON1_POSTDIV2_MASK 0x7 #define RK3036_PLLCON1_POSTDIV2_SHIFT 6 +#define RK3036_PLLCON1_LOCK_STATUS BIT(10) #define RK3036_PLLCON1_DSMPD_MASK 
0x1 #define RK3036_PLLCON1_DSMPD_SHIFT 12 +#define RK3036_PLLCON1_PWRDOWN BIT(13) #define RK3036_PLLCON2_FRAC_MASK 0xffffff #define RK3036_PLLCON2_FRAC_SHIFT 0 -#define RK3036_PLLCON1_PWRDOWN (1 << 13) +static int rockchip_rk3036_pll_wait_lock(struct rockchip_clk_pll *pll) +{ + u32 pllcon; + int ret; + + /* + * Lock time typical 250, max 500 input clock cycles @24MHz + * So define a very safe maximum of 1000us, meaning 24000 cycles. + */ + ret = readl_relaxed_poll_timeout(pll->reg_base + RK3036_PLLCON(1), + pllcon, + pllcon & RK3036_PLLCON1_LOCK_STATUS, + 0, 1000); + if (ret) + pr_err("%s: timeout waiting for pll to lock\n", __func__); + + return ret; +} static void rockchip_rk3036_pll_get_params(struct rockchip_clk_pll *pll, struct rockchip_pll_rate_table *rate) @@ -221,7 +232,7 @@ static int rockchip_rk3036_pll_set_params(struct rockchip_clk_pll *pll, writel_relaxed(pllcon, pll->reg_base + RK3036_PLLCON(2)); /* wait for the pll to lock */ - ret = rockchip_pll_wait_lock(pll); + ret = rockchip_rk3036_pll_wait_lock(pll); if (ret) { pr_warn("%s: pll update unsuccessful, trying to restore old params\n", __func__); @@ -260,7 +271,7 @@ static int rockchip_rk3036_pll_enable(struct clk_hw *hw) writel(HIWORD_UPDATE(0, RK3036_PLLCON1_PWRDOWN, 0), pll->reg_base + RK3036_PLLCON(1)); - rockchip_pll_wait_lock(pll); + rockchip_rk3036_pll_wait_lock(pll); return 0; } @@ -589,19 +600,20 @@ static const struct clk_ops rockchip_rk3066_pll_clk_ops = { static int rockchip_rk3399_pll_wait_lock(struct rockchip_clk_pll *pll) { u32 pllcon; - int delay = 24000000; - - /* poll check the lock status in rk3399 xPLLCON2 */ - while (delay > 0) { - pllcon = readl_relaxed(pll->reg_base + RK3399_PLLCON(2)); - if (pllcon & RK3399_PLLCON2_LOCK_STATUS) - return 0; + int ret; - delay--; - } + /* + * Lock time typical 250, max 500 input clock cycles @24MHz + * So define a very safe maximum of 1000us, meaning 24000 cycles. 
+ */ + ret = readl_relaxed_poll_timeout(pll->reg_base + RK3399_PLLCON(2), + pllcon, + pllcon & RK3399_PLLCON2_LOCK_STATUS, + 0, 1000); + if (ret) + pr_err("%s: timeout waiting for pll to lock\n", __func__); - pr_err("%s: timeout waiting for pll to lock\n", __func__); - return -ETIMEDOUT; + return ret; } static void rockchip_rk3399_pll_get_params(struct rockchip_clk_pll *pll, diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index 77aebfb1d6d5..730020fcc7fe 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -751,6 +751,7 @@ static const char *const rk3188_critical_clocks[] __initconst = { "pclk_peri", "hclk_cpubus", "hclk_vio_bus", + "sclk_mac_lbtest", }; static struct rockchip_clk_provider *__init rk3188_common_clk_init(struct device_node *np) diff --git a/drivers/clk/rockchip/clk-rk3288.c b/drivers/clk/rockchip/clk-rk3288.c index cc2a177bbdbf..93c794695c46 100644 --- a/drivers/clk/rockchip/clk-rk3288.c +++ b/drivers/clk/rockchip/clk-rk3288.c @@ -15,6 +15,11 @@ #define RK3288_GRF_SOC_CON(x) (0x244 + x * 4) #define RK3288_GRF_SOC_STATUS1 0x284 +enum rk3288_variant { + RK3288_CRU, + RK3288W_CRU, +}; + enum rk3288_plls { apll, dpll, cpll, gpll, npll, }; @@ -425,8 +430,6 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = { COMPOSITE(0, "aclk_vio0", mux_pll_src_cpll_gpll_usb480m_p, CLK_IGNORE_UNUSED, RK3288_CLKSEL_CON(31), 6, 2, MFLAGS, 0, 5, DFLAGS, RK3288_CLKGATE_CON(3), 0, GFLAGS), - DIV(0, "hclk_vio", "aclk_vio0", 0, - RK3288_CLKSEL_CON(28), 8, 5, DFLAGS), COMPOSITE(0, "aclk_vio1", mux_pll_src_cpll_gpll_usb480m_p, CLK_IGNORE_UNUSED, RK3288_CLKSEL_CON(31), 14, 2, MFLAGS, 8, 5, DFLAGS, RK3288_CLKGATE_CON(3), 2, GFLAGS), @@ -819,6 +822,16 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = { INVERTER(0, "pclk_isp", "pclk_isp_in", RK3288_CLKSEL_CON(29), 3, IFLAGS), }; +static struct rockchip_clk_branch rk3288w_hclkvio_branch[] __initdata = { + DIV(0, 
"hclk_vio", "aclk_vio1", 0, + RK3288_CLKSEL_CON(28), 8, 5, DFLAGS), +}; + +static struct rockchip_clk_branch rk3288_hclkvio_branch[] __initdata = { + DIV(0, "hclk_vio", "aclk_vio0", 0, + RK3288_CLKSEL_CON(28), 8, 5, DFLAGS), +}; + static const char *const rk3288_critical_clocks[] __initconst = { "aclk_cpu", "aclk_peri", @@ -914,7 +927,8 @@ static struct syscore_ops rk3288_clk_syscore_ops = { .resume = rk3288_clk_resume, }; -static void __init rk3288_clk_init(struct device_node *np) +static void __init rk3288_common_init(struct device_node *np, + enum rk3288_variant soc) { struct rockchip_clk_provider *ctx; @@ -936,6 +950,14 @@ static void __init rk3288_clk_init(struct device_node *np) RK3288_GRF_SOC_STATUS1); rockchip_clk_register_branches(ctx, rk3288_clk_branches, ARRAY_SIZE(rk3288_clk_branches)); + + if (soc == RK3288W_CRU) + rockchip_clk_register_branches(ctx, rk3288w_hclkvio_branch, + ARRAY_SIZE(rk3288w_hclkvio_branch)); + else + rockchip_clk_register_branches(ctx, rk3288_hclkvio_branch, + ARRAY_SIZE(rk3288_hclkvio_branch)); + rockchip_clk_protect_critical(rk3288_critical_clocks, ARRAY_SIZE(rk3288_critical_clocks)); @@ -954,4 +976,15 @@ static void __init rk3288_clk_init(struct device_node *np) rockchip_clk_of_add_provider(np, ctx); } + +static void __init rk3288_clk_init(struct device_node *np) +{ + rk3288_common_init(np, RK3288_CRU); +} CLK_OF_DECLARE(rk3288_cru, "rockchip,rk3288-cru", rk3288_clk_init); + +static void __init rk3288w_clk_init(struct device_node *np) +{ + rk3288_common_init(np, RK3288W_CRU); +} +CLK_OF_DECLARE(rk3288w_cru, "rockchip,rk3288w-cru", rk3288w_clk_init); diff --git a/drivers/clk/rockchip/clk-rk3328.c b/drivers/clk/rockchip/clk-rk3328.c index c186a1985bf4..2429b7c2a8b3 100644 --- a/drivers/clk/rockchip/clk-rk3328.c +++ b/drivers/clk/rockchip/clk-rk3328.c @@ -808,22 +808,22 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = { MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "clk_sdmmc", RK3328_SDMMC_CON0, 1), 
MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "clk_sdmmc", - RK3328_SDMMC_CON1, 0), + RK3328_SDMMC_CON1, 1), MMC(SCLK_SDIO_DRV, "sdio_drv", "clk_sdio", RK3328_SDIO_CON0, 1), MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "clk_sdio", - RK3328_SDIO_CON1, 0), + RK3328_SDIO_CON1, 1), MMC(SCLK_EMMC_DRV, "emmc_drv", "clk_emmc", RK3328_EMMC_CON0, 1), MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "clk_emmc", - RK3328_EMMC_CON1, 0), + RK3328_EMMC_CON1, 1), MMC(SCLK_SDMMC_EXT_DRV, "sdmmc_ext_drv", "clk_sdmmc_ext", RK3328_SDMMC_EXT_CON0, 1), MMC(SCLK_SDMMC_EXT_SAMPLE, "sdmmc_ext_sample", "clk_sdmmc_ext", - RK3328_SDMMC_EXT_CON1, 0), + RK3328_SDMMC_EXT_CON1, 1), }; static const char *const rk3328_critical_clocks[] __initconst = { diff --git a/drivers/clk/sirf/clk-atlas6.c b/drivers/clk/sirf/clk-atlas6.c index c84d5bab7ac2..b95483bb6a5e 100644 --- a/drivers/clk/sirf/clk-atlas6.c +++ b/drivers/clk/sirf/clk-atlas6.c @@ -135,7 +135,7 @@ static void __init atlas6_clk_init(struct device_node *np) for (i = pll1; i < maxclk; i++) { atlas6_clks[i] = clk_register(NULL, atlas6_clk_hw_array[i]); - BUG_ON(!atlas6_clks[i]); + BUG_ON(IS_ERR(atlas6_clks[i])); } clk_register_clkdev(atlas6_clks[cpu], NULL, "cpu"); clk_register_clkdev(atlas6_clks[io], NULL, "io"); diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c index 0b212cf2e794..f180c055d33f 100644 --- a/drivers/clk/tegra/clk-pll.c +++ b/drivers/clk/tegra/clk-pll.c @@ -327,16 +327,26 @@ int tegra_pll_wait_for_lock(struct tegra_clk_pll *pll) return clk_pll_wait_for_lock(pll); } +static bool pllm_clk_is_gated_by_pmc(struct tegra_clk_pll *pll) +{ + u32 val = readl_relaxed(pll->pmc + PMC_PLLP_WB0_OVERRIDE); + + return (val & PMC_PLLP_WB0_OVERRIDE_PLLM_OVERRIDE) && + !(val & PMC_PLLP_WB0_OVERRIDE_PLLM_ENABLE); +} + static int clk_pll_is_enabled(struct clk_hw *hw) { struct tegra_clk_pll *pll = to_clk_pll(hw); u32 val; - if (pll->params->flags & TEGRA_PLLM) { - val = readl_relaxed(pll->pmc + PMC_PLLP_WB0_OVERRIDE); - if (val & 
PMC_PLLP_WB0_OVERRIDE_PLLM_OVERRIDE) - return val & PMC_PLLP_WB0_OVERRIDE_PLLM_ENABLE ? 1 : 0; - } + /* + * Power Management Controller (PMC) can override the PLLM clock + * settings, including the enable-state. The PLLM is enabled when + * PLLM's CaR state is ON and when PLLM isn't gated by PMC. + */ + if ((pll->params->flags & TEGRA_PLLM) && pllm_clk_is_gated_by_pmc(pll)) + return 0; val = pll_readl_base(pll); diff --git a/drivers/clk/x86/clk-cgu-pll.c b/drivers/clk/x86/clk-cgu-pll.c index c03cc6b85b9f..3179557b5f78 100644 --- a/drivers/clk/x86/clk-cgu-pll.c +++ b/drivers/clk/x86/clk-cgu-pll.c @@ -128,7 +128,7 @@ lgm_clk_register_pll(struct lgm_clk_provider *ctx, pll->hw.init = &init; hw = &pll->hw; - ret = clk_hw_register(dev, hw); + ret = devm_clk_hw_register(dev, hw); if (ret) return ERR_PTR(ret); diff --git a/drivers/clk/x86/clk-cgu.c b/drivers/clk/x86/clk-cgu.c index 56af0e04ec1e..33de600e0c38 100644 --- a/drivers/clk/x86/clk-cgu.c +++ b/drivers/clk/x86/clk-cgu.c @@ -119,7 +119,7 @@ lgm_clk_register_mux(struct lgm_clk_provider *ctx, mux->hw.init = &init; hw = &mux->hw; - ret = clk_hw_register(dev, hw); + ret = devm_clk_hw_register(dev, hw); if (ret) return ERR_PTR(ret); @@ -247,7 +247,7 @@ lgm_clk_register_divider(struct lgm_clk_provider *ctx, div->hw.init = &init; hw = &div->hw; - ret = clk_hw_register(dev, hw); + ret = devm_clk_hw_register(dev, hw); if (ret) return ERR_PTR(ret); @@ -361,7 +361,7 @@ lgm_clk_register_gate(struct lgm_clk_provider *ctx, gate->hw.init = &init; hw = &gate->hw; - ret = clk_hw_register(dev, hw); + ret = devm_clk_hw_register(dev, hw); if (ret) return ERR_PTR(ret); @@ -420,18 +420,14 @@ lgm_clk_ddiv_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { struct lgm_clk_ddiv *ddiv = to_lgm_clk_ddiv(hw); unsigned int div0, div1, exdiv; - unsigned long flags; u64 prate; - spin_lock_irqsave(&ddiv->lock, flags); div0 = lgm_get_clk_val(ddiv->membase, ddiv->reg, ddiv->shift0, ddiv->width0) + 1; div1 = lgm_get_clk_val(ddiv->membase, 
ddiv->reg, ddiv->shift1, ddiv->width1) + 1; exdiv = lgm_get_clk_val(ddiv->membase, ddiv->reg, ddiv->shift2, ddiv->width2); - spin_unlock_irqrestore(&ddiv->lock, flags); - prate = (u64)parent_rate; do_div(prate, div0); do_div(prate, div1); @@ -548,24 +544,21 @@ lgm_clk_ddiv_round_rate(struct clk_hw *hw, unsigned long rate, div = div * 2; div = DIV_ROUND_CLOSEST_ULL((u64)div, 5); } + spin_unlock_irqrestore(&ddiv->lock, flags); - if (div <= 0) { - spin_unlock_irqrestore(&ddiv->lock, flags); + if (div <= 0) return *prate; - } - if (lgm_clk_get_ddiv_val(div, &ddiv1, &ddiv2) != 0) { - if (lgm_clk_get_ddiv_val(div + 1, &ddiv1, &ddiv2) != 0) { - spin_unlock_irqrestore(&ddiv->lock, flags); + if (lgm_clk_get_ddiv_val(div, &ddiv1, &ddiv2) != 0) + if (lgm_clk_get_ddiv_val(div + 1, &ddiv1, &ddiv2) != 0) return -EINVAL; - } - } rate64 = *prate; do_div(rate64, ddiv1); do_div(rate64, ddiv2); /* if predivide bit is enabled, modify rounded rate by factor of 2.5 */ + spin_lock_irqsave(&ddiv->lock, flags); if (lgm_get_clk_val(ddiv->membase, ddiv->reg, ddiv->shift2, 1)) { rate64 = rate64 * 2; rate64 = DIV_ROUND_CLOSEST_ULL(rate64, 5); @@ -588,19 +581,18 @@ int lgm_clk_register_ddiv(struct lgm_clk_provider *ctx, unsigned int nr_clk) { struct device *dev = ctx->dev; - struct clk_init_data init = {}; - struct lgm_clk_ddiv *ddiv; struct clk_hw *hw; unsigned int idx; int ret; for (idx = 0; idx < nr_clk; idx++, list++) { - ddiv = NULL; + struct clk_init_data init = {}; + struct lgm_clk_ddiv *ddiv; + ddiv = devm_kzalloc(dev, sizeof(*ddiv), GFP_KERNEL); if (!ddiv) return -ENOMEM; - memset(&init, 0, sizeof(init)); init.name = list->name; init.ops = &lgm_clk_ddiv_ops; init.flags = list->flags; @@ -624,7 +616,7 @@ int lgm_clk_register_ddiv(struct lgm_clk_provider *ctx, ddiv->hw.init = &init; hw = &ddiv->hw; - ret = clk_hw_register(dev, hw); + ret = devm_clk_hw_register(dev, hw); if (ret) { dev_err(dev, "register clk: %s failed!\n", list->name); return ret; diff --git a/drivers/firmware/arm_sdei.c 
b/drivers/firmware/arm_sdei.c index e7e36aab2386..b4b9ce97f415 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -1136,15 +1136,14 @@ int sdei_event_handler(struct pt_regs *regs, * access kernel memory. * Do the same here because this doesn't come via the same entry code. */ - orig_addr_limit = get_fs(); - set_fs(USER_DS); + orig_addr_limit = force_uaccess_begin(); err = arg->callback(event_num, regs, arg->callback_arg); if (err) pr_err_ratelimited("event %u on CPU %u failed with error: %d\n", event_num, smp_processor_id(), err); - set_fs(orig_addr_limit); + force_uaccess_end(orig_addr_limit); return err; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index aa5b54e5a1d7..eb7cfe87042e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2574,6 +2574,9 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) AMD_IP_BLOCK_TYPE_IH, }; + for (i = 0; i < adev->num_ip_blocks; i++) + adev->ip_blocks[i].status.hw = false; + for (i = 0; i < ARRAY_SIZE(ip_order); i++) { int j; struct amdgpu_ip_block *block; @@ -2581,7 +2584,6 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) for (j = 0; j < adev->num_ip_blocks; j++) { block = &adev->ip_blocks[j]; - block->status.hw = false; if (block->version->type != ip_order[i] || !block->status.valid) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 5f20cadee343..e4dbf14320b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -3212,6 +3212,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_fan1_enable.dev_attr.attr)) return 0; + /* Skip crit temp on APU */ + if ((adev->flags & AMD_IS_APU) && (adev->family >= AMDGPU_FAMILY_CZ) && + (attr == &sensor_dev_attr_temp1_crit.dev_attr.attr || + attr == 
&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr)) + return 0; + /* Skip limit attributes if DPM is not enabled */ if (!adev->pm.dpm_enabled && (attr == &sensor_dev_attr_temp1_crit.dev_attr.attr || diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index fe7d39bb975d..7fe564275457 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -193,12 +193,18 @@ static int psp_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; psp_memory_training_fini(&adev->psp); - release_firmware(adev->psp.sos_fw); - adev->psp.sos_fw = NULL; - release_firmware(adev->psp.asd_fw); - adev->psp.asd_fw = NULL; - release_firmware(adev->psp.ta_fw); - adev->psp.ta_fw = NULL; + if (adev->psp.sos_fw) { + release_firmware(adev->psp.sos_fw); + adev->psp.sos_fw = NULL; + } + if (adev->psp.asd_fw) { + release_firmware(adev->psp.asd_fw); + adev->psp.asd_fw = NULL; + } + if (adev->psp.ta_fw) { + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; + } if (adev->asic_type == CHIP_NAVI10) psp_sysfs_fini(adev); @@ -409,11 +415,28 @@ static int psp_clear_vf_fw(struct psp_context *psp) return ret; } +static bool psp_skip_tmr(struct psp_context *psp) +{ + switch (psp->adev->asic_type) { + case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + return true; + default: + return false; + } +} + static int psp_tmr_load(struct psp_context *psp) { int ret; struct psp_gfx_cmd_resp *cmd; + /* For Navi12 and CHIP_SIENNA_CICHLID SRIOV, do not set up TMR. + * Already set up by host driver. 
+ */ + if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp)) + return 0; + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!cmd) return -ENOMEM; @@ -1987,7 +2010,7 @@ static int psp_suspend(void *handle) ret = psp_tmr_terminate(psp); if (ret) { - DRM_ERROR("Falied to terminate tmr\n"); + DRM_ERROR("Failed to terminate tmr\n"); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index e10f02ed3f65..bcce4c0be462 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1618,7 +1618,7 @@ static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) data = con->eh_data; save_count = data->count - control->num_recs; /* only new entries are saved */ - if (save_count > 0) + if (save_count > 0) { if (amdgpu_ras_eeprom_process_recods(control, &data->bps[control->num_recs], true, @@ -1627,6 +1627,9 @@ static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) return -EIO; } + dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count); + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 61e89247faf3..65997ffaed45 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3082,7 +3082,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 
0x00000500), @@ -3127,7 +3127,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), @@ -3158,7 +3158,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xffffffff, 0x010b0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) }; @@ -7529,6 +7529,7 @@ static int gfx_v10_0_set_powergating_state(void *handle, case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: amdgpu_gfx_off_ctrl(adev, enable); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 42f1a516005e..c41e5590a701 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -49,12 +49,11 @@ static int jpeg_v3_0_set_powergating_state(void *handle, static int jpeg_v3_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->asic_type == 
CHIP_SIENNA_CICHLID) { - u32 harvest = RREG32_SOC15(JPEG, 0, mmCC_UVD_HARVESTING); + u32 harvest = RREG32_SOC15(JPEG, 0, mmCC_UVD_HARVESTING); + + if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) + return -ENOENT; - if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) - return -ENOENT; - } adev->jpeg.num_jpeg_inst = 1; jpeg_v3_0_set_dec_ring_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index ea69ae76773e..da8024c2826e 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -97,6 +97,49 @@ static void nv_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); } +static u64 nv_pcie_rreg64(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags, address, data; + u64 r; + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + /* read low 32 bit */ + WREG32(address, reg); + (void)RREG32(address); + r = RREG32(data); + + /* read high 32 bit*/ + WREG32(address, reg + 4); + (void)RREG32(address); + r |= ((u64)RREG32(data) << 32); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + return r; +} + +static void nv_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v) +{ + unsigned long flags, address, data; + + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + /* write low 32 bit */ + WREG32(address, reg); + (void)RREG32(address); + WREG32(data, (u32)(v & 0xffffffffULL)); + (void)RREG32(data); + + /* write high 32 bit */ + WREG32(address, reg + 4); + (void)RREG32(address); + WREG32(data, (u32)(v >> 32)); + (void)RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + static u32 nv_didt_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; @@ -319,10 +362,15 @@ 
nv_asic_reset_method(struct amdgpu_device *adev) dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", amdgpu_reset_method); - if (smu_baco_is_support(smu)) - return AMD_RESET_METHOD_BACO; - else + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: return AMD_RESET_METHOD_MODE1; + default: + if (smu_baco_is_support(smu)) + return AMD_RESET_METHOD_BACO; + else + return AMD_RESET_METHOD_MODE1; + } } static int nv_asic_reset(struct amdgpu_device *adev) @@ -673,6 +721,8 @@ static int nv_common_early_init(void *handle) adev->smc_wreg = NULL; adev->pcie_rreg = &nv_pcie_rreg; adev->pcie_wreg = &nv_pcie_wreg; + adev->pcie_rreg64 = &nv_pcie_rreg64; + adev->pcie_wreg64 = &nv_pcie_wreg64; /* TODO: will add them during VCN v2 implementation */ adev->uvd_ctx_rreg = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 910a4a32ff78..63e5547cfb16 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1659,7 +1659,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = { .emit_ib = vcn_v2_0_dec_ring_emit_ib, .emit_fence = vcn_v2_0_dec_ring_emit_fence, .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush, - .test_ring = amdgpu_vcn_dec_ring_test_ring, + .test_ring = vcn_v2_0_dec_ring_test_ring, .test_ib = amdgpu_vcn_dec_ring_test_ib, .insert_nop = vcn_v2_0_dec_ring_insert_nop, .insert_start = vcn_v2_0_dec_ring_insert_start, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 407065cd8d57..e4b33c67b634 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -97,6 +97,8 @@ MODULE_FIRMWARE(FIRMWARE_RENOIR_DMUB); #if defined(CONFIG_DRM_AMD_DC_DCN3_0) #define FIRMWARE_SIENNA_CICHLID_DMUB "amdgpu/sienna_cichlid_dmcub.bin" MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID_DMUB); +#define FIRMWARE_NAVY_FLOUNDER_DMUB 
"amdgpu/navy_flounder_dmcub.bin" +MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER_DMUB); #endif #define FIRMWARE_RAVEN_DMCU "amdgpu/raven_dmcu.bin" @@ -1185,10 +1187,13 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) break; #if defined(CONFIG_DRM_AMD_DC_DCN3_0) case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: dmub_asic = DMUB_ASIC_DCN30; fw_name_dmub = FIRMWARE_SIENNA_CICHLID_DMUB; break; + case CHIP_NAVY_FLOUNDER: + dmub_asic = DMUB_ASIC_DCN30; + fw_name_dmub = FIRMWARE_NAVY_FLOUNDER_DMUB; + break; #endif default: @@ -8544,6 +8549,29 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, if (ret) goto fail; + /* Check connector changes */ + for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { + struct dm_connector_state *dm_old_con_state = to_dm_connector_state(old_con_state); + struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state); + + /* Skip connectors that are disabled or part of modeset already. */ + if (!old_con_state->crtc && !new_con_state->crtc) + continue; + + if (!new_con_state->crtc) + continue; + + new_crtc_state = drm_atomic_get_crtc_state(state, new_con_state->crtc); + if (IS_ERR(new_crtc_state)) { + ret = PTR_ERR(new_crtc_state); + goto fail; + } + + if (dm_old_con_state->abm_level != + dm_new_con_state->abm_level) + new_crtc_state->connectors_changed = true; + } + #if defined(CONFIG_DRM_AMD_DC_DCN) if (adev->asic_type >= CHIP_NAVI10) { for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 998f729976bf..e5a6d9115949 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -35,6 +35,7 @@ #include "dmub/dmub_srv.h" #include "resource.h" #include "dsc.h" +#include "dc_link_dp.h" struct dmub_debugfs_trace_header { uint32_t entry_count; @@ 
-1150,7 +1151,7 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf, return result; } -static ssize_t dp_dsc_bytes_per_pixel_read(struct file *f, char __user *buf, +static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { char *rd_buf = NULL; @@ -1186,7 +1187,7 @@ static ssize_t dp_dsc_bytes_per_pixel_read(struct file *f, char __user *buf, snprintf(rd_buf_ptr, str_len, "%d\n", - dsc_state.dsc_bytes_per_pixel); + dsc_state.dsc_bits_per_pixel); rd_buf_ptr += str_len; while (size) { @@ -1460,9 +1461,9 @@ static const struct file_operations dp_dsc_slice_height_debugfs_fops = { .llseek = default_llseek }; -static const struct file_operations dp_dsc_bytes_per_pixel_debugfs_fops = { +static const struct file_operations dp_dsc_bits_per_pixel_debugfs_fops = { .owner = THIS_MODULE, - .read = dp_dsc_bytes_per_pixel_read, + .read = dp_dsc_bits_per_pixel_read, .llseek = default_llseek }; @@ -1552,7 +1553,7 @@ static const struct { {"dsc_clock_en", &dp_dsc_clock_en_debugfs_fops}, {"dsc_slice_width", &dp_dsc_slice_width_debugfs_fops}, {"dsc_slice_height", &dp_dsc_slice_height_debugfs_fops}, - {"dsc_bytes_per_pixel", &dp_dsc_bytes_per_pixel_debugfs_fops}, + {"dsc_bits_per_pixel", &dp_dsc_bits_per_pixel_debugfs_fops}, {"dsc_pic_width", &dp_dsc_pic_width_debugfs_fops}, {"dsc_pic_height", &dp_dsc_pic_height_debugfs_fops}, {"dsc_chunk_size", &dp_dsc_chunk_size_debugfs_fops}, diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index 008d4d11339d..ad394aefa5d9 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -2834,6 +2834,8 @@ static const struct dc_vbios_funcs vbios_funcs = { .bios_parser_destroy = bios_parser_destroy, .get_board_layout_info = bios_get_board_layout_info, + + .get_atom_dc_golden_table = NULL }; static bool bios_parser_construct( diff --git 
a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index b8684131151d..078b7e344185 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2079,6 +2079,85 @@ static uint16_t bios_parser_pack_data_tables( return 0; } +static struct atom_dc_golden_table_v1 *bios_get_golden_table( + struct bios_parser *bp, + uint32_t rev_major, + uint32_t rev_minor, + uint16_t *dc_golden_table_ver) +{ + struct atom_display_controller_info_v4_4 *disp_cntl_tbl_4_4 = NULL; + uint32_t dc_golden_offset = 0; + *dc_golden_table_ver = 0; + + if (!DATA_TABLES(dce_info)) + return NULL; + + /* ver.4.4 or higher */ + switch (rev_major) { + case 4: + switch (rev_minor) { + case 4: + disp_cntl_tbl_4_4 = GET_IMAGE(struct atom_display_controller_info_v4_4, + DATA_TABLES(dce_info)); + if (!disp_cntl_tbl_4_4) + return NULL; + dc_golden_offset = DATA_TABLES(dce_info) + disp_cntl_tbl_4_4->dc_golden_table_offset; + *dc_golden_table_ver = disp_cntl_tbl_4_4->dc_golden_table_ver; + break; + } + break; + } + + if (!dc_golden_offset) + return NULL; + + if (*dc_golden_table_ver != 1) + return NULL; + + return GET_IMAGE(struct atom_dc_golden_table_v1, + dc_golden_offset); +} + +static enum bp_result bios_get_atom_dc_golden_table( + struct dc_bios *dcb) +{ + struct bios_parser *bp = BP_FROM_DCB(dcb); + enum bp_result result = BP_RESULT_OK; + struct atom_dc_golden_table_v1 *atom_dc_golden_table = NULL; + struct atom_common_table_header *header; + struct atom_data_revision tbl_revision; + uint16_t dc_golden_table_ver = 0; + + header = GET_IMAGE(struct atom_common_table_header, + DATA_TABLES(dce_info)); + if (!header) + return BP_RESULT_UNSUPPORTED; + + get_atom_data_table_revision(header, &tbl_revision); + + atom_dc_golden_table = bios_get_golden_table(bp, + tbl_revision.major, + tbl_revision.minor, + &dc_golden_table_ver); + + if (!atom_dc_golden_table) + return BP_RESULT_UNSUPPORTED; + + 
dcb->golden_table.dc_golden_table_ver = dc_golden_table_ver; + dcb->golden_table.aux_dphy_rx_control0_val = atom_dc_golden_table->aux_dphy_rx_control0_val; + dcb->golden_table.aux_dphy_rx_control1_val = atom_dc_golden_table->aux_dphy_rx_control1_val; + dcb->golden_table.aux_dphy_tx_control_val = atom_dc_golden_table->aux_dphy_tx_control_val; + dcb->golden_table.dc_gpio_aux_ctrl_0_val = atom_dc_golden_table->dc_gpio_aux_ctrl_0_val; + dcb->golden_table.dc_gpio_aux_ctrl_1_val = atom_dc_golden_table->dc_gpio_aux_ctrl_1_val; + dcb->golden_table.dc_gpio_aux_ctrl_2_val = atom_dc_golden_table->dc_gpio_aux_ctrl_2_val; + dcb->golden_table.dc_gpio_aux_ctrl_3_val = atom_dc_golden_table->dc_gpio_aux_ctrl_3_val; + dcb->golden_table.dc_gpio_aux_ctrl_4_val = atom_dc_golden_table->dc_gpio_aux_ctrl_4_val; + dcb->golden_table.dc_gpio_aux_ctrl_5_val = atom_dc_golden_table->dc_gpio_aux_ctrl_5_val; + + return result; +} + + static const struct dc_vbios_funcs vbios_funcs = { .get_connectors_number = bios_parser_get_connectors_number, @@ -2128,6 +2207,8 @@ static const struct dc_vbios_funcs vbios_funcs = { .get_board_layout_info = bios_get_board_layout_info, .pack_data_tables = bios_parser_pack_data_tables, + + .get_atom_dc_golden_table = bios_get_atom_dc_golden_table }; static bool bios_parser2_construct( diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c index 3fab9296918a..e133edc587d3 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c @@ -85,12 +85,77 @@ static int rv1_determine_dppclk_threshold(struct clk_mgr_internal *clk_mgr, stru return disp_clk_threshold; } -static void ramp_up_dispclk_with_dpp(struct clk_mgr_internal *clk_mgr, struct dc *dc, struct dc_clocks *new_clocks) +static void ramp_up_dispclk_with_dpp( + struct clk_mgr_internal *clk_mgr, + struct dc *dc, + struct dc_clocks *new_clocks, + bool safe_to_lower) 
{ int i; int dispclk_to_dpp_threshold = rv1_determine_dppclk_threshold(clk_mgr, new_clocks); bool request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz; + /* this function is to change dispclk, dppclk and dprefclk according to + * bandwidth requirement. Its call stack is rv1_update_clocks --> + * update_clocks --> dcn10_prepare_bandwidth / dcn10_optimize_bandwidth + * --> prepare_bandwidth / optimize_bandwidth. before change dcn hw, + * prepare_bandwidth will be called first to allow enough clock, + * watermark for change, after end of dcn hw change, optimize_bandwidth + * is executed to lower clock to save power for new dcn hw settings. + * + * below is sequence of commit_planes_for_stream: + * + * step 1: prepare_bandwidth - raise clock to have enough bandwidth + * step 2: lock_doublebuffer_enable + * step 3: pipe_control_lock(true) - make dchubp register change will + * not take effect right way + * step 4: apply_ctx_for_surface - program dchubp + * step 5: pipe_control_lock(false) - dchubp register change take effect + * step 6: optimize_bandwidth --> dc_post_update_surfaces_to_stream + * for full_date, optimize clock to save power + * + * at end of step 1, dcn clocks (dprefclk, dispclk, dppclk) may be + * changed for new dchubp configuration. but real dcn hub dchubps are + * still running with old configuration until end of step 5. this need + * clocks settings at step 1 should not less than that before step 1. + * this is checked by two conditions: 1. if (should_set_clock(safe_to_lower + * , new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) || + * new_clocks->dispclk_khz == clk_mgr_base->clks.dispclk_khz) + * 2. request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz + * + * the second condition is based on new dchubp configuration. dppclk + * for new dchubp may be different from dppclk before step 1. 
+ * for example, before step 1, dchubps are as below: + * pipe 0: recout=(0,40,1920,980) viewport=(0,0,1920,979) + * pipe 1: recout=(0,0,1920,1080) viewport=(0,0,1920,1080) + * for dppclk for pipe0 need dppclk = dispclk + * + * new dchubp pipe split configuration: + * pipe 0: recout=(0,0,960,1080) viewport=(0,0,960,1080) + * pipe 1: recout=(960,0,960,1080) viewport=(960,0,960,1080) + * dppclk only needs dppclk = dispclk /2. + * + * dispclk, dppclk are not lock by otg master lock. they take effect + * after step 1. during this transition, dispclk are the same, but + * dppclk is changed to half of previous clock for old dchubp + * configuration between step 1 and step 6. This may cause p-state + * warning intermittently. + * + * for new_clocks->dispclk_khz == clk_mgr_base->clks.dispclk_khz, we + * need make sure dppclk are not changed to less between step 1 and 6. + * for new_clocks->dispclk_khz > clk_mgr_base->clks.dispclk_khz, + * new display clock is raised, but we do not know ratio of + * new_clocks->dispclk_khz and clk_mgr_base->clks.dispclk_khz, + * new_clocks->dispclk_khz /2 does not guarantee equal or higher than + * old dppclk. we could ignore power saving different between + * dppclk = displck and dppclk = dispclk / 2 between step 1 and step 6. + * as long as safe_to_lower = false, set dpclk = dispclk to simplify + * condition check. + * todo: review this change for other asic. 
+ **/ + if (!safe_to_lower) + request_dpp_div = false; + /* set disp clk to dpp clk threshold */ clk_mgr->funcs->set_dispclk(clk_mgr, dispclk_to_dpp_threshold); @@ -209,7 +274,7 @@ static void rv1_update_clocks(struct clk_mgr *clk_mgr_base, /* program dispclk on = as a w/a for sleep resume clock ramping issues */ if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) || new_clocks->dispclk_khz == clk_mgr_base->clks.dispclk_khz) { - ramp_up_dispclk_with_dpp(clk_mgr, dc, new_clocks); + ramp_up_dispclk_with_dpp(clk_mgr, dc, new_clocks, safe_to_lower); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; send_request_to_lower = true; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index d94fdc52be37..9133646f6d5f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -323,9 +323,10 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base, /* if clock is being raised, increase refclk before lowering DTO */ if (update_dppclk || update_dispclk) dcn20_update_clocks_update_dentist(clk_mgr); - /* always update dtos unless clock is lowered and not safe to lower */ - if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz) - dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); + /* There is a check inside dcn20_update_clocks_update_dpp_dto which ensures + * that we do not lower dto when it is not safe to lower. 
We do not need to + * compare the current and new dppclk before calling this function.*/ + dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index ef0b5941bc50..92eb1ca1634f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1250,6 +1250,9 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c int i, k, l; struct dc_stream_state *dc_streams[MAX_STREAMS] = {0}; +#if defined(CONFIG_DRM_AMD_DC_DCN3_0) + dc_allow_idle_optimizations(dc, false); +#endif for (i = 0; i < context->stream_count; i++) dc_streams[i] = context->streams[i]; @@ -1838,6 +1841,11 @@ static enum surface_update_type check_update_surfaces_for_stream( int i; enum surface_update_type overall_type = UPDATE_TYPE_FAST; +#if defined(CONFIG_DRM_AMD_DC_DCN3_0) + if (dc->idle_optimizations_allowed) + overall_type = UPDATE_TYPE_FULL; + +#endif if (stream_status == NULL || stream_status->plane_count != surface_count) overall_type = UPDATE_TYPE_FULL; @@ -2306,8 +2314,14 @@ static void commit_planes_for_stream(struct dc *dc, } } - if (update_type == UPDATE_TYPE_FULL && dc->optimize_seamless_boot_streams == 0) { - dc->hwss.prepare_bandwidth(dc, context); + if (update_type == UPDATE_TYPE_FULL) { +#if defined(CONFIG_DRM_AMD_DC_DCN3_0) + dc_allow_idle_optimizations(dc, false); + +#endif + if (dc->optimize_seamless_boot_streams == 0) + dc->hwss.prepare_bandwidth(dc, context); + context_clock_trace(dc, context); } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 02742cca4d84..4bd6e03a7ef3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1540,6 +1540,9 @@ static bool dc_link_construct(struct dc_link *link, } } + if (bios->funcs->get_atom_dc_golden_table) + 
bios->funcs->get_atom_dc_golden_table(bios); + /* * TODO check if GPIO programmed correctly * @@ -3102,6 +3105,9 @@ void core_link_enable_stream( struct dc *dc = pipe_ctx->stream->ctx->dc; struct dc_stream_state *stream = pipe_ctx->stream; enum dc_status status; +#if defined(CONFIG_DRM_AMD_DC_DCN3_0) + enum otg_out_mux_dest otg_out_dest = OUT_MUX_DIO; +#endif DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); if (!IS_DIAG_DC(dc->ctx->dce_environment) && @@ -3136,8 +3142,8 @@ void core_link_enable_stream( pipe_ctx->stream->link->link_state_valid = true; #if defined(CONFIG_DRM_AMD_DC_DCN3_0) - if (pipe_ctx->stream_res.tg->funcs->set_out_mux) - pipe_ctx->stream_res.tg->funcs->set_out_mux(pipe_ctx->stream_res.tg, OUT_MUX_DIO); + if (pipe_ctx->stream_res.tg->funcs->set_out_mux) + pipe_ctx->stream_res.tg->funcs->set_out_mux(pipe_ctx->stream_res.tg, otg_out_dest); #endif if (dc_is_dvi_signal(pipe_ctx->stream->signal)) @@ -3276,7 +3282,7 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx) dc_is_virtual_signal(pipe_ctx->stream->signal)) return; - if (pipe_ctx->stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) { + if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) { core_link_set_avmute(pipe_ctx, true); } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 5cb7b834e459..9bc03f26efda 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1133,6 +1133,44 @@ static inline enum link_training_result perform_link_training_int( return status; } +static enum link_training_result check_link_loss_status( + struct dc_link *link, + const struct link_training_settings *link_training_setting) +{ + enum link_training_result status = LINK_TRAINING_SUCCESS; + union lane_status lane_status; + uint8_t dpcd_buf[6] = {0}; + uint32_t lane; + + core_link_read_dpcd( + link, + DP_SINK_COUNT, + (uint8_t *)(dpcd_buf), + sizeof(dpcd_buf)); + + /*parse lane status*/ + for (lane 
= 0; lane < link->cur_link_settings.lane_count; lane++) { + /* + * check lanes status + */ + lane_status.raw = get_nibble_at_index(&dpcd_buf[2], lane); + + if (!lane_status.bits.CHANNEL_EQ_DONE_0 || + !lane_status.bits.CR_DONE_0 || + !lane_status.bits.SYMBOL_LOCKED_0) { + /* if one of the channel equalization, clock + * recovery or symbol lock is dropped + * consider it as (link has been + * dropped) dp sink status has changed + */ + status = LINK_TRAINING_LINK_LOSS; + break; + } + } + + return status; +} + static void initialize_training_settings( struct dc_link *link, const struct dc_link_settings *link_setting, @@ -1372,6 +1410,9 @@ static void print_status_message( case LINK_TRAINING_LQA_FAIL: lt_result = "LQA failed"; break; + case LINK_TRAINING_LINK_LOSS: + lt_result = "Link loss"; + break; default: break; } @@ -1531,6 +1572,14 @@ enum link_training_result dc_link_dp_perform_link_training( status); } + /* delay 5ms after Main Link output idle pattern and then check + * DPCD 0202h. + */ + if (link->connector_signal != SIGNAL_TYPE_EDP && status == LINK_TRAINING_SUCCESS) { + msleep(5); + status = check_link_loss_status(link, <_settings); + } + /* 6. 
print status message*/ print_status_message(link, <_settings, status); @@ -4290,22 +4339,6 @@ void dp_set_fec_enable(struct dc_link *link, bool enable) void dpcd_set_source_specific_data(struct dc_link *link) { - uint8_t dspc = 0; - enum dc_status ret; - - ret = core_link_read_dpcd(link, DP_DOWN_STREAM_PORT_COUNT, &dspc, - sizeof(dspc)); - - if (ret != DC_OK) { - DC_LOG_ERROR("Error in DP aux read transaction," - " not writing source specific data\n"); - return; - } - - /* Return if OUI unsupported */ - if (!(dspc & DP_OUI_SUPPORT)) - return; - if (!link->dc->vendor_signature.is_valid) { struct dpcd_amd_signature amd_signature; amd_signature.AMD_IEEE_TxSignature_byte1 = 0x0; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 10d69ada88e3..0257a900fe2b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -246,20 +246,18 @@ struct dc_stream_status *dc_stream_get_status( #ifndef TRIM_FSFT /** - * dc_optimize_timing() - dc to optimize timing + * dc_optimize_timing_for_fsft() - dc to optimize timing */ -bool dc_optimize_timing( - struct dc_crtc_timing *timing, +bool dc_optimize_timing_for_fsft( + struct dc_stream_state *pStream, unsigned int max_input_rate_in_khz) { - //optimization is expected to assing a value to these: - //timing->pix_clk_100hz - //timing->v_front_porch - //timing->v_total - //timing->fast_transport_output_rate_100hz; - timing->fast_transport_output_rate_100hz = timing->pix_clk_100hz; + struct dc *dc; - return true; + dc = pStream->ctx->dc; + + return (dc->hwss.optimize_timing_for_fsft && + dc->hwss.optimize_timing_for_fsft(dc, &pStream->timing, max_input_rate_in_khz)); } #endif diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h index 845a3054f21f..d06d07042a12 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h +++ 
b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h @@ -133,6 +133,9 @@ struct dc_vbios_funcs { uint16_t (*pack_data_tables)( struct dc_bios *dcb, void *dst); + + enum bp_result (*get_atom_dc_golden_table)( + struct dc_bios *dcb); }; struct bios_registers { @@ -154,6 +157,7 @@ struct dc_bios { struct dc_firmware_info fw_info; bool fw_info_valid; struct dc_vram_info vram_info; + struct dc_golden_table golden_table; }; #endif /* DC_BIOS_TYPES_H */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index e4e85a159462..633442bc7ef2 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -424,8 +424,8 @@ struct dc_stream_status *dc_stream_get_status( struct dc_stream_state *dc_stream); #ifndef TRIM_FSFT -bool dc_optimize_timing( - struct dc_crtc_timing *timing, +bool dc_optimize_timing_for_fsft( + struct dc_stream_state *pStream, unsigned int max_input_rate_in_khz); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 29fe5389f973..946ba929c6f6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -890,6 +890,20 @@ struct dsc_dec_dpcd_caps { uint32_t branch_max_line_width; }; +struct dc_golden_table { + uint16_t dc_golden_table_ver; + uint32_t aux_dphy_rx_control0_val; + uint32_t aux_dphy_tx_control_val; + uint32_t aux_dphy_rx_control1_val; + uint32_t dc_gpio_aux_ctrl_0_val; + uint32_t dc_gpio_aux_ctrl_1_val; + uint32_t dc_gpio_aux_ctrl_2_val; + uint32_t dc_gpio_aux_ctrl_3_val; + uint32_t dc_gpio_aux_ctrl_4_val; + uint32_t dc_gpio_aux_ctrl_5_val; +}; + + #if defined(CONFIG_DRM_AMD_DC_DCN3_0) enum dc_gpu_mem_alloc_type { DC_MEM_ALLOC_TYPE_GART, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h index 384389f0e2c3..66027d496778 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h 
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h @@ -38,7 +38,8 @@ #define AUX_REG_LIST(id)\ SRI(AUX_CONTROL, DP_AUX, id), \ - SRI(AUX_DPHY_RX_CONTROL0, DP_AUX, id) + SRI(AUX_DPHY_RX_CONTROL0, DP_AUX, id), \ + SRI(AUX_DPHY_RX_CONTROL1, DP_AUX, id) #define HPD_REG_LIST(id)\ SRI(DC_HPD_CONTROL, HPD, id) @@ -107,6 +108,7 @@ struct dce110_link_enc_aux_registers { uint32_t AUX_CONTROL; uint32_t AUX_DPHY_RX_CONTROL0; + uint32_t AUX_DPHY_RX_CONTROL1; }; struct dce110_link_enc_hpd_registers { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index 82e67bd81f2d..5167d6b8a48d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -233,8 +233,8 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, copy_settings_data->frame_cap_ind = psr_context->psrFrameCaptureIndicationReq; copy_settings_data->debug.bitfields.visual_confirm = dc->dc->debug.visual_confirm == VISUAL_CONFIRM_PSR ? 
true : false; + copy_settings_data->debug.bitfields.use_hw_lock_mgr = 1; copy_settings_data->init_sdp_deadline = psr_context->sdpTransmitLineNumDeadline; - copy_settings_data->debug.bitfields.use_hw_lock_mgr = 0; dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index da0897fe3b54..a643927e272b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -390,6 +390,8 @@ void dcn10_log_hw_state(struct dc *dc, } DTN_INFO("\n"); + // dcn_dsc_state struct field bytes_per_pixel was renamed to bits_per_pixel + // TODO: Update golden log header to reflect this name change DTN_INFO("DSC: CLOCK_EN SLICE_WIDTH Bytes_pp\n"); for (i = 0; i < pool->res_cap->num_dsc; i++) { struct display_stream_compressor *dsc = pool->dscs[i]; @@ -400,7 +402,7 @@ void dcn10_log_hw_state(struct dc *dc, dsc->inst, s.dsc_clock_en, s.dsc_slice_width, - s.dsc_bytes_per_pixel); + s.dsc_bits_per_pixel); DTN_INFO("\n"); } DTN_INFO("\n"); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h index cf59ab0034dc..04dabed5f1c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h @@ -31,10 +31,10 @@ #define TO_DCN10_LINK_ENC(link_encoder)\ container_of(link_encoder, struct dcn10_link_encoder, base) - #define AUX_REG_LIST(id)\ SRI(AUX_CONTROL, DP_AUX, id), \ - SRI(AUX_DPHY_RX_CONTROL0, DP_AUX, id) + SRI(AUX_DPHY_RX_CONTROL0, DP_AUX, id), \ + SRI(AUX_DPHY_RX_CONTROL1, DP_AUX, id) #define HPD_REG_LIST(id)\ SRI(DC_HPD_CONTROL, HPD, id) @@ -73,6 +73,7 @@ struct dcn10_link_enc_aux_registers { uint32_t AUX_CONTROL; uint32_t AUX_DPHY_RX_CONTROL0; uint32_t AUX_DPHY_TX_CONTROL; + uint32_t AUX_DPHY_RX_CONTROL1; }; struct 
dcn10_link_enc_hpd_registers { @@ -443,7 +444,10 @@ struct dcn10_link_enc_registers { type AUX_TX_PRECHARGE_LEN; \ type AUX_TX_PRECHARGE_SYMBOLS; \ type AUX_MODE_DET_CHECK_DELAY;\ - type DPCS_DBG_CBUS_DIS + type DPCS_DBG_CBUS_DIS;\ + type AUX_RX_PRECHARGE_SKIP;\ + type AUX_RX_TIMEOUT_LEN;\ + type AUX_RX_TIMEOUT_LEN_MUL struct dcn10_link_enc_shift { DCN_LINK_ENCODER_REG_FIELD_LIST(uint8_t); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c index ba50214d6c32..79b640e202eb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c @@ -156,7 +156,7 @@ static void dsc2_read_state(struct display_stream_compressor *dsc, struct dcn_ds REG_GET(DSC_TOP_CONTROL, DSC_CLOCK_EN, &s->dsc_clock_en); REG_GET(DSCC_PPS_CONFIG3, SLICE_WIDTH, &s->dsc_slice_width); - REG_GET(DSCC_PPS_CONFIG1, BITS_PER_PIXEL, &s->dsc_bytes_per_pixel); + REG_GET(DSCC_PPS_CONFIG1, BITS_PER_PIXEL, &s->dsc_bits_per_pixel); REG_GET(DSCC_PPS_CONFIG3, SLICE_HEIGHT, &s->dsc_slice_height); REG_GET(DSCC_PPS_CONFIG1, CHUNK_SIZE, &s->dsc_chunk_size); REG_GET(DSCC_PPS_CONFIG2, PIC_WIDTH, &s->dsc_pic_width); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 7725a406c16e..66180b4332f1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -2498,3 +2498,30 @@ void dcn20_fpga_init_hw(struct dc *dc) tg->funcs->tg_init(tg); } } +#ifndef TRIM_FSFT +bool dcn20_optimize_timing_for_fsft(struct dc *dc, + struct dc_crtc_timing *timing, + unsigned int max_input_rate_in_khz) +{ + unsigned int old_v_front_porch; + unsigned int old_v_total; + unsigned int max_input_rate_in_100hz; + unsigned long long new_v_total; + + max_input_rate_in_100hz = max_input_rate_in_khz * 10; + if (max_input_rate_in_100hz < timing->pix_clk_100hz) + return false; + + old_v_total = timing->v_total; 
+ old_v_front_porch = timing->v_front_porch; + + timing->fast_transport_output_rate_100hz = timing->pix_clk_100hz; + timing->pix_clk_100hz = max_input_rate_in_100hz; + + new_v_total = div_u64((unsigned long long)old_v_total * max_input_rate_in_100hz, timing->pix_clk_100hz); + + timing->v_total = new_v_total; + timing->v_front_porch = old_v_front_porch + (timing->v_total - old_v_total); + return true; +} +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h index 63ce763f148e..83220e34c1a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.h @@ -132,5 +132,10 @@ int dcn20_init_sys_ctx(struct dce_hwseq *hws, struct dc *dc, struct dc_phy_addr_space_config *pa_config); +#ifndef TRIM_FSFT +bool dcn20_optimize_timing_for_fsft(struct dc *dc, + struct dc_crtc_timing *timing, + unsigned int max_input_rate_in_khz); +#endif #endif /* __DC_HWSS_DCN20_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c index 2380392b916e..3dde6f26de47 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c @@ -88,6 +88,9 @@ static const struct hw_sequencer_funcs dcn20_funcs = { .set_backlight_level = dce110_set_backlight_level, .set_abm_immediate_disable = dce110_set_abm_immediate_disable, .set_pipe = dce110_set_pipe, +#ifndef TRIM_FSFT + .optimize_timing_for_fsft = dcn20_optimize_timing_for_fsft, +#endif }; static const struct hwseq_private_funcs dcn20_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c index 8d209dae66e6..15c2ff264ff6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c @@ -309,7 +309,6 @@ bool 
dcn20_link_encoder_is_in_alt_mode(struct link_encoder *enc) void enc2_hw_init(struct link_encoder *enc) { struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); - /* 00 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__1to2 : 1/2 01 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__3to4 : 3/4 @@ -333,9 +332,18 @@ void enc2_hw_init(struct link_encoder *enc) AUX_RX_PHASE_DETECT_LEN, [21,20] = 0x3 default is 3 AUX_RX_DETECTION_THRESHOLD [30:28] = 1 */ - AUX_REG_WRITE(AUX_DPHY_RX_CONTROL0, 0x103d1110); + if (enc->ctx->dc_bios->golden_table.dc_golden_table_ver > 0) { + AUX_REG_WRITE(AUX_DPHY_RX_CONTROL0, enc->ctx->dc_bios->golden_table.aux_dphy_rx_control0_val); + + AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, enc->ctx->dc_bios->golden_table.aux_dphy_tx_control_val); + + AUX_REG_WRITE(AUX_DPHY_RX_CONTROL1, enc->ctx->dc_bios->golden_table.aux_dphy_rx_control1_val); + } else { + AUX_REG_WRITE(AUX_DPHY_RX_CONTROL0, 0x103d1110); + + AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, 0x21c4d); - AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, 0x21c7a); + } //AUX_DPHY_TX_REF_CONTROL'AUX_TX_REF_DIV HW default is 0x32; // Set AUX_TX_REF_DIV Divider to generate 2 MHz reference from refclk diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h index db09f40075c2..bf0044f7417e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.h @@ -191,7 +191,10 @@ LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_DETECTION_THRESHOLD, mask_sh), \ LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_TX_PRECHARGE_LEN, mask_sh),\ LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_TX_PRECHARGE_SYMBOLS, mask_sh),\ - LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_MODE_DET_CHECK_DELAY, mask_sh) + LE_SF(DP_AUX0_AUX_DPHY_TX_CONTROL, AUX_MODE_DET_CHECK_DELAY, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_PRECHARGE_SKIP, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, AUX_RX_TIMEOUT_LEN, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL1, 
AUX_RX_TIMEOUT_LEN_MUL, mask_sh) #define UNIPHY_DCN2_REG_LIST(id) \ SRI(CLOCK_ENABLE, SYMCLK, id), \ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 968a89bbcf24..790baf552695 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -2223,7 +2223,7 @@ int dcn20_populate_dml_pipes_from_context( if (!res_ctx->pipe_ctx[i].plane_state) { pipes[pipe_cnt].pipe.src.is_hsplit = pipes[pipe_cnt].pipe.dest.odm_combine != dm_odm_combine_mode_disabled; pipes[pipe_cnt].pipe.src.source_scan = dm_horz; - pipes[pipe_cnt].pipe.src.sw_mode = dm_sw_linear; + pipes[pipe_cnt].pipe.src.sw_mode = dm_sw_4kb_s; pipes[pipe_cnt].pipe.src.macro_tile_size = dm_64k_tile; pipes[pipe_cnt].pipe.src.viewport_width = timing->h_addressable; if (pipes[pipe_cnt].pipe.src.viewport_width > 1920) @@ -2235,7 +2235,7 @@ int dcn20_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.surface_width_y = pipes[pipe_cnt].pipe.src.viewport_width; pipes[pipe_cnt].pipe.src.surface_height_c = pipes[pipe_cnt].pipe.src.viewport_height; pipes[pipe_cnt].pipe.src.surface_width_c = pipes[pipe_cnt].pipe.src.viewport_width; - pipes[pipe_cnt].pipe.src.data_pitch = ((pipes[pipe_cnt].pipe.src.viewport_width + 63) / 64) * 64; /* linear sw only */ + pipes[pipe_cnt].pipe.src.data_pitch = ((pipes[pipe_cnt].pipe.src.viewport_width + 255) / 256) * 256; pipes[pipe_cnt].pipe.src.source_format = dm_444_32; pipes[pipe_cnt].pipe.dest.recout_width = pipes[pipe_cnt].pipe.src.viewport_width; /*vp_width/hratio*/ pipes[pipe_cnt].pipe.dest.recout_height = pipes[pipe_cnt].pipe.src.viewport_height; /*vp_height/vratio*/ @@ -3069,8 +3069,7 @@ void dcn20_calculate_dlg_params( int pipe_cnt, int vlevel) { - int i, j, pipe_idx, pipe_idx_unsplit; - bool visited[MAX_PIPES] = { 0 }; + int i, pipe_idx; /* Writeback MCIF_WB arbitration parameters */ dc->res_pool->funcs->set_mcif_arb_params(dc, context, 
pipes, pipe_cnt); @@ -3089,55 +3088,17 @@ void dcn20_calculate_dlg_params( if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; - /* - * An artifact of dml pipe split/odm is that pipes get merged back together for - * calculation. Therefore we need to only extract for first pipe in ascending index order - * and copy into the other split half. - */ - for (i = 0, pipe_idx = 0, pipe_idx_unsplit = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - if (!visited[pipe_idx]) { - display_pipe_source_params_st *src = &pipes[pipe_idx].pipe.src; - display_pipe_dest_params_st *dst = &pipes[pipe_idx].pipe.dest; - - dst->vstartup_start = context->bw_ctx.dml.vba.VStartup[pipe_idx_unsplit]; - dst->vupdate_offset = context->bw_ctx.dml.vba.VUpdateOffsetPix[pipe_idx_unsplit]; - dst->vupdate_width = context->bw_ctx.dml.vba.VUpdateWidthPix[pipe_idx_unsplit]; - dst->vready_offset = context->bw_ctx.dml.vba.VReadyOffsetPix[pipe_idx_unsplit]; - /* - * j iterates inside pipes array, unlike i which iterates inside - * pipe_ctx array - */ - if (src->is_hsplit) - for (j = pipe_idx + 1; j < pipe_cnt; j++) { - display_pipe_source_params_st *src_j = &pipes[j].pipe.src; - display_pipe_dest_params_st *dst_j = &pipes[j].pipe.dest; - - if (src_j->is_hsplit && !visited[j] - && src->hsplit_grp == src_j->hsplit_grp) { - dst_j->vstartup_start = context->bw_ctx.dml.vba.VStartup[pipe_idx_unsplit]; - dst_j->vupdate_offset = context->bw_ctx.dml.vba.VUpdateOffsetPix[pipe_idx_unsplit]; - dst_j->vupdate_width = context->bw_ctx.dml.vba.VUpdateWidthPix[pipe_idx_unsplit]; - dst_j->vready_offset = context->bw_ctx.dml.vba.VReadyOffsetPix[pipe_idx_unsplit]; - visited[j] = true; - } - } - visited[pipe_idx] = true; - pipe_idx_unsplit++; - } - pipe_idx++; - } - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { if (!context->res_ctx.pipe_ctx[i].stream) continue; + 
pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; - ASSERT(visited[pipe_idx]); context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest; pipe_idx++; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c index 177d0dc8927a..b187f71afa65 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c @@ -92,6 +92,9 @@ static const struct hw_sequencer_funcs dcn21_funcs = { .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, +#ifndef TRIM_FSFT + .optimize_timing_for_fsft = dcn20_optimize_timing_for_fsft, +#endif }; static const struct hwseq_private_funcs dcn21_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c index c29326e9856a..2ae159e2dd6e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c @@ -62,7 +62,7 @@ static const struct link_encoder_funcs dcn30_link_enc_funcs = { .read_state = link_enc2_read_state, .validate_output_with_stream = dcn30_link_encoder_validate_output_with_stream, - .hw_init = 
enc2_hw_init, + .hw_init = enc3_hw_init, .setup = dcn10_link_encoder_setup, .enable_tmds_output = dcn10_link_encoder_enable_tmds_output, .enable_dp_output = dcn20_link_encoder_enable_dp_output, @@ -203,3 +203,54 @@ void dcn30_link_encoder_construct( enc10->base.features.flags.bits.HDMI_6GB_EN = 0; } } + +#define AUX_REG(reg)\ + (enc10->aux_regs->reg) + +#define AUX_REG_READ(reg_name) \ + dm_read_reg(CTX, AUX_REG(reg_name)) + +#define AUX_REG_WRITE(reg_name, val) \ + dm_write_reg(CTX, AUX_REG(reg_name), val) +void enc3_hw_init(struct link_encoder *enc) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + +/* + 00 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__1to2 : 1/2 + 01 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__3to4 : 3/4 + 02 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__7to8 : 7/8 + 03 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__15to16 : 15/16 + 04 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__31to32 : 31/32 + 05 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__63to64 : 63/64 + 06 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__127to128 : 127/128 + 07 - DP_AUX_DPHY_RX_DETECTION_THRESHOLD__255to256 : 255/256 +*/ + +/* + AUX_REG_UPDATE_5(AUX_DPHY_RX_CONTROL0, + AUX_RX_START_WINDOW = 1 [6:4] + AUX_RX_RECEIVE_WINDOW = 1 default is 2 [10:8] + AUX_RX_HALF_SYM_DETECT_LEN = 1 [13:12] default is 1 + AUX_RX_TRANSITION_FILTER_EN = 1 [16] default is 1 + AUX_RX_ALLOW_BELOW_THRESHOLD_PHASE_DETECT [17] is 0 default is 0 + AUX_RX_ALLOW_BELOW_THRESHOLD_START [18] is 1 default is 1 + AUX_RX_ALLOW_BELOW_THRESHOLD_STOP [19] is 1 default is 1 + AUX_RX_PHASE_DETECT_LEN, [21,20] = 0x3 default is 3 + AUX_RX_DETECTION_THRESHOLD [30:28] = 1 +*/ + AUX_REG_WRITE(AUX_DPHY_RX_CONTROL0, 0x103d1110); + + AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, 0x21c7a); + + //AUX_DPHY_TX_REF_CONTROL'AUX_TX_REF_DIV HW default is 0x32; + // Set AUX_TX_REF_DIV Divider to generate 2 MHz reference from refclk + // 27MHz -> 0xd + // 100MHz -> 0x32 + // 48MHz -> 0x18 + + // Set TMDS_CTL0 to 1. This is a legacy setting. 
+ REG_UPDATE(TMDS_CTL_BITS, TMDS_CTL0, 1); + + dcn10_aux_initialize(enc10); +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h index 585d1ce63db1..8e9fd59ccde8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h @@ -73,4 +73,6 @@ void dcn30_link_encoder_construct( const struct dcn10_link_enc_shift *link_shift, const struct dcn10_link_enc_mask *link_mask); +void enc3_hw_init(struct link_encoder *enc); + #endif /* __DC_LINK_ENCODER__DCN30_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c index 1b354c219d0a..9afee7160490 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c @@ -26,6 +26,7 @@ #include "dce110/dce110_hw_sequencer.h" #include "dcn10/dcn10_hw_sequencer.h" #include "dcn20/dcn20_hwseq.h" +#include "dcn21/dcn21_hwseq.h" #include "dcn30_hwseq.h" static const struct hw_sequencer_funcs dcn30_funcs = { @@ -87,8 +88,8 @@ static const struct hw_sequencer_funcs dcn30_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .apply_idle_power_optimizations = dcn30_apply_idle_power_optimizations, - .set_backlight_level = dce110_set_backlight_level, - .set_abm_immediate_disable = dce110_set_abm_immediate_disable, + .set_backlight_level = dcn21_set_backlight_level, + .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, }; static const struct hwseq_private_funcs dcn30_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 7916a7ea9336..afdd4f0d9d71 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -154,23 
+154,11 @@ dml_get_pipe_attr_func(refcyc_per_meta_chunk_vblank_c_in_us, mode_lib->vba.TimeP dml_get_pipe_attr_func(refcyc_per_meta_chunk_flip_l_in_us, mode_lib->vba.TimePerMetaChunkFlip); dml_get_pipe_attr_func(refcyc_per_meta_chunk_flip_c_in_us, mode_lib->vba.TimePerChromaMetaChunkFlip); +dml_get_pipe_attr_func(vstartup, mode_lib->vba.VStartup); dml_get_pipe_attr_func(vupdate_offset, mode_lib->vba.VUpdateOffsetPix); dml_get_pipe_attr_func(vupdate_width, mode_lib->vba.VUpdateWidthPix); dml_get_pipe_attr_func(vready_offset, mode_lib->vba.VReadyOffsetPix); -unsigned int get_vstartup_calculated( - struct display_mode_lib *mode_lib, - const display_e2e_pipe_params_st *pipes, - unsigned int num_pipes, - unsigned int which_pipe) -{ - unsigned int which_plane; - - recalculate_params(mode_lib, pipes, num_pipes); - which_plane = mode_lib->vba.pipe_plane[which_pipe]; - return mode_lib->vba.VStartup[which_plane]; -} - double get_total_immediate_flip_bytes( struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes, @@ -479,7 +467,8 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) mode_lib->vba.AudioSampleLayout[mode_lib->vba.NumberOfActivePlanes] = 1; mode_lib->vba.DRAMClockChangeLatencyOverride = 0.0; - mode_lib->vba.DSCEnabled[mode_lib->vba.NumberOfActivePlanes] = dout->dsc_enable; + mode_lib->vba.DSCEnabled[mode_lib->vba.NumberOfActivePlanes] = dout->dsc_enable;; + mode_lib->vba.DSCEnable[mode_lib->vba.NumberOfActivePlanes] = dout->dsc_enable; mode_lib->vba.NumberOfDSCSlices[mode_lib->vba.NumberOfActivePlanes] = dout->dsc_slices; mode_lib->vba.DSCInputBitPerComponent[mode_lib->vba.NumberOfActivePlanes] = diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 756d8eb1221c..21e5111ea7a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -98,16 +98,11 @@ 
dml_get_pipe_attr_decl(refcyc_per_meta_chunk_vblank_c_in_us); dml_get_pipe_attr_decl(refcyc_per_meta_chunk_flip_l_in_us); dml_get_pipe_attr_decl(refcyc_per_meta_chunk_flip_c_in_us); +dml_get_pipe_attr_decl(vstartup); dml_get_pipe_attr_decl(vupdate_offset); dml_get_pipe_attr_decl(vupdate_width); dml_get_pipe_attr_decl(vready_offset); -unsigned int get_vstartup_calculated( - struct display_mode_lib *mode_lib, - const display_e2e_pipe_params_st *pipes, - unsigned int num_pipes, - unsigned int which_pipe); - double get_total_immediate_flip_bytes( struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h index 4e6e18bbef5d..72743058836d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h @@ -71,8 +71,9 @@ enum dentist_divider_range { #define CTX \ clk_mgr->base.ctx + #define DC_LOGGER \ - clk_mgr->ctx->logger + clk_mgr->base.ctx->logger diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h index 5915994f9eb8..f520e13aee4c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h @@ -55,7 +55,7 @@ struct dsc_optc_config { struct dcn_dsc_state { uint32_t dsc_clock_en; uint32_t dsc_slice_width; - uint32_t dsc_bytes_per_pixel; + uint32_t dsc_bits_per_pixel; uint32_t dsc_slice_height; uint32_t dsc_pic_width; uint32_t dsc_pic_height; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 720ce5e458d8..3c986717dcd5 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -116,6 +116,11 @@ struct hw_sequencer_funcs { void (*set_static_screen_control)(struct pipe_ctx **pipe_ctx, int num_pipes, const struct dc_static_screen_params 
*events); +#ifndef TRIM_FSFT + bool (*optimize_timing_for_fsft)(struct dc *dc, + struct dc_crtc_timing *timing, + unsigned int max_input_rate_in_khz); +#endif /* Stream Related */ void (*enable_stream)(struct pipe_ctx *pipe_ctx); diff --git a/drivers/gpu/drm/amd/display/include/link_service_types.h b/drivers/gpu/drm/amd/display/include/link_service_types.h index 4869d4562e4d..550f46e9b95f 100644 --- a/drivers/gpu/drm/amd/display/include/link_service_types.h +++ b/drivers/gpu/drm/amd/display/include/link_service_types.h @@ -66,6 +66,8 @@ enum link_training_result { /* other failure during EQ step */ LINK_TRAINING_EQ_FAIL_EQ, LINK_TRAINING_LQA_FAIL, + /* one of the CR,EQ or symbol lock is dropped */ + LINK_TRAINING_LINK_LOSS, }; struct link_training_settings { diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 7a2500fbf3f2..81820f3d6b3b 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -829,10 +829,13 @@ void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync, switch (packet_type) { case PACKET_TYPE_FS_V3: #ifndef TRIM_FSFT + // always populate with pixel rate. build_vrr_infopacket_v3( stream->signal, vrr, stream->timing.flags.FAST_TRANSPORT, - stream->timing.fast_transport_output_rate_100hz, + (stream->timing.flags.FAST_TRANSPORT) ? 
+ stream->timing.fast_transport_output_rate_100hz : + stream->timing.pix_clk_100hz, app_tf, infopacket); #else build_vrr_infopacket_v3(stream->signal, vrr, app_tf, infopacket); diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index c2544c81dfb2..3e526c394f6c 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -941,7 +941,6 @@ struct atom_display_controller_info_v4_1 uint8_t reserved3[8]; }; - struct atom_display_controller_info_v4_2 { struct atom_common_table_header table_header; @@ -976,6 +975,59 @@ struct atom_display_controller_info_v4_2 uint8_t reserved3[8]; }; +struct atom_display_controller_info_v4_4 { + struct atom_common_table_header table_header; + uint32_t display_caps; + uint32_t bootup_dispclk_10khz; + uint16_t dce_refclk_10khz; + uint16_t i2c_engine_refclk_10khz; + uint16_t dvi_ss_percentage; // in unit of 0.001% + uint16_t dvi_ss_rate_10hz; + uint16_t hdmi_ss_percentage; // in unit of 0.001% + uint16_t hdmi_ss_rate_10hz; + uint16_t dp_ss_percentage; // in unit of 0.001% + uint16_t dp_ss_rate_10hz; + uint8_t dvi_ss_mode; // enum of atom_spread_spectrum_mode + uint8_t hdmi_ss_mode; // enum of atom_spread_spectrum_mode + uint8_t dp_ss_mode; // enum of atom_spread_spectrum_mode + uint8_t ss_reserved; + uint8_t dfp_hardcode_mode_num; // DFP hardcode mode number defined in StandardVESA_TimingTable when EDID is not available + uint8_t dfp_hardcode_refreshrate;// DFP hardcode mode refreshrate defined in StandardVESA_TimingTable when EDID is not available + uint8_t vga_hardcode_mode_num; // VGA hardcode mode number defined in StandardVESA_TimingTable when EDID is not avablable + uint8_t vga_hardcode_refreshrate;// VGA hardcode mode number defined in StandardVESA_TimingTable when EDID is not avablable + uint16_t dpphy_refclk_10khz; + uint16_t hw_chip_id; + uint8_t dcnip_min_ver; + uint8_t dcnip_max_ver; + uint8_t max_disp_pipe_num; + uint8_t 
max_vbios_active_disp_pipum; + uint8_t max_ppll_num; + uint8_t max_disp_phy_num; + uint8_t max_aux_pairs; + uint8_t remotedisplayconfig; + uint32_t dispclk_pll_vco_freq; + uint32_t dp_ref_clk_freq; + uint32_t max_mclk_chg_lat; // Worst case blackout duration for a memory clock frequency (p-state) change, units of 100s of ns (0.1 us) + uint32_t max_sr_exit_lat; // Worst case memory self refresh exit time, units of 100ns of ns (0.1us) + uint32_t max_sr_enter_exit_lat; // Worst case memory self refresh entry followed by immediate exit time, units of 100ns of ns (0.1us) + uint16_t dc_golden_table_offset; // point of struct of atom_dc_golden_table_vxx + uint16_t dc_golden_table_ver; + uint32_t reserved3[3]; +}; + +struct atom_dc_golden_table_v1 +{ + uint32_t aux_dphy_rx_control0_val; + uint32_t aux_dphy_tx_control_val; + uint32_t aux_dphy_rx_control1_val; + uint32_t dc_gpio_aux_ctrl_0_val; + uint32_t dc_gpio_aux_ctrl_1_val; + uint32_t dc_gpio_aux_ctrl_2_val; + uint32_t dc_gpio_aux_ctrl_3_val; + uint32_t dc_gpio_aux_ctrl_4_val; + uint32_t dc_gpio_aux_ctrl_5_val; + uint32_t reserved[23]; +}; enum dce_info_caps_def { diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 838a369c9ec3..0826625573dc 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -133,6 +133,78 @@ int smu_get_dpm_freq_range(struct smu_context *smu, return ret; } +static int smu_dpm_set_vcn_enable_locked(struct smu_context *smu, + bool enable) +{ + struct smu_power_context *smu_power = &smu->smu_power; + struct smu_power_gate *power_gate = &smu_power->power_gate; + int ret = 0; + + if (!smu->ppt_funcs->dpm_set_vcn_enable) + return 0; + + if (atomic_read(&power_gate->vcn_gated) ^ enable) + return 0; + + ret = smu->ppt_funcs->dpm_set_vcn_enable(smu, enable); + if (!ret) + atomic_set(&power_gate->vcn_gated, !enable); + + return ret; +} + +static int smu_dpm_set_vcn_enable(struct smu_context *smu, + 
bool enable) +{ + struct smu_power_context *smu_power = &smu->smu_power; + struct smu_power_gate *power_gate = &smu_power->power_gate; + int ret = 0; + + mutex_lock(&power_gate->vcn_gate_lock); + + ret = smu_dpm_set_vcn_enable_locked(smu, enable); + + mutex_unlock(&power_gate->vcn_gate_lock); + + return ret; +} + +static int smu_dpm_set_jpeg_enable_locked(struct smu_context *smu, + bool enable) +{ + struct smu_power_context *smu_power = &smu->smu_power; + struct smu_power_gate *power_gate = &smu_power->power_gate; + int ret = 0; + + if (!smu->ppt_funcs->dpm_set_jpeg_enable) + return 0; + + if (atomic_read(&power_gate->jpeg_gated) ^ enable) + return 0; + + ret = smu->ppt_funcs->dpm_set_jpeg_enable(smu, enable); + if (!ret) + atomic_set(&power_gate->jpeg_gated, !enable); + + return ret; +} + +static int smu_dpm_set_jpeg_enable(struct smu_context *smu, + bool enable) +{ + struct smu_power_context *smu_power = &smu->smu_power; + struct smu_power_gate *power_gate = &smu_power->power_gate; + int ret = 0; + + mutex_lock(&power_gate->jpeg_gate_lock); + + ret = smu_dpm_set_jpeg_enable_locked(smu, enable); + + mutex_unlock(&power_gate->jpeg_gate_lock); + + return ret; +} + /** * smu_dpm_set_power_gate - power gate/ungate the specific IP block * @@ -353,6 +425,45 @@ static int smu_early_init(void *handle) return smu_set_funcs(adev); } +static int smu_set_default_dpm_table(struct smu_context *smu) +{ + struct smu_power_context *smu_power = &smu->smu_power; + struct smu_power_gate *power_gate = &smu_power->power_gate; + int vcn_gate, jpeg_gate; + int ret = 0; + + if (!smu->ppt_funcs->set_default_dpm_table) + return 0; + + mutex_lock(&power_gate->vcn_gate_lock); + mutex_lock(&power_gate->jpeg_gate_lock); + + vcn_gate = atomic_read(&power_gate->vcn_gated); + jpeg_gate = atomic_read(&power_gate->jpeg_gated); + + ret = smu_dpm_set_vcn_enable_locked(smu, true); + if (ret) + goto err0_out; + + ret = smu_dpm_set_jpeg_enable_locked(smu, true); + if (ret) + goto err1_out; + + ret = 
smu->ppt_funcs->set_default_dpm_table(smu); + if (ret) + dev_err(smu->adev->dev, + "Failed to setup default dpm clock tables!\n"); + + smu_dpm_set_jpeg_enable_locked(smu, !jpeg_gate); +err1_out: + smu_dpm_set_vcn_enable_locked(smu, !vcn_gate); +err0_out: + mutex_unlock(&power_gate->jpeg_gate_lock); + mutex_unlock(&power_gate->vcn_gate_lock); + + return ret; +} + static int smu_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -579,6 +690,10 @@ static int smu_smc_table_sw_init(struct smu_context *smu) if (ret) return ret; + ret = smu_i2c_init(smu, &smu->adev->pm.smu_i2c); + if (ret) + return ret; + return 0; } @@ -586,6 +701,8 @@ static int smu_smc_table_sw_fini(struct smu_context *smu) { int ret; + smu_i2c_fini(smu, &smu->adev->pm.smu_i2c); + ret = smu_free_memory_pool(smu); if (ret) return ret; @@ -643,6 +760,11 @@ static int smu_sw_init(void *handle) smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + atomic_set(&smu->smu_power.power_gate.vcn_gated, 1); + atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1); + mutex_init(&smu->smu_power.power_gate.vcn_gate_lock); + mutex_init(&smu->smu_power.power_gate.jpeg_gate_lock); + smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; @@ -734,7 +856,7 @@ static int smu_smc_hw_setup(struct smu_context *smu) uint32_t pcie_gen = 0, pcie_width = 0; int ret; - if (smu_is_dpm_running(smu) && adev->in_suspend) { + if (adev->in_suspend && smu_is_dpm_running(smu)) { dev_info(adev->dev, "dpm has been enabled\n"); return 0; } @@ -844,10 +966,6 @@ static int smu_smc_hw_setup(struct smu_context *smu) return ret; } - ret = smu_i2c_init(smu, &adev->pm.smu_i2c); - if (ret) - return ret; - ret = smu_disable_umc_cdr_12gbps_workaround(smu); if (ret) { 
dev_err(adev->dev, "Workaround failed to disable UMC CDR feature on 12Gbps SKU!\n"); @@ -1046,8 +1164,6 @@ static int smu_smc_hw_cleanup(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; int ret = 0; - smu_i2c_fini(smu, &adev->pm.smu_i2c); - cancel_work_sync(&smu->throttling_logging_work); ret = smu_disable_thermal_alert(smu); @@ -1590,6 +1706,9 @@ int smu_set_mp1_state(struct smu_context *smu, } ret = smu_send_smc_msg(smu, msg, NULL); + /* some asics may not support those messages */ + if (ret == -EINVAL) + ret = 0; if (ret) dev_err(smu->adev->dev, "[PrepareMp1] Failed!\n"); @@ -1944,6 +2063,10 @@ int smu_read_sensor(struct smu_context *smu, mutex_lock(&smu->mutex); + if (smu->ppt_funcs->read_sensor) + if (!smu->ppt_funcs->read_sensor(smu, sensor, data, size)) + goto unlock; + switch (sensor) { case AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK: *((uint32_t *)data) = pstate_table->gfxclk_pstate.standard * 100; @@ -1966,7 +2089,7 @@ int smu_read_sensor(struct smu_context *smu, *size = 4; break; case AMDGPU_PP_SENSOR_VCN_POWER_STATE: - *(uint32_t *)data = smu->smu_power.power_gate.vcn_gated ? 0 : 1; + *(uint32_t *)data = atomic_read(&smu->smu_power.power_gate.vcn_gated) ? 
0: 1; *size = 4; break; case AMDGPU_PP_SENSOR_MIN_FAN_RPM: @@ -1974,11 +2097,12 @@ int smu_read_sensor(struct smu_context *smu, *size = 4; break; default: - if (smu->ppt_funcs->read_sensor) - ret = smu->ppt_funcs->read_sensor(smu, sensor, data, size); + *size = 0; + ret = -EOPNOTSUPP; break; } +unlock: mutex_unlock(&smu->mutex); return ret; diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index 3b9182c8c53f..6c991de8f371 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -1849,8 +1849,6 @@ static bool arcturus_is_dpm_running(struct smu_context *smu) static int arcturus_dpm_set_vcn_enable(struct smu_context *smu, bool enable) { - struct smu_power_context *smu_power = &smu->smu_power; - struct smu_power_gate *power_gate = &smu_power->power_gate; int ret = 0; if (enable) { @@ -1861,7 +1859,6 @@ static int arcturus_dpm_set_vcn_enable(struct smu_context *smu, bool enable) return ret; } } - power_gate->vcn_gated = false; } else { if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) { ret = smu_cmn_feature_set_enabled(smu, SMU_FEATURE_VCN_PG_BIT, 0); @@ -1870,7 +1867,6 @@ static int arcturus_dpm_set_vcn_enable(struct smu_context *smu, bool enable) return ret; } } - power_gate->vcn_gated = true; } return ret; @@ -2080,22 +2076,11 @@ static const struct i2c_algorithm arcturus_i2c_algo = { .functionality = arcturus_i2c_func, }; -static bool arcturus_i2c_adapter_is_added(struct i2c_adapter *control) -{ - struct amdgpu_device *adev = to_amdgpu_device(control); - - return control->dev.parent == &adev->pdev->dev; -} - static int arcturus_i2c_control_init(struct smu_context *smu, struct i2c_adapter *control) { struct amdgpu_device *adev = to_amdgpu_device(control); int res; - /* smu_i2c_eeprom_init may be called twice in sriov */ - if (arcturus_i2c_adapter_is_added(control)) - return 0; - control->owner = THIS_MODULE; control->class = I2C_CLASS_SPD; 
control->dev.parent = &adev->pdev->dev; @@ -2111,9 +2096,6 @@ static int arcturus_i2c_control_init(struct smu_context *smu, struct i2c_adapter static void arcturus_i2c_control_fini(struct smu_context *smu, struct i2c_adapter *control) { - if (!arcturus_i2c_adapter_is_added(control)) - return; - i2c_del_adapter(control); } diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 28312d6dc187..074458eb5407 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -292,8 +292,10 @@ struct smu_dpm_context { struct smu_power_gate { bool uvd_gated; bool vce_gated; - bool vcn_gated; - bool jpeg_gated; + atomic_t vcn_gated; + atomic_t jpeg_gated; + struct mutex vcn_gate_lock; + struct mutex jpeg_gate_lock; }; struct smu_power_context { diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_sienna_cichlid.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_sienna_cichlid.h index b2232e24d82f..aa2708fccb6d 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_sienna_cichlid.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_sienna_cichlid.h @@ -27,7 +27,7 @@ // *** IMPORTANT *** // SMU TEAM: Always increment the interface version if // any structure is changed in this file -#define SMU11_DRIVER_IF_VERSION 0x33 +#define SMU11_DRIVER_IF_VERSION 0x34 #define PPTABLE_Sienna_Cichlid_SMU_VERSION 5 @@ -968,9 +968,15 @@ typedef struct { typedef struct { uint32_t CurrClock[PPCLK_COUNT]; - uint16_t AverageGfxclkFrequency; - uint16_t AverageFclkFrequency; - uint16_t AverageUclkFrequency ; + + uint16_t AverageGfxclkFrequencyPreDs; + uint16_t AverageGfxclkFrequencyPostDs; + uint16_t AverageFclkFrequencyPreDs; + uint16_t AverageFclkFrequencyPostDs; + uint16_t AverageUclkFrequencyPreDs ; + uint16_t AverageUclkFrequencyPostDs ; + + uint16_t AverageGfxActivity ; uint16_t AverageUclkActivity ; uint8_t CurrSocVoltageOffset ; @@ -988,6 +994,7 @@ typedef 
struct { uint16_t TemperatureLiquid0 ; uint16_t TemperatureLiquid1 ; uint16_t TemperaturePlx ; + uint16_t Padding16 ; uint32_t ThrottlerStatus ; uint8_t LinkDpmLevel; @@ -1006,8 +1013,10 @@ typedef struct { uint16_t AverageDclk0Frequency ; uint16_t AverageVclk1Frequency ; uint16_t AverageDclk1Frequency ; - uint16_t VcnActivityPercentage ; //place holder, David N. to provide full sequence - uint16_t padding16_2; + uint16_t VcnActivityPercentage ; //place holder, David N. to provide full sequence + uint8_t PcieRate ; + uint8_t PcieWidth ; + } SmuMetrics_t; typedef struct { diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h index 429f5aa8924a..6a42331aba8a 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h @@ -30,8 +30,8 @@ #define SMU11_DRIVER_IF_VERSION_NV10 0x36 #define SMU11_DRIVER_IF_VERSION_NV12 0x33 #define SMU11_DRIVER_IF_VERSION_NV14 0x36 -#define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x33 -#define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0x2 +#define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x34 +#define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0x3 /* MP Apertures */ #define MP0_Public 0x03800000 diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 6aaf483858a0..9f62af9abd23 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -785,8 +785,6 @@ static int navi10_set_default_dpm_table(struct smu_context *smu) static int navi10_dpm_set_vcn_enable(struct smu_context *smu, bool enable) { - struct smu_power_context *smu_power = &smu->smu_power; - struct smu_power_gate *power_gate = &smu_power->power_gate; int ret = 0; if (enable) { @@ -796,14 +794,12 @@ static int navi10_dpm_set_vcn_enable(struct smu_context *smu, bool enable) if (ret) return ret; } - power_gate->vcn_gated = false; } else { if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) 
{ ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PowerDownVcn, NULL); if (ret) return ret; } - power_gate->vcn_gated = true; } return ret; @@ -811,8 +807,6 @@ static int navi10_dpm_set_vcn_enable(struct smu_context *smu, bool enable) static int navi10_dpm_set_jpeg_enable(struct smu_context *smu, bool enable) { - struct smu_power_context *smu_power = &smu->smu_power; - struct smu_power_gate *power_gate = &smu_power->power_gate; int ret = 0; if (enable) { @@ -821,14 +815,12 @@ static int navi10_dpm_set_jpeg_enable(struct smu_context *smu, bool enable) if (ret) return ret; } - power_gate->jpeg_gated = false; } else { if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_JPEG_PG_BIT)) { ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PowerDownJpeg, NULL); if (ret) return ret; } - power_gate->jpeg_gated = true; } return ret; @@ -2457,22 +2449,11 @@ static const struct i2c_algorithm navi10_i2c_algo = { .functionality = navi10_i2c_func, }; -static bool navi10_i2c_adapter_is_added(struct i2c_adapter *control) -{ - struct amdgpu_device *adev = to_amdgpu_device(control); - - return control->dev.parent == &adev->pdev->dev; -} - static int navi10_i2c_control_init(struct smu_context *smu, struct i2c_adapter *control) { struct amdgpu_device *adev = to_amdgpu_device(control); int res; - /* smu_i2c_eeprom_init may be called twice in sriov */ - if (navi10_i2c_adapter_is_added(control)) - return 0; - control->owner = THIS_MODULE; control->class = I2C_CLASS_SPD; control->dev.parent = &adev->pdev->dev; @@ -2488,9 +2469,6 @@ static int navi10_i2c_control_init(struct smu_context *smu, struct i2c_adapter * static void navi10_i2c_control_fini(struct smu_context *smu, struct i2c_adapter *control) { - if (!navi10_i2c_adapter_is_added(control)) - return; - i2c_del_adapter(control); } diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c index 575ae4be98a2..dbb676c482fd 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c +++ 
b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c @@ -459,8 +459,6 @@ static enum amd_pm_state_type renoir_get_current_power_state(struct smu_context static int renoir_dpm_set_vcn_enable(struct smu_context *smu, bool enable) { - struct smu_power_context *smu_power = &smu->smu_power; - struct smu_power_gate *power_gate = &smu_power->power_gate; int ret = 0; if (enable) { @@ -470,14 +468,12 @@ static int renoir_dpm_set_vcn_enable(struct smu_context *smu, bool enable) if (ret) return ret; } - power_gate->vcn_gated = false; } else { if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) { ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PowerDownVcn, NULL); if (ret) return ret; } - power_gate->vcn_gated = true; } return ret; @@ -485,8 +481,6 @@ static int renoir_dpm_set_vcn_enable(struct smu_context *smu, bool enable) static int renoir_dpm_set_jpeg_enable(struct smu_context *smu, bool enable) { - struct smu_power_context *smu_power = &smu->smu_power; - struct smu_power_gate *power_gate = &smu_power->power_gate; int ret = 0; if (enable) { @@ -495,14 +489,12 @@ static int renoir_dpm_set_jpeg_enable(struct smu_context *smu, bool enable) if (ret) return ret; } - power_gate->jpeg_gated = false; } else { if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_JPEG_PG_BIT)) { ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_PowerDownJpeg, 0, NULL); if (ret) return ret; } - power_gate->jpeg_gated = true; } return ret; diff --git a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c index 59da3ca2a4ca..3865dbed5f93 100644 --- a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c @@ -70,14 +70,16 @@ FEATURE_MASK(FEATURE_DPM_FCLK_BIT) | \ FEATURE_MASK(FEATURE_DPM_DCEFCLK_BIT)) +#define SMU_11_0_7_GFX_BUSY_THRESHOLD 15 + static struct cmn2asic_msg_mapping sienna_cichlid_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, 
PPSMC_MSG_GetSmuVersion, 1), MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 1), - MSG_MAP(SetAllowedFeaturesMaskLow, PPSMC_MSG_SetAllowedFeaturesMaskLow, 1), - MSG_MAP(SetAllowedFeaturesMaskHigh, PPSMC_MSG_SetAllowedFeaturesMaskHigh, 1), - MSG_MAP(EnableAllSmuFeatures, PPSMC_MSG_EnableAllSmuFeatures, 1), - MSG_MAP(DisableAllSmuFeatures, PPSMC_MSG_DisableAllSmuFeatures, 1), + MSG_MAP(SetAllowedFeaturesMaskLow, PPSMC_MSG_SetAllowedFeaturesMaskLow, 0), + MSG_MAP(SetAllowedFeaturesMaskHigh, PPSMC_MSG_SetAllowedFeaturesMaskHigh, 0), + MSG_MAP(EnableAllSmuFeatures, PPSMC_MSG_EnableAllSmuFeatures, 0), + MSG_MAP(DisableAllSmuFeatures, PPSMC_MSG_DisableAllSmuFeatures, 0), MSG_MAP(EnableSmuFeaturesLow, PPSMC_MSG_EnableSmuFeaturesLow, 1), MSG_MAP(EnableSmuFeaturesHigh, PPSMC_MSG_EnableSmuFeaturesHigh, 1), MSG_MAP(DisableSmuFeaturesLow, PPSMC_MSG_DisableSmuFeaturesLow, 1), @@ -85,42 +87,43 @@ static struct cmn2asic_msg_mapping sienna_cichlid_message_map[SMU_MSG_MAX_COUNT] MSG_MAP(GetEnabledSmuFeaturesLow, PPSMC_MSG_GetRunningSmuFeaturesLow, 1), MSG_MAP(GetEnabledSmuFeaturesHigh, PPSMC_MSG_GetRunningSmuFeaturesHigh, 1), MSG_MAP(SetWorkloadMask, PPSMC_MSG_SetWorkloadMask, 1), - MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 1), - MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh, 1), - MSG_MAP(SetDriverDramAddrLow, PPSMC_MSG_SetDriverDramAddrLow, 1), - MSG_MAP(SetToolsDramAddrHigh, PPSMC_MSG_SetToolsDramAddrHigh, 1), - MSG_MAP(SetToolsDramAddrLow, PPSMC_MSG_SetToolsDramAddrLow, 1), - MSG_MAP(TransferTableSmu2Dram, PPSMC_MSG_TransferTableSmu2Dram, 1), - MSG_MAP(TransferTableDram2Smu, PPSMC_MSG_TransferTableDram2Smu, 1), - MSG_MAP(UseDefaultPPTable, PPSMC_MSG_UseDefaultPPTable, 1), - MSG_MAP(EnterBaco, PPSMC_MSG_EnterBaco, 1), - MSG_MAP(SetSoftMinByFreq, PPSMC_MSG_SetSoftMinByFreq, 1), - MSG_MAP(SetSoftMaxByFreq, PPSMC_MSG_SetSoftMaxByFreq, 1), + MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 0), + MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh, 
0), + MSG_MAP(SetDriverDramAddrLow, PPSMC_MSG_SetDriverDramAddrLow, 0), + MSG_MAP(SetToolsDramAddrHigh, PPSMC_MSG_SetToolsDramAddrHigh, 0), + MSG_MAP(SetToolsDramAddrLow, PPSMC_MSG_SetToolsDramAddrLow, 0), + MSG_MAP(TransferTableSmu2Dram, PPSMC_MSG_TransferTableSmu2Dram, 0), + MSG_MAP(TransferTableDram2Smu, PPSMC_MSG_TransferTableDram2Smu, 0), + MSG_MAP(UseDefaultPPTable, PPSMC_MSG_UseDefaultPPTable, 0), + MSG_MAP(EnterBaco, PPSMC_MSG_EnterBaco, 0), + MSG_MAP(SetSoftMinByFreq, PPSMC_MSG_SetSoftMinByFreq, 0), + MSG_MAP(SetSoftMaxByFreq, PPSMC_MSG_SetSoftMaxByFreq, 0), MSG_MAP(SetHardMinByFreq, PPSMC_MSG_SetHardMinByFreq, 1), - MSG_MAP(SetHardMaxByFreq, PPSMC_MSG_SetHardMaxByFreq, 1), + MSG_MAP(SetHardMaxByFreq, PPSMC_MSG_SetHardMaxByFreq, 0), MSG_MAP(GetMinDpmFreq, PPSMC_MSG_GetMinDpmFreq, 1), MSG_MAP(GetMaxDpmFreq, PPSMC_MSG_GetMaxDpmFreq, 1), MSG_MAP(GetDpmFreqByIndex, PPSMC_MSG_GetDpmFreqByIndex, 1), - MSG_MAP(SetGeminiMode, PPSMC_MSG_SetGeminiMode, 1), - MSG_MAP(SetGeminiApertureHigh, PPSMC_MSG_SetGeminiApertureHigh, 1), - MSG_MAP(SetGeminiApertureLow, PPSMC_MSG_SetGeminiApertureLow, 1), - MSG_MAP(OverridePcieParameters, PPSMC_MSG_OverridePcieParameters, 1), - MSG_MAP(ReenableAcDcInterrupt, PPSMC_MSG_ReenableAcDcInterrupt, 1), - MSG_MAP(NotifyPowerSource, PPSMC_MSG_NotifyPowerSource, 1), - MSG_MAP(SetUclkFastSwitch, PPSMC_MSG_SetUclkFastSwitch, 1), - MSG_MAP(SetVideoFps, PPSMC_MSG_SetVideoFps, 1), + MSG_MAP(SetGeminiMode, PPSMC_MSG_SetGeminiMode, 0), + MSG_MAP(SetGeminiApertureHigh, PPSMC_MSG_SetGeminiApertureHigh, 0), + MSG_MAP(SetGeminiApertureLow, PPSMC_MSG_SetGeminiApertureLow, 0), + MSG_MAP(OverridePcieParameters, PPSMC_MSG_OverridePcieParameters, 0), + MSG_MAP(ReenableAcDcInterrupt, PPSMC_MSG_ReenableAcDcInterrupt, 0), + MSG_MAP(NotifyPowerSource, PPSMC_MSG_NotifyPowerSource, 0), + MSG_MAP(SetUclkFastSwitch, PPSMC_MSG_SetUclkFastSwitch, 0), + MSG_MAP(SetVideoFps, PPSMC_MSG_SetVideoFps, 0), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 1), - 
MSG_MAP(AllowGfxOff, PPSMC_MSG_AllowGfxOff, 1), - MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 1), - MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 1), + MSG_MAP(AllowGfxOff, PPSMC_MSG_AllowGfxOff, 0), + MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 0), + MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 0), MSG_MAP(GetDcModeMaxDpmFreq, PPSMC_MSG_GetDcModeMaxDpmFreq, 1), - MSG_MAP(ExitBaco, PPSMC_MSG_ExitBaco, 1), - MSG_MAP(PowerUpVcn, PPSMC_MSG_PowerUpVcn, 1), - MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn, 1), - MSG_MAP(PowerUpJpeg, PPSMC_MSG_PowerUpJpeg, 1), - MSG_MAP(PowerDownJpeg, PPSMC_MSG_PowerDownJpeg, 1), - MSG_MAP(BacoAudioD3PME, PPSMC_MSG_BacoAudioD3PME, 1), - MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 1), + MSG_MAP(ExitBaco, PPSMC_MSG_ExitBaco, 0), + MSG_MAP(PowerUpVcn, PPSMC_MSG_PowerUpVcn, 0), + MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn, 0), + MSG_MAP(PowerUpJpeg, PPSMC_MSG_PowerUpJpeg, 0), + MSG_MAP(PowerDownJpeg, PPSMC_MSG_PowerDownJpeg, 0), + MSG_MAP(BacoAudioD3PME, PPSMC_MSG_BacoAudioD3PME, 0), + MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), + MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0), }; static struct cmn2asic_mapping sienna_cichlid_clk_map[SMU_CLK_COUNT] = { @@ -442,13 +445,16 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu, *value = metrics->CurrClock[PPCLK_DCEFCLK]; break; case METRICS_AVERAGE_GFXCLK: - *value = metrics->AverageGfxclkFrequency; + if (metrics->AverageGfxActivity <= SMU_11_0_7_GFX_BUSY_THRESHOLD) + *value = metrics->AverageGfxclkFrequencyPostDs; + else + *value = metrics->AverageGfxclkFrequencyPreDs; break; case METRICS_AVERAGE_FCLK: - *value = metrics->AverageFclkFrequency; + *value = metrics->AverageFclkFrequencyPostDs; break; case METRICS_AVERAGE_UCLK: - *value = metrics->AverageUclkFrequency; + *value = metrics->AverageUclkFrequencyPostDs; break; case METRICS_AVERAGE_GFXACTIVITY: *value = metrics->AverageGfxActivity; @@ -760,10 +766,7 @@ static int sienna_cichlid_set_default_dpm_table(struct smu_context *smu) 
static int sienna_cichlid_dpm_set_vcn_enable(struct smu_context *smu, bool enable) { - struct smu_power_context *smu_power = &smu->smu_power; - struct smu_power_gate *power_gate = &smu_power->power_gate; struct amdgpu_device *adev = smu->adev; - int ret = 0; if (enable) { @@ -779,7 +782,6 @@ static int sienna_cichlid_dpm_set_vcn_enable(struct smu_context *smu, bool enabl return ret; } } - power_gate->vcn_gated = false; } else { if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) { ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_PowerDownVcn, 0, NULL); @@ -792,7 +794,6 @@ static int sienna_cichlid_dpm_set_vcn_enable(struct smu_context *smu, bool enabl return ret; } } - power_gate->vcn_gated = true; } return ret; @@ -800,8 +801,6 @@ static int sienna_cichlid_dpm_set_vcn_enable(struct smu_context *smu, bool enabl static int sienna_cichlid_dpm_set_jpeg_enable(struct smu_context *smu, bool enable) { - struct smu_power_context *smu_power = &smu->smu_power; - struct smu_power_gate *power_gate = &smu_power->power_gate; int ret = 0; if (enable) { @@ -810,14 +809,12 @@ static int sienna_cichlid_dpm_set_jpeg_enable(struct smu_context *smu, bool enab if (ret) return ret; } - power_gate->jpeg_gated = false; } else { if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) { ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_PowerDownJpeg, 0, NULL); if (ret) return ret; } - power_gate->jpeg_gated = true; } return ret; @@ -2624,22 +2621,11 @@ static const struct i2c_algorithm sienna_cichlid_i2c_algo = { .functionality = sienna_cichlid_i2c_func, }; -static bool sienna_cichlid_i2c_adapter_is_added(struct i2c_adapter *control) -{ - struct amdgpu_device *adev = to_amdgpu_device(control); - - return control->dev.parent == &adev->pdev->dev; -} - static int sienna_cichlid_i2c_control_init(struct smu_context *smu, struct i2c_adapter *control) { struct amdgpu_device *adev = to_amdgpu_device(control); int res; - /* smu_i2c_eeprom_init may be called twice in sriov */ - 
if (sienna_cichlid_i2c_adapter_is_added(control)) - return 0; - control->owner = THIS_MODULE; control->class = I2C_CLASS_SPD; control->dev.parent = &adev->pdev->dev; @@ -2655,9 +2641,6 @@ static int sienna_cichlid_i2c_control_init(struct smu_context *smu, struct i2c_a static void sienna_cichlid_i2c_control_fini(struct smu_context *smu, struct i2c_adapter *control) { - if (!sienna_cichlid_i2c_adapter_is_added(control)) - return; - i2c_del_adapter(control); } diff --git a/drivers/gpu/drm/amd/powerplay/smu_cmn.c b/drivers/gpu/drm/amd/powerplay/smu_cmn.c index be4b678d0e60..5c23c44c33bd 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_cmn.c +++ b/drivers/gpu/drm/amd/powerplay/smu_cmn.c @@ -166,7 +166,7 @@ int smu_cmn_to_asic_specific_index(struct smu_context *smu, switch (type) { case CMN2ASIC_MAPPING_MSG: - if (index > SMU_MSG_MAX_COUNT || + if (index >= SMU_MSG_MAX_COUNT || !smu->message_map) return -EINVAL; @@ -181,7 +181,7 @@ int smu_cmn_to_asic_specific_index(struct smu_context *smu, return msg_mapping.map_to; case CMN2ASIC_MAPPING_CLK: - if (index > SMU_CLK_COUNT || + if (index >= SMU_CLK_COUNT || !smu->clock_map) return -EINVAL; @@ -192,7 +192,7 @@ int smu_cmn_to_asic_specific_index(struct smu_context *smu, return mapping.map_to; case CMN2ASIC_MAPPING_FEATURE: - if (index > SMU_FEATURE_COUNT || + if (index >= SMU_FEATURE_COUNT || !smu->feature_map) return -EINVAL; @@ -203,7 +203,7 @@ int smu_cmn_to_asic_specific_index(struct smu_context *smu, return mapping.map_to; case CMN2ASIC_MAPPING_TABLE: - if (index > SMU_TABLE_COUNT || + if (index >= SMU_TABLE_COUNT || !smu->table_map) return -EINVAL; @@ -214,7 +214,7 @@ int smu_cmn_to_asic_specific_index(struct smu_context *smu, return mapping.map_to; case CMN2ASIC_MAPPING_PWR: - if (index > SMU_POWER_SOURCE_COUNT || + if (index >= SMU_POWER_SOURCE_COUNT || !smu->pwr_src_map) return -EINVAL; diff --git a/drivers/gpu/drm/amd/powerplay/smu_internal.h b/drivers/gpu/drm/amd/powerplay/smu_internal.h index 
d0deaefd3feb..264073d4e263 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_internal.h +++ b/drivers/gpu/drm/amd/powerplay/smu_internal.h @@ -60,7 +60,6 @@ #define smu_disable_all_features_with_exception(smu, mask) smu_ppt_funcs(disable_all_features_with_exception, 0, smu, mask) #define smu_is_dpm_running(smu) smu_ppt_funcs(is_dpm_running, 0 , smu) #define smu_notify_display_change(smu) smu_ppt_funcs(notify_display_change, 0, smu) -#define smu_set_default_dpm_table(smu) smu_ppt_funcs(set_default_dpm_table, 0, smu) #define smu_populate_umd_state_clk(smu) smu_ppt_funcs(populate_umd_state_clk, 0, smu) #define smu_set_default_od8_settings(smu) smu_ppt_funcs(set_default_od8_settings, 0, smu) #define smu_enable_thermal_alert(smu) smu_ppt_funcs(enable_thermal_alert, 0, smu) @@ -77,8 +76,6 @@ #define smu_get_dal_power_level(smu, clocks) smu_ppt_funcs(get_dal_power_level, 0, smu, clocks) #define smu_get_perf_level(smu, designation, level) smu_ppt_funcs(get_perf_level, 0, smu, designation, level) #define smu_get_current_shallow_sleep_clocks(smu, clocks) smu_ppt_funcs(get_current_shallow_sleep_clocks, 0, smu, clocks) -#define smu_dpm_set_vcn_enable(smu, enable) smu_ppt_funcs(dpm_set_vcn_enable, 0, smu, enable) -#define smu_dpm_set_jpeg_enable(smu, enable) smu_ppt_funcs(dpm_set_jpeg_enable, 0, smu, enable) #define smu_set_watermarks_table(smu, clock_ranges) smu_ppt_funcs(set_watermarks_table, 0, smu, clock_ranges) #define smu_thermal_temperature_range_update(smu, range, rw) smu_ppt_funcs(thermal_temperature_range_update, 0, smu, range, rw) #define smu_register_irq_handler(smu) smu_ppt_funcs(register_irq_handler, 0, smu) diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index fd82402065e6..7b950a582a28 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -1029,6 +1029,7 @@ int smu_v11_0_gfx_off_control(struct smu_context *smu, bool enable) case CHIP_NAVI14: case CHIP_NAVI12: case 
CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) return 0; if (enable) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index 02159ca29fa2..c18169aa59ce 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -2725,7 +2725,10 @@ static int ci_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) static bool ci_is_dpm_running(struct pp_hwmgr *hwmgr) { - return ci_is_smc_ram_running(hwmgr); + return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, + VOLTAGE_CONTROLLER_ON)) + ? true : false; } static int ci_smu_init(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 09b32289497e..b23cb2fec3f3 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -4308,11 +4308,11 @@ bool drm_dp_mst_allocate_vcpi(struct drm_dp_mst_topology_mgr *mgr, { int ret; - port = drm_dp_mst_topology_get_port_validated(mgr, port); - if (!port) + if (slots < 0) return false; - if (slots < 0) + port = drm_dp_mst_topology_get_port_validated(mgr, port); + if (!port) return false; if (port->vcpi.vcpi > 0) { @@ -4328,6 +4328,7 @@ bool drm_dp_mst_allocate_vcpi(struct drm_dp_mst_topology_mgr *mgr, if (ret) { DRM_DEBUG_KMS("failed to init vcpi slots=%d max=63 ret=%d\n", DIV_ROUND_UP(pbn, mgr->pbn_div), ret); + drm_dp_mst_topology_put_port(port); goto out; } DRM_DEBUG_KMS("initing vcpi for pbn=%d slots=%d\n", diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index bc38322f306e..13068fdf4331 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -815,8 +815,7 @@ static void drm_dev_release(struct kref *ref) drm_managed_release(dev); - if (dev->managed.final_kfree) - kfree(dev->managed.final_kfree); + kfree(dev->managed.final_kfree); } /** diff --git 
a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index d4e7c8370565..19d73868490e 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -879,6 +879,9 @@ err: * @file_priv: drm file-private structure * * Open an object using the global name, returning a handle and the size. + * + * This handle (of course) holds a reference to the object, so the object + * will not go away until the handle is deleted. */ int drm_gem_open_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index d00ea384dcbf..58f5dc2f6dd5 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -121,6 +121,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T101HA"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* Asus T103HAF */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T103HAF"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* GPD MicroPC (generic strings, also match on bios date) */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Default string"), diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index e946032b13e4..2c2bf24140c9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -469,7 +469,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) locked = 1; } ret = pin_user_pages_remote - (work->task, mm, + (mm, obj->userptr.ptr + pinned * PAGE_SIZE, npages - pinned, flags, diff --git a/drivers/gpu/drm/omapdrm/dss/dispc.c b/drivers/gpu/drm/omapdrm/dss/dispc.c index 6639ee9b05d3..48593932bddf 100644 --- a/drivers/gpu/drm/omapdrm/dss/dispc.c +++ b/drivers/gpu/drm/omapdrm/dss/dispc.c @@ -4915,6 +4915,7 @@ static int dispc_runtime_resume(struct device 
*dev) static const struct dev_pm_ops dispc_pm_ops = { .runtime_suspend = dispc_runtime_suspend, .runtime_resume = dispc_runtime_resume, + SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) }; struct platform_driver omap_dispchw_driver = { diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index 79ddfbfd1b58..eeccf40bae41 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -5467,6 +5467,7 @@ static int dsi_runtime_resume(struct device *dev) static const struct dev_pm_ops dsi_pm_ops = { .runtime_suspend = dsi_runtime_suspend, .runtime_resume = dsi_runtime_resume, + SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) }; struct platform_driver omap_dsihw_driver = { diff --git a/drivers/gpu/drm/omapdrm/dss/dss.c b/drivers/gpu/drm/omapdrm/dss/dss.c index 4d5739fa4a5d..6ccbc29c4ce4 100644 --- a/drivers/gpu/drm/omapdrm/dss/dss.c +++ b/drivers/gpu/drm/omapdrm/dss/dss.c @@ -1614,6 +1614,7 @@ static int dss_runtime_resume(struct device *dev) static const struct dev_pm_ops dss_pm_ops = { .runtime_suspend = dss_runtime_suspend, .runtime_resume = dss_runtime_resume, + SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) }; struct platform_driver omap_dsshw_driver = { diff --git a/drivers/gpu/drm/omapdrm/dss/venc.c b/drivers/gpu/drm/omapdrm/dss/venc.c index 4406ce2a08b4..e0817934ee16 100644 --- a/drivers/gpu/drm/omapdrm/dss/venc.c +++ b/drivers/gpu/drm/omapdrm/dss/venc.c @@ -903,6 +903,7 @@ static int venc_runtime_resume(struct device *dev) static const struct dev_pm_ops venc_pm_ops = { .runtime_suspend = venc_runtime_suspend, .runtime_resume = venc_runtime_resume, + SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) }; static const struct of_device_id venc_of_match[] = { diff --git a/drivers/gpu/drm/omapdrm/omap_connector.c b/drivers/gpu/drm/omapdrm/omap_connector.c index 528764566b17..de95dc1b861c 
100644 --- a/drivers/gpu/drm/omapdrm/omap_connector.c +++ b/drivers/gpu/drm/omapdrm/omap_connector.c @@ -89,7 +89,7 @@ static enum drm_mode_status omap_connector_mode_valid(struct drm_connector *conn struct drm_display_mode *mode) { struct omap_connector *omap_connector = to_omap_connector(connector); - struct drm_display_mode new_mode = { { 0 } }; + struct drm_display_mode new_mode = {}; enum drm_mode_status status; status = omap_connector_mode_fixup(omap_connector->output, mode, diff --git a/drivers/gpu/drm/tidss/tidss_kms.c b/drivers/gpu/drm/tidss/tidss_kms.c index 808c8af58fd5..09485c7f0d6f 100644 --- a/drivers/gpu/drm/tidss/tidss_kms.c +++ b/drivers/gpu/drm/tidss/tidss_kms.c @@ -154,7 +154,7 @@ static int tidss_dispc_modeset_init(struct tidss_device *tidss) break; case DISPC_VP_DPI: enc_type = DRM_MODE_ENCODER_DPI; - conn_type = DRM_MODE_CONNECTOR_LVDS; + conn_type = DRM_MODE_CONNECTOR_DPI; break; default: WARN_ON(1); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index f297fd5e02d4..cc6a4e7551e3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -287,11 +287,12 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, */ if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) { - bool zero = !(old_man->flags & TTM_MEMTYPE_FLAG_FIXED); - - ret = ttm_tt_create(bo, zero); - if (ret) - goto out_err; + if (bo->ttm == NULL) { + bool zero = !(old_man->flags & TTM_MEMTYPE_FLAG_FIXED); + ret = ttm_tt_create(bo, zero); + if (ret) + goto out_err; + } ret = ttm_tt_set_placement_caching(bo->ttm, mem->placement); if (ret) @@ -652,8 +653,13 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, placement.num_busy_placement = 0; bdev->driver->evict_flags(bo, &placement); - if (!placement.num_placement && !placement.num_busy_placement) - return ttm_bo_pipeline_gutting(bo); + if (!placement.num_placement && !placement.num_busy_placement) { + ret = ttm_bo_pipeline_gutting(bo); + if (ret) + return ret; + + return 
ttm_tt_create(bo, false); + } evict_mem = bo->mem; evict_mem.mm_node = NULL; @@ -1192,8 +1198,13 @@ int ttm_bo_validate(struct ttm_buffer_object *bo, /* * Remove the backing store if no placement is given. */ - if (!placement->num_placement && !placement->num_busy_placement) - return ttm_bo_pipeline_gutting(bo); + if (!placement->num_placement && !placement->num_busy_placement) { + ret = ttm_bo_pipeline_gutting(bo); + if (ret) + return ret; + + return ttm_tt_create(bo, false); + } /* * Check whether we need to move buffer. @@ -1210,6 +1221,14 @@ int ttm_bo_validate(struct ttm_buffer_object *bo, ttm_flag_masked(&bo->mem.placement, new_flags, ~TTM_PL_MASK_MEMTYPE); } + /* + * We might need to add a TTM. + */ + if (bo->mem.mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) { + ret = ttm_tt_create(bo, true); + if (ret) + return ret; + } return 0; } EXPORT_SYMBOL(ttm_bo_validate); diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 7fb3e0bcbab4..e6c8bd254055 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -531,15 +531,12 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo, .interruptible = false, .no_wait_gpu = false }; - struct ttm_tt *ttm; + struct ttm_tt *ttm = bo->ttm; pgprot_t prot; int ret; - ret = ttm_tt_create(bo, true); - if (ret) - return ret; + BUG_ON(!ttm); - ttm = bo->ttm; ret = ttm_tt_populate(ttm, &ctx); if (ret) return ret; diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index d7a6537dd6ee..33526c5df0e8 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -351,11 +351,6 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, }; - if (ttm_tt_create(bo, true)) { - ret = VM_FAULT_OOM; - goto out_io_unlock; - } - ttm = bo->ttm; if (ttm_tt_populate(bo->ttm, &ctx)) { ret = VM_FAULT_OOM; @@ -510,8 +505,10 @@ static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo, int ttm_bo_vm_access(struct vm_area_struct 
*vma, unsigned long addr, void *buf, int len, int write) { - unsigned long offset = (addr) - vma->vm_start; struct ttm_buffer_object *bo = vma->vm_private_data; + unsigned long offset = (addr) - vma->vm_start + + ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) + << PAGE_SHIFT); int ret; if (len < 1 || (offset + len) >> PAGE_SHIFT > bo->num_pages) diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 9d1c7177384c..3437711ddb43 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -50,9 +50,6 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc) dma_resv_assert_held(bo->base.resv); - if (bo->ttm) - return 0; - if (bdev->need_dma32) page_flags |= TTM_PAGE_FLAG_DMA32; @@ -70,6 +67,7 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc) page_flags |= TTM_PAGE_FLAG_SG; break; default: + bo->ttm = NULL; pr_err("Illegal buffer object type\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 4284c4bd444d..e67e2e8f6e6f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -3037,7 +3037,7 @@ static int vmw_cmd_dx_bind_streamoutput(struct vmw_private *dev_priv, res = vmw_dx_streamoutput_lookup(vmw_context_res_man(ctx_node->ctx), cmd->body.soid); if (IS_ERR(res)) { - DRM_ERROR("Cound not find streamoutput to bind.\n"); + DRM_ERROR("Could not find streamoutput to bind.\n"); return PTR_ERR(res); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index bbce45d142aa..312ed0881a99 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -186,7 +186,7 @@ void vmw_kms_cursor_snoop(struct vmw_surface *srf, /* TODO handle none page aligned offsets */ /* TODO handle more dst & src != 0 */ /* TODO handle more then one copy */ - DRM_ERROR("Cant snoop dma request for cursor!\n"); + DRM_ERROR("Can't snoop dma 
request for cursor!\n"); DRM_ERROR("(%u, %u, %u) (%u, %u, %u) (%ux%ux%u) %u %u\n", box->srcx, box->srcy, box->srcz, box->x, box->y, box->z, @@ -2575,7 +2575,7 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv, ++i; } - if (i != unit) { + if (&con->head == &dev_priv->dev->mode_config.connector_list) { DRM_ERROR("Could not find initial display unit.\n"); ret = -EINVAL; goto out_unlock; @@ -2599,13 +2599,13 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv, break; } - if (mode->type & DRM_MODE_TYPE_PREFERRED) - *p_mode = mode; - else { + if (&mode->head == &con->modes) { WARN_ONCE(true, "Could not find initial preferred mode.\n"); *p_mode = list_first_entry(&con->modes, struct drm_display_mode, head); + } else { + *p_mode = mode; } out_unlock: diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index 16dafff5cab1..c4017c7a24db 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -81,7 +81,7 @@ static int vmw_ldu_commit_list(struct vmw_private *dev_priv) struct vmw_legacy_display_unit *entry; struct drm_framebuffer *fb = NULL; struct drm_crtc *crtc = NULL; - int i = 0; + int i; /* If there is no display topology the host just assumes * that the guest will set the same layout as the host. 
@@ -92,12 +92,11 @@ static int vmw_ldu_commit_list(struct vmw_private *dev_priv) crtc = &entry->base.crtc; w = max(w, crtc->x + crtc->mode.hdisplay); h = max(h, crtc->y + crtc->mode.vdisplay); - i++; } if (crtc == NULL) return 0; - fb = entry->base.crtc.primary->state->fb; + fb = crtc->primary->state->fb; return vmw_kms_write_svga(dev_priv, w, h, fb->pitches[0], fb->format->cpp[0] * 8, @@ -388,8 +387,6 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) ldu->base.is_implicit = true; /* Initialize primary plane */ - vmw_du_plane_reset(primary); - ret = drm_universal_plane_init(dev, &ldu->base.primary, 0, &vmw_ldu_plane_funcs, vmw_primary_plane_formats, @@ -403,8 +400,6 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) drm_plane_helper_add(primary, &vmw_ldu_primary_plane_helper_funcs); /* Initialize cursor plane */ - vmw_du_plane_reset(cursor); - ret = drm_universal_plane_init(dev, &ldu->base.cursor, 0, &vmw_ldu_cursor_funcs, vmw_cursor_plane_formats, @@ -418,7 +413,6 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) drm_plane_helper_add(cursor, &vmw_ldu_cursor_plane_helper_funcs); - vmw_du_connector_reset(connector); ret = drm_connector_init(dev, connector, &vmw_legacy_connector_funcs, DRM_MODE_CONNECTOR_VIRTUAL); if (ret) { @@ -446,7 +440,6 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) goto err_free_encoder; } - vmw_du_crtc_reset(crtc); ret = drm_crtc_init_with_planes(dev, crtc, &ldu->base.primary, &ldu->base.cursor, &vmw_legacy_crtc_funcs, NULL); @@ -521,6 +514,8 @@ int vmw_kms_ldu_init_display(struct vmw_private *dev_priv) dev_priv->active_display_unit = vmw_du_legacy; + drm_mode_config_reset(dev); + DRM_INFO("Legacy Display Unit initialized\n"); return 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index 32a22e4eddb1..4bf0f5ec4fc2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ 
-859,8 +859,6 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit) sou->base.is_implicit = false; /* Initialize primary plane */ - vmw_du_plane_reset(primary); - ret = drm_universal_plane_init(dev, &sou->base.primary, 0, &vmw_sou_plane_funcs, vmw_primary_plane_formats, @@ -875,8 +873,6 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit) drm_plane_enable_fb_damage_clips(primary); /* Initialize cursor plane */ - vmw_du_plane_reset(cursor); - ret = drm_universal_plane_init(dev, &sou->base.cursor, 0, &vmw_sou_cursor_funcs, vmw_cursor_plane_formats, @@ -890,7 +886,6 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit) drm_plane_helper_add(cursor, &vmw_sou_cursor_plane_helper_funcs); - vmw_du_connector_reset(connector); ret = drm_connector_init(dev, connector, &vmw_sou_connector_funcs, DRM_MODE_CONNECTOR_VIRTUAL); if (ret) { @@ -918,8 +913,6 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit) goto err_free_encoder; } - - vmw_du_crtc_reset(crtc); ret = drm_crtc_init_with_planes(dev, crtc, &sou->base.primary, &sou->base.cursor, &vmw_screen_object_crtc_funcs, NULL); @@ -973,6 +966,8 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv) dev_priv->active_display_unit = vmw_du_screen_object; + drm_mode_config_reset(dev); + DRM_INFO("Screen Objects Display Unit initialized\n"); return 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index 16b385629688..cf3aafd00837 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -1738,8 +1738,6 @@ static int vmw_stdu_init(struct vmw_private *dev_priv, unsigned unit) stdu->base.is_implicit = false; /* Initialize primary plane */ - vmw_du_plane_reset(primary); - ret = drm_universal_plane_init(dev, primary, 0, &vmw_stdu_plane_funcs, vmw_primary_plane_formats, @@ -1754,8 +1752,6 @@ static int vmw_stdu_init(struct vmw_private *dev_priv, unsigned unit) 
drm_plane_enable_fb_damage_clips(primary); /* Initialize cursor plane */ - vmw_du_plane_reset(cursor); - ret = drm_universal_plane_init(dev, cursor, 0, &vmw_stdu_cursor_funcs, vmw_cursor_plane_formats, @@ -1769,8 +1765,6 @@ static int vmw_stdu_init(struct vmw_private *dev_priv, unsigned unit) drm_plane_helper_add(cursor, &vmw_stdu_cursor_plane_helper_funcs); - vmw_du_connector_reset(connector); - ret = drm_connector_init(dev, connector, &vmw_stdu_connector_funcs, DRM_MODE_CONNECTOR_VIRTUAL); if (ret) { @@ -1798,7 +1792,6 @@ static int vmw_stdu_init(struct vmw_private *dev_priv, unsigned unit) goto err_free_encoder; } - vmw_du_crtc_reset(crtc); ret = drm_crtc_init_with_planes(dev, crtc, &stdu->base.primary, &stdu->base.cursor, &vmw_stdu_crtc_funcs, NULL); @@ -1894,6 +1887,8 @@ int vmw_kms_stdu_init_display(struct vmw_private *dev_priv) } } + drm_mode_config_reset(dev); + DRM_INFO("Screen Target Display device initialized\n"); return 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 126f93c0b0b8..3914bfee0533 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -1969,7 +1969,7 @@ static int vmw_surface_dirty_alloc(struct vmw_resource *res) num_mip = 1; num_subres = num_layers * num_mip; - dirty_size = sizeof(*dirty) + num_subres * sizeof(dirty->boxes[0]); + dirty_size = struct_size(dirty, boxes, num_subres); acc_size = ttm_round_pot(dirty_size); ret = ttm_mem_global_alloc(vmw_mem_glob(res->dev_priv), acc_size, &ctx); diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c index 3e660fb111b3..013c9e0e412c 100644 --- a/drivers/gpu/drm/xen/xen_drm_front.c +++ b/drivers/gpu/drm/xen/xen_drm_front.c @@ -157,7 +157,8 @@ int xen_drm_front_mode_set(struct xen_drm_front_drm_pipeline *pipeline, int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info, u64 dbuf_cookie, u32 width, u32 height, - u32 bpp, u64 size, struct page **pages) + 
u32 bpp, u64 size, u32 offset, + struct page **pages) { struct xen_drm_front_evtchnl *evtchnl; struct xen_drm_front_dbuf *dbuf; @@ -194,6 +195,7 @@ int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info, req->op.dbuf_create.gref_directory = xen_front_pgdir_shbuf_get_dir_start(&dbuf->shbuf); req->op.dbuf_create.buffer_sz = size; + req->op.dbuf_create.data_ofs = offset; req->op.dbuf_create.dbuf_cookie = dbuf_cookie; req->op.dbuf_create.width = width; req->op.dbuf_create.height = height; @@ -400,15 +402,15 @@ static int xen_drm_drv_dumb_create(struct drm_file *filp, args->size = args->pitch * args->height; obj = xen_drm_front_gem_create(dev, args->size); - if (IS_ERR_OR_NULL(obj)) { - ret = PTR_ERR_OR_ZERO(obj); + if (IS_ERR(obj)) { + ret = PTR_ERR(obj); goto fail; } ret = xen_drm_front_dbuf_create(drm_info->front_info, xen_drm_front_dbuf_to_cookie(obj), args->width, args->height, args->bpp, - args->size, + args->size, 0, xen_drm_front_gem_get_pages(obj)); if (ret) goto fail_backend; diff --git a/drivers/gpu/drm/xen/xen_drm_front.h b/drivers/gpu/drm/xen/xen_drm_front.h index f92c258350ca..54486d89650e 100644 --- a/drivers/gpu/drm/xen/xen_drm_front.h +++ b/drivers/gpu/drm/xen/xen_drm_front.h @@ -145,7 +145,7 @@ int xen_drm_front_mode_set(struct xen_drm_front_drm_pipeline *pipeline, int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info, u64 dbuf_cookie, u32 width, u32 height, - u32 bpp, u64 size, struct page **pages); + u32 bpp, u64 size, u32 offset, struct page **pages); int xen_drm_front_fb_attach(struct xen_drm_front_info *front_info, u64 dbuf_cookie, u64 fb_cookie, u32 width, diff --git a/drivers/gpu/drm/xen/xen_drm_front_conn.c b/drivers/gpu/drm/xen/xen_drm_front_conn.c index 459702fa990e..44f1f70c0aed 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_conn.c +++ b/drivers/gpu/drm/xen/xen_drm_front_conn.c @@ -33,6 +33,7 @@ static const u32 plane_formats[] = { DRM_FORMAT_ARGB4444, DRM_FORMAT_XRGB1555, DRM_FORMAT_ARGB1555, + DRM_FORMAT_YUYV, }; 
const u32 *xen_drm_front_conn_get_formats(int *format_count) diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c index f0b85e094111..39ff95b75357 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_gem.c +++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c @@ -83,7 +83,7 @@ static struct xen_gem_object *gem_create(struct drm_device *dev, size_t size) size = round_up(size, PAGE_SIZE); xen_obj = gem_create_obj(dev, size); - if (IS_ERR_OR_NULL(xen_obj)) + if (IS_ERR(xen_obj)) return xen_obj; if (drm_info->front_info->cfg.be_alloc) { @@ -117,7 +117,7 @@ static struct xen_gem_object *gem_create(struct drm_device *dev, size_t size) */ xen_obj->num_pages = DIV_ROUND_UP(size, PAGE_SIZE); xen_obj->pages = drm_gem_get_pages(&xen_obj->base); - if (IS_ERR_OR_NULL(xen_obj->pages)) { + if (IS_ERR(xen_obj->pages)) { ret = PTR_ERR(xen_obj->pages); xen_obj->pages = NULL; goto fail; @@ -136,7 +136,7 @@ struct drm_gem_object *xen_drm_front_gem_create(struct drm_device *dev, struct xen_gem_object *xen_obj; xen_obj = gem_create(dev, size); - if (IS_ERR_OR_NULL(xen_obj)) + if (IS_ERR(xen_obj)) return ERR_CAST(xen_obj); return &xen_obj->base; @@ -194,7 +194,7 @@ xen_drm_front_gem_import_sg_table(struct drm_device *dev, size = attach->dmabuf->size; xen_obj = gem_create_obj(dev, size); - if (IS_ERR_OR_NULL(xen_obj)) + if (IS_ERR(xen_obj)) return ERR_CAST(xen_obj); ret = gem_alloc_pages_array(xen_obj, size); @@ -210,7 +210,8 @@ xen_drm_front_gem_import_sg_table(struct drm_device *dev, ret = xen_drm_front_dbuf_create(drm_info->front_info, xen_drm_front_dbuf_to_cookie(&xen_obj->base), - 0, 0, 0, size, xen_obj->pages); + 0, 0, 0, size, sgt->sgl->offset, + xen_obj->pages); if (ret < 0) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/xen/xen_drm_front_kms.c b/drivers/gpu/drm/xen/xen_drm_front_kms.c index 78096bbcd226..ef11b1e4de39 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_kms.c +++ b/drivers/gpu/drm/xen/xen_drm_front_kms.c @@ -60,7 +60,7 @@ fb_create(struct 
drm_device *dev, struct drm_file *filp, int ret; fb = drm_gem_fb_create_with_funcs(dev, filp, mode_cmd, &fb_funcs); - if (IS_ERR_OR_NULL(fb)) + if (IS_ERR(fb)) return fb; gem_obj = fb->obj[0]; diff --git a/drivers/gpu/drm/xlnx/zynqmp_dp.c b/drivers/gpu/drm/xlnx/zynqmp_dp.c index 821f7a71e182..99158ee67d02 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_dp.c +++ b/drivers/gpu/drm/xlnx/zynqmp_dp.c @@ -44,7 +44,7 @@ MODULE_PARM_DESC(aux_timeout_ms, "DP aux timeout value in msec (default: 50)"); */ static uint zynqmp_dp_power_on_delay_ms = 4; module_param_named(power_on_delay_ms, zynqmp_dp_power_on_delay_ms, uint, 0444); -MODULE_PARM_DESC(aux_timeout_ms, "DP power on delay in msec (default: 4)"); +MODULE_PARM_DESC(power_on_delay_ms, "DP power on delay in msec (default: 4)"); /* Link configuration registers */ #define ZYNQMP_DP_LINK_BW_SET 0x0 @@ -567,34 +567,37 @@ static int zynqmp_dp_mode_configure(struct zynqmp_dp *dp, int pclock, u8 current_bw) { int max_rate = dp->link_config.max_rate; - u8 bws[3] = { DP_LINK_BW_1_62, DP_LINK_BW_2_7, DP_LINK_BW_5_4 }; + u8 bw_code; u8 max_lanes = dp->link_config.max_lanes; u8 max_link_rate_code = drm_dp_link_rate_to_bw_code(max_rate); u8 bpp = dp->config.bpp; u8 lane_cnt; - s8 i; - if (current_bw == DP_LINK_BW_1_62) { + /* Downshift from current bandwidth */ + switch (current_bw) { + case DP_LINK_BW_5_4: + bw_code = DP_LINK_BW_2_7; + break; + case DP_LINK_BW_2_7: + bw_code = DP_LINK_BW_1_62; + break; + case DP_LINK_BW_1_62: dev_err(dp->dev, "can't downshift. 
already lowest link rate\n"); return -EINVAL; - } - - for (i = ARRAY_SIZE(bws) - 1; i >= 0; i--) { - if (current_bw && bws[i] >= current_bw) - continue; - - if (bws[i] <= max_link_rate_code) - break; + default: + /* If not given, start with max supported */ + bw_code = max_link_rate_code; + break; } for (lane_cnt = 1; lane_cnt <= max_lanes; lane_cnt <<= 1) { int bw; u32 rate; - bw = drm_dp_bw_code_to_link_rate(bws[i]); + bw = drm_dp_bw_code_to_link_rate(bw_code); rate = zynqmp_dp_max_rate(bw, lane_cnt, bpp); if (pclock <= rate) { - dp->mode.bw_code = bws[i]; + dp->mode.bw_code = bw_code; dp->mode.lane_cnt = lane_cnt; dp->mode.pclock = pclock; return dp->mode.bw_code; @@ -1308,7 +1311,7 @@ zynqmp_dp_connector_detect(struct drm_connector *connector, bool force) ret = drm_dp_dpcd_read(&dp->aux, 0x0, dp->dpcd, sizeof(dp->dpcd)); if (ret < 0) { - dev_dbg(dp->dev, "DPCD read failes"); + dev_dbg(dp->dev, "DPCD read failed"); goto disconnected; } diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c index f2f3ef8af271..5180c5687ee5 100644 --- a/drivers/gpu/vga/vgaarb.c +++ b/drivers/gpu/vga/vgaarb.c @@ -529,7 +529,7 @@ EXPORT_SYMBOL(vga_get); * * 0 on success, negative error code on failure. 
*/ -int vga_tryget(struct pci_dev *pdev, unsigned int rsrc) +static int vga_tryget(struct pci_dev *pdev, unsigned int rsrc) { struct vga_device *vgadev; unsigned long flags; @@ -554,7 +554,6 @@ bail: spin_unlock_irqrestore(&vga_lock, flags); return rc; } -EXPORT_SYMBOL(vga_tryget); /** * vga_put - release lock on legacy VGA resources diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index b50081cacf04..910b6e90866c 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -86,6 +86,10 @@ static int hyperv_die_event(struct notifier_block *nb, unsigned long val, struct die_args *die = (struct die_args *)args; struct pt_regs *regs = die->regs; + /* Don't notify Hyper-V if the die event is other than oops */ + if (val != DIE_OOPS) + return NOTIFY_DONE; + /* * Hyper-V should be notified only once about a panic. If we will be * doing hyperv_report_panic_msg() later with kmsg data, don't do diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c index 388978775be0..710fbef9a9c2 100644 --- a/drivers/i2c/algos/i2c-algo-pca.c +++ b/drivers/i2c/algos/i2c-algo-pca.c @@ -542,8 +542,8 @@ int i2c_pca_add_numbered_bus(struct i2c_adapter *adap) } EXPORT_SYMBOL(i2c_pca_add_numbered_bus); -MODULE_AUTHOR("Ian Campbell <[email protected]>, " - "Wolfram Sang <[email protected]>"); +MODULE_AUTHOR("Ian Campbell <[email protected]>"); +MODULE_AUTHOR("Wolfram Sang <[email protected]>"); MODULE_DESCRIPTION("I2C-Bus PCA9564/PCA9665 algorithm"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 88639e52c73a..293e7a0760e7 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -146,6 +146,7 @@ config I2C_I801 Elkhart Lake (PCH) Tiger Lake (PCH) Jasper Lake (SOC) + Emmitsburg (PCH) This driver can also be built as a module. If so, the module will be called i2c-i801. 
diff --git a/drivers/i2c/busses/i2c-ali1535.c b/drivers/i2c/busses/i2c-ali1535.c index a43deea390f5..fb93152845f4 100644 --- a/drivers/i2c/busses/i2c-ali1535.c +++ b/drivers/i2c/busses/i2c-ali1535.c @@ -519,9 +519,9 @@ static struct pci_driver ali1535_driver = { module_pci_driver(ali1535_driver); -MODULE_AUTHOR("Frodo Looijaard <[email protected]>, " - "Philip Edelbrock <[email protected]>, " - "Mark D. Studebaker <[email protected]> " - "and Dan Eaton <[email protected]>"); +MODULE_AUTHOR("Frodo Looijaard <[email protected]>"); +MODULE_AUTHOR("Philip Edelbrock <[email protected]>"); +MODULE_AUTHOR("Mark D. Studebaker <[email protected]>"); +MODULE_AUTHOR("Dan Eaton <[email protected]>"); MODULE_DESCRIPTION("ALI1535 SMBus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-ali15x3.c b/drivers/i2c/busses/i2c-ali15x3.c index 02185a1cfa77..cc58feacd082 100644 --- a/drivers/i2c/busses/i2c-ali15x3.c +++ b/drivers/i2c/busses/i2c-ali15x3.c @@ -502,8 +502,8 @@ static struct pci_driver ali15x3_driver = { module_pci_driver(ali15x3_driver); -MODULE_AUTHOR ("Frodo Looijaard <[email protected]>, " - "Philip Edelbrock <[email protected]>, " - "and Mark D. Studebaker <[email protected]>"); +MODULE_AUTHOR("Frodo Looijaard <[email protected]>"); +MODULE_AUTHOR("Philip Edelbrock <[email protected]>"); +MODULE_AUTHOR("Mark D. 
Studebaker <[email protected]>"); MODULE_DESCRIPTION("ALI15X3 SMBus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-amd8111.c b/drivers/i2c/busses/i2c-amd8111.c index 2b14fef5bf26..34862ad3423e 100644 --- a/drivers/i2c/busses/i2c-amd8111.c +++ b/drivers/i2c/busses/i2c-amd8111.c @@ -381,7 +381,7 @@ static s32 amd8111_access(struct i2c_adapter * adap, u16 addr, if (status) return status; len = min_t(u8, len, I2C_SMBUS_BLOCK_MAX); - /* fall through */ + fallthrough; case I2C_SMBUS_I2C_BLOCK_DATA: for (i = 0; i < len; i++) { status = amd_ec_read(smbus, AMD_SMB_DATA + i, diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index f51702d86a90..31268074c422 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -504,7 +504,7 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status) goto error_and_stop; } irq_handled |= ASPEED_I2CD_INTR_TX_ACK; - /* fall through */ + fallthrough; case ASPEED_I2C_MASTER_TX_FIRST: if (bus->buf_index < msg->len) { bus->master_state = ASPEED_I2C_MASTER_TX; @@ -520,7 +520,7 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status) /* RX may not have completed yet (only address cycle) */ if (!(irq_status & ASPEED_I2CD_INTR_RX_DONE)) goto out_no_complete; - /* fall through */ + fallthrough; case ASPEED_I2C_MASTER_RX: if (unlikely(!(irq_status & ASPEED_I2CD_INTR_RX_DONE))) { dev_err(bus->dev, "master failed to RX\n"); diff --git a/drivers/i2c/busses/i2c-at91-master.c b/drivers/i2c/busses/i2c-at91-master.c index 363d540a8345..66864f9cf7ac 100644 --- a/drivers/i2c/busses/i2c-at91-master.c +++ b/drivers/i2c/busses/i2c-at91-master.c @@ -816,79 +816,16 @@ error: return ret; } -static void at91_prepare_twi_recovery(struct i2c_adapter *adap) -{ - struct at91_twi_dev *dev = i2c_get_adapdata(adap); - - pinctrl_select_state(dev->pinctrl, dev->pinctrl_pins_gpio); -} - -static void at91_unprepare_twi_recovery(struct i2c_adapter *adap) -{ - 
struct at91_twi_dev *dev = i2c_get_adapdata(adap); - - pinctrl_select_state(dev->pinctrl, dev->pinctrl_pins_default); -} - static int at91_init_twi_recovery_gpio(struct platform_device *pdev, struct at91_twi_dev *dev) { struct i2c_bus_recovery_info *rinfo = &dev->rinfo; - dev->pinctrl = devm_pinctrl_get(&pdev->dev); - if (!dev->pinctrl || IS_ERR(dev->pinctrl)) { + rinfo->pinctrl = devm_pinctrl_get(&pdev->dev); + if (!rinfo->pinctrl || IS_ERR(rinfo->pinctrl)) { dev_info(dev->dev, "can't get pinctrl, bus recovery not supported\n"); - return PTR_ERR(dev->pinctrl); + return PTR_ERR(rinfo->pinctrl); } - - dev->pinctrl_pins_default = pinctrl_lookup_state(dev->pinctrl, - PINCTRL_STATE_DEFAULT); - dev->pinctrl_pins_gpio = pinctrl_lookup_state(dev->pinctrl, - "gpio"); - if (IS_ERR(dev->pinctrl_pins_default) || - IS_ERR(dev->pinctrl_pins_gpio)) { - dev_info(&pdev->dev, "pinctrl states incomplete for recovery\n"); - return -EINVAL; - } - - /* - * pins will be taken as GPIO, so we might as well inform pinctrl about - * this and move the state to GPIO - */ - pinctrl_select_state(dev->pinctrl, dev->pinctrl_pins_gpio); - - rinfo->sda_gpiod = devm_gpiod_get(&pdev->dev, "sda", GPIOD_IN); - if (PTR_ERR(rinfo->sda_gpiod) == -EPROBE_DEFER) - return -EPROBE_DEFER; - - rinfo->scl_gpiod = devm_gpiod_get(&pdev->dev, "scl", - GPIOD_OUT_HIGH_OPEN_DRAIN); - if (PTR_ERR(rinfo->scl_gpiod) == -EPROBE_DEFER) - return -EPROBE_DEFER; - - if (IS_ERR(rinfo->sda_gpiod) || - IS_ERR(rinfo->scl_gpiod)) { - dev_info(&pdev->dev, "recovery information incomplete\n"); - if (!IS_ERR(rinfo->sda_gpiod)) { - gpiod_put(rinfo->sda_gpiod); - rinfo->sda_gpiod = NULL; - } - if (!IS_ERR(rinfo->scl_gpiod)) { - gpiod_put(rinfo->scl_gpiod); - rinfo->scl_gpiod = NULL; - } - pinctrl_select_state(dev->pinctrl, dev->pinctrl_pins_default); - return -EINVAL; - } - - /* change the state of the pins back to their default state */ - pinctrl_select_state(dev->pinctrl, dev->pinctrl_pins_default); - - dev_info(&pdev->dev, "using 
scl, sda for recovery\n"); - - rinfo->prepare_recovery = at91_prepare_twi_recovery; - rinfo->unprepare_recovery = at91_unprepare_twi_recovery; - rinfo->recover_bus = i2c_generic_scl_recovery; dev->adapter.bus_recovery_info = rinfo; return 0; diff --git a/drivers/i2c/busses/i2c-at91.h b/drivers/i2c/busses/i2c-at91.h index 7e7b4955ca7f..eae673ae786c 100644 --- a/drivers/i2c/busses/i2c-at91.h +++ b/drivers/i2c/busses/i2c-at91.h @@ -157,9 +157,6 @@ struct at91_twi_dev { struct at91_twi_dma dma; bool slave_detected; struct i2c_bus_recovery_info rinfo; - struct pinctrl *pinctrl; - struct pinctrl_state *pinctrl_pins_default; - struct pinctrl_state *pinctrl_pins_gpio; #ifdef CONFIG_I2C_AT91_SLAVE_EXPERIMENTAL unsigned smr; struct i2c_client *slave; diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c index 8a3c98866fb7..688e92818821 100644 --- a/drivers/i2c/busses/i2c-bcm-iproc.c +++ b/drivers/i2c/busses/i2c-bcm-iproc.c @@ -1078,7 +1078,7 @@ static int bcm_iproc_i2c_unreg_slave(struct i2c_client *slave) if (!iproc_i2c->slave) return -EINVAL; - iproc_i2c->slave = NULL; + disable_irq(iproc_i2c->irq); /* disable all slave interrupts */ tmp = iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET); @@ -1091,6 +1091,17 @@ static int bcm_iproc_i2c_unreg_slave(struct i2c_client *slave) tmp &= ~BIT(S_CFG_EN_NIC_SMB_ADDR3_SHIFT); iproc_i2c_wr_reg(iproc_i2c, S_CFG_SMBUS_ADDR_OFFSET, tmp); + /* flush TX/RX FIFOs */ + tmp = (BIT(S_FIFO_RX_FLUSH_SHIFT) | BIT(S_FIFO_TX_FLUSH_SHIFT)); + iproc_i2c_wr_reg(iproc_i2c, S_FIFO_CTRL_OFFSET, tmp); + + /* clear all pending slave interrupts */ + iproc_i2c_wr_reg(iproc_i2c, IS_OFFSET, ISR_MASK_SLAVE); + + iproc_i2c->slave = NULL; + + enable_irq(iproc_i2c->irq); + return 0; } diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c index d9b86fcc3825..5dc519516292 100644 --- a/drivers/i2c/busses/i2c-bcm2835.c +++ b/drivers/i2c/busses/i2c-bcm2835.c @@ -392,7 +392,7 @@ static const struct i2c_algorithm 
bcm2835_i2c_algo = { /* * The BCM2835 was reported to have problems with clock stretching: - * http://www.advamation.com/knowhow/raspberrypi/rpi-i2c-bug.html + * https://www.advamation.com/knowhow/raspberrypi/rpi-i2c-bug.html * https://www.raspberrypi.org/forums/viewtopic.php?p=146272 */ static const struct i2c_adapter_quirks bcm2835_i2c_quirks = { diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c index 8522134f9ea9..55c83a7a24f3 100644 --- a/drivers/i2c/busses/i2c-designware-pcidrv.c +++ b/drivers/i2c/busses/i2c-designware-pcidrv.c @@ -90,7 +90,7 @@ static int mfld_setup(struct pci_dev *pdev, struct dw_pci_controller *c) switch (pdev->device) { case 0x0817: dev->timings.bus_freq_hz = I2C_MAX_STANDARD_MODE_FREQ; - /* fall through */ + fallthrough; case 0x0818: case 0x0819: c->bus_num = pdev->device - 0x817 + 3; diff --git a/drivers/i2c/busses/i2c-digicolor.c b/drivers/i2c/busses/i2c-digicolor.c index 332f00437479..f67639dc74b7 100644 --- a/drivers/i2c/busses/i2c-digicolor.c +++ b/drivers/i2c/busses/i2c-digicolor.c @@ -187,7 +187,7 @@ static irqreturn_t dc_i2c_irq(int irq, void *dev_id) break; } i2c->state = STATE_WRITE; - /* fall through */ + fallthrough; case STATE_WRITE: if (i2c->msgbuf_ptr < i2c->msg->len) dc_i2c_write_buf(i2c); diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c index 73f139690e4e..843b31a0f752 100644 --- a/drivers/i2c/busses/i2c-eg20t.c +++ b/drivers/i2c/busses/i2c-eg20t.c @@ -846,11 +846,10 @@ static void pch_i2c_remove(struct pci_dev *pdev) kfree(adap_info); } -#ifdef CONFIG_PM -static int pch_i2c_suspend(struct pci_dev *pdev, pm_message_t state) +static int __maybe_unused pch_i2c_suspend(struct device *dev) { - int ret; int i; + struct pci_dev *pdev = to_pci_dev(dev); struct adapter_info *adap_info = pci_get_drvdata(pdev); void __iomem *p = adap_info->pch_data[0].pch_base_address; @@ -872,34 +871,13 @@ static int pch_i2c_suspend(struct pci_dev *pdev, pm_message_t 
state) ioread32(p + PCH_I2CSR), ioread32(p + PCH_I2CBUFSTA), ioread32(p + PCH_I2CESRSTA)); - ret = pci_save_state(pdev); - - if (ret) { - pch_pci_err(pdev, "pci_save_state\n"); - return ret; - } - - pci_enable_wake(pdev, PCI_D3hot, 0); - pci_disable_device(pdev); - pci_set_power_state(pdev, pci_choose_state(pdev, state)); - return 0; } -static int pch_i2c_resume(struct pci_dev *pdev) +static int __maybe_unused pch_i2c_resume(struct device *dev) { int i; - struct adapter_info *adap_info = pci_get_drvdata(pdev); - - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - - if (pci_enable_device(pdev) < 0) { - pch_pci_err(pdev, "pch_i2c_resume:pci_enable_device FAILED\n"); - return -EIO; - } - - pci_enable_wake(pdev, PCI_D3hot, 0); + struct adapter_info *adap_info = dev_get_drvdata(dev); for (i = 0; i < adap_info->ch_num; i++) pch_i2c_init(&adap_info->pch_data[i]); @@ -908,18 +886,15 @@ static int pch_i2c_resume(struct pci_dev *pdev) return 0; } -#else -#define pch_i2c_suspend NULL -#define pch_i2c_resume NULL -#endif + +static SIMPLE_DEV_PM_OPS(pch_i2c_pm_ops, pch_i2c_suspend, pch_i2c_resume); static struct pci_driver pch_pcidriver = { .name = KBUILD_MODNAME, .id_table = pch_pcidev_id, .probe = pch_i2c_probe, .remove = pch_i2c_remove, - .suspend = pch_i2c_suspend, - .resume = pch_i2c_resume + .driver.pm = &pch_i2c_pm_ops, }; module_pci_driver(pch_pcidriver); diff --git a/drivers/i2c/busses/i2c-emev2.c b/drivers/i2c/busses/i2c-emev2.c index 1a319352e51b..a08554c1a570 100644 --- a/drivers/i2c/busses/i2c-emev2.c +++ b/drivers/i2c/busses/i2c-emev2.c @@ -442,6 +442,7 @@ static struct platform_driver em_i2c_driver = { module_platform_driver(em_i2c_driver); MODULE_DESCRIPTION("EMEV2 I2C bus driver"); -MODULE_AUTHOR("Ian Molton and Wolfram Sang <[email protected]>"); +MODULE_AUTHOR("Ian Molton"); +MODULE_AUTHOR("Wolfram Sang <[email protected]>"); MODULE_LICENSE("GPL v2"); MODULE_DEVICE_TABLE(of, em_i2c_ids); diff --git a/drivers/i2c/busses/i2c-fsi.c 
b/drivers/i2c/busses/i2c-fsi.c index 977d6f524649..10332693edf0 100644 --- a/drivers/i2c/busses/i2c-fsi.c +++ b/drivers/i2c/busses/i2c-fsi.c @@ -703,7 +703,7 @@ static int fsi_i2c_probe(struct device *dev) for (port_no = 0; port_no < ports; port_no++) { np = fsi_i2c_find_port_of_node(dev->of_node, port_no); - if (np && !of_device_is_available(np)) + if (!of_device_is_available(np)) continue; port = kzalloc(sizeof(*port), GFP_KERNEL); diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index fea644921a76..e32ef3f01fe8 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -54,6 +54,7 @@ * Sunrise Point-H (PCH) 0xa123 32 hard yes yes yes * Sunrise Point-LP (PCH) 0x9d23 32 hard yes yes yes * DNV (SOC) 0x19df 32 hard yes yes yes + * Emmitsburg (PCH) 0x1bc9 32 hard yes yes yes * Broxton (SOC) 0x5ad4 32 hard yes yes yes * Lewisburg (PCH) 0xa1a3 32 hard yes yes yes * Lewisburg Supersku (PCH) 0xa223 32 hard yes yes yes @@ -67,6 +68,7 @@ * Comet Lake-H (PCH) 0x06a3 32 hard yes yes yes * Elkhart Lake (PCH) 0x4b23 32 hard yes yes yes * Tiger Lake-LP (PCH) 0xa0a3 32 hard yes yes yes + * Tiger Lake-H (PCH) 0x43a3 32 hard yes yes yes * Jasper Lake (SOC) 0x4da3 32 hard yes yes yes * Comet Lake-V (PCH) 0xa3a3 32 hard yes yes yes * @@ -207,6 +209,7 @@ #define PCI_DEVICE_ID_INTEL_BAYTRAIL_SMBUS 0x0f12 #define PCI_DEVICE_ID_INTEL_CDF_SMBUS 0x18df #define PCI_DEVICE_ID_INTEL_DNV_SMBUS 0x19df +#define PCI_DEVICE_ID_INTEL_EBG_SMBUS 0x1bc9 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS 0x1c22 #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 /* Patsburg also has three 'Integrated Device Function' SMBus controllers */ @@ -221,6 +224,7 @@ #define PCI_DEVICE_ID_INTEL_GEMINILAKE_SMBUS 0x31d4 #define PCI_DEVICE_ID_INTEL_ICELAKE_LP_SMBUS 0x34a3 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_SMBUS 0x3b30 +#define PCI_DEVICE_ID_INTEL_TIGERLAKE_H_SMBUS 0x43a3 #define PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS 0x4b23 #define 
PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS 0x4da3 #define PCI_DEVICE_ID_INTEL_BROXTON_SMBUS 0x5ad4 @@ -1062,6 +1066,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CDF_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DNV_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EBG_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROXTON_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS) }, @@ -1074,6 +1079,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_COMETLAKE_V_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TIGERLAKE_LP_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TIGERLAKE_H_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS) }, { 0, } }; @@ -1748,7 +1754,9 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) case PCI_DEVICE_ID_INTEL_COMETLAKE_H_SMBUS: case PCI_DEVICE_ID_INTEL_ELKHART_LAKE_SMBUS: case PCI_DEVICE_ID_INTEL_TIGERLAKE_LP_SMBUS: + case PCI_DEVICE_ID_INTEL_TIGERLAKE_H_SMBUS: case PCI_DEVICE_ID_INTEL_JASPER_LAKE_SMBUS: + case PCI_DEVICE_ID_INTEL_EBG_SMBUS: priv->features |= FEATURE_BLOCK_PROC; priv->features |= FEATURE_I2C_BLOCK_READ; priv->features |= FEATURE_IRQ; @@ -1765,19 +1773,19 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) case PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS_MS1: case PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS_MS2: priv->features |= FEATURE_IDF; - /* fall through */ + fallthrough; default: priv->features |= FEATURE_BLOCK_PROC; priv->features |= FEATURE_I2C_BLOCK_READ; priv->features |= FEATURE_IRQ; - /* fall 
through */ + fallthrough; case PCI_DEVICE_ID_INTEL_82801DB_3: priv->features |= FEATURE_SMBUS_PEC; priv->features |= FEATURE_BLOCK_BUFFER; - /* fall through */ + fallthrough; case PCI_DEVICE_ID_INTEL_82801CA_3: priv->features |= FEATURE_HOST_NOTIFY; - /* fall through */ + fallthrough; case PCI_DEVICE_ID_INTEL_82801BA_2: case PCI_DEVICE_ID_INTEL_82801AB_3: case PCI_DEVICE_ID_INTEL_82801AA_3: @@ -1986,7 +1994,8 @@ static void __exit i2c_i801_exit(void) pci_unregister_driver(&i801_driver); } -MODULE_AUTHOR("Mark D. Studebaker <[email protected]>, Jean Delvare <[email protected]>"); +MODULE_AUTHOR("Mark D. Studebaker <[email protected]>"); +MODULE_AUTHOR("Jean Delvare <[email protected]>"); MODULE_DESCRIPTION("I801 SMBus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index deef69e56906..efc14041d45b 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -48,11 +48,13 @@ #define I2C_DMA_CON_TX 0x0000 #define I2C_DMA_CON_RX 0x0001 +#define I2C_DMA_ASYNC_MODE 0x0004 +#define I2C_DMA_SKIP_CONFIG 0x0010 +#define I2C_DMA_DIR_CHANGE 0x0200 #define I2C_DMA_START_EN 0x0001 #define I2C_DMA_INT_FLAG_NONE 0x0000 #define I2C_DMA_CLR_FLAG 0x0000 #define I2C_DMA_HARD_RST 0x0002 -#define I2C_DMA_4G_MODE 0x0001 #define MAX_SAMPLE_CNT_DIV 8 #define MAX_STEP_CNT_DIV 64 @@ -201,10 +203,11 @@ struct mtk_i2c_compatible { unsigned char dcm: 1; unsigned char auto_restart: 1; unsigned char aux_len_reg: 1; - unsigned char support_33bits: 1; unsigned char timing_adjust: 1; unsigned char dma_sync: 1; unsigned char ltiming_adjust: 1; + unsigned char apdma_sync: 1; + unsigned char max_dma_support; }; struct mtk_i2c_ac_timing { @@ -250,14 +253,13 @@ struct mtk_i2c { /** * struct i2c_spec_values: - * min_low_ns: min LOW period of the SCL clock - * min_su_sta_ns: min set-up time for a repeated START condition - * max_hd_dat_ns: max data hold time - * min_su_dat_ns: min data set-up time + * @min_low_ns: min 
LOW period of the SCL clock + * @min_su_sta_ns: min set-up time for a repeated START condition + * @max_hd_dat_ns: max data hold time + * @min_su_dat_ns: min data set-up time */ struct i2c_spec_values { unsigned int min_low_ns; - unsigned int min_high_ns; unsigned int min_su_sta_ns; unsigned int max_hd_dat_ns; unsigned int min_su_dat_ns; @@ -307,10 +309,11 @@ static const struct mtk_i2c_compatible mt2712_compat = { .dcm = 1, .auto_restart = 1, .aux_len_reg = 1, - .support_33bits = 1, .timing_adjust = 1, .dma_sync = 0, .ltiming_adjust = 0, + .apdma_sync = 0, + .max_dma_support = 33, }; static const struct mtk_i2c_compatible mt6577_compat = { @@ -320,10 +323,11 @@ static const struct mtk_i2c_compatible mt6577_compat = { .dcm = 1, .auto_restart = 0, .aux_len_reg = 0, - .support_33bits = 0, .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, + .apdma_sync = 0, + .max_dma_support = 32, }; static const struct mtk_i2c_compatible mt6589_compat = { @@ -333,10 +337,11 @@ static const struct mtk_i2c_compatible mt6589_compat = { .dcm = 0, .auto_restart = 0, .aux_len_reg = 0, - .support_33bits = 0, .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, + .apdma_sync = 0, + .max_dma_support = 32, }; static const struct mtk_i2c_compatible mt7622_compat = { @@ -346,10 +351,11 @@ static const struct mtk_i2c_compatible mt7622_compat = { .dcm = 1, .auto_restart = 1, .aux_len_reg = 1, - .support_33bits = 0, .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, + .apdma_sync = 0, + .max_dma_support = 32, }; static const struct mtk_i2c_compatible mt8173_compat = { @@ -358,10 +364,11 @@ static const struct mtk_i2c_compatible mt8173_compat = { .dcm = 1, .auto_restart = 1, .aux_len_reg = 1, - .support_33bits = 1, .timing_adjust = 0, .dma_sync = 0, .ltiming_adjust = 0, + .apdma_sync = 0, + .max_dma_support = 33, }; static const struct mtk_i2c_compatible mt8183_compat = { @@ -371,10 +378,25 @@ static const struct mtk_i2c_compatible mt8183_compat = { .dcm = 0, .auto_restart = 1, 
.aux_len_reg = 1, - .support_33bits = 1, .timing_adjust = 1, .dma_sync = 1, .ltiming_adjust = 1, + .apdma_sync = 0, + .max_dma_support = 33, +}; + +static const struct mtk_i2c_compatible mt8192_compat = { + .quirks = &mt8183_i2c_quirks, + .regs = mt_i2c_regs_v2, + .pmic_i2c = 0, + .dcm = 0, + .auto_restart = 1, + .aux_len_reg = 1, + .timing_adjust = 1, + .dma_sync = 1, + .ltiming_adjust = 1, + .apdma_sync = 1, + .max_dma_support = 36, }; static const struct of_device_id mtk_i2c_of_match[] = { @@ -384,6 +406,7 @@ static const struct of_device_id mtk_i2c_of_match[] = { { .compatible = "mediatek,mt7622-i2c", .data = &mt7622_compat }, { .compatible = "mediatek,mt8173-i2c", .data = &mt8173_compat }, { .compatible = "mediatek,mt8183-i2c", .data = &mt8183_compat }, + { .compatible = "mediatek,mt8192-i2c", .data = &mt8192_compat }, {} }; MODULE_DEVICE_TABLE(of, mtk_i2c_of_match); @@ -786,11 +809,6 @@ static int mtk_i2c_set_speed(struct mtk_i2c *i2c, unsigned int parent_clk) return 0; } -static inline u32 mtk_i2c_set_4g_mode(dma_addr_t addr) -{ - return (addr & BIT_ULL(32)) ? 
I2C_DMA_4G_MODE : I2C_DMA_CLR_FLAG; -} - static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, int num, int left_num) { @@ -798,6 +816,7 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, u16 start_reg; u16 control_reg; u16 restart_flag = 0; + u16 dma_sync = 0; u32 reg_4g_mode; u8 *dma_rd_buf = NULL; u8 *dma_wr_buf = NULL; @@ -851,10 +870,16 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, mtk_i2c_writew(i2c, num, OFFSET_TRANSAC_LEN); } + if (i2c->dev_comp->apdma_sync) { + dma_sync = I2C_DMA_SKIP_CONFIG | I2C_DMA_ASYNC_MODE; + if (i2c->op == I2C_MASTER_WRRD) + dma_sync |= I2C_DMA_DIR_CHANGE; + } + /* Prepare buffer data to start transfer */ if (i2c->op == I2C_MASTER_RD) { writel(I2C_DMA_INT_FLAG_NONE, i2c->pdmabase + OFFSET_INT_FLAG); - writel(I2C_DMA_CON_RX, i2c->pdmabase + OFFSET_CON); + writel(I2C_DMA_CON_RX | dma_sync, i2c->pdmabase + OFFSET_CON); dma_rd_buf = i2c_get_dma_safe_msg_buf(msgs, 1); if (!dma_rd_buf) @@ -868,8 +893,8 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, return -ENOMEM; } - if (i2c->dev_comp->support_33bits) { - reg_4g_mode = mtk_i2c_set_4g_mode(rpaddr); + if (i2c->dev_comp->max_dma_support > 32) { + reg_4g_mode = upper_32_bits(rpaddr); writel(reg_4g_mode, i2c->pdmabase + OFFSET_RX_4G_MODE); } @@ -877,7 +902,7 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, writel(msgs->len, i2c->pdmabase + OFFSET_RX_LEN); } else if (i2c->op == I2C_MASTER_WR) { writel(I2C_DMA_INT_FLAG_NONE, i2c->pdmabase + OFFSET_INT_FLAG); - writel(I2C_DMA_CON_TX, i2c->pdmabase + OFFSET_CON); + writel(I2C_DMA_CON_TX | dma_sync, i2c->pdmabase + OFFSET_CON); dma_wr_buf = i2c_get_dma_safe_msg_buf(msgs, 1); if (!dma_wr_buf) @@ -891,8 +916,8 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, return -ENOMEM; } - if (i2c->dev_comp->support_33bits) { - reg_4g_mode = mtk_i2c_set_4g_mode(wpaddr); + if 
(i2c->dev_comp->max_dma_support > 32) { + reg_4g_mode = upper_32_bits(wpaddr); writel(reg_4g_mode, i2c->pdmabase + OFFSET_TX_4G_MODE); } @@ -900,7 +925,7 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, writel(msgs->len, i2c->pdmabase + OFFSET_TX_LEN); } else { writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_INT_FLAG); - writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_CON); + writel(I2C_DMA_CLR_FLAG | dma_sync, i2c->pdmabase + OFFSET_CON); dma_wr_buf = i2c_get_dma_safe_msg_buf(msgs, 1); if (!dma_wr_buf) @@ -937,11 +962,11 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs, return -ENOMEM; } - if (i2c->dev_comp->support_33bits) { - reg_4g_mode = mtk_i2c_set_4g_mode(wpaddr); + if (i2c->dev_comp->max_dma_support > 32) { + reg_4g_mode = upper_32_bits(wpaddr); writel(reg_4g_mode, i2c->pdmabase + OFFSET_TX_4G_MODE); - reg_4g_mode = mtk_i2c_set_4g_mode(rpaddr); + reg_4g_mode = upper_32_bits(rpaddr); writel(reg_4g_mode, i2c->pdmabase + OFFSET_RX_4G_MODE); } @@ -1215,8 +1240,9 @@ static int mtk_i2c_probe(struct platform_device *pdev) return -EINVAL; } - if (i2c->dev_comp->support_33bits) { - ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(33)); + if (i2c->dev_comp->max_dma_support > 32) { + ret = dma_set_mask(&pdev->dev, + DMA_BIT_MASK(i2c->dev_comp->max_dma_support)); if (ret) { dev_err(&pdev->dev, "dma_set_mask return error.\n"); return ret; diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 829b8c98ae51..8d9d4ffdcd24 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -251,7 +251,7 @@ mv64xxx_i2c_fsm(struct mv64xxx_i2c_data *drv_data, u32 status) MV64XXX_I2C_STATE_WAITING_FOR_ADDR_2_ACK; break; } - /* FALLTHRU */ + fallthrough; case MV64XXX_I2C_STATUS_MAST_WR_ADDR_2_ACK: /* 0xd0 */ case MV64XXX_I2C_STATUS_MAST_WR_ACK: /* 0x28 */ if ((drv_data->bytes_left == 0) @@ -282,14 +282,14 @@ mv64xxx_i2c_fsm(struct mv64xxx_i2c_data *drv_data, u32 status) 
MV64XXX_I2C_STATE_WAITING_FOR_ADDR_2_ACK; break; } - /* FALLTHRU */ + fallthrough; case MV64XXX_I2C_STATUS_MAST_RD_ADDR_2_ACK: /* 0xe0 */ if (drv_data->bytes_left == 0) { drv_data->action = MV64XXX_I2C_ACTION_SEND_STOP; drv_data->state = MV64XXX_I2C_STATE_IDLE; break; } - /* FALLTHRU */ + fallthrough; case MV64XXX_I2C_STATUS_MAST_RD_DATA_ACK: /* 0x50 */ if (status != MV64XXX_I2C_STATUS_MAST_RD_DATA_ACK) drv_data->action = MV64XXX_I2C_ACTION_CONTINUE; @@ -417,8 +417,7 @@ mv64xxx_i2c_do_action(struct mv64xxx_i2c_data *drv_data) "mv64xxx_i2c_do_action: Invalid action: %d\n", drv_data->action); drv_data->rc = -EIO; - - /* FALLTHRU */ + fallthrough; case MV64XXX_I2C_ACTION_SEND_STOP: drv_data->cntl_bits &= ~MV64XXX_I2C_REG_CONTROL_INTEN; writel(drv_data->cntl_bits | MV64XXX_I2C_REG_CONTROL_STOP, diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c index e1e8d4ef9aa7..d4b1b0865f67 100644 --- a/drivers/i2c/busses/i2c-nomadik.c +++ b/drivers/i2c/busses/i2c-nomadik.c @@ -1122,6 +1122,7 @@ static void __exit nmk_i2c_exit(void) subsys_initcall(nmk_i2c_init); module_exit(nmk_i2c_exit); -MODULE_AUTHOR("Sachin Verma, Srinidhi KASAGAR"); +MODULE_AUTHOR("Sachin Verma"); +MODULE_AUTHOR("Srinidhi KASAGAR"); MODULE_DESCRIPTION("Nomadik/Ux500 I2C driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 69740a4ff1db..8c1b31ed0c42 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -1032,7 +1032,7 @@ static struct pci_driver piix4_driver = { module_pci_driver(piix4_driver); -MODULE_AUTHOR("Frodo Looijaard <[email protected]> and " - "Philip Edelbrock <[email protected]>"); +MODULE_AUTHOR("Frodo Looijaard <[email protected]>"); +MODULE_AUTHOR("Philip Edelbrock <[email protected]>"); MODULE_DESCRIPTION("PIIX4 SMBus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c index 5d7207c10f1d..8c4ec7f13f5a 100644 --- 
a/drivers/i2c/busses/i2c-pnx.c +++ b/drivers/i2c/busses/i2c-pnx.c @@ -781,7 +781,8 @@ static void __exit i2c_adap_pnx_exit(void) platform_driver_unregister(&i2c_pnx_driver); } -MODULE_AUTHOR("Vitaly Wool, Dennis Kovalev <[email protected]>"); +MODULE_AUTHOR("Vitaly Wool"); +MODULE_AUTHOR("Dennis Kovalev <[email protected]>"); MODULE_DESCRIPTION("I2C driver for Philips IP3204-based I2C busses"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:pnx-i2c"); diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 2e3e1bb75013..9e883474db8c 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -583,13 +583,14 @@ static bool rcar_i2c_slave_irq(struct rcar_i2c_priv *priv) rcar_i2c_write(priv, ICSIER, SDR | SSR | SAR); } - rcar_i2c_write(priv, ICSSR, ~SAR & 0xff); + /* Clear SSR, too, because of old STOPs to other clients than us */ + rcar_i2c_write(priv, ICSSR, ~(SAR | SSR) & 0xff); } /* master sent stop */ if (ssr_filtered & SSR) { i2c_slave_event(priv->slave, I2C_SLAVE_STOP, &value); - rcar_i2c_write(priv, ICSIER, SAR | SSR); + rcar_i2c_write(priv, ICSIER, SAR); rcar_i2c_write(priv, ICSSR, ~SSR & 0xff); } @@ -853,7 +854,7 @@ static int rcar_reg_slave(struct i2c_client *slave) priv->slave = slave; rcar_i2c_write(priv, ICSAR, slave->addr); rcar_i2c_write(priv, ICSSR, 0); - rcar_i2c_write(priv, ICSIER, SAR | SSR); + rcar_i2c_write(priv, ICSIER, SAR); rcar_i2c_write(priv, ICSCR, SIE | SDBS); return 0; @@ -865,12 +866,14 @@ static int rcar_unreg_slave(struct i2c_client *slave) WARN_ON(!priv->slave); - /* disable irqs and ensure none is running before clearing ptr */ + /* ensure no irq is running before clearing ptr */ + disable_irq(priv->irq); rcar_i2c_write(priv, ICSIER, 0); - rcar_i2c_write(priv, ICSCR, 0); + rcar_i2c_write(priv, ICSSR, 0); + enable_irq(priv->irq); + rcar_i2c_write(priv, ICSCR, SDBS); rcar_i2c_write(priv, ICSAR, 0); /* Gen2: must be 0 if not using slave */ - synchronize_irq(priv->irq); priv->slave = NULL; 
pm_runtime_put(rcar_i2c_priv_to_dev(priv)); diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c index 15324bfbc6cb..8e3cc85d1921 100644 --- a/drivers/i2c/busses/i2c-rk3x.c +++ b/drivers/i2c/busses/i2c-rk3x.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/i2c.h> #include <linux/interrupt.h> +#include <linux/iopoll.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/platform_device.h> @@ -1040,8 +1041,21 @@ static int rk3x_i2c_setup(struct rk3x_i2c *i2c, struct i2c_msg *msgs, int num) return ret; } -static int rk3x_i2c_xfer(struct i2c_adapter *adap, - struct i2c_msg *msgs, int num) +static int rk3x_i2c_wait_xfer_poll(struct rk3x_i2c *i2c) +{ + ktime_t timeout = ktime_add_ms(ktime_get(), WAIT_TIMEOUT); + + while (READ_ONCE(i2c->busy) && + ktime_compare(ktime_get(), timeout) < 0) { + udelay(5); + rk3x_i2c_irq(0, i2c); + } + + return !i2c->busy; +} + +static int rk3x_i2c_xfer_common(struct i2c_adapter *adap, + struct i2c_msg *msgs, int num, bool polling) { struct rk3x_i2c *i2c = (struct rk3x_i2c *)adap->algo_data; unsigned long timeout, flags; @@ -1075,8 +1089,12 @@ static int rk3x_i2c_xfer(struct i2c_adapter *adap, rk3x_i2c_start(i2c); - timeout = wait_event_timeout(i2c->wait, !i2c->busy, - msecs_to_jiffies(WAIT_TIMEOUT)); + if (!polling) { + timeout = wait_event_timeout(i2c->wait, !i2c->busy, + msecs_to_jiffies(WAIT_TIMEOUT)); + } else { + timeout = rk3x_i2c_wait_xfer_poll(i2c); + } spin_lock_irqsave(&i2c->lock, flags); @@ -1110,6 +1128,18 @@ static int rk3x_i2c_xfer(struct i2c_adapter *adap, return ret < 0 ? 
ret : num; } +static int rk3x_i2c_xfer(struct i2c_adapter *adap, + struct i2c_msg *msgs, int num) +{ + return rk3x_i2c_xfer_common(adap, msgs, num, false); +} + +static int rk3x_i2c_xfer_polling(struct i2c_adapter *adap, + struct i2c_msg *msgs, int num) +{ + return rk3x_i2c_xfer_common(adap, msgs, num, true); +} + static __maybe_unused int rk3x_i2c_resume(struct device *dev) { struct rk3x_i2c *i2c = dev_get_drvdata(dev); @@ -1126,6 +1156,7 @@ static u32 rk3x_i2c_func(struct i2c_adapter *adap) static const struct i2c_algorithm rk3x_i2c_algorithm = { .master_xfer = rk3x_i2c_xfer, + .master_xfer_atomic = rk3x_i2c_xfer_polling, .functionality = rk3x_i2c_func, }; diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c index 2cca1b21e26e..cab725559999 100644 --- a/drivers/i2c/busses/i2c-sh_mobile.c +++ b/drivers/i2c/busses/i2c-sh_mobile.c @@ -932,6 +932,7 @@ static void __exit sh_mobile_i2c_adap_exit(void) module_exit(sh_mobile_i2c_adap_exit); MODULE_DESCRIPTION("SuperH Mobile I2C Bus Controller driver"); -MODULE_AUTHOR("Magnus Damm and Wolfram Sang"); +MODULE_AUTHOR("Magnus Damm"); +MODULE_AUTHOR("Wolfram Sang"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:i2c-sh_mobile"); diff --git a/drivers/i2c/busses/i2c-sibyte.c b/drivers/i2c/busses/i2c-sibyte.c index 9dcea2ba7168..8f71f01cb169 100644 --- a/drivers/i2c/busses/i2c-sibyte.c +++ b/drivers/i2c/busses/i2c-sibyte.c @@ -180,6 +180,7 @@ static void __exit i2c_sibyte_exit(void) module_init(i2c_sibyte_init); module_exit(i2c_sibyte_exit); -MODULE_AUTHOR("Kip Walker (Broadcom Corp.), Steven J. Hill <[email protected]>"); +MODULE_AUTHOR("Kip Walker (Broadcom Corp.)"); +MODULE_AUTHOR("Steven J. 
Hill <[email protected]>"); MODULE_DESCRIPTION("SMBus adapter routines for SiByte boards"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-sirf.c b/drivers/i2c/busses/i2c-sirf.c index d7f72ec331e8..30db8fafe078 100644 --- a/drivers/i2c/busses/i2c-sirf.c +++ b/drivers/i2c/busses/i2c-sirf.c @@ -470,6 +470,6 @@ static struct platform_driver i2c_sirfsoc_driver = { module_platform_driver(i2c_sirfsoc_driver); MODULE_DESCRIPTION("SiRF SoC I2C master controller driver"); -MODULE_AUTHOR("Zhiwu Song <[email protected]>, " - "Xiangzhen Ye <[email protected]>"); +MODULE_AUTHOR("Zhiwu Song <[email protected]>"); +MODULE_AUTHOR("Xiangzhen Ye <[email protected]>"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/i2c/busses/i2c-synquacer.c b/drivers/i2c/busses/i2c-synquacer.c index c9a3dba6a75d..31be1811d5e6 100644 --- a/drivers/i2c/busses/i2c-synquacer.c +++ b/drivers/i2c/busses/i2c-synquacer.c @@ -398,8 +398,7 @@ static irqreturn_t synquacer_i2c_isr(int irq, void *dev_id) if (i2c->state == STATE_READ) goto prepare_read; - - /* fall through */ + fallthrough; case STATE_WRITE: if (bsr & SYNQUACER_I2C_BSR_LRB) { diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 15772964a05f..00d3e4d7a01e 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -293,6 +293,8 @@ struct tegra_i2c_dev { bool is_curr_atomic_xfer; }; +static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev, bool clk_reinit); + static void dvc_writel(struct tegra_i2c_dev *i2c_dev, u32 val, unsigned long reg) { @@ -419,7 +421,7 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) dma_addr_t dma_phys; int err; - if (!i2c_dev->hw->has_apb_dma) + if (!i2c_dev->hw->has_apb_dma || i2c_dev->is_vi) return 0; if (!IS_ENABLED(CONFIG_TEGRA20_APB_DMA)) { @@ -655,32 +657,47 @@ static int __maybe_unused tegra_i2c_runtime_resume(struct device *dev) if (ret) return ret; - if (!i2c_dev->hw->has_single_clk_source) { - ret = clk_enable(i2c_dev->fast_clk); - if 
(ret < 0) { - dev_err(i2c_dev->dev, - "Enabling fast clk failed, err %d\n", ret); - return ret; - } + ret = clk_enable(i2c_dev->fast_clk); + if (ret < 0) { + dev_err(i2c_dev->dev, + "Enabling fast clk failed, err %d\n", ret); + return ret; } - if (i2c_dev->slow_clk) { - ret = clk_enable(i2c_dev->slow_clk); - if (ret < 0) { - dev_err(dev, "failed to enable slow clock: %d\n", ret); - return ret; - } + ret = clk_enable(i2c_dev->slow_clk); + if (ret < 0) { + dev_err(dev, "failed to enable slow clock: %d\n", ret); + goto disable_fast_clk; } ret = clk_enable(i2c_dev->div_clk); if (ret < 0) { dev_err(i2c_dev->dev, "Enabling div clk failed, err %d\n", ret); - clk_disable(i2c_dev->fast_clk); - return ret; + goto disable_slow_clk; + } + + /* + * VI I2C device is attached to VE power domain which goes through + * power ON/OFF during PM runtime resume/suspend. So, controller + * should go through reset and need to re-initialize after power + * domain ON. + */ + if (i2c_dev->is_vi) { + ret = tegra_i2c_init(i2c_dev, true); + if (ret) + goto disable_div_clk; } return 0; + +disable_div_clk: + clk_disable(i2c_dev->div_clk); +disable_slow_clk: + clk_disable(i2c_dev->slow_clk); +disable_fast_clk: + clk_disable(i2c_dev->fast_clk); + return ret; } static int __maybe_unused tegra_i2c_runtime_suspend(struct device *dev) @@ -688,12 +705,8 @@ static int __maybe_unused tegra_i2c_runtime_suspend(struct device *dev) struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev); clk_disable(i2c_dev->div_clk); - - if (i2c_dev->slow_clk) - clk_disable(i2c_dev->slow_clk); - - if (!i2c_dev->hw->has_single_clk_source) - clk_disable(i2c_dev->fast_clk); + clk_disable(i2c_dev->slow_clk); + clk_disable(i2c_dev->fast_clk); return pinctrl_pm_select_idle_state(i2c_dev->dev); } @@ -1716,20 +1729,16 @@ static int tegra_i2c_probe(struct platform_device *pdev) platform_set_drvdata(pdev, i2c_dev); - if (!i2c_dev->hw->has_single_clk_source) { - ret = clk_prepare(i2c_dev->fast_clk); - if (ret < 0) { - 
dev_err(i2c_dev->dev, "Clock prepare failed %d\n", ret); - return ret; - } + ret = clk_prepare(i2c_dev->fast_clk); + if (ret < 0) { + dev_err(i2c_dev->dev, "Clock prepare failed %d\n", ret); + return ret; } - if (i2c_dev->slow_clk) { - ret = clk_prepare(i2c_dev->slow_clk); - if (ret < 0) { - dev_err(dev, "failed to prepare slow clock: %d\n", ret); - goto unprepare_fast_clk; - } + ret = clk_prepare(i2c_dev->slow_clk); + if (ret < 0) { + dev_err(dev, "failed to prepare slow clock: %d\n", ret); + goto unprepare_fast_clk; } if (i2c_dev->bus_clk_rate > I2C_MAX_FAST_MODE_FREQ && @@ -1750,7 +1759,15 @@ static int tegra_i2c_probe(struct platform_device *pdev) goto unprepare_slow_clk; } - pm_runtime_irq_safe(&pdev->dev); + /* + * VI I2C is in VE power domain which is not always on and not + * an IRQ safe. So, IRQ safe device can't be attached to a non-IRQ + * safe domain as it prevents powering off the PM domain. + * Also, VI I2C device don't need to use runtime IRQ safe as it will + * not be used for atomic transfers. 
+ */ + if (!i2c_dev->is_vi) + pm_runtime_irq_safe(&pdev->dev); pm_runtime_enable(&pdev->dev); if (!pm_runtime_enabled(&pdev->dev)) { ret = tegra_i2c_runtime_resume(&pdev->dev); @@ -1835,12 +1852,10 @@ unprepare_div_clk: clk_unprepare(i2c_dev->div_clk); unprepare_slow_clk: - if (i2c_dev->is_vi) - clk_unprepare(i2c_dev->slow_clk); + clk_unprepare(i2c_dev->slow_clk); unprepare_fast_clk: - if (!i2c_dev->hw->has_single_clk_source) - clk_unprepare(i2c_dev->fast_clk); + clk_unprepare(i2c_dev->fast_clk); return ret; } @@ -1859,12 +1874,8 @@ static int tegra_i2c_remove(struct platform_device *pdev) tegra_i2c_runtime_suspend(&pdev->dev); clk_unprepare(i2c_dev->div_clk); - - if (i2c_dev->slow_clk) - clk_unprepare(i2c_dev->slow_clk); - - if (!i2c_dev->hw->has_single_clk_source) - clk_unprepare(i2c_dev->fast_clk); + clk_unprepare(i2c_dev->slow_clk); + clk_unprepare(i2c_dev->fast_clk); tegra_i2c_release_dma(i2c_dev); return 0; diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c index 4abc7771af06..970ccdcbb889 100644 --- a/drivers/i2c/busses/i2c-viapro.c +++ b/drivers/i2c/busses/i2c-viapro.c @@ -228,7 +228,7 @@ static s32 vt596_access(struct i2c_adapter *adap, u16 addr, goto exit_unsupported; if (read_write == I2C_SMBUS_READ) outb_p(data->block[0], SMBHSTDAT0); - /* Fall through */ + fallthrough; case I2C_SMBUS_BLOCK_DATA: outb_p(command, SMBHSTCMD); if (read_write == I2C_SMBUS_WRITE) { @@ -489,9 +489,9 @@ static void __exit i2c_vt596_exit(void) } } -MODULE_AUTHOR("Kyosti Malkki <[email protected]>, " - "Mark D. Studebaker <[email protected]> and " - "Jean Delvare <[email protected]>"); +MODULE_AUTHOR("Kyosti Malkki <[email protected]>"); +MODULE_AUTHOR("Mark D. 
Studebaker <[email protected]>"); +MODULE_AUTHOR("Jean Delvare <[email protected]>"); MODULE_DESCRIPTION("vt82c596 SMBus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/scx200_acb.c b/drivers/i2c/busses/scx200_acb.c index bd9afa383d12..7b42a18bd05c 100644 --- a/drivers/i2c/busses/scx200_acb.c +++ b/drivers/i2c/busses/scx200_acb.c @@ -151,7 +151,7 @@ static void scx200_acb_machine(struct scx200_acb_iface *iface, u8 status) case state_repeat_start: outb(inb(ACBCTL1) | ACBCTL1_START, ACBCTL1); - /* fallthrough */ + fallthrough; case state_quick: if (iface->address_byte & 1) { diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 26f03a14a478..34a9609f256d 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -32,6 +32,7 @@ #include <linux/of_device.h> #include <linux/of.h> #include <linux/of_irq.h> +#include <linux/pinctrl/consumer.h> #include <linux/pm_domain.h> #include <linux/pm_runtime.h> #include <linux/pm_wakeirq.h> @@ -181,6 +182,8 @@ int i2c_generic_scl_recovery(struct i2c_adapter *adap) if (bri->prepare_recovery) bri->prepare_recovery(adap); + if (bri->pinctrl) + pinctrl_select_state(bri->pinctrl, bri->pins_gpio); /* * If we can set SDA, we will always create a STOP to ensure additional @@ -236,6 +239,8 @@ int i2c_generic_scl_recovery(struct i2c_adapter *adap) if (bri->unprepare_recovery) bri->unprepare_recovery(adap); + if (bri->pinctrl) + pinctrl_select_state(bri->pinctrl, bri->pins_default); return ret; } @@ -251,13 +256,135 @@ int i2c_recover_bus(struct i2c_adapter *adap) } EXPORT_SYMBOL_GPL(i2c_recover_bus); -static void i2c_init_recovery(struct i2c_adapter *adap) +static void i2c_gpio_init_pinctrl_recovery(struct i2c_adapter *adap) +{ + struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; + struct device *dev = &adap->dev; + struct pinctrl *p = bri->pinctrl; + + /* + * we can't change states without pinctrl, so remove the states if + * populated + */ + if (!p) { + bri->pins_default 
= NULL; + bri->pins_gpio = NULL; + return; + } + + if (!bri->pins_default) { + bri->pins_default = pinctrl_lookup_state(p, + PINCTRL_STATE_DEFAULT); + if (IS_ERR(bri->pins_default)) { + dev_dbg(dev, PINCTRL_STATE_DEFAULT " state not found for GPIO recovery\n"); + bri->pins_default = NULL; + } + } + if (!bri->pins_gpio) { + bri->pins_gpio = pinctrl_lookup_state(p, "gpio"); + if (IS_ERR(bri->pins_gpio)) + bri->pins_gpio = pinctrl_lookup_state(p, "recovery"); + + if (IS_ERR(bri->pins_gpio)) { + dev_dbg(dev, "no gpio or recovery state found for GPIO recovery\n"); + bri->pins_gpio = NULL; + } + } + + /* for pinctrl state changes, we need all the information */ + if (bri->pins_default && bri->pins_gpio) { + dev_info(dev, "using pinctrl states for GPIO recovery"); + } else { + bri->pinctrl = NULL; + bri->pins_default = NULL; + bri->pins_gpio = NULL; + } +} + +static int i2c_gpio_init_generic_recovery(struct i2c_adapter *adap) +{ + struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; + struct device *dev = &adap->dev; + struct gpio_desc *gpiod; + int ret = 0; + + /* + * don't touch the recovery information if the driver is not using + * generic SCL recovery + */ + if (bri->recover_bus && bri->recover_bus != i2c_generic_scl_recovery) + return 0; + + /* + * pins might be taken as GPIO, so we should inform pinctrl about + * this and move the state to GPIO + */ + if (bri->pinctrl) + pinctrl_select_state(bri->pinctrl, bri->pins_gpio); + + /* + * if there is incomplete or no recovery information, see if generic + * GPIO recovery is available + */ + if (!bri->scl_gpiod) { + gpiod = devm_gpiod_get(dev, "scl", GPIOD_OUT_HIGH_OPEN_DRAIN); + if (PTR_ERR(gpiod) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto cleanup_pinctrl_state; + } + if (!IS_ERR(gpiod)) { + bri->scl_gpiod = gpiod; + bri->recover_bus = i2c_generic_scl_recovery; + dev_info(dev, "using generic GPIOs for recovery\n"); + } + } + + /* SDA GPIOD line is optional, so we care about DEFER only */ + if 
(!bri->sda_gpiod) { + /* + * We have SCL. Pull SCL low and wait a bit so that SDA glitches + * have no effect. + */ + gpiod_direction_output(bri->scl_gpiod, 0); + udelay(10); + gpiod = devm_gpiod_get(dev, "sda", GPIOD_IN); + + /* Wait a bit in case of a SDA glitch, and then release SCL. */ + udelay(10); + gpiod_direction_output(bri->scl_gpiod, 1); + + if (PTR_ERR(gpiod) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto cleanup_pinctrl_state; + } + if (!IS_ERR(gpiod)) + bri->sda_gpiod = gpiod; + } + +cleanup_pinctrl_state: + /* change the state of the pins back to their default state */ + if (bri->pinctrl) + pinctrl_select_state(bri->pinctrl, bri->pins_default); + + return ret; +} + +static int i2c_gpio_init_recovery(struct i2c_adapter *adap) +{ + i2c_gpio_init_pinctrl_recovery(adap); + return i2c_gpio_init_generic_recovery(adap); +} + +static int i2c_init_recovery(struct i2c_adapter *adap) { struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; char *err_str; if (!bri) - return; + return 0; + + if (i2c_gpio_init_recovery(adap) == -EPROBE_DEFER) + return -EPROBE_DEFER; if (!bri->recover_bus) { err_str = "no recover_bus() found"; @@ -273,10 +400,7 @@ static void i2c_init_recovery(struct i2c_adapter *adap) if (gpiod_get_direction(bri->sda_gpiod) == 0) bri->set_sda = set_sda_gpio_value; } - return; - } - - if (bri->recover_bus == i2c_generic_scl_recovery) { + } else if (bri->recover_bus == i2c_generic_scl_recovery) { /* Generic SCL recovery */ if (!bri->set_scl || !bri->get_scl) { err_str = "no {get|set}_scl() found"; @@ -288,10 +412,12 @@ static void i2c_init_recovery(struct i2c_adapter *adap) } } - return; + return 0; err: dev_err(&adap->dev, "Not using recovery: %s\n", err_str); adap->bus_recovery_info = NULL; + + return -EINVAL; } static int i2c_smbus_host_notify_to_irq(const struct i2c_client *client) @@ -319,11 +445,9 @@ static int i2c_device_probe(struct device *dev) if (!client) return 0; - driver = to_i2c_driver(dev->driver); - client->irq = 
client->init_irq; - if (!client->irq && !driver->disable_i2c_core_irq_mapping) { + if (!client->irq) { int irq = -ENOENT; if (client->flags & I2C_CLIENT_HOST_NOTIFY) { @@ -349,6 +473,8 @@ static int i2c_device_probe(struct device *dev) client->irq = irq; } + driver = to_i2c_driver(dev->driver); + /* * An I2C ID table is not mandatory, if and only if, a suitable OF * or ACPI ID table is supplied for the probing device. @@ -1227,7 +1353,7 @@ static int i2c_setup_host_notify_irq_domain(struct i2c_adapter *adap) if (!i2c_check_functionality(adap, I2C_FUNC_SMBUS_HOST_NOTIFY)) return 0; - domain = irq_domain_create_linear(adap->dev.fwnode, + domain = irq_domain_create_linear(adap->dev.parent->fwnode, I2C_ADDR_7BITS_COUNT, &i2c_host_notify_irq_ops, adap); if (!domain) @@ -1318,12 +1444,16 @@ static int i2c_register_adapter(struct i2c_adapter *adap) if (res) goto out_reg; - dev_dbg(&adap->dev, "adapter [%s] registered\n", adap->name); - pm_runtime_no_callbacks(&adap->dev); pm_suspend_ignore_children(&adap->dev, true); pm_runtime_enable(&adap->dev); + res = i2c_init_recovery(adap); + if (res == -EPROBE_DEFER) + goto out_reg; + + dev_dbg(&adap->dev, "adapter [%s] registered\n", adap->name); + #ifdef CONFIG_I2C_COMPAT res = class_compat_create_link(i2c_adapter_compat_class, &adap->dev, adap->dev.parent); @@ -1332,8 +1462,6 @@ static int i2c_register_adapter(struct i2c_adapter *adap) "Failed to create compatibility class link\n"); #endif - i2c_init_recovery(adap); - /* create pre-declared device nodes */ of_i2c_register_devices(adap); i2c_acpi_register_devices(adap); diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index da020acc9bbd..6ceb11cc4be1 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -761,8 +761,8 @@ static void __exit i2c_dev_exit(void) unregister_chrdev_region(MKDEV(I2C_MAJOR, 0), I2C_MINORS); } -MODULE_AUTHOR("Frodo Looijaard <[email protected]> and " - "Simon G. 
Vogl <[email protected]>"); +MODULE_AUTHOR("Frodo Looijaard <[email protected]>"); +MODULE_AUTHOR("Simon G. Vogl <[email protected]>"); MODULE_DESCRIPTION("I2C /dev entries driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/i2c-slave-eeprom.c b/drivers/i2c/i2c-slave-eeprom.c index 593f2fd39d17..5c7ae421cacf 100644 --- a/drivers/i2c/i2c-slave-eeprom.c +++ b/drivers/i2c/i2c-slave-eeprom.c @@ -66,7 +66,7 @@ static int i2c_slave_eeprom_slave_cb(struct i2c_client *client, case I2C_SLAVE_READ_PROCESSED: /* The previous byte made it to the bus, get next one */ eeprom->buffer_idx++; - /* fallthrough */ + fallthrough; case I2C_SLAVE_READ_REQUESTED: spin_lock(&eeprom->buffer_lock); *val = eeprom->buffer[eeprom->buffer_idx & eeprom->address_mask]; diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 5e32f61a2fe4..cc6b4befde7c 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -439,7 +439,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, * complex (and doesn't gain us much performance in most use * cases). 
*/ - npages = get_user_pages_remote(owning_process, owning_mm, + npages = get_user_pages_remote(owning_mm, user_virt, gup_num_pages, flags, local_page_list, NULL, NULL); mmap_read_unlock(owning_mm); diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index e4b025c5637c..c259108ab6dd 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -495,7 +495,7 @@ static void do_fault(struct work_struct *work) if (access_error(vma, fault)) goto out; - ret = handle_mm_fault(vma, address, flags); + ret = handle_mm_fault(vma, address, flags, NULL); out: mmap_read_unlock(mm); diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 442623ac4b47..95c3164a2302 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -962,7 +962,8 @@ static irqreturn_t prq_event_thread(int irq, void *d) goto invalid; ret = handle_mm_fault(vma, address, - req->wr_req ? FAULT_FLAG_WRITE : 0); + req->wr_req ? FAULT_FLAG_WRITE : 0, + NULL); if (ret & VM_FAULT_ERROR) goto invalid; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 9162856de1b1..e972138a14ad 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1728,7 +1728,7 @@ static int hclgevf_reset_wait(struct hclgevf_dev *hdev) /* hardware completion status should be available by this time */ if (ret) { dev_err(&hdev->pdev->dev, - "could'nt get reset done status from h/w, timeout!\n"); + "couldn't get reset done status from h/w, timeout!\n"); return ret; } diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c index c6adc776f3c8..16bda7381ba0 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c @@ -334,19 +334,14 @@ void hinic_devlink_unregister(struct hinic_devlink_priv *priv) 
static int chip_fault_show(struct devlink_fmsg *fmsg, struct hinic_fault_event *event) { - char fault_level[FAULT_TYPE_MAX][FAULT_SHOW_STR_LEN + 1] = { - "fatal", "reset", "flr", "general", "suggestion"}; - char level_str[FAULT_SHOW_STR_LEN + 1] = {0}; - u8 level; + const char * const level_str[FAULT_LEVEL_MAX + 1] = { + "fatal", "reset", "flr", "general", "suggestion", "Unknown"}; + u8 fault_level; int err; - level = event->event.chip.err_level; - if (level < FAULT_LEVEL_MAX) - strncpy(level_str, fault_level[level], strlen(fault_level[level])); - else - strncpy(level_str, "Unknown", strlen("Unknown")); - - if (level == FAULT_LEVEL_SERIOUS_FLR) { + fault_level = (event->event.chip.err_level < FAULT_LEVEL_MAX) ? + event->event.chip.err_level : FAULT_LEVEL_MAX; + if (fault_level == FAULT_LEVEL_SERIOUS_FLR) { err = devlink_fmsg_u32_pair_put(fmsg, "Function level err func_id", (u32)event->event.chip.func_id); if (err) @@ -361,7 +356,7 @@ static int chip_fault_show(struct devlink_fmsg *fmsg, if (err) return err; - err = devlink_fmsg_string_pair_put(fmsg, "err_level", level_str); + err = devlink_fmsg_string_pair_put(fmsg, "err_level", level_str[fault_level]); if (err) return err; @@ -381,18 +376,15 @@ static int chip_fault_show(struct devlink_fmsg *fmsg, static int fault_report_show(struct devlink_fmsg *fmsg, struct hinic_fault_event *event) { - char fault_type[FAULT_TYPE_MAX][FAULT_SHOW_STR_LEN + 1] = { + const char * const type_str[FAULT_TYPE_MAX + 1] = { "chip", "ucode", "mem rd timeout", "mem wr timeout", - "reg rd timeout", "reg wr timeout", "phy fault"}; - char type_str[FAULT_SHOW_STR_LEN + 1] = {0}; + "reg rd timeout", "reg wr timeout", "phy fault", "Unknown"}; + u8 fault_type; int err; - if (event->type < FAULT_TYPE_MAX) - strncpy(type_str, fault_type[event->type], strlen(fault_type[event->type])); - else - strncpy(type_str, "Unknown", strlen("Unknown")); + fault_type = (event->type < FAULT_TYPE_MAX) ? 
event->type : FAULT_TYPE_MAX; - err = devlink_fmsg_string_pair_put(fmsg, "Fault type", type_str); + err = devlink_fmsg_string_pair_put(fmsg, "Fault type", type_str[fault_type]); if (err) return err; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h index dc6e645f2689..701eb81e09a7 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h @@ -504,8 +504,6 @@ enum hinic_fault_type { FAULT_TYPE_MAX, }; -#define FAULT_SHOW_STR_LEN 16 - enum hinic_fault_err_level { FAULT_LEVEL_FATAL, FAULT_LEVEL_SERIOUS_RESET, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 1944bf5264db..26988ad7ec97 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -412,7 +412,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, new->flags = flags; - new->q.info = devm_kzalloc(dev, sizeof(*new->q.info) * num_descs, + new->q.info = devm_kcalloc(dev, num_descs, sizeof(*new->q.info), GFP_KERNEL); if (!new->q.info) { netdev_err(lif->netdev, "Cannot allocate queue info\n"); @@ -462,7 +462,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, new->intr.index = IONIC_INTR_INDEX_NOT_ASSIGNED; } - new->cq.info = devm_kzalloc(dev, sizeof(*new->cq.info) * num_descs, + new->cq.info = devm_kcalloc(dev, num_descs, sizeof(*new->cq.info), GFP_KERNEL); if (!new->cq.info) { netdev_err(lif->netdev, "Cannot allocate completion queue info\n"); diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 20b1b43a0e39..1166b98d8bb2 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -474,13 +474,24 @@ static int emac_clks_phase1_init(struct platform_device *pdev, ret = clk_prepare_enable(adpt->clk[EMAC_CLK_CFG_AHB]); if (ret) - return 
ret; + goto disable_clk_axi; ret = clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 19200000); if (ret) - return ret; + goto disable_clk_cfg_ahb; + + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_HIGH_SPEED]); + if (ret) + goto disable_clk_cfg_ahb; - return clk_prepare_enable(adpt->clk[EMAC_CLK_HIGH_SPEED]); + return 0; + +disable_clk_cfg_ahb: + clk_disable_unprepare(adpt->clk[EMAC_CLK_CFG_AHB]); +disable_clk_axi: + clk_disable_unprepare(adpt->clk[EMAC_CLK_AXI]); + + return ret; } /* Enable clocks; needs emac_clks_phase1_init to be called before */ diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index 36598d0542ed..206d70f9d95b 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -979,7 +979,8 @@ static int ef100_process_design_param(struct efx_nic *efx, * EFX_MIN_DMAQ_SIZE is divisible by GRANULARITY. * This is very unlikely to fail. */ - if (EFX_MIN_DMAQ_SIZE % reader->value) { + if (!reader->value || reader->value > EFX_MIN_DMAQ_SIZE || + EFX_MIN_DMAQ_SIZE % (u32)reader->value) { netif_err(efx, probe, efx->net_dev, "%s size granularity is %llu, can't guarantee safety\n", reader->type == ESE_EF100_DP_GZ_RXQ_SIZE_GRANULARITY ? 
"RXQ" : "TXQ", diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 02102c781a8c..bf3250e0e59c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -351,6 +351,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) plat_dat->has_gmac = true; plat_dat->bsp_priv = gmac; plat_dat->fix_mac_speed = ipq806x_gmac_fix_mac_speed; + plat_dat->multicast_filter_bins = 0; err = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (err) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index efc6ec1b8027..fc8759f146c7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -164,6 +164,9 @@ static void dwmac1000_set_filter(struct mac_device_info *hw, value = GMAC_FRAME_FILTER_PR | GMAC_FRAME_FILTER_PCF; } else if (dev->flags & IFF_ALLMULTI) { value = GMAC_FRAME_FILTER_PM; /* pass all multi */ + } else if (!netdev_mc_empty(dev) && (mcbitslog2 == 0)) { + /* Fall back to all multicast if we've no filter */ + value = GMAC_FRAME_FILTER_PM; } else if (!netdev_mc_empty(dev)) { struct netdev_hw_addr *ha; diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index a7610eb55f30..1901ba277413 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -208,13 +208,6 @@ static int mv3310_hwmon_config(struct phy_device *phydev, bool enable) MV_V2_TEMP_CTRL_MASK, val); } -static void mv3310_hwmon_disable(void *data) -{ - struct phy_device *phydev = data; - - mv3310_hwmon_config(phydev, false); -} - static int mv3310_hwmon_probe(struct phy_device *phydev) { struct device *dev = &phydev->mdio.dev; @@ -238,10 +231,6 @@ static int mv3310_hwmon_probe(struct phy_device *phydev) if (ret) return ret; - ret = devm_add_action_or_reset(dev, mv3310_hwmon_disable, phydev); 
- if (ret) - return ret; - priv->hwmon_dev = devm_hwmon_device_register_with_info(dev, priv->hwmon_name, phydev, &mv3310_hwmon_chip_info, NULL); @@ -426,6 +415,11 @@ static int mv3310_probe(struct phy_device *phydev) return phy_sfp_probe(phydev, &mv3310_sfp_ops); } +static void mv3310_remove(struct phy_device *phydev) +{ + mv3310_hwmon_config(phydev, false); +} + static int mv3310_suspend(struct phy_device *phydev) { return mv3310_power_down(phydev); @@ -784,6 +778,7 @@ static struct phy_driver mv3310_drivers[] = { .read_status = mv3310_read_status, .get_tunable = mv3310_get_tunable, .set_tunable = mv3310_set_tunable, + .remove = mv3310_remove, }, { .phy_id = MARVELL_PHY_ID_88E2110, @@ -798,6 +793,7 @@ static struct phy_driver mv3310_drivers[] = { .read_status = mv3310_read_status, .get_tunable = mv3310_get_tunable, .set_tunable = mv3310_set_tunable, + .remove = mv3310_remove, }, }; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 1b9523595839..57d44648c8dd 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -615,7 +615,9 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, if (c45_ids) dev->c45_ids = *c45_ids; dev->irq = bus->irq[addr]; + dev_set_name(&mdiodev->dev, PHY_ID_FMT, bus->id, addr); + device_initialize(&mdiodev->dev); dev->state = PHY_DOWN; @@ -649,10 +651,8 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, ret = phy_request_driver_module(dev, phy_id); } - if (!ret) { - device_initialize(&mdiodev->dev); - } else { - kfree(dev); + if (ret) { + put_device(&mdiodev->dev); dev = ERR_PTR(ret); } diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 7d39f998535d..2b02fefd094d 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1504,7 +1504,7 @@ static int determine_ethernet_addr(struct r8152 *tp, struct sockaddr *sa) sa->sa_family = dev->type; - ret = eth_platform_get_mac_address(&dev->dev, sa->sa_data); + 
ret = eth_platform_get_mac_address(&tp->udev->dev, sa->sa_data); if (ret < 0) { if (tp->version == RTL_VER_01) { ret = pla_ocp_read(tp, PLA_IDR, 8, sa->sa_data); diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index ca395f9679d0..2818015324b8 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -886,7 +886,8 @@ vmxnet3_parse_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, switch (protocol) { case IPPROTO_TCP: - ctx->l4_hdr_size = tcp_hdrlen(skb); + ctx->l4_hdr_size = skb->encapsulation ? inner_tcp_hdrlen(skb) : + tcp_hdrlen(skb); break; case IPPROTO_UDP: ctx->l4_hdr_size = sizeof(struct udphdr); diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index b2868433718f..1ea15f2123ed 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -157,6 +157,12 @@ static netdev_tx_t lapbeth_xmit(struct sk_buff *skb, if (!netif_running(dev)) goto drop; + /* There should be a pseudo header of 1 byte added by upper layers. + * Check to make sure it is there before reading it. + */ + if (skb->len < 1) + goto drop; + switch (skb->data[0]) { case X25_IFACE_DATA: break; @@ -305,6 +311,7 @@ static void lapbeth_setup(struct net_device *dev) dev->netdev_ops = &lapbeth_netdev_ops; dev->needs_free_netdev = true; dev->type = ARPHRD_X25; + dev->hard_header_len = 0; dev->mtu = 1000; dev->addr_len = 0; } @@ -331,7 +338,8 @@ static int lapbeth_new_device(struct net_device *dev) * then this driver prepends a length field of 2 bytes, * then the underlying Ethernet device prepends its own header. 
*/ - ndev->hard_header_len = -1 + 3 + 2 + dev->hard_header_len; + ndev->needed_headroom = -1 + 3 + 2 + dev->hard_header_len + + dev->needed_headroom; lapbeth = netdev_priv(ndev); lapbeth->axdev = ndev; diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 84640a0c13f3..de7984463595 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -307,6 +307,14 @@ static netdev_tx_t x25_asy_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } + /* There should be a pseudo header of 1 byte added by upper layers. + * Check to make sure it is there before reading it. + */ + if (skb->len < 1) { + kfree_skb(skb); + return NETDEV_TX_OK; + } + switch (skb->data[0]) { case X25_IFACE_DATA: break; @@ -752,6 +760,12 @@ static void x25_asy_setup(struct net_device *dev) dev->type = ARPHRD_X25; dev->tx_queue_len = 10; + /* When transmitting data: + * first this driver removes a pseudo header of 1 byte, + * then the lapb module prepends an LAPB header of at most 3 bytes. + */ + dev->needed_headroom = 3 - 1; + /* New-style flags. */ dev->flags = IFF_NOARP; } diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index 5368452eb5a6..d4314fba0269 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -1270,7 +1270,7 @@ sba_ioc_init_pluto(struct parisc_device *sba, struct ioc *ioc, int ioc_num) ** (one that doesn't overlap memory or LMMIO space) in the ** IBASE and IMASK registers. 
*/ - ioc->ibase = READ_REG(ioc->ioc_hpa + IOC_IBASE); + ioc->ibase = READ_REG(ioc->ioc_hpa + IOC_IBASE) & ~0x1fffffULL; iova_space_size = ~(READ_REG(ioc->ioc_hpa + IOC_IMASK) & 0xFFFFFFFFUL) + 1; if ((ioc->ibase < 0xfed00000UL) && ((ioc->ibase + iova_space_size) > 0xfee00000UL)) { diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c index 90bc7969b199..8cf8c1be2666 100644 --- a/drivers/platform/x86/mlx-platform.c +++ b/drivers/platform/x86/mlx-platform.c @@ -186,7 +186,9 @@ #define MLXPLAT_CPLD_WD_RESET_ACT_MASK GENMASK(7, 1) #define MLXPLAT_CPLD_WD_FAN_ACT_MASK (GENMASK(7, 0) & ~BIT(4)) #define MLXPLAT_CPLD_WD_COUNT_ACT_MASK (GENMASK(7, 0) & ~BIT(7)) +#define MLXPLAT_CPLD_WD_CPBLTY_MASK (GENMASK(7, 0) & ~BIT(6)) #define MLXPLAT_CPLD_WD_DFLT_TIMEOUT 30 +#define MLXPLAT_CPLD_WD3_DFLT_TIMEOUT 600 #define MLXPLAT_CPLD_WD_MAX_DEVS 2 /* mlxplat_priv - platform private data @@ -2084,6 +2086,84 @@ static struct mlxreg_core_platform_data mlxplat_mlxcpld_wd_set_type2[] = { }, }; +/* Watchdog type3: hardware implementation version 3 + * Can be on all systems. It's differentiated by WD capability bit. + * Old systems (MSN2700, MSN2410, MSN2740, MSN2100 and MSN2140) + * still have only one main watchdog. 
+ */ +static struct mlxreg_core_data mlxplat_mlxcpld_wd_main_regs_type3[] = { + { + .label = "action", + .reg = MLXPLAT_CPLD_LPC_REG_WD2_ACT_OFFSET, + .mask = MLXPLAT_CPLD_WD_RESET_ACT_MASK, + .bit = 0, + }, + { + .label = "timeout", + .reg = MLXPLAT_CPLD_LPC_REG_WD2_TMR_OFFSET, + .mask = MLXPLAT_CPLD_WD_TYPE2_TO_MASK, + .health_cntr = MLXPLAT_CPLD_WD3_DFLT_TIMEOUT, + }, + { + .label = "timeleft", + .reg = MLXPLAT_CPLD_LPC_REG_WD2_TMR_OFFSET, + .mask = MLXPLAT_CPLD_WD_TYPE2_TO_MASK, + }, + { + .label = "ping", + .reg = MLXPLAT_CPLD_LPC_REG_WD2_ACT_OFFSET, + .mask = MLXPLAT_CPLD_WD_RESET_ACT_MASK, + .bit = 0, + }, + { + .label = "reset", + .reg = MLXPLAT_CPLD_LPC_REG_RESET_CAUSE_OFFSET, + .mask = GENMASK(7, 0) & ~BIT(6), + .bit = 6, + }, +}; + +static struct mlxreg_core_data mlxplat_mlxcpld_wd_aux_regs_type3[] = { + { + .label = "action", + .reg = MLXPLAT_CPLD_LPC_REG_WD3_ACT_OFFSET, + .mask = MLXPLAT_CPLD_WD_FAN_ACT_MASK, + .bit = 4, + }, + { + .label = "timeout", + .reg = MLXPLAT_CPLD_LPC_REG_WD3_TMR_OFFSET, + .mask = MLXPLAT_CPLD_WD_TYPE2_TO_MASK, + .health_cntr = MLXPLAT_CPLD_WD3_DFLT_TIMEOUT, + }, + { + .label = "timeleft", + .reg = MLXPLAT_CPLD_LPC_REG_WD3_TMR_OFFSET, + .mask = MLXPLAT_CPLD_WD_TYPE2_TO_MASK, + }, + { + .label = "ping", + .reg = MLXPLAT_CPLD_LPC_REG_WD3_ACT_OFFSET, + .mask = MLXPLAT_CPLD_WD_FAN_ACT_MASK, + .bit = 4, + }, +}; + +static struct mlxreg_core_platform_data mlxplat_mlxcpld_wd_set_type3[] = { + { + .data = mlxplat_mlxcpld_wd_main_regs_type3, + .counter = ARRAY_SIZE(mlxplat_mlxcpld_wd_main_regs_type3), + .version = MLX_WDT_TYPE3, + .identity = "mlx-wdt-main", + }, + { + .data = mlxplat_mlxcpld_wd_aux_regs_type3, + .counter = ARRAY_SIZE(mlxplat_mlxcpld_wd_aux_regs_type3), + .version = MLX_WDT_TYPE3, + .identity = "mlx-wdt-aux", + }, +}; + static bool mlxplat_mlxcpld_writeable_reg(struct device *dev, unsigned int reg) { switch (reg) { @@ -2114,8 +2194,10 @@ static bool mlxplat_mlxcpld_writeable_reg(struct device *dev, unsigned int reg) 
case MLXPLAT_CPLD_LPC_REG_WD1_TMR_OFFSET: case MLXPLAT_CPLD_LPC_REG_WD1_ACT_OFFSET: case MLXPLAT_CPLD_LPC_REG_WD2_TMR_OFFSET: + case MLXPLAT_CPLD_LPC_REG_WD2_TLEFT_OFFSET: case MLXPLAT_CPLD_LPC_REG_WD2_ACT_OFFSET: case MLXPLAT_CPLD_LPC_REG_WD3_TMR_OFFSET: + case MLXPLAT_CPLD_LPC_REG_WD3_TLEFT_OFFSET: case MLXPLAT_CPLD_LPC_REG_WD3_ACT_OFFSET: case MLXPLAT_CPLD_LPC_REG_PWM1_OFFSET: case MLXPLAT_CPLD_LPC_REG_PWM_CONTROL_OFFSET: @@ -2742,6 +2824,27 @@ static int mlxplat_mlxcpld_verify_bus_topology(int *nr) return 0; } +static int mlxplat_mlxcpld_check_wd_capability(void *regmap) +{ + u32 regval; + int i, rc; + + rc = regmap_read(regmap, MLXPLAT_CPLD_LPC_REG_PSU_I2C_CAP_OFFSET, + &regval); + if (rc) + return rc; + + if (!(regval & ~MLXPLAT_CPLD_WD_CPBLTY_MASK)) { + for (i = 0; i < ARRAY_SIZE(mlxplat_mlxcpld_wd_set_type3); i++) { + if (mlxplat_wd_data[i]) + mlxplat_wd_data[i] = + &mlxplat_mlxcpld_wd_set_type3[i]; + } + } + + return 0; +} + static int __init mlxplat_init(void) { struct mlxplat_priv *priv; @@ -2874,6 +2977,9 @@ static int __init mlxplat_init(void) } /* Add WD drivers.
*/ + err = mlxplat_mlxcpld_check_wd_capability(priv->regmap); + if (err) + goto fail_platform_wd_register; for (j = 0; j < MLXPLAT_CPLD_WD_MAX_DEVS; j++) { if (mlxplat_wd_data[j]) { mlxplat_wd_data[j]->regmap = priv->regmap; diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index 451608e960a1..c07ceec3c6d4 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -981,7 +981,7 @@ static int rio_mport_transfer_ioctl(struct file *filp, void __user *arg) if (unlikely(copy_from_user(transfer, (void __user *)(uintptr_t)transaction.block, - transaction.count * sizeof(*transfer)))) { + array_size(sizeof(*transfer), transaction.count)))) { ret = -EFAULT; goto out_free; } @@ -994,7 +994,7 @@ static int rio_mport_transfer_ioctl(struct file *filp, void __user *arg) if (unlikely(copy_to_user((void __user *)(uintptr_t)transaction.block, transfer, - transaction.count * sizeof(*transfer)))) + array_size(sizeof(*transfer), transaction.count)))) ret = -EFAULT; out_free: @@ -1710,8 +1710,7 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv, if (rval & RIO_PEF_SWITCH) { rio_mport_read_config_32(mport, destid, hopcount, RIO_SWP_INFO_CAR, &swpinfo); - size += (RIO_GET_TOTAL_PORTS(swpinfo) * - sizeof(rswitch->nextdev[0])) + sizeof(*rswitch); + size += struct_size(rswitch, nextdev, RIO_GET_TOTAL_PORTS(swpinfo)); } rdev = kzalloc(size, GFP_KERNEL); diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index eb8ed28533f8..19b0c33f4a62 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -330,7 +330,7 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, size_t size; u32 swpinfo = 0; - size = sizeof(struct rio_dev); + size = sizeof(*rdev); if (rio_mport_read_config_32(port, destid, hopcount, RIO_PEF_CAR, &result)) return NULL; @@ -338,10 +338,8 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, if (result & (RIO_PEF_SWITCH | 
RIO_PEF_MULTIPORT)) { rio_mport_read_config_32(port, destid, hopcount, RIO_SWP_INFO_CAR, &swpinfo); - if (result & RIO_PEF_SWITCH) { - size += (RIO_GET_TOTAL_PORTS(swpinfo) * - sizeof(rswitch->nextdev[0])) + sizeof(*rswitch); - } + if (result & RIO_PEF_SWITCH) + size += struct_size(rswitch, nextdev, RIO_GET_TOTAL_PORTS(swpinfo)); } rdev = kzalloc(size, GFP_KERNEL); diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index f3b8e6dcd879..48c536acd777 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -281,7 +281,8 @@ config RTC_DRV_DS1374 config RTC_DRV_DS1374_WDT bool "Dallas/Maxim DS1374 watchdog timer" - depends on RTC_DRV_DS1374 + depends on RTC_DRV_DS1374 && WATCHDOG + select WATCHDOG_CORE help If you say Y here you will get support for the watchdog timer in the Dallas Semiconductor DS1374 diff --git a/drivers/rtc/rtc-ab-b5ze-s3.c b/drivers/rtc/rtc-ab-b5ze-s3.c index 811fe2005488..2370ac0cdb5f 100644 --- a/drivers/rtc/rtc-ab-b5ze-s3.c +++ b/drivers/rtc/rtc-ab-b5ze-s3.c @@ -7,7 +7,7 @@ * * Detailed datasheet of the chip is available here: * - * http://www.abracon.com/realtimeclock/AB-RTCMC-32.768kHz-B5ZE-S3-Application-Manual.pdf + * https://www.abracon.com/realtimeclock/AB-RTCMC-32.768kHz-B5ZE-S3-Application-Manual.pdf * * This work is based on ISL12057 driver (drivers/rtc/rtc-isl12057.c). 
* diff --git a/drivers/rtc/rtc-bq32k.c b/drivers/rtc/rtc-bq32k.c index 4a63f0cd2321..933e4237237d 100644 --- a/drivers/rtc/rtc-bq32k.c +++ b/drivers/rtc/rtc-bq32k.c @@ -6,7 +6,7 @@ * Copyright (C) 2014 Pavel Machek <[email protected]> * * You can get hardware description at - * http://www.ti.com/lit/ds/symlink/bq32000.pdf + * https://www.ti.com/lit/ds/symlink/bq32000.pdf */ #include <linux/module.h> diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c index a603f1f21125..800667d73a6f 100644 --- a/drivers/rtc/rtc-cpcap.c +++ b/drivers/rtc/rtc-cpcap.c @@ -261,7 +261,7 @@ static int cpcap_rtc_probe(struct platform_device *pdev) return PTR_ERR(rtc->rtc_dev); rtc->rtc_dev->ops = &cpcap_rtc_ops; - rtc->rtc_dev->range_max = (1 << 14) * SECS_PER_DAY - 1; + rtc->rtc_dev->range_max = (timeu64_t) (DAY_MASK + 1) * SECS_PER_DAY - 1; err = cpcap_get_vendor(dev, rtc->regmap, &rtc->vendor); if (err) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 49702942bb08..54c85cdd019d 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -1668,6 +1668,8 @@ static const struct watchdog_ops ds1388_wdt_ops = { static void ds1307_wdt_register(struct ds1307 *ds1307) { struct watchdog_device *wdt; + int err; + int val; if (ds1307->type != ds_1388) return; @@ -1676,6 +1678,10 @@ static void ds1307_wdt_register(struct ds1307 *ds1307) if (!wdt) return; + err = regmap_read(ds1307->regmap, DS1388_REG_FLAG, &val); + if (!err && val & DS1388_BIT_WF) + wdt->bootstatus = WDIOF_CARDRESET; + wdt->info = &ds1388_wdt_info; wdt->ops = &ds1388_wdt_ops; wdt->timeout = 99; diff --git a/drivers/rtc/rtc-ds1374.c b/drivers/rtc/rtc-ds1374.c index 9c51a12cf70f..177d870bda0d 100644 --- a/drivers/rtc/rtc-ds1374.c +++ b/drivers/rtc/rtc-ds1374.c @@ -46,6 +46,7 @@ #define DS1374_REG_WDALM2 0x06 #define DS1374_REG_CR 0x07 /* Control */ #define DS1374_REG_CR_AIE 0x01 /* Alarm Int. 
Enable */ +#define DS1374_REG_CR_WDSTR 0x08 /* 1=INT, 0=RST */ #define DS1374_REG_CR_WDALM 0x20 /* 1=Watchdog, 0=Alarm */ #define DS1374_REG_CR_WACE 0x40 /* WD/Alarm counter enable */ #define DS1374_REG_SR 0x08 /* Status */ @@ -71,7 +72,9 @@ struct ds1374 { struct i2c_client *client; struct rtc_device *rtc; struct work_struct work; - +#ifdef CONFIG_RTC_DRV_DS1374_WDT + struct watchdog_device wdt; +#endif /* The mutex protects alarm operations, and prevents a race * between the enable_irq() in the workqueue and the free_irq() * in the remove function. @@ -369,238 +372,96 @@ static const struct rtc_class_ops ds1374_rtc_ops = { * ***************************************************************************** */ -static struct i2c_client *save_client; /* Default margin */ -#define WD_TIMO 131762 +#define TIMER_MARGIN_DEFAULT 32 +#define TIMER_MARGIN_MIN 1 +#define TIMER_MARGIN_MAX 4095 /* 24-bit value */ -#define DRV_NAME "DS1374 Watchdog" - -static int wdt_margin = WD_TIMO; -static unsigned long wdt_is_open; +static int wdt_margin; module_param(wdt_margin, int, 0); MODULE_PARM_DESC(wdt_margin, "Watchdog timeout in seconds (default 32s)"); +static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, 0); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default =" + __MODULE_STRING(WATCHDOG_NOWAYOUT)")"); + static const struct watchdog_info ds1374_wdt_info = { - .identity = "DS1374 WTD", + .identity = "DS1374 Watchdog", .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE, }; -static int ds1374_wdt_settimeout(unsigned int timeout) +static int ds1374_wdt_settimeout(struct watchdog_device *wdt, unsigned int timeout) { - int ret = -ENOIOCTLCMD; - int cr; + struct ds1374 *ds1374 = watchdog_get_drvdata(wdt); + struct i2c_client *client = ds1374->client; + int ret, cr; - ret = cr = i2c_smbus_read_byte_data(save_client, DS1374_REG_CR); - if (ret < 0) - goto out; + wdt->timeout = timeout; + + cr = i2c_smbus_read_byte_data(client, 
DS1374_REG_CR); + if (cr < 0) + return cr; /* Disable any existing watchdog/alarm before setting the new one */ cr &= ~DS1374_REG_CR_WACE; - ret = i2c_smbus_write_byte_data(save_client, DS1374_REG_CR, cr); + ret = i2c_smbus_write_byte_data(client, DS1374_REG_CR, cr); if (ret < 0) - goto out; + return ret; /* Set new watchdog time */ - ret = ds1374_write_rtc(save_client, timeout, DS1374_REG_WDALM0, 3); - if (ret) { - pr_info("couldn't set new watchdog time\n"); - goto out; - } + timeout = timeout * 4096; + ret = ds1374_write_rtc(client, timeout, DS1374_REG_WDALM0, 3); + if (ret) + return ret; /* Enable watchdog timer */ cr |= DS1374_REG_CR_WACE | DS1374_REG_CR_WDALM; + cr &= ~DS1374_REG_CR_WDSTR;/* for RST PIN */ cr &= ~DS1374_REG_CR_AIE; - ret = i2c_smbus_write_byte_data(save_client, DS1374_REG_CR, cr); + ret = i2c_smbus_write_byte_data(client, DS1374_REG_CR, cr); if (ret < 0) - goto out; + return ret; return 0; -out: - return ret; } - /* * Reload the watchdog timer. (ie, pat the watchdog) */ -static void ds1374_wdt_ping(void) +static int ds1374_wdt_start(struct watchdog_device *wdt) { + struct ds1374 *ds1374 = watchdog_get_drvdata(wdt); u32 val; - int ret = 0; - ret = ds1374_read_rtc(save_client, &val, DS1374_REG_WDALM0, 3); - if (ret) - pr_info("WD TICK FAIL!!!!!!!!!! %i\n", ret); + return ds1374_read_rtc(ds1374->client, &val, DS1374_REG_WDALM0, 3); } -static void ds1374_wdt_disable(void) +static int ds1374_wdt_stop(struct watchdog_device *wdt) { + struct ds1374 *ds1374 = watchdog_get_drvdata(wdt); + struct i2c_client *client = ds1374->client; int cr; - cr = i2c_smbus_read_byte_data(save_client, DS1374_REG_CR); + cr = i2c_smbus_read_byte_data(client, DS1374_REG_CR); + if (cr < 0) + return cr; + /* Disable watchdog timer */ cr &= ~DS1374_REG_CR_WACE; - i2c_smbus_write_byte_data(save_client, DS1374_REG_CR, cr); -} - -/* - * Watchdog device is opened, and watchdog starts running. 
- */ -static int ds1374_wdt_open(struct inode *inode, struct file *file) -{ - struct ds1374 *ds1374 = i2c_get_clientdata(save_client); - - if (MINOR(inode->i_rdev) == WATCHDOG_MINOR) { - mutex_lock(&ds1374->mutex); - if (test_and_set_bit(0, &wdt_is_open)) { - mutex_unlock(&ds1374->mutex); - return -EBUSY; - } - /* - * Activate - */ - wdt_is_open = 1; - mutex_unlock(&ds1374->mutex); - return stream_open(inode, file); - } - return -ENODEV; -} - -/* - * Close the watchdog device. - */ -static int ds1374_wdt_release(struct inode *inode, struct file *file) -{ - if (MINOR(inode->i_rdev) == WATCHDOG_MINOR) - clear_bit(0, &wdt_is_open); - - return 0; -} - -/* - * Pat the watchdog whenever device is written to. - */ -static ssize_t ds1374_wdt_write(struct file *file, const char __user *data, - size_t len, loff_t *ppos) -{ - if (len) { - ds1374_wdt_ping(); - return 1; - } - return 0; -} - -static ssize_t ds1374_wdt_read(struct file *file, char __user *data, - size_t len, loff_t *ppos) -{ - return 0; -} - -/* - * Handle commands from user-space. - */ -static long ds1374_wdt_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - int new_margin, options; - - switch (cmd) { - case WDIOC_GETSUPPORT: - return copy_to_user((struct watchdog_info __user *)arg, - &ds1374_wdt_info, sizeof(ds1374_wdt_info)) ? -EFAULT : 0; - - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - return put_user(0, (int __user *)arg); - case WDIOC_KEEPALIVE: - ds1374_wdt_ping(); - return 0; - case WDIOC_SETTIMEOUT: - if (get_user(new_margin, (int __user *)arg)) - return -EFAULT; - - /* the hardware's tick rate is 4096 Hz, so - * the counter value needs to be scaled accordingly - */ - new_margin <<= 12; - if (new_margin < 1 || new_margin > 16777216) - return -EINVAL; - - wdt_margin = new_margin; - ds1374_wdt_settimeout(new_margin); - ds1374_wdt_ping(); - /* fallthrough */ - case WDIOC_GETTIMEOUT: - /* when returning ... 
inverse is true */ - return put_user((wdt_margin >> 12), (int __user *)arg); - case WDIOC_SETOPTIONS: - if (copy_from_user(&options, (int __user *)arg, sizeof(int))) - return -EFAULT; - - if (options & WDIOS_DISABLECARD) { - pr_info("disable watchdog\n"); - ds1374_wdt_disable(); - return 0; - } - - if (options & WDIOS_ENABLECARD) { - pr_info("enable watchdog\n"); - ds1374_wdt_settimeout(wdt_margin); - ds1374_wdt_ping(); - return 0; - } - return -EINVAL; - } - return -ENOTTY; + return i2c_smbus_write_byte_data(client, DS1374_REG_CR, cr); } -static long ds1374_wdt_unlocked_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - int ret; - struct ds1374 *ds1374 = i2c_get_clientdata(save_client); - - mutex_lock(&ds1374->mutex); - ret = ds1374_wdt_ioctl(file, cmd, arg); - mutex_unlock(&ds1374->mutex); - - return ret; -} - -static int ds1374_wdt_notify_sys(struct notifier_block *this, - unsigned long code, void *unused) -{ - if (code == SYS_DOWN || code == SYS_HALT) - /* Disable Watchdog */ - ds1374_wdt_disable(); - return NOTIFY_DONE; -} - -static const struct file_operations ds1374_wdt_fops = { - .owner = THIS_MODULE, - .read = ds1374_wdt_read, - .unlocked_ioctl = ds1374_wdt_unlocked_ioctl, - .compat_ioctl = compat_ptr_ioctl, - .write = ds1374_wdt_write, - .open = ds1374_wdt_open, - .release = ds1374_wdt_release, - .llseek = no_llseek, -}; - -static struct miscdevice ds1374_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &ds1374_wdt_fops, -}; - -static struct notifier_block ds1374_wdt_notifier = { - .notifier_call = ds1374_wdt_notify_sys, +static const struct watchdog_ops ds1374_wdt_ops = { + .owner = THIS_MODULE, + .start = ds1374_wdt_start, + .stop = ds1374_wdt_stop, + .set_timeout = ds1374_wdt_settimeout, }; - #endif /*CONFIG_RTC_DRV_DS1374_WDT*/ /* ***************************************************************************** @@ -652,16 +513,22 @@ static int ds1374_probe(struct i2c_client *client, return ret; #ifdef 
CONFIG_RTC_DRV_DS1374_WDT - save_client = client; - ret = misc_register(&ds1374_miscdev); + ds1374->wdt.info = &ds1374_wdt_info; + ds1374->wdt.ops = &ds1374_wdt_ops; + ds1374->wdt.timeout = TIMER_MARGIN_DEFAULT; + ds1374->wdt.min_timeout = TIMER_MARGIN_MIN; + ds1374->wdt.max_timeout = TIMER_MARGIN_MAX; + + watchdog_init_timeout(&ds1374->wdt, wdt_margin, &client->dev); + watchdog_set_nowayout(&ds1374->wdt, nowayout); + watchdog_stop_on_reboot(&ds1374->wdt); + watchdog_stop_on_unregister(&ds1374->wdt); + watchdog_set_drvdata(&ds1374->wdt, ds1374); + ds1374_wdt_settimeout(&ds1374->wdt, ds1374->wdt.timeout); + + ret = devm_watchdog_register_device(&client->dev, &ds1374->wdt); if (ret) return ret; - ret = register_reboot_notifier(&ds1374_wdt_notifier); - if (ret) { - misc_deregister(&ds1374_miscdev); - return ret; - } - ds1374_wdt_settimeout(131072); #endif return 0; @@ -670,11 +537,6 @@ static int ds1374_probe(struct i2c_client *client, static int ds1374_remove(struct i2c_client *client) { struct ds1374 *ds1374 = i2c_get_clientdata(client); -#ifdef CONFIG_RTC_DRV_DS1374_WDT - misc_deregister(&ds1374_miscdev); - ds1374_miscdev.parent = NULL; - unregister_reboot_notifier(&ds1374_wdt_notifier); -#endif if (client->irq > 0) { mutex_lock(&ds1374->mutex); diff --git a/drivers/rtc/rtc-goldfish.c b/drivers/rtc/rtc-goldfish.c index 27797157fcb3..6349d2cd3680 100644 --- a/drivers/rtc/rtc-goldfish.c +++ b/drivers/rtc/rtc-goldfish.c @@ -73,6 +73,7 @@ static int goldfish_rtc_set_alarm(struct device *dev, rtc_alarm64 = rtc_tm_to_time64(&alrm->time) * NSEC_PER_SEC; writel((rtc_alarm64 >> 32), base + TIMER_ALARM_HIGH); writel(rtc_alarm64, base + TIMER_ALARM_LOW); + writel(1, base + TIMER_IRQ_ENABLED); } else { /* * if this function was called with enabled=0 diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c index f21dc6b16d88..8d141d8a5490 100644 --- a/drivers/rtc/rtc-imxdi.c +++ b/drivers/rtc/rtc-imxdi.c @@ -95,7 +95,7 @@ /** * struct imxdi_dev - private imxdi rtc data - 
* @pdev: pionter to platform dev + * @pdev: pointer to platform dev * @rtc: pointer to rtc struct * @ioaddr: IO registers pointer * @clk: input reference clock @@ -350,7 +350,7 @@ static int di_handle_invalid_and_failure_state(struct imxdi_dev *imxdi, u32 dsr) * the tamper register is locked. We cannot disable the * tamper detection. The TDCHL can only be reset by a * DRYICE POR, but we cannot force a DRYICE POR in - * softwere because we are still in "FAILURE STATE". + * software because we are still in "FAILURE STATE". * We need a DRYICE POR via battery power cycling.... */ /* diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c index 03ebcf1c0f3d..d51cc12114cb 100644 --- a/drivers/rtc/rtc-max77686.c +++ b/drivers/rtc/rtc-max77686.c @@ -805,17 +805,36 @@ static int max77686_rtc_remove(struct platform_device *pdev) #ifdef CONFIG_PM_SLEEP static int max77686_rtc_suspend(struct device *dev) { + struct max77686_rtc_info *info = dev_get_drvdata(dev); + int ret = 0; + if (device_may_wakeup(dev)) { struct max77686_rtc_info *info = dev_get_drvdata(dev); - return enable_irq_wake(info->virq); + ret = enable_irq_wake(info->virq); } - return 0; + /* + * If the main IRQ (not virtual) is the parent IRQ, then it must be + * disabled during suspend because if it happens while suspended it + * will be handled before resuming I2C. + * + * Since Main IRQ is shared, all its users should disable it to be sure + * it won't fire while one of them is still suspended. 
+ */ + if (!info->drv_data->rtc_irq_from_platform) + disable_irq(info->rtc_irq); + + return ret; } static int max77686_rtc_resume(struct device *dev) { + struct max77686_rtc_info *info = dev_get_drvdata(dev); + + if (!info->drv_data->rtc_irq_from_platform) + enable_irq(info->rtc_irq); + if (device_may_wakeup(dev)) { struct max77686_rtc_info *info = dev_get_drvdata(dev); diff --git a/drivers/rtc/rtc-mcp795.c b/drivers/rtc/rtc-mcp795.c index 1660d5e79582..21cbf7f892e8 100644 --- a/drivers/rtc/rtc-mcp795.c +++ b/drivers/rtc/rtc-mcp795.c @@ -7,7 +7,7 @@ * based on other Linux RTC drivers * * Device datasheet: - * http://ww1.microchip.com/downloads/en/DeviceDoc/22280A.pdf + * https://ww1.microchip.com/downloads/en/DeviceDoc/22280A.pdf */ #include <linux/module.h> diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 9c5670776c68..ed6316992cbb 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -20,6 +20,7 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_irq.h> #include <linux/regmap.h> #include <linux/watchdog.h> @@ -28,8 +29,11 @@ #define PCF2127_BIT_CTRL1_TSF1 BIT(4) /* Control register 2 */ #define PCF2127_REG_CTRL2 0x01 +#define PCF2127_BIT_CTRL2_AIE BIT(1) #define PCF2127_BIT_CTRL2_TSIE BIT(2) +#define PCF2127_BIT_CTRL2_AF BIT(4) #define PCF2127_BIT_CTRL2_TSF2 BIT(5) +#define PCF2127_BIT_CTRL2_WDTF BIT(6) /* Control register 3 */ #define PCF2127_REG_CTRL3 0x02 #define PCF2127_BIT_CTRL3_BLIE BIT(0) @@ -46,6 +50,13 @@ #define PCF2127_REG_DW 0x07 #define PCF2127_REG_MO 0x08 #define PCF2127_REG_YR 0x09 +/* Alarm registers */ +#define PCF2127_REG_ALARM_SC 0x0A +#define PCF2127_REG_ALARM_MN 0x0B +#define PCF2127_REG_ALARM_HR 0x0C +#define PCF2127_REG_ALARM_DM 0x0D +#define PCF2127_REG_ALARM_DW 0x0E +#define PCF2127_BIT_ALARM_AE BIT(7) /* Watchdog registers */ #define PCF2127_REG_WD_CTL 0x10 #define PCF2127_BIT_WD_CTL_TF0 BIT(0) @@ -324,6 +335,112 @@ static const struct watchdog_ops 
pcf2127_watchdog_ops = { .set_timeout = pcf2127_wdt_set_timeout, }; +/* Alarm */ +static int pcf2127_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct pcf2127 *pcf2127 = dev_get_drvdata(dev); + unsigned int buf[5], ctrl2; + int ret; + + ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL2, &ctrl2); + if (ret) + return ret; + + ret = pcf2127_wdt_active_ping(&pcf2127->wdd); + if (ret) + return ret; + + ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_ALARM_SC, buf, + sizeof(buf)); + if (ret) + return ret; + + alrm->enabled = ctrl2 & PCF2127_BIT_CTRL2_AIE; + alrm->pending = ctrl2 & PCF2127_BIT_CTRL2_AF; + + alrm->time.tm_sec = bcd2bin(buf[0] & 0x7F); + alrm->time.tm_min = bcd2bin(buf[1] & 0x7F); + alrm->time.tm_hour = bcd2bin(buf[2] & 0x3F); + alrm->time.tm_mday = bcd2bin(buf[3] & 0x3F); + + return 0; +} + +static int pcf2127_rtc_alarm_irq_enable(struct device *dev, u32 enable) +{ + struct pcf2127 *pcf2127 = dev_get_drvdata(dev); + int ret; + + ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL2, + PCF2127_BIT_CTRL2_AIE, + enable ? 
PCF2127_BIT_CTRL2_AIE : 0); + if (ret) + return ret; + + return pcf2127_wdt_active_ping(&pcf2127->wdd); +} + +static int pcf2127_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct pcf2127 *pcf2127 = dev_get_drvdata(dev); + uint8_t buf[5]; + int ret; + + ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL2, + PCF2127_BIT_CTRL2_AF, 0); + if (ret) + return ret; + + ret = pcf2127_wdt_active_ping(&pcf2127->wdd); + if (ret) + return ret; + + buf[0] = bin2bcd(alrm->time.tm_sec); + buf[1] = bin2bcd(alrm->time.tm_min); + buf[2] = bin2bcd(alrm->time.tm_hour); + buf[3] = bin2bcd(alrm->time.tm_mday); + buf[4] = PCF2127_BIT_ALARM_AE; /* Do not match on week day */ + + ret = regmap_bulk_write(pcf2127->regmap, PCF2127_REG_ALARM_SC, buf, + sizeof(buf)); + if (ret) + return ret; + + return pcf2127_rtc_alarm_irq_enable(dev, alrm->enabled); +} + +static irqreturn_t pcf2127_rtc_irq(int irq, void *dev) +{ + struct pcf2127 *pcf2127 = dev_get_drvdata(dev); + unsigned int ctrl2 = 0; + int ret = 0; + + ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL2, &ctrl2); + if (ret) + return IRQ_NONE; + + if (!(ctrl2 & PCF2127_BIT_CTRL2_AF)) + return IRQ_NONE; + + regmap_write(pcf2127->regmap, PCF2127_REG_CTRL2, + ctrl2 & ~(PCF2127_BIT_CTRL2_AF | PCF2127_BIT_CTRL2_WDTF)); + + rtc_update_irq(pcf2127->rtc, 1, RTC_IRQF | RTC_AF); + + pcf2127_wdt_active_ping(&pcf2127->wdd); + + return IRQ_HANDLED; +} + +static const struct rtc_class_ops pcf2127_rtc_alrm_ops = { + .ioctl = pcf2127_rtc_ioctl, + .read_time = pcf2127_rtc_read_time, + .set_time = pcf2127_rtc_set_time, + .read_alarm = pcf2127_rtc_read_alarm, + .set_alarm = pcf2127_rtc_set_alarm, + .alarm_irq_enable = pcf2127_rtc_alarm_irq_enable, +}; + /* sysfs interface */ static ssize_t timestamp0_store(struct device *dev, @@ -416,7 +533,7 @@ static const struct attribute_group pcf2127_attr_group = { }; static int pcf2127_probe(struct device *dev, struct regmap *regmap, - const char *name, bool has_nvmem) + int alarm_irq, const 
char *name, bool has_nvmem) { struct pcf2127 *pcf2127; u32 wdd_timeout; @@ -440,6 +557,23 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap, pcf2127->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000; pcf2127->rtc->range_max = RTC_TIMESTAMP_END_2099; pcf2127->rtc->set_start_time = true; /* Sets actual start to 1970 */ + pcf2127->rtc->uie_unsupported = 1; + + if (alarm_irq >= 0) { + ret = devm_request_threaded_irq(dev, alarm_irq, NULL, + pcf2127_rtc_irq, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + dev_name(dev), dev); + if (ret) { + dev_err(dev, "failed to request alarm irq\n"); + return ret; + } + } + + if (alarm_irq >= 0 || device_property_read_bool(dev, "wakeup-source")) { + device_init_wakeup(dev, true); + pcf2127->rtc->ops = &pcf2127_rtc_alrm_ops; + } pcf2127->wdd.parent = dev; pcf2127->wdd.info = &pcf2127_wdt_info; @@ -553,6 +687,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap, static const struct of_device_id pcf2127_of_match[] = { { .compatible = "nxp,pcf2127" }, { .compatible = "nxp,pcf2129" }, + { .compatible = "nxp,pca2129" }, {} }; MODULE_DEVICE_TABLE(of, pcf2127_of_match); @@ -657,13 +792,14 @@ static int pcf2127_i2c_probe(struct i2c_client *client, return PTR_ERR(regmap); } - return pcf2127_probe(&client->dev, regmap, + return pcf2127_probe(&client->dev, regmap, client->irq, pcf2127_i2c_driver.driver.name, id->driver_data); } static const struct i2c_device_id pcf2127_i2c_id[] = { { "pcf2127", 1 }, { "pcf2129", 0 }, + { "pca2129", 0 }, { } }; MODULE_DEVICE_TABLE(i2c, pcf2127_i2c_id); @@ -722,13 +858,15 @@ static int pcf2127_spi_probe(struct spi_device *spi) return PTR_ERR(regmap); } - return pcf2127_probe(&spi->dev, regmap, pcf2127_spi_driver.driver.name, + return pcf2127_probe(&spi->dev, regmap, spi->irq, + pcf2127_spi_driver.driver.name, spi_get_device_id(spi)->driver_data); } static const struct spi_device_id pcf2127_spi_id[] = { { "pcf2127", 1 }, { "pcf2129", 0 }, + { "pca2129", 0 }, { } }; MODULE_DEVICE_TABLE(spi, 
pcf2127_spi_id); diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c index 7a87f461bec8..ca55ba975aeb 100644 --- a/drivers/rtc/rtc-pcf85063.c +++ b/drivers/rtc/rtc-pcf85063.c @@ -21,8 +21,8 @@ /* * Information for this driver was pulled from the following datasheets. * - * http://www.nxp.com/documents/data_sheet/PCF85063A.pdf - * http://www.nxp.com/documents/data_sheet/PCF85063TP.pdf + * https://www.nxp.com/documents/data_sheet/PCF85063A.pdf + * https://www.nxp.com/documents/data_sheet/PCF85063TP.pdf * * PCF85063A -- Rev. 6 — 18 November 2015 * PCF85063TP -- Rev. 4 — 6 May 2015 diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index 40d7450a1ce4..c6b89273feba 100644 --- a/drivers/rtc/rtc-pl031.c +++ b/drivers/rtc/rtc-pl031.c @@ -275,6 +275,7 @@ static int pl031_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) struct pl031_local *ldata = dev_get_drvdata(dev); writel(rtc_tm_to_time64(&alarm->time), ldata->base + RTC_MR); + pl031_alarm_irq_enable(dev, alarm->enabled); return 0; } diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index d5880f52dc2b..5896e5282a4e 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -818,7 +818,7 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns, static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags, struct pkey_apqn *apqns, size_t *nr_apqns) { - int rc = EINVAL; + int rc; u32 _nr_apqns, *_apqns = NULL; struct keytoken_header *hdr = (struct keytoken_header *)key; @@ -886,7 +886,7 @@ static int pkey_apqns4keytype(enum pkey_key_type ktype, u8 cur_mkvp[32], u8 alt_mkvp[32], u32 flags, struct pkey_apqn *apqns, size_t *nr_apqns) { - int rc = -EINVAL; + int rc; u32 _nr_apqns, *_apqns = NULL; if (ktype == PKEY_TYPE_CCA_DATA || ktype == PKEY_TYPE_CCA_CIPHER) { diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index de881a6cff35..620465c2a1da 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ 
b/drivers/vfio/pci/vfio_pci.c @@ -60,6 +60,10 @@ module_param(enable_sriov, bool, 0644); MODULE_PARM_DESC(enable_sriov, "Enable support for SR-IOV configuration. Enabling SR-IOV on a PF typically requires support of the userspace PF driver, enabling VFs without such support may result in non-functional VFs or PF."); #endif +static bool disable_denylist; +module_param(disable_denylist, bool, 0444); +MODULE_PARM_DESC(disable_denylist, "Disable use of device denylist. Disabling the denylist allows binding to devices with known errata that may lead to exploitable stability or security issues when accessed by untrusted users."); + static inline bool vfio_vga_disabled(void) { #ifdef CONFIG_VFIO_PCI_VGA @@ -69,6 +73,44 @@ static inline bool vfio_vga_disabled(void) #endif } +static bool vfio_pci_dev_in_denylist(struct pci_dev *pdev) +{ + switch (pdev->vendor) { + case PCI_VENDOR_ID_INTEL: + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_QAT_C3XXX: + case PCI_DEVICE_ID_INTEL_QAT_C3XXX_VF: + case PCI_DEVICE_ID_INTEL_QAT_C62X: + case PCI_DEVICE_ID_INTEL_QAT_C62X_VF: + case PCI_DEVICE_ID_INTEL_QAT_DH895XCC: + case PCI_DEVICE_ID_INTEL_QAT_DH895XCC_VF: + return true; + default: + return false; + } + } + + return false; +} + +static bool vfio_pci_is_denylisted(struct pci_dev *pdev) +{ + if (!vfio_pci_dev_in_denylist(pdev)) + return false; + + if (disable_denylist) { + pci_warn(pdev, + "device denylist disabled - allowing device %04x:%04x.\n", + pdev->vendor, pdev->device); + return false; + } + + pci_warn(pdev, "%04x:%04x exists in vfio-pci device denylist, driver probing disallowed.\n", + pdev->vendor, pdev->device); + + return true; +} + /* * Our VGA arbiter participation is limited since we don't know anything * about the device itself. However, if the device is the only VGA device @@ -207,6 +249,8 @@ static bool vfio_pci_nointx(struct pci_dev *pdev) case 0x1580 ... 0x1581: case 0x1583 ... 0x158b: case 0x37d0 ... 
0x37d2: + /* X550 */ + case 0x1563: return true; default: return false; @@ -521,14 +565,12 @@ static void vfio_pci_release(void *device_data) vfio_pci_vf_token_user_add(vdev, -1); vfio_spapr_pci_eeh_release(vdev->pdev); vfio_pci_disable(vdev); + mutex_lock(&vdev->igate); if (vdev->err_trigger) { eventfd_ctx_put(vdev->err_trigger); vdev->err_trigger = NULL; } - mutex_unlock(&vdev->igate); - - mutex_lock(&vdev->igate); if (vdev->req_trigger) { eventfd_ctx_put(vdev->req_trigger); vdev->req_trigger = NULL; @@ -1856,6 +1898,9 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct iommu_group *group; int ret; + if (vfio_pci_is_denylisted(pdev)) + return -EINVAL; + if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) return -EINVAL; @@ -2345,6 +2390,9 @@ static int __init vfio_pci_init(void) vfio_pci_fill_ids(); + if (disable_denylist) + pr_warn("device denylist disabled.\n"); + return 0; out_driver: diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 580099afeaff..262ab0efd06c 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -627,9 +627,10 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group, * that error notification via MSI can be affected for platforms that handle * MSI within the same IOVA space as DMA. 
*/ -static const char * const vfio_driver_whitelist[] = { "pci-stub" }; +static const char * const vfio_driver_allowed[] = { "pci-stub" }; -static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv) +static bool vfio_dev_driver_allowed(struct device *dev, + struct device_driver *drv) { if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); @@ -638,8 +639,8 @@ static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv) return true; } - return match_string(vfio_driver_whitelist, - ARRAY_SIZE(vfio_driver_whitelist), + return match_string(vfio_driver_allowed, + ARRAY_SIZE(vfio_driver_allowed), drv->name) >= 0; } @@ -648,7 +649,7 @@ static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv) * one of the following states: * - driver-less * - bound to a vfio driver - * - bound to a whitelisted driver + * - bound to an otherwise allowed driver * - a PCI interconnect device * * We use two methods to determine whether a device is bound to a vfio @@ -674,7 +675,7 @@ static int vfio_dev_viable(struct device *dev, void *data) } mutex_unlock(&group->unbound_lock); - if (!ret || !drv || vfio_dev_whitelisted(dev, drv)) + if (!ret || !drv || vfio_dev_driver_allowed(dev, drv)) return 0; device = vfio_group_get_device(group, dev); diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 16b3adc508db..fe888b5dcc00 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -383,7 +383,7 @@ static void tce_iommu_unuse_page(struct tce_container *container, struct page *page; page = pfn_to_page(hpa >> PAGE_SHIFT); - put_page(page); + unpin_user_page(page); } static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container, @@ -486,7 +486,7 @@ static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa) struct page *page = NULL; enum dma_data_direction direction = iommu_tce_direction(tce); - if (get_user_pages_fast(tce & PAGE_MASK, 1, + 
if (pin_user_pages_fast(tce & PAGE_MASK, 1, direction != DMA_TO_DEVICE ? FOLL_WRITE : 0, &page) != 1) return -EFAULT; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 5e556ac9102a..6990fc711a80 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -425,7 +425,7 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, if (ret) { bool unlocked = false; - ret = fixup_user_fault(NULL, mm, vaddr, + ret = fixup_user_fault(mm, vaddr, FAULT_FLAG_REMOTE | (write_fault ? FAULT_FLAG_WRITE : 0), &unlocked); @@ -453,7 +453,7 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, flags |= FOLL_WRITE; mmap_read_lock(mm); - ret = pin_user_pages_remote(NULL, mm, vaddr, 1, flags | FOLL_LONGTERM, + ret = pin_user_pages_remote(mm, vaddr, 1, flags | FOLL_LONGTERM, page, NULL, NULL); if (ret == 1) { *pfn = page_to_pfn(page[0]); @@ -1225,8 +1225,10 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova, return 0; unwind: - list_for_each_entry_continue_reverse(d, &iommu->domain_list, next) + list_for_each_entry_continue_reverse(d, &iommu->domain_list, next) { iommu_unmap(d->domain, iova, npage << PAGE_SHIFT); + cond_resched(); + } return ret; } @@ -2453,6 +2455,23 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) return ret; } +static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu, + unsigned long arg) +{ + switch (arg) { + case VFIO_TYPE1_IOMMU: + case VFIO_TYPE1v2_IOMMU: + case VFIO_TYPE1_NESTING_IOMMU: + return 1; + case VFIO_DMA_CC_IOMMU: + if (!iommu) + return 0; + return vfio_domains_have_iommu_cache(iommu); + default: + return 0; + } +} + static int vfio_iommu_iova_add_cap(struct vfio_info_cap *caps, struct vfio_iommu_type1_info_cap_iova_range *cap_iovas, size_t size) @@ -2529,241 +2548,256 @@ static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu, return vfio_info_add_capability(caps, &cap_mig.header, 
sizeof(cap_mig)); } -static long vfio_iommu_type1_ioctl(void *iommu_data, - unsigned int cmd, unsigned long arg) +static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu, + unsigned long arg) { - struct vfio_iommu *iommu = iommu_data; + struct vfio_iommu_type1_info info; unsigned long minsz; + struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; + unsigned long capsz; + int ret; - if (cmd == VFIO_CHECK_EXTENSION) { - switch (arg) { - case VFIO_TYPE1_IOMMU: - case VFIO_TYPE1v2_IOMMU: - case VFIO_TYPE1_NESTING_IOMMU: - return 1; - case VFIO_DMA_CC_IOMMU: - if (!iommu) - return 0; - return vfio_domains_have_iommu_cache(iommu); - default: - return 0; - } - } else if (cmd == VFIO_IOMMU_GET_INFO) { - struct vfio_iommu_type1_info info; - struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; - unsigned long capsz; - int ret; - - minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); + minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); - /* For backward compatibility, cannot require this */ - capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset); + /* For backward compatibility, cannot require this */ + capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset); - if (copy_from_user(&info, (void __user *)arg, minsz)) - return -EFAULT; + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; - if (info.argsz < minsz) - return -EINVAL; + if (info.argsz < minsz) + return -EINVAL; - if (info.argsz >= capsz) { - minsz = capsz; - info.cap_offset = 0; /* output, no-recopy necessary */ - } + if (info.argsz >= capsz) { + minsz = capsz; + info.cap_offset = 0; /* output, no-recopy necessary */ + } - mutex_lock(&iommu->lock); - info.flags = VFIO_IOMMU_INFO_PGSIZES; + mutex_lock(&iommu->lock); + info.flags = VFIO_IOMMU_INFO_PGSIZES; - info.iova_pgsizes = iommu->pgsize_bitmap; + info.iova_pgsizes = iommu->pgsize_bitmap; - ret = vfio_iommu_migration_build_caps(iommu, &caps); + ret = vfio_iommu_migration_build_caps(iommu, 
&caps); - if (!ret) - ret = vfio_iommu_iova_build_caps(iommu, &caps); + if (!ret) + ret = vfio_iommu_iova_build_caps(iommu, &caps); - mutex_unlock(&iommu->lock); + mutex_unlock(&iommu->lock); - if (ret) - return ret; + if (ret) + return ret; - if (caps.size) { - info.flags |= VFIO_IOMMU_INFO_CAPS; + if (caps.size) { + info.flags |= VFIO_IOMMU_INFO_CAPS; - if (info.argsz < sizeof(info) + caps.size) { - info.argsz = sizeof(info) + caps.size; - } else { - vfio_info_cap_shift(&caps, sizeof(info)); - if (copy_to_user((void __user *)arg + - sizeof(info), caps.buf, - caps.size)) { - kfree(caps.buf); - return -EFAULT; - } - info.cap_offset = sizeof(info); + if (info.argsz < sizeof(info) + caps.size) { + info.argsz = sizeof(info) + caps.size; + } else { + vfio_info_cap_shift(&caps, sizeof(info)); + if (copy_to_user((void __user *)arg + + sizeof(info), caps.buf, + caps.size)) { + kfree(caps.buf); + return -EFAULT; } - - kfree(caps.buf); + info.cap_offset = sizeof(info); } - return copy_to_user((void __user *)arg, &info, minsz) ? - -EFAULT : 0; + kfree(caps.buf); + } - } else if (cmd == VFIO_IOMMU_MAP_DMA) { - struct vfio_iommu_type1_dma_map map; - uint32_t mask = VFIO_DMA_MAP_FLAG_READ | - VFIO_DMA_MAP_FLAG_WRITE; + return copy_to_user((void __user *)arg, &info, minsz) ? 
+ -EFAULT : 0; +} - minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); +static int vfio_iommu_type1_map_dma(struct vfio_iommu *iommu, + unsigned long arg) +{ + struct vfio_iommu_type1_dma_map map; + unsigned long minsz; + uint32_t mask = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; - if (copy_from_user(&map, (void __user *)arg, minsz)) - return -EFAULT; + minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); - if (map.argsz < minsz || map.flags & ~mask) - return -EINVAL; + if (copy_from_user(&map, (void __user *)arg, minsz)) + return -EFAULT; - return vfio_dma_do_map(iommu, &map); + if (map.argsz < minsz || map.flags & ~mask) + return -EINVAL; - } else if (cmd == VFIO_IOMMU_UNMAP_DMA) { - struct vfio_iommu_type1_dma_unmap unmap; - struct vfio_bitmap bitmap = { 0 }; - int ret; + return vfio_dma_do_map(iommu, &map); +} - minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size); +static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu, + unsigned long arg) +{ + struct vfio_iommu_type1_dma_unmap unmap; + struct vfio_bitmap bitmap = { 0 }; + unsigned long minsz; + int ret; - if (copy_from_user(&unmap, (void __user *)arg, minsz)) - return -EFAULT; + minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size); - if (unmap.argsz < minsz || - unmap.flags & ~VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) - return -EINVAL; + if (copy_from_user(&unmap, (void __user *)arg, minsz)) + return -EFAULT; - if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) { - unsigned long pgshift; + if (unmap.argsz < minsz || + unmap.flags & ~VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) + return -EINVAL; - if (unmap.argsz < (minsz + sizeof(bitmap))) - return -EINVAL; + if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) { + unsigned long pgshift; - if (copy_from_user(&bitmap, - (void __user *)(arg + minsz), - sizeof(bitmap))) - return -EFAULT; + if (unmap.argsz < (minsz + sizeof(bitmap))) + return -EINVAL; - if (!access_ok((void __user *)bitmap.data, bitmap.size)) - return 
-EINVAL; + if (copy_from_user(&bitmap, + (void __user *)(arg + minsz), + sizeof(bitmap))) + return -EFAULT; - pgshift = __ffs(bitmap.pgsize); - ret = verify_bitmap_size(unmap.size >> pgshift, - bitmap.size); - if (ret) - return ret; - } + if (!access_ok((void __user *)bitmap.data, bitmap.size)) + return -EINVAL; - ret = vfio_dma_do_unmap(iommu, &unmap, &bitmap); + pgshift = __ffs(bitmap.pgsize); + ret = verify_bitmap_size(unmap.size >> pgshift, + bitmap.size); if (ret) return ret; + } + + ret = vfio_dma_do_unmap(iommu, &unmap, &bitmap); + if (ret) + return ret; - return copy_to_user((void __user *)arg, &unmap, minsz) ? + return copy_to_user((void __user *)arg, &unmap, minsz) ? -EFAULT : 0; - } else if (cmd == VFIO_IOMMU_DIRTY_PAGES) { - struct vfio_iommu_type1_dirty_bitmap dirty; - uint32_t mask = VFIO_IOMMU_DIRTY_PAGES_FLAG_START | - VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP | - VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; - int ret = 0; +} - if (!iommu->v2) - return -EACCES; +static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, + unsigned long arg) +{ + struct vfio_iommu_type1_dirty_bitmap dirty; + uint32_t mask = VFIO_IOMMU_DIRTY_PAGES_FLAG_START | + VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP | + VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; + unsigned long minsz; + int ret = 0; - minsz = offsetofend(struct vfio_iommu_type1_dirty_bitmap, - flags); + if (!iommu->v2) + return -EACCES; - if (copy_from_user(&dirty, (void __user *)arg, minsz)) - return -EFAULT; + minsz = offsetofend(struct vfio_iommu_type1_dirty_bitmap, flags); - if (dirty.argsz < minsz || dirty.flags & ~mask) - return -EINVAL; + if (copy_from_user(&dirty, (void __user *)arg, minsz)) + return -EFAULT; - /* only one flag should be set at a time */ - if (__ffs(dirty.flags) != __fls(dirty.flags)) - return -EINVAL; + if (dirty.argsz < minsz || dirty.flags & ~mask) + return -EINVAL; - if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) { - size_t pgsize; + /* only one flag should be set at a time */ + if 
(__ffs(dirty.flags) != __fls(dirty.flags)) + return -EINVAL; - mutex_lock(&iommu->lock); - pgsize = 1 << __ffs(iommu->pgsize_bitmap); - if (!iommu->dirty_page_tracking) { - ret = vfio_dma_bitmap_alloc_all(iommu, pgsize); - if (!ret) - iommu->dirty_page_tracking = true; - } - mutex_unlock(&iommu->lock); - return ret; - } else if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP) { - mutex_lock(&iommu->lock); - if (iommu->dirty_page_tracking) { - iommu->dirty_page_tracking = false; - vfio_dma_bitmap_free_all(iommu); - } - mutex_unlock(&iommu->lock); - return 0; - } else if (dirty.flags & - VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP) { - struct vfio_iommu_type1_dirty_bitmap_get range; - unsigned long pgshift; - size_t data_size = dirty.argsz - minsz; - size_t iommu_pgsize; - - if (!data_size || data_size < sizeof(range)) - return -EINVAL; - - if (copy_from_user(&range, (void __user *)(arg + minsz), - sizeof(range))) - return -EFAULT; + if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) { + size_t pgsize; - if (range.iova + range.size < range.iova) - return -EINVAL; - if (!access_ok((void __user *)range.bitmap.data, - range.bitmap.size)) - return -EINVAL; + mutex_lock(&iommu->lock); + pgsize = 1 << __ffs(iommu->pgsize_bitmap); + if (!iommu->dirty_page_tracking) { + ret = vfio_dma_bitmap_alloc_all(iommu, pgsize); + if (!ret) + iommu->dirty_page_tracking = true; + } + mutex_unlock(&iommu->lock); + return ret; + } else if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP) { + mutex_lock(&iommu->lock); + if (iommu->dirty_page_tracking) { + iommu->dirty_page_tracking = false; + vfio_dma_bitmap_free_all(iommu); + } + mutex_unlock(&iommu->lock); + return 0; + } else if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP) { + struct vfio_iommu_type1_dirty_bitmap_get range; + unsigned long pgshift; + size_t data_size = dirty.argsz - minsz; + size_t iommu_pgsize; - pgshift = __ffs(range.bitmap.pgsize); - ret = verify_bitmap_size(range.size >> pgshift, - range.bitmap.size); - if 
(ret) - return ret; + if (!data_size || data_size < sizeof(range)) + return -EINVAL; - mutex_lock(&iommu->lock); + if (copy_from_user(&range, (void __user *)(arg + minsz), + sizeof(range))) + return -EFAULT; - iommu_pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap); + if (range.iova + range.size < range.iova) + return -EINVAL; + if (!access_ok((void __user *)range.bitmap.data, + range.bitmap.size)) + return -EINVAL; - /* allow only smallest supported pgsize */ - if (range.bitmap.pgsize != iommu_pgsize) { - ret = -EINVAL; - goto out_unlock; - } - if (range.iova & (iommu_pgsize - 1)) { - ret = -EINVAL; - goto out_unlock; - } - if (!range.size || range.size & (iommu_pgsize - 1)) { - ret = -EINVAL; - goto out_unlock; - } + pgshift = __ffs(range.bitmap.pgsize); + ret = verify_bitmap_size(range.size >> pgshift, + range.bitmap.size); + if (ret) + return ret; - if (iommu->dirty_page_tracking) - ret = vfio_iova_dirty_bitmap(range.bitmap.data, - iommu, range.iova, range.size, - range.bitmap.pgsize); - else - ret = -EINVAL; -out_unlock: - mutex_unlock(&iommu->lock); + mutex_lock(&iommu->lock); - return ret; + iommu_pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap); + + /* allow only smallest supported pgsize */ + if (range.bitmap.pgsize != iommu_pgsize) { + ret = -EINVAL; + goto out_unlock; + } + if (range.iova & (iommu_pgsize - 1)) { + ret = -EINVAL; + goto out_unlock; + } + if (!range.size || range.size & (iommu_pgsize - 1)) { + ret = -EINVAL; + goto out_unlock; } + + if (iommu->dirty_page_tracking) + ret = vfio_iova_dirty_bitmap(range.bitmap.data, + iommu, range.iova, + range.size, + range.bitmap.pgsize); + else + ret = -EINVAL; +out_unlock: + mutex_unlock(&iommu->lock); + + return ret; } - return -ENOTTY; + return -EINVAL; +} + +static long vfio_iommu_type1_ioctl(void *iommu_data, + unsigned int cmd, unsigned long arg) +{ + struct vfio_iommu *iommu = iommu_data; + + switch (cmd) { + case VFIO_CHECK_EXTENSION: + return vfio_iommu_type1_check_extension(iommu, arg); + case 
VFIO_IOMMU_GET_INFO: + return vfio_iommu_type1_get_info(iommu, arg); + case VFIO_IOMMU_MAP_DMA: + return vfio_iommu_type1_map_dma(iommu, arg); + case VFIO_IOMMU_UNMAP_DMA: + return vfio_iommu_type1_unmap_dma(iommu, arg); + case VFIO_IOMMU_DIRTY_PAGES: + return vfio_iommu_type1_dirty_pages(iommu, arg); + default: + return -ENOTTY; + } } static int vfio_iommu_type1_register_notifier(void *iommu_data, diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 30e73ec4ad5c..da7c88ffaa6a 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -957,7 +957,6 @@ static int fb_check_caps(struct fb_info *info, struct fb_var_screeninfo *var, int fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) { - int flags = info->flags; int ret = 0; u32 activate; struct fb_var_screeninfo old_var; @@ -1052,9 +1051,6 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) event.data = &mode; fb_notifier_call_chain(FB_EVENT_MODE_CHANGE, &event); - if (flags & FBINFO_MISC_USEREVENT) - fbcon_update_vcs(info, activate & FB_ACTIVATE_ALL); - return 0; } EXPORT_SYMBOL(fb_set_var); @@ -1105,9 +1101,9 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, return -EFAULT; console_lock(); lock_fb_info(info); - info->flags |= FBINFO_MISC_USEREVENT; ret = fb_set_var(info, &var); - info->flags &= ~FBINFO_MISC_USEREVENT; + if (!ret) + fbcon_update_vcs(info, var.activate & FB_ACTIVATE_ALL); unlock_fb_info(info); console_unlock(); if (!ret && copy_to_user(argp, &var, sizeof(var))) diff --git a/drivers/video/fbdev/core/fbsysfs.c b/drivers/video/fbdev/core/fbsysfs.c index d54c88f88991..65dae05fff8e 100644 --- a/drivers/video/fbdev/core/fbsysfs.c +++ b/drivers/video/fbdev/core/fbsysfs.c @@ -91,9 +91,9 @@ static int activate(struct fb_info *fb_info, struct fb_var_screeninfo *var) var->activate |= FB_ACTIVATE_FORCE; console_lock(); - fb_info->flags |= FBINFO_MISC_USEREVENT; err = fb_set_var(fb_info, var); - 
fb_info->flags &= ~FBINFO_MISC_USEREVENT; + if (!err) + fbcon_update_vcs(fb_info, var->activate & FB_ACTIVATE_ALL); console_unlock(); if (err) return err; diff --git a/drivers/video/fbdev/ps3fb.c b/drivers/video/fbdev/ps3fb.c index 9df78fb77267..203c254f8f6c 100644 --- a/drivers/video/fbdev/ps3fb.c +++ b/drivers/video/fbdev/ps3fb.c @@ -29,6 +29,7 @@ #include <linux/freezer.h> #include <linux/uaccess.h> #include <linux/fb.h> +#include <linux/fbcon.h> #include <linux/init.h> #include <asm/cell-regs.h> @@ -824,12 +825,12 @@ static int ps3fb_ioctl(struct fb_info *info, unsigned int cmd, var = info->var; fb_videomode_to_var(&var, vmode); console_lock(); - info->flags |= FBINFO_MISC_USEREVENT; /* Force, in case only special bits changed */ var.activate |= FB_ACTIVATE_FORCE; par->new_mode_id = val; retval = fb_set_var(info, &var); - info->flags &= ~FBINFO_MISC_USEREVENT; + if (!retval) + fbcon_update_vcs(info, var.activate & FB_ACTIVATE_ALL); console_unlock(); } break; diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 4f4687c46d38..ab7aad5a1e69 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1027,7 +1027,7 @@ config ADVANTECH_WDT If you are configuring a Linux kernel for the Advantech single-board computer, say `Y' here to support its built-in watchdog timer feature. 
More information can be found at - <http://www.advantech.com.tw/products/> + <https://www.advantech.com.tw/products/> config ALIM1535_WDT tristate "ALi M1535 PMU Watchdog Timer" diff --git a/drivers/watchdog/advantechwdt.c b/drivers/watchdog/advantechwdt.c index 0e4c18a2aa42..554fe85da50e 100644 --- a/drivers/watchdog/advantechwdt.c +++ b/drivers/watchdog/advantechwdt.c @@ -177,7 +177,7 @@ static long advwdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (advwdt_set_heartbeat(new_timeout)) return -EINVAL; advwdt_ping(); - /* fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); default: diff --git a/drivers/watchdog/alim1535_wdt.c b/drivers/watchdog/alim1535_wdt.c index 42338c7d4540..bfb9a91ca1df 100644 --- a/drivers/watchdog/alim1535_wdt.c +++ b/drivers/watchdog/alim1535_wdt.c @@ -220,7 +220,7 @@ static long ali_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return -EINVAL; ali_keepalive(); } - /* fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); default: diff --git a/drivers/watchdog/alim7101_wdt.c b/drivers/watchdog/alim7101_wdt.c index 5af0358f4390..4ff7f5afb7aa 100644 --- a/drivers/watchdog/alim7101_wdt.c +++ b/drivers/watchdog/alim7101_wdt.c @@ -279,7 +279,7 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) timeout = new_timeout; wdt_keepalive(); } - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); default: diff --git a/drivers/watchdog/ar7_wdt.c b/drivers/watchdog/ar7_wdt.c index c087027ffd5d..ff37dc91057d 100644 --- a/drivers/watchdog/ar7_wdt.c +++ b/drivers/watchdog/ar7_wdt.c @@ -235,8 +235,7 @@ static long ar7_wdt_ioctl(struct file *file, ar7_wdt_update_margin(new_margin); ar7_wdt_kick(1); spin_unlock(&wdt_lock); - /* Fall through */ - + fallthrough; case WDIOC_GETTIMEOUT: if (put_user(margin, (int *)arg)) return -EFAULT; diff --git a/drivers/watchdog/ath79_wdt.c b/drivers/watchdog/ath79_wdt.c 
index d6dff97c280b..0f18f06a21b6 100644 --- a/drivers/watchdog/ath79_wdt.c +++ b/drivers/watchdog/ath79_wdt.c @@ -215,8 +215,8 @@ static long ath79_wdt_ioctl(struct file *file, unsigned int cmd, err = ath79_wdt_set_timeout(t); if (err) break; + fallthrough; - /* fallthrough */ case WDIOC_GETTIMEOUT: err = put_user(timeout, p); break; diff --git a/drivers/watchdog/bcm_kona_wdt.c b/drivers/watchdog/bcm_kona_wdt.c index eb850a8d19df..8237c4e9c2a0 100644 --- a/drivers/watchdog/bcm_kona_wdt.c +++ b/drivers/watchdog/bcm_kona_wdt.c @@ -279,7 +279,7 @@ static int bcm_kona_wdt_probe(struct platform_device *pdev) wdt->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(wdt->base)) - return -ENODEV; + return PTR_ERR(wdt->base); wdt->resolution = SECWDOG_DEFAULT_RESOLUTION; ret = bcm_kona_wdt_set_resolution_reg(wdt); diff --git a/drivers/watchdog/booke_wdt.c b/drivers/watchdog/booke_wdt.c index 9d09bbfdef20..7817fb976f9c 100644 --- a/drivers/watchdog/booke_wdt.c +++ b/drivers/watchdog/booke_wdt.c @@ -39,6 +39,11 @@ static bool booke_wdt_enabled; module_param(booke_wdt_enabled, bool, 0); static int booke_wdt_period = CONFIG_BOOKE_WDT_DEFAULT_TIMEOUT; module_param(booke_wdt_period, int, 0); +static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, 0); +MODULE_PARM_DESC(nowayout, + "Watchdog cannot be stopped once started (default=" + __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); #ifdef CONFIG_PPC_FSL_BOOK3E @@ -215,7 +220,6 @@ static void __exit booke_wdt_exit(void) static int __init booke_wdt_init(void) { int ret = 0; - bool nowayout = WATCHDOG_NOWAYOUT; pr_info("powerpc book-e watchdog driver loaded\n"); booke_wdt_info.firmware_version = cur_cpu_spec->pvr_value; diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c index fba21de2bbad..32d0e1781e63 100644 --- a/drivers/watchdog/dw_wdt.c +++ b/drivers/watchdog/dw_wdt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 2010-2011 Picochip Ltd., Jamie Iles - * 
http://www.picochip.com + * https://www.picochip.com * * This file implements a driver for the Synopsys DesignWare watchdog device * in the many subsystems. The watchdog has 16 different timeout periods @@ -13,6 +13,8 @@ */ #include <linux/bitops.h> +#include <linux/limits.h> +#include <linux/kernel.h> #include <linux/clk.h> #include <linux/delay.h> #include <linux/err.h> @@ -20,11 +22,13 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/interrupt.h> #include <linux/of.h> #include <linux/pm.h> #include <linux/platform_device.h> #include <linux/reset.h> #include <linux/watchdog.h> +#include <linux/debugfs.h> #define WDOG_CONTROL_REG_OFFSET 0x00 #define WDOG_CONTROL_REG_WDT_EN_MASK 0x01 @@ -34,26 +38,64 @@ #define WDOG_CURRENT_COUNT_REG_OFFSET 0x08 #define WDOG_COUNTER_RESTART_REG_OFFSET 0x0c #define WDOG_COUNTER_RESTART_KICK_VALUE 0x76 - -/* The maximum TOP (timeout period) value that can be set in the watchdog. */ -#define DW_WDT_MAX_TOP 15 +#define WDOG_INTERRUPT_STATUS_REG_OFFSET 0x10 +#define WDOG_INTERRUPT_CLEAR_REG_OFFSET 0x14 +#define WDOG_COMP_PARAMS_5_REG_OFFSET 0xe4 +#define WDOG_COMP_PARAMS_4_REG_OFFSET 0xe8 +#define WDOG_COMP_PARAMS_3_REG_OFFSET 0xec +#define WDOG_COMP_PARAMS_2_REG_OFFSET 0xf0 +#define WDOG_COMP_PARAMS_1_REG_OFFSET 0xf4 +#define WDOG_COMP_PARAMS_1_USE_FIX_TOP BIT(6) +#define WDOG_COMP_VERSION_REG_OFFSET 0xf8 +#define WDOG_COMP_TYPE_REG_OFFSET 0xfc + +/* There are sixteen TOPs (timeout periods) that can be set in the watchdog. 
*/ +#define DW_WDT_NUM_TOPS 16 +#define DW_WDT_FIX_TOP(_idx) (1U << (16 + _idx)) #define DW_WDT_DEFAULT_SECONDS 30 +static const u32 dw_wdt_fix_tops[DW_WDT_NUM_TOPS] = { + DW_WDT_FIX_TOP(0), DW_WDT_FIX_TOP(1), DW_WDT_FIX_TOP(2), + DW_WDT_FIX_TOP(3), DW_WDT_FIX_TOP(4), DW_WDT_FIX_TOP(5), + DW_WDT_FIX_TOP(6), DW_WDT_FIX_TOP(7), DW_WDT_FIX_TOP(8), + DW_WDT_FIX_TOP(9), DW_WDT_FIX_TOP(10), DW_WDT_FIX_TOP(11), + DW_WDT_FIX_TOP(12), DW_WDT_FIX_TOP(13), DW_WDT_FIX_TOP(14), + DW_WDT_FIX_TOP(15) +}; + static bool nowayout = WATCHDOG_NOWAYOUT; module_param(nowayout, bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); +enum dw_wdt_rmod { + DW_WDT_RMOD_RESET = 1, + DW_WDT_RMOD_IRQ = 2 +}; + +struct dw_wdt_timeout { + u32 top_val; + unsigned int sec; + unsigned int msec; +}; + struct dw_wdt { void __iomem *regs; struct clk *clk; + struct clk *pclk; unsigned long rate; + enum dw_wdt_rmod rmod; + struct dw_wdt_timeout timeouts[DW_WDT_NUM_TOPS]; struct watchdog_device wdd; struct reset_control *rst; /* Save/restore */ u32 control; u32 timeout; + +#ifdef CONFIG_DEBUG_FS + struct dentry *dbgfs_dir; +#endif }; #define to_dw_wdt(wdd) container_of(wdd, struct dw_wdt, wdd) @@ -64,20 +106,84 @@ static inline int dw_wdt_is_enabled(struct dw_wdt *dw_wdt) WDOG_CONTROL_REG_WDT_EN_MASK; } -static inline int dw_wdt_top_in_seconds(struct dw_wdt *dw_wdt, unsigned top) +static void dw_wdt_update_mode(struct dw_wdt *dw_wdt, enum dw_wdt_rmod rmod) { + u32 val; + + val = readl(dw_wdt->regs + WDOG_CONTROL_REG_OFFSET); + if (rmod == DW_WDT_RMOD_IRQ) + val |= WDOG_CONTROL_REG_RESP_MODE_MASK; + else + val &= ~WDOG_CONTROL_REG_RESP_MODE_MASK; + writel(val, dw_wdt->regs + WDOG_CONTROL_REG_OFFSET); + + dw_wdt->rmod = rmod; +} + +static unsigned int dw_wdt_find_best_top(struct dw_wdt *dw_wdt, + unsigned int timeout, u32 *top_val) +{ + int idx; + /* - * There are 16 possible timeout values in 0..15 where the number of - * cycles 
is 2 ^ (16 + i) and the watchdog counts down. + * Find a TOP with timeout greater or equal to the requested number. + * Note we'll select a TOP with maximum timeout if the requested + * timeout couldn't be reached. */ - return (1U << (16 + top)) / dw_wdt->rate; + for (idx = 0; idx < DW_WDT_NUM_TOPS; ++idx) { + if (dw_wdt->timeouts[idx].sec >= timeout) + break; + } + + if (idx == DW_WDT_NUM_TOPS) + --idx; + + *top_val = dw_wdt->timeouts[idx].top_val; + + return dw_wdt->timeouts[idx].sec; +} + +static unsigned int dw_wdt_get_min_timeout(struct dw_wdt *dw_wdt) +{ + int idx; + + /* + * We'll find a timeout greater or equal to one second anyway because + * the driver probe would have failed if there was none. + */ + for (idx = 0; idx < DW_WDT_NUM_TOPS; ++idx) { + if (dw_wdt->timeouts[idx].sec) + break; + } + + return dw_wdt->timeouts[idx].sec; } -static int dw_wdt_get_top(struct dw_wdt *dw_wdt) +static unsigned int dw_wdt_get_max_timeout_ms(struct dw_wdt *dw_wdt) { - int top = readl(dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET) & 0xF; + struct dw_wdt_timeout *timeout = &dw_wdt->timeouts[DW_WDT_NUM_TOPS - 1]; + u64 msec; + + msec = (u64)timeout->sec * MSEC_PER_SEC + timeout->msec; - return dw_wdt_top_in_seconds(dw_wdt, top); + return msec < UINT_MAX ? msec : UINT_MAX; +} + +static unsigned int dw_wdt_get_timeout(struct dw_wdt *dw_wdt) +{ + int top_val = readl(dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET) & 0xF; + int idx; + + for (idx = 0; idx < DW_WDT_NUM_TOPS; ++idx) { + if (dw_wdt->timeouts[idx].top_val == top_val) + break; + } + + /* + * In IRQ mode due to the two stages counter, the actual timeout is + * twice greater than the TOP setting. 
+ */ + return dw_wdt->timeouts[idx].sec * dw_wdt->rmod; } static int dw_wdt_ping(struct watchdog_device *wdd) @@ -93,17 +199,23 @@ static int dw_wdt_ping(struct watchdog_device *wdd) static int dw_wdt_set_timeout(struct watchdog_device *wdd, unsigned int top_s) { struct dw_wdt *dw_wdt = to_dw_wdt(wdd); - int i, top_val = DW_WDT_MAX_TOP; + unsigned int timeout; + u32 top_val; /* - * Iterate over the timeout values until we find the closest match. We - * always look for >=. + * Note IRQ mode being enabled means having a non-zero pre-timeout + * setup. In this case we try to find a TOP as close to the half of the + * requested timeout as possible since DW Watchdog IRQ mode is designed + * in two stages way - first timeout rises the pre-timeout interrupt, + * second timeout performs the system reset. So basically the effective + * watchdog-caused reset happens after two watchdog TOPs elapsed. */ - for (i = 0; i <= DW_WDT_MAX_TOP; ++i) - if (dw_wdt_top_in_seconds(dw_wdt, i) >= top_s) { - top_val = i; - break; - } + timeout = dw_wdt_find_best_top(dw_wdt, DIV_ROUND_UP(top_s, dw_wdt->rmod), + &top_val); + if (dw_wdt->rmod == DW_WDT_RMOD_IRQ) + wdd->pretimeout = timeout; + else + wdd->pretimeout = 0; /* * Set the new value in the watchdog. Some versions of dw_wdt @@ -114,25 +226,47 @@ static int dw_wdt_set_timeout(struct watchdog_device *wdd, unsigned int top_s) writel(top_val | top_val << WDOG_TIMEOUT_RANGE_TOPINIT_SHIFT, dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET); + /* Kick new TOP value into the watchdog counter if activated. 
*/ + if (watchdog_active(wdd)) + dw_wdt_ping(wdd); + /* * In case users set bigger timeout value than HW can support, * kernel(watchdog_dev.c) helps to feed watchdog before * wdd->max_hw_heartbeat_ms */ if (top_s * 1000 <= wdd->max_hw_heartbeat_ms) - wdd->timeout = dw_wdt_top_in_seconds(dw_wdt, top_val); + wdd->timeout = timeout * dw_wdt->rmod; else wdd->timeout = top_s; return 0; } +static int dw_wdt_set_pretimeout(struct watchdog_device *wdd, unsigned int req) +{ + struct dw_wdt *dw_wdt = to_dw_wdt(wdd); + + /* + * We ignore actual value of the timeout passed from user-space + * using it as a flag whether the pretimeout functionality is intended + * to be activated. + */ + dw_wdt_update_mode(dw_wdt, req ? DW_WDT_RMOD_IRQ : DW_WDT_RMOD_RESET); + dw_wdt_set_timeout(wdd, wdd->timeout); + + return 0; +} + static void dw_wdt_arm_system_reset(struct dw_wdt *dw_wdt) { u32 val = readl(dw_wdt->regs + WDOG_CONTROL_REG_OFFSET); - /* Disable interrupt mode; always perform system reset. */ - val &= ~WDOG_CONTROL_REG_RESP_MODE_MASK; + /* Disable/enable interrupt mode depending on the RMOD flag. */ + if (dw_wdt->rmod == DW_WDT_RMOD_IRQ) + val |= WDOG_CONTROL_REG_RESP_MODE_MASK; + else + val &= ~WDOG_CONTROL_REG_RESP_MODE_MASK; /* Enable watchdog. 
*/ val |= WDOG_CONTROL_REG_WDT_EN_MASK; writel(val, dw_wdt->regs + WDOG_CONTROL_REG_OFFSET); @@ -170,6 +304,7 @@ static int dw_wdt_restart(struct watchdog_device *wdd, struct dw_wdt *dw_wdt = to_dw_wdt(wdd); writel(0, dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET); + dw_wdt_update_mode(dw_wdt, DW_WDT_RMOD_RESET); if (dw_wdt_is_enabled(dw_wdt)) writel(WDOG_COUNTER_RESTART_KICK_VALUE, dw_wdt->regs + WDOG_COUNTER_RESTART_REG_OFFSET); @@ -185,9 +320,19 @@ static int dw_wdt_restart(struct watchdog_device *wdd, static unsigned int dw_wdt_get_timeleft(struct watchdog_device *wdd) { struct dw_wdt *dw_wdt = to_dw_wdt(wdd); + unsigned int sec; + u32 val; + + val = readl(dw_wdt->regs + WDOG_CURRENT_COUNT_REG_OFFSET); + sec = val / dw_wdt->rate; + + if (dw_wdt->rmod == DW_WDT_RMOD_IRQ) { + val = readl(dw_wdt->regs + WDOG_INTERRUPT_STATUS_REG_OFFSET); + if (!val) + sec += wdd->pretimeout; + } - return readl(dw_wdt->regs + WDOG_CURRENT_COUNT_REG_OFFSET) / - dw_wdt->rate; + return sec; } static const struct watchdog_info dw_wdt_ident = { @@ -196,16 +341,41 @@ static const struct watchdog_info dw_wdt_ident = { .identity = "Synopsys DesignWare Watchdog", }; +static const struct watchdog_info dw_wdt_pt_ident = { + .options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT | + WDIOF_PRETIMEOUT | WDIOF_MAGICCLOSE, + .identity = "Synopsys DesignWare Watchdog", +}; + static const struct watchdog_ops dw_wdt_ops = { .owner = THIS_MODULE, .start = dw_wdt_start, .stop = dw_wdt_stop, .ping = dw_wdt_ping, .set_timeout = dw_wdt_set_timeout, + .set_pretimeout = dw_wdt_set_pretimeout, .get_timeleft = dw_wdt_get_timeleft, .restart = dw_wdt_restart, }; +static irqreturn_t dw_wdt_irq(int irq, void *devid) +{ + struct dw_wdt *dw_wdt = devid; + u32 val; + + /* + * We don't clear the IRQ status. It's supposed to be done by the + * following ping operations. 
+ */ + val = readl(dw_wdt->regs + WDOG_INTERRUPT_STATUS_REG_OFFSET); + if (!val) + return IRQ_NONE; + + watchdog_notify_pretimeout(&dw_wdt->wdd); + + return IRQ_HANDLED; +} + #ifdef CONFIG_PM_SLEEP static int dw_wdt_suspend(struct device *dev) { @@ -214,6 +384,7 @@ static int dw_wdt_suspend(struct device *dev) dw_wdt->control = readl(dw_wdt->regs + WDOG_CONTROL_REG_OFFSET); dw_wdt->timeout = readl(dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET); + clk_disable_unprepare(dw_wdt->pclk); clk_disable_unprepare(dw_wdt->clk); return 0; @@ -227,6 +398,12 @@ static int dw_wdt_resume(struct device *dev) if (err) return err; + err = clk_prepare_enable(dw_wdt->pclk); + if (err) { + clk_disable_unprepare(dw_wdt->clk); + return err; + } + writel(dw_wdt->timeout, dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET); writel(dw_wdt->control, dw_wdt->regs + WDOG_CONTROL_REG_OFFSET); @@ -238,6 +415,139 @@ static int dw_wdt_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(dw_wdt_pm_ops, dw_wdt_suspend, dw_wdt_resume); +/* + * In case if DW WDT IP core is synthesized with fixed TOP feature disabled the + * TOPs array can be arbitrary ordered with nearly any sixteen uint numbers + * depending on the system engineer imagination. The next method handles the + * passed TOPs array to pre-calculate the effective timeouts and to sort the + * TOP items out in the ascending order with respect to the timeouts. + */ + +static void dw_wdt_handle_tops(struct dw_wdt *dw_wdt, const u32 *tops) +{ + struct dw_wdt_timeout tout, *dst; + int val, tidx; + u64 msec; + + /* + * We walk over the passed TOPs array and calculate corresponding + * timeouts in seconds and milliseconds. The milliseconds granularity + * is needed to distinguish the TOPs with very close timeouts and to + * set the watchdog max heartbeat setting further. 
+ */ + for (val = 0; val < DW_WDT_NUM_TOPS; ++val) { + tout.top_val = val; + tout.sec = tops[val] / dw_wdt->rate; + msec = (u64)tops[val] * MSEC_PER_SEC; + do_div(msec, dw_wdt->rate); + tout.msec = msec - ((u64)tout.sec * MSEC_PER_SEC); + + /* + * Find a suitable place for the current TOP in the timeouts + * array so that the list is remained in the ascending order. + */ + for (tidx = 0; tidx < val; ++tidx) { + dst = &dw_wdt->timeouts[tidx]; + if (tout.sec > dst->sec || (tout.sec == dst->sec && + tout.msec >= dst->msec)) + continue; + else + swap(*dst, tout); + } + + dw_wdt->timeouts[val] = tout; + } +} + +static int dw_wdt_init_timeouts(struct dw_wdt *dw_wdt, struct device *dev) +{ + u32 data, of_tops[DW_WDT_NUM_TOPS]; + const u32 *tops; + int ret; + + /* + * Retrieve custom or fixed counter values depending on the + * WDT_USE_FIX_TOP flag found in the component specific parameters + * #1 register. + */ + data = readl(dw_wdt->regs + WDOG_COMP_PARAMS_1_REG_OFFSET); + if (data & WDOG_COMP_PARAMS_1_USE_FIX_TOP) { + tops = dw_wdt_fix_tops; + } else { + ret = of_property_read_variable_u32_array(dev_of_node(dev), + "snps,watchdog-tops", of_tops, DW_WDT_NUM_TOPS, + DW_WDT_NUM_TOPS); + if (ret < 0) { + dev_warn(dev, "No valid TOPs array specified\n"); + tops = dw_wdt_fix_tops; + } else { + tops = of_tops; + } + } + + /* Convert the specified TOPs into an array of watchdog timeouts. 
*/ + dw_wdt_handle_tops(dw_wdt, tops); + if (!dw_wdt->timeouts[DW_WDT_NUM_TOPS - 1].sec) { + dev_err(dev, "No any valid TOP detected\n"); + return -EINVAL; + } + + return 0; +} + +#ifdef CONFIG_DEBUG_FS + +#define DW_WDT_DBGFS_REG(_name, _off) \ +{ \ + .name = _name, \ + .offset = _off \ +} + +static const struct debugfs_reg32 dw_wdt_dbgfs_regs[] = { + DW_WDT_DBGFS_REG("cr", WDOG_CONTROL_REG_OFFSET), + DW_WDT_DBGFS_REG("torr", WDOG_TIMEOUT_RANGE_REG_OFFSET), + DW_WDT_DBGFS_REG("ccvr", WDOG_CURRENT_COUNT_REG_OFFSET), + DW_WDT_DBGFS_REG("crr", WDOG_COUNTER_RESTART_REG_OFFSET), + DW_WDT_DBGFS_REG("stat", WDOG_INTERRUPT_STATUS_REG_OFFSET), + DW_WDT_DBGFS_REG("param5", WDOG_COMP_PARAMS_5_REG_OFFSET), + DW_WDT_DBGFS_REG("param4", WDOG_COMP_PARAMS_4_REG_OFFSET), + DW_WDT_DBGFS_REG("param3", WDOG_COMP_PARAMS_3_REG_OFFSET), + DW_WDT_DBGFS_REG("param2", WDOG_COMP_PARAMS_2_REG_OFFSET), + DW_WDT_DBGFS_REG("param1", WDOG_COMP_PARAMS_1_REG_OFFSET), + DW_WDT_DBGFS_REG("version", WDOG_COMP_VERSION_REG_OFFSET), + DW_WDT_DBGFS_REG("type", WDOG_COMP_TYPE_REG_OFFSET) +}; + +static void dw_wdt_dbgfs_init(struct dw_wdt *dw_wdt) +{ + struct device *dev = dw_wdt->wdd.parent; + struct debugfs_regset32 *regset; + + regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL); + if (!regset) + return; + + regset->regs = dw_wdt_dbgfs_regs; + regset->nregs = ARRAY_SIZE(dw_wdt_dbgfs_regs); + regset->base = dw_wdt->regs; + + dw_wdt->dbgfs_dir = debugfs_create_dir(dev_name(dev), NULL); + + debugfs_create_regset32("registers", 0444, dw_wdt->dbgfs_dir, regset); +} + +static void dw_wdt_dbgfs_clear(struct dw_wdt *dw_wdt) +{ + debugfs_remove_recursive(dw_wdt->dbgfs_dir); +} + +#else /* !CONFIG_DEBUG_FS */ + +static void dw_wdt_dbgfs_init(struct dw_wdt *dw_wdt) {} +static void dw_wdt_dbgfs_clear(struct dw_wdt *dw_wdt) {} + +#endif /* !CONFIG_DEBUG_FS */ + static int dw_wdt_drv_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -253,9 +563,18 @@ static int dw_wdt_drv_probe(struct 
platform_device *pdev) if (IS_ERR(dw_wdt->regs)) return PTR_ERR(dw_wdt->regs); - dw_wdt->clk = devm_clk_get(dev, NULL); - if (IS_ERR(dw_wdt->clk)) - return PTR_ERR(dw_wdt->clk); + /* + * Try to request the watchdog dedicated timer clock source. It must + * be supplied if asynchronous mode is enabled. Otherwise fallback + * to the common timer/bus clocks configuration, in which the very + * first found clock supply both timer and APB signals. + */ + dw_wdt->clk = devm_clk_get(dev, "tclk"); + if (IS_ERR(dw_wdt->clk)) { + dw_wdt->clk = devm_clk_get(dev, NULL); + if (IS_ERR(dw_wdt->clk)) + return PTR_ERR(dw_wdt->clk); + } ret = clk_prepare_enable(dw_wdt->clk); if (ret) @@ -267,20 +586,64 @@ static int dw_wdt_drv_probe(struct platform_device *pdev) goto out_disable_clk; } + /* + * Request APB clock if device is configured with async clocks mode. + * In this case both tclk and pclk clocks are supposed to be specified. + * Alas we can't know for sure whether async mode was really activated, + * so the pclk phandle reference is left optional. If it couldn't be + * found we consider the device configured in synchronous clocks mode. + */ + dw_wdt->pclk = devm_clk_get_optional(dev, "pclk"); + if (IS_ERR(dw_wdt->pclk)) { + ret = PTR_ERR(dw_wdt->pclk); + goto out_disable_clk; + } + + ret = clk_prepare_enable(dw_wdt->pclk); + if (ret) + goto out_disable_clk; + dw_wdt->rst = devm_reset_control_get_optional_shared(&pdev->dev, NULL); if (IS_ERR(dw_wdt->rst)) { ret = PTR_ERR(dw_wdt->rst); - goto out_disable_clk; + goto out_disable_pclk; + } + + /* Enable normal reset without pre-timeout by default. */ + dw_wdt_update_mode(dw_wdt, DW_WDT_RMOD_RESET); + + /* + * Pre-timeout IRQ is optional, since some hardware may lack support + * of it. Note we must request rising-edge IRQ, since the lane is left + * pending either until the next watchdog kick event or up to the + * system reset. 
+ */ + ret = platform_get_irq_optional(pdev, 0); + if (ret > 0) { + ret = devm_request_irq(dev, ret, dw_wdt_irq, + IRQF_SHARED | IRQF_TRIGGER_RISING, + pdev->name, dw_wdt); + if (ret) + goto out_disable_pclk; + + dw_wdt->wdd.info = &dw_wdt_pt_ident; + } else { + if (ret == -EPROBE_DEFER) + goto out_disable_pclk; + + dw_wdt->wdd.info = &dw_wdt_ident; } reset_control_deassert(dw_wdt->rst); + ret = dw_wdt_init_timeouts(dw_wdt, dev); + if (ret) + goto out_disable_clk; + wdd = &dw_wdt->wdd; - wdd->info = &dw_wdt_ident; wdd->ops = &dw_wdt_ops; - wdd->min_timeout = 1; - wdd->max_hw_heartbeat_ms = - dw_wdt_top_in_seconds(dw_wdt, DW_WDT_MAX_TOP) * 1000; + wdd->min_timeout = dw_wdt_get_min_timeout(dw_wdt); + wdd->max_hw_heartbeat_ms = dw_wdt_get_max_timeout_ms(dw_wdt); wdd->parent = dev; watchdog_set_drvdata(wdd, dw_wdt); @@ -293,7 +656,7 @@ static int dw_wdt_drv_probe(struct platform_device *pdev) * devicetree. */ if (dw_wdt_is_enabled(dw_wdt)) { - wdd->timeout = dw_wdt_get_top(dw_wdt); + wdd->timeout = dw_wdt_get_timeout(dw_wdt); set_bit(WDOG_HW_RUNNING, &wdd->status); } else { wdd->timeout = DW_WDT_DEFAULT_SECONDS; @@ -306,10 +669,15 @@ static int dw_wdt_drv_probe(struct platform_device *pdev) ret = watchdog_register_device(wdd); if (ret) - goto out_disable_clk; + goto out_disable_pclk; + + dw_wdt_dbgfs_init(dw_wdt); return 0; +out_disable_pclk: + clk_disable_unprepare(dw_wdt->pclk); + out_disable_clk: clk_disable_unprepare(dw_wdt->clk); return ret; @@ -319,8 +687,11 @@ static int dw_wdt_drv_remove(struct platform_device *pdev) { struct dw_wdt *dw_wdt = platform_get_drvdata(pdev); + dw_wdt_dbgfs_clear(dw_wdt); + watchdog_unregister_device(&dw_wdt->wdd); reset_control_assert(dw_wdt->rst); + clk_disable_unprepare(dw_wdt->pclk); clk_disable_unprepare(dw_wdt->clk); return 0; diff --git a/drivers/watchdog/eurotechwdt.c b/drivers/watchdog/eurotechwdt.c index f5ffa7be066e..2418ebb707bd 100644 --- a/drivers/watchdog/eurotechwdt.c +++ b/drivers/watchdog/eurotechwdt.c @@ -286,7 
+286,7 @@ static long eurwdt_ioctl(struct file *file, eurwdt_timeout = time; eurwdt_set_timeout(time); spin_unlock(&eurwdt_lock); - /* fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(eurwdt_timeout, p); diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index a3c44d75d80e..f60beec1bbae 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ -306,27 +306,6 @@ exit_unlock: return err; } -static int f71862fg_pin_configure(unsigned short ioaddr) -{ - /* When ioaddr is non-zero the calling function has to take care of - mutex handling and superio preparation! */ - - if (f71862fg_pin == 63) { - if (ioaddr) { - /* SPI must be disabled first to use this pin! */ - superio_clear_bit(ioaddr, SIO_REG_ROM_ADDR_SEL, 6); - superio_set_bit(ioaddr, SIO_REG_MFUNCT3, 4); - } - } else if (f71862fg_pin == 56) { - if (ioaddr) - superio_set_bit(ioaddr, SIO_REG_MFUNCT1, 1); - } else { - pr_err("Invalid argument f71862fg_pin=%d\n", f71862fg_pin); - return -EINVAL; - } - return 0; -} - static int watchdog_start(void) { int err; @@ -352,9 +331,13 @@ static int watchdog_start(void) break; case f71862fg: - err = f71862fg_pin_configure(watchdog.sioaddr); - if (err) - goto exit_superio; + if (f71862fg_pin == 63) { + /* SPI must be disabled first to use this pin! 
*/ + superio_clear_bit(watchdog.sioaddr, SIO_REG_ROM_ADDR_SEL, 6); + superio_set_bit(watchdog.sioaddr, SIO_REG_MFUNCT3, 4); + } else if (f71862fg_pin == 56) { + superio_set_bit(watchdog.sioaddr, SIO_REG_MFUNCT1, 1); + } break; case f71868: @@ -629,7 +612,7 @@ static long watchdog_ioctl(struct file *file, unsigned int cmd, if (new_options & WDIOS_ENABLECARD) return watchdog_start(); - /* fall through */ + fallthrough; case WDIOC_KEEPALIVE: watchdog_keepalive(); @@ -643,7 +626,7 @@ static long watchdog_ioctl(struct file *file, unsigned int cmd, return -EINVAL; watchdog_keepalive(); - /* fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(watchdog.timeout, uarg.i); @@ -690,9 +673,9 @@ static int __init watchdog_init(int sioaddr) * into the module have been registered yet. */ watchdog.sioaddr = sioaddr; - watchdog.ident.options = WDIOC_SETTIMEOUT - | WDIOF_MAGICCLOSE - | WDIOF_KEEPALIVEPING; + watchdog.ident.options = WDIOF_MAGICCLOSE + | WDIOF_KEEPALIVEPING + | WDIOF_CARDRESET; snprintf(watchdog.ident.identity, sizeof(watchdog.ident.identity), "%s watchdog", @@ -706,6 +689,13 @@ static int __init watchdog_init(int sioaddr) wdt_conf = superio_inb(sioaddr, F71808FG_REG_WDT_CONF); watchdog.caused_reboot = wdt_conf & BIT(F71808FG_FLAG_WDTMOUT_STS); + /* + * We don't want WDTMOUT_STS to stick around till regular reboot. + * Write 1 to the bit to clear it to zero. 
+ */ + superio_outb(sioaddr, F71808FG_REG_WDT_CONF, + wdt_conf | BIT(F71808FG_FLAG_WDTMOUT_STS)); + superio_exit(sioaddr); err = watchdog_set_timeout(timeout); @@ -803,7 +793,6 @@ static int __init f71808e_find(int sioaddr) break; case SIO_F71862_ID: watchdog.type = f71862fg; - err = f71862fg_pin_configure(0); /* validate module parameter */ break; case SIO_F71868_ID: watchdog.type = f71868; @@ -852,6 +841,11 @@ static int __init f71808e_init(void) int err = -ENODEV; int i; + if (f71862fg_pin != 63 && f71862fg_pin != 56) { + pr_err("Invalid argument f71862fg_pin=%d\n", f71862fg_pin); + return -EINVAL; + } + for (i = 0; i < ARRAY_SIZE(addrs); i++) { err = f71808e_find(addrs[i]); if (err == 0) diff --git a/drivers/watchdog/gef_wdt.c b/drivers/watchdog/gef_wdt.c index f6541d1b65e3..df5406aa7d25 100644 --- a/drivers/watchdog/gef_wdt.c +++ b/drivers/watchdog/gef_wdt.c @@ -201,7 +201,7 @@ static long gef_wdt_ioctl(struct file *file, unsigned int cmd, if (get_user(timeout, (int __user *)argp)) return -EFAULT; gef_wdt_set_timeout(timeout); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: if (put_user(gef_wdt_timeout, (int __user *)argp)) diff --git a/drivers/watchdog/geodewdt.c b/drivers/watchdog/geodewdt.c index 9914a4283cb2..83418924e30a 100644 --- a/drivers/watchdog/geodewdt.c +++ b/drivers/watchdog/geodewdt.c @@ -185,7 +185,7 @@ static long geodewdt_ioctl(struct file *file, unsigned int cmd, if (geodewdt_set_heartbeat(interval)) return -EINVAL; - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); diff --git a/drivers/watchdog/ib700wdt.c b/drivers/watchdog/ib700wdt.c index 2b65ea9451d1..a0ddedc362fc 100644 --- a/drivers/watchdog/ib700wdt.c +++ b/drivers/watchdog/ib700wdt.c @@ -214,7 +214,7 @@ static long ibwdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (ibwdt_set_heartbeat(new_margin)) return -EINVAL; ibwdt_ping(); - /* fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); 
diff --git a/drivers/watchdog/it8712f_wdt.c b/drivers/watchdog/it8712f_wdt.c index 2fed40d14007..9b89d2f09568 100644 --- a/drivers/watchdog/it8712f_wdt.c +++ b/drivers/watchdog/it8712f_wdt.c @@ -303,7 +303,7 @@ static long it8712f_wdt_ioctl(struct file *file, unsigned int cmd, superio_exit(); it8712f_wdt_ping(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: if (put_user(margin, p)) return -EFAULT; diff --git a/drivers/watchdog/ixp4xx_wdt.c b/drivers/watchdog/ixp4xx_wdt.c index 09886616fd21..aae29dcfaf11 100644 --- a/drivers/watchdog/ixp4xx_wdt.c +++ b/drivers/watchdog/ixp4xx_wdt.c @@ -136,7 +136,7 @@ static long ixp4xx_wdt_ioctl(struct file *file, unsigned int cmd, heartbeat = time; wdt_enable(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: ret = put_user(heartbeat, (int *)arg); diff --git a/drivers/watchdog/m54xx_wdt.c b/drivers/watchdog/m54xx_wdt.c index 60ed6252e5f4..f388a769dbd3 100644 --- a/drivers/watchdog/m54xx_wdt.c +++ b/drivers/watchdog/m54xx_wdt.c @@ -155,7 +155,7 @@ static long m54xx_wdt_ioctl(struct file *file, unsigned int cmd, heartbeat = time; wdt_enable(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: ret = put_user(heartbeat, (int *)arg); diff --git a/drivers/watchdog/machzwd.c b/drivers/watchdog/machzwd.c index 80ff94688487..743377c5b173 100644 --- a/drivers/watchdog/machzwd.c +++ b/drivers/watchdog/machzwd.c @@ -171,7 +171,7 @@ static inline void zf_set_timer(unsigned short new, unsigned char n) switch (n) { case WD1: zf_writew(COUNTER_1, new); - /* fall through */ + fallthrough; case WD2: zf_writeb(COUNTER_2, new > 0xff ? 
0xff : new); default: diff --git a/drivers/watchdog/mlx_wdt.c b/drivers/watchdog/mlx_wdt.c index 03b9ac4b99af..54193369e85c 100644 --- a/drivers/watchdog/mlx_wdt.c +++ b/drivers/watchdog/mlx_wdt.c @@ -21,6 +21,7 @@ #define MLXREG_WDT_CLOCK_SCALE 1000 #define MLXREG_WDT_MAX_TIMEOUT_TYPE1 32 #define MLXREG_WDT_MAX_TIMEOUT_TYPE2 255 +#define MLXREG_WDT_MAX_TIMEOUT_TYPE3 65535 #define MLXREG_WDT_MIN_TIMEOUT 1 #define MLXREG_WDT_OPTIONS_BASE (WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE | \ WDIOF_SETTIMEOUT) @@ -49,6 +50,7 @@ struct mlxreg_wdt { int tleft_idx; int ping_idx; int reset_idx; + int regmap_val_sz; enum mlxreg_wdt_type wdt_type; }; @@ -111,7 +113,8 @@ static int mlxreg_wdt_set_timeout(struct watchdog_device *wdd, u32 regval, set_time, hw_timeout; int rc; - if (wdt->wdt_type == MLX_WDT_TYPE1) { + switch (wdt->wdt_type) { + case MLX_WDT_TYPE1: rc = regmap_read(wdt->regmap, reg_data->reg, ®val); if (rc) return rc; @@ -120,14 +123,32 @@ static int mlxreg_wdt_set_timeout(struct watchdog_device *wdd, regval = (regval & reg_data->mask) | hw_timeout; /* Rowndown to actual closest number of sec. 
*/ set_time = BIT(hw_timeout) / MLXREG_WDT_CLOCK_SCALE; - } else { + rc = regmap_write(wdt->regmap, reg_data->reg, regval); + break; + case MLX_WDT_TYPE2: + set_time = timeout; + rc = regmap_write(wdt->regmap, reg_data->reg, timeout); + break; + case MLX_WDT_TYPE3: + /* WD_TYPE3 has 2B set time register */ set_time = timeout; - regval = timeout; + if (wdt->regmap_val_sz == 1) { + regval = timeout & 0xff; + rc = regmap_write(wdt->regmap, reg_data->reg, regval); + if (!rc) { + regval = (timeout & 0xff00) >> 8; + rc = regmap_write(wdt->regmap, + reg_data->reg + 1, regval); + } + } else { + rc = regmap_write(wdt->regmap, reg_data->reg, timeout); + } + break; + default: + return -EINVAL; } wdd->timeout = set_time; - rc = regmap_write(wdt->regmap, reg_data->reg, regval); - if (!rc) { /* * Restart watchdog with new timeout period @@ -147,10 +168,25 @@ static unsigned int mlxreg_wdt_get_timeleft(struct watchdog_device *wdd) { struct mlxreg_wdt *wdt = watchdog_get_drvdata(wdd); struct mlxreg_core_data *reg_data = &wdt->pdata->data[wdt->tleft_idx]; - u32 regval; + u32 regval, msb, lsb; int rc; - rc = regmap_read(wdt->regmap, reg_data->reg, ®val); + if (wdt->wdt_type == MLX_WDT_TYPE2) { + rc = regmap_read(wdt->regmap, reg_data->reg, ®val); + } else { + /* WD_TYPE3 has 2 byte timeleft register */ + if (wdt->regmap_val_sz == 1) { + rc = regmap_read(wdt->regmap, reg_data->reg, &lsb); + if (!rc) { + rc = regmap_read(wdt->regmap, + reg_data->reg + 1, &msb); + regval = (msb & 0xff) << 8 | (lsb & 0xff); + } + } else { + rc = regmap_read(wdt->regmap, reg_data->reg, ®val); + } + } + /* Return 0 timeleft in case of failure register read. */ return rc == 0 ? 
regval : 0; } @@ -212,13 +248,23 @@ static void mlxreg_wdt_config(struct mlxreg_wdt *wdt, wdt->wdd.info = &mlxreg_wdt_aux_info; wdt->wdt_type = pdata->version; - if (wdt->wdt_type == MLX_WDT_TYPE2) { - wdt->wdd.ops = &mlxreg_wdt_ops_type2; - wdt->wdd.max_timeout = MLXREG_WDT_MAX_TIMEOUT_TYPE2; - } else { + switch (wdt->wdt_type) { + case MLX_WDT_TYPE1: wdt->wdd.ops = &mlxreg_wdt_ops_type1; wdt->wdd.max_timeout = MLXREG_WDT_MAX_TIMEOUT_TYPE1; + break; + case MLX_WDT_TYPE2: + wdt->wdd.ops = &mlxreg_wdt_ops_type2; + wdt->wdd.max_timeout = MLXREG_WDT_MAX_TIMEOUT_TYPE2; + break; + case MLX_WDT_TYPE3: + wdt->wdd.ops = &mlxreg_wdt_ops_type2; + wdt->wdd.max_timeout = MLXREG_WDT_MAX_TIMEOUT_TYPE3; + break; + default: + break; } + wdt->wdd.min_timeout = MLXREG_WDT_MIN_TIMEOUT; } @@ -249,6 +295,11 @@ static int mlxreg_wdt_probe(struct platform_device *pdev) wdt->wdd.parent = dev; wdt->regmap = pdata->regmap; + rc = regmap_get_val_bytes(wdt->regmap); + if (rc < 0) + return -EINVAL; + + wdt->regmap_val_sz = rc; mlxreg_wdt_config(wdt, pdata); if ((pdata->features & MLXREG_CORE_WD_FEATURE_NOWAYOUT)) diff --git a/drivers/watchdog/mv64x60_wdt.c b/drivers/watchdog/mv64x60_wdt.c index 0bc72dd69b70..894aa63488d3 100644 --- a/drivers/watchdog/mv64x60_wdt.c +++ b/drivers/watchdog/mv64x60_wdt.c @@ -222,7 +222,7 @@ static long mv64x60_wdt_ioctl(struct file *file, if (get_user(timeout, (int __user *)argp)) return -EFAULT; mv64x60_wdt_set_timeout(timeout); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: if (put_user(mv64x60_wdt_timeout, (int __user *)argp)) diff --git a/drivers/watchdog/nv_tco.c b/drivers/watchdog/nv_tco.c index d7a560e348d5..f6902a337422 100644 --- a/drivers/watchdog/nv_tco.c +++ b/drivers/watchdog/nv_tco.c @@ -7,7 +7,7 @@ * Based off i8xx_tco.c: * (c) Copyright 2000 kernel concepts <[email protected]>, All Rights * Reserved. 
- * http://www.kernelconcepts.de + * https://www.kernelconcepts.de * * TCO timer driver for NV chipsets * based on softdog.c by Alan Cox <[email protected]> @@ -250,7 +250,7 @@ static long nv_tco_ioctl(struct file *file, unsigned int cmd, if (tco_timer_set_heartbeat(new_heartbeat)) return -EINVAL; tco_timer_keepalive(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(heartbeat, p); default: diff --git a/drivers/watchdog/nv_tco.h b/drivers/watchdog/nv_tco.h index d325e528010f..c65f82588386 100644 --- a/drivers/watchdog/nv_tco.h +++ b/drivers/watchdog/nv_tco.h @@ -9,7 +9,7 @@ * * (c) Copyright 2000 kernel concepts <[email protected]>, All Rights * Reserved. - * http://www.kernelconcepts.de + * https://www.kernelconcepts.de * * Neither kernel concepts nor Nils Faerber admit liability nor provide * warranty for any of this software. This material is provided diff --git a/drivers/watchdog/pc87413_wdt.c b/drivers/watchdog/pc87413_wdt.c index 73fbfc99083b..2d4504302c9e 100644 --- a/drivers/watchdog/pc87413_wdt.c +++ b/drivers/watchdog/pc87413_wdt.c @@ -433,7 +433,7 @@ static long pc87413_ioctl(struct file *file, unsigned int cmd, return -EINVAL; timeout = new_timeout; pc87413_refresh(); - /* fall through - and return the new timeout... 
*/ + fallthrough; /* and return the new timeout */ case WDIOC_GETTIMEOUT: new_timeout = timeout * 60; return put_user(new_timeout, uarg.i); diff --git a/drivers/watchdog/pcwd.c b/drivers/watchdog/pcwd.c index 7a0587fdc52c..e86fa7f8351d 100644 --- a/drivers/watchdog/pcwd.c +++ b/drivers/watchdog/pcwd.c @@ -651,7 +651,7 @@ static long pcwd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return -EINVAL; pcwd_keepalive(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(heartbeat, argp); diff --git a/drivers/watchdog/pcwd_pci.c b/drivers/watchdog/pcwd_pci.c index 81508a42a90c..54d86fcb1837 100644 --- a/drivers/watchdog/pcwd_pci.c +++ b/drivers/watchdog/pcwd_pci.c @@ -542,7 +542,7 @@ static long pcipcwd_ioctl(struct file *file, unsigned int cmd, pcipcwd_keepalive(); } - /* fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(heartbeat, p); diff --git a/drivers/watchdog/pcwd_usb.c b/drivers/watchdog/pcwd_usb.c index 2f44af1831d0..41a928eb91ed 100644 --- a/drivers/watchdog/pcwd_usb.c +++ b/drivers/watchdog/pcwd_usb.c @@ -452,7 +452,7 @@ static long usb_pcwd_ioctl(struct file *file, unsigned int cmd, usb_pcwd_keepalive(usb_pcwd_device); } - /* fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(heartbeat, p); @@ -585,9 +585,8 @@ static struct notifier_block usb_pcwd_notifier = { static inline void usb_pcwd_delete(struct usb_pcwd_private *usb_pcwd) { usb_free_urb(usb_pcwd->intr_urb); - if (usb_pcwd->intr_buffer != NULL) - usb_free_coherent(usb_pcwd->udev, usb_pcwd->intr_size, - usb_pcwd->intr_buffer, usb_pcwd->intr_dma); + usb_free_coherent(usb_pcwd->udev, usb_pcwd->intr_size, + usb_pcwd->intr_buffer, usb_pcwd->intr_dma); kfree(usb_pcwd); } diff --git a/drivers/watchdog/rc32434_wdt.c b/drivers/watchdog/rc32434_wdt.c index aee3c2efd565..e74802f3a32e 100644 --- a/drivers/watchdog/rc32434_wdt.c +++ b/drivers/watchdog/rc32434_wdt.c @@ -230,7 +230,7 @@ static long rc32434_wdt_ioctl(struct file *file, 
unsigned int cmd, return -EFAULT; if (rc32434_wdt_set(new_timeout)) return -EINVAL; - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return copy_to_user(argp, &timeout, sizeof(int)) ? -EFAULT : 0; default: diff --git a/drivers/watchdog/riowd.c b/drivers/watchdog/riowd.c index 1b9a6dc8f982..7008596a575f 100644 --- a/drivers/watchdog/riowd.c +++ b/drivers/watchdog/riowd.c @@ -134,7 +134,7 @@ static long riowd_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -EINVAL; riowd_timeout = (new_margin + 59) / 60; riowd_writereg(p, riowd_timeout, WDTO_INDEX); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(riowd_timeout * 60, (int __user *)argp); diff --git a/drivers/watchdog/rti_wdt.c b/drivers/watchdog/rti_wdt.c index d456dd72d99a..705e8f7523e8 100644 --- a/drivers/watchdog/rti_wdt.c +++ b/drivers/watchdog/rti_wdt.c @@ -35,7 +35,11 @@ #define RTIWWDRX_NMI 0xa -#define RTIWWDSIZE_50P 0x50 +#define RTIWWDSIZE_50P 0x50 +#define RTIWWDSIZE_25P 0x500 +#define RTIWWDSIZE_12P5 0x5000 +#define RTIWWDSIZE_6P25 0x50000 +#define RTIWWDSIZE_3P125 0x500000 #define WDENABLE_KEY 0xa98559da @@ -48,7 +52,7 @@ #define DWDST BIT(1) -static int heartbeat; +static int heartbeat = DEFAULT_HEARTBEAT; /* * struct to hold data for each WDT device @@ -79,11 +83,9 @@ static int rti_wdt_start(struct watchdog_device *wdd) * be petted during the open window; not too early or not too late. * The HW configuration options only allow for the open window size * to be 50% or less than that; we obviouly want to configure the open - * window as large as possible so we select the 50% option. To avoid - * any glitches, we accommodate 5% safety margin also, so we setup - * the min_hw_hearbeat at 55% of the timeout period. + * window as large as possible so we select the 50% option. 
*/ - wdd->min_hw_heartbeat_ms = 11 * wdd->timeout * 1000 / 20; + wdd->min_hw_heartbeat_ms = 500 * wdd->timeout; /* Generate NMI when wdt expires */ writel_relaxed(RTIWWDRX_NMI, wdt->base + RTIWWDRXCTRL); @@ -110,7 +112,48 @@ static int rti_wdt_ping(struct watchdog_device *wdd) return 0; } -static unsigned int rti_wdt_get_timeleft(struct watchdog_device *wdd) +static int rti_wdt_setup_hw_hb(struct watchdog_device *wdd, u32 wsize) +{ + /* + * RTI only supports a windowed mode, where the watchdog can only + * be petted during the open window; not too early or not too late. + * The HW configuration options only allow for the open window size + * to be 50% or less than that. + */ + switch (wsize) { + case RTIWWDSIZE_50P: + /* 50% open window => 50% min heartbeat */ + wdd->min_hw_heartbeat_ms = 500 * heartbeat; + break; + + case RTIWWDSIZE_25P: + /* 25% open window => 75% min heartbeat */ + wdd->min_hw_heartbeat_ms = 750 * heartbeat; + break; + + case RTIWWDSIZE_12P5: + /* 12.5% open window => 87.5% min heartbeat */ + wdd->min_hw_heartbeat_ms = 875 * heartbeat; + break; + + case RTIWWDSIZE_6P25: + /* 6.5% open window => 93.5% min heartbeat */ + wdd->min_hw_heartbeat_ms = 935 * heartbeat; + break; + + case RTIWWDSIZE_3P125: + /* 3.125% open window => 96.9% min heartbeat */ + wdd->min_hw_heartbeat_ms = 969 * heartbeat; + break; + + default: + return -EINVAL; + } + + return 0; +} + +static unsigned int rti_wdt_get_timeleft_ms(struct watchdog_device *wdd) { u64 timer_counter; u32 val; @@ -123,11 +166,18 @@ static unsigned int rti_wdt_get_timeleft(struct watchdog_device *wdd) timer_counter = readl_relaxed(wdt->base + RTIDWDCNTR); + timer_counter *= 1000; + do_div(timer_counter, wdt->freq); return timer_counter; } +static unsigned int rti_wdt_get_timeleft(struct watchdog_device *wdd) +{ + return rti_wdt_get_timeleft_ms(wdd) / 1000; +} + static const struct watchdog_info rti_wdt_info = { .options = WDIOF_KEEPALIVEPING, .identity = "K3 RTI Watchdog", @@ -148,6 +198,7 @@ static 
int rti_wdt_probe(struct platform_device *pdev) struct watchdog_device *wdd; struct rti_wdt_device *wdt; struct clk *clk; + u32 last_ping = 0; wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL); if (!wdt) @@ -169,6 +220,14 @@ static int rti_wdt_probe(struct platform_device *pdev) return -EINVAL; } + /* + * If watchdog is running at 32k clock, it is not accurate. + * Adjust frequency down in this case so that we don't pet + * the watchdog too often. + */ + if (wdt->freq < 32768) + wdt->freq = wdt->freq * 9 / 10; + pm_runtime_enable(dev); ret = pm_runtime_get_sync(dev); if (ret) { @@ -185,11 +244,8 @@ static int rti_wdt_probe(struct platform_device *pdev) wdd->min_timeout = 1; wdd->max_hw_heartbeat_ms = (WDT_PRELOAD_MAX << WDT_PRELOAD_SHIFT) / wdt->freq * 1000; - wdd->timeout = DEFAULT_HEARTBEAT; wdd->parent = dev; - watchdog_init_timeout(wdd, heartbeat, dev); - watchdog_set_drvdata(wdd, wdt); watchdog_set_nowayout(wdd, 1); watchdog_set_restart_priority(wdd, 128); @@ -201,16 +257,53 @@ static int rti_wdt_probe(struct platform_device *pdev) goto err_iomap; } + if (readl(wdt->base + RTIDWDCTRL) == WDENABLE_KEY) { + u32 time_left_ms; + u64 heartbeat_ms; + u32 wsize; + + set_bit(WDOG_HW_RUNNING, &wdd->status); + time_left_ms = rti_wdt_get_timeleft_ms(wdd); + heartbeat_ms = readl(wdt->base + RTIDWDPRLD); + heartbeat_ms <<= WDT_PRELOAD_SHIFT; + heartbeat_ms *= 1000; + do_div(heartbeat_ms, wdt->freq); + if (heartbeat_ms != heartbeat * 1000) + dev_warn(dev, "watchdog already running, ignoring heartbeat config!\n"); + + heartbeat = heartbeat_ms; + heartbeat /= 1000; + + wsize = readl(wdt->base + RTIWWDSIZECTRL); + ret = rti_wdt_setup_hw_hb(wdd, wsize); + if (ret) { + dev_err(dev, "bad window size.\n"); + goto err_iomap; + } + + last_ping = heartbeat_ms - time_left_ms; + if (time_left_ms > heartbeat_ms) { + dev_warn(dev, "time_left > heartbeat? 
Assuming last ping just before now.\n"); + last_ping = 0; + } + } + + watchdog_init_timeout(wdd, heartbeat, dev); + ret = watchdog_register_device(wdd); if (ret) { dev_err(dev, "cannot register watchdog device\n"); goto err_iomap; } + if (last_ping) + watchdog_set_last_hw_keepalive(wdd, last_ping); + return 0; err_iomap: pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); return ret; } @@ -221,6 +314,7 @@ static int rti_wdt_remove(struct platform_device *pdev) watchdog_unregister_device(&wdt->wdd); pm_runtime_put(&pdev->dev); + pm_runtime_disable(&pdev->dev); return 0; } diff --git a/drivers/watchdog/sa1100_wdt.c b/drivers/watchdog/sa1100_wdt.c index 9b93be00109f..27846c6bdfb0 100644 --- a/drivers/watchdog/sa1100_wdt.c +++ b/drivers/watchdog/sa1100_wdt.c @@ -127,7 +127,7 @@ static long sa1100dog_ioctl(struct file *file, unsigned int cmd, pre_margin = oscr_freq * time; writel_relaxed(readl_relaxed(OSCR) + pre_margin, OSMR3); - /*fall through*/ + fallthrough; case WDIOC_GETTIMEOUT: ret = put_user(pre_margin / oscr_freq, p); diff --git a/drivers/watchdog/sb_wdog.c b/drivers/watchdog/sb_wdog.c index da2dad00d473..504be461f992 100644 --- a/drivers/watchdog/sb_wdog.c +++ b/drivers/watchdog/sb_wdog.c @@ -202,7 +202,7 @@ static long sbwdog_ioctl(struct file *file, unsigned int cmd, timeout = time; sbwdog_set(user_dog, timeout); sbwdog_pet(user_dog); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: /* diff --git a/drivers/watchdog/sbc60xxwdt.c b/drivers/watchdog/sbc60xxwdt.c index f2cbe6d880a8..a947a63fb44a 100644 --- a/drivers/watchdog/sbc60xxwdt.c +++ b/drivers/watchdog/sbc60xxwdt.c @@ -265,7 +265,7 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) timeout = new_timeout; wdt_keepalive(); } - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); default: diff --git a/drivers/watchdog/sbc7240_wdt.c b/drivers/watchdog/sbc7240_wdt.c index 520b8dd77ed4..d640b26e18a6 100644 --- 
a/drivers/watchdog/sbc7240_wdt.c +++ b/drivers/watchdog/sbc7240_wdt.c @@ -195,7 +195,7 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (wdt_set_timeout(new_timeout)) return -EINVAL; } - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, (int __user *)arg); default: diff --git a/drivers/watchdog/sbc_fitpc2_wdt.c b/drivers/watchdog/sbc_fitpc2_wdt.c index 1b20b33879c4..04483d6453d6 100644 --- a/drivers/watchdog/sbc_fitpc2_wdt.c +++ b/drivers/watchdog/sbc_fitpc2_wdt.c @@ -154,7 +154,7 @@ static long fitpc2_wdt_ioctl(struct file *file, unsigned int cmd, margin = time; wdt_enable(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: ret = put_user(margin, (int *)arg); diff --git a/drivers/watchdog/sc520_wdt.c b/drivers/watchdog/sc520_wdt.c index fbe79bcc9297..e66e6b905964 100644 --- a/drivers/watchdog/sc520_wdt.c +++ b/drivers/watchdog/sc520_wdt.c @@ -321,7 +321,7 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) wdt_keepalive(); } - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); default: diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c index 83949a385f62..d8b77fe10eba 100644 --- a/drivers/watchdog/sch311x_wdt.c +++ b/drivers/watchdog/sch311x_wdt.c @@ -295,7 +295,7 @@ static long sch311x_wdt_ioctl(struct file *file, unsigned int cmd, if (sch311x_wdt_set_heartbeat(new_timeout)) return -EINVAL; sch311x_wdt_keepalive(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); default: diff --git a/drivers/watchdog/scx200_wdt.c b/drivers/watchdog/scx200_wdt.c index c94098acb78f..7b5e18323f3f 100644 --- a/drivers/watchdog/scx200_wdt.c +++ b/drivers/watchdog/scx200_wdt.c @@ -186,7 +186,7 @@ static long scx200_wdt_ioctl(struct file *file, unsigned int cmd, margin = new_margin; scx200_wdt_update_margin(); scx200_wdt_ping(); - /* Fall through */ + fallthrough; case 
WDIOC_GETTIMEOUT: if (put_user(margin, p)) return -EFAULT; diff --git a/drivers/watchdog/smsc37b787_wdt.c b/drivers/watchdog/smsc37b787_wdt.c index 43de56acd767..7463df479d11 100644 --- a/drivers/watchdog/smsc37b787_wdt.c +++ b/drivers/watchdog/smsc37b787_wdt.c @@ -474,7 +474,7 @@ static long wb_smsc_wdt_ioctl(struct file *file, return -EINVAL; timeout = new_timeout; wb_smsc_wdt_set_timeout(timeout); - /* fall through - and return the new timeout... */ + fallthrough; /* and return the new timeout */ case WDIOC_GETTIMEOUT: new_timeout = timeout; if (unit == UNIT_MINUTE) diff --git a/drivers/watchdog/softdog.c b/drivers/watchdog/softdog.c index 3e4885c1545e..7a1096265f18 100644 --- a/drivers/watchdog/softdog.c +++ b/drivers/watchdog/softdog.c @@ -20,11 +20,13 @@ #include <linux/hrtimer.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/kthread.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/reboot.h> #include <linux/types.h> #include <linux/watchdog.h> +#include <linux/workqueue.h> #define TIMER_MARGIN 60 /* Default is 60 seconds */ static unsigned int soft_margin = TIMER_MARGIN; /* in seconds */ @@ -49,11 +51,34 @@ module_param(soft_panic, int, 0); MODULE_PARM_DESC(soft_panic, "Softdog action, set to 1 to panic, 0 to reboot (default=0)"); +static char *soft_reboot_cmd; +module_param(soft_reboot_cmd, charp, 0000); +MODULE_PARM_DESC(soft_reboot_cmd, + "Set reboot command. 
Emergency reboot takes place if unset"); + +static bool soft_active_on_boot; +module_param(soft_active_on_boot, bool, 0000); +MODULE_PARM_DESC(soft_active_on_boot, + "Set to true to active Softdog on boot (default=false)"); + static struct hrtimer softdog_ticktock; static struct hrtimer softdog_preticktock; +static int reboot_kthread_fn(void *data) +{ + kernel_restart(soft_reboot_cmd); + return -EPERM; /* Should not reach here */ +} + +static void reboot_work_fn(struct work_struct *unused) +{ + kthread_run(reboot_kthread_fn, NULL, "softdog_reboot"); +} + static enum hrtimer_restart softdog_fire(struct hrtimer *timer) { + static bool soft_reboot_fired; + module_put(THIS_MODULE); if (soft_noboot) { pr_crit("Triggered - Reboot ignored\n"); @@ -62,6 +87,33 @@ static enum hrtimer_restart softdog_fire(struct hrtimer *timer) panic("Software Watchdog Timer expired"); } else { pr_crit("Initiating system reboot\n"); + if (!soft_reboot_fired && soft_reboot_cmd != NULL) { + static DECLARE_WORK(reboot_work, reboot_work_fn); + /* + * The 'kernel_restart' is a 'might-sleep' operation. + * Also, executing it in system-wide workqueues blocks + * any driver from using the same workqueue in its + * shutdown callback function. Thus, we should execute + * the 'kernel_restart' in a standalone kernel thread. + * But since starting a kernel thread is also a + * 'might-sleep' operation, so the 'reboot_work' is + * required as a launcher of the kernel thread. + * + * After request the reboot, restart the timer to + * schedule an 'emergency_restart' reboot after + * 'TIMER_MARGIN' seconds. It's because if the softdog + * hangs, it might be because of scheduling issues. And + * if that is the case, both 'schedule_work' and + * 'kernel_restart' may possibly be malfunctional at the + * same time. 
+ */ + soft_reboot_fired = true; + schedule_work(&reboot_work); + hrtimer_add_expires_ns(timer, + (u64)TIMER_MARGIN * NSEC_PER_SEC); + + return HRTIMER_RESTART; + } emergency_restart(); pr_crit("Reboot didn't ?????\n"); } @@ -145,12 +197,17 @@ static int __init softdog_init(void) softdog_preticktock.function = softdog_pretimeout; } + if (soft_active_on_boot) + softdog_ping(&softdog_dev); + ret = watchdog_register_device(&softdog_dev); if (ret) return ret; pr_info("initialized. soft_noboot=%d soft_margin=%d sec soft_panic=%d (nowayout=%d)\n", soft_noboot, softdog_dev.timeout, soft_panic, nowayout); + pr_info(" soft_reboot_cmd=%s soft_active_on_boot=%d\n", + soft_reboot_cmd ?: "<not set>", soft_active_on_boot); return 0; } diff --git a/drivers/watchdog/sp5100_tco.c b/drivers/watchdog/sp5100_tco.c index 93bd302ae7c5..85e9664318c9 100644 --- a/drivers/watchdog/sp5100_tco.c +++ b/drivers/watchdog/sp5100_tco.c @@ -7,7 +7,7 @@ * Based on i8xx_tco.c: * (c) Copyright 2000 kernel concepts <[email protected]>, All Rights * Reserved. 
- * http://www.kernelconcepts.de + * https://www.kernelconcepts.de * * See AMD Publication 43009 "AMD SB700/710/750 Register Reference Guide", * AMD Publication 45482 "AMD SB800-Series Southbridges Register diff --git a/drivers/watchdog/sunxi_wdt.c b/drivers/watchdog/sunxi_wdt.c index 5f05a45ac187..b50757882a98 100644 --- a/drivers/watchdog/sunxi_wdt.c +++ b/drivers/watchdog/sunxi_wdt.c @@ -235,7 +235,7 @@ static int sunxi_wdt_probe(struct platform_device *pdev) sunxi_wdt = devm_kzalloc(dev, sizeof(*sunxi_wdt), GFP_KERNEL); if (!sunxi_wdt) - return -EINVAL; + return -ENOMEM; sunxi_wdt->wdt_regs = of_device_get_match_data(dev); if (!sunxi_wdt->wdt_regs) diff --git a/drivers/watchdog/w83877f_wdt.c b/drivers/watchdog/w83877f_wdt.c index 6b3b667e6f23..5772cc5d3780 100644 --- a/drivers/watchdog/w83877f_wdt.c +++ b/drivers/watchdog/w83877f_wdt.c @@ -289,7 +289,7 @@ static long fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) timeout = new_timeout; wdt_keepalive(); } - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); default: diff --git a/drivers/watchdog/w83977f_wdt.c b/drivers/watchdog/w83977f_wdt.c index 5212e68c6b01..fd64ae77780a 100644 --- a/drivers/watchdog/w83977f_wdt.c +++ b/drivers/watchdog/w83977f_wdt.c @@ -422,7 +422,7 @@ static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return -EINVAL; wdt_keepalive(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, uarg.i); diff --git a/drivers/watchdog/wafer5823wdt.c b/drivers/watchdog/wafer5823wdt.c index a6925847f76f..a8a1ed215e1e 100644 --- a/drivers/watchdog/wafer5823wdt.c +++ b/drivers/watchdog/wafer5823wdt.c @@ -174,7 +174,7 @@ static long wafwdt_ioctl(struct file *file, unsigned int cmd, timeout = new_timeout; wafwdt_stop(); wafwdt_start(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, p); diff --git a/drivers/watchdog/watchdog_dev.c 
b/drivers/watchdog/watchdog_dev.c index b9dc2c352151..6798addabd5a 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -275,15 +275,18 @@ static int watchdog_start(struct watchdog_device *wdd) set_bit(_WDOG_KEEPALIVE, &wd_data->status); started_at = ktime_get(); - if (watchdog_hw_running(wdd) && wdd->ops->ping) - err = wdd->ops->ping(wdd); - else + if (watchdog_hw_running(wdd) && wdd->ops->ping) { + err = __watchdog_ping(wdd); + if (err == 0) + set_bit(WDOG_ACTIVE, &wdd->status); + } else { err = wdd->ops->start(wdd); - if (err == 0) { - set_bit(WDOG_ACTIVE, &wdd->status); - wd_data->last_keepalive = started_at; - wd_data->last_hw_keepalive = started_at; - watchdog_update_worker(wdd); + if (err == 0) { + set_bit(WDOG_ACTIVE, &wdd->status); + wd_data->last_keepalive = started_at; + wd_data->last_hw_keepalive = started_at; + watchdog_update_worker(wdd); + } } return err; @@ -587,7 +590,7 @@ static DEVICE_ATTR_RW(pretimeout_governor); static umode_t wdt_is_visible(struct kobject *kobj, struct attribute *attr, int n) { - struct device *dev = container_of(kobj, struct device, kobj); + struct device *dev = kobj_to_dev(kobj); struct watchdog_device *wdd = dev_get_drvdata(dev); umode_t mode = attr->mode; @@ -776,7 +779,7 @@ static long watchdog_ioctl(struct file *file, unsigned int cmd, err = watchdog_ping(wdd); if (err < 0) break; - /* fall through */ + fallthrough; case WDIOC_GETTIMEOUT: /* timeout == 0 means that we don't know the timeout */ if (wdd->timeout == 0) { @@ -916,7 +919,7 @@ static int watchdog_release(struct inode *inode, struct file *file) * or if WDIOF_MAGICCLOSE is not set. If nowayout was set then * watchdog_stop will fail. 
*/ - if (!test_bit(WDOG_ACTIVE, &wdd->status)) + if (!watchdog_active(wdd)) err = 0; else if (test_and_clear_bit(_WDOG_ALLOW_RELEASE, &wd_data->status) || !(wdd->info->options & WDIOF_MAGICCLOSE)) @@ -994,6 +997,15 @@ static int watchdog_cdev_register(struct watchdog_device *wdd) if (IS_ERR_OR_NULL(watchdog_kworker)) return -ENODEV; + device_initialize(&wd_data->dev); + wd_data->dev.devt = MKDEV(MAJOR(watchdog_devt), wdd->id); + wd_data->dev.class = &watchdog_class; + wd_data->dev.parent = wdd->parent; + wd_data->dev.groups = wdd->groups; + wd_data->dev.release = watchdog_core_data_release; + dev_set_drvdata(&wd_data->dev, wdd); + dev_set_name(&wd_data->dev, "watchdog%d", wdd->id); + kthread_init_work(&wd_data->work, watchdog_ping_work); hrtimer_init(&wd_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); wd_data->timer.function = watchdog_timer_expired; @@ -1014,15 +1026,6 @@ static int watchdog_cdev_register(struct watchdog_device *wdd) } } - device_initialize(&wd_data->dev); - wd_data->dev.devt = MKDEV(MAJOR(watchdog_devt), wdd->id); - wd_data->dev.class = &watchdog_class; - wd_data->dev.parent = wdd->parent; - wd_data->dev.groups = wdd->groups; - wd_data->dev.release = watchdog_core_data_release; - dev_set_drvdata(&wd_data->dev, wdd); - dev_set_name(&wd_data->dev, "watchdog%d", wdd->id); - /* Fill in the data structures */ cdev_init(&wd_data->cdev, &watchdog_fops); @@ -1136,6 +1139,36 @@ void watchdog_dev_unregister(struct watchdog_device *wdd) } /* + * watchdog_set_last_hw_keepalive: set last HW keepalive time for watchdog + * @wdd: watchdog device + * @last_ping_ms: time since last HW heartbeat + * + * Adjusts the last known HW keepalive time for a watchdog timer. + * This is needed if the watchdog is already running when the probe + * function is called, and it can't be pinged immediately. This + * function must be called immediately after watchdog registration, + * and min_hw_heartbeat_ms must be set for this to be useful. 
+ */ +int watchdog_set_last_hw_keepalive(struct watchdog_device *wdd, + unsigned int last_ping_ms) +{ + struct watchdog_core_data *wd_data; + ktime_t now; + + if (!wdd) + return -EINVAL; + + wd_data = wdd->wd_data; + + now = ktime_get(); + + wd_data->last_hw_keepalive = ktime_sub(now, ms_to_ktime(last_ping_ms)); + + return __watchdog_ping(wdd); +} +EXPORT_SYMBOL_GPL(watchdog_set_last_hw_keepalive); + +/* * watchdog_dev_init: init dev part of watchdog core * * Allocate a range of chardev nodes to use for watchdog devices diff --git a/drivers/watchdog/wdt.c b/drivers/watchdog/wdt.c index f9054cb0f8e2..a9e40b5c633e 100644 --- a/drivers/watchdog/wdt.c +++ b/drivers/watchdog/wdt.c @@ -389,7 +389,7 @@ static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (wdt_set_heartbeat(new_heartbeat)) return -EINVAL; wdt_ping(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(heartbeat, p); default: diff --git a/drivers/watchdog/wdt285.c b/drivers/watchdog/wdt285.c index e60993d0767e..110249e5f642 100644 --- a/drivers/watchdog/wdt285.c +++ b/drivers/watchdog/wdt285.c @@ -168,7 +168,7 @@ static long watchdog_ioctl(struct file *file, unsigned int cmd, soft_margin = new_margin; reload = soft_margin * (mem_fclk_21285 / 256); watchdog_ping(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: ret = put_user(soft_margin, int_arg); break; diff --git a/drivers/watchdog/wdt977.c b/drivers/watchdog/wdt977.c index 066a4fb4d75b..c9b8e863f70f 100644 --- a/drivers/watchdog/wdt977.c +++ b/drivers/watchdog/wdt977.c @@ -398,7 +398,7 @@ static long wdt977_ioctl(struct file *file, unsigned int cmd, return -EINVAL; wdt977_keepalive(); - /* Fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(timeout, uarg.i); diff --git a/drivers/watchdog/wdt_pci.c b/drivers/watchdog/wdt_pci.c index e528024faa41..c3254ba5ace6 100644 --- a/drivers/watchdog/wdt_pci.c +++ b/drivers/watchdog/wdt_pci.c @@ -426,7 +426,7 @@ static long 
wdtpci_ioctl(struct file *file, unsigned int cmd, if (wdtpci_set_heartbeat(new_heartbeat)) return -EINVAL; wdtpci_ping(); - /* fall through */ + fallthrough; case WDIOC_GETTIMEOUT: return put_user(heartbeat, p); default: diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 1d339ef92422..ea6c1e7e3e42 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -52,9 +52,7 @@ config XEN_BALLOON_MEMORY_HOTPLUG config XEN_BALLOON_MEMORY_HOTPLUG_LIMIT int "Hotplugged memory limit (in GiB) for a PV guest" - default 512 if X86_64 - default 4 if X86_32 - range 0 64 if X86_32 + default 512 depends on XEN_HAVE_PVMMU depends on XEN_BALLOON_MEMORY_HOTPLUG help diff --git a/drivers/xen/gntdev-dmabuf.c b/drivers/xen/gntdev-dmabuf.c index 75d3bb948bf3..b1b6eebafd5d 100644 --- a/drivers/xen/gntdev-dmabuf.c +++ b/drivers/xen/gntdev-dmabuf.c @@ -613,6 +613,14 @@ dmabuf_imp_to_refs(struct gntdev_dmabuf_priv *priv, struct device *dev, goto fail_detach; } + /* Check that we have zero offset. */ + if (sgt->sgl->offset) { + ret = ERR_PTR(-EINVAL); + pr_debug("DMA buffer has %d bytes offset, user-space expects 0\n", + sgt->sgl->offset); + goto fail_unmap; + } + /* Check number of pages that imported buffer has. 
*/ if (attach->dmabuf->size != gntdev_dmabuf->nr_pages << PAGE_SHIFT) { ret = ERR_PTR(-EINVAL); diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index ea10f7bc99ab..ea1c28ccb44f 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -2303,7 +2303,7 @@ struct btrfs_backref_iter *btrfs_backref_iter_alloc( return NULL; ret->path = btrfs_alloc_path(); - if (!ret) { + if (!ret->path) { kfree(ret); return NULL; } diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h index f39d47a2d01a..219a09a2b734 100644 --- a/fs/btrfs/extent-io-tree.h +++ b/fs/btrfs/extent-io-tree.h @@ -34,6 +34,8 @@ struct io_failure_record; */ #define CHUNK_ALLOCATED EXTENT_DIRTY #define CHUNK_TRIMMED EXTENT_DEFRAG +#define CHUNK_STATE_MASK (CHUNK_ALLOCATED | \ + CHUNK_TRIMMED) enum { IO_TREE_FS_PINNED_EXTENTS, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 61ede335f6c3..de6fe176fdfb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -33,6 +33,7 @@ #include "delalloc-space.h" #include "block-group.h" #include "discard.h" +#include "rcu-string.h" #undef SCRAMBLE_DELAYED_REFS @@ -5668,6 +5669,19 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed) &start, &end, CHUNK_TRIMMED | CHUNK_ALLOCATED); + /* Check if there are any CHUNK_* bits left */ + if (start > device->total_bytes) { + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); + btrfs_warn_in_rcu(fs_info, +"ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu", + start, end - start + 1, + rcu_str_deref(device->name), + device->total_bytes); + mutex_unlock(&fs_info->chunk_mutex); + ret = 0; + break; + } + /* Ensure we skip the reserved area in the first 1M */ start = max_t(u64, start, SZ_1M); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 6d961e11639e..ef0fd7afb0b1 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2282,7 +2282,7 @@ out: static bool 
try_merge_free_space(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info, bool update_stat) { - struct btrfs_free_space *left_info; + struct btrfs_free_space *left_info = NULL; struct btrfs_free_space *right_info; bool merged = false; u64 offset = info->offset; @@ -2298,7 +2298,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, if (right_info && rb_prev(&right_info->offset_index)) left_info = rb_entry(rb_prev(&right_info->offset_index), struct btrfs_free_space, offset_index); - else + else if (!right_info) left_info = tree_search_offset(ctl, offset - 1, 0, 0); /* See try_merge_free_space() comment. */ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6dc03bab0c9d..51fcd82d41c0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -654,12 +654,18 @@ cont: page_error_op | PAGE_END_WRITEBACK); - for (i = 0; i < nr_pages; i++) { - WARN_ON(pages[i]->mapping); - put_page(pages[i]); + /* + * Ensure we only free the compressed pages if we have + * them allocated, as we can still reach here with + * inode_need_compress() == false. 
+ */ + if (pages) { + for (i = 0; i < nr_pages; i++) { + WARN_ON(pages[i]->mapping); + put_page(pages[i]); + } + kfree(pages); } - kfree(pages); - return 0; } } @@ -6622,7 +6628,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, extent_type == BTRFS_FILE_EXTENT_PREALLOC) { /* Only regular file could have regular/prealloc extent */ if (!S_ISREG(inode->vfs_inode.i_mode)) { - ret = -EUCLEAN; + err = -EUCLEAN; btrfs_crit(fs_info, "regular/prealloc extent found for non-regular inode %llu", btrfs_ino(inode)); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5a9dc31d95c9..e529ddb35b87 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -517,6 +517,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, char *compress_type; bool compress_force = false; enum btrfs_compression_type saved_compress_type; + int saved_compress_level; bool saved_compress_force; int no_compress = 0; @@ -598,6 +599,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, info->compress_type : BTRFS_COMPRESS_NONE; saved_compress_force = btrfs_test_opt(info, FORCE_COMPRESS); + saved_compress_level = info->compress_level; if (token == Opt_compress || token == Opt_compress_force || strncmp(args[0].from, "zlib", 4) == 0) { @@ -642,6 +644,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, no_compress = 0; } else if (strncmp(args[0].from, "no", 2) == 0) { compress_type = "no"; + info->compress_level = 0; + info->compress_type = 0; btrfs_clear_opt(info->mount_opt, COMPRESS); btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); compress_force = false; @@ -662,11 +666,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, */ btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); } - if ((btrfs_test_opt(info, COMPRESS) && - (info->compress_type != saved_compress_type || - compress_force != saved_compress_force)) || - (!btrfs_test_opt(info, COMPRESS) && - no_compress == 1)) { + if (no_compress == 1) { + btrfs_info(info, 
"use no compression"); + } else if ((info->compress_type != saved_compress_type) || + (compress_force != saved_compress_force) || + (info->compress_level != saved_compress_level)) { btrfs_info(info, "%s %s compression, level %d", (compress_force) ? "force" : "use", compress_type, info->compress_level); @@ -1382,6 +1386,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) { struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb); const char *compress_type; + const char *subvol_name; if (btrfs_test_opt(info, DEGRADED)) seq_puts(seq, ",degraded"); @@ -1468,8 +1473,13 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",ref_verify"); seq_printf(seq, ",subvolid=%llu", BTRFS_I(d_inode(dentry))->root->root_key.objectid); - seq_puts(seq, ",subvol="); - seq_dentry(seq, dentry, " \t\n\\"); + subvol_name = btrfs_get_subvol_name_from_objectid(info, + BTRFS_I(d_inode(dentry))->root->root_key.objectid); + if (!IS_ERR(subvol_name)) { + seq_puts(seq, ",subvol="); + seq_escape(seq, subvol_name, " \t\n\\"); + kfree(subvol_name); + } return 0; } @@ -1950,6 +1960,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) set_bit(BTRFS_FS_OPEN, &fs_info->flags); } out: + /* + * We need to set SB_I_VERSION here otherwise it'll get cleared by VFS, + * since the absence of the flag means it can be toggled off by remount. 
+ */ + *flags |= SB_I_VERSION; + wake_up_process(fs_info->transaction_kthread); btrfs_remount_cleanup(fs_info, old_opts); clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 104c80caaa74..c8df2edafd85 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1565,9 +1565,11 @@ void btrfs_sysfs_del_qgroups(struct btrfs_fs_info *fs_info) rbtree_postorder_for_each_entry_safe(qgroup, next, &fs_info->qgroup_tree, node) btrfs_sysfs_del_one_qgroup(fs_info, qgroup); - kobject_del(fs_info->qgroups_kobj); - kobject_put(fs_info->qgroups_kobj); - fs_info->qgroups_kobj = NULL; + if (fs_info->qgroups_kobj) { + kobject_del(fs_info->qgroups_kobj); + kobject_put(fs_info->qgroups_kobj); + fs_info->qgroups_kobj = NULL; + } } /* Called when qgroups get initialized, thus there is no need for locking */ diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ea8136dcf71f..696dd861cc3c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4036,11 +4036,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, fs_info->csum_root, ds + cs, ds + cs + cl - 1, &ordered_sums, 0); - if (ret) { - btrfs_release_path(dst_path); - kfree(ins_data); - return ret; - } + if (ret) + break; } } } @@ -4053,7 +4050,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, * we have to do this after the loop above to avoid changing the * log tree while trying to change the log tree. 
*/ - ret = 0; while (!list_empty(&ordered_sums)) { struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, struct btrfs_ordered_sum, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d7670e2a9f39..ee96c5869f57 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4720,6 +4720,10 @@ again: } mutex_lock(&fs_info->chunk_mutex); + /* Clear all state bits beyond the shrunk device size */ + clear_extent_bits(&device->alloc_state, new_size, (u64)-1, + CHUNK_STATE_MASK); + btrfs_device_set_disk_total_bytes(device, new_size); if (list_empty(&device->post_commit_list)) list_add_tail(&device->post_commit_list, diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index cf235f6eacf9..471e40156065 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig @@ -13,7 +13,7 @@ config CEPH_FS scalable file system designed to provide high performance, reliable access to petabytes of storage. - More information at http://ceph.newdream.net/. + More information at https://ceph.io/. If unsure, say N. diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 01ad09733ac7..6ea761c84494 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -862,8 +862,7 @@ static void writepages_finish(struct ceph_osd_request *req) osd_data = osd_req_op_extent_osd_data(req, 0); if (osd_data->pages_from_pool) - mempool_free(osd_data->pages, - ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool); + mempool_free(osd_data->pages, ceph_wb_pagevec_pool); else kfree(osd_data->pages); ceph_osdc_put_request(req); @@ -955,10 +954,10 @@ retry: int num_ops = 0, op_idx; unsigned i, pvec_pages, max_pages, locked_pages = 0; struct page **pages = NULL, **data_pages; - mempool_t *pool = NULL; /* Becomes non-null if mempool used */ struct page *page; pgoff_t strip_unit_end = 0; u64 offset = 0, len = 0; + bool from_pool = false; max_pages = wsize >> PAGE_SHIFT; @@ -1057,16 +1056,16 @@ get_more_pages: sizeof(*pages), GFP_NOFS); if (!pages) { - pool = fsc->wb_pagevec_pool; - pages = mempool_alloc(pool, GFP_NOFS); + from_pool = true; 
+ pages = mempool_alloc(ceph_wb_pagevec_pool, GFP_NOFS); BUG_ON(!pages); } len = 0; } else if (page->index != (offset + len) >> PAGE_SHIFT) { - if (num_ops >= (pool ? CEPH_OSD_SLAB_OPS : - CEPH_OSD_MAX_OPS)) { + if (num_ops >= (from_pool ? CEPH_OSD_SLAB_OPS : + CEPH_OSD_MAX_OPS)) { redirty_page_for_writepage(wbc, page); unlock_page(page); break; @@ -1161,7 +1160,7 @@ new_request: offset, len); osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len, 0, - !!pool, false); + from_pool, false); osd_req_op_extent_update(req, op_idx, len); len = 0; @@ -1188,12 +1187,12 @@ new_request: dout("writepages got pages at %llu~%llu\n", offset, len); osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len, - 0, !!pool, false); + 0, from_pool, false); osd_req_op_extent_update(req, op_idx, len); BUG_ON(op_idx + 1 != req->r_num_ops); - pool = NULL; + from_pool = false; if (i < locked_pages) { BUG_ON(num_ops <= req->r_num_ops); num_ops -= req->r_num_ops; @@ -1204,8 +1203,8 @@ new_request: pages = kmalloc_array(locked_pages, sizeof(*pages), GFP_NOFS); if (!pages) { - pool = fsc->wb_pagevec_pool; - pages = mempool_alloc(pool, GFP_NOFS); + from_pool = true; + pages = mempool_alloc(ceph_wb_pagevec_pool, GFP_NOFS); BUG_ON(!pages); } memcpy(pages, data_pages + i, diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 972c13aa4225..55ccccf77cea 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -668,6 +668,7 @@ void ceph_add_cap(struct inode *inode, spin_lock(&session->s_cap_lock); list_add_tail(&cap->session_caps, &session->s_caps); session->s_nr_caps++; + atomic64_inc(&mdsc->metric.total_caps); spin_unlock(&session->s_cap_lock); } else { spin_lock(&session->s_cap_lock); @@ -1161,6 +1162,7 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) } else { list_del_init(&cap->session_caps); session->s_nr_caps--; + atomic64_dec(&mdsc->metric.total_caps); cap->session = NULL; removed = 1; } @@ -4187,10 +4189,8 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) 
struct ceph_inode_info *ci; dout("check_delayed_caps\n"); - while (1) { - spin_lock(&mdsc->cap_delay_lock); - if (list_empty(&mdsc->cap_delay_list)) - break; + spin_lock(&mdsc->cap_delay_lock); + while (!list_empty(&mdsc->cap_delay_list)) { ci = list_first_entry(&mdsc->cap_delay_list, struct ceph_inode_info, i_cap_delay_list); @@ -4200,13 +4200,13 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) list_del_init(&ci->i_cap_delay_list); inode = igrab(&ci->vfs_inode); - spin_unlock(&mdsc->cap_delay_lock); - if (inode) { + spin_unlock(&mdsc->cap_delay_lock); dout("check_delayed_caps on %p\n", inode); ceph_check_caps(ci, 0, NULL); /* avoid calling iput_final() in tick thread */ ceph_async_iput(inode); + spin_lock(&mdsc->cap_delay_lock); } } spin_unlock(&mdsc->cap_delay_lock); diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 070ed8481340..97539b497e4c 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -145,7 +145,7 @@ static int metric_show(struct seq_file *s, void *p) struct ceph_fs_client *fsc = s->private; struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_client_metric *m = &mdsc->metric; - int i, nr_caps = 0; + int nr_caps = 0; s64 total, sum, avg, min, max, sq; seq_printf(s, "item total avg_lat(us) min_lat(us) max_lat(us) stdev(us)\n"); @@ -190,17 +190,7 @@ static int metric_show(struct seq_file *s, void *p) percpu_counter_sum(&m->d_lease_mis), percpu_counter_sum(&m->d_lease_hit)); - mutex_lock(&mdsc->mutex); - for (i = 0; i < mdsc->max_sessions; i++) { - struct ceph_mds_session *s; - - s = __ceph_lookup_mds_session(mdsc, i); - if (!s) - continue; - nr_caps += s->s_nr_caps; - ceph_put_mds_session(s); - } - mutex_unlock(&mdsc->mutex); + nr_caps = atomic64_read(&m->total_caps); seq_printf(s, "%-14s%-16d%-16lld%lld\n", "caps", nr_caps, percpu_counter_sum(&m->i_caps_mis), percpu_counter_sum(&m->i_caps_hit)); @@ -272,7 +262,7 @@ static int mds_sessions_show(struct seq_file *s, void *ptr) struct ceph_mds_client *mdsc = fsc->mdsc; struct 
ceph_auth_client *ac = fsc->client->monc.auth; struct ceph_options *opt = fsc->client->options; - int mds = -1; + int mds; mutex_lock(&mdsc->mutex); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 39f5311404b0..060bdcc5ce32 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -930,6 +930,10 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, req->r_num_caps = 2; req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; + if (as_ctx.pagelist) { + req->r_pagelist = as_ctx.pagelist; + as_ctx.pagelist = NULL; + } err = ceph_mdsc_do_request(mdsc, dir, req); if (!err && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 160644ddaeed..d51c3f2fdca0 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1538,6 +1538,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(filp); struct ceph_inode_info *ci = ceph_inode(inode); struct page *pinned_page = NULL; + bool direct_lock = iocb->ki_flags & IOCB_DIRECT; ssize_t ret; int want, got = 0; int retry_op = 0, read = 0; @@ -1546,7 +1547,7 @@ again: dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode); - if (iocb->ki_flags & IOCB_DIRECT) + if (direct_lock) ceph_start_io_direct(inode); else ceph_start_io_read(inode); @@ -1603,7 +1604,7 @@ again: } ceph_put_cap_refs(ci, got); - if (iocb->ki_flags & IOCB_DIRECT) + if (direct_lock) ceph_end_io_direct(inode); else ceph_end_io_read(inode); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a50497142e59..4a26862d7667 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1103,8 +1103,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, frag.frag, mds); if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= CEPH_MDS_STATE_ACTIVE) { - if (mode == USE_ANY_MDS && - !ceph_mdsmap_is_laggy(mdsc->mdsmap, + 
if (!ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) goto out; } @@ -1168,7 +1167,7 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq) static const unsigned char feature_bits[] = CEPHFS_FEATURES_CLIENT_SUPPORTED; #define FEATURE_BYTES(c) (DIV_ROUND_UP((size_t)feature_bits[c - 1] + 1, 64) * 8) -static void encode_supported_features(void **p, void *end) +static int encode_supported_features(void **p, void *end) { static const size_t count = ARRAY_SIZE(feature_bits); @@ -1176,16 +1175,64 @@ static void encode_supported_features(void **p, void *end) size_t i; size_t size = FEATURE_BYTES(count); - BUG_ON(*p + 4 + size > end); + if (WARN_ON_ONCE(*p + 4 + size > end)) + return -ERANGE; + ceph_encode_32(p, size); memset(*p, 0, size); for (i = 0; i < count; i++) ((unsigned char*)(*p))[i / 8] |= BIT(feature_bits[i] % 8); *p += size; } else { - BUG_ON(*p + 4 > end); + if (WARN_ON_ONCE(*p + 4 > end)) + return -ERANGE; + ceph_encode_32(p, 0); } + + return 0; +} + +static const unsigned char metric_bits[] = CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED; +#define METRIC_BYTES(cnt) (DIV_ROUND_UP((size_t)metric_bits[cnt - 1] + 1, 64) * 8) +static int encode_metric_spec(void **p, void *end) +{ + static const size_t count = ARRAY_SIZE(metric_bits); + + /* header */ + if (WARN_ON_ONCE(*p + 2 > end)) + return -ERANGE; + + ceph_encode_8(p, 1); /* version */ + ceph_encode_8(p, 1); /* compat */ + + if (count > 0) { + size_t i; + size_t size = METRIC_BYTES(count); + + if (WARN_ON_ONCE(*p + 4 + 4 + size > end)) + return -ERANGE; + + /* metric spec info length */ + ceph_encode_32(p, 4 + size); + + /* metric spec */ + ceph_encode_32(p, size); + memset(*p, 0, size); + for (i = 0; i < count; i++) + ((unsigned char *)(*p))[i / 8] |= BIT(metric_bits[i] % 8); + *p += size; + } else { + if (WARN_ON_ONCE(*p + 4 + 4 > end)) + return -ERANGE; + + /* metric spec info length */ + ceph_encode_32(p, 4); + /* metric spec */ + ceph_encode_32(p, 0); + } + + return 0; } /* @@ -1203,6 +1250,7 @@ static struct 
ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 struct ceph_mount_options *fsopt = mdsc->fsc->mount_options; size_t size, count; void *p, *end; + int ret; const char* metadata[][2] = { {"hostname", mdsc->nodename}, @@ -1227,12 +1275,19 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 size = FEATURE_BYTES(count); extra_bytes += 4 + size; + /* metric spec */ + size = 0; + count = ARRAY_SIZE(metric_bits); + if (count > 0) + size = METRIC_BYTES(count); + extra_bytes += 2 + 4 + 4 + size; + /* Allocate the message */ msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, GFP_NOFS, false); if (!msg) { pr_err("create_session_msg ENOMEM creating msg\n"); - return NULL; + return ERR_PTR(-ENOMEM); } p = msg->front.iov_base; end = p + msg->front.iov_len; @@ -1245,9 +1300,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 * Serialize client metadata into waiting buffer space, using * the format that userspace expects for map<string, string> * - * ClientSession messages with metadata are v3 + * ClientSession messages with metadata are v4 */ - msg->hdr.version = cpu_to_le16(3); + msg->hdr.version = cpu_to_le16(4); msg->hdr.compat_version = cpu_to_le16(1); /* The write pointer, following the session_head structure */ @@ -1269,7 +1324,20 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 p += val_len; } - encode_supported_features(&p, end); + ret = encode_supported_features(&p, end); + if (ret) { + pr_err("encode_supported_features failed!\n"); + ceph_msg_put(msg); + return ERR_PTR(ret); + } + + ret = encode_metric_spec(&p, end); + if (ret) { + pr_err("encode_metric_spec failed!\n"); + ceph_msg_put(msg); + return ERR_PTR(ret); + } + msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); @@ -1297,8 +1365,8 @@ static int __open_session(struct ceph_mds_client *mdsc, /* send connect message */ msg = 
create_session_open_msg(mdsc, session->s_seq); - if (!msg) - return -ENOMEM; + if (IS_ERR(msg)) + return PTR_ERR(msg); ceph_con_send(&session->s_con, msg); return 0; } @@ -1312,6 +1380,7 @@ static struct ceph_mds_session * __open_export_target_session(struct ceph_mds_client *mdsc, int target) { struct ceph_mds_session *session; + int ret; session = __ceph_lookup_mds_session(mdsc, target); if (!session) { @@ -1320,8 +1389,11 @@ __open_export_target_session(struct ceph_mds_client *mdsc, int target) return session; } if (session->s_state == CEPH_MDS_SESSION_NEW || - session->s_state == CEPH_MDS_SESSION_CLOSING) - __open_session(mdsc, session); + session->s_state == CEPH_MDS_SESSION_CLOSING) { + ret = __open_session(mdsc, session); + if (ret) + return ERR_PTR(ret); + } return session; } @@ -1485,6 +1557,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session, cap->session = NULL; list_del_init(&cap->session_caps); session->s_nr_caps--; + atomic64_dec(&session->s_mdsc->metric.total_caps); if (cap->queue_release) __ceph_queue_cap_release(session, cap); else @@ -1785,8 +1858,7 @@ static void renewed_caps(struct ceph_mds_client *mdsc, /* * send a session close request */ -static int request_close_session(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session) +static int request_close_session(struct ceph_mds_session *session) { struct ceph_msg *msg; @@ -1809,7 +1881,7 @@ static int __close_session(struct ceph_mds_client *mdsc, if (session->s_state >= CEPH_MDS_SESSION_CLOSING) return 0; session->s_state = CEPH_MDS_SESSION_CLOSING; - return request_close_session(mdsc, session); + return request_close_session(session); } static bool drop_negative_children(struct dentry *dentry) @@ -2520,7 +2592,12 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, ceph_encode_copy(&p, &ts, sizeof(ts)); } - BUG_ON(p > end); + if (WARN_ON_ONCE(p > end)) { + ceph_msg_put(msg); + msg = ERR_PTR(-ERANGE); + goto out_free2; + } + msg->front.iov_len 
= p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); @@ -2756,7 +2833,9 @@ static void __do_request(struct ceph_mds_client *mdsc, } if (session->s_state == CEPH_MDS_SESSION_NEW || session->s_state == CEPH_MDS_SESSION_CLOSING) { - __open_session(mdsc, session); + err = __open_session(mdsc, session); + if (err) + goto out_session; /* retry the same mds later */ if (random) req->r_resend_mds = mds; @@ -3279,8 +3358,10 @@ static void handle_session(struct ceph_mds_session *session, goto bad; /* version >= 3, feature bits */ ceph_decode_32_safe(&p, end, len, bad); - ceph_decode_64_safe(&p, end, features, bad); - p += len - sizeof(features); + if (len) { + ceph_decode_64_safe(&p, end, features, bad); + p += len - sizeof(features); + } } mutex_lock(&mdsc->mutex); @@ -3310,6 +3391,8 @@ static void handle_session(struct ceph_mds_session *session, session->s_state = CEPH_MDS_SESSION_OPEN; session->s_features = features; renewed_caps(mdsc, session, 0); + if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &session->s_features)) + metric_schedule_delayed(&mdsc->metric); wake = 1; if (mdsc->stopping) __close_session(mdsc, session); @@ -4263,6 +4346,30 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc) ceph_force_reconnect(fsc->sb); } +bool check_session_state(struct ceph_mds_session *s) +{ + if (s->s_state == CEPH_MDS_SESSION_CLOSING) { + dout("resending session close request for mds%d\n", + s->s_mds); + request_close_session(s); + return false; + } + if (s->s_ttl && time_after(jiffies, s->s_ttl)) { + if (s->s_state == CEPH_MDS_SESSION_OPEN) { + s->s_state = CEPH_MDS_SESSION_HUNG; + pr_info("mds%d hung\n", s->s_mds); + } + } + if (s->s_state == CEPH_MDS_SESSION_NEW || + s->s_state == CEPH_MDS_SESSION_RESTARTING || + s->s_state == CEPH_MDS_SESSION_CLOSED || + s->s_state == CEPH_MDS_SESSION_REJECTED) + /* this mds is failed or recovering, just wait */ + return false; + + return true; +} + /* * delayed work -- periodically trim expired leases, 
renew caps with mds */ @@ -4283,6 +4390,9 @@ static void delayed_work(struct work_struct *work) dout("mdsc delayed_work\n"); + if (mdsc->stopping) + return; + mutex_lock(&mdsc->mutex); renew_interval = mdsc->mdsmap->m_session_timeout >> 2; renew_caps = time_after_eq(jiffies, HZ*renew_interval + @@ -4294,23 +4404,8 @@ static void delayed_work(struct work_struct *work) struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i); if (!s) continue; - if (s->s_state == CEPH_MDS_SESSION_CLOSING) { - dout("resending session close request for mds%d\n", - s->s_mds); - request_close_session(mdsc, s); - ceph_put_mds_session(s); - continue; - } - if (s->s_ttl && time_after(jiffies, s->s_ttl)) { - if (s->s_state == CEPH_MDS_SESSION_OPEN) { - s->s_state = CEPH_MDS_SESSION_HUNG; - pr_info("mds%d hung\n", s->s_mds); - } - } - if (s->s_state == CEPH_MDS_SESSION_NEW || - s->s_state == CEPH_MDS_SESSION_RESTARTING || - s->s_state == CEPH_MDS_SESSION_REJECTED) { - /* this mds is failed or recovering, just wait */ + + if (!check_session_state(s)) { ceph_put_mds_session(s); continue; } @@ -4359,7 +4454,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) goto err_mdsc; } - fsc->mdsc = mdsc; init_completion(&mdsc->safe_umount_waiters); init_waitqueue_head(&mdsc->session_close_wq); INIT_LIST_HEAD(&mdsc->waiting_for_map); @@ -4414,6 +4508,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) strscpy(mdsc->nodename, utsname()->nodename, sizeof(mdsc->nodename)); + + fsc->mdsc = mdsc; return 0; err_mdsmap: @@ -4657,7 +4753,16 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) { dout("stop\n"); - cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ + /* + * Make sure the delayed work stopped before releasing + * the resources. + * + * Because the cancel_delayed_work_sync() will only + * guarantee that the work finishes executing. But the + * delayed work will re-arm itself again after that. 
+ */ + flush_delayed_work(&mdsc->delayed_work); + if (mdsc->mdsmap) ceph_mdsmap_destroy(mdsc->mdsmap); kfree(mdsc->sessions); @@ -4680,6 +4785,7 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc) ceph_metric_destroy(&mdsc->metric); + flush_delayed_work(&mdsc->metric.delayed_work); fsc->mdsc = NULL; kfree(mdsc); dout("mdsc_destroy %p done\n", mdsc); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 5e0c4073a6be..bc9e95937d7c 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -18,6 +18,7 @@ #include <linux/ceph/auth.h> #include "metric.h" +#include "super.h" /* The first 8 bits are reserved for old ceph releases */ enum ceph_feature_type { @@ -27,8 +28,9 @@ enum ceph_feature_type { CEPHFS_FEATURE_LAZY_CAP_WANTED, CEPHFS_FEATURE_MULTI_RECONNECT, CEPHFS_FEATURE_DELEG_INO, + CEPHFS_FEATURE_METRIC_COLLECT, - CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_DELEG_INO, + CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT, }; /* @@ -42,6 +44,7 @@ enum ceph_feature_type { CEPHFS_FEATURE_LAZY_CAP_WANTED, \ CEPHFS_FEATURE_MULTI_RECONNECT, \ CEPHFS_FEATURE_DELEG_INO, \ + CEPHFS_FEATURE_METRIC_COLLECT, \ \ CEPHFS_FEATURE_MAX, \ } @@ -476,6 +479,8 @@ struct ceph_mds_client { extern const char *ceph_mds_op_name(int op); +extern bool check_session_state(struct ceph_mds_session *s); + extern struct ceph_mds_session * __ceph_lookup_mds_session(struct ceph_mds_client *, int mds); diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 889627817e52..e4aba6c6d3b5 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -120,7 +120,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) const void *start = *p; int i, j, n; int err; - u8 mdsmap_v, mdsmap_cv; + u8 mdsmap_v; u16 mdsmap_ev; m = kzalloc(sizeof(*m), GFP_NOFS); @@ -129,7 +129,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ceph_decode_need(p, end, 1 + 1, bad); mdsmap_v = ceph_decode_8(p); - mdsmap_cv = ceph_decode_8(p); + *p += sizeof(u8); /* mdsmap_cv */ if (mdsmap_v >= 4) { u32 
mdsmap_len; ceph_decode_32_safe(p, end, mdsmap_len, bad); @@ -174,7 +174,6 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) u64 global_id; u32 namelen; s32 mds, inc, state; - u64 state_seq; u8 info_v; void *info_end = NULL; struct ceph_entity_addr addr; @@ -189,9 +188,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) info_v= ceph_decode_8(p); if (info_v >= 4) { u32 info_len; - u8 info_cv; ceph_decode_need(p, end, 1 + sizeof(u32), bad); - info_cv = ceph_decode_8(p); + *p += sizeof(u8); /* info_cv */ info_len = ceph_decode_32(p); info_end = *p + info_len; if (info_end > end) @@ -210,7 +208,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) mds = ceph_decode_32(p); inc = ceph_decode_32(p); state = ceph_decode_32(p); - state_seq = ceph_decode_64(p); + *p += sizeof(u64); /* state_seq */ err = ceph_decode_entity_addr(p, end, &addr); if (err) goto corrupt; diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index 9217f35bc2b9..2466b261fba2 100644 --- a/fs/ceph/metric.c +++ b/fs/ceph/metric.c @@ -1,10 +1,150 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/ceph/ceph_debug.h> #include <linux/types.h> #include <linux/percpu_counter.h> #include <linux/math64.h> #include "metric.h" +#include "mds_client.h" + +static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, + struct ceph_mds_session *s) +{ + struct ceph_metric_head *head; + struct ceph_metric_cap *cap; + struct ceph_metric_read_latency *read; + struct ceph_metric_write_latency *write; + struct ceph_metric_metadata_latency *meta; + struct ceph_client_metric *m = &mdsc->metric; + u64 nr_caps = atomic64_read(&m->total_caps); + struct ceph_msg *msg; + struct timespec64 ts; + s64 sum; + s32 items = 0; + s32 len; + + len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write) + + sizeof(*meta); + + msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true); + if (!msg) { + pr_err("send metrics to mds%d, failed to allocate message\n", + s->s_mds); + 
return false; + } + + head = msg->front.iov_base; + + /* encode the cap metric */ + cap = (struct ceph_metric_cap *)(head + 1); + cap->type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO); + cap->ver = 1; + cap->compat = 1; + cap->data_len = cpu_to_le32(sizeof(*cap) - 10); + cap->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_hit)); + cap->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_mis)); + cap->total = cpu_to_le64(nr_caps); + items++; + + /* encode the read latency metric */ + read = (struct ceph_metric_read_latency *)(cap + 1); + read->type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY); + read->ver = 1; + read->compat = 1; + read->data_len = cpu_to_le32(sizeof(*read) - 10); + sum = m->read_latency_sum; + jiffies_to_timespec64(sum, &ts); + read->sec = cpu_to_le32(ts.tv_sec); + read->nsec = cpu_to_le32(ts.tv_nsec); + items++; + + /* encode the write latency metric */ + write = (struct ceph_metric_write_latency *)(read + 1); + write->type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY); + write->ver = 1; + write->compat = 1; + write->data_len = cpu_to_le32(sizeof(*write) - 10); + sum = m->write_latency_sum; + jiffies_to_timespec64(sum, &ts); + write->sec = cpu_to_le32(ts.tv_sec); + write->nsec = cpu_to_le32(ts.tv_nsec); + items++; + + /* encode the metadata latency metric */ + meta = (struct ceph_metric_metadata_latency *)(write + 1); + meta->type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY); + meta->ver = 1; + meta->compat = 1; + meta->data_len = cpu_to_le32(sizeof(*meta) - 10); + sum = m->metadata_latency_sum; + jiffies_to_timespec64(sum, &ts); + meta->sec = cpu_to_le32(ts.tv_sec); + meta->nsec = cpu_to_le32(ts.tv_nsec); + items++; + + put_unaligned_le32(items, &head->num); + msg->front.iov_len = len; + msg->hdr.version = cpu_to_le16(1); + msg->hdr.compat_version = cpu_to_le16(1); + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); + dout("client%llu send metrics to mds%d\n", + ceph_client_gid(mdsc->fsc->client), s->s_mds); + 
ceph_con_send(&s->s_con, msg); + + return true; +} + + +static void metric_get_session(struct ceph_mds_client *mdsc) +{ + struct ceph_mds_session *s; + int i; + + mutex_lock(&mdsc->mutex); + for (i = 0; i < mdsc->max_sessions; i++) { + s = __ceph_lookup_mds_session(mdsc, i); + if (!s) + continue; + + /* + * Skip it if MDS doesn't support the metric collection, + * or the MDS will close the session's socket connection + * directly when it get this message. + */ + if (check_session_state(s) && + test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &s->s_features)) { + mdsc->metric.session = s; + break; + } + + ceph_put_mds_session(s); + } + mutex_unlock(&mdsc->mutex); +} + +static void metric_delayed_work(struct work_struct *work) +{ + struct ceph_client_metric *m = + container_of(work, struct ceph_client_metric, delayed_work.work); + struct ceph_mds_client *mdsc = + container_of(m, struct ceph_mds_client, metric); + + if (mdsc->stopping) + return; + + if (!m->session || !check_session_state(m->session)) { + if (m->session) { + ceph_put_mds_session(m->session); + m->session = NULL; + } + metric_get_session(mdsc); + } + if (m->session) { + ceph_mdsc_send_metrics(mdsc, m->session); + metric_schedule_delayed(m); + } +} int ceph_metric_init(struct ceph_client_metric *m) { @@ -22,6 +162,7 @@ int ceph_metric_init(struct ceph_client_metric *m) if (ret) goto err_d_lease_mis; + atomic64_set(&m->total_caps, 0); ret = percpu_counter_init(&m->i_caps_hit, 0, GFP_KERNEL); if (ret) goto err_i_caps_hit; @@ -51,6 +192,9 @@ int ceph_metric_init(struct ceph_client_metric *m) m->total_metadatas = 0; m->metadata_latency_sum = 0; + m->session = NULL; + INIT_DELAYED_WORK(&m->delayed_work, metric_delayed_work); + return 0; err_i_caps_mis: @@ -72,6 +216,11 @@ void ceph_metric_destroy(struct ceph_client_metric *m) percpu_counter_destroy(&m->i_caps_hit); percpu_counter_destroy(&m->d_lease_mis); percpu_counter_destroy(&m->d_lease_hit); + + cancel_delayed_work_sync(&m->delayed_work); + + if (m->session) + 
ceph_put_mds_session(m->session); } static inline void __update_latency(ktime_t *totalp, ktime_t *lsump, diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h index ccd81285a450..1d0959d669d7 100644 --- a/fs/ceph/metric.h +++ b/fs/ceph/metric.h @@ -6,12 +6,91 @@ #include <linux/percpu_counter.h> #include <linux/ktime.h> +extern bool disable_send_metrics; + +enum ceph_metric_type { + CLIENT_METRIC_TYPE_CAP_INFO, + CLIENT_METRIC_TYPE_READ_LATENCY, + CLIENT_METRIC_TYPE_WRITE_LATENCY, + CLIENT_METRIC_TYPE_METADATA_LATENCY, + CLIENT_METRIC_TYPE_DENTRY_LEASE, + + CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE, +}; + +/* + * This will always have the highest metric bit value + * as the last element of the array. + */ +#define CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED { \ + CLIENT_METRIC_TYPE_CAP_INFO, \ + CLIENT_METRIC_TYPE_READ_LATENCY, \ + CLIENT_METRIC_TYPE_WRITE_LATENCY, \ + CLIENT_METRIC_TYPE_METADATA_LATENCY, \ + \ + CLIENT_METRIC_TYPE_MAX, \ +} + +/* metric caps header */ +struct ceph_metric_cap { + __le32 type; /* ceph metric type */ + + __u8 ver; + __u8 compat; + + __le32 data_len; /* length of sizeof(hit + mis + total) */ + __le64 hit; + __le64 mis; + __le64 total; +} __packed; + +/* metric read latency header */ +struct ceph_metric_read_latency { + __le32 type; /* ceph metric type */ + + __u8 ver; + __u8 compat; + + __le32 data_len; /* length of sizeof(sec + nsec) */ + __le32 sec; + __le32 nsec; +} __packed; + +/* metric write latency header */ +struct ceph_metric_write_latency { + __le32 type; /* ceph metric type */ + + __u8 ver; + __u8 compat; + + __le32 data_len; /* length of sizeof(sec + nsec) */ + __le32 sec; + __le32 nsec; +} __packed; + +/* metric metadata latency header */ +struct ceph_metric_metadata_latency { + __le32 type; /* ceph metric type */ + + __u8 ver; + __u8 compat; + + __le32 data_len; /* length of sizeof(sec + nsec) */ + __le32 sec; + __le32 nsec; +} __packed; + +struct ceph_metric_head { + __le32 num; /* the number of metrics that will be 
sent */ +} __packed; + /* This is the global metrics */ struct ceph_client_metric { atomic64_t total_dentries; struct percpu_counter d_lease_hit; struct percpu_counter d_lease_mis; + atomic64_t total_caps; struct percpu_counter i_caps_hit; struct percpu_counter i_caps_mis; @@ -35,8 +114,20 @@ struct ceph_client_metric { ktime_t metadata_latency_sq_sum; ktime_t metadata_latency_min; ktime_t metadata_latency_max; + + struct ceph_mds_session *session; + struct delayed_work delayed_work; /* delayed work */ }; +static inline void metric_schedule_delayed(struct ceph_client_metric *m) +{ + if (disable_send_metrics) + return; + + /* per second */ + schedule_delayed_work(&m->delayed_work, round_jiffies_relative(HZ)); +} + extern int ceph_metric_init(struct ceph_client_metric *m); extern void ceph_metric_destroy(struct ceph_client_metric *m); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index c9784eb1159a..7ec0e6d03d10 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -27,6 +27,9 @@ #include <linux/ceph/auth.h> #include <linux/ceph/debugfs.h> +static DEFINE_SPINLOCK(ceph_fsc_lock); +static LIST_HEAD(ceph_fsc_list); + /* * Ceph superblock operations * @@ -634,8 +637,6 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, struct ceph_options *opt) { struct ceph_fs_client *fsc; - int page_count; - size_t size; int err; fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); @@ -683,18 +684,12 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, if (!fsc->cap_wq) goto fail_inode_wq; - /* set up mempools */ - err = -ENOMEM; - page_count = fsc->mount_options->wsize >> PAGE_SHIFT; - size = sizeof (struct page *) * (page_count ? 
page_count : 1); - fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size); - if (!fsc->wb_pagevec_pool) - goto fail_cap_wq; + spin_lock(&ceph_fsc_lock); + list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list); + spin_unlock(&ceph_fsc_lock); return fsc; -fail_cap_wq: - destroy_workqueue(fsc->cap_wq); fail_inode_wq: destroy_workqueue(fsc->inode_wq); fail_client: @@ -717,12 +712,14 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) { dout("destroy_fs_client %p\n", fsc); + spin_lock(&ceph_fsc_lock); + list_del(&fsc->metric_wakeup); + spin_unlock(&ceph_fsc_lock); + ceph_mdsc_destroy(fsc); destroy_workqueue(fsc->inode_wq); destroy_workqueue(fsc->cap_wq); - mempool_destroy(fsc->wb_pagevec_pool); - destroy_mount_options(fsc->mount_options); ceph_destroy_client(fsc->client); @@ -741,6 +738,7 @@ struct kmem_cache *ceph_dentry_cachep; struct kmem_cache *ceph_file_cachep; struct kmem_cache *ceph_dir_file_cachep; struct kmem_cache *ceph_mds_request_cachep; +mempool_t *ceph_wb_pagevec_pool; static void ceph_inode_init_once(void *foo) { @@ -785,6 +783,10 @@ static int __init init_caches(void) if (!ceph_mds_request_cachep) goto bad_mds_req; + ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT); + if (!ceph_wb_pagevec_pool) + goto bad_pagevec_pool; + error = ceph_fscache_register(); if (error) goto bad_fscache; @@ -793,6 +795,8 @@ static int __init init_caches(void) bad_fscache: kmem_cache_destroy(ceph_mds_request_cachep); +bad_pagevec_pool: + mempool_destroy(ceph_wb_pagevec_pool); bad_mds_req: kmem_cache_destroy(ceph_dir_file_cachep); bad_dir_file: @@ -823,12 +827,13 @@ static void destroy_caches(void) kmem_cache_destroy(ceph_file_cachep); kmem_cache_destroy(ceph_dir_file_cachep); kmem_cache_destroy(ceph_mds_request_cachep); + mempool_destroy(ceph_wb_pagevec_pool); ceph_fscache_unregister(); } /* - * ceph_umount_begin - initiate forced umount. Tear down down the + * ceph_umount_begin - initiate forced umount. 
Tear down the * mount, skipping steps that may hang while waiting for server(s). */ static void ceph_umount_begin(struct super_block *sb) @@ -1282,6 +1287,37 @@ static void __exit exit_ceph(void) destroy_caches(); } +static int param_set_metrics(const char *val, const struct kernel_param *kp) +{ + struct ceph_fs_client *fsc; + int ret; + + ret = param_set_bool(val, kp); + if (ret) { + pr_err("Failed to parse sending metrics switch value '%s'\n", + val); + return ret; + } else if (!disable_send_metrics) { + // wake up all the mds clients + spin_lock(&ceph_fsc_lock); + list_for_each_entry(fsc, &ceph_fsc_list, metric_wakeup) { + metric_schedule_delayed(&fsc->mdsc->metric); + } + spin_unlock(&ceph_fsc_lock); + } + + return 0; +} + +static const struct kernel_param_ops param_ops_metrics = { + .set = param_set_metrics, + .get = param_get_bool, +}; + +bool disable_send_metrics = false; +module_param_cb(disable_send_metrics, &param_ops_metrics, &disable_send_metrics, 0644); +MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)"); + module_init(init_ceph); module_exit(exit_ceph); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 5a6cdd39bc10..4c3c964b1c54 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -101,6 +101,8 @@ struct ceph_mount_options { struct ceph_fs_client { struct super_block *sb; + struct list_head metric_wakeup; + struct ceph_mount_options *mount_options; struct ceph_client *client; @@ -116,8 +118,6 @@ struct ceph_fs_client { struct ceph_mds_client *mdsc; - /* writeback */ - mempool_t *wb_pagevec_pool; atomic_long_t writeback_count; struct workqueue_struct *inode_wq; @@ -353,7 +353,7 @@ struct ceph_inode_info { unsigned i_dirty_caps, i_flushing_caps; /* mask of dirtied fields */ /* - * Link to the the auth cap's session's s_cap_dirty list. s_cap_dirty + * Link to the auth cap's session's s_cap_dirty list.
s_cap_dirty * is protected by the mdsc->cap_dirty_lock, but each individual item * is also protected by the inode's i_ceph_lock. Walking s_cap_dirty * requires the mdsc->cap_dirty_lock. List presence for an item can diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 71ee34d160c3..3a733ac33d9b 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -497,10 +497,10 @@ static int __set_xattr(struct ceph_inode_info *ci, kfree(*newxattr); *newxattr = NULL; if (xattr->should_free_val) - kfree((void *)xattr->val); + kfree(xattr->val); if (update_xattr) { - kfree((void *)name); + kfree(name); name = xattr->name; } ci->i_xattrs.names_size -= xattr->name_len; @@ -566,9 +566,9 @@ static void __free_xattr(struct ceph_inode_xattr *xattr) BUG_ON(!xattr); if (xattr->should_free_name) - kfree((void *)xattr->name); + kfree(xattr->name); if (xattr->should_free_val) - kfree((void *)xattr->val); + kfree(xattr->val); kfree(xattr); } @@ -582,9 +582,9 @@ static int __remove_xattr(struct ceph_inode_info *ci, rb_erase(&xattr->node, &ci->i_xattrs.index); if (xattr->should_free_name) - kfree((void *)xattr->name); + kfree(xattr->name); if (xattr->should_free_val) - kfree((void *)xattr->val); + kfree(xattr->val); ci->i_xattrs.names_size -= xattr->name_len; ci->i_xattrs.vals_size -= xattr->val_len; diff --git a/fs/coredump.c b/fs/coredump.c index 7237f07ff6be..76e7c10edfc0 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -153,10 +153,10 @@ int cn_esc_printf(struct core_name *cn, const char *fmt, ...) 
return ret; } -static int cn_print_exe_file(struct core_name *cn) +static int cn_print_exe_file(struct core_name *cn, bool name_only) { struct file *exe_file; - char *pathbuf, *path; + char *pathbuf, *path, *ptr; int ret; exe_file = get_mm_exe_file(current->mm); @@ -175,6 +175,11 @@ static int cn_print_exe_file(struct core_name *cn) goto free_buf; } + if (name_only) { + ptr = strrchr(path, '/'); + if (ptr) + path = ptr + 1; + } ret = cn_esc_printf(cn, "%s", path); free_buf: @@ -301,12 +306,16 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm, utsname()->nodename); up_read(&uts_sem); break; - /* executable */ + /* executable, could be changed by prctl PR_SET_NAME etc */ case 'e': err = cn_esc_printf(cn, "%s", current->comm); break; + /* file name of executable */ + case 'f': + err = cn_print_exe_file(cn, true); + break; case 'E': - err = cn_print_exe_file(cn); + err = cn_print_exe_file(cn, false); break; /* core limit size */ case 'c': diff --git a/fs/exec.c b/fs/exec.c index 3698252719a3..a91003e28eaa 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -141,12 +141,14 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) if (IS_ERR(file)) goto out; - error = -EINVAL; - if (!S_ISREG(file_inode(file)->i_mode)) - goto exit; - + /* + * may_open() has already checked for this, so it should be + * impossible to trip now. But we need to be extra cautious + * and check again at the very end too. + */ error = -EACCES; - if (path_noexec(&file->f_path)) + if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) || + path_noexec(&file->f_path))) goto exit; fsnotify_open(file); @@ -215,7 +217,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, * We are doing an exec(). 'current' is the process * doing the exec and bprm->mm is the new process's mm. 
*/ - ret = get_user_pages_remote(current, bprm->mm, pos, 1, gup_flags, + ret = get_user_pages_remote(bprm->mm, pos, 1, gup_flags, &page, NULL, NULL); if (ret <= 0) return NULL; @@ -909,11 +911,14 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) if (IS_ERR(file)) goto out; + /* + * may_open() has already checked for this, so it should be + * impossible to trip now. But we need to be extra cautious + * and check again at the very end too. + */ err = -EACCES; - if (!S_ISREG(file_inode(file)->i_mode)) - goto exit; - - if (path_noexec(&file->f_path)) + if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) || + path_noexec(&file->f_path))) goto exit; err = deny_write_access(file); @@ -1402,7 +1407,12 @@ int begin_new_exec(struct linux_binprm * bprm) if (retval) goto out_unlock; - set_fs(USER_DS); + /* + * Ensure that the uaccess routines can actually operate on userspace + * pointers: + */ + force_uaccess_begin(); + me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | PF_NOFREEZE | PF_NO_SETAFFINITY); flush_thread(); diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c index 4055eb00ea9b..a987919686c0 100644 --- a/fs/exfat/balloc.c +++ b/fs/exfat/balloc.c @@ -158,7 +158,7 @@ int exfat_set_bitmap(struct inode *inode, unsigned int clu) b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); set_bit_le(b, sbi->vol_amap[i]->b_data); - exfat_update_bh(sb, sbi->vol_amap[i], IS_DIRSYNC(inode)); + exfat_update_bh(sbi->vol_amap[i], IS_DIRSYNC(inode)); return 0; } @@ -180,7 +180,7 @@ void exfat_clear_bitmap(struct inode *inode, unsigned int clu) b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); clear_bit_le(b, sbi->vol_amap[i]->b_data); - exfat_update_bh(sb, sbi->vol_amap[i], IS_DIRSYNC(inode)); + exfat_update_bh(sbi->vol_amap[i], IS_DIRSYNC(inode)); if (opts->discard) { int ret_discard; diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index 119abf0d8dd6..573659bfbc55 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -470,7 +470,7 @@ int 
exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir, &ep->dentry.file.access_date, NULL); - exfat_update_bh(sb, bh, IS_DIRSYNC(inode)); + exfat_update_bh(bh, IS_DIRSYNC(inode)); brelse(bh); ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh, &sector); @@ -480,7 +480,7 @@ int exfat_init_dir_entry(struct inode *inode, struct exfat_chain *p_dir, exfat_init_stream_entry(ep, (type == TYPE_FILE) ? ALLOC_FAT_CHAIN : ALLOC_NO_FAT_CHAIN, start_clu, size); - exfat_update_bh(sb, bh, IS_DIRSYNC(inode)); + exfat_update_bh(bh, IS_DIRSYNC(inode)); brelse(bh); return 0; @@ -516,7 +516,7 @@ int exfat_update_dir_chksum(struct inode *inode, struct exfat_chain *p_dir, } fep->dentry.file.checksum = cpu_to_le16(chksum); - exfat_update_bh(sb, fbh, IS_DIRSYNC(inode)); + exfat_update_bh(fbh, IS_DIRSYNC(inode)); release_fbh: brelse(fbh); return ret; @@ -538,7 +538,7 @@ int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir, return -EIO; ep->dentry.file.num_ext = (unsigned char)(num_entries - 1); - exfat_update_bh(sb, bh, sync); + exfat_update_bh(bh, sync); brelse(bh); ep = exfat_get_dentry(sb, p_dir, entry + 1, &bh, &sector); @@ -547,7 +547,7 @@ int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir, ep->dentry.stream.name_len = p_uniname->name_len; ep->dentry.stream.name_hash = cpu_to_le16(p_uniname->name_hash); - exfat_update_bh(sb, bh, sync); + exfat_update_bh(bh, sync); brelse(bh); for (i = EXFAT_FIRST_CLUSTER; i < num_entries; i++) { @@ -556,7 +556,7 @@ int exfat_init_ext_entry(struct inode *inode, struct exfat_chain *p_dir, return -EIO; exfat_init_name_entry(ep, uniname); - exfat_update_bh(sb, bh, sync); + exfat_update_bh(bh, sync); brelse(bh); uniname += EXFAT_FILE_NAME_LEN; } @@ -580,7 +580,7 @@ int exfat_remove_entries(struct inode *inode, struct exfat_chain *p_dir, return -EIO; exfat_set_entry_type(ep, TYPE_DELETED); - exfat_update_bh(sb, bh, IS_DIRSYNC(inode)); + exfat_update_bh(bh, IS_DIRSYNC(inode)); brelse(bh); } @@ -604,16 +604,20 @@
void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es) es->modified = true; } -void exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) +int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync) { - int i; + int i, err = 0; - for (i = 0; i < es->num_bh; i++) { - if (es->modified) - exfat_update_bh(es->sb, es->bh[i], sync); - brelse(es->bh[i]); - } + if (es->modified) + err = exfat_update_bhs(es->bh, es->num_bh, sync); + + for (i = 0; i < es->num_bh; i++) + if (err) + bforget(es->bh[i]); + else + brelse(es->bh[i]); kfree(es); + return err; } static int exfat_walk_fat_chain(struct super_block *sb, diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 75c7bdbeba6d..95d717f8620c 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -13,8 +13,6 @@ #define EXFAT_SUPER_MAGIC 0x2011BAB0UL #define EXFAT_ROOT_INO 1 -#define EXFAT_SB_DIRTY 0 - #define EXFAT_CLUSTERS_UNTRACKED (~0u) /* @@ -226,7 +224,8 @@ struct exfat_sb_info { unsigned int num_FAT_sectors; /* num of FAT sectors */ unsigned int root_dir; /* root dir cluster */ unsigned int dentries_per_clu; /* num of dentries per cluster */ - unsigned int vol_flag; /* volume dirty flag */ + unsigned int vol_flags; /* volume flags */ + unsigned int vol_flags_persistent; /* volume flags to retain */ struct buffer_head *boot_bh; /* buffer_head of BOOT sector */ unsigned int map_clu; /* allocation bitmap start cluster */ @@ -238,7 +237,6 @@ struct exfat_sb_info { unsigned int clu_srch_ptr; /* cluster search pointer */ unsigned int used_clusters; /* number of used clusters */ - unsigned long s_state; struct mutex s_lock; /* superblock lock */ struct exfat_mount_options options; struct nls_table *nls_io; /* Charset used for input and display */ @@ -383,7 +381,8 @@ static inline int exfat_sector_to_cluster(struct exfat_sb_info *sbi, } /* super.c */ -int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flag); +int exfat_set_volume_dirty(struct super_block 
*sb); +int exfat_clear_volume_dirty(struct super_block *sb); /* fatent.c */ #define exfat_get_next_cluster(sb, pclu) exfat_ent_get(sb, *(pclu), pclu) @@ -463,7 +462,7 @@ struct exfat_dentry *exfat_get_dentry_cached(struct exfat_entry_set_cache *es, int num); struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb, struct exfat_chain *p_dir, int entry, unsigned int type); -void exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync); +int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync); int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir); /* inode.c */ @@ -515,7 +514,8 @@ void exfat_set_entry_time(struct exfat_sb_info *sbi, struct timespec64 *ts, u8 *tz, __le16 *time, __le16 *date, u8 *time_cs); u16 exfat_calc_chksum16(void *data, int len, u16 chksum, int type); u32 exfat_calc_chksum32(void *data, int len, u32 chksum, int type); -void exfat_update_bh(struct super_block *sb, struct buffer_head *bh, int sync); +void exfat_update_bh(struct buffer_head *bh, int sync); +int exfat_update_bhs(struct buffer_head **bhs, int nr_bhs, int sync); void exfat_chain_set(struct exfat_chain *ec, unsigned int dir, unsigned int size, unsigned char flags); void exfat_chain_dup(struct exfat_chain *dup, struct exfat_chain *ec); diff --git a/fs/exfat/exfat_raw.h b/fs/exfat/exfat_raw.h index 350ce59cc324..6aec6288e1f2 100644 --- a/fs/exfat/exfat_raw.h +++ b/fs/exfat/exfat_raw.h @@ -14,9 +14,8 @@ #define EXFAT_MAX_FILE_LEN 255 -#define VOL_CLEAN 0x0000 -#define VOL_DIRTY 0x0002 -#define ERR_MEDIUM 0x0004 +#define VOLUME_DIRTY 0x0002 +#define MEDIA_FAILURE 0x0004 #define EXFAT_EOF_CLUSTER 0xFFFFFFFFu #define EXFAT_BAD_CLUSTER 0xFFFFFFF7u diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c index 4e5c5c9c0f2d..c3c9afee7418 100644 --- a/fs/exfat/fatent.c +++ b/fs/exfat/fatent.c @@ -75,7 +75,7 @@ int exfat_ent_set(struct super_block *sb, unsigned int loc, fat_entry = (__le32 *)&(bh->b_data[off]); *fat_entry = 
cpu_to_le32(content); - exfat_update_bh(sb, bh, sb->s_flags & SB_SYNCHRONOUS); + exfat_update_bh(bh, sb->s_flags & SB_SYNCHRONOUS); exfat_mirror_bh(sb, sec, bh); brelse(bh); return 0; @@ -174,7 +174,6 @@ int exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain) return -EIO; } - set_bit(EXFAT_SB_DIRTY, &sbi->s_state); clu = p_chain->dir; if (p_chain->flags == ALLOC_NO_FAT_CHAIN) { @@ -230,21 +229,6 @@ int exfat_find_last_cluster(struct super_block *sb, struct exfat_chain *p_chain, return 0; } -static inline int exfat_sync_bhs(struct buffer_head **bhs, int nr_bhs) -{ - int i, err = 0; - - for (i = 0; i < nr_bhs; i++) - write_dirty_buffer(bhs[i], 0); - - for (i = 0; i < nr_bhs; i++) { - wait_on_buffer(bhs[i]); - if (!err && !buffer_uptodate(bhs[i])) - err = -EIO; - } - return err; -} - int exfat_zeroed_cluster(struct inode *dir, unsigned int clu) { struct super_block *sb = dir->i_sb; @@ -266,41 +250,23 @@ int exfat_zeroed_cluster(struct inode *dir, unsigned int clu) } /* Zeroing the unused blocks on this cluster */ - n = 0; while (blknr < last_blknr) { - bhs[n] = sb_getblk(sb, blknr); - if (!bhs[n]) { - err = -ENOMEM; - goto release_bhs; - } - memset(bhs[n]->b_data, 0, sb->s_blocksize); - exfat_update_bh(sb, bhs[n], 0); - - n++; - blknr++; - - if (n == nr_bhs) { - if (IS_DIRSYNC(dir)) { - err = exfat_sync_bhs(bhs, n); - if (err) - goto release_bhs; + for (n = 0; n < nr_bhs && blknr < last_blknr; n++, blknr++) { + bhs[n] = sb_getblk(sb, blknr); + if (!bhs[n]) { + err = -ENOMEM; + goto release_bhs; } - - for (i = 0; i < n; i++) - brelse(bhs[i]); - n = 0; + memset(bhs[n]->b_data, 0, sb->s_blocksize); } - } - if (IS_DIRSYNC(dir)) { - err = exfat_sync_bhs(bhs, n); + err = exfat_update_bhs(bhs, n, IS_DIRSYNC(dir)); if (err) goto release_bhs; - } - - for (i = 0; i < n; i++) - brelse(bhs[i]); + for (i = 0; i < n; i++) + brelse(bhs[i]); + } return 0; release_bhs: @@ -358,8 +324,6 @@ int exfat_alloc_cluster(struct inode *inode, unsigned int num_alloc, } } - 
set_bit(EXFAT_SB_DIRTY, &sbi->s_state); - p_chain->dir = EXFAT_EOF_CLUSTER; while ((new_clu = exfat_find_free_bitmap(sb, hint_clu)) != diff --git a/fs/exfat/file.c b/fs/exfat/file.c index a6a063830edc..f41f523a58ad 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -106,7 +106,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) if (ei->type != TYPE_FILE && ei->type != TYPE_DIR) return -EPERM; - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); num_clusters_new = EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi); num_clusters_phys = @@ -154,6 +154,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) struct timespec64 ts; struct exfat_dentry *ep, *ep2; struct exfat_entry_set_cache *es; + int err; es = exfat_get_dentry_set(sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES); @@ -188,7 +189,9 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) } exfat_update_dir_chksum_with_entry_set(es); - exfat_free_dentry_set(es, inode_needs_sync(inode)); + err = exfat_free_dentry_set(es, inode_needs_sync(inode)); + if (err) + return err; } /* cut off from the FAT chain */ @@ -217,7 +220,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) if (exfat_free_cluster(inode, &clu)) return -EIO; - exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_clear_volume_dirty(sb); return 0; } diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index cf9ca6c4d046..7f90204adef5 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -39,7 +39,7 @@ static int __exfat_write_inode(struct inode *inode, int sync) if (is_dir && ei->dir.dir == sbi->root_dir && ei->entry == -1) return 0; - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); /* get the directory entry of given file or directory */ es = exfat_get_dentry_set(sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES); @@ -77,8 +77,7 @@ static int __exfat_write_inode(struct inode *inode, int sync) ep2->dentry.stream.size = ep2->dentry.stream.valid_size; exfat_update_dir_chksum_with_entry_set(es); - 
exfat_free_dentry_set(es, sync); - return 0; + return exfat_free_dentry_set(es, sync); } int exfat_write_inode(struct inode *inode, struct writeback_control *wbc) @@ -168,7 +167,7 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset, } if (*clu == EXFAT_EOF_CLUSTER) { - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); new_clu.dir = (last_clu == EXFAT_EOF_CLUSTER) ? EXFAT_EOF_CLUSTER : last_clu + 1; @@ -222,6 +221,7 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset, if (ei->dir.dir != DIR_DELETED && modified) { struct exfat_dentry *ep; struct exfat_entry_set_cache *es; + int err; es = exfat_get_dentry_set(sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES); @@ -240,8 +240,9 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset, ep->dentry.stream.valid_size; exfat_update_dir_chksum_with_entry_set(es); - exfat_free_dentry_set(es, inode_needs_sync(inode)); - + err = exfat_free_dentry_set(es, inode_needs_sync(inode)); + if (err) + return err; } /* end of if != DIR_DELETED */ inode->i_blocks += diff --git a/fs/exfat/misc.c b/fs/exfat/misc.c index 17d41f3d3709..d34e6193258d 100644 --- a/fs/exfat/misc.c +++ b/fs/exfat/misc.c @@ -163,9 +163,8 @@ u32 exfat_calc_chksum32(void *data, int len, u32 chksum, int type) return chksum; } -void exfat_update_bh(struct super_block *sb, struct buffer_head *bh, int sync) +void exfat_update_bh(struct buffer_head *bh, int sync) { - set_bit(EXFAT_SB_DIRTY, &EXFAT_SB(sb)->s_state); set_buffer_uptodate(bh); mark_buffer_dirty(bh); @@ -173,6 +172,25 @@ void exfat_update_bh(struct super_block *sb, struct buffer_head *bh, int sync) sync_dirty_buffer(bh); } +int exfat_update_bhs(struct buffer_head **bhs, int nr_bhs, int sync) +{ + int i, err = 0; + + for (i = 0; i < nr_bhs; i++) { + set_buffer_uptodate(bhs[i]); + mark_buffer_dirty(bhs[i]); + if (sync) + write_dirty_buffer(bhs[i], 0); + } + + for (i = 0; i < nr_bhs && sync; i++) { + wait_on_buffer(bhs[i]); + if (!err 
&& !buffer_uptodate(bhs[i])) + err = -EIO; + } + return err; +} + void exfat_chain_set(struct exfat_chain *ec, unsigned int dir, unsigned int size, unsigned char flags) { diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 2b9e21094a96..e73f20f66cb2 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -387,7 +387,7 @@ static int exfat_find_empty_entry(struct inode *inode, ep->dentry.stream.valid_size = cpu_to_le64(size); ep->dentry.stream.size = ep->dentry.stream.valid_size; ep->dentry.stream.flags = p_dir->flags; - exfat_update_bh(sb, bh, IS_DIRSYNC(inode)); + exfat_update_bh(bh, IS_DIRSYNC(inode)); brelse(bh); if (exfat_update_dir_chksum(inode, &(ei->dir), ei->entry)) @@ -562,10 +562,10 @@ static int exfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, int err; mutex_lock(&EXFAT_SB(sb)->s_lock); - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); err = exfat_add_entry(dir, dentry->d_name.name, &cdir, TYPE_FILE, &info); - exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_clear_volume_dirty(sb); if (err) goto unlock; @@ -834,7 +834,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry) num_entries++; brelse(bh); - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); /* update the directory entry */ if (exfat_remove_entries(dir, &cdir, entry, 0, num_entries)) { err = -EIO; @@ -843,7 +843,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry) /* This doesn't modify ei */ ei->dir.dir = DIR_DELETED; - exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_clear_volume_dirty(sb); inode_inc_iversion(dir); dir->i_mtime = dir->i_atime = current_time(dir); @@ -873,10 +873,10 @@ static int exfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) int err; mutex_lock(&EXFAT_SB(sb)->s_lock); - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); err = exfat_add_entry(dir, dentry->d_name.name, &cdir, TYPE_DIR, &info); - exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_clear_volume_dirty(sb); if 
(err) goto unlock; @@ -1001,14 +1001,14 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry) num_entries++; brelse(bh); - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); err = exfat_remove_entries(dir, &cdir, entry, 0, num_entries); if (err) { exfat_err(sb, "failed to exfat_remove_entries : err(%d)", err); goto unlock; } ei->dir.dir = DIR_DELETED; - exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_clear_volume_dirty(sb); inode_inc_iversion(dir); dir->i_mtime = dir->i_atime = current_time(dir); @@ -1071,7 +1071,7 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir, epnew->dentry.file.attr |= cpu_to_le16(ATTR_ARCHIVE); ei->attr |= ATTR_ARCHIVE; } - exfat_update_bh(sb, new_bh, sync); + exfat_update_bh(new_bh, sync); brelse(old_bh); brelse(new_bh); @@ -1087,7 +1087,7 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir, } memcpy(epnew, epold, DENTRY_SIZE); - exfat_update_bh(sb, new_bh, sync); + exfat_update_bh(new_bh, sync); brelse(old_bh); brelse(new_bh); @@ -1104,7 +1104,7 @@ static int exfat_rename_file(struct inode *inode, struct exfat_chain *p_dir, epold->dentry.file.attr |= cpu_to_le16(ATTR_ARCHIVE); ei->attr |= ATTR_ARCHIVE; } - exfat_update_bh(sb, old_bh, sync); + exfat_update_bh(old_bh, sync); brelse(old_bh); ret = exfat_init_ext_entry(inode, p_dir, oldentry, num_new_entries, p_uniname); @@ -1159,7 +1159,7 @@ static int exfat_move_file(struct inode *inode, struct exfat_chain *p_olddir, epnew->dentry.file.attr |= cpu_to_le16(ATTR_ARCHIVE); ei->attr |= ATTR_ARCHIVE; } - exfat_update_bh(sb, new_bh, IS_DIRSYNC(inode)); + exfat_update_bh(new_bh, IS_DIRSYNC(inode)); brelse(mov_bh); brelse(new_bh); @@ -1175,7 +1175,7 @@ static int exfat_move_file(struct inode *inode, struct exfat_chain *p_olddir, } memcpy(epnew, epmov, DENTRY_SIZE); - exfat_update_bh(sb, new_bh, IS_DIRSYNC(inode)); + exfat_update_bh(new_bh, IS_DIRSYNC(inode)); brelse(mov_bh); brelse(new_bh); @@ -1300,7 +1300,7 @@ static 
int __exfat_rename(struct inode *old_parent_inode, if (ret) goto out; - exfat_set_vol_flags(sb, VOL_DIRTY); + exfat_set_volume_dirty(sb); if (olddir.dir == newdir.dir) ret = exfat_rename_file(new_parent_inode, &olddir, dentry, @@ -1355,7 +1355,7 @@ del_out: */ new_ei->dir.dir = DIR_DELETED; } - exfat_set_vol_flags(sb, VOL_CLEAN); + exfat_clear_volume_dirty(sb); out: return ret; } diff --git a/fs/exfat/super.c b/fs/exfat/super.c index 253a92460d52..3b6a1659892f 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -45,9 +45,6 @@ static void exfat_put_super(struct super_block *sb) struct exfat_sb_info *sbi = EXFAT_SB(sb); mutex_lock(&sbi->s_lock); - if (test_and_clear_bit(EXFAT_SB_DIRTY, &sbi->s_state)) - sync_blockdev(sb->s_bdev); - exfat_set_vol_flags(sb, VOL_CLEAN); exfat_free_bitmap(sbi); brelse(sbi->boot_bh); mutex_unlock(&sbi->s_lock); @@ -60,13 +57,14 @@ static int exfat_sync_fs(struct super_block *sb, int wait) struct exfat_sb_info *sbi = EXFAT_SB(sb); int err = 0; + if (!wait) + return 0; + /* If there are some dirty buffers in the bdev inode */ mutex_lock(&sbi->s_lock); - if (test_and_clear_bit(EXFAT_SB_DIRTY, &sbi->s_state)) { - sync_blockdev(sb->s_bdev); - if (exfat_set_vol_flags(sb, VOL_CLEAN)) - err = -EIO; - } + sync_blockdev(sb->s_bdev); + if (exfat_clear_volume_dirty(sb)) + err = -EIO; mutex_unlock(&sbi->s_lock); return err; } @@ -98,17 +96,20 @@ static int exfat_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flag) +static int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flags) { struct exfat_sb_info *sbi = EXFAT_SB(sb); struct boot_sector *p_boot = (struct boot_sector *)sbi->boot_bh->b_data; bool sync; + /* retain persistent-flags */ + new_flags |= sbi->vol_flags_persistent; + /* flags are not changed */ - if (sbi->vol_flag == new_flag) + if (sbi->vol_flags == new_flags) return 0; - sbi->vol_flag = new_flag; + sbi->vol_flags = new_flags; /* skip 
updating volume dirty flag, * if this volume has been mounted with read-only @@ -116,9 +117,9 @@ int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flag) if (sb_rdonly(sb)) return 0; - p_boot->vol_flags = cpu_to_le16(new_flag); + p_boot->vol_flags = cpu_to_le16(new_flags); - if (new_flag == VOL_DIRTY && !buffer_dirty(sbi->boot_bh)) + if ((new_flags & VOLUME_DIRTY) && !buffer_dirty(sbi->boot_bh)) sync = true; else sync = false; @@ -131,6 +132,20 @@ int exfat_set_vol_flags(struct super_block *sb, unsigned short new_flag) return 0; } +int exfat_set_volume_dirty(struct super_block *sb) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + return exfat_set_vol_flags(sb, sbi->vol_flags | VOLUME_DIRTY); +} + +int exfat_clear_volume_dirty(struct super_block *sb) +{ + struct exfat_sb_info *sbi = EXFAT_SB(sb); + + return exfat_set_vol_flags(sb, sbi->vol_flags & ~VOLUME_DIRTY); +} + static int exfat_show_options(struct seq_file *m, struct dentry *root) { struct super_block *sb = root->d_sb; @@ -459,7 +474,8 @@ static int exfat_read_boot_sector(struct super_block *sb) sbi->dentries_per_clu = 1 << (sbi->cluster_size_bits - DENTRY_SIZE_BITS); - sbi->vol_flag = le16_to_cpu(p_boot->vol_flags); + sbi->vol_flags = le16_to_cpu(p_boot->vol_flags); + sbi->vol_flags_persistent = sbi->vol_flags & (VOLUME_DIRTY | MEDIA_FAILURE); sbi->clu_srch_ptr = EXFAT_FIRST_CLUSTER; sbi->used_clusters = EXFAT_CLUSTERS_UNTRACKED; @@ -474,9 +490,9 @@ static int exfat_read_boot_sector(struct super_block *sb) exfat_err(sb, "bogus data start sector"); return -EINVAL; } - if (sbi->vol_flag & VOL_DIRTY) + if (sbi->vol_flags & VOLUME_DIRTY) exfat_warn(sb, "Volume was not properly unmounted. Some data may be corrupt. Please run fsck."); - if (sbi->vol_flag & ERR_MEDIUM) + if (sbi->vol_flags & MEDIA_FAILURE) exfat_warn(sb, "Medium has reported failures. 
Some data may be lost."); /* exFAT file size is limited by a disk volume size */ diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig index ca31993dcb47..66532a71e8fd 100644 --- a/fs/fat/Kconfig +++ b/fs/fat/Kconfig @@ -41,7 +41,7 @@ config MSDOS_FS they are compressed; to access compressed MSDOS partitions under Linux, you can either use the DOS emulator DOSEMU, described in the DOSEMU-HOWTO, available from - <http://www.tldp.org/docs.html#howto>, or try dmsdosfs in + <https://www.tldp.org/docs.html#howto>, or try dmsdosfs in <ftp://ibiblio.org/pub/Linux/system/filesystems/dosfs/>. If you intend to use dosemu with a non-compressed MSDOS partition, say Y here) and MSDOS floppies. This means that file access becomes diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index bbfe18c07417..f7e3304b7802 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -657,6 +657,9 @@ static void fat_ra_init(struct super_block *sb, struct fatent_ra *ra, unsigned long ra_pages = sb->s_bdi->ra_pages; unsigned int reada_blocks; + if (fatent->entry >= ent_limit) + return; + if (ra_pages > sb->s_bdi->io_pages) ra_pages = rounddown(ra_pages, sb->s_bdi->io_pages); reada_blocks = ra_pages << (PAGE_SHIFT - sb->s_blocksize_bits + 1); diff --git a/fs/fat/file.c b/fs/fat/file.c index 42134c58c87e..f9ee27cf4d7c 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -25,9 +25,9 @@ static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr) { u32 attr; - inode_lock(inode); + inode_lock_shared(inode); attr = fat_make_attrs(inode); - inode_unlock(inode); + inode_unlock_shared(inode); return put_user(attr, user_attr); } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 523954d00dff..b5c109703daa 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1364,6 +1364,12 @@ hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_magic = HUGETLBFS_MAGIC; sb->s_op = &hugetlbfs_ops; sb->s_time_gran = 1; + + /* + * Due to the special and limited functionality of 
hugetlbfs, it does + * not work well as a stacking filesystem. + */ + sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; sb->s_root = d_make_root(hugetlbfs_get_root(sb, ctx)); if (!sb->s_root) goto out_free; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 7cb5fd38eb14..7b09a9158e40 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -150,6 +150,25 @@ static int minix_remount (struct super_block * sb, int * flags, char * data) return 0; } +static bool minix_check_superblock(struct super_block *sb) +{ + struct minix_sb_info *sbi = minix_sb(sb); + + if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0) + return false; + + /* + * s_max_size must not exceed the block mapping limitation. This check + * is only needed for V1 filesystems, since V2/V3 support an extra level + * of indirect blocks which places the limit well above U32_MAX. + */ + if (sbi->s_version == MINIX_V1 && + sb->s_maxbytes > (7 + 512 + 512*512) * BLOCK_SIZE) + return false; + + return true; +} + static int minix_fill_super(struct super_block *s, void *data, int silent) { struct buffer_head *bh; @@ -185,7 +204,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) sbi->s_zmap_blocks = ms->s_zmap_blocks; sbi->s_firstdatazone = ms->s_firstdatazone; sbi->s_log_zone_size = ms->s_log_zone_size; - sbi->s_max_size = ms->s_max_size; + s->s_maxbytes = ms->s_max_size; s->s_magic = ms->s_magic; if (s->s_magic == MINIX_SUPER_MAGIC) { sbi->s_version = MINIX_V1; @@ -216,7 +235,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) sbi->s_zmap_blocks = m3s->s_zmap_blocks; sbi->s_firstdatazone = m3s->s_firstdatazone; sbi->s_log_zone_size = m3s->s_log_zone_size; - sbi->s_max_size = m3s->s_max_size; + s->s_maxbytes = m3s->s_max_size; sbi->s_ninodes = m3s->s_ninodes; sbi->s_nzones = m3s->s_zones; sbi->s_dirsize = 64; @@ -228,11 +247,12 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) } else goto out_no_fs; + if 
(!minix_check_superblock(s)) + goto out_illegal_sb; + /* * Allocate the buffer map to keep the superblock small. */ - if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0) - goto out_illegal_sb; i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh); map = kzalloc(i, GFP_KERNEL); if (!map) @@ -468,6 +488,13 @@ static struct inode *V1_minix_iget(struct inode *inode) iget_failed(inode); return ERR_PTR(-EIO); } + if (raw_inode->i_nlinks == 0) { + printk("MINIX-fs: deleted inode referenced: %lu\n", + inode->i_ino); + brelse(bh); + iget_failed(inode); + return ERR_PTR(-ESTALE); + } inode->i_mode = raw_inode->i_mode; i_uid_write(inode, raw_inode->i_uid); i_gid_write(inode, raw_inode->i_gid); @@ -501,6 +528,13 @@ static struct inode *V2_minix_iget(struct inode *inode) iget_failed(inode); return ERR_PTR(-EIO); } + if (raw_inode->i_nlinks == 0) { + printk("MINIX-fs: deleted inode referenced: %lu\n", + inode->i_ino); + brelse(bh); + iget_failed(inode); + return ERR_PTR(-ESTALE); + } inode->i_mode = raw_inode->i_mode; i_uid_write(inode, raw_inode->i_uid); i_gid_write(inode, raw_inode->i_gid); diff --git a/fs/minix/itree_common.c b/fs/minix/itree_common.c index 043c3fdbc8e7..446148792f41 100644 --- a/fs/minix/itree_common.c +++ b/fs/minix/itree_common.c @@ -75,6 +75,7 @@ static int alloc_branch(struct inode *inode, int n = 0; int i; int parent = minix_new_block(inode); + int err = -ENOSPC; branch[0].key = cpu_to_block(parent); if (parent) for (n = 1; n < num; n++) { @@ -85,6 +86,11 @@ static int alloc_branch(struct inode *inode, break; branch[n].key = cpu_to_block(nr); bh = sb_getblk(inode->i_sb, parent); + if (!bh) { + minix_free_block(inode, nr); + err = -ENOMEM; + break; + } lock_buffer(bh); memset(bh->b_data, 0, bh->b_size); branch[n].bh = bh; @@ -103,7 +109,7 @@ static int alloc_branch(struct inode *inode, bforget(branch[i].bh); for (i = 0; i < n; i++) minix_free_block(inode, block_to_cpu(branch[i].key)); - return -ENOSPC; + return err; } static inline int 
splice_branch(struct inode *inode, diff --git a/fs/minix/itree_v1.c b/fs/minix/itree_v1.c index 046cc96ee7ad..1fed906042aa 100644 --- a/fs/minix/itree_v1.c +++ b/fs/minix/itree_v1.c @@ -29,12 +29,12 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) if (block < 0) { printk("MINIX-fs: block_to_path: block %ld < 0 on dev %pg\n", block, inode->i_sb->s_bdev); - } else if (block >= (minix_sb(inode->i_sb)->s_max_size/BLOCK_SIZE)) { - if (printk_ratelimit()) - printk("MINIX-fs: block_to_path: " - "block %ld too big on dev %pg\n", - block, inode->i_sb->s_bdev); - } else if (block < 7) { + return 0; + } + if ((u64)block * BLOCK_SIZE >= inode->i_sb->s_maxbytes) + return 0; + + if (block < 7) { offsets[n++] = block; } else if ((block -= 7) < 512) { offsets[n++] = 7; diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c index f7fc7ecccccc..9d00f31a2d9d 100644 --- a/fs/minix/itree_v2.c +++ b/fs/minix/itree_v2.c @@ -32,13 +32,12 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH]) if (block < 0) { printk("MINIX-fs: block_to_path: block %ld < 0 on dev %pg\n", block, sb->s_bdev); - } else if ((u64)block * (u64)sb->s_blocksize >= - minix_sb(sb)->s_max_size) { - if (printk_ratelimit()) - printk("MINIX-fs: block_to_path: " - "block %ld too big on dev %pg\n", - block, sb->s_bdev); - } else if (block < DIRCOUNT) { + return 0; + } + if ((u64)block * (u64)sb->s_blocksize >= sb->s_maxbytes) + return 0; + + if (block < DIRCOUNT) { offsets[n++] = block; } else if ((block -= DIRCOUNT) < INDIRCOUNT(sb)) { offsets[n++] = DIRCOUNT; diff --git a/fs/minix/minix.h b/fs/minix/minix.h index df081e8afcc3..168d45d3de73 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -32,7 +32,6 @@ struct minix_sb_info { unsigned long s_zmap_blocks; unsigned long s_firstdatazone; unsigned long s_log_zone_size; - unsigned long s_max_size; int s_dirsize; int s_namelen; struct buffer_head ** s_imap; diff --git a/fs/namei.c b/fs/namei.c index 
fde8fe086c09..2112e578dccc 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2849,18 +2849,24 @@ static int may_open(const struct path *path, int acc_mode, int flag) case S_IFLNK: return -ELOOP; case S_IFDIR: - if (acc_mode & MAY_WRITE) + if (acc_mode & (MAY_WRITE | MAY_EXEC)) return -EISDIR; break; case S_IFBLK: case S_IFCHR: if (!may_open_dev(path)) return -EACCES; - /*FALLTHRU*/ + fallthrough; case S_IFIFO: case S_IFSOCK: + if (acc_mode & MAY_EXEC) + return -EACCES; flag &= ~O_TRUNC; break; + case S_IFREG: + if ((acc_mode & MAY_EXEC) && path_noexec(path)) + return -EACCES; + break; } error = inode_permission(inode, MAY_OPEN | acc_mode); @@ -3770,11 +3776,11 @@ exit2: mnt_drop_write(path.mnt); exit1: path_put(&path); - putname(name); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } + putname(name); return error; } diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index 235b959fc2b3..adf3bb0a8048 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -613,10 +613,10 @@ void nilfs_palloc_commit_free_entry(struct inode *inode, lock = nilfs_mdt_bgl_lock(inode, group); if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) - nilfs_msg(inode->i_sb, KERN_WARNING, - "%s (ino=%lu): entry number %llu already freed", - __func__, inode->i_ino, - (unsigned long long)req->pr_entry_nr); + nilfs_warn(inode->i_sb, + "%s (ino=%lu): entry number %llu already freed", + __func__, inode->i_ino, + (unsigned long long)req->pr_entry_nr); else nilfs_palloc_group_desc_add_entries(desc, lock, 1); @@ -654,10 +654,10 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, lock = nilfs_mdt_bgl_lock(inode, group); if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) - nilfs_msg(inode->i_sb, KERN_WARNING, - "%s (ino=%lu): entry number %llu already freed", - __func__, inode->i_ino, - (unsigned long long)req->pr_entry_nr); + nilfs_warn(inode->i_sb, + "%s (ino=%lu): entry number %llu already freed", + __func__, inode->i_ino, + (unsigned long 
long)req->pr_entry_nr); else nilfs_palloc_group_desc_add_entries(desc, lock, 1); @@ -763,10 +763,10 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) do { if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) { - nilfs_msg(inode->i_sb, KERN_WARNING, - "%s (ino=%lu): entry number %llu already freed", - __func__, inode->i_ino, - (unsigned long long)entry_nrs[j]); + nilfs_warn(inode->i_sb, + "%s (ino=%lu): entry number %llu already freed", + __func__, inode->i_ino, + (unsigned long long)entry_nrs[j]); } else { n++; } @@ -808,10 +808,10 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) ret = nilfs_palloc_delete_entry_block(inode, last_nrs[k]); if (ret && ret != -ENOENT) - nilfs_msg(inode->i_sb, KERN_WARNING, - "error %d deleting block that object (entry=%llu, ino=%lu) belongs to", - ret, (unsigned long long)last_nrs[k], - inode->i_ino); + nilfs_warn(inode->i_sb, + "error %d deleting block that object (entry=%llu, ino=%lu) belongs to", + ret, (unsigned long long)last_nrs[k], + inode->i_ino); } desc_kaddr = kmap_atomic(desc_bh->b_page); @@ -826,9 +826,9 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) if (nfree == nilfs_palloc_entries_per_group(inode)) { ret = nilfs_palloc_delete_bitmap_block(inode, group); if (ret && ret != -ENOENT) - nilfs_msg(inode->i_sb, KERN_WARNING, - "error %d deleting bitmap block of group=%lu, ino=%lu", - ret, group, inode->i_ino); + nilfs_warn(inode->i_sb, + "error %d deleting bitmap block of group=%lu, ino=%lu", + ret, group, inode->i_ino); } } return 0; diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 23e043eca237..f42ab57201e7 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -351,10 +351,10 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, (flags & NILFS_BTREE_NODE_ROOT) || nchildren < 0 || nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { - nilfs_msg(inode->i_sb, KERN_CRIT, - "bad btree node (ino=%lu, 
blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", - inode->i_ino, (unsigned long long)blocknr, level, - flags, nchildren); + nilfs_crit(inode->i_sb, + "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", + inode->i_ino, (unsigned long long)blocknr, level, + flags, nchildren); ret = 1; } return ret; @@ -381,9 +381,9 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, level >= NILFS_BTREE_LEVEL_MAX || nchildren < 0 || nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { - nilfs_msg(inode->i_sb, KERN_CRIT, - "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", - inode->i_ino, level, flags, nchildren); + nilfs_crit(inode->i_sb, + "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", + inode->i_ino, level, flags, nchildren); ret = 1; } return ret; @@ -450,10 +450,10 @@ static int nilfs_btree_bad_node(const struct nilfs_bmap *btree, { if (unlikely(nilfs_btree_node_get_level(node) != level)) { dump_stack(); - nilfs_msg(btree->b_inode->i_sb, KERN_CRIT, - "btree level mismatch (ino=%lu): %d != %d", - btree->b_inode->i_ino, - nilfs_btree_node_get_level(node), level); + nilfs_crit(btree->b_inode->i_sb, + "btree level mismatch (ino=%lu): %d != %d", + btree->b_inode->i_ino, + nilfs_btree_node_get_level(node), level); return 1; } return 0; @@ -508,7 +508,7 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, out_no_wait: if (!buffer_uptodate(bh)) { - nilfs_msg(btree->b_inode->i_sb, KERN_ERR, + nilfs_err(btree->b_inode->i_sb, "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)", btree->b_inode->i_ino, (unsigned long long)ptr); brelse(bh); @@ -2074,10 +2074,10 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree, ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { if (unlikely(ret == -ENOENT)) - nilfs_msg(btree->b_inode->i_sb, KERN_CRIT, - "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, 
level=%d", - btree->b_inode->i_ino, - (unsigned long long)key, level); + nilfs_crit(btree->b_inode->i_sb, + "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", + btree->b_inode->i_ino, + (unsigned long long)key, level); goto out; } @@ -2114,11 +2114,11 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, if (level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX) { dump_stack(); - nilfs_msg(btree->b_inode->i_sb, KERN_WARNING, - "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)", - level, (unsigned long long)key, - btree->b_inode->i_ino, - (unsigned long long)bh->b_blocknr); + nilfs_warn(btree->b_inode->i_sb, + "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)", + level, (unsigned long long)key, + btree->b_inode->i_ino, + (unsigned long long)bh->b_blocknr); return; } diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 8d41311b5db4..86d4d850d130 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -322,7 +322,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, int ret, ncps, nicps, nss, count, i; if (unlikely(start == 0 || start > end)) { - nilfs_msg(cpfile->i_sb, KERN_ERR, + nilfs_err(cpfile->i_sb, "cannot delete checkpoints: invalid range [%llu, %llu)", (unsigned long long)start, (unsigned long long)end); return -EINVAL; @@ -376,7 +376,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, cpfile, cno); if (ret == 0) continue; - nilfs_msg(cpfile->i_sb, KERN_ERR, + nilfs_err(cpfile->i_sb, "error %d deleting checkpoint block", ret); break; @@ -981,12 +981,10 @@ int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, int err; if (cpsize > sb->s_blocksize) { - nilfs_msg(sb, KERN_ERR, - "too large checkpoint size: %zu bytes", cpsize); + nilfs_err(sb, "too large checkpoint size: %zu bytes", cpsize); return -EINVAL; } else if (cpsize < NILFS_MIN_CHECKPOINT_SIZE) { - nilfs_msg(sb, KERN_ERR, - "too small checkpoint size: %zu bytes", cpsize); + 
nilfs_err(sb, "too small checkpoint size: %zu bytes", cpsize); return -EINVAL; } diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 6f4066636be9..8bccdf1158fc 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -340,11 +340,11 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) kaddr = kmap_atomic(entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { - nilfs_msg(dat->i_sb, KERN_CRIT, - "%s: invalid vblocknr = %llu, [%llu, %llu)", - __func__, (unsigned long long)vblocknr, - (unsigned long long)le64_to_cpu(entry->de_start), - (unsigned long long)le64_to_cpu(entry->de_end)); + nilfs_crit(dat->i_sb, + "%s: invalid vblocknr = %llu, [%llu, %llu)", + __func__, (unsigned long long)vblocknr, + (unsigned long long)le64_to_cpu(entry->de_start), + (unsigned long long)le64_to_cpu(entry->de_end)); kunmap_atomic(kaddr); brelse(entry_bh); return -EINVAL; @@ -471,11 +471,11 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size, int err; if (entry_size > sb->s_blocksize) { - nilfs_msg(sb, KERN_ERR, "too large DAT entry size: %zu bytes", + nilfs_err(sb, "too large DAT entry size: %zu bytes", entry_size); return -EINVAL; } else if (entry_size < NILFS_MIN_DAT_ENTRY_SIZE) { - nilfs_msg(sb, KERN_ERR, "too small DAT entry size: %zu bytes", + nilfs_err(sb, "too small DAT entry size: %zu bytes", entry_size); return -EINVAL; } diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index 533e24ea3a88..f353101955e3 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -328,16 +328,18 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap, key = nilfs_bmap_data_get_key(bmap, *bh); if (unlikely(key > NILFS_DIRECT_KEY_MAX)) { - nilfs_msg(bmap->b_inode->i_sb, KERN_CRIT, - "%s (ino=%lu): invalid key: %llu", __func__, - bmap->b_inode->i_ino, (unsigned long long)key); + nilfs_crit(bmap->b_inode->i_sb, + "%s (ino=%lu): invalid key: %llu", + __func__, + 
bmap->b_inode->i_ino, (unsigned long long)key); return -EINVAL; } ptr = nilfs_direct_get_ptr(bmap, key); if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) { - nilfs_msg(bmap->b_inode->i_sb, KERN_CRIT, - "%s (ino=%lu): invalid pointer: %llu", __func__, - bmap->b_inode->i_ino, (unsigned long long)ptr); + nilfs_crit(bmap->b_inode->i_sb, + "%s (ino=%lu): invalid pointer: %llu", + __func__, + bmap->b_inode->i_ino, (unsigned long long)ptr); return -EINVAL; } diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index aa3c328ee189..448320496856 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -142,7 +142,7 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh) if (!buffer_uptodate(bh)) { struct inode *inode = bh->b_page->mapping->host; - nilfs_msg(inode->i_sb, KERN_ERR, + nilfs_err(inode->i_sb, "I/O error reading %s block for GC (ino=%lu, vblocknr=%llu)", buffer_nilfs_node(bh) ? "node" : "data", inode->i_ino, (unsigned long long)bh->b_blocknr); diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index 4140d232cadc..02727ed3a7c6 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -142,8 +142,8 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, err = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh); if (unlikely(err)) - nilfs_msg(sb, KERN_WARNING, "error %d reading inode: ino=%lu", - err, (unsigned long)ino); + nilfs_warn(sb, "error %d reading inode: ino=%lu", + err, (unsigned long)ino); return err; } diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 28009ec54420..745d371d6fea 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -104,10 +104,10 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, * However, the page having this block must * be locked in this case. 
*/ - nilfs_msg(inode->i_sb, KERN_WARNING, - "%s (ino=%lu): a race condition while inserting a data block at offset=%llu", - __func__, inode->i_ino, - (unsigned long long)blkoff); + nilfs_warn(inode->i_sb, + "%s (ino=%lu): a race condition while inserting a data block at offset=%llu", + __func__, inode->i_ino, + (unsigned long long)blkoff); err = 0; } nilfs_transaction_abort(inode->i_sb); @@ -388,7 +388,8 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) failed_after_creation: clear_nlink(inode); - unlock_new_inode(inode); + if (inode->i_state & I_NEW) + unlock_new_inode(inode); iput(inode); /* * raw_inode will be deleted through * nilfs_evict_inode(). @@ -706,9 +707,8 @@ repeat: goto repeat; failed: - nilfs_msg(ii->vfs_inode.i_sb, KERN_WARNING, - "error %d truncating bmap (ino=%lu)", ret, - ii->vfs_inode.i_ino); + nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%lu)", + ret, ii->vfs_inode.i_ino); } void nilfs_truncate(struct inode *inode) @@ -919,9 +919,9 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty) * This will happen when somebody is freeing * this inode. 
*/ - nilfs_msg(inode->i_sb, KERN_WARNING, - "cannot set file dirty (ino=%lu): the file is being freed", - inode->i_ino); + nilfs_warn(inode->i_sb, + "cannot set file dirty (ino=%lu): the file is being freed", + inode->i_ino); spin_unlock(&nilfs->ns_inode_lock); return -EINVAL; /* * NILFS_I_DIRTY may remain for @@ -942,9 +942,9 @@ int __nilfs_mark_inode_dirty(struct inode *inode, int flags) err = nilfs_load_inode_block(inode, &ibh); if (unlikely(err)) { - nilfs_msg(inode->i_sb, KERN_WARNING, - "cannot mark inode dirty (ino=%lu): error %d loading inode block", - inode->i_ino, err); + nilfs_warn(inode->i_sb, + "cannot mark inode dirty (ino=%lu): error %d loading inode block", + inode->i_ino, err); return err; } nilfs_update_inode(inode, ibh, flags); @@ -970,8 +970,8 @@ void nilfs_dirty_inode(struct inode *inode, int flags) struct nilfs_mdt_info *mdi = NILFS_MDT(inode); if (is_bad_inode(inode)) { - nilfs_msg(inode->i_sb, KERN_WARNING, - "tried to mark bad_inode dirty. ignored."); + nilfs_warn(inode->i_sb, + "tried to mark bad_inode dirty. ignored."); dump_stack(); return; } diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 4ba73dbf3e8d..07d26f61f22a 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -569,25 +569,25 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode, if (unlikely(ret < 0)) { if (ret == -ENOENT) - nilfs_msg(inode->i_sb, KERN_CRIT, - "%s: invalid virtual block address (%s): ino=%llu, cno=%llu, offset=%llu, blocknr=%llu, vblocknr=%llu", - __func__, vdesc->vd_flags ? "node" : "data", - (unsigned long long)vdesc->vd_ino, - (unsigned long long)vdesc->vd_cno, - (unsigned long long)vdesc->vd_offset, - (unsigned long long)vdesc->vd_blocknr, - (unsigned long long)vdesc->vd_vblocknr); + nilfs_crit(inode->i_sb, + "%s: invalid virtual block address (%s): ino=%llu, cno=%llu, offset=%llu, blocknr=%llu, vblocknr=%llu", + __func__, vdesc->vd_flags ? 
"node" : "data", + (unsigned long long)vdesc->vd_ino, + (unsigned long long)vdesc->vd_cno, + (unsigned long long)vdesc->vd_offset, + (unsigned long long)vdesc->vd_blocknr, + (unsigned long long)vdesc->vd_vblocknr); return ret; } if (unlikely(!list_empty(&bh->b_assoc_buffers))) { - nilfs_msg(inode->i_sb, KERN_CRIT, - "%s: conflicting %s buffer: ino=%llu, cno=%llu, offset=%llu, blocknr=%llu, vblocknr=%llu", - __func__, vdesc->vd_flags ? "node" : "data", - (unsigned long long)vdesc->vd_ino, - (unsigned long long)vdesc->vd_cno, - (unsigned long long)vdesc->vd_offset, - (unsigned long long)vdesc->vd_blocknr, - (unsigned long long)vdesc->vd_vblocknr); + nilfs_crit(inode->i_sb, + "%s: conflicting %s buffer: ino=%llu, cno=%llu, offset=%llu, blocknr=%llu, vblocknr=%llu", + __func__, vdesc->vd_flags ? "node" : "data", + (unsigned long long)vdesc->vd_ino, + (unsigned long long)vdesc->vd_cno, + (unsigned long long)vdesc->vd_offset, + (unsigned long long)vdesc->vd_blocknr, + (unsigned long long)vdesc->vd_vblocknr); brelse(bh); return -EEXIST; } @@ -837,8 +837,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs, return 0; failed: - nilfs_msg(nilfs->ns_sb, KERN_ERR, "error %d preparing GC: %s", ret, - msg); + nilfs_err(nilfs->ns_sb, "error %d preparing GC: %s", ret, msg); return ret; } @@ -947,7 +946,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]); if (ret < 0) { - nilfs_msg(inode->i_sb, KERN_ERR, + nilfs_err(inode->i_sb, "error %d preparing GC: cannot read source blocks", ret); } else { diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 700870a92bc4..c0361ce45f62 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -199,7 +199,7 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, out_no_wait: err = -EIO; if (!buffer_uptodate(first_bh)) { - nilfs_msg(inode->i_sb, KERN_ERR, + nilfs_err(inode->i_sb, "I/O error reading meta-data file (ino=%lu, 
block-offset=%lu)", inode->i_ino, block); goto failed_bh; diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 9fe6d4ab74f0..a6ec7961d4f5 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -272,9 +272,9 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) goto out; if (!inode->i_nlink) { - nilfs_msg(inode->i_sb, KERN_WARNING, - "deleting nonexistent file (ino=%lu), %d", - inode->i_ino, inode->i_nlink); + nilfs_warn(inode->i_sb, + "deleting nonexistent file (ino=%lu), %d", + inode->i_ino, inode->i_nlink); set_nlink(inode, 1); } err = nilfs_delete_entry(de, page); diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 42395ba52da6..f8450ee3fd06 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -289,9 +289,8 @@ static inline int nilfs_mark_inode_dirty_sync(struct inode *inode) /* super.c */ extern struct inode *nilfs_alloc_inode(struct super_block *); -extern __printf(3, 4) -void __nilfs_msg(struct super_block *sb, const char *level, - const char *fmt, ...); +__printf(2, 3) +void __nilfs_msg(struct super_block *sb, const char *fmt, ...); extern __printf(3, 4) void __nilfs_error(struct super_block *sb, const char *function, const char *fmt, ...); @@ -299,7 +298,7 @@ void __nilfs_error(struct super_block *sb, const char *function, #ifdef CONFIG_PRINTK #define nilfs_msg(sb, level, fmt, ...) \ - __nilfs_msg(sb, level, fmt, ##__VA_ARGS__) + __nilfs_msg(sb, level fmt, ##__VA_ARGS__) #define nilfs_error(sb, fmt, ...) \ __nilfs_error(sb, __func__, fmt, ##__VA_ARGS__) @@ -307,7 +306,7 @@ void __nilfs_error(struct super_block *sb, const char *function, #define nilfs_msg(sb, level, fmt, ...) \ do { \ - no_printk(fmt, ##__VA_ARGS__); \ + no_printk(level fmt, ##__VA_ARGS__); \ (void)(sb); \ } while (0) #define nilfs_error(sb, fmt, ...) \ @@ -318,6 +317,15 @@ void __nilfs_error(struct super_block *sb, const char *function, #endif /* CONFIG_PRINTK */ +#define nilfs_crit(sb, fmt, ...) 
\ + nilfs_msg(sb, KERN_CRIT, fmt, ##__VA_ARGS__) +#define nilfs_err(sb, fmt, ...) \ + nilfs_msg(sb, KERN_ERR, fmt, ##__VA_ARGS__) +#define nilfs_warn(sb, fmt, ...) \ + nilfs_msg(sb, KERN_WARNING, fmt, ##__VA_ARGS__) +#define nilfs_info(sb, fmt, ...) \ + nilfs_msg(sb, KERN_INFO, fmt, ##__VA_ARGS__) + extern struct nilfs_super_block * nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); extern int nilfs_store_magic_and_option(struct super_block *, diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index d7fc8d369d89..b175f1330408 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -391,9 +391,8 @@ void nilfs_clear_dirty_page(struct page *page, bool silent) BUG_ON(!PageLocked(page)); if (!silent) - nilfs_msg(sb, KERN_WARNING, - "discard dirty page: offset=%lld, ino=%lu", - page_offset(page), inode->i_ino); + nilfs_warn(sb, "discard dirty page: offset=%lld, ino=%lu", + page_offset(page), inode->i_ino); ClearPageUptodate(page); ClearPageMappedToDisk(page); @@ -409,9 +408,9 @@ void nilfs_clear_dirty_page(struct page *page, bool silent) do { lock_buffer(bh); if (!silent) - nilfs_msg(sb, KERN_WARNING, - "discard dirty block: blocknr=%llu, size=%zu", - (u64)bh->b_blocknr, bh->b_size); + nilfs_warn(sb, + "discard dirty block: blocknr=%llu, size=%zu", + (u64)bh->b_blocknr, bh->b_size); set_mask_bits(&bh->b_state, clear_bits, 0); unlock_buffer(bh); diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 140b663e91c7..0b453ef8fae5 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -51,7 +51,7 @@ static int nilfs_warn_segment_error(struct super_block *sb, int err) switch (err) { case NILFS_SEG_FAIL_IO: - nilfs_msg(sb, KERN_ERR, "I/O error reading segment"); + nilfs_err(sb, "I/O error reading segment"); return -EIO; case NILFS_SEG_FAIL_MAGIC: msg = "Magic number mismatch"; @@ -72,10 +72,10 @@ static int nilfs_warn_segment_error(struct super_block *sb, int err) msg = "No super root in the last segment"; break; default: - 
nilfs_msg(sb, KERN_ERR, "unrecognized segment error %d", err); + nilfs_err(sb, "unrecognized segment error %d", err); return -EINVAL; } - nilfs_msg(sb, KERN_WARNING, "invalid segment: %s", msg); + nilfs_warn(sb, "invalid segment: %s", msg); return -EINVAL; } @@ -543,10 +543,10 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, put_page(page); failed_inode: - nilfs_msg(sb, KERN_WARNING, - "error %d recovering data block (ino=%lu, block-offset=%llu)", - err, (unsigned long)rb->ino, - (unsigned long long)rb->blkoff); + nilfs_warn(sb, + "error %d recovering data block (ino=%lu, block-offset=%llu)", + err, (unsigned long)rb->ino, + (unsigned long long)rb->blkoff); if (!err2) err2 = err; next: @@ -669,8 +669,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, } if (nsalvaged_blocks) { - nilfs_msg(sb, KERN_INFO, "salvaged %lu blocks", - nsalvaged_blocks); + nilfs_info(sb, "salvaged %lu blocks", nsalvaged_blocks); ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE; } out: @@ -681,7 +680,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, confused: err = -EINVAL; failed: - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "error %d roll-forwarding partial segment at blocknr = %llu", err, (unsigned long long)pseg_start); goto out; @@ -703,8 +702,8 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, set_buffer_dirty(bh); err = sync_dirty_buffer(bh); if (unlikely(err)) - nilfs_msg(nilfs->ns_sb, KERN_WARNING, - "buffer sync write failed during post-cleaning of recovery."); + nilfs_warn(nilfs->ns_sb, + "buffer sync write failed during post-cleaning of recovery."); brelse(bh); } @@ -739,8 +738,7 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, err = nilfs_attach_checkpoint(sb, ri->ri_cno, true, &root); if (unlikely(err)) { - nilfs_msg(sb, KERN_ERR, - "error %d loading the latest checkpoint", err); + nilfs_err(sb, "error %d loading the latest checkpoint", err); return err; } @@ -751,8 +749,7 @@ int 
nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) { err = nilfs_prepare_segment_for_recovery(nilfs, sb, ri); if (unlikely(err)) { - nilfs_msg(sb, KERN_ERR, - "error %d preparing segment for recovery", + nilfs_err(sb, "error %d preparing segment for recovery", err); goto failed; } @@ -766,8 +763,7 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, nilfs_detach_log_writer(sb); if (unlikely(err)) { - nilfs_msg(sb, KERN_ERR, - "error %d writing segment for recovery", + nilfs_err(sb, "error %d writing segment for recovery", err); goto failed; } diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 20c479b5e41b..1a8729eded8b 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -505,7 +505,7 @@ static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf) } while (--segbuf->sb_nbio > 0); if (unlikely(atomic_read(&segbuf->sb_err) > 0)) { - nilfs_msg(segbuf->sb_super, KERN_ERR, + nilfs_err(segbuf->sb_super, "I/O error writing log (start-blocknr=%llu, block-count=%lu) in segment %llu", (unsigned long long)segbuf->sb_pseg_start, segbuf->sb_sum.nblocks, diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 91b58c897f92..a651e821c2de 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -158,7 +158,7 @@ static int nilfs_prepare_segment_lock(struct super_block *sb, * it is saved and will be restored on * nilfs_transaction_commit(). 
*/ - nilfs_msg(sb, KERN_WARNING, "journal info from a different FS"); + nilfs_warn(sb, "journal info from a different FS"); save = current->journal_info; } if (!ti) { @@ -1940,9 +1940,9 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci, err = nilfs_ifile_get_inode_block( ifile, ii->vfs_inode.i_ino, &ibh); if (unlikely(err)) { - nilfs_msg(sci->sc_super, KERN_WARNING, - "log writer: error %d getting inode block (ino=%lu)", - err, ii->vfs_inode.i_ino); + nilfs_warn(sci->sc_super, + "log writer: error %d getting inode block (ino=%lu)", + err, ii->vfs_inode.i_ino); return err; } spin_lock(&nilfs->ns_inode_lock); @@ -2449,7 +2449,7 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, if (likely(!err)) break; - nilfs_msg(sb, KERN_WARNING, "error %d cleaning segments", err); + nilfs_warn(sb, "error %d cleaning segments", err); set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(sci->sc_interval); } @@ -2457,9 +2457,9 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs, sci->sc_nfreesegs); if (ret) { - nilfs_msg(sb, KERN_WARNING, - "error %d on discard request, turning discards off for the device", - ret); + nilfs_warn(sb, + "error %d on discard request, turning discards off for the device", + ret); nilfs_clear_opt(nilfs, DISCARD); } } @@ -2540,9 +2540,9 @@ static int nilfs_segctor_thread(void *arg) /* start sync. */ sci->sc_task = current; wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */ - nilfs_msg(sci->sc_super, KERN_INFO, - "segctord starting. Construction interval = %lu seconds, CP frequency < %lu seconds", - sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); + nilfs_info(sci->sc_super, + "segctord starting. 
Construction interval = %lu seconds, CP frequency < %lu seconds", + sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); spin_lock(&sci->sc_state_lock); loop: @@ -2616,8 +2616,8 @@ static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) if (IS_ERR(t)) { int err = PTR_ERR(t); - nilfs_msg(sci->sc_super, KERN_ERR, - "error %d creating segctord thread", err); + nilfs_err(sci->sc_super, "error %d creating segctord thread", + err); return err; } wait_event(sci->sc_wait_task, sci->sc_task != NULL); @@ -2727,14 +2727,14 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) nilfs_segctor_write_out(sci); if (!list_empty(&sci->sc_dirty_files)) { - nilfs_msg(sci->sc_super, KERN_WARNING, - "disposed unprocessed dirty file(s) when stopping log writer"); + nilfs_warn(sci->sc_super, + "disposed unprocessed dirty file(s) when stopping log writer"); nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1); } if (!list_empty(&sci->sc_iput_queue)) { - nilfs_msg(sci->sc_super, KERN_WARNING, - "disposed unprocessed inode(s) in iput queue when stopping log writer"); + nilfs_warn(sci->sc_super, + "disposed unprocessed inode(s) in iput queue when stopping log writer"); nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 1); } @@ -2812,8 +2812,8 @@ void nilfs_detach_log_writer(struct super_block *sb) spin_lock(&nilfs->ns_inode_lock); if (!list_empty(&nilfs->ns_dirty_files)) { list_splice_init(&nilfs->ns_dirty_files, &garbage_list); - nilfs_msg(sb, KERN_WARNING, - "disposed unprocessed dirty file(s) when detaching log writer"); + nilfs_warn(sb, + "disposed unprocessed dirty file(s) when detaching log writer"); } spin_unlock(&nilfs->ns_inode_lock); up_write(&nilfs->ns_segctor_sem); diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index bf3f8f05c89b..42ff67c0c14f 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -171,9 +171,9 @@ int nilfs_sufile_updatev(struct inode *sufile, __u64 *segnumv, size_t nsegs, down_write(&NILFS_MDT(sufile)->mi_sem); for (seg = segnumv; seg < segnumv 
+ nsegs; seg++) { if (unlikely(*seg >= nilfs_sufile_get_nsegments(sufile))) { - nilfs_msg(sufile->i_sb, KERN_WARNING, - "%s: invalid segment number: %llu", - __func__, (unsigned long long)*seg); + nilfs_warn(sufile->i_sb, + "%s: invalid segment number: %llu", + __func__, (unsigned long long)*seg); nerr++; } } @@ -230,9 +230,8 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, int ret; if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) { - nilfs_msg(sufile->i_sb, KERN_WARNING, - "%s: invalid segment number: %llu", - __func__, (unsigned long long)segnum); + nilfs_warn(sufile->i_sb, "%s: invalid segment number: %llu", + __func__, (unsigned long long)segnum); return -EINVAL; } down_write(&NILFS_MDT(sufile)->mi_sem); @@ -410,9 +409,8 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (unlikely(!nilfs_segment_usage_clean(su))) { - nilfs_msg(sufile->i_sb, KERN_WARNING, - "%s: segment %llu must be clean", __func__, - (unsigned long long)segnum); + nilfs_warn(sufile->i_sb, "%s: segment %llu must be clean", + __func__, (unsigned long long)segnum); kunmap_atomic(kaddr); return; } @@ -468,9 +466,8 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, kaddr = kmap_atomic(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (nilfs_segment_usage_clean(su)) { - nilfs_msg(sufile->i_sb, KERN_WARNING, - "%s: segment %llu is already clean", - __func__, (unsigned long long)segnum); + nilfs_warn(sufile->i_sb, "%s: segment %llu is already clean", + __func__, (unsigned long long)segnum); kunmap_atomic(kaddr); return; } @@ -1168,12 +1165,12 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, int err; if (susize > sb->s_blocksize) { - nilfs_msg(sb, KERN_ERR, - "too large segment usage size: %zu bytes", susize); + nilfs_err(sb, "too large segment usage size: %zu 
bytes", + susize); return -EINVAL; } else if (susize < NILFS_MIN_SEGMENT_USAGE_SIZE) { - nilfs_msg(sb, KERN_ERR, - "too small segment usage size: %zu bytes", susize); + nilfs_err(sb, "too small segment usage size: %zu bytes", + susize); return -EINVAL; } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 5729ee86da9a..2eee5fb1a882 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -62,19 +62,25 @@ struct kmem_cache *nilfs_btree_path_cache; static int nilfs_setup_super(struct super_block *sb, int is_mount); static int nilfs_remount(struct super_block *sb, int *flags, char *data); -void __nilfs_msg(struct super_block *sb, const char *level, const char *fmt, - ...) +void __nilfs_msg(struct super_block *sb, const char *fmt, ...) { struct va_format vaf; va_list args; + int level; va_start(args, fmt); - vaf.fmt = fmt; + + level = printk_get_level(fmt); + vaf.fmt = printk_skip_level(fmt); vaf.va = &args; + if (sb) - printk("%sNILFS (%s): %pV\n", level, sb->s_id, &vaf); + printk("%c%cNILFS (%s): %pV\n", + KERN_SOH_ASCII, level, sb->s_id, &vaf); else - printk("%sNILFS: %pV\n", level, &vaf); + printk("%c%cNILFS: %pV\n", + KERN_SOH_ASCII, level, &vaf); + va_end(args); } @@ -106,7 +112,7 @@ static void nilfs_set_error(struct super_block *sb) * * This implements the body of nilfs_error() macro. Normally, * nilfs_error() should be used. As for sustainable errors such as a - * single-shot I/O error, nilfs_msg() should be used instead. + * single-shot I/O error, nilfs_err() should be used instead. * * Callers should not add a trailing newline since this will do it. 
*/ @@ -178,8 +184,7 @@ static int nilfs_sync_super(struct super_block *sb, int flag) } if (unlikely(err)) { - nilfs_msg(sb, KERN_ERR, "unable to write superblock: err=%d", - err); + nilfs_err(sb, "unable to write superblock: err=%d", err); if (err == -EIO && nilfs->ns_sbh[1]) { /* * sbp[0] points to newer log than sbp[1], @@ -249,7 +254,7 @@ struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb, sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC)) { memcpy(sbp[0], sbp[1], nilfs->ns_sbsize); } else { - nilfs_msg(sb, KERN_CRIT, "superblock broke"); + nilfs_crit(sb, "superblock broke"); return NULL; } } else if (sbp[1] && @@ -359,9 +364,9 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) offset = sb2off & (nilfs->ns_blocksize - 1); nsbh = sb_getblk(sb, newblocknr); if (!nsbh) { - nilfs_msg(sb, KERN_WARNING, - "unable to move secondary superblock to block %llu", - (unsigned long long)newblocknr); + nilfs_warn(sb, + "unable to move secondary superblock to block %llu", + (unsigned long long)newblocknr); ret = -EIO; goto out; } @@ -524,7 +529,7 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, up_read(&nilfs->ns_segctor_sem); if (unlikely(err)) { if (err == -ENOENT || err == -EINVAL) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "Invalid checkpoint (checkpoint number=%llu)", (unsigned long long)cno); err = -EINVAL; @@ -622,8 +627,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) err = nilfs_ifile_count_free_inodes(root->ifile, &nmaxinodes, &nfreeinodes); if (unlikely(err)) { - nilfs_msg(sb, KERN_WARNING, - "failed to count free inodes: err=%d", err); + nilfs_warn(sb, "failed to count free inodes: err=%d", err); if (err == -ERANGE) { /* * If nilfs_palloc_count_max_entries() returns @@ -755,7 +759,7 @@ static int parse_options(char *options, struct super_block *sb, int is_remount) break; case Opt_snapshot: if (is_remount) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "\"%s\" option is 
invalid for remount", p); return 0; @@ -771,8 +775,7 @@ static int parse_options(char *options, struct super_block *sb, int is_remount) nilfs_clear_opt(nilfs, DISCARD); break; default: - nilfs_msg(sb, KERN_ERR, - "unrecognized mount option \"%s\"", p); + nilfs_err(sb, "unrecognized mount option \"%s\"", p); return 0; } } @@ -808,10 +811,10 @@ static int nilfs_setup_super(struct super_block *sb, int is_mount) mnt_count = le16_to_cpu(sbp[0]->s_mnt_count); if (nilfs->ns_mount_state & NILFS_ERROR_FS) { - nilfs_msg(sb, KERN_WARNING, "mounting fs with errors"); + nilfs_warn(sb, "mounting fs with errors"); #if 0 } else if (max_mnt_count >= 0 && mnt_count >= max_mnt_count) { - nilfs_msg(sb, KERN_WARNING, "maximal mount count reached"); + nilfs_warn(sb, "maximal mount count reached"); #endif } if (!max_mnt_count) @@ -874,7 +877,7 @@ int nilfs_check_feature_compatibility(struct super_block *sb, features = le64_to_cpu(sbp->s_feature_incompat) & ~NILFS_FEATURE_INCOMPAT_SUPP; if (features) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "couldn't mount because of unsupported optional features (%llx)", (unsigned long long)features); return -EINVAL; @@ -882,7 +885,7 @@ int nilfs_check_feature_compatibility(struct super_block *sb, features = le64_to_cpu(sbp->s_feature_compat_ro) & ~NILFS_FEATURE_COMPAT_RO_SUPP; if (!sb_rdonly(sb) && features) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "couldn't mount RDWR because of unsupported optional features (%llx)", (unsigned long long)features); return -EINVAL; @@ -901,12 +904,12 @@ static int nilfs_get_root_dentry(struct super_block *sb, inode = nilfs_iget(sb, root, NILFS_ROOT_INO); if (IS_ERR(inode)) { ret = PTR_ERR(inode); - nilfs_msg(sb, KERN_ERR, "error %d getting root inode", ret); + nilfs_err(sb, "error %d getting root inode", ret); goto out; } if (!S_ISDIR(inode->i_mode) || !inode->i_blocks || !inode->i_size) { iput(inode); - nilfs_msg(sb, KERN_ERR, "corrupt root inode"); + nilfs_err(sb, "corrupt root inode"); ret = -EINVAL; goto out; } 
@@ -934,7 +937,7 @@ static int nilfs_get_root_dentry(struct super_block *sb, return ret; failed_dentry: - nilfs_msg(sb, KERN_ERR, "error %d getting root dentry", ret); + nilfs_err(sb, "error %d getting root dentry", ret); goto out; } @@ -954,7 +957,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, ret = (ret == -ENOENT) ? -EINVAL : ret; goto out; } else if (!ret) { - nilfs_msg(s, KERN_ERR, + nilfs_err(s, "The specified checkpoint is not a snapshot (checkpoint number=%llu)", (unsigned long long)cno); ret = -EINVAL; @@ -963,7 +966,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, ret = nilfs_attach_checkpoint(s, cno, false, &root); if (ret) { - nilfs_msg(s, KERN_ERR, + nilfs_err(s, "error %d while loading snapshot (checkpoint number=%llu)", ret, (unsigned long long)cno); goto out; @@ -1060,7 +1063,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) cno = nilfs_last_cno(nilfs); err = nilfs_attach_checkpoint(sb, cno, true, &fsroot); if (err) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "error %d while loading last checkpoint (checkpoint number=%llu)", err, (unsigned long long)cno); goto failed_unload; @@ -1122,8 +1125,8 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) err = -EINVAL; if (!nilfs_valid_fs(nilfs)) { - nilfs_msg(sb, KERN_WARNING, - "couldn't remount because the filesystem is in an incomplete recovery state"); + nilfs_warn(sb, + "couldn't remount because the filesystem is in an incomplete recovery state"); goto restore_opts; } @@ -1155,9 +1158,9 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) ~NILFS_FEATURE_COMPAT_RO_SUPP; up_read(&nilfs->ns_sem); if (features) { - nilfs_msg(sb, KERN_WARNING, - "couldn't remount RDWR because of unsupported optional features (%llx)", - (unsigned long long)features); + nilfs_warn(sb, + "couldn't remount RDWR because of unsupported optional features (%llx)", + (unsigned long long)features); err = -EROFS; goto 
restore_opts; } @@ -1216,7 +1219,7 @@ static int nilfs_parse_snapshot_option(const char *option, return 0; parse_error: - nilfs_msg(NULL, KERN_ERR, "invalid option \"%s\": %s", option, msg); + nilfs_err(NULL, "invalid option \"%s\": %s", option, msg); return 1; } @@ -1319,7 +1322,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, } else if (!sd.cno) { if (nilfs_tree_is_busy(s->s_root)) { if ((flags ^ s->s_flags) & SB_RDONLY) { - nilfs_msg(s, KERN_ERR, + nilfs_err(s, "the device already has a %s mount.", sb_rdonly(s) ? "read-only" : "read/write"); err = -EBUSY; diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index e60be7bb55b0..303d71430bdd 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -263,8 +263,8 @@ nilfs_checkpoints_checkpoints_number_show(struct nilfs_checkpoints_attr *attr, err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat); up_read(&nilfs->ns_segctor_sem); if (err < 0) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, - "unable to get checkpoint stat: err=%d", err); + nilfs_err(nilfs->ns_sb, "unable to get checkpoint stat: err=%d", + err); return err; } @@ -286,8 +286,8 @@ nilfs_checkpoints_snapshots_number_show(struct nilfs_checkpoints_attr *attr, err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat); up_read(&nilfs->ns_segctor_sem); if (err < 0) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, - "unable to get checkpoint stat: err=%d", err); + nilfs_err(nilfs->ns_sb, "unable to get checkpoint stat: err=%d", + err); return err; } @@ -405,8 +405,8 @@ nilfs_segments_dirty_segments_show(struct nilfs_segments_attr *attr, err = nilfs_sufile_get_stat(nilfs->ns_sufile, &sustat); up_read(&nilfs->ns_segctor_sem); if (err < 0) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, - "unable to get segment stat: err=%d", err); + nilfs_err(nilfs->ns_sb, "unable to get segment stat: err=%d", + err); return err; } @@ -779,15 +779,15 @@ nilfs_superblock_sb_update_frequency_store(struct nilfs_superblock_attr *attr, err = kstrtouint(skip_spaces(buf), 0, &val); if (err) { - 
nilfs_msg(nilfs->ns_sb, KERN_ERR, - "unable to convert string: err=%d", err); + nilfs_err(nilfs->ns_sb, "unable to convert string: err=%d", + err); return err; } if (val < NILFS_SB_FREQ) { val = NILFS_SB_FREQ; - nilfs_msg(nilfs->ns_sb, KERN_WARNING, - "superblock update frequency cannot be lesser than 10 seconds"); + nilfs_warn(nilfs->ns_sb, + "superblock update frequency cannot be lesser than 10 seconds"); } down_write(&nilfs->ns_sem); @@ -990,8 +990,7 @@ int nilfs_sysfs_create_device_group(struct super_block *sb) nilfs->ns_dev_subgroups = kzalloc(devgrp_size, GFP_KERNEL); if (unlikely(!nilfs->ns_dev_subgroups)) { err = -ENOMEM; - nilfs_msg(sb, KERN_ERR, - "unable to allocate memory for device group"); + nilfs_err(sb, "unable to allocate memory for device group"); goto failed_create_device_group; } @@ -1101,15 +1100,13 @@ int __init nilfs_sysfs_init(void) nilfs_kset = kset_create_and_add(NILFS_ROOT_GROUP_NAME, NULL, fs_kobj); if (!nilfs_kset) { err = -ENOMEM; - nilfs_msg(NULL, KERN_ERR, - "unable to create sysfs entry: err=%d", err); + nilfs_err(NULL, "unable to create sysfs entry: err=%d", err); goto failed_sysfs_init; } err = sysfs_create_group(&nilfs_kset->kobj, &nilfs_feature_attr_group); if (unlikely(err)) { - nilfs_msg(NULL, KERN_ERR, - "unable to create feature group: err=%d", err); + nilfs_err(NULL, "unable to create feature group: err=%d", err); goto cleanup_sysfs_init; } diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 484785cdf96e..221a1cc597f0 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -183,7 +183,7 @@ static int nilfs_store_log_cursor(struct the_nilfs *nilfs, nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg); nilfs->ns_cno = nilfs->ns_last_cno + 1; if (nilfs->ns_segnum >= nilfs->ns_nsegments) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, + nilfs_err(nilfs->ns_sb, "pointed segment number is out of range: segnum=%llu, nsegments=%lu", (unsigned long long)nilfs->ns_segnum, nilfs->ns_nsegments); @@ -210,12 +210,12 @@ int 
load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) int err; if (!valid_fs) { - nilfs_msg(sb, KERN_WARNING, "mounting unchecked fs"); + nilfs_warn(sb, "mounting unchecked fs"); if (s_flags & SB_RDONLY) { - nilfs_msg(sb, KERN_INFO, - "recovery required for readonly filesystem"); - nilfs_msg(sb, KERN_INFO, - "write access will be enabled during recovery"); + nilfs_info(sb, + "recovery required for readonly filesystem"); + nilfs_info(sb, + "write access will be enabled during recovery"); } } @@ -230,12 +230,11 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) goto scan_error; if (!nilfs_valid_sb(sbp[1])) { - nilfs_msg(sb, KERN_WARNING, - "unable to fall back to spare super block"); + nilfs_warn(sb, + "unable to fall back to spare super block"); goto scan_error; } - nilfs_msg(sb, KERN_INFO, - "trying rollback from an earlier position"); + nilfs_info(sb, "trying rollback from an earlier position"); /* * restore super block with its spare and reconfigure @@ -248,9 +247,9 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) /* verify consistency between two super blocks */ blocksize = BLOCK_SIZE << le32_to_cpu(sbp[0]->s_log_block_size); if (blocksize != nilfs->ns_blocksize) { - nilfs_msg(sb, KERN_WARNING, - "blocksize differs between two super blocks (%d != %d)", - blocksize, nilfs->ns_blocksize); + nilfs_warn(sb, + "blocksize differs between two super blocks (%d != %d)", + blocksize, nilfs->ns_blocksize); goto scan_error; } @@ -269,8 +268,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) err = nilfs_load_super_root(nilfs, sb, ri.ri_super_root); if (unlikely(err)) { - nilfs_msg(sb, KERN_ERR, "error %d while loading super root", - err); + nilfs_err(sb, "error %d while loading super root", err); goto failed; } @@ -281,28 +279,28 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) __u64 features; if (nilfs_test_opt(nilfs, NORECOVERY)) { - nilfs_msg(sb, KERN_INFO, - "norecovery option specified, skipping 
roll-forward recovery"); + nilfs_info(sb, + "norecovery option specified, skipping roll-forward recovery"); goto skip_recovery; } features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) & ~NILFS_FEATURE_COMPAT_RO_SUPP; if (features) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "couldn't proceed with recovery because of unsupported optional features (%llx)", (unsigned long long)features); err = -EROFS; goto failed_unload; } if (really_read_only) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "write access unavailable, cannot proceed"); err = -EROFS; goto failed_unload; } sb->s_flags &= ~SB_RDONLY; } else if (nilfs_test_opt(nilfs, NORECOVERY)) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "recovery cancelled because norecovery option was specified for a read/write mount"); err = -EINVAL; goto failed_unload; @@ -318,12 +316,12 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) up_write(&nilfs->ns_sem); if (err) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "error %d updating super block. recovery unfinished.", err); goto failed_unload; } - nilfs_msg(sb, KERN_INFO, "recovery complete"); + nilfs_info(sb, "recovery complete"); skip_recovery: nilfs_clear_recovery_info(&ri); @@ -331,7 +329,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) return 0; scan_error: - nilfs_msg(sb, KERN_ERR, "error %d while searching super root", err); + nilfs_err(sb, "error %d while searching super root", err); goto failed; failed_unload: @@ -378,7 +376,7 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, struct nilfs_super_block *sbp) { if (le32_to_cpu(sbp->s_rev_level) < NILFS_MIN_SUPP_REV) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, + nilfs_err(nilfs->ns_sb, "unsupported revision (superblock rev.=%d.%d, current rev.=%d.%d). 
Please check the version of mkfs.nilfs(2).", le32_to_cpu(sbp->s_rev_level), le16_to_cpu(sbp->s_minor_rev_level), @@ -391,13 +389,11 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size); if (nilfs->ns_inode_size > nilfs->ns_blocksize) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, - "too large inode size: %d bytes", + nilfs_err(nilfs->ns_sb, "too large inode size: %d bytes", nilfs->ns_inode_size); return -EINVAL; } else if (nilfs->ns_inode_size < NILFS_MIN_INODE_SIZE) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, - "too small inode size: %d bytes", + nilfs_err(nilfs->ns_sb, "too small inode size: %d bytes", nilfs->ns_inode_size); return -EINVAL; } @@ -406,8 +402,7 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, - "too short segment: %lu blocks", + nilfs_err(nilfs->ns_sb, "too short segment: %lu blocks", nilfs->ns_blocks_per_segment); return -EINVAL; } @@ -417,7 +412,7 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, le32_to_cpu(sbp->s_r_segments_percentage); if (nilfs->ns_r_segments_percentage < 1 || nilfs->ns_r_segments_percentage > 99) { - nilfs_msg(nilfs->ns_sb, KERN_ERR, + nilfs_err(nilfs->ns_sb, "invalid reserved segments percentage: %lu", nilfs->ns_r_segments_percentage); return -EINVAL; @@ -503,16 +498,16 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, if (!sbp[0]) { if (!sbp[1]) { - nilfs_msg(sb, KERN_ERR, "unable to read superblock"); + nilfs_err(sb, "unable to read superblock"); return -EIO; } - nilfs_msg(sb, KERN_WARNING, - "unable to read primary superblock (blocksize = %d)", - blocksize); + nilfs_warn(sb, + "unable to read primary superblock (blocksize = %d)", + blocksize); } else if (!sbp[1]) { - nilfs_msg(sb, KERN_WARNING, - "unable to read secondary superblock (blocksize = %d)", - blocksize); + 
nilfs_warn(sb, + "unable to read secondary superblock (blocksize = %d)", + blocksize); } /* @@ -534,14 +529,14 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, } if (!valid[swp]) { nilfs_release_super_block(nilfs); - nilfs_msg(sb, KERN_ERR, "couldn't find nilfs on the device"); + nilfs_err(sb, "couldn't find nilfs on the device"); return -EINVAL; } if (!valid[!swp]) - nilfs_msg(sb, KERN_WARNING, - "broken superblock, retrying with spare superblock (blocksize = %d)", - blocksize); + nilfs_warn(sb, + "broken superblock, retrying with spare superblock (blocksize = %d)", + blocksize); if (swp) nilfs_swap_super_block(nilfs); @@ -575,7 +570,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE); if (!blocksize) { - nilfs_msg(sb, KERN_ERR, "unable to set blocksize"); + nilfs_err(sb, "unable to set blocksize"); err = -EINVAL; goto out; } @@ -594,7 +589,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); if (blocksize < NILFS_MIN_BLOCK_SIZE || blocksize > NILFS_MAX_BLOCK_SIZE) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "couldn't mount because of unsupported filesystem blocksize %d", blocksize); err = -EINVAL; @@ -604,7 +599,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) int hw_blocksize = bdev_logical_block_size(sb->s_bdev); if (blocksize < hw_blocksize) { - nilfs_msg(sb, KERN_ERR, + nilfs_err(sb, "blocksize %d too small for device (sector-size = %d)", blocksize, hw_blocksize); err = -EINVAL; diff --git a/fs/open.c b/fs/open.c index c80e9f497e9b..9af548fb841b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -779,12 +779,6 @@ static int do_dentry_open(struct file *f, return 0; } - /* Any file opened for execve()/uselib() has to be a regular file. 
*/ - if (unlikely(f->f_flags & FMODE_EXEC && !S_ISREG(inode->i_mode))) { - error = -EACCES; - goto cleanup_file; - } - if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { error = get_write_access(inode); if (unlikely(error)) diff --git a/fs/proc/base.c b/fs/proc/base.c index a333caeca291..617db4e0faa0 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -551,8 +551,17 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, { unsigned long totalpages = totalram_pages() + total_swap_pages; unsigned long points = 0; + long badness; + + badness = oom_badness(task, totalpages); + /* + * Special case OOM_SCORE_ADJ_MIN for all others scale the + * badness value into [0, 2000] range which we have been + * exporting for a long time so userspace might depend on it. + */ + if (badness != LONG_MIN) + points = (1000 + badness * 1000 / (long)totalpages) * 2 / 3; - points = oom_badness(task, totalpages) * 1000 / totalpages; seq_printf(m, "%lu\n", points); return 0; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index dbda4499a859..5066b0251ed8 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -786,7 +786,7 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss, SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree); SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp); SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp); - SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp); + SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp); SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb); seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ", mss->private_hugetlb >> 10, 7); @@ -816,7 +816,7 @@ static int show_smap(struct seq_file *m, void *v) __show_smap(m, &mss, false); - seq_printf(m, "THPeligible: %d\n", + seq_printf(m, "THPeligible: %d\n", transparent_hugepage_enabled(vma)); if (arch_pkeys_enabled()) diff --git a/fs/signalfd.c b/fs/signalfd.c index 44b6845b071c..5b78719be445 100644 --- a/fs/signalfd.c +++ 
b/fs/signalfd.c @@ -314,9 +314,10 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, { sigset_t mask; - if (sizemask != sizeof(sigset_t) || - copy_from_user(&mask, user_mask, sizeof(mask))) + if (sizemask != sizeof(sigset_t)) return -EINVAL; + if (copy_from_user(&mask, user_mask, sizeof(mask))) + return -EFAULT; return do_signalfd4(ufd, &mask, flags); } @@ -325,9 +326,10 @@ SYSCALL_DEFINE3(signalfd, int, ufd, sigset_t __user *, user_mask, { sigset_t mask; - if (sizemask != sizeof(sigset_t) || - copy_from_user(&mask, user_mask, sizeof(mask))) + if (sizemask != sizeof(sigset_t)) return -EINVAL; + if (copy_from_user(&mask, user_mask, sizeof(mask))) + return -EFAULT; return do_signalfd4(ufd, &mask, 0); } diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 1da0be667409..e3b69fb280e8 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -101,7 +101,7 @@ static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 gene struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; struct inode *inode; - if (ino < UFS_ROOTINO || ino > uspi->s_ncg * uspi->s_ipg) + if (ino < UFS_ROOTINO || ino > (u64)uspi->s_ncg * uspi->s_ipg) return ERR_PTR(-ESTALE); inode = ufs_iget(sb, ino); diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 4df87546bd40..ae9aaf1f34bf 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -600,7 +600,7 @@ xfs_sb_quota_to_disk( * disk. If neither are active, we should NULL the inode. * * In all cases, the separate pquotino must remain 0 because it - * it beyond the "end" of the valid non-pquotino superblock. + * is beyond the "end" of the valid non-pquotino superblock. 
*/ if (from->sb_qflags & XFS_GQUOTA_ACCT) to->sb_gquotino = cpu_to_be64(from->sb_gquotino); diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index e380bd1a9bfc..50f922cad91a 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -44,7 +44,7 @@ xfs_attr_shortform_compare(const void *a, const void *b) /* * Copy out entries of shortform attribute lists for attr_list(). * Shortform attribute lists are not stored in hashval sorted order. - * If the output buffer is not large enough to hold them all, then we + * If the output buffer is not large enough to hold them all, then * we have to calculate each entries' hashvalue and sort them before * we can begin returning them to the user. */ diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 5bb6f22cc11a..408d1b572d3f 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -127,7 +127,7 @@ xfs_buf_item_size_segment( * stretch of non-contiguous chunks to be logged. Contiguous chunks are logged * in a single iovec. * - * Discontiguous buffers need a format structure per region that that is being + * Discontiguous buffers need a format structure per region that is being * logged. This makes the changes in the buffer appear to log recovery as though * they came from separate buffers, just like would occur if multiple buffers * were used instead of a single discontiguous buffer. This enables diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index d480f11e6b00..8f0457d67d77 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -948,7 +948,7 @@ xlog_recover_buf_commit_pass2( * or inode_cluster_size bytes, whichever is bigger. The inode * buffers in the log can be a different size if the log was generated * by an older kernel using unclustered inode buffers or a newer kernel - * running with a different inode cluster size. Regardless, if the + * running with a different inode cluster size. 
Regardless, if * the inode buffer size isn't max(blocksize, inode_cluster_size) * for *our* value of inode_cluster_size, then we need to keep * the buffer out of the buffer cache so that the buffer won't diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 04dc2be19c3a..bcd73b9c2994 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -807,7 +807,7 @@ xfs_qm_dqget_checks( } /* - * Given the file system, id, and type (UDQUOT/GDQUOT), return a a locked + * Given the file system, id, and type (UDQUOT/GDQUOT), return a locked * dquot, doing an allocation (if requested) as needed. */ int diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 5a4b0119143a..465fd9e048d4 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -56,7 +56,7 @@ xfs_fs_encode_fh( fileid_type = FILEID_INO32_GEN_PARENT; /* - * If the the filesystem may contain 64bit inode numbers, we need + * If the filesystem may contain 64bit inode numbers, we need * to use larger file handles that can represent them. * * While we only allocate inodes that do not fit into 32 bits any diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 407d6299606d..c06129cffba9 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -451,7 +451,7 @@ xfs_lock_inodes( /* * Currently supports between 2 and 5 inodes with exclusive locking. We * support an arbitrary depth of locking here, but absolute limits on - * inodes depend on the the type of locking and the limits placed by + * inodes depend on the type of locking and the limits placed by * lockdep annotations in xfs_lock_inumorder. These are all checked by * the asserts. */ @@ -3105,7 +3105,7 @@ out_trans_abort: /* * xfs_rename_alloc_whiteout() * - * Return a referenced, unlinked, unlocked inode that that can be used as a + * Return a referenced, unlinked, unlocked inode that can be used as a * whiteout in a rename transaction. 
We use a tmpfile inode here so that if we * crash between allocating the inode and linking it into the rename transaction * recovery will free the inode and we won't leak it. diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 895f61b2b4f0..6c65938cee1c 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -191,7 +191,7 @@ xfs_inode_item_format_data_fork( ip->i_df.if_bytes > 0) { /* * Round i_bytes up to a word boundary. - * The underlying memory is guaranteed to + * The underlying memory is guaranteed * to be there by xfs_idata_realloc(). */ data_bytes = roundup(ip->i_df.if_bytes, 4); @@ -275,7 +275,7 @@ xfs_inode_item_format_attr_fork( ip->i_afp->if_bytes > 0) { /* * Round i_bytes up to a word boundary. - * The underlying memory is guaranteed to + * The underlying memory is guaranteed * to be there by xfs_idata_realloc(). */ data_bytes = roundup(ip->i_afp->if_bytes, 4); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 0e3f62cde375..3abb8b9d6f4c 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -865,7 +865,7 @@ xfs_buffered_write_iomap_begin( } /* - * Search the data fork fork first to look up our source mapping. We + * Search the data fork first to look up our source mapping. We * always need the data fork map, as we have to return it to the * iomap code so that the higher level write code can read data in to * perform read-modify-write cycles for unaligned writes. diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 56c32eecffea..b0ef071b3cb5 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -239,7 +239,7 @@ xfs_cil_prepare_item( * this CIL context and so we need to pin it. If we are replacing the * old_lv, then remove the space it accounts for and make it the shadow * buffer for later freeing. In both cases we are now switching to the - * shadow buffer, so update the the pointer to it appropriately. + * shadow buffer, so update the pointer to it appropriately. 
*/ if (!old_lv) { if (lv->lv_item->li_ops->iop_pin) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 52a65a74208f..e2ec91b2d0f4 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1100,7 +1100,7 @@ xlog_verify_head( * * Note that xlog_find_tail() clears the blocks at the new head * (i.e., the records with invalid CRC) if the cycle number - * matches the the current cycle. + * matches the current cycle. */ found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, buffer, rhead_blk, rhead, wrapped); diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 7b2c72bc2858..ca93b6488377 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -485,7 +485,7 @@ xfs_cui_item_recover( * transaction. Normally, any work that needs to be deferred * gets attached to the same defer_ops that scheduled the * refcount update. However, we're in log recovery here, so we - * we use the passed in defer_ops and to finish up any work that + * use the passed in defer_ops and to finish up any work that * doesn't fit. We need to reserve enough blocks to handle a * full btree split on either end of the refcount range. */ diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index aac83f9d6107..16098dc42add 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -721,7 +721,7 @@ xfs_reflink_end_cow( * repeatedly cycles the ILOCK to allocate one transaction per remapped * extent. * - * If we're being called by writeback then the the pages will still + * If we're being called by writeback then the pages will still * have PageWriteback set, which prevents races with reflink remapping * and truncate. 
Reflink remapping prevents races with writeback by * taking the iolock and mmaplock before flushing the pages and diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h index e9f810fc6731..43585850f154 100644 --- a/fs/xfs/xfs_sysfs.h +++ b/fs/xfs/xfs_sysfs.h @@ -32,9 +32,11 @@ xfs_sysfs_init( struct xfs_kobj *parent_kobj, const char *name) { + struct kobject *parent; + + parent = parent_kobj ? &parent_kobj->kobject : NULL; init_completion(&kobj->complete); - return kobject_init_and_add(&kobj->kobject, ktype, - &parent_kobj->kobject, "%s", name); + return kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name); } static inline void diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 0c783d339675..dbb69b4bf3ed 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -480,7 +480,7 @@ xfsaild_push( * inode buffer is locked because we already pushed the * updates to it as part of inode clustering. * - * We do not want to to stop flushing just because lots + * We do not want to stop flushing just because lots * of items are already being flushed, but we need to * re-try the flushing relatively soon if most of the * AIL is being flushed. @@ -515,7 +515,7 @@ xfsaild_push( /* * Are there too many items we can't do anything with? * - * If we we are skipping too many items because we can't flush + * If we are skipping too many items because we can't flush * them or they are already being flushed, we back off and * given them time to complete whatever operation is being * done. i.e. 
remove pressure from the AIL while we can't make diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 30a3aab312e6..dabf8cb7203b 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -163,7 +163,7 @@ static inline u16 readw(const volatile void __iomem *addr) u16 val; __io_br(); - val = __le16_to_cpu(__raw_readw(addr)); + val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); __io_ar(val); return val; } @@ -176,7 +176,7 @@ static inline u32 readl(const volatile void __iomem *addr) u32 val; __io_br(); - val = __le32_to_cpu(__raw_readl(addr)); + val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); __io_ar(val); return val; } @@ -212,7 +212,7 @@ static inline void writeb(u8 value, volatile void __iomem *addr) static inline void writew(u16 value, volatile void __iomem *addr) { __io_bw(); - __raw_writew(cpu_to_le16(value), addr); + __raw_writew((u16 __force)cpu_to_le16(value), addr); __io_aw(); } #endif @@ -222,7 +222,7 @@ static inline void writew(u16 value, volatile void __iomem *addr) static inline void writel(u32 value, volatile void __iomem *addr) { __io_bw(); - __raw_writel(__cpu_to_le32(value), addr); + __raw_writel((u32 __force)__cpu_to_le32(value), addr); __io_aw(); } #endif @@ -474,7 +474,7 @@ static inline u16 _inw(unsigned long addr) u16 val; __io_pbr(); - val = __le16_to_cpu(__raw_readw(PCI_IOBASE + addr)); + val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr)); __io_par(val); return val; } @@ -487,7 +487,7 @@ static inline u32 _inl(unsigned long addr) u32 val; __io_pbr(); - val = __le32_to_cpu(__raw_readl(PCI_IOBASE + addr)); + val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr)); __io_par(val); return val; } @@ -508,7 +508,7 @@ static inline void _outb(u8 value, unsigned long addr) static inline void _outw(u16 value, unsigned long addr) { __io_pbw(); - __raw_writew(cpu_to_le16(value), PCI_IOBASE + addr); + __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr); __io_paw(); } 
#endif @@ -518,7 +518,7 @@ static inline void _outw(u16 value, unsigned long addr) static inline void _outl(u32 value, unsigned long addr) { __io_pbw(); - __raw_writel(cpu_to_le32(value), PCI_IOBASE + addr); + __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr); __io_paw(); } #endif diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 1c4fd950f091..c5edc5e08b94 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -168,7 +168,6 @@ void hyperv_report_panic_msg(phys_addr_t pa, size_t size); bool hv_is_hyperv_initialized(void); bool hv_is_hibernation_supported(void); void hyperv_cleanup(void); -void hv_setup_sched_clock(void *sched_clock); #else /* CONFIG_HYPERV */ static inline bool hv_is_hyperv_initialized(void) { return false; } static inline bool hv_is_hibernation_supported(void) { return false; } diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 66397ed10acb..d16302d3eb59 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -60,8 +60,8 @@ extern __visible const void __nosave_begin, __nosave_end; /* Function descriptor handling (if any). Override in asm/sections.h */ #ifndef dereference_function_descriptor -#define dereference_function_descriptor(p) (p) -#define dereference_kernel_function_descriptor(p) (p) +#define dereference_function_descriptor(p) ((void *)(p)) +#define dereference_kernel_function_descriptor(p) ((void *)(p)) #endif /* random extra sections (if any). 
Override diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index e935318804f8..ba68ee4dabfa 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -86,8 +86,8 @@ static inline void set_fs(mm_segment_t fs) } #endif -#ifndef segment_eq -#define segment_eq(a, b) ((a).seg == (b).seg) +#ifndef uaccess_kernel +#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) #endif #define access_ok(addr, size) __access_ok((unsigned long)(addr),(size)) diff --git a/include/clocksource/timer-ti-dm.h b/include/clocksource/timer-ti-dm.h index 531ca87fcd08..4c61dade8835 100644 --- a/include/clocksource/timer-ti-dm.h +++ b/include/clocksource/timer-ti-dm.h @@ -1,7 +1,7 @@ /* * OMAP Dual-Mode Timers * - * Copyright (C) 2010 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2010 Texas Instruments Incorporated - https://www.ti.com/ * Tarun Kanti DebBarma <[email protected]> * Thara Gopinath <[email protected]> * diff --git a/include/dt-bindings/clock/actions,s500-cmu.h b/include/dt-bindings/clock/actions,s500-cmu.h index 030981cd2d56..a250a52a6192 100644 --- a/include/dt-bindings/clock/actions,s500-cmu.h +++ b/include/dt-bindings/clock/actions,s500-cmu.h @@ -72,7 +72,12 @@ #define CLK_NAND 52 #define CLK_ECC 53 #define CLK_RMII_REF 54 +#define CLK_GPIO 55 -#define CLK_NR_CLKS (CLK_RMII_REF + 1) +/* system clock (part 2) */ +#define CLK_APB 56 +#define CLK_DMAC 57 + +#define CLK_NR_CLKS (CLK_DMAC + 1) #endif /* __DT_BINDINGS_CLOCK_S500_CMU_H */ diff --git a/include/dt-bindings/clock/jz4780-cgu.h b/include/dt-bindings/clock/jz4780-cgu.h index 1859ce53ee38..85cf8eb5081b 100644 --- a/include/dt-bindings/clock/jz4780-cgu.h +++ b/include/dt-bindings/clock/jz4780-cgu.h @@ -12,78 +12,80 @@ #ifndef __DT_BINDINGS_CLOCK_JZ4780_CGU_H__ #define __DT_BINDINGS_CLOCK_JZ4780_CGU_H__ -#define JZ4780_CLK_EXCLK 0 -#define JZ4780_CLK_RTCLK 1 -#define JZ4780_CLK_APLL 2 -#define JZ4780_CLK_MPLL 3 -#define JZ4780_CLK_EPLL 4 -#define 
JZ4780_CLK_VPLL 5 -#define JZ4780_CLK_OTGPHY 6 -#define JZ4780_CLK_SCLKA 7 -#define JZ4780_CLK_CPUMUX 8 -#define JZ4780_CLK_CPU 9 -#define JZ4780_CLK_L2CACHE 10 -#define JZ4780_CLK_AHB0 11 -#define JZ4780_CLK_AHB2PMUX 12 -#define JZ4780_CLK_AHB2 13 -#define JZ4780_CLK_PCLK 14 -#define JZ4780_CLK_DDR 15 -#define JZ4780_CLK_VPU 16 -#define JZ4780_CLK_I2SPLL 17 -#define JZ4780_CLK_I2S 18 +#define JZ4780_CLK_EXCLK 0 +#define JZ4780_CLK_RTCLK 1 +#define JZ4780_CLK_APLL 2 +#define JZ4780_CLK_MPLL 3 +#define JZ4780_CLK_EPLL 4 +#define JZ4780_CLK_VPLL 5 +#define JZ4780_CLK_OTGPHY 6 +#define JZ4780_CLK_SCLKA 7 +#define JZ4780_CLK_CPUMUX 8 +#define JZ4780_CLK_CPU 9 +#define JZ4780_CLK_L2CACHE 10 +#define JZ4780_CLK_AHB0 11 +#define JZ4780_CLK_AHB2PMUX 12 +#define JZ4780_CLK_AHB2 13 +#define JZ4780_CLK_PCLK 14 +#define JZ4780_CLK_DDR 15 +#define JZ4780_CLK_VPU 16 +#define JZ4780_CLK_I2SPLL 17 +#define JZ4780_CLK_I2S 18 #define JZ4780_CLK_LCD0PIXCLK 19 #define JZ4780_CLK_LCD1PIXCLK 20 -#define JZ4780_CLK_MSCMUX 21 -#define JZ4780_CLK_MSC0 22 -#define JZ4780_CLK_MSC1 23 -#define JZ4780_CLK_MSC2 24 -#define JZ4780_CLK_UHC 25 -#define JZ4780_CLK_SSIPLL 26 -#define JZ4780_CLK_SSI 27 -#define JZ4780_CLK_CIMMCLK 28 -#define JZ4780_CLK_PCMPLL 29 -#define JZ4780_CLK_PCM 30 -#define JZ4780_CLK_GPU 31 -#define JZ4780_CLK_HDMI 32 -#define JZ4780_CLK_BCH 33 -#define JZ4780_CLK_NEMC 34 -#define JZ4780_CLK_OTG0 35 -#define JZ4780_CLK_SSI0 36 -#define JZ4780_CLK_SMB0 37 -#define JZ4780_CLK_SMB1 38 -#define JZ4780_CLK_SCC 39 -#define JZ4780_CLK_AIC 40 -#define JZ4780_CLK_TSSI0 41 -#define JZ4780_CLK_OWI 42 -#define JZ4780_CLK_KBC 43 -#define JZ4780_CLK_SADC 44 -#define JZ4780_CLK_UART0 45 -#define JZ4780_CLK_UART1 46 -#define JZ4780_CLK_UART2 47 -#define JZ4780_CLK_UART3 48 -#define JZ4780_CLK_SSI1 49 -#define JZ4780_CLK_SSI2 50 -#define JZ4780_CLK_PDMA 51 -#define JZ4780_CLK_GPS 52 -#define JZ4780_CLK_MAC 53 -#define JZ4780_CLK_SMB2 54 -#define JZ4780_CLK_CIM 55 -#define JZ4780_CLK_LCD 56 
-#define JZ4780_CLK_TVE 57 -#define JZ4780_CLK_IPU 58 -#define JZ4780_CLK_DDR0 59 -#define JZ4780_CLK_DDR1 60 -#define JZ4780_CLK_SMB3 61 -#define JZ4780_CLK_TSSI1 62 -#define JZ4780_CLK_COMPRESS 63 -#define JZ4780_CLK_AIC1 64 -#define JZ4780_CLK_GPVLC 65 -#define JZ4780_CLK_OTG1 66 -#define JZ4780_CLK_UART4 67 -#define JZ4780_CLK_AHBMON 68 -#define JZ4780_CLK_SMB4 69 -#define JZ4780_CLK_DES 70 -#define JZ4780_CLK_X2D 71 -#define JZ4780_CLK_CORE1 72 +#define JZ4780_CLK_MSCMUX 21 +#define JZ4780_CLK_MSC0 22 +#define JZ4780_CLK_MSC1 23 +#define JZ4780_CLK_MSC2 24 +#define JZ4780_CLK_UHC 25 +#define JZ4780_CLK_SSIPLL 26 +#define JZ4780_CLK_SSI 27 +#define JZ4780_CLK_CIMMCLK 28 +#define JZ4780_CLK_PCMPLL 29 +#define JZ4780_CLK_PCM 30 +#define JZ4780_CLK_GPU 31 +#define JZ4780_CLK_HDMI 32 +#define JZ4780_CLK_BCH 33 +#define JZ4780_CLK_NEMC 34 +#define JZ4780_CLK_OTG0 35 +#define JZ4780_CLK_SSI0 36 +#define JZ4780_CLK_SMB0 37 +#define JZ4780_CLK_SMB1 38 +#define JZ4780_CLK_SCC 39 +#define JZ4780_CLK_AIC 40 +#define JZ4780_CLK_TSSI0 41 +#define JZ4780_CLK_OWI 42 +#define JZ4780_CLK_KBC 43 +#define JZ4780_CLK_SADC 44 +#define JZ4780_CLK_UART0 45 +#define JZ4780_CLK_UART1 46 +#define JZ4780_CLK_UART2 47 +#define JZ4780_CLK_UART3 48 +#define JZ4780_CLK_SSI1 49 +#define JZ4780_CLK_SSI2 50 +#define JZ4780_CLK_PDMA 51 +#define JZ4780_CLK_GPS 52 +#define JZ4780_CLK_MAC 53 +#define JZ4780_CLK_SMB2 54 +#define JZ4780_CLK_CIM 55 +#define JZ4780_CLK_LCD 56 +#define JZ4780_CLK_TVE 57 +#define JZ4780_CLK_IPU 58 +#define JZ4780_CLK_DDR0 59 +#define JZ4780_CLK_DDR1 60 +#define JZ4780_CLK_SMB3 61 +#define JZ4780_CLK_TSSI1 62 +#define JZ4780_CLK_COMPRESS 63 +#define JZ4780_CLK_AIC1 64 +#define JZ4780_CLK_GPVLC 65 +#define JZ4780_CLK_OTG1 66 +#define JZ4780_CLK_UART4 67 +#define JZ4780_CLK_AHBMON 68 +#define JZ4780_CLK_SMB4 69 +#define JZ4780_CLK_DES 70 +#define JZ4780_CLK_X2D 71 +#define JZ4780_CLK_CORE1 72 +#define JZ4780_CLK_EXCLK_DIV512 73 +#define JZ4780_CLK_RTC 74 #endif /* 
__DT_BINDINGS_CLOCK_JZ4780_CGU_H__ */ diff --git a/include/dt-bindings/clock/qcom,gcc-sc7180.h b/include/dt-bindings/clock/qcom,gcc-sc7180.h index 992b67b7e5e4..bdf43adc7897 100644 --- a/include/dt-bindings/clock/qcom,gcc-sc7180.h +++ b/include/dt-bindings/clock/qcom,gcc-sc7180.h @@ -138,6 +138,7 @@ #define GCC_MSS_Q6_MEMNOC_AXI_CLK 128 #define GCC_MSS_SNOC_AXI_CLK 129 #define GCC_SEC_CTRL_CLK_SRC 130 +#define GCC_LPASS_CFG_NOC_SWAY_CLK 131 /* GCC resets */ #define GCC_QUSB2PHY_PRIM_BCR 0 diff --git a/include/dt-bindings/clock/qcom,gcc-sdm660.h b/include/dt-bindings/clock/qcom,gcc-sdm660.h index 468302282913..df8a6f3d367e 100644 --- a/include/dt-bindings/clock/qcom,gcc-sdm660.h +++ b/include/dt-bindings/clock/qcom,gcc-sdm660.h @@ -152,5 +152,6 @@ #define GCC_USB_20_BCR 6 #define GCC_USB_30_BCR 7 #define GCC_USB_PHY_CFG_AHB2PHY_BCR 8 +#define GCC_MSS_RESTART 9 #endif diff --git a/include/dt-bindings/clock/qcom,gpucc-sm8150.h b/include/dt-bindings/clock/qcom,gpucc-sm8150.h new file mode 100644 index 000000000000..c5b70aad7770 --- /dev/null +++ b/include/dt-bindings/clock/qcom,gpucc-sm8150.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved. 
+ */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GPU_CC_SM8150_H +#define _DT_BINDINGS_CLK_QCOM_GPU_CC_SM8150_H + +/* GPU_CC clock registers */ +#define GPU_CC_AHB_CLK 0 +#define GPU_CC_CRC_AHB_CLK 1 +#define GPU_CC_CX_APB_CLK 2 +#define GPU_CC_CX_GMU_CLK 3 +#define GPU_CC_CX_SNOC_DVM_CLK 4 +#define GPU_CC_CXO_AON_CLK 5 +#define GPU_CC_CXO_CLK 6 +#define GPU_CC_GMU_CLK_SRC 7 +#define GPU_CC_GX_GMU_CLK 8 +#define GPU_CC_PLL1 9 + +/* GPU_CC Resets */ +#define GPUCC_GPU_CC_CX_BCR 0 +#define GPUCC_GPU_CC_GFX3D_AON_BCR 1 +#define GPUCC_GPU_CC_GMU_BCR 2 +#define GPUCC_GPU_CC_GX_BCR 3 +#define GPUCC_GPU_CC_SPDM_BCR 4 +#define GPUCC_GPU_CC_XO_BCR 5 + +/* GPU_CC GDSCRs */ +#define GPU_CX_GDSC 0 +#define GPU_GX_GDSC 1 + +#endif diff --git a/include/dt-bindings/clock/qcom,gpucc-sm8250.h b/include/dt-bindings/clock/qcom,gpucc-sm8250.h new file mode 100644 index 000000000000..dc8e387c48ad --- /dev/null +++ b/include/dt-bindings/clock/qcom,gpucc-sm8250.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved. 
+ */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GPU_CC_SM8250_H +#define _DT_BINDINGS_CLK_QCOM_GPU_CC_SM8250_H + +/* GPU_CC clock registers */ +#define GPU_CC_AHB_CLK 0 +#define GPU_CC_CRC_AHB_CLK 1 +#define GPU_CC_CX_APB_CLK 2 +#define GPU_CC_CX_GMU_CLK 3 +#define GPU_CC_CX_SNOC_DVM_CLK 4 +#define GPU_CC_CXO_AON_CLK 5 +#define GPU_CC_CXO_CLK 6 +#define GPU_CC_GMU_CLK_SRC 7 +#define GPU_CC_GX_GMU_CLK 8 +#define GPU_CC_PLL1 9 +#define GPU_CC_HLOS1_VOTE_GPU_SMMU_CLK 10 + +/* GPU_CC Resets */ +#define GPUCC_GPU_CC_ACD_BCR 0 +#define GPUCC_GPU_CC_CX_BCR 1 +#define GPUCC_GPU_CC_GFX3D_AON_BCR 2 +#define GPUCC_GPU_CC_GMU_BCR 3 +#define GPUCC_GPU_CC_GX_BCR 4 +#define GPUCC_GPU_CC_XO_BCR 5 + +/* GPU_CC GDSCRs */ +#define GPU_CX_GDSC 0 +#define GPU_GX_GDSC 1 + +#endif diff --git a/include/dt-bindings/clock/qcom,lpasscorecc-sc7180.h b/include/dt-bindings/clock/qcom,lpasscorecc-sc7180.h new file mode 100644 index 000000000000..a55d01db2b20 --- /dev/null +++ b/include/dt-bindings/clock/qcom,lpasscorecc-sc7180.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020, The Linux Foundation. All rights reserved. 
+ */ + +#ifndef _DT_BINDINGS_CLK_QCOM_LPASS_CORE_CC_SC7180_H +#define _DT_BINDINGS_CLK_QCOM_LPASS_CORE_CC_SC7180_H + +/* LPASS_CORE_CC clocks */ +#define LPASS_LPAAUDIO_DIG_PLL 0 +#define LPASS_LPAAUDIO_DIG_PLL_OUT_ODD 1 +#define CORE_CLK_SRC 2 +#define EXT_MCLK0_CLK_SRC 3 +#define LPAIF_PRI_CLK_SRC 4 +#define LPAIF_SEC_CLK_SRC 5 +#define LPASS_AUDIO_CORE_CORE_CLK 6 +#define LPASS_AUDIO_CORE_EXT_MCLK0_CLK 7 +#define LPASS_AUDIO_CORE_LPAIF_PRI_IBIT_CLK 8 +#define LPASS_AUDIO_CORE_LPAIF_SEC_IBIT_CLK 9 +#define LPASS_AUDIO_CORE_SYSNOC_MPORT_CORE_CLK 10 + +/* LPASS Core power domains */ +#define LPASS_CORE_HM_GDSCR 0 + +/* LPASS Audio power domains */ +#define LPASS_AUDIO_HM_GDSCR 0 +#define LPASS_PDC_HM_GDSCR 1 + +#endif diff --git a/include/dt-bindings/clock/x1000-cgu.h b/include/dt-bindings/clock/x1000-cgu.h index 0367c8c02e16..f187e0719fd3 100644 --- a/include/dt-bindings/clock/x1000-cgu.h +++ b/include/dt-bindings/clock/x1000-cgu.h @@ -48,5 +48,7 @@ #define X1000_CLK_SSI 33 #define X1000_CLK_OST 34 #define X1000_CLK_PDMA 35 +#define X1000_CLK_EXCLK_DIV512 36 +#define X1000_CLK_RTC 37 #endif /* __DT_BINDINGS_CLOCK_X1000_CGU_H__ */ diff --git a/include/dt-bindings/clock/x1830-cgu.h b/include/dt-bindings/clock/x1830-cgu.h index 801e1d09c881..88455376a950 100644 --- a/include/dt-bindings/clock/x1830-cgu.h +++ b/include/dt-bindings/clock/x1830-cgu.h @@ -51,5 +51,7 @@ #define X1830_CLK_TCU 36 #define X1830_CLK_DTRNG 37 #define X1830_CLK_OST 38 +#define X1830_CLK_EXCLK_DIV512 39 +#define X1830_CLK_RTC 40 #endif /* __DT_BINDINGS_CLOCK_X1830_CGU_H__ */ diff --git a/include/dt-bindings/reset/actions,s500-reset.h b/include/dt-bindings/reset/actions,s500-reset.h new file mode 100644 index 000000000000..f5d94176d10b --- /dev/null +++ b/include/dt-bindings/reset/actions,s500-reset.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Device Tree binding constants for Actions Semi S500 Reset Management Unit + * + * Copyright (c) 2014 Actions Semi Inc. 
+ * Copyright (c) 2020 Cristian Ciocaltea <[email protected]> + */ + +#ifndef __DT_BINDINGS_ACTIONS_S500_RESET_H +#define __DT_BINDINGS_ACTIONS_S500_RESET_H + +#define RESET_DMAC 0 +#define RESET_NORIF 1 +#define RESET_DDR 2 +#define RESET_NANDC 3 +#define RESET_SD0 4 +#define RESET_SD1 5 +#define RESET_PCM1 6 +#define RESET_DE 7 +#define RESET_LCD 8 +#define RESET_SD2 9 +#define RESET_DSI 10 +#define RESET_CSI 11 +#define RESET_BISP 12 +#define RESET_KEY 13 +#define RESET_GPIO 14 +#define RESET_AUDIO 15 +#define RESET_PCM0 16 +#define RESET_VDE 17 +#define RESET_VCE 18 +#define RESET_GPU3D 19 +#define RESET_NIC301 20 +#define RESET_LENS 21 +#define RESET_PERIPHRESET 22 +#define RESET_USB2_0 23 +#define RESET_TVOUT 24 +#define RESET_HDMI 25 +#define RESET_HDCP2TX 26 +#define RESET_UART6 27 +#define RESET_UART0 28 +#define RESET_UART1 29 +#define RESET_UART2 30 +#define RESET_SPI0 31 +#define RESET_SPI1 32 +#define RESET_SPI2 33 +#define RESET_SPI3 34 +#define RESET_I2C0 35 +#define RESET_I2C1 36 +#define RESET_USB3 37 +#define RESET_UART3 38 +#define RESET_UART4 39 +#define RESET_UART5 40 +#define RESET_I2C2 41 +#define RESET_I2C3 42 +#define RESET_ETHERNET 43 +#define RESET_CHIPID 44 +#define RESET_USB2_1 45 +#define RESET_WD0RESET 46 +#define RESET_WD1RESET 47 +#define RESET_WD2RESET 48 +#define RESET_WD3RESET 49 +#define RESET_DBG0RESET 50 +#define RESET_DBG1RESET 51 +#define RESET_DBG2RESET 52 +#define RESET_DBG3RESET 53 + +#endif /* __DT_BINDINGS_ACTIONS_S500_RESET_H */ diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index d120e6c323e7..51c19381108c 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -26,16 +26,9 @@ enum kvm_arch_timer_regs { struct arch_timer_context { struct kvm_vcpu *vcpu; - /* Registers: control register, timer value */ - u32 cnt_ctl; - u64 cnt_cval; - /* Timer IRQ */ struct kvm_irq_level irq; - /* Virtual offset */ - u64 cntvoff; - /* Emulated Timer (may be unused) */ struct hrtimer 
hrtimer; @@ -71,7 +64,7 @@ int kvm_timer_hyp_init(bool); int kvm_timer_enable(struct kvm_vcpu *vcpu); int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); -void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); +void kvm_timer_sync_user(struct kvm_vcpu *vcpu); bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu); void kvm_timer_update_run(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); @@ -109,4 +102,8 @@ void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, enum kvm_arch_timer_regs treg, u64 val); +/* Needed for tracing */ +u32 timer_get_ctl(struct arch_timer_context *ctxt); +u64 timer_get_cval(struct arch_timer_context *ctxt); + #endif diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 75e582b8d2d9..4c328fef403c 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -36,7 +36,7 @@ struct dma_chan_ref { /** * async_tx_flags - modifiers for the async_* calls * @ASYNC_TX_XOR_ZERO_DST: this flag must be used for xor operations where the - * the destination address is not a source. The asynchronous case handles this + * destination address is not a source. The asynchronous case handles this * implicitly, the synchronous case needs to zero the destination block. * @ASYNC_TX_XOR_DROP_DST: this flag must be used if the destination address is * also one of the source addresses. 
In the synchronous case the destination diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 48ea093ff04c..4e035aca6f7e 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -77,7 +77,7 @@ */ #define FIELD_FIT(_mask, _val) \ ({ \ - __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_FIT: "); \ + __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_FIT: "); \ !((((typeof(_mask))_val) << __bf_shf(_mask)) & ~(_mask)); \ }) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cef4ef0d2b4e..55f694b63164 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1214,15 +1214,17 @@ struct bpf_iter_aux_info { struct bpf_map *map; }; -typedef int (*bpf_iter_check_target_t)(struct bpf_prog *prog, - struct bpf_iter_aux_info *aux); +typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog, + union bpf_iter_link_info *linfo, + struct bpf_iter_aux_info *aux); +typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux); #define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; - bpf_iter_check_target_t check_target; + bpf_iter_attach_target_t attach_target; + bpf_iter_detach_target_t detach_target; u32 ctx_arg_info_size; - enum bpf_iter_link_info req_linfo; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; const struct bpf_iter_seq_info *seq_info; }; diff --git a/include/linux/btree.h b/include/linux/btree.h index 68f858c831b1..243ee544397a 100644 --- a/include/linux/btree.h +++ b/include/linux/btree.h @@ -10,7 +10,7 @@ * * A B+Tree is a data structure for looking up arbitrary (currently allowing * unsigned long, u32, u64 and 2 * u64) keys into pointers. The data structure - * is described at http://en.wikipedia.org/wiki/B-tree, we currently do not + * is described at https://en.wikipedia.org/wiki/B-tree, we currently do not * use binary search to find the key on lookups. 
* * Each B+Tree consists of a head, that contains bookkeeping information and diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 39e6f4c57580..fcd84e8d88f4 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -58,7 +58,7 @@ * because 10.2.z (jewel) did not care if its peers advertised this * feature bit. * - * - In the second phase we stop advertising the the bit and call it + * - In the second phase we stop advertising the bit and call it * RETIRED. This can normally be done in the *next* major release * following the one in which we marked the feature DEPRECATED. In * the above example, for 12.0.z (luminous) we can say: diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index ebf5ba62b772..455e9b9e2adf 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -130,6 +130,7 @@ struct ceph_dir_layout { #define CEPH_MSG_CLIENT_REQUEST 24 #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25 #define CEPH_MSG_CLIENT_REPLY 26 +#define CEPH_MSG_CLIENT_METRICS 29 #define CEPH_MSG_CLIENT_CAPS 0x310 #define CEPH_MSG_CLIENT_LEASE 0x311 #define CEPH_MSG_CLIENT_SNAP 0x312 diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index e5ed1c541e7f..c8645f0b797d 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -282,6 +282,7 @@ extern struct kmem_cache *ceph_dentry_cachep; extern struct kmem_cache *ceph_file_cachep; extern struct kmem_cache *ceph_dir_file_cachep; extern struct kmem_cache *ceph_mds_request_cachep; +extern mempool_t *ceph_wb_pagevec_pool; /* ceph_common.c */ extern bool libceph_compatible(void *data); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index c60b59e9291b..83fa08a06507 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -404,7 +404,7 @@ void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc); &__oreq->r_ops[__whch].typ.fld; \ 
}) -extern void osd_req_op_init(struct ceph_osd_request *osd_req, +struct ceph_osd_req_op *osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u32 flags); extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *, diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 6f815be99b77..03a5de5f99f4 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1096,7 +1096,6 @@ int clk_hw_get_parent_index(struct clk_hw *hw); int clk_hw_set_parent(struct clk_hw *hw, struct clk_hw *new_parent); unsigned int __clk_get_enable_count(struct clk *clk); unsigned long clk_hw_get_rate(const struct clk_hw *hw); -unsigned long __clk_get_flags(struct clk *clk); unsigned long clk_hw_get_flags(const struct clk_hw *hw); #define clk_hw_can_set_rate_parent(hw) \ (clk_hw_get_flags((hw)) & CLK_SET_RATE_PARENT) diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index 49a53a137610..a4f82e836a7c 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -59,6 +59,7 @@ #define AT91_PMC_PLL_UPDT 0x1C /* PMC PLL update register [for SAM9X60] */ #define AT91_PMC_PLL_UPDT_UPDATE (1 << 8) /* Update PLL settings */ #define AT91_PMC_PLL_UPDT_ID (1 << 0) /* PLL ID */ +#define AT91_PMC_PLL_UPDT_ID_MSK (0xf) /* PLL ID mask */ #define AT91_PMC_PLL_UPDT_STUPTIM (0xff << 16) /* Startup time */ #define AT91_CKGR_MOR 0x20 /* Main Oscillator Register [not on SAM9RL] */ @@ -136,6 +137,8 @@ #define AT91_PMC_PLLADIV2_ON (1 << 12) #define AT91_PMC_H32MXDIV BIT(24) +#define AT91_PMC_XTALF 0x34 /* Main XTAL Frequency Register [SAMA7G5 only] */ + #define AT91_PMC_USB 0x38 /* USB Clock Register [some SAM9 only] */ #define AT91_PMC_USBS (0x1 << 0) /* USB OHCI Input clock selection */ #define AT91_PMC_USBS_PLLA (0 << 0) @@ -174,6 +177,7 @@ #define AT91_PMC_MOSCRCS (1 << 17) /* Main On-Chip RC [some SAM9] */ #define AT91_PMC_CFDEV (1 << 18) /* Clock Failure Detector Event [some SAM9] */ #define 
AT91_PMC_GCKRDY (1 << 24) /* Generated Clocks */ +#define AT91_PMC_MCKXRDY (1 << 26) /* Master Clock x [x=1..4] Ready Status */ #define AT91_PMC_IMR 0x6c /* Interrupt Mask Register */ #define AT91_PMC_FSMR 0x70 /* Fast Startup Mode Register */ diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 6fa0eea3f530..25a521d299c1 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -85,11 +85,13 @@ static inline unsigned long compact_gap(unsigned int order) #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; +extern unsigned int sysctl_compaction_proactiveness; extern int sysctl_compaction_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos); extern int sysctl_extfrag_threshold; extern int sysctl_compact_unevictable_allowed; +extern unsigned int extfrag_for_order(struct zone *zone, unsigned int order); extern int fragmentation_index(struct zone *zone, unsigned int order); extern enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, unsigned int alloc_flags, diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 8a072d00e688..cee0c728d39a 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -40,7 +40,7 @@ #endif /* - * Not all versions of clang implement the the type-generic versions + * Not all versions of clang implement the type-generic versions * of the builtin overflow checkers. Fortunately, clang implements * __has_builtin allowing us to avoid awkward version * checks. 
Unfortunately, we don't know which version of gcc clang diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 2e231ba8fe3f..4b33cb385f96 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -5,48 +5,54 @@ #ifndef __ASSEMBLY__ #ifdef __CHECKER__ +/* address spaces */ # define __kernel __attribute__((address_space(0))) # define __user __attribute__((noderef, address_space(__user))) -# define __safe __attribute__((safe)) -# define __force __attribute__((force)) -# define __nocast __attribute__((nocast)) # define __iomem __attribute__((noderef, address_space(__iomem))) +# define __percpu __attribute__((noderef, address_space(__percpu))) +# define __rcu __attribute__((noderef, address_space(__rcu))) +extern void __chk_user_ptr(const volatile void __user *); +extern void __chk_io_ptr(const volatile void __iomem *); +/* context/locking */ # define __must_hold(x) __attribute__((context(x,1,1))) # define __acquires(x) __attribute__((context(x,0,1))) # define __releases(x) __attribute__((context(x,1,0))) # define __acquire(x) __context__(x,1) # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? 
({ __acquire(x); 1; }) : 0) -# define __percpu __attribute__((noderef, address_space(__percpu))) -# define __rcu __attribute__((noderef, address_space(__rcu))) +/* other */ +# define __force __attribute__((force)) +# define __nocast __attribute__((nocast)) +# define __safe __attribute__((safe)) # define __private __attribute__((noderef)) -extern void __chk_user_ptr(const volatile void __user *); -extern void __chk_io_ptr(const volatile void __iomem *); # define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member)) #else /* __CHECKER__ */ +/* address spaces */ +# define __kernel # ifdef STRUCTLEAK_PLUGIN -# define __user __attribute__((user)) +# define __user __attribute__((user)) # else # define __user # endif -# define __kernel -# define __safe -# define __force -# define __nocast # define __iomem -# define __chk_user_ptr(x) (void)0 -# define __chk_io_ptr(x) (void)0 -# define __builtin_warning(x, y...) (1) +# define __percpu +# define __rcu +# define __chk_user_ptr(x) (void)0 +# define __chk_io_ptr(x) (void)0 +/* context/locking */ # define __must_hold(x) # define __acquires(x) # define __releases(x) -# define __acquire(x) (void)0 -# define __release(x) (void)0 +# define __acquire(x) (void)0 +# define __release(x) (void)0 # define __cond_lock(x,c) (c) -# define __percpu -# define __rcu +/* other */ +# define __force +# define __nocast +# define __safe # define __private # define ACCESS_PRIVATE(p, member) ((p)->member) +# define __builtin_warning(x, y...) (1) #endif /* __CHECKER__ */ /* Indirect macros required for expanded argument pasting, eg. __LINE__. 
*/ diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 525510a9f965..6594dbc34a37 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -38,6 +38,8 @@ phys_addr_t paddr_vmcoreinfo_note(void); #define VMCOREINFO_OSRELEASE(value) \ vmcoreinfo_append_str("OSRELEASE=%s\n", value) +#define VMCOREINFO_BUILD_ID(value) \ + vmcoreinfo_append_str("BUILD-ID=%s\n", value) #define VMCOREINFO_PAGESIZE(value) \ vmcoreinfo_append_str("PAGESIZE=%ld\n", value) #define VMCOREINFO_SYMBOL(name) \ @@ -64,6 +66,10 @@ extern unsigned char *vmcoreinfo_data; extern size_t vmcoreinfo_size; extern u32 *vmcoreinfo_note; +/* raw contents of kernel .notes section */ +extern const void __start_notes __weak; +extern const void __stop_notes __weak; + Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len); void final_note(Elf_Word *buf); diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 33c16f2de7f6..2f811baf78d2 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -17,7 +17,7 @@ * The algorithm was originally described in detail in this paper * (although the algorithm has evolved somewhat since then): * - * http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf + * https://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf * * LGPL2 */ diff --git a/include/linux/delay.h b/include/linux/delay.h index 5e016a4029d9..1d0e2ce6b6d9 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -16,7 +16,7 @@ * 3. CPU clock rate changes. 
* * Please see this thread: - * http://lists.openwall.net/linux-kernel/2011/01/09/56 + * https://lists.openwall.net/linux-kernel/2011/01/09/56 */ #include <linux/kernel.h> diff --git a/include/linux/dma/k3-psil.h b/include/linux/dma/k3-psil.h index 61d5cc0ad601..1962f75fa2d3 100644 --- a/include/linux/dma/k3-psil.h +++ b/include/linux/dma/k3-psil.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2019 Texas Instruments Incorporated - https://www.ti.com */ #ifndef K3_PSIL_H_ diff --git a/include/linux/dma/k3-udma-glue.h b/include/linux/dma/k3-udma-glue.h index caadbab1632a..5eb34ad973a7 100644 --- a/include/linux/dma/k3-udma-glue.h +++ b/include/linux/dma/k3-udma-glue.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2019 Texas Instruments Incorporated - https://www.ti.com */ #ifndef K3_UDMA_GLUE_H_ diff --git a/include/linux/dma/ti-cppi5.h b/include/linux/dma/ti-cppi5.h index 579356ae447e..5896441ee604 100644 --- a/include/linux/dma/ti-cppi5.h +++ b/include/linux/dma/ti-cppi5.h @@ -2,7 +2,7 @@ /* * CPPI5 descriptors interface * - * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2019 Texas Instruments Incorporated - https://www.ti.com */ #ifndef __TI_CPPI5_H__ diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index d896b8657085..3ceb72b67a7a 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -178,7 +178,7 @@ struct fid { * get_name: * @get_name should find a name for the given @child in the given @parent * directory. The name should be stored in the @name (with the - * understanding that it is already pointing to a a %NAME_MAX+1 sized + * understanding that it is already pointing to a %NAME_MAX+1 sized * buffer. get_name() should return %0 on success, a negative error code * or error. 
@get_name will be called without @parent->i_mutex held. * diff --git a/include/linux/fb.h b/include/linux/fb.h index 2b530e6d86e4..850f79e9a7cb 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -400,8 +400,6 @@ struct fb_tile_ops { #define FBINFO_HWACCEL_YPAN 0x2000 /* optional */ #define FBINFO_HWACCEL_YWRAP 0x4000 /* optional */ -#define FBINFO_MISC_USEREVENT 0x10000 /* event request - from userspace */ #define FBINFO_MISC_TILEBLITTING 0x20000 /* use tile blitting */ /* A driver may set this flag to indicate that it does want a set_par to be diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 6d775984905b..b07d88c92bb2 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -10,7 +10,7 @@ /* * Return code to denote that requested number of * frontswap pages are unused(moved to page cache). - * Used in in shmem_unuse and try_to_unuse. + * Used in shmem_unuse and try_to_unuse. */ #define FRONTSWAP_PAGES_UNUSED 2 diff --git a/include/linux/fs.h b/include/linux/fs.h index 011af396aa17..7c69dd7c6160 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -518,6 +518,16 @@ static inline void i_mmap_unlock_read(struct address_space *mapping) up_read(&mapping->i_mmap_rwsem); } +static inline void i_mmap_assert_locked(struct address_space *mapping) +{ + lockdep_assert_held(&mapping->i_mmap_rwsem); +} + +static inline void i_mmap_assert_write_locked(struct address_space *mapping) +{ + lockdep_assert_held_write(&mapping->i_mmap_rwsem); +} + /* * Might pages of this file be mapped into userspace? 
*/ diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h index 02393c0c98f9..bfd00320c7f3 100644 --- a/include/linux/generic-radix-tree.h +++ b/include/linux/generic-radix-tree.h @@ -44,7 +44,7 @@ struct genradix_root; struct __genradix { - struct genradix_root __rcu *root; + struct genradix_root *root; }; /* diff --git a/include/linux/highmem.h b/include/linux/highmem.h index d6e82e3de027..14e6202ce47f 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -73,7 +73,7 @@ static inline void kunmap(struct page *page) * no global lock is needed and because the kmap code must perform a global TLB * invalidation when the kmap pool wraps. * - * However when holding an atomic kmap is is not legal to sleep, so atomic + * However when holding an atomic kmap it is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. * * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 17c4c4975145..467302056e17 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -181,13 +181,6 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma, #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) -#ifdef CONFIG_DEBUG_VM -#define transparent_hugepage_debug_cow() \ - (transparent_hugepage_flags & \ - (1<<TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG)) -#else /* CONFIG_DEBUG_VM */ -#define transparent_hugepage_debug_cow() 0 -#endif /* CONFIG_DEBUG_VM */ extern unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 50650d0d01b9..d5cc5f802dd4 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -10,6 +10,7 @@ #include <linux/list.h> #include <linux/kref.h> #include <linux/pgtable.h> +#include <linux/gfp.h> 
struct ctl_table; struct user_struct; @@ -164,7 +165,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz); pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz); -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); +int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long *addr, pte_t *ptep); void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, @@ -203,8 +205,9 @@ static inline struct address_space *hugetlb_page_mapping_lock_write( return NULL; } -static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, - pte_t *ptep) +static inline int huge_pmd_unshare(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long *addr, pte_t *ptep) { return 0; } @@ -504,13 +507,10 @@ struct huge_bootmem_page { struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); -struct page *alloc_huge_page_node(struct hstate *h, int nid); struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, - nodemask_t *nmask); + nodemask_t *nmask, gfp_t gfp_mask); struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address); -struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask, - int nid, nodemask_t *nmask); int huge_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); @@ -692,6 +692,27 @@ static inline bool hugepage_movable_supported(struct hstate *h) return true; } +/* Movability of hugepages depends on migration support. 
*/ +static inline gfp_t htlb_alloc_mask(struct hstate *h) +{ + if (hugepage_movable_supported(h)) + return GFP_HIGHUSER_MOVABLE; + else + return GFP_HIGHUSER; +} + +static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) +{ + gfp_t modified_mask = htlb_alloc_mask(h); + + /* Some callers might want to enforce node */ + modified_mask |= (gfp_mask & __GFP_THISNODE); + + modified_mask |= (gfp_mask & __GFP_NOWARN); + + return modified_mask; +} + static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { @@ -759,13 +780,9 @@ static inline struct page *alloc_huge_page(struct vm_area_struct *vma, return NULL; } -static inline struct page *alloc_huge_page_node(struct hstate *h, int nid) -{ - return NULL; -} - static inline struct page * -alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask) +alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, + nodemask_t *nmask, gfp_t gfp_mask) { return NULL; } @@ -878,6 +895,16 @@ static inline bool hugepage_movable_supported(struct hstate *h) return false; } +static inline gfp_t htlb_alloc_mask(struct hstate *h) +{ + return 0; +} + +static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) +{ + return 0; +} + static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 4e7714c88f95..fc55ea41d323 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -231,7 +231,6 @@ enum i2c_alert_protocol { * @detect: Callback for device detection * @address_list: The I2C addresses to probe (for detect) * @clients: List of detected clients we created (for i2c-core use only) - * @disable_i2c_core_irq_mapping: Tell the i2c-core to not do irq-mapping * * The driver.owner field should be set to the module owner of this driver. * The driver.name field should be set to the name of this driver. 
@@ -290,8 +289,6 @@ struct i2c_driver { int (*detect)(struct i2c_client *client, struct i2c_board_info *info); const unsigned short *address_list; struct list_head clients; - - bool disable_i2c_core_irq_mapping; }; #define to_i2c_driver(d) container_of(d, struct i2c_driver, driver) @@ -609,6 +606,14 @@ struct i2c_timings { * may configure padmux here for SDA/SCL line or something else they want. * @scl_gpiod: gpiod of the SCL line. Only required for GPIO recovery. * @sda_gpiod: gpiod of the SDA line. Only required for GPIO recovery. + * @pinctrl: pinctrl used by GPIO recovery to change the state of the I2C pins. + * Optional. + * @pins_default: default pinctrl state of SCL/SDA lines, when they are assigned + * to the I2C bus. Optional. Populated internally for GPIO recovery, if + * state with the name PINCTRL_STATE_DEFAULT is found and pinctrl is valid. + * @pins_gpio: recovery pinctrl state of SCL/SDA lines, when they are used as + * GPIOs. Optional. Populated internally for GPIO recovery, if this state + * is called "gpio" or "recovery" and pinctrl is valid. 
*/ struct i2c_bus_recovery_info { int (*recover_bus)(struct i2c_adapter *adap); @@ -625,6 +630,9 @@ struct i2c_bus_recovery_info { /* gpio recovery */ struct gpio_desc *scl_gpiod; struct gpio_desc *sda_gpiod; + struct pinctrl *pinctrl; + struct pinctrl_state *pins_default; + struct pinctrl_state *pins_gpio; }; int i2c_recover_bus(struct i2c_adapter *adap); diff --git a/include/linux/irqchip/irq-omap-intc.h b/include/linux/irqchip/irq-omap-intc.h index 216e5adf80ce..dca379c0d7eb 100644 --- a/include/linux/irqchip/irq-omap-intc.h +++ b/include/linux/irqchip/irq-omap-intc.h @@ -2,7 +2,7 @@ /** * irq-omap-intc.h - INTC Idle Functions * - * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2014 Texas Instruments Incorporated - https://www.ti.com * * Author: Felipe Balbi <[email protected]> */ diff --git a/include/linux/jhash.h b/include/linux/jhash.h index ba2f6a9776b6..19ddd43aee68 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -5,7 +5,7 @@ * * Copyright (C) 2006. Bob Jenkins ([email protected]) * - * http://burtleburtle.net/bob/hash/ + * https://burtleburtle.net/bob/hash/ * * These are the credits from Bob's sources: * diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 7339a00c895e..500def620d8f 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -17,7 +17,6 @@ #include <asm/byteorder.h> #include <asm/div64.h> #include <uapi/linux/kernel.h> -#include <asm/div64.h> #define STACK_MAGIC 0xdeadbeef @@ -322,8 +321,7 @@ void panic(const char *fmt, ...) 
__noreturn __cold; void nmi_panic(struct pt_regs *regs, const char *msg); extern void oops_enter(void); extern void oops_exit(void); -void print_oops_end_marker(void); -extern int oops_may_print(void); +extern bool oops_may_print(void); void do_exit(long error_code) __noreturn; void complete_and_exit(struct completion *, long) __noreturn; @@ -347,7 +345,7 @@ int __must_check kstrtoll(const char *s, unsigned int base, long long *res); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the simple_strtoull. Return code must be checked. + * Preferred over simple_strtoul(). Return code must be checked. */ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res) { @@ -375,7 +373,7 @@ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsign * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the simple_strtoull. Return code must be checked. + * Preferred over simple_strtol(). Return code must be checked. 
*/ static inline int __must_check kstrtol(const char *s, unsigned int base, long *res) { diff --git a/include/linux/leds-ti-lmu-common.h b/include/linux/leds-ti-lmu-common.h index 5eb111f38803..420b61e5a213 100644 --- a/include/linux/leds-ti-lmu-common.h +++ b/include/linux/leds-ti-lmu-common.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ // TI LMU Common Core -// Copyright (C) 2018 Texas Instruments Incorporated - http://www.ti.com/ +// Copyright (C) 2018 Texas Instruments Incorporated - https://www.ti.com/ #ifndef _TI_LMU_COMMON_H_ #define _TI_LMU_COMMON_H_ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1bb49b600310..385237e4cb44 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -32,6 +32,7 @@ struct kmem_cache; enum memcg_stat_item { MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS, MEMCG_SOCK, + MEMCG_PERCPU_B, MEMCG_NR_STAT, }; @@ -64,8 +65,8 @@ struct mem_cgroup_id { /* * Per memcg event counter is incremented at every pagein/pageout. With THP, - * it will be incremated by the number of pages. This counter is used for - * for trigger some periodic events. This is straightforward and better + * it will be incremented by the number of pages. This counter is used + * to trigger some periodic events. This is straightforward and better * than using jiffies etc. to handle periodic memcg event. 
*/ enum mem_cgroup_events_target { @@ -339,6 +340,13 @@ struct mem_cgroup { extern struct mem_cgroup *root_mem_cgroup; +static __always_inline bool memcg_stat_item_in_bytes(int idx) +{ + if (idx == MEMCG_PERCPU_B) + return true; + return vmstat_item_in_bytes(idx); +} + static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { return (memcg == root_mem_cgroup); diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index ea9c15b60a96..5f1c74df264d 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -6,7 +6,7 @@ #ifndef _LINUX_MEMPOLICY_H #define _LINUX_MEMPOLICY_H 1 - +#include <linux/sched.h> #include <linux/mmzone.h> #include <linux/dax.h> #include <linux/slab.h> @@ -28,7 +28,7 @@ struct mm_struct; * the process policy is used. Interrupts ignore the memory policy * of the current process. * - * Locking policy for interlave: + * Locking policy for interleave: * In process context there is no locking because only the process accesses * its own state. All vma manipulation is somewhat protected by a down_read on * mmap_lock. 
@@ -152,6 +152,15 @@ extern int huge_node(struct vm_area_struct *vma, extern bool init_nodemask_of_mempolicy(nodemask_t *mask); extern bool mempolicy_nodemask_intersects(struct task_struct *tsk, const nodemask_t *mask); +extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy); + +static inline nodemask_t *policy_nodemask_current(gfp_t gfp) +{ + struct mempolicy *mpol = get_task_policy(current); + + return policy_nodemask(gfp, mpol); +} + extern unsigned int mempolicy_slab_node(void); extern enum zone_type policy_zone; @@ -281,5 +290,10 @@ static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma, static inline void mpol_put_task_policy(struct task_struct *task) { } + +static inline nodemask_t *policy_nodemask_current(gfp_t gfp) +{ + return NULL; +} #endif /* CONFIG_NUMA */ #endif diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 540998d9810b..0f8d1583fa8e 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -10,6 +10,8 @@ typedef struct page *new_page_t(struct page *page, unsigned long private); typedef void free_page_t(struct page *page, unsigned long private); +struct migration_target_control; + /* * Return values from addresss_space_operations.migratepage(): * - negative errno on page migration failure; @@ -31,34 +33,6 @@ enum migrate_reason { /* In mm/debug.c; also keep sync with include/trace/events/migrate.h */ extern const char *migrate_reason_names[MR_TYPES]; -static inline struct page *new_page_nodemask(struct page *page, - int preferred_nid, nodemask_t *nodemask) -{ - gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL; - unsigned int order = 0; - struct page *new_page = NULL; - - if (PageHuge(page)) - return alloc_huge_page_nodemask(page_hstate(compound_head(page)), - preferred_nid, nodemask); - - if (PageTransHuge(page)) { - gfp_mask |= GFP_TRANSHUGE; - order = HPAGE_PMD_ORDER; - } - - if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) - gfp_mask |= 
__GFP_HIGHMEM; - - new_page = __alloc_pages_nodemask(gfp_mask, order, - preferred_nid, nodemask); - - if (new_page && PageTransHuge(new_page)) - prep_transhuge_page(new_page); - - return new_page; -} - #ifdef CONFIG_MIGRATION extern void putback_movable_pages(struct list_head *l); @@ -67,6 +41,7 @@ extern int migrate_page(struct address_space *mapping, enum migrate_mode mode); extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason); +extern struct page *alloc_migration_target(struct page *page, unsigned long private); extern int isolate_movable_page(struct page *page, isolate_mode_t mode); extern void putback_movable_page(struct page *page); @@ -85,6 +60,9 @@ static inline int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason) { return -ENOSYS; } +static inline struct page *alloc_migration_target(struct page *page, + unsigned long private) + { return NULL; } static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) { return -EBUSY; } diff --git a/include/linux/mm.h b/include/linux/mm.h index f6a82f9bccd7..e7602a3bcef1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -38,6 +38,7 @@ struct file_ra_state; struct user_struct; struct writeback_control; struct bdi_writeback; +struct pt_regs; void init_mm_internals(void); @@ -479,7 +480,7 @@ static inline bool fault_flag_allow_retry_first(unsigned int flags) { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" } /* - * vm_fault is filled by the the pagefault handler and passed to the vma's + * vm_fault is filled by the pagefault handler and passed to the vma's * ->fault function. The vma's ->fault is responsible for returning a bitmask * of VM_FAULT_xxx flags that give details about how the fault was handled. 
* @@ -1658,8 +1659,9 @@ int invalidate_inode_page(struct page *page); #ifdef CONFIG_MMU extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma, - unsigned long address, unsigned int flags); -extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, + unsigned long address, unsigned int flags, + struct pt_regs *regs); +extern int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked); void unmap_mapping_pages(struct address_space *mapping, @@ -1668,14 +1670,14 @@ void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows); #else static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma, - unsigned long address, unsigned int flags) + unsigned long address, unsigned int flags, + struct pt_regs *regs) { /* should never happen if there's no MMU */ BUG(); return VM_FAULT_SIGBUS; } -static inline int fixup_user_fault(struct task_struct *tsk, - struct mm_struct *mm, unsigned long address, +static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked) { /* should never happen if there's no MMU */ @@ -1701,11 +1703,11 @@ extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); -long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, +long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked); -long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, +long pin_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked); @@ -2599,7 +2601,7 @@ extern unsigned long 
stack_guard_gap; /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); -/* CONFIG_STACK_GROWSUP still needs to to grow downwards at some places */ +/* CONFIG_STACK_GROWSUP still needs to grow downwards at some places */ extern int expand_downwards(struct vm_area_struct *vma, unsigned long address); #if VM_GROWSUP diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 635a96cd9b1f..8379432f4f2f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -173,9 +173,15 @@ enum node_stat_item { NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ WORKINGSET_NODES, - WORKINGSET_REFAULT, - WORKINGSET_ACTIVATE, - WORKINGSET_RESTORE, + WORKINGSET_REFAULT_BASE, + WORKINGSET_REFAULT_ANON = WORKINGSET_REFAULT_BASE, + WORKINGSET_REFAULT_FILE, + WORKINGSET_ACTIVATE_BASE, + WORKINGSET_ACTIVATE_ANON = WORKINGSET_ACTIVATE_BASE, + WORKINGSET_ACTIVATE_FILE, + WORKINGSET_RESTORE_BASE, + WORKINGSET_RESTORE_ANON = WORKINGSET_RESTORE_BASE, + WORKINGSET_RESTORE_FILE, WORKINGSET_NODERECLAIM, NR_ANON_MAPPED, /* Mapped anonymous pages */ NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. @@ -277,8 +283,8 @@ struct lruvec { unsigned long file_cost; /* Non-resident age, driven by LRU movement */ atomic_long_t nonresident_age; - /* Refaults at the time of last reclaim cycle */ - unsigned long refaults; + /* Refaults at the time of last reclaim cycle, anon=0, file=1 */ + unsigned long refaults[2]; /* Various lruvec state flags (enum lruvec_flags) */ unsigned long flags; #ifdef CONFIG_MEMCG @@ -530,6 +536,7 @@ struct zone { * On compaction failure, 1<<compact_defer_shift compactions * are skipped before trying again. The number attempted since * last failure is tracked with compact_considered. + * compact_order_failed is the minimum compaction failed order. 
*/ unsigned int compact_considered; unsigned int compact_defer_shift; diff --git a/include/linux/module.h b/include/linux/module.h index 2e6670860d27..e30ed5fa33a7 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -389,6 +389,7 @@ struct module { unsigned int num_gpl_syms; const struct kernel_symbol *gpl_syms; const s32 *gpl_crcs; + bool using_gplonly_symbols; #ifdef CONFIG_UNUSED_SYMBOLS /* unused exported symbols. */ @@ -582,34 +583,14 @@ struct module *find_module(const char *name); struct symsearch { const struct kernel_symbol *start, *stop; const s32 *crcs; - enum { + enum mod_license { NOT_GPL_ONLY, GPL_ONLY, WILL_BE_GPL_ONLY, - } licence; + } license; bool unused; }; -/* - * Search for an exported symbol by name. - * - * Must be called with module_mutex held or preemption disabled. - */ -const struct kernel_symbol *find_symbol(const char *name, - struct module **owner, - const s32 **crc, - bool gplok, - bool warn); - -/* - * Walk the exported symbol table - * - * Must be called with module_mutex held or preemption disabled. - */ -bool each_symbol_section(bool (*fn)(const struct symsearch *arr, - struct module *owner, - void *data), void *data); - /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if symnum out of range. */ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, @@ -657,7 +638,6 @@ static inline void __module_get(struct module *module) #define symbol_put_addr(p) do { } while (0) #endif /* CONFIG_MODULE_UNLOAD */ -int ref_module(struct module *a, struct module *b); /* This is a #define so the string doesn't get put in every .o file */ #define module_name(mod) \ diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 3ef917ff0964..1ad5aa3b86d9 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -108,7 +108,7 @@ struct kparam_array * ".") the kernel commandline parameter. 
Note that - is changed to _, so * the user can use "foo-bar=1" even for variable "foo_bar". * - * @perm is 0 if the the variable is not to appear in sysfs, or 0444 + * @perm is 0 if the variable is not to appear in sysfs, or 0444 * for world-readable, 0644 for root-writable, etc. Note that if it * is writable, you may need to use kernel_param_lock() around * accesses (esp. charp, which can be kfreed when it changes). diff --git a/include/linux/oom.h b/include/linux/oom.h index c696c265f019..f022f581ac29 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -48,7 +48,7 @@ struct oom_control { /* Used by oom implementation, do not set */ unsigned long totalpages; struct task_struct *chosen; - unsigned long chosen_points; + long chosen_points; /* Used to print the constraint info. */ enum oom_constraint constraint; @@ -107,7 +107,7 @@ static inline vm_fault_t check_stable_address_space(struct mm_struct *mm) bool __oom_reap_task_mm(struct mm_struct *mm); -extern unsigned long oom_badness(struct task_struct *p, +long oom_badness(struct task_struct *p, unsigned long totalpages); extern bool out_of_memory(struct oom_control *oc); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 5c709a1450b1..1ab1e24bcbce 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2661,6 +2661,8 @@ #define PCI_DEVICE_ID_INTEL_80332_1 0x0332 #define PCI_DEVICE_ID_INTEL_80333_0 0x0370 #define PCI_DEVICE_ID_INTEL_80333_1 0x0372 +#define PCI_DEVICE_ID_INTEL_QAT_DH895XCC 0x0435 +#define PCI_DEVICE_ID_INTEL_QAT_DH895XCC_VF 0x0443 #define PCI_DEVICE_ID_INTEL_82375 0x0482 #define PCI_DEVICE_ID_INTEL_82424 0x0483 #define PCI_DEVICE_ID_INTEL_82378 0x0484 @@ -2710,6 +2712,8 @@ #define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_NHI 0x1577 #define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_BRIDGE 0x1578 #define PCI_DEVICE_ID_INTEL_80960_RP 0x1960 +#define PCI_DEVICE_ID_INTEL_QAT_C3XXX 0x19e2 +#define PCI_DEVICE_ID_INTEL_QAT_C3XXX_VF 0x19e3 #define PCI_DEVICE_ID_INTEL_82840_HB 
0x1a21 #define PCI_DEVICE_ID_INTEL_82845_HB 0x1a30 #define PCI_DEVICE_ID_INTEL_IOAT 0x1a38 @@ -2926,6 +2930,8 @@ #define PCI_DEVICE_ID_INTEL_IOAT_JSF7 0x3717 #define PCI_DEVICE_ID_INTEL_IOAT_JSF8 0x3718 #define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719 +#define PCI_DEVICE_ID_INTEL_QAT_C62X 0x37c8 +#define PCI_DEVICE_ID_INTEL_QAT_C62X_VF 0x37c9 #define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14 #define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16 #define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18 diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 53e97da1e8e2..a124c21e3204 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -804,7 +804,7 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, /* * No-op macros that just return the current protection value. Defined here - * because these macros can be used used even if CONFIG_MMU is not defined. + * because these macros can be used even if CONFIG_MMU is not defined. */ #ifndef pgprot_nx @@ -1234,7 +1234,7 @@ static inline int pmd_trans_unstable(pmd_t *pmd) * Technically a PTE can be PROTNONE even when not doing NUMA balancing but * the only case the kernel cares is for NUMA balancing and is only ever set * when the VMA is accessible. For PROT_NONE VMAs, the PTEs are not marked - * _PAGE_PROTNONE so by by default, implement the helper as "always no". It + * _PAGE_PROTNONE so by default, implement the helper as "always no". It * is the responsibility of the caller to distinguish between PROT_NONE * protections and NUMA hinting fault protections. */ @@ -1318,10 +1318,10 @@ static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) /* * ARCHes with special requirements for evicting THP backing TLB entries can * implement this. Otherwise also, it can help optimize normal TLB flush in - * THP regime. stock flush_tlb_range() typically has optimization to nuke the - * entire TLB TLB if flush span is greater than a threshold, which will - * likely be true for a single huge page. 
Thus a single thp flush will - * invalidate the entire TLB which is not desitable. + * THP regime. Stock flush_tlb_range() typically has optimization to nuke the + * entire TLB if flush span is greater than a threshold, which will + * likely be true for a single huge page. Thus a single THP flush will + * invalidate the entire TLB which is not desirable. * e.g. see arch/arc: flush_pmd_tlb_range */ #define flush_pmd_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) diff --git a/include/linux/platform_data/davinci-cpufreq.h b/include/linux/platform_data/davinci-cpufreq.h index 3fbf9f2793b5..bc208c64e3d7 100644 --- a/include/linux/platform_data/davinci-cpufreq.h +++ b/include/linux/platform_data/davinci-cpufreq.h @@ -2,7 +2,7 @@ /* * TI DaVinci CPUFreq platform support. * - * Copyright (C) 2009 Texas Instruments, Inc. http://www.ti.com/ + * Copyright (C) 2009 Texas Instruments, Inc. https://www.ti.com/ */ #ifndef _MACH_DAVINCI_CPUFREQ_H diff --git a/include/linux/platform_data/davinci_asp.h b/include/linux/platform_data/davinci_asp.h index 7fe80f1c7e08..5d1fb0d78a22 100644 --- a/include/linux/platform_data/davinci_asp.h +++ b/include/linux/platform_data/davinci_asp.h @@ -1,7 +1,7 @@ /* * TI DaVinci Audio Serial Port support * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as diff --git a/include/linux/platform_data/elm.h b/include/linux/platform_data/elm.h index 0f491d8abfdd..3cc78f0447b1 100644 --- a/include/linux/platform_data/elm.h +++ b/include/linux/platform_data/elm.h @@ -2,7 +2,7 @@ /* * BCH Error Location Module * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com/ */ #ifndef __ELM_H diff --git 
a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h index a93841bfb9f7..e182a46e609f 100644 --- a/include/linux/platform_data/gpio-davinci.h +++ b/include/linux/platform_data/gpio-davinci.h @@ -1,7 +1,7 @@ /* * DaVinci GPIO Platform Related Defines * - * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2013 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as diff --git a/include/linux/platform_data/gpmc-omap.h b/include/linux/platform_data/gpmc-omap.h index ef663e570552..c9cc4e32435d 100644 --- a/include/linux/platform_data/gpmc-omap.h +++ b/include/linux/platform_data/gpmc-omap.h @@ -2,7 +2,7 @@ /* * OMAP GPMC Platform data * - * Copyright (C) 2014 Texas Instruments, Inc. - http://www.ti.com + * Copyright (C) 2014 Texas Instruments, Inc. - https://www.ti.com * Roger Quadros <[email protected]> */ diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index 9cffa9a64ab3..1af9c01563f9 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -43,10 +43,13 @@ * * TYPE1 HW watchdog implementation exist in old systems. * All new systems have TYPE2 HW watchdog. + * TYPE3 HW watchdog can exist on all systems with new CPLD. + * TYPE3 is selected by WD capability bit. 
*/ enum mlxreg_wdt_type { MLX_WDT_TYPE1, MLX_WDT_TYPE2, + MLX_WDT_TYPE3, }; /** @@ -93,7 +96,7 @@ struct mlxreg_core_data { umode_t mode; struct device_node *np; struct mlxreg_hotplug_device hpdev; - u8 health_cntr; + u32 health_cntr; bool attached; u8 regnum; }; diff --git a/include/linux/platform_data/mtd-davinci-aemif.h b/include/linux/platform_data/mtd-davinci-aemif.h index a403dd51dacc..a49826214a39 100644 --- a/include/linux/platform_data/mtd-davinci-aemif.h +++ b/include/linux/platform_data/mtd-davinci-aemif.h @@ -1,7 +1,7 @@ /* * TI DaVinci AEMIF support * - * Copyright 2010 (C) Texas Instruments, Inc. http://www.ti.com/ + * Copyright 2010 (C) Texas Instruments, Inc. https://www.ti.com/ * * This file is licensed under the terms of the GNU General Public License * version 2. This program is licensed "as is" without any warranty of any diff --git a/include/linux/platform_data/omap-twl4030.h b/include/linux/platform_data/omap-twl4030.h index 8419c8caf54e..0dd851ea1c72 100644 --- a/include/linux/platform_data/omap-twl4030.h +++ b/include/linux/platform_data/omap-twl4030.h @@ -3,7 +3,7 @@ * omap-twl4030.h - ASoC machine driver for TI SoC based boards with twl4030 * codec, header. * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com * All rights reserved. 
* * Author: Peter Ujfalusi <[email protected]> diff --git a/include/linux/platform_data/uio_pruss.h b/include/linux/platform_data/uio_pruss.h index 3d47d219827f..31f2e22661bc 100644 --- a/include/linux/platform_data/uio_pruss.h +++ b/include/linux/platform_data/uio_pruss.h @@ -3,7 +3,7 @@ * * Platform data for uio_pruss driver * - * Copyright (C) 2010-11 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2010-11 Texas Instruments Incorporated - https://www.ti.com/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as diff --git a/include/linux/platform_data/usb-omap.h b/include/linux/platform_data/usb-omap.h index fa579b4c666b..5e70d667031c 100644 --- a/include/linux/platform_data/usb-omap.h +++ b/include/linux/platform_data/usb-omap.h @@ -1,7 +1,7 @@ /* * usb-omap.h - Platform data for the various OMAP USB IPs * - * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com * * This software is distributed under the terms of the GNU General Public * License ("GPL") version 2, as published by the Free Software Foundation. diff --git a/include/linux/poison.h b/include/linux/poison.h index df34330b4e34..dc8ae5d8db03 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -24,10 +24,6 @@ #define LIST_POISON2 ((void *) 0x122 + POISON_POINTER_DELTA) /********** include/linux/timer.h **********/ -/* - * Magic number "tsta" to indicate a static timer initializer - * for the object debugging code. 
- */ #define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA) /********** mm/page_poison.c **********/ diff --git a/include/linux/rtc.h b/include/linux/rtc.h index bba3db3f7efa..22d1575e4991 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -55,10 +55,6 @@ extern struct class *rtc_class; * * The (current) exceptions are mostly filesystem hooks: * - the proc() hook for procfs - * - non-ioctl() chardev hooks: open(), release() - * - * REVISIT those periodic irq calls *do* have ops_lock when they're - * issued through ioctl() ... */ struct rtc_class_ops { int (*ioctl)(struct device *, unsigned int, unsigned long); diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 85023ddc2dc2..f889e332912f 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -178,14 +178,16 @@ static inline bool in_vfork(struct task_struct *tsk) */ static inline gfp_t current_gfp_context(gfp_t flags) { - if (unlikely(current->flags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS))) { + unsigned int pflags = READ_ONCE(current->flags); + + if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS))) { /* * NOIO implies both NOIO and NOFS and it is a weaker context * so always make sure it makes precedence */ - if (current->flags & PF_MEMALLOC_NOIO) + if (pflags & PF_MEMALLOC_NOIO) flags &= ~(__GFP_IO | __GFP_FS); - else if (current->flags & PF_MEMALLOC_NOFS) + else if (pflags & PF_MEMALLOC_NOFS) flags &= ~__GFP_FS; } return flags; diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index ae3060f0b0c9..a98965007eef 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -88,6 +88,7 @@ struct task_struct *fork_idle(int); struct mm_struct *copy_init_mm(void); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); +int kernel_wait(pid_t pid, int *stat); extern void free_task(struct task_struct *tsk); diff --git 
a/include/linux/soc/ti/k3-ringacc.h b/include/linux/soc/ti/k3-ringacc.h index 7ac115432fa1..5a472eca5ee4 100644 --- a/include/linux/soc/ti/k3-ringacc.h +++ b/include/linux/soc/ti/k3-ringacc.h @@ -2,7 +2,7 @@ /* * K3 Ring Accelerator (RA) subsystem interface * - * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2019 Texas Instruments Incorporated - https://www.ti.com */ #ifndef __SOC_TI_K3_RINGACC_API_H_ diff --git a/include/linux/soc/ti/knav_qmss.h b/include/linux/soc/ti/knav_qmss.h index 9745df6ed9d3..c75ef99c99ca 100644 --- a/include/linux/soc/ti/knav_qmss.h +++ b/include/linux/soc/ti/knav_qmss.h @@ -1,7 +1,7 @@ /* * Keystone Navigator Queue Management Sub-System header * - * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com + * Copyright (C) 2014 Texas Instruments Incorporated - https://www.ti.com * Author: Sandeep Nair <[email protected]> * Cyril Chemparathy <[email protected]> * Santosh Shilimkar <[email protected]> diff --git a/include/linux/soc/ti/ti-msgmgr.h b/include/linux/soc/ti/ti-msgmgr.h index eac8e0c6fe11..1f6e76d423cf 100644 --- a/include/linux/soc/ti/ti-msgmgr.h +++ b/include/linux/soc/ti/ti-msgmgr.h @@ -1,7 +1,7 @@ /* * Texas Instruments' Message Manager * - * Copyright (C) 2015-2016 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2015-2016 Texas Instruments Incorporated - https://www.ti.com/ * Nishanth Menon * * This program is free software; you can redistribute it and/or modify diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index 96840def9d69..ea193414298b 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -8,26 +8,9 @@ #ifndef _LINUX_SOCKPTR_H #define _LINUX_SOCKPTR_H -#include <linux/compiler.h> #include <linux/slab.h> #include <linux/uaccess.h> -#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE -typedef union { - void *kernel; - void __user *user; -} sockptr_t; - -static inline bool sockptr_is_kernel(sockptr_t sockptr) -{ - 
return (unsigned long)sockptr.kernel >= TASK_SIZE; -} - -static inline sockptr_t KERNEL_SOCKPTR(void *p) -{ - return (sockptr_t) { .kernel = p }; -} -#else /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ typedef struct { union { void *kernel; @@ -45,15 +28,10 @@ static inline sockptr_t KERNEL_SOCKPTR(void *p) { return (sockptr_t) { .kernel = p, .is_kernel = true }; } -#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ -static inline int __must_check init_user_sockptr(sockptr_t *sp, void __user *p, - size_t size) +static inline sockptr_t USER_SOCKPTR(void __user *p) { - if (!access_ok(p, size)) - return -EFAULT; - *sp = (sockptr_t) { .user = p }; - return 0; + return (sockptr_t) { .user = p }; } static inline bool sockptr_is_null(sockptr_t sockptr) diff --git a/include/linux/swap.h b/include/linux/swap.h index 7eb59bc552a5..661046994db4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -352,7 +352,7 @@ extern void deactivate_page(struct page *page); extern void mark_page_lazyfree(struct page *page); extern void swap_setup(void); -extern void lru_cache_add_active_or_unevictable(struct page *page, +extern void lru_cache_add_inactive_or_unevictable(struct page *page, struct vm_area_struct *vma); /* linux/mm/vmscan.c */ @@ -414,9 +414,14 @@ extern struct address_space *swapper_spaces[]; extern unsigned long total_swapcache_pages(void); extern void show_swap_cache_info(void); extern int add_to_swap(struct page *page); -extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); -extern void __delete_from_swap_cache(struct page *, swp_entry_t entry); +extern void *get_shadow_from_swap_cache(swp_entry_t entry); +extern int add_to_swap_cache(struct page *page, swp_entry_t entry, + gfp_t gfp, void **shadowp); +extern void __delete_from_swap_cache(struct page *page, + swp_entry_t entry, void *shadow); extern void delete_from_swap_cache(struct page *); +extern void clear_shadow_from_swap_cache(int type, unsigned long begin, + unsigned long end); 
extern void free_page_and_swap_cache(struct page *); extern void free_pages_and_swap_cache(struct page **, int); extern struct page *lookup_swap_cache(swp_entry_t entry, @@ -569,14 +574,19 @@ static inline int add_to_swap(struct page *page) return 0; } +static inline void *get_shadow_from_swap_cache(swp_entry_t entry) +{ + return NULL; +} + static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, - gfp_t gfp_mask) + gfp_t gfp_mask, void **shadowp) { return -1; } static inline void __delete_from_swap_cache(struct page *page, - swp_entry_t entry) + swp_entry_t entry, void *shadow) { } @@ -584,6 +594,11 @@ static inline void delete_from_swap_cache(struct page *page) { } +static inline void clear_shadow_from_swap_cache(int type, unsigned long begin, + unsigned long end) +{ +} + static inline int page_swapcount(struct page *page) { return 0; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a2429d336593..dc2b827c81e5 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -263,7 +263,7 @@ static inline void addr_limit_user_check(void) return; #endif - if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS), + if (CHECK_DATA_CORRUPTION(uaccess_kernel(), "Invalid address limit on user-mode return")) force_sig(SIGKILL); diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 0a76ddc07d59..94b285411659 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -6,11 +6,27 @@ #include <linux/sched.h> #include <linux/thread_info.h> -#define uaccess_kernel() segment_eq(get_fs(), KERNEL_DS) - #include <asm/uaccess.h> /* + * Force the uaccess routines to be wired up for actual userspace access, + * overriding any possible set_fs(KERNEL_DS) still lingering around. Undone + * using force_uaccess_end below. 
+ */ +static inline mm_segment_t force_uaccess_begin(void) +{ + mm_segment_t fs = get_fs(); + + set_fs(USER_DS); + return fs; +} + +static inline void force_uaccess_end(mm_segment_t oldfs) +{ + set_fs(oldfs); +} + +/* * Architectures should provide two primitives (raw_copy_{to,from}_user()) * and get rid of their private instances of copy_{to,from}_user() and * __copy_{to,from}_user{,_inatomic}(). diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index 553b34c8b5f7..977caf96c8d2 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -110,12 +110,6 @@ static inline int vga_get_uninterruptible(struct pci_dev *pdev, } #if defined(CONFIG_VGA_ARB) -extern int vga_tryget(struct pci_dev *pdev, unsigned int rsrc); -#else -static inline int vga_tryget(struct pci_dev *pdev, unsigned int rsrc) { return 0; } -#endif - -#if defined(CONFIG_VGA_ARB) extern void vga_put(struct pci_dev *pdev, unsigned int rsrc); #else #define vga_put(pdev, rsrc) diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 24fc7c3ae7d6..2e6ca53b9bbd 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -56,6 +56,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #endif #ifdef CONFIG_MIGRATION PGMIGRATE_SUCCESS, PGMIGRATE_FAIL, + THP_MIGRATION_SUCCESS, + THP_MIGRATION_FAIL, + THP_MIGRATION_SPLIT, #endif #ifdef CONFIG_COMPACTION COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED, diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 1464ce6ffa31..9b19e6bb68b5 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -210,6 +210,8 @@ extern int watchdog_init_timeout(struct watchdog_device *wdd, extern int watchdog_register_device(struct watchdog_device *); extern void watchdog_unregister_device(struct watchdog_device *); +int watchdog_set_last_hw_keepalive(struct watchdog_device *, unsigned int); + /* devres register variant */ int devm_watchdog_register_device(struct device *dev, struct 
watchdog_device *); diff --git a/include/linux/wkup_m3_ipc.h b/include/linux/wkup_m3_ipc.h index e497e621dbb7..3f496967b538 100644 --- a/include/linux/wkup_m3_ipc.h +++ b/include/linux/wkup_m3_ipc.h @@ -1,7 +1,7 @@ /* * TI Wakeup M3 for AMx3 SoCs Power Management Routines * - * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright (C) 2015 Texas Instruments Incorporated - https://www.ti.com/ * Dave Gerlach <[email protected]> * * This program is free software; you can redistribute it and/or diff --git a/include/linux/xxhash.h b/include/linux/xxhash.h index 52b073fea17f..df42511438d0 100644 --- a/include/linux/xxhash.h +++ b/include/linux/xxhash.h @@ -34,7 +34,7 @@ * ("BSD"). * * You can contact the author at: - * - xxHash homepage: http://cyan4973.github.io/xxHash/ + * - xxHash homepage: https://cyan4973.github.io/xxHash/ * - xxHash source repository: https://github.com/Cyan4973/xxHash */ diff --git a/include/linux/xz.h b/include/linux/xz.h index 64cffa6ddfce..9884c8440188 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -2,7 +2,7 @@ * XZ decompressor * * Authors: Lasse Collin <[email protected]> - * Igor Pavlov <http://7-zip.org/> + * Igor Pavlov <https://7-zip.org/> * * This file has been put into the public domain. * You can do whatever you want with this file. @@ -28,7 +28,7 @@ * enum xz_mode - Operation mode * * @XZ_SINGLE: Single-call mode. This uses less RAM than - * than multi-call modes, because the LZMA2 + * multi-call modes, because the LZMA2 * dictionary doesn't need to be allocated as * part of the decoder state. 
All required data * structures are allocated at initialization, diff --git a/include/linux/zlib.h b/include/linux/zlib.h index c757d848a758..78ede944c082 100644 --- a/include/linux/zlib.h +++ b/include/linux/zlib.h @@ -23,7 +23,7 @@ The data format used by the zlib library is described by RFCs (Request for - Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt + Comments) 1950 to 1952 in the files https://www.ietf.org/rfc/rfc1950.txt (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format). */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 1e209ce7d1bd..aa8893c68c50 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -304,6 +304,10 @@ void inet_csk_listen_stop(struct sock *sk); void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); +/* update the fast reuse flag when adding a socket */ +void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, + struct sock *sk); + struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); #define TCP_PINGPONG_THRESH 3 diff --git a/include/net/tcp.h b/include/net/tcp.h index dbf5c791a6eb..eab6c7510b5b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1672,6 +1672,8 @@ void tcp_fastopen_destroy_cipher(struct sock *sk); void tcp_fastopen_ctx_destroy(struct net *net); int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, void *primary_key, void *backup_key); +int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk, + u64 *key); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, diff --git a/include/soc/arc/aux.h b/include/soc/arc/aux.h index e223c4ffa153..9c2eff6140b6 100644 --- a/include/soc/arc/aux.h +++ b/include/soc/arc/aux.h @@ -22,7 +22,7 @@ static inline int read_aux_reg(u32 r) /* * function helps elide unused variable warning - * see: 
http://lists.infradead.org/pipermail/linux-snps-arc/2016-November/001748.html + * see: https://lists.infradead.org/pipermail/linux-snps-arc/2016-November/001748.html */ static inline void write_aux_reg(u32 r, u32 v) { diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 2c735a3e6613..9417a34aad08 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -17,7 +17,7 @@ ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI), ERSN(PAPR_HCALL), \ ERSN(S390_UCONTROL), ERSN(WATCHDOG), ERSN(S390_TSCH), ERSN(EPR),\ ERSN(SYSTEM_EVENT), ERSN(S390_STSI), ERSN(IOAPIC_EOI), \ - ERSN(HYPERV) + ERSN(HYPERV), ERSN(ARM_NISV) TRACE_EVENT(kvm_userspace_exit, TP_PROTO(__u32 reason, int errno), diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h index 705b33d1e395..4d434398d64d 100644 --- a/include/trace/events/migrate.h +++ b/include/trace/events/migrate.h @@ -46,13 +46,18 @@ MIGRATE_REASON TRACE_EVENT(mm_migrate_pages, TP_PROTO(unsigned long succeeded, unsigned long failed, - enum migrate_mode mode, int reason), + unsigned long thp_succeeded, unsigned long thp_failed, + unsigned long thp_split, enum migrate_mode mode, int reason), - TP_ARGS(succeeded, failed, mode, reason), + TP_ARGS(succeeded, failed, thp_succeeded, thp_failed, + thp_split, mode, reason), TP_STRUCT__entry( __field( unsigned long, succeeded) __field( unsigned long, failed) + __field( unsigned long, thp_succeeded) + __field( unsigned long, thp_failed) + __field( unsigned long, thp_split) __field( enum migrate_mode, mode) __field( int, reason) ), @@ -60,13 +65,19 @@ TRACE_EVENT(mm_migrate_pages, TP_fast_assign( __entry->succeeded = succeeded; __entry->failed = failed; + __entry->thp_succeeded = thp_succeeded; + __entry->thp_failed = thp_failed; + __entry->thp_split = thp_split; __entry->mode = mode; __entry->reason = reason; ), - TP_printk("nr_succeeded=%lu nr_failed=%lu mode=%s reason=%s", + TP_printk("nr_succeeded=%lu nr_failed=%lu nr_thp_succeeded=%lu 
nr_thp_failed=%lu nr_thp_split=%lu mode=%s reason=%s", __entry->succeeded, __entry->failed, + __entry->thp_succeeded, + __entry->thp_failed, + __entry->thp_split, __print_symbolic(__entry->mode, MIGRATE_MODE), __print_symbolic(__entry->reason, MIGRATE_REASON)) ); diff --git a/include/trace/events/random.h b/include/trace/events/random.h index 32c10a515e2d..9570a10cb949 100644 --- a/include/trace/events/random.h +++ b/include/trace/events/random.h @@ -307,6 +307,23 @@ TRACE_EVENT(urandom_read, __entry->pool_left, __entry->input_left) ); +TRACE_EVENT(prandom_u32, + + TP_PROTO(unsigned int ret), + + TP_ARGS(ret), + + TP_STRUCT__entry( + __field( unsigned int, ret) + ), + + TP_fast_assign( + __entry->ret = ret; + ), + + TP_printk("ret=%u" , __entry->ret) +); + #endif /* _TRACE_RANDOM_H */ /* This part must be outside protection */ diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 4bee7de5f306..82f327801267 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -1004,7 +1004,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) #define DRM_FORMAT_MOD_AMLOGIC_FBC(__layout, __options) \ fourcc_mod_code(AMLOGIC, \ ((__layout) & __fourcc_mod_amlogic_layout_mask) | \ - ((__options) & __fourcc_mod_amlogic_options_mask \ + (((__options) & __fourcc_mod_amlogic_options_mask) \ << __fourcc_mod_amlogic_options_shift)) /* Amlogic FBC Layouts */ diff --git a/include/uapi/linux/auto_dev-ioctl.h b/include/uapi/linux/auto_dev-ioctl.h index 374742651c30..62e625356dc8 100644 --- a/include/uapi/linux/auto_dev-ioctl.h +++ b/include/uapi/linux/auto_dev-ioctl.h @@ -82,7 +82,7 @@ struct args_ismountpoint { /* * All the ioctls use this structure. * When sending a path size must account for the total length - * of the chunk of memory otherwise is is the size of the + * of the chunk of memory otherwise it is the size of the * structure. 
*/ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b134e679e9db..0480f893facd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key { __u32 attach_type; /* program attach type */ }; +union bpf_iter_link_info { + struct { + __u32 map_fd; + } map; +}; + /* BPF syscall commands, see bpf(2) man-page for details. */ enum bpf_cmd { BPF_MAP_CREATE, @@ -249,13 +255,6 @@ enum bpf_link_type { MAX_BPF_LINK_TYPE, }; -enum bpf_iter_link_info { - BPF_ITER_LINK_UNSPEC = 0, - BPF_ITER_LINK_MAP_FD = 1, - - MAX_BPF_ITER_LINK_INFO, -}; - /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. @@ -623,6 +622,8 @@ union bpf_attr { }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ + __aligned_u64 iter_info; /* extra bpf_iter_link_info */ + __u32 iter_info_len; /* iter_info length */ } link_create; struct { /* struct used by BPF_LINK_UPDATE command */ diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index c6dd0215482e..22220945a5fd 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -53,7 +53,7 @@ typedef __s64 Elf64_Sxword; * * - Oracle: Linker and Libraries. * Part No: 817–1984–19, August 2011. - * http://docs.oracle.com/cd/E18752_01/pdf/817-1984.pdf + * https://docs.oracle.com/cd/E18752_01/pdf/817-1984.pdf * * - System V ABI AMD64 Architecture Processor Supplement * Draft Version 0.99.4, diff --git a/include/uapi/linux/map_to_7segment.h b/include/uapi/linux/map_to_7segment.h index f9ed18134b83..13a06e5e966e 100644 --- a/include/uapi/linux/map_to_7segment.h +++ b/include/uapi/linux/map_to_7segment.h @@ -24,7 +24,7 @@ * of (ASCII) characters to a 7-segments notation. * * The 7 segment's wikipedia notation below is used as standard. 
- * See: http://en.wikipedia.org/wiki/Seven_segment_display + * See: https://en.wikipedia.org/wiki/Seven_segment_display * * Notation: +-a-+ * f b diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h index 2fce8b6876e9..f6d2f83cbe29 100644 --- a/include/uapi/linux/types.h +++ b/include/uapi/linux/types.h @@ -7,7 +7,7 @@ #ifndef __ASSEMBLY__ #ifndef __KERNEL__ #ifndef __EXPORTED_HEADERS__ -#warning "Attempt to use kernel headers from user space, see http://kernelnewbies.org/KernelHeaders" +#warning "Attempt to use kernel headers from user space, see https://kernelnewbies.org/KernelHeaders" #endif /* __EXPORTED_HEADERS__ */ #endif diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 48766fdf6580..0f865ae4ba89 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -1229,7 +1229,7 @@ struct usb_set_sel_req { * As per USB compliance update, a device that is actively drawing * more than 100mA from USB must report itself as bus-powered in * the GetStatus(DEVICE) call. - * http://compliance.usb.org/index.asp?UpdateFile=Electrical&Format=Standard#34 + * https://compliance.usb.org/index.asp?UpdateFile=Electrical&Format=Standard#34 */ #define USB_SELF_POWER_VBUS_MAX_DRAW 100 diff --git a/include/xen/interface/io/displif.h b/include/xen/interface/io/displif.h index fdc279dc4a88..d43ca0361f86 100644 --- a/include/xen/interface/io/displif.h +++ b/include/xen/interface/io/displif.h @@ -38,7 +38,8 @@ * Protocol version ****************************************************************************** */ -#define XENDISPL_PROTOCOL_VERSION "1" +#define XENDISPL_PROTOCOL_VERSION "2" +#define XENDISPL_PROTOCOL_VERSION_INT 2 /* ****************************************************************************** @@ -202,6 +203,9 @@ * Width and height of the connector in pixels separated by * XENDISPL_RESOLUTION_SEPARATOR. This defines visible area of the * display. 
+ * If backend provides extended display identification data (EDID) with + * XENDISPL_OP_GET_EDID request then EDID values must take precedence + * over the resolutions defined here. * *------------------ Connector Request Transport Parameters ------------------- * @@ -349,6 +353,8 @@ #define XENDISPL_OP_FB_DETACH 0x13 #define XENDISPL_OP_SET_CONFIG 0x14 #define XENDISPL_OP_PG_FLIP 0x15 +/* The below command is available in protocol version 2 and above. */ +#define XENDISPL_OP_GET_EDID 0x16 /* ****************************************************************************** @@ -377,6 +383,10 @@ #define XENDISPL_FIELD_BE_ALLOC "be-alloc" #define XENDISPL_FIELD_UNIQUE_ID "unique-id" +#define XENDISPL_EDID_BLOCK_SIZE 128 +#define XENDISPL_EDID_BLOCK_COUNT 256 +#define XENDISPL_EDID_MAX_SIZE (XENDISPL_EDID_BLOCK_SIZE * XENDISPL_EDID_BLOCK_COUNT) + /* ****************************************************************************** * STATUS RETURN CODES @@ -451,7 +461,9 @@ * +----------------+----------------+----------------+----------------+ * | gref_directory | 40 * +----------------+----------------+----------------+----------------+ - * | reserved | 44 + * | data_ofs | 44 + * +----------------+----------------+----------------+----------------+ + * | reserved | 48 * +----------------+----------------+----------------+----------------+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| * +----------------+----------------+----------------+----------------+ @@ -494,6 +506,7 @@ * buffer size (buffer_sz) exceeds what can be addressed by this single page, * then reference to the next page must be supplied (see gref_dir_next_page * below) + * data_ofs - uint32_t, offset of the data in the buffer, octets */ #define XENDISPL_DBUF_FLG_REQ_ALLOC (1 << 0) @@ -506,6 +519,7 @@ struct xendispl_dbuf_create_req { uint32_t buffer_sz; uint32_t flags; grant_ref_t gref_directory; + uint32_t data_ofs; }; /* @@ -732,6 +746,44 @@ struct xendispl_page_flip_req { }; /* + * 
Request EDID - request EDID describing current connector: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | _OP_GET_EDID | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | buffer_sz | 8 + * +----------------+----------------+----------------+----------------+ + * | gref_directory | 12 + * +----------------+----------------+----------------+----------------+ + * | reserved | 16 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 64 + * +----------------+----------------+----------------+----------------+ + * + * Notes: + * - This command is not available in protocol version 1 and should be + * ignored. + * - This request is optional and if not supported then visible area + * is defined by the relevant XenStore's "resolution" property. + * - Shared buffer, allocated for EDID storage, must not be less then + * XENDISPL_EDID_MAX_SIZE octets. + * + * buffer_sz - uint32_t, buffer size to be allocated, octets + * gref_directory - grant_ref_t, a reference to the first shared page + * describing EDID buffer references. See XENDISPL_OP_DBUF_CREATE for + * grant page directory structure (struct xendispl_page_directory). + * + * See response format for this request. 
+ */ + +struct xendispl_get_edid_req { + uint32_t buffer_sz; + grant_ref_t gref_directory; +}; + +/* *---------------------------------- Responses -------------------------------- * * All response packets have the same length (64 octets) @@ -753,6 +805,35 @@ struct xendispl_page_flip_req { * id - uint16_t, private guest value, echoed from request * status - int32_t, response status, zero on success and -XEN_EXX on failure * + * + * Get EDID response - response for XENDISPL_OP_GET_EDID: + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | id | operation | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | status | 8 + * +----------------+----------------+----------------+----------------+ + * | edid_sz | 12 + * +----------------+----------------+----------------+----------------+ + * | reserved | 16 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 64 + * +----------------+----------------+----------------+----------------+ + * + * Notes: + * - This response is not available in protocol version 1 and should be + * ignored. 
+ * + * edid_sz - uint32_t, size of the EDID, octets + */ + +struct xendispl_get_edid_resp { + uint32_t edid_sz; +}; + +/* *----------------------------------- Events ---------------------------------- * * Events are sent via a shared page allocated by the front and propagated by @@ -804,6 +885,7 @@ struct xendispl_req { struct xendispl_fb_detach_req fb_detach; struct xendispl_set_config_req set_config; struct xendispl_page_flip_req pg_flip; + struct xendispl_get_edid_req get_edid; uint8_t reserved[56]; } op; }; @@ -813,7 +895,10 @@ struct xendispl_resp { uint8_t operation; uint8_t reserved; int32_t status; - uint8_t reserved1[56]; + union { + struct xendispl_get_edid_resp get_edid; + uint8_t reserved1[56]; + } op; }; struct xendispl_evt { diff --git a/init/init_task.c b/init/init_task.c index 89024e8c4e95..f6889fce64af 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -65,6 +65,7 @@ struct task_struct init_task #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK __init_task_data #endif + __aligned(L1_CACHE_BYTES) = { #ifdef CONFIG_THREAD_INFO_IN_TASK .thread_info = INIT_THREAD_INFO(init_task), diff --git a/ipc/sem.c b/ipc/sem.c index 3687b71151b3..8c0244e0365e 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -585,8 +585,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) /* * Called with sem_ids.rwsem and ipcp locked. */ -static inline int sem_more_checks(struct kern_ipc_perm *ipcp, - struct ipc_params *params) +static int sem_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params) { struct sem_array *sma; diff --git a/ipc/shm.c b/ipc/shm.c index bf38d7e2fbe9..f1ed36e3ac9f 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -711,8 +711,7 @@ no_file: /* * Called with shm_ids.rwsem and ipcp locked. 
*/ -static inline int shm_more_checks(struct kern_ipc_perm *ipcp, - struct ipc_params *params) +static int shm_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params) { struct shmid_kernel *shp; @@ -1381,7 +1380,6 @@ static long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr, int versio case SHM_LOCK: case SHM_UNLOCK: return shmctl_do_lock(ns, shmid, cmd); - break; default: return -EINVAL; } diff --git a/kernel/Makefile b/kernel/Makefile index 5350fd292910..b3da548691c9 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -36,7 +36,7 @@ KCOV_INSTRUMENT_stacktrace.o := n KCOV_INSTRUMENT_kcov.o := n KASAN_SANITIZE_kcov.o := n KCSAN_SANITIZE_kcov.o := n -CFLAGS_kcov.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) +CFLAGS_kcov.o := $(call cc-option, -fno-conserve-stack) -fno-stack-protector # cond_syscall is currently not LTO compatible CFLAGS_sys_ni.o = $(DISABLE_LTO) diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 363b9cafc2d8..b6715964b685 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -338,8 +338,8 @@ static void bpf_iter_link_release(struct bpf_link *link) struct bpf_iter_link *iter_link = container_of(link, struct bpf_iter_link, link); - if (iter_link->aux.map) - bpf_map_put_with_uref(iter_link->aux.map); + if (iter_link->tinfo->reg_info->detach_target) + iter_link->tinfo->reg_info->detach_target(&iter_link->aux); } static void bpf_iter_link_dealloc(struct bpf_link *link) @@ -390,15 +390,35 @@ bool bpf_link_is_iter(struct bpf_link *link) int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { + union bpf_iter_link_info __user *ulinfo; struct bpf_link_primer link_primer; struct bpf_iter_target_info *tinfo; - struct bpf_iter_aux_info aux = {}; + union bpf_iter_link_info linfo; struct bpf_iter_link *link; - u32 prog_btf_id, target_fd; + u32 prog_btf_id, linfo_len; bool existed = false; - struct bpf_map *map; int err; + if (attr->link_create.target_fd || 
attr->link_create.flags) + return -EINVAL; + + memset(&linfo, 0, sizeof(union bpf_iter_link_info)); + + ulinfo = u64_to_user_ptr(attr->link_create.iter_info); + linfo_len = attr->link_create.iter_info_len; + if (!ulinfo ^ !linfo_len) + return -EINVAL; + + if (ulinfo) { + err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo), + linfo_len); + if (err) + return err; + linfo_len = min_t(u32, linfo_len, sizeof(linfo)); + if (copy_from_user(&linfo, ulinfo, linfo_len)) + return -EFAULT; + } + prog_btf_id = prog->aux->attach_btf_id; mutex_lock(&targets_mutex); list_for_each_entry(tinfo, &targets, list) { @@ -411,13 +431,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) if (!existed) return -ENOENT; - /* Make sure user supplied flags are target expected. */ - target_fd = attr->link_create.target_fd; - if (attr->link_create.flags != tinfo->reg_info->req_linfo) - return -EINVAL; - if (!attr->link_create.flags && target_fd) - return -EINVAL; - link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); if (!link) return -ENOMEM; @@ -431,28 +444,15 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) return err; } - if (tinfo->reg_info->req_linfo == BPF_ITER_LINK_MAP_FD) { - map = bpf_map_get_with_uref(target_fd); - if (IS_ERR(map)) { - err = PTR_ERR(map); - goto cleanup_link; - } - - aux.map = map; - err = tinfo->reg_info->check_target(prog, &aux); + if (tinfo->reg_info->attach_target) { + err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux); if (err) { - bpf_map_put_with_uref(map); - goto cleanup_link; + bpf_link_cleanup(&link_primer); + return err; } - - link->aux.map = map; } return bpf_link_settle(&link_primer); - -cleanup_link: - bpf_link_cleanup(&link_primer); - return err; } static void init_seq_meta(struct bpf_iter_priv_data *priv_data, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index bde93344164d..ed0b3578867c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1966,7 +1966,7 @@ void 
bpf_prog_array_delete_safe(struct bpf_prog_array *array, * @index: the index of the program to replace * * Skips over dummy programs, by not counting them, when calculating - * the the position of the program to replace. + * the position of the program to replace. * * Return: * * 0 - Success diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c index fbe1f557cb88..af86048e5afd 100644 --- a/kernel/bpf/map_iter.c +++ b/kernel/bpf/map_iter.c @@ -98,12 +98,21 @@ static struct bpf_iter_reg bpf_map_reg_info = { .seq_info = &bpf_map_seq_info, }; -static int bpf_iter_check_map(struct bpf_prog *prog, - struct bpf_iter_aux_info *aux) +static int bpf_iter_attach_map(struct bpf_prog *prog, + union bpf_iter_link_info *linfo, + struct bpf_iter_aux_info *aux) { u32 key_acc_size, value_acc_size, key_size, value_size; - struct bpf_map *map = aux->map; + struct bpf_map *map; bool is_percpu = false; + int err = -EINVAL; + + if (!linfo->map.map_fd) + return -EBADF; + + map = bpf_map_get_with_uref(linfo->map.map_fd); + if (IS_ERR(map)) + return PTR_ERR(map); if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || @@ -112,7 +121,7 @@ static int bpf_iter_check_map(struct bpf_prog *prog, else if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_LRU_HASH && map->map_type != BPF_MAP_TYPE_ARRAY) - return -EINVAL; + goto put_map; key_acc_size = prog->aux->max_rdonly_access; value_acc_size = prog->aux->max_rdwr_access; @@ -122,10 +131,22 @@ static int bpf_iter_check_map(struct bpf_prog *prog, else value_size = round_up(map->value_size, 8) * num_possible_cpus(); - if (key_acc_size > key_size || value_acc_size > value_size) - return -EACCES; + if (key_acc_size > key_size || value_acc_size > value_size) { + err = -EACCES; + goto put_map; + } + aux->map = map; return 0; + +put_map: + bpf_map_put_with_uref(map); + return err; +} + +static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux) +{ + bpf_map_put_with_uref(aux->map); } 
DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta, @@ -133,8 +154,8 @@ DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta, static const struct bpf_iter_reg bpf_map_elem_reg_info = { .target = "bpf_map_elem", - .check_target = bpf_iter_check_map, - .req_linfo = BPF_ITER_LINK_MAP_FD, + .attach_target = bpf_iter_attach_map, + .detach_target = bpf_iter_detach_map, .ctx_arg_info_size = 2, .ctx_arg_info = { { offsetof(struct bpf_iter__bpf_map_elem, key), diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2f343ce15747..86299a292214 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3883,7 +3883,7 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog * return -EINVAL; } -#define BPF_LINK_CREATE_LAST_FIELD link_create.flags +#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len static int link_create(union bpf_attr *attr) { enum bpf_prog_type ptype; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b6ccfce3bf4c..ef938f17b944 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8294,7 +8294,7 @@ static bool stacksafe(struct bpf_func_state *old, if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != cur->stack[spi].slot_type[i % BPF_REG_SIZE]) /* Ex: old explored (safe) state has STACK_SPILL in - * this stack slot, but current has has STACK_MISC -> + * this stack slot, but current has STACK_MISC -> * this verifier states are not equivalent, * return false to continue verification of this path */ diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 18175687133a..106e4500fd53 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -11,6 +11,8 @@ #include <asm/page.h> #include <asm/sections.h> +#include <crypto/sha.h> + /* vmcoreinfo stuff */ unsigned char *vmcoreinfo_data; size_t vmcoreinfo_size; @@ -376,6 +378,53 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void) } EXPORT_SYMBOL(paddr_vmcoreinfo_note); +#define NOTES_SIZE (&__stop_notes - &__start_notes) 
+#define BUILD_ID_MAX SHA1_DIGEST_SIZE +#define NT_GNU_BUILD_ID 3 + +struct elf_note_section { + struct elf_note n_hdr; + u8 n_data[]; +}; + +/* + * Add build ID from .notes section as generated by the GNU ld(1) + * or LLVM lld(1) --build-id option. + */ +static void add_build_id_vmcoreinfo(void) +{ + char build_id[BUILD_ID_MAX * 2 + 1]; + int n_remain = NOTES_SIZE; + + while (n_remain >= sizeof(struct elf_note)) { + const struct elf_note_section *note_sec = + &__start_notes + NOTES_SIZE - n_remain; + const u32 n_namesz = note_sec->n_hdr.n_namesz; + + if (note_sec->n_hdr.n_type == NT_GNU_BUILD_ID && + n_namesz != 0 && + !strcmp((char *)&note_sec->n_data[0], "GNU")) { + if (note_sec->n_hdr.n_descsz <= BUILD_ID_MAX) { + const u32 n_descsz = note_sec->n_hdr.n_descsz; + const u8 *s = &note_sec->n_data[n_namesz]; + + s = PTR_ALIGN(s, 4); + bin2hex(build_id, s, n_descsz); + build_id[2 * n_descsz] = '\0'; + VMCOREINFO_BUILD_ID(build_id); + return; + } + pr_warn("Build ID is too large to include in vmcoreinfo: %u > %u\n", + note_sec->n_hdr.n_descsz, + BUILD_ID_MAX); + return; + } + n_remain -= sizeof(struct elf_note) + + ALIGN(note_sec->n_hdr.n_namesz, 4) + + ALIGN(note_sec->n_hdr.n_descsz, 4); + } +} + static int __init crash_save_vmcoreinfo_init(void) { vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL); @@ -394,6 +443,7 @@ static int __init crash_save_vmcoreinfo_init(void) } VMCOREINFO_OSRELEASE(init_uts_ns.name.release); + add_build_id_vmcoreinfo(); VMCOREINFO_PAGESIZE(PAGE_SIZE); VMCOREINFO_SYMBOL(init_uts_ns); diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index c6ce894e4ce9..58cbe357fb2b 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -217,10 +217,9 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, if (add_mark) perf_callchain_store_context(&ctx, PERF_CONTEXT_USER); - fs = get_fs(); - set_fs(USER_DS); + fs = force_uaccess_begin(); perf_callchain_user(&ctx, regs); - set_fs(fs); + 
force_uaccess_end(fs); } } diff --git a/kernel/events/core.c b/kernel/events/core.c index d1f0a7e5b182..6961333ebad5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6453,10 +6453,9 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size, /* Data. */ sp = perf_user_stack_pointer(regs); - fs = get_fs(); - set_fs(USER_DS); + fs = force_uaccess_begin(); rem = __output_copy_user(handle, (void *) sp, dump_size); - set_fs(fs); + force_uaccess_end(fs); dyn_size = dump_size - rem; perf_output_skip(handle, rem); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 25de10c904e6..649fd53dc9ad 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -184,7 +184,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, if (new_page) { get_page(new_page); page_add_new_anon_rmap(new_page, vma, addr, false); - lru_cache_add_active_or_unevictable(new_page, vma); + lru_cache_add_inactive_or_unevictable(new_page, vma); } else /* no new page, just dec_mm_counter for old_page */ dec_mm_counter(mm, MM_ANONPAGES); @@ -376,7 +376,7 @@ __update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d) if (!vaddr || !d) return -EINVAL; - ret = get_user_pages_remote(NULL, mm, vaddr, 1, + ret = get_user_pages_remote(mm, vaddr, 1, FOLL_WRITE, &page, &vma, NULL); if (unlikely(ret <= 0)) { /* @@ -477,7 +477,7 @@ retry: if (is_register) gup_flags |= FOLL_SPLIT_PMD; /* Read the page with vaddr into memory */ - ret = get_user_pages_remote(NULL, mm, vaddr, 1, gup_flags, + ret = get_user_pages_remote(mm, vaddr, 1, gup_flags, &old_page, &vma, NULL); if (ret <= 0) return ret; @@ -2029,7 +2029,7 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr) * but we treat this as a 'remote' access since it is * essentially a kernel access to the memory. 
*/ - result = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &page, + result = get_user_pages_remote(mm, vaddr, 1, FOLL_FORCE, &page, NULL, NULL); if (result < 0) return result; diff --git a/kernel/exit.c b/kernel/exit.c index e731c414e024..733e80f334e7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -732,7 +732,7 @@ void __noreturn do_exit(long code) * mm_release()->clear_child_tid() from writing to a user-controlled * kernel address. */ - set_fs(USER_DS); + force_uaccess_begin(); if (unlikely(in_atomic())) { pr_info("note: %s[%d] exited with preempt_count %d\n", @@ -1626,6 +1626,22 @@ long kernel_wait4(pid_t upid, int __user *stat_addr, int options, return ret; } +int kernel_wait(pid_t pid, int *stat) +{ + struct wait_opts wo = { + .wo_type = PIDTYPE_PID, + .wo_pid = find_get_pid(pid), + .wo_flags = WEXITED, + }; + int ret; + + ret = do_wait(&wo); + if (ret > 0 && wo.wo_stat) + *stat = wo.wo_stat; + put_pid(wo.wo_pid); + return ret; +} + SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, int, options, struct rusage __user *, ru) { diff --git a/kernel/futex.c b/kernel/futex.c index 83404124b77b..61e8153e6c76 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -678,7 +678,7 @@ static int fault_in_user_writeable(u32 __user *uaddr) int ret; mmap_read_lock(mm); - ret = fixup_user_fault(current, mm, (unsigned long)uaddr, + ret = fixup_user_fault(mm, (unsigned long)uaddr, FAULT_FLAG_WRITE, NULL); mmap_read_unlock(mm); diff --git a/kernel/kcov.c b/kernel/kcov.c index 6afae0bcbac4..6b8368be89c8 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -96,7 +96,7 @@ struct kcov_percpu_data { int saved_sequence; }; -DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data); +static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data); /* Must be called with kcov_remote_lock locked. 
*/ static struct kcov_remote *kcov_remote_find(u64 handle) @@ -775,7 +775,7 @@ static inline bool kcov_mode_enabled(unsigned int mode) return (mode & ~KCOV_IN_CTXSW) != KCOV_MODE_DISABLED; } -void kcov_remote_softirq_start(struct task_struct *t) +static void kcov_remote_softirq_start(struct task_struct *t) { struct kcov_percpu_data *data = this_cpu_ptr(&kcov_percpu_data); unsigned int mode; @@ -792,7 +792,7 @@ void kcov_remote_softirq_start(struct task_struct *t) } } -void kcov_remote_softirq_stop(struct task_struct *t) +static void kcov_remote_softirq_stop(struct task_struct *t) { struct kcov_percpu_data *data = this_cpu_ptr(&kcov_percpu_data); diff --git a/kernel/kmod.c b/kernel/kmod.c index 37c3c4b97b8e..3cd075ce2a1e 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -36,9 +36,8 @@ * * If you need less than 50 threads would mean we're dealing with systems * smaller than 3200 pages. This assumes you are capable of having ~13M memory, - * and this would only be an be an upper limit, after which the OOM killer - * would take effect. Systems like these are very unlikely if modules are - * enabled. + * and this would only be an upper limit, after which the OOM killer would take + * effect. Systems like these are very unlikely if modules are enabled. 
*/ #define MAX_KMOD_CONCURRENT 50 static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT); diff --git a/kernel/kthread.c b/kernel/kthread.c index b2807e7be772..3edaa380dc7b 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1258,8 +1258,7 @@ void kthread_use_mm(struct mm_struct *mm) if (active_mm != mm) mmdrop(active_mm); - to_kthread(tsk)->oldfs = get_fs(); - set_fs(USER_DS); + to_kthread(tsk)->oldfs = force_uaccess_begin(); } EXPORT_SYMBOL_GPL(kthread_use_mm); @@ -1274,7 +1273,7 @@ void kthread_unuse_mm(struct mm_struct *mm) WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); WARN_ON_ONCE(!tsk->mm); - set_fs(to_kthread(tsk)->oldfs); + force_uaccess_end(to_kthread(tsk)->oldfs); task_lock(tsk); sync_mm_rss(mm); diff --git a/kernel/module.c b/kernel/module.c index 8fa2600bde6a..1c5cff34d9f2 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -422,7 +422,7 @@ static bool each_symbol_in_section(const struct symsearch *arr, } /* Returns true as soon as fn returns true, otherwise false. 
*/ -bool each_symbol_section(bool (*fn)(const struct symsearch *arr, +static bool each_symbol_section(bool (*fn)(const struct symsearch *arr, struct module *owner, void *data), void *data) @@ -484,7 +484,6 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr, } return false; } -EXPORT_SYMBOL_GPL(each_symbol_section); struct find_symbol_arg { /* Input */ @@ -496,6 +495,7 @@ struct find_symbol_arg { struct module *owner; const s32 *crc; const struct kernel_symbol *sym; + enum mod_license license; }; static bool check_exported_symbol(const struct symsearch *syms, @@ -505,9 +505,9 @@ static bool check_exported_symbol(const struct symsearch *syms, struct find_symbol_arg *fsa = data; if (!fsa->gplok) { - if (syms->licence == GPL_ONLY) + if (syms->license == GPL_ONLY) return false; - if (syms->licence == WILL_BE_GPL_ONLY && fsa->warn) { + if (syms->license == WILL_BE_GPL_ONLY && fsa->warn) { pr_warn("Symbol %s is being used by a non-GPL module, " "which will not be allowed in the future\n", fsa->name); @@ -529,6 +529,7 @@ static bool check_exported_symbol(const struct symsearch *syms, fsa->owner = owner; fsa->crc = symversion(syms->crcs, symnum); fsa->sym = &syms->start[symnum]; + fsa->license = syms->license; return true; } @@ -585,9 +586,10 @@ static bool find_exported_symbol_in_section(const struct symsearch *syms, /* Find an exported symbol and return it, along with, (optional) crc and * (optional) module which owns it. Needs preempt disabled or module_mutex. 
*/ -const struct kernel_symbol *find_symbol(const char *name, +static const struct kernel_symbol *find_symbol(const char *name, struct module **owner, const s32 **crc, + enum mod_license *license, bool gplok, bool warn) { @@ -602,13 +604,14 @@ const struct kernel_symbol *find_symbol(const char *name, *owner = fsa.owner; if (crc) *crc = fsa.crc; + if (license) + *license = fsa.license; return fsa.sym; } pr_debug("Failed to find symbol %s\n", name); return NULL; } -EXPORT_SYMBOL_GPL(find_symbol); /* * Search for module by name: must hold module_mutex (or preempt disabled @@ -869,7 +872,7 @@ static int add_module_usage(struct module *a, struct module *b) } /* Module a uses b: caller needs module_mutex() */ -int ref_module(struct module *a, struct module *b) +static int ref_module(struct module *a, struct module *b) { int err; @@ -888,7 +891,6 @@ int ref_module(struct module *a, struct module *b) } return 0; } -EXPORT_SYMBOL_GPL(ref_module); /* Clear the unload stuff of the module. */ static void module_unload_free(struct module *mod) @@ -1077,7 +1079,7 @@ void __symbol_put(const char *symbol) struct module *owner; preempt_disable(); - if (!find_symbol(symbol, &owner, NULL, true, false)) + if (!find_symbol(symbol, &owner, NULL, NULL, true, false)) BUG(); module_put(owner); preempt_enable(); @@ -1169,11 +1171,10 @@ static inline void module_unload_free(struct module *mod) { } -int ref_module(struct module *a, struct module *b) +static int ref_module(struct module *a, struct module *b) { return strong_try_module_get(b); } -EXPORT_SYMBOL_GPL(ref_module); static inline int module_unload_init(struct module *mod) { @@ -1356,7 +1357,7 @@ static inline int check_modstruct_version(const struct load_info *info, * locking is necessary -- use preempt_disable() to placate lockdep. 
*/ preempt_disable(); - if (!find_symbol("module_layout", NULL, &crc, true, false)) { + if (!find_symbol("module_layout", NULL, &crc, NULL, true, false)) { preempt_enable(); BUG(); } @@ -1430,6 +1431,24 @@ static int verify_namespace_is_imported(const struct load_info *info, return 0; } +static bool inherit_taint(struct module *mod, struct module *owner) +{ + if (!owner || !test_bit(TAINT_PROPRIETARY_MODULE, &owner->taints)) + return true; + + if (mod->using_gplonly_symbols) { + pr_err("%s: module using GPL-only symbols uses symbols from proprietary module %s.\n", + mod->name, owner->name); + return false; + } + + if (!test_bit(TAINT_PROPRIETARY_MODULE, &mod->taints)) { + pr_warn("%s: module uses symbols from proprietary module %s, inheriting taint.\n", + mod->name, owner->name); + set_bit(TAINT_PROPRIETARY_MODULE, &mod->taints); + } + return true; +} /* Resolve a symbol for this module. I.e. if we find one, record usage. */ static const struct kernel_symbol *resolve_symbol(struct module *mod, @@ -1440,6 +1459,7 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod, struct module *owner; const struct kernel_symbol *sym; const s32 *crc; + enum mod_license license; int err; /* @@ -1449,11 +1469,19 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod, */ sched_annotate_sleep(); mutex_lock(&module_mutex); - sym = find_symbol(name, &owner, &crc, + sym = find_symbol(name, &owner, &crc, &license, !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true); if (!sym) goto unlock; + if (license == GPL_ONLY) + mod->using_gplonly_symbols = true; + + if (!inherit_taint(mod, owner)) { + sym = NULL; + goto getname; + } + if (!check_version(info, name, mod, crc)) { sym = ERR_PTR(-EINVAL); goto getname; @@ -2236,7 +2264,7 @@ void *__symbol_get(const char *symbol) const struct kernel_symbol *sym; preempt_disable(); - sym = find_symbol(symbol, &owner, NULL, true, true); + sym = find_symbol(symbol, &owner, NULL, NULL, true, true); if (sym && 
strong_try_module_get(owner)) sym = NULL; preempt_enable(); @@ -2272,7 +2300,7 @@ static int verify_exported_symbols(struct module *mod) for (i = 0; i < ARRAY_SIZE(arr); i++) { for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) { if (find_symbol(kernel_symbol_name(s), &owner, NULL, - true, false)) { + NULL, true, false)) { pr_err("%s: exports duplicate symbol %s" " (owned by %s)\n", mod->name, kernel_symbol_name(s), @@ -4489,7 +4517,6 @@ struct module *__module_address(unsigned long addr) } return mod; } -EXPORT_SYMBOL_GPL(__module_address); /* * is_module_text_address - is this address inside module code? @@ -4528,7 +4555,6 @@ struct module *__module_text_address(unsigned long addr) } return mod; } -EXPORT_SYMBOL_GPL(__module_text_address); /* Don't grab lock, we're oopsing. */ void print_modules(void) diff --git a/kernel/panic.c b/kernel/panic.c index e2157ca387c8..aef8872ba843 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -505,7 +505,7 @@ static void do_oops_enter_exit(void) * Return true if the calling CPU is allowed to print oops-related info. * This is a bit racy.. 
*/ -int oops_may_print(void) +bool oops_may_print(void) { return pause_on_oops_flag == 0; } @@ -551,7 +551,7 @@ static int init_oops_id(void) } late_initcall(init_oops_id); -void print_oops_end_marker(void) +static void print_oops_end_marker(void) { init_oops_id(); pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id); diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index 2af66e449aa6..946f44a9e86a 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -233,10 +233,9 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size) if (current->flags & PF_KTHREAD) return 0; - fs = get_fs(); - set_fs(USER_DS); + fs = force_uaccess_begin(); arch_stack_walk_user(consume_entry, &c, task_pt_regs(current)); - set_fs(fs); + force_uaccess_end(fs); return c.len; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f785de3caac0..287862f91717 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2852,6 +2852,15 @@ static struct ctl_table vm_table[] = { .proc_handler = sysctl_compaction_handler, }, { + .procname = "compaction_proactiveness", + .data = &sysctl_compaction_proactiveness, + .maxlen = sizeof(sysctl_compaction_proactiveness), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &one_hundred, + }, + { .procname = "extfrag_threshold", .data = &sysctl_extfrag_threshold, .maxlen = sizeof(int), diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index cb91ef902cc4..a8d4f253ed77 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -383,7 +383,7 @@ static DEFINE_RAW_SPINLOCK(trace_printk_lock); #define BPF_TRACE_PRINTK_SIZE 1024 -static inline __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...) +static __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...) 
{ static char buf[BPF_TRACE_PRINTK_SIZE]; unsigned long flags; diff --git a/kernel/umh.c b/kernel/umh.c index a25433f9cd9a..fcf3ee803630 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -119,37 +119,16 @@ static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info) { pid_t pid; - /* If SIGCLD is ignored kernel_wait4 won't populate the status. */ + /* If SIGCLD is ignored do_wait won't populate the status. */ kernel_sigaction(SIGCHLD, SIG_DFL); pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD); - if (pid < 0) { + if (pid < 0) sub_info->retval = pid; - } else { - int ret = -ECHILD; - /* - * Normally it is bogus to call wait4() from in-kernel because - * wait4() wants to write the exit code to a userspace address. - * But call_usermodehelper_exec_sync() always runs as kernel - * thread (workqueue) and put_user() to a kernel address works - * OK for kernel threads, due to their having an mm_segment_t - * which spans the entire address space. - * - * Thus the __user pointer cast is valid here. - */ - kernel_wait4(pid, (int __user *)&ret, 0, NULL); - - /* - * If ret is 0, either call_usermodehelper_exec_async failed and - * the real error code is already in sub_info->retval or - * sub_info->retval is 0 anyway, so don't mess with it then. - */ - if (ret) - sub_info->retval = ret; - } + else + kernel_wait(pid, &sub_info->retval); /* Restore default kernel sig handler */ kernel_sigaction(SIGCHLD, SIG_IGN); - umh_complete(sub_info); } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a164785c3b48..e068c3c7189a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -365,6 +365,17 @@ config SECTION_MISMATCH_WARN_ONLY If unsure, say Y. +config DEBUG_FORCE_FUNCTION_ALIGN_32B + bool "Force all function address 32B aligned" if EXPERT + help + There are cases that a commit from one domain changes the function + address alignment of other domains, and cause magic performance + bump (regression or improvement). 
Enable this option will help to + verify if the bump is caused by function alignment changes, while + it will slightly increase the kernel size and affect icache usage. + + It is mainly for debug and performance tuning use. + # # Select this config option from the architecture Kconfig, if it # is preferred to always offer frame pointers as a config @@ -906,7 +917,7 @@ config PANIC_TIMEOUT int "panic timeout" default 0 help - Set the timeout value (in seconds) until a reboot occurs when the + Set the timeout value (in seconds) until a reboot occurs when the kernel panics. If n = 0, then we wait forever. A timeout value n > 0 will wait n seconds before rebooting, while a timeout value n < 0 will reboot immediately. @@ -1067,6 +1078,7 @@ config WQ_WATCHDOG config TEST_LOCKUP tristate "Test module to generate lockups" + depends on m help This builds the "test_lockup" module that helps to make sure that watchdogs and lockup detectors are working properly. @@ -2203,7 +2215,7 @@ config LIST_KUNIT_TEST and associated macros. KUnit tests run during boot and output the results to the debug log - in TAP format (http://testanything.org/). Only useful for kernel devs + in TAP format (https://testanything.org/). Only useful for kernel devs running the KUnit test harness, and not intended for inclusion into a production build. @@ -2224,6 +2236,17 @@ config LINEAR_RANGES_TEST If unsure, say N. +config BITS_TEST + tristate "KUnit test for bits.h" + depends on KUNIT + help + This builds the bits unit test. + Tests the logic of macros defined in bits.h. + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. 
+ config TEST_UDELAY tristate "udelay test driver" help diff --git a/lib/Makefile b/lib/Makefile index 9d1fd82ea145..e290fc5707ea 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -342,3 +342,4 @@ obj-$(CONFIG_PLDMFW) += pldmfw/ # KUnit tests obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o +obj-$(CONFIG_BITS_TEST) += test_bits.o diff --git a/lib/bitmap.c b/lib/bitmap.c index 0364452b1617..c13d859bc7ab 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -212,13 +212,13 @@ void bitmap_cut(unsigned long *dst, const unsigned long *src, unsigned long keep = 0, carry; int i; - memmove(dst, src, len * sizeof(*dst)); - if (first % BITS_PER_LONG) { keep = src[first / BITS_PER_LONG] & (~0UL >> (BITS_PER_LONG - first % BITS_PER_LONG)); } + memmove(dst, src, len * sizeof(*dst)); + while (cut--) { for (i = first / BITS_PER_LONG; i < len; i++) { if (i < len - 1) diff --git a/lib/crc64.c b/lib/crc64.c index f8928ce28280..47cfa054827f 100644 --- a/lib/crc64.c +++ b/lib/crc64.c @@ -4,7 +4,7 @@ * * This is a basic crc64 implementation following ECMA-182 specification, * which can be found from, - * http://www.ecma-international.org/publications/standards/Ecma-182.htm + * https://www.ecma-international.org/publications/standards/Ecma-182.htm * * Dr. Ross N. Williams has a great document to introduce the idea of CRC * algorithm, here the CRC64 code is also inspired by the table-driven diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 7c4932eed748..f9628f3924ce 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -34,7 +34,7 @@ Phone (337) 232-1234 or 1-800-738-2226 Fax (337) 232-1297 - http://www.hospiceacadiana.com/ + https://www.hospiceacadiana.com/ Manuel */ diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index ed7a1fd819f2..1cf409ef8d04 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -8,7 +8,7 @@ *implementation for lzma. 
*Copyright (C) 2006 Aurelien Jacobs < [email protected] > * - *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + *Based on LzmaDecode.c from the LZMA SDK 4.22 (https://www.7-zip.org/) *Copyright (C) 1999-2005 Igor Pavlov * *Copyrights of the parts, see headers below. @@ -56,7 +56,7 @@ static long long INIT read_int(unsigned char *ptr, int size) /* Small range coder implementation for lzma. *Copyright (C) 2006 Aurelien Jacobs < [email protected] > * - *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + *Based on LzmaDecode.c from the LZMA SDK 4.22 (https://www.7-zip.org/) *Copyright (c) 1999-2005 Igor Pavlov */ @@ -213,7 +213,7 @@ rc_bit_tree_decode(struct rc *rc, uint16_t *p, int num_levels, int *symbol) * Small lzma deflate implementation. * Copyright (C) 2006 Aurelien Jacobs < [email protected] > * - * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + * Based on LzmaDecode.c from the LZMA SDK 4.22 (https://www.7-zip.org/) * Copyright (C) 1999-2005 Igor Pavlov */ diff --git a/lib/kstrtox.c b/lib/kstrtox.c index 1006bf70bf74..a14ccf905055 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -115,8 +115,7 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoull. Return code must - * be checked. + * Preferred over simple_strtoull(). Return code must be checked. */ int kstrtoull(const char *s, unsigned int base, unsigned long long *res) { @@ -139,8 +138,7 @@ EXPORT_SYMBOL(kstrtoull); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoull. Return code must - * be checked. + * Preferred over simple_strtoll(). Return code must be checked. 
*/ int kstrtoll(const char *s, unsigned int base, long long *res) { @@ -211,8 +209,7 @@ EXPORT_SYMBOL(_kstrtol); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoull. Return code must - * be checked. + * Preferred over simple_strtoul(). Return code must be checked. */ int kstrtouint(const char *s, unsigned int base, unsigned int *res) { @@ -242,8 +239,7 @@ EXPORT_SYMBOL(kstrtouint); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoull. Return code must - * be checked. + * Preferred over simple_strtol(). Return code must be checked. */ int kstrtoint(const char *s, unsigned int base, int *res) { diff --git a/lib/math/rational.c b/lib/math/rational.c index 31fb27db2deb..df75c8809693 100644 --- a/lib/math/rational.c +++ b/lib/math/rational.c @@ -27,7 +27,7 @@ * with the fractional part size described in given_denominator. 
* * for theoretical background, see: - * http://en.wikipedia.org/wiki/Continued_fraction + * https://en.wikipedia.org/wiki/Continued_fraction */ void rational_best_approximation( diff --git a/lib/random32.c b/lib/random32.c index 3d749abb9e80..932345323af0 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -39,6 +39,7 @@ #include <linux/random.h> #include <linux/sched.h> #include <asm/unaligned.h> +#include <trace/events/random.h> #ifdef CONFIG_RANDOM32_SELFTEST static void __init prandom_state_selftest(void); @@ -82,6 +83,7 @@ u32 prandom_u32(void) u32 res; res = prandom_u32_state(state); + trace_prandom_u32(res); put_cpu_var(net_rand_state); return res; diff --git a/lib/rbtree.c b/lib/rbtree.c index 8545872e61db..c4ac5c2421f2 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -13,7 +13,7 @@ #include <linux/export.h> /* - * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree + * red-black trees properties: https://en.wikipedia.org/wiki/Rbtree * * 1) A node is either red or black * 2) The root is black diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 6b13150667f5..df903c53952b 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -610,6 +610,63 @@ static void __init test_for_each_set_clump8(void) expect_eq_clump8(start, CLUMP_EXP_NUMBITS, clump_exp, &clump); } +struct test_bitmap_cut { + unsigned int first; + unsigned int cut; + unsigned int nbits; + unsigned long in[4]; + unsigned long expected[4]; +}; + +static struct test_bitmap_cut test_cut[] = { + { 0, 0, 8, { 0x0000000aUL, }, { 0x0000000aUL, }, }, + { 0, 0, 32, { 0xdadadeadUL, }, { 0xdadadeadUL, }, }, + { 0, 3, 8, { 0x000000aaUL, }, { 0x00000015UL, }, }, + { 3, 3, 8, { 0x000000aaUL, }, { 0x00000012UL, }, }, + { 0, 1, 32, { 0xa5a5a5a5UL, }, { 0x52d2d2d2UL, }, }, + { 0, 8, 32, { 0xdeadc0deUL, }, { 0x00deadc0UL, }, }, + { 1, 1, 32, { 0x5a5a5a5aUL, }, { 0x2d2d2d2cUL, }, }, + { 0, 15, 32, { 0xa5a5a5a5UL, }, { 0x00014b4bUL, }, }, + { 0, 16, 32, { 0xa5a5a5a5UL, }, { 0x0000a5a5UL, }, }, + { 15, 
15, 32, { 0xa5a5a5a5UL, }, { 0x000125a5UL, }, }, + { 15, 16, 32, { 0xa5a5a5a5UL, }, { 0x0000a5a5UL, }, }, + { 16, 15, 32, { 0xa5a5a5a5UL, }, { 0x0001a5a5UL, }, }, + + { BITS_PER_LONG, BITS_PER_LONG, BITS_PER_LONG, + { 0xa5a5a5a5UL, 0xa5a5a5a5UL, }, + { 0xa5a5a5a5UL, 0xa5a5a5a5UL, }, + }, + { 1, BITS_PER_LONG - 1, BITS_PER_LONG, + { 0xa5a5a5a5UL, 0xa5a5a5a5UL, }, + { 0x00000001UL, 0x00000001UL, }, + }, + + { 0, BITS_PER_LONG * 2, BITS_PER_LONG * 2 + 1, + { 0xa5a5a5a5UL, 0x00000001UL, 0x00000001UL, 0x00000001UL }, + { 0x00000001UL, }, + }, + { 16, BITS_PER_LONG * 2 + 1, BITS_PER_LONG * 2 + 1 + 16, + { 0x0000ffffUL, 0x5a5a5a5aUL, 0x5a5a5a5aUL, 0x5a5a5a5aUL }, + { 0x2d2dffffUL, }, + }, +}; + +static void __init test_bitmap_cut(void) +{ + unsigned long b[5], *in = &b[1], *out = &b[0]; /* Partial overlap */ + int i; + + for (i = 0; i < ARRAY_SIZE(test_cut); i++) { + struct test_bitmap_cut *t = &test_cut[i]; + + memcpy(in, t->in, sizeof(t->in)); + + bitmap_cut(out, in, t->first, t->cut, t->nbits); + + expect_eq_bitmap(t->expected, out, t->nbits); + } +} + static void __init selftest(void) { test_zero_clear(); @@ -623,6 +680,7 @@ static void __init selftest(void) test_bitmap_parselist_user(); test_mem_optimisations(); test_for_each_set_clump8(); + test_bitmap_cut(); } KSTM_MODULE_LOADERS(test_bitmap); diff --git a/lib/test_bitops.c b/lib/test_bitops.c index ced25e3a779b..471141ddd691 100644 --- a/lib/test_bitops.c +++ b/lib/test_bitops.c @@ -52,9 +52,9 @@ static unsigned long order_comb_long[][2] = { static int __init test_bitops_startup(void) { - int i; + int i, bit_set; - pr_warn("Loaded test module\n"); + pr_info("Starting bitops test\n"); set_bit(BITOPS_4, g_bitmap); set_bit(BITOPS_7, g_bitmap); set_bit(BITOPS_11, g_bitmap); @@ -81,12 +81,8 @@ static int __init test_bitops_startup(void) order_comb_long[i][0]); } #endif - return 0; -} -static void __exit test_bitops_unstartup(void) -{ - int bit_set; + barrier(); clear_bit(BITOPS_4, g_bitmap); clear_bit(BITOPS_7, 
g_bitmap); @@ -98,7 +94,13 @@ static void __exit test_bitops_unstartup(void) if (bit_set != BITOPS_LAST) pr_err("ERROR: FOUND SET BIT %d\n", bit_set); - pr_warn("Unloaded test module\n"); + pr_info("Completed bitops test\n"); + + return 0; +} + +static void __exit test_bitops_unstartup(void) +{ } module_init(test_bitops_startup); diff --git a/lib/test_bits.c b/lib/test_bits.c new file mode 100644 index 000000000000..c9368a2314e7 --- /dev/null +++ b/lib/test_bits.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Test cases for functions and macros in bits.h + */ + +#include <kunit/test.h> +#include <linux/bits.h> + + +static void genmask_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, 1ul, GENMASK(0, 0)); + KUNIT_EXPECT_EQ(test, 3ul, GENMASK(1, 0)); + KUNIT_EXPECT_EQ(test, 6ul, GENMASK(2, 1)); + KUNIT_EXPECT_EQ(test, 0xFFFFFFFFul, GENMASK(31, 0)); + +#ifdef TEST_GENMASK_FAILURES + /* these should fail compilation */ + GENMASK(0, 1); + GENMASK(0, 10); + GENMASK(9, 10); +#endif + + +} + +static void genmask_ull_test(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, 1ull, GENMASK_ULL(0, 0)); + KUNIT_EXPECT_EQ(test, 3ull, GENMASK_ULL(1, 0)); + KUNIT_EXPECT_EQ(test, 0x000000ffffe00000ull, GENMASK_ULL(39, 21)); + KUNIT_EXPECT_EQ(test, 0xffffffffffffffffull, GENMASK_ULL(63, 0)); + +#ifdef TEST_GENMASK_FAILURES + /* these should fail compilation */ + GENMASK_ULL(0, 1); + GENMASK_ULL(0, 10); + GENMASK_ULL(9, 10); +#endif +} + +static void genmask_input_check_test(struct kunit *test) +{ + unsigned int x, y; + int z, w; + + /* Unknown input */ + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(x, 0)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(0, x)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(x, y)); + + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(z, 0)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(0, z)); + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(z, w)); + + /* Valid input */ + KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(1, 1)); + 
KUNIT_EXPECT_EQ(test, 0, GENMASK_INPUT_CHECK(39, 21)); +} + + +static struct kunit_case bits_test_cases[] = { + KUNIT_CASE(genmask_test), + KUNIT_CASE(genmask_ull_test), + KUNIT_CASE(genmask_input_check_test), + {} +}; + +static struct kunit_suite bits_test_suite = { + .name = "bits-test", + .test_cases = bits_test_cases, +}; +kunit_test_suite(bits_test_suite); + +MODULE_LICENSE("GPL"); diff --git a/lib/test_kmod.c b/lib/test_kmod.c index e651c37d56db..eab52770070d 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -745,7 +745,7 @@ static int trigger_config_run_type(struct kmod_test_device *test_dev, break; case TEST_KMOD_FS_TYPE: kfree_const(config->test_fs); - config->test_driver = NULL; + config->test_fs = NULL; copied = config_copy_test_fs(config, test_str, strlen(test_str)); break; diff --git a/lib/test_lockup.c b/lib/test_lockup.c index ff26f36d729f..f1a020bcc763 100644 --- a/lib/test_lockup.c +++ b/lib/test_lockup.c @@ -400,7 +400,7 @@ static void test_lockup(bool master) test_unlock(master, true); } -DEFINE_PER_CPU(struct work_struct, test_works); +static DEFINE_PER_CPU(struct work_struct, test_works); static void test_work_fn(struct work_struct *work) { @@ -512,8 +512,8 @@ static int __init test_lockup_init(void) if (test_file_path[0]) { test_file = filp_open(test_file_path, O_RDONLY, 0); if (IS_ERR(test_file)) { - pr_err("cannot find file_path\n"); - return -EINVAL; + pr_err("failed to open %s: %ld\n", test_file_path, PTR_ERR(test_file)); + return PTR_ERR(test_file); } test_inode = file_inode(test_file); } else if (test_lock_inode || diff --git a/lib/ts_bm.c b/lib/ts_bm.c index 277cb4417ac2..4cf250031f0f 100644 --- a/lib/ts_bm.c +++ b/lib/ts_bm.c @@ -11,7 +11,7 @@ * [1] A Fast String Searching Algorithm, R.S. Boyer and Moore. * Communications of the Association for Computing Machinery, * 20(10), 1977, pp. 762-772. 
- * http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf + * https://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf * * [2] Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004 * http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf diff --git a/lib/xxhash.c b/lib/xxhash.c index aa61e2a3802f..d5bb9ff10607 100644 --- a/lib/xxhash.c +++ b/lib/xxhash.c @@ -34,7 +34,7 @@ * ("BSD"). * * You can contact the author at: - * - xxHash homepage: http://cyan4973.github.io/xxHash/ + * - xxHash homepage: https://cyan4973.github.io/xxHash/ * - xxHash source repository: https://github.com/Cyan4973/xxHash */ diff --git a/lib/xz/xz_crc32.c b/lib/xz/xz_crc32.c index 912aae5fa09e..88a2c35e1b59 100644 --- a/lib/xz/xz_crc32.c +++ b/lib/xz/xz_crc32.c @@ -2,7 +2,7 @@ * CRC32 using the polynomial from IEEE-802.3 * * Authors: Lasse Collin <[email protected]> - * Igor Pavlov <http://7-zip.org/> + * Igor Pavlov <https://7-zip.org/> * * This file has been put into the public domain. * You can do whatever you want with this file. diff --git a/lib/xz/xz_dec_bcj.c b/lib/xz/xz_dec_bcj.c index a768e6d28bbb..72ddac6ef2ec 100644 --- a/lib/xz/xz_dec_bcj.c +++ b/lib/xz/xz_dec_bcj.c @@ -2,7 +2,7 @@ * Branch/Call/Jump (BCJ) filter decoders * * Authors: Lasse Collin <[email protected]> - * Igor Pavlov <http://7-zip.org/> + * Igor Pavlov <https://7-zip.org/> * * This file has been put into the public domain. * You can do whatever you want with this file. diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c index 156f26fdc4c9..9f336bc07ed6 100644 --- a/lib/xz/xz_dec_lzma2.c +++ b/lib/xz/xz_dec_lzma2.c @@ -2,7 +2,7 @@ * LZMA2 decoder * * Authors: Lasse Collin <[email protected]> - * Igor Pavlov <http://7-zip.org/> + * Igor Pavlov <https://7-zip.org/> * * This file has been put into the public domain. * You can do whatever you want with this file. 
diff --git a/lib/xz/xz_lzma2.h b/lib/xz/xz_lzma2.h index 071d67bee9f5..92d852d4f87a 100644 --- a/lib/xz/xz_lzma2.h +++ b/lib/xz/xz_lzma2.h @@ -2,7 +2,7 @@ * LZMA2 definitions * * Authors: Lasse Collin <[email protected]> - * Igor Pavlov <http://7-zip.org/> + * Igor Pavlov <https://7-zip.org/> * * This file has been put into the public domain. * You can do whatever you want with this file. diff --git a/lib/xz/xz_stream.h b/lib/xz/xz_stream.h index 66cb5a7055ec..430bb3a0d195 100644 --- a/lib/xz/xz_stream.h +++ b/lib/xz/xz_stream.h @@ -19,7 +19,7 @@ /* * See the .xz file format specification at - * http://tukaani.org/xz/xz-file-format.txt + * https://tukaani.org/xz/xz-file-format.txt * to understand the container format. */ @@ -52,7 +52,7 @@ unsigned long cma_get_size(const struct cma *cma) const char *cma_get_name(const struct cma *cma) { - return cma->name ? cma->name : "(undefined)"; + return cma->name; } static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, @@ -93,17 +93,15 @@ static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, mutex_unlock(&cma->lock); } -static int __init cma_activate_area(struct cma *cma) +static void __init cma_activate_area(struct cma *cma) { unsigned long base_pfn = cma->base_pfn, pfn = base_pfn; unsigned i = cma->count >> pageblock_order; struct zone *zone; cma->bitmap = bitmap_zalloc(cma_bitmap_maxno(cma), GFP_KERNEL); - if (!cma->bitmap) { - cma->count = 0; - return -ENOMEM; - } + if (!cma->bitmap) + goto out_error; WARN_ON_ONCE(!pfn_valid(pfn)); zone = page_zone(pfn_to_page(pfn)); @@ -133,25 +131,22 @@ static int __init cma_activate_area(struct cma *cma) spin_lock_init(&cma->mem_head_lock); #endif - return 0; + return; not_in_zone: - pr_err("CMA area %s could not be activated\n", cma->name); bitmap_free(cma->bitmap); +out_error: cma->count = 0; - return -EINVAL; + pr_err("CMA area %s could not be activated\n", cma->name); + return; } static int __init cma_init_reserved_areas(void) { int i; - for (i = 0; i < 
cma_area_count; i++) { - int ret = cma_activate_area(&cma_areas[i]); - - if (ret) - return ret; - } + for (i = 0; i < cma_area_count; i++) + cma_activate_area(&cma_areas[i]); return 0; } @@ -202,13 +197,12 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, * subsystems (like slab allocator) are available. */ cma = &cma_areas[cma_area_count]; - if (name) { - cma->name = name; - } else { - cma->name = kasprintf(GFP_KERNEL, "cma%d\n", cma_area_count); - if (!cma->name) - return -ENOMEM; - } + + if (name) + snprintf(cma->name, CMA_MAX_NAME, name); + else + snprintf(cma->name, CMA_MAX_NAME, "cma%d\n", cma_area_count); + cma->base_pfn = PFN_DOWN(base); cma->count = size >> PAGE_SHIFT; cma->order_per_bit = order_per_bit; @@ -425,7 +419,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, struct page *page = NULL; int ret = -ENOMEM; - if (!cma || !cma->count) + if (!cma || !cma->count || !cma->bitmap) return NULL; pr_debug("%s(cma %p, count %zu, align %d)\n", __func__, (void *)cma, @@ -4,6 +4,8 @@ #include <linux/debugfs.h> +#define CMA_MAX_NAME 64 + struct cma { unsigned long base_pfn; unsigned long count; @@ -15,7 +17,7 @@ struct cma { spinlock_t mem_head_lock; struct debugfs_u32_array dfs_bitmap; #endif - const char *name; + char name[CMA_MAX_NAME]; }; extern struct cma cma_areas[MAX_CMA_AREAS]; diff --git a/mm/compaction.c b/mm/compaction.c index 86375605faa9..b89581bf859c 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -50,6 +50,24 @@ static inline void count_compact_events(enum vm_event_item item, long delta) #define pageblock_start_pfn(pfn) block_start_pfn(pfn, pageblock_order) #define pageblock_end_pfn(pfn) block_end_pfn(pfn, pageblock_order) +/* + * Fragmentation score check interval for proactive compaction purposes. 
+ */ +static const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500; + +/* + * Page order with-respect-to which proactive compaction + * calculates external fragmentation, which is used as + * the "fragmentation score" of a node/zone. + */ +#if defined CONFIG_TRANSPARENT_HUGEPAGE +#define COMPACTION_HPAGE_ORDER HPAGE_PMD_ORDER +#elif defined CONFIG_HUGETLBFS +#define COMPACTION_HPAGE_ORDER HUGETLB_PAGE_ORDER +#else +#define COMPACTION_HPAGE_ORDER (PMD_SHIFT - PAGE_SHIFT) +#endif + static unsigned long release_freepages(struct list_head *freelist) { struct page *page, *next; @@ -136,7 +154,7 @@ EXPORT_SYMBOL(__ClearPageMovable); /* * Compaction is deferred when compaction fails to result in a page - * allocation success. 1 << compact_defer_limit compactions are skipped up + * allocation success. 1 << compact_defer_shift, compactions are skipped up * to a limit of 1 << COMPACT_MAX_DEFER_SHIFT */ void defer_compaction(struct zone *zone, int order) @@ -1459,7 +1477,7 @@ static void isolate_freepages(struct compact_control *cc) * this pfn aligned down to the pageblock boundary, because we do * block_start_pfn -= pageblock_nr_pages in the for loop. * For ending point, take care when isolating in last pageblock of a - * a zone which ends in the middle of a pageblock. + * zone which ends in the middle of a pageblock. * The low boundary is the end of the pageblock the migration scanner * is using. */ @@ -1857,6 +1875,76 @@ static inline bool is_via_compact_memory(int order) return order == -1; } +static bool kswapd_is_running(pg_data_t *pgdat) +{ + return pgdat->kswapd && (pgdat->kswapd->state == TASK_RUNNING); +} + +/* + * A zone's fragmentation score is the external fragmentation wrt to the + * COMPACTION_HPAGE_ORDER scaled by the zone's size. It returns a value + * in the range [0, 100]. + * + * The scaling factor ensures that proactive compaction focuses on larger + * zones like ZONE_NORMAL, rather than smaller, specialized zones like + * ZONE_DMA32. 
For smaller zones, the score value remains close to zero, + * and thus never exceeds the high threshold for proactive compaction. + */ +static unsigned int fragmentation_score_zone(struct zone *zone) +{ + unsigned long score; + + score = zone->present_pages * + extfrag_for_order(zone, COMPACTION_HPAGE_ORDER); + return div64_ul(score, zone->zone_pgdat->node_present_pages + 1); +} + +/* + * The per-node proactive (background) compaction process is started by its + * corresponding kcompactd thread when the node's fragmentation score + * exceeds the high threshold. The compaction process remains active till + * the node's score falls below the low threshold, or one of the back-off + * conditions is met. + */ +static unsigned int fragmentation_score_node(pg_data_t *pgdat) +{ + unsigned int score = 0; + int zoneid; + + for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { + struct zone *zone; + + zone = &pgdat->node_zones[zoneid]; + score += fragmentation_score_zone(zone); + } + + return score; +} + +static unsigned int fragmentation_score_wmark(pg_data_t *pgdat, bool low) +{ + unsigned int wmark_low; + + /* + * Cap the low watermak to avoid excessive compaction + * activity in case a user sets the proactivess tunable + * close to 100 (maximum). + */ + wmark_low = max(100U - sysctl_compaction_proactiveness, 5U); + return low ? 
wmark_low : min(wmark_low + 10, 100U); +} + +static bool should_proactive_compact_node(pg_data_t *pgdat) +{ + int wmark_high; + + if (!sysctl_compaction_proactiveness || kswapd_is_running(pgdat)) + return false; + + wmark_high = fragmentation_score_wmark(pgdat, false); + return fragmentation_score_node(pgdat) > wmark_high; +} + static enum compact_result __compact_finished(struct compact_control *cc) { unsigned int order; @@ -1883,6 +1971,25 @@ static enum compact_result __compact_finished(struct compact_control *cc) return COMPACT_PARTIAL_SKIPPED; } + if (cc->proactive_compaction) { + int score, wmark_low; + pg_data_t *pgdat; + + pgdat = cc->zone->zone_pgdat; + if (kswapd_is_running(pgdat)) + return COMPACT_PARTIAL_SKIPPED; + + score = fragmentation_score_zone(cc->zone); + wmark_low = fragmentation_score_wmark(pgdat, true); + + if (score > wmark_low) + ret = COMPACT_CONTINUE; + else + ret = COMPACT_SUCCESS; + + goto out; + } + if (is_via_compact_memory(cc->order)) return COMPACT_CONTINUE; @@ -1941,6 +2048,7 @@ static enum compact_result __compact_finished(struct compact_control *cc) } } +out: if (cc->contended || fatal_signal_pending(current)) ret = COMPACT_CONTENDED; @@ -2421,6 +2529,41 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, return rc; } +/* + * Compact all zones within a node till each zone's fragmentation score + * reaches within proactive compaction thresholds (as determined by the + * proactiveness tunable). + * + * It is possible that the function returns before reaching score targets + * due to various back-off conditions, such as, contention on per-node or + * per-zone locks. 
+ */ +static void proactive_compact_node(pg_data_t *pgdat) +{ + int zoneid; + struct zone *zone; + struct compact_control cc = { + .order = -1, + .mode = MIGRATE_SYNC_LIGHT, + .ignore_skip_hint = true, + .whole_zone = true, + .gfp_mask = GFP_KERNEL, + .proactive_compaction = true, + }; + + for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { + zone = &pgdat->node_zones[zoneid]; + if (!populated_zone(zone)) + continue; + + cc.zone = zone; + + compact_zone(&cc, NULL); + + VM_BUG_ON(!list_empty(&cc.freepages)); + VM_BUG_ON(!list_empty(&cc.migratepages)); + } +} /* Compact all zones within a node */ static void compact_node(int nid) @@ -2468,6 +2611,13 @@ static void compact_nodes(void) int sysctl_compact_memory; /* + * Tunable for proactive compaction. It determines how + * aggressively the kernel should compact memory in the + * background. It takes values in the range [0, 100]. + */ +unsigned int __read_mostly sysctl_compaction_proactiveness = 20; + +/* * This is the entry point for compacting all nodes via * /proc/sys/vm/compact_memory */ @@ -2646,6 +2796,7 @@ static int kcompactd(void *p) { pg_data_t *pgdat = (pg_data_t*)p; struct task_struct *tsk = current; + unsigned int proactive_defer = 0; const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); @@ -2661,12 +2812,34 @@ static int kcompactd(void *p) unsigned long pflags; trace_mm_compaction_kcompactd_sleep(pgdat->node_id); - wait_event_freezable(pgdat->kcompactd_wait, - kcompactd_work_requested(pgdat)); + if (wait_event_freezable_timeout(pgdat->kcompactd_wait, + kcompactd_work_requested(pgdat), + msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC))) { + + psi_memstall_enter(&pflags); + kcompactd_do_work(pgdat); + psi_memstall_leave(&pflags); + continue; + } - psi_memstall_enter(&pflags); - kcompactd_do_work(pgdat); - psi_memstall_leave(&pflags); + /* kcompactd wait timeout */ + if (should_proactive_compact_node(pgdat)) { + unsigned int prev_score, score; + + if (proactive_defer) { + proactive_defer--; + 
continue; + } + prev_score = fragmentation_score_node(pgdat); + proactive_compact_node(pgdat); + score = fragmentation_score_node(pgdat); + /* + * Defer proactive compaction if the fragmentation + * score did not go down i.e. no progress made. + */ + proactive_defer = score < prev_score ? + 0 : 1 << COMPACT_MAX_DEFER_SHIFT; + } } return 0; diff --git a/mm/filemap.c b/mm/filemap.c index f2bb5ff0293d..8e75bce0346d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2885,7 +2885,7 @@ filler: * Case a, the page will be up to date when the page is unlocked. * There is no need to serialise on the page lock here as the page * is pinned so the lock gives no additional protection. Even if the - * the page is truncated, the data is still valid if PageUptodate as + * page is truncated, the data is still valid if PageUptodate as * it's a race vs truncate race. * Case b, the page will not be up to date * Case c, the page may be truncated but in itself, the data may still @@ -859,7 +859,7 @@ unmap: * does not include FOLL_NOWAIT, the mmap_lock may be released. If it * is, *@locked will be set to 0 and -EBUSY returned. 
*/ -static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, +static int faultin_page(struct vm_area_struct *vma, unsigned long address, unsigned int *flags, int *locked) { unsigned int fault_flags = 0; @@ -884,7 +884,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, fault_flags |= FAULT_FLAG_TRIED; } - ret = handle_mm_fault(vma, address, fault_flags); + ret = handle_mm_fault(vma, address, fault_flags, NULL); if (ret & VM_FAULT_ERROR) { int err = vm_fault_to_errno(ret, *flags); @@ -893,13 +893,6 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, BUG(); } - if (tsk) { - if (ret & VM_FAULT_MAJOR) - tsk->maj_flt++; - else - tsk->min_flt++; - } - if (ret & VM_FAULT_RETRY) { if (locked && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT)) *locked = 0; @@ -969,7 +962,6 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) /** * __get_user_pages() - pin user pages in memory - * @tsk: task_struct of target task * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin @@ -1028,7 +1020,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) * instead of __get_user_pages. __get_user_pages should be used only if * you need some special @gup_flags. 
*/ -static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, +static long __get_user_pages(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) @@ -1110,8 +1102,7 @@ retry: page = follow_page_mask(vma, start, foll_flags, &ctx); if (!page) { - ret = faultin_page(tsk, vma, start, &foll_flags, - locked); + ret = faultin_page(vma, start, &foll_flags, locked); switch (ret) { case 0: goto retry; @@ -1185,8 +1176,6 @@ static bool vma_permits_fault(struct vm_area_struct *vma, /** * fixup_user_fault() - manually resolve a user page fault - * @tsk: the task_struct to use for page fault accounting, or - * NULL if faults are not to be recorded. * @mm: mm_struct of target mm * @address: user address * @fault_flags:flags to pass down to handle_mm_fault() @@ -1214,7 +1203,7 @@ static bool vma_permits_fault(struct vm_area_struct *vma, * This function will not return with an unlocked mmap_lock. So it has not the * same semantics wrt the @mm->mmap_lock as does filemap_fault(). 
*/ -int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, +int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked) { @@ -1238,7 +1227,7 @@ retry: fatal_signal_pending(current)) return -EINTR; - ret = handle_mm_fault(vma, address, fault_flags); + ret = handle_mm_fault(vma, address, fault_flags, NULL); major |= ret & VM_FAULT_MAJOR; if (ret & VM_FAULT_ERROR) { int err = vm_fault_to_errno(ret, 0); @@ -1255,12 +1244,6 @@ retry: goto retry; } - if (tsk) { - if (major) - tsk->maj_flt++; - else - tsk->min_flt++; - } return 0; } EXPORT_SYMBOL_GPL(fixup_user_fault); @@ -1269,8 +1252,7 @@ EXPORT_SYMBOL_GPL(fixup_user_fault); * Please note that this function, unlike __get_user_pages will not * return 0 for nr_pages > 0 without FOLL_NOWAIT */ -static __always_inline long __get_user_pages_locked(struct task_struct *tsk, - struct mm_struct *mm, +static __always_inline long __get_user_pages_locked(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, @@ -1303,7 +1285,7 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, pages_done = 0; lock_dropped = false; for (;;) { - ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages, + ret = __get_user_pages(mm, start, nr_pages, flags, pages, vmas, locked); if (!locked) /* VM_FAULT_RETRY couldn't trigger, bypass */ @@ -1363,7 +1345,7 @@ retry: } *locked = 1; - ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED, + ret = __get_user_pages(mm, start, 1, flags | FOLL_TRIED, pages, NULL, locked); if (!*locked) { /* Continue to retry until we succeeded */ @@ -1450,7 +1432,7 @@ long populate_vma_page_range(struct vm_area_struct *vma, * We made sure addr is within a VMA, so the following will * not result in a stack expansion that recurses back here. 
*/ - return __get_user_pages(current, mm, start, nr_pages, gup_flags, + return __get_user_pages(mm, start, nr_pages, gup_flags, NULL, NULL, locked); } @@ -1534,7 +1516,7 @@ struct page *get_dump_page(unsigned long addr) struct vm_area_struct *vma; struct page *page; - if (__get_user_pages(current, current->mm, addr, 1, + if (__get_user_pages(current->mm, addr, 1, FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma, NULL) < 1) return NULL; @@ -1543,8 +1525,7 @@ struct page *get_dump_page(unsigned long addr) } #endif /* CONFIG_ELF_CORE */ #else /* CONFIG_MMU */ -static long __get_user_pages_locked(struct task_struct *tsk, - struct mm_struct *mm, unsigned long start, +static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, struct vm_area_struct **vmas, int *locked, unsigned int foll_flags) @@ -1609,59 +1590,7 @@ static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages) } #ifdef CONFIG_CMA -static struct page *new_non_cma_page(struct page *page, unsigned long private) -{ - /* - * We want to make sure we allocate the new page from the same node - * as the source page. - */ - int nid = page_to_nid(page); - /* - * Trying to allocate a page for migration. Ignore allocation - * failure warnings. We don't force __GFP_THISNODE here because - * this node here is the node where we have CMA reservation and - * in some case these nodes will have really less non movable - * allocation memory. - */ - gfp_t gfp_mask = GFP_USER | __GFP_NOWARN; - - if (PageHighMem(page)) - gfp_mask |= __GFP_HIGHMEM; - -#ifdef CONFIG_HUGETLB_PAGE - if (PageHuge(page)) { - struct hstate *h = page_hstate(page); - /* - * We don't want to dequeue from the pool because pool pages will - * mostly be from the CMA region. 
- */ - return alloc_migrate_huge_page(h, gfp_mask, nid, NULL); - } -#endif - if (PageTransHuge(page)) { - struct page *thp; - /* - * ignore allocation failure warnings - */ - gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN; - - /* - * Remove the movable mask so that we don't allocate from - * CMA area again. - */ - thp_gfpmask &= ~__GFP_MOVABLE; - thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER); - if (!thp) - return NULL; - prep_transhuge_page(thp); - return thp; - } - - return __alloc_pages_node(nid, gfp_mask, 0); -} - -static long check_and_migrate_cma_pages(struct task_struct *tsk, - struct mm_struct *mm, +static long check_and_migrate_cma_pages(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, @@ -1674,6 +1603,10 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk, bool migrate_allow = true; LIST_HEAD(cma_page_list); long ret = nr_pages; + struct migration_target_control mtc = { + .nid = NUMA_NO_NODE, + .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_NOWARN, + }; check_again: for (i = 0; i < nr_pages;) { @@ -1719,8 +1652,8 @@ check_again: for (i = 0; i < nr_pages; i++) put_page(pages[i]); - if (migrate_pages(&cma_page_list, new_non_cma_page, - NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) { + if (migrate_pages(&cma_page_list, alloc_migration_target, NULL, + (unsigned long)&mtc, MIGRATE_SYNC, MR_CONTIG_RANGE)) { /* * some of the pages failed migration. Do get_user_pages * without migration. @@ -1735,7 +1668,7 @@ check_again: * again migrating any new CMA pages which we failed to isolate * earlier. 
*/ - ret = __get_user_pages_locked(tsk, mm, start, nr_pages, + ret = __get_user_pages_locked(mm, start, nr_pages, pages, vmas, NULL, gup_flags); @@ -1749,8 +1682,7 @@ check_again: return ret; } #else -static long check_and_migrate_cma_pages(struct task_struct *tsk, - struct mm_struct *mm, +static long check_and_migrate_cma_pages(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, @@ -1765,8 +1697,7 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk, * __gup_longterm_locked() is a wrapper for __get_user_pages_locked which * allows us to process the FOLL_LONGTERM flag. */ -static long __gup_longterm_locked(struct task_struct *tsk, - struct mm_struct *mm, +static long __gup_longterm_locked(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, @@ -1791,11 +1722,10 @@ static long __gup_longterm_locked(struct task_struct *tsk, flags = memalloc_nocma_save(); } - rc = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, + rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas_tmp, NULL, gup_flags); if (gup_flags & FOLL_LONGTERM) { - memalloc_nocma_restore(flags); if (rc < 0) goto out; @@ -1806,32 +1736,31 @@ static long __gup_longterm_locked(struct task_struct *tsk, goto out; } - rc = check_and_migrate_cma_pages(tsk, mm, start, rc, pages, + rc = check_and_migrate_cma_pages(mm, start, rc, pages, vmas_tmp, gup_flags); +out: + memalloc_nocma_restore(flags); } -out: if (vmas_tmp != vmas) kfree(vmas_tmp); return rc; } #else /* !CONFIG_FS_DAX && !CONFIG_CMA */ -static __always_inline long __gup_longterm_locked(struct task_struct *tsk, - struct mm_struct *mm, +static __always_inline long __gup_longterm_locked(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, struct vm_area_struct **vmas, unsigned int flags) { - return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas, + return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, 
NULL, flags); } #endif /* CONFIG_FS_DAX || CONFIG_CMA */ #ifdef CONFIG_MMU -static long __get_user_pages_remote(struct task_struct *tsk, - struct mm_struct *mm, +static long __get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) @@ -1850,20 +1779,18 @@ static long __get_user_pages_remote(struct task_struct *tsk, * This will check the vmas (even if our vmas arg is NULL) * and return -ENOTSUPP if DAX isn't allowed in this case: */ - return __gup_longterm_locked(tsk, mm, start, nr_pages, pages, + return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, gup_flags | FOLL_TOUCH | FOLL_REMOTE); } - return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas, + return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, locked, gup_flags | FOLL_TOUCH | FOLL_REMOTE); } /** * get_user_pages_remote() - pin user pages in memory - * @tsk: the task_struct to use for page fault accounting, or - * NULL if faults are not to be recorded. * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin @@ -1922,7 +1849,7 @@ static long __get_user_pages_remote(struct task_struct *tsk, * should use get_user_pages_remote because it cannot pass * FAULT_FLAG_ALLOW_RETRY to handle_mm_fault. 
*/ -long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, +long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) @@ -1934,13 +1861,13 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, if (WARN_ON_ONCE(gup_flags & FOLL_PIN)) return -EINVAL; - return __get_user_pages_remote(tsk, mm, start, nr_pages, gup_flags, + return __get_user_pages_remote(mm, start, nr_pages, gup_flags, pages, vmas, locked); } EXPORT_SYMBOL(get_user_pages_remote); #else /* CONFIG_MMU */ -long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, +long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) @@ -1948,8 +1875,7 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, return 0; } -static long __get_user_pages_remote(struct task_struct *tsk, - struct mm_struct *mm, +static long __get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) @@ -1969,11 +1895,10 @@ static long __get_user_pages_remote(struct task_struct *tsk, * @vmas: array of pointers to vmas corresponding to each page. * Or NULL if the caller does not require them. * - * This is the same as get_user_pages_remote(), just with a - * less-flexible calling convention where we assume that the task - * and mm being operated on are the current task's and don't allow - * passing of a locked parameter. We also obviously don't pass - * FOLL_REMOTE in here. + * This is the same as get_user_pages_remote(), just with a less-flexible + * calling convention where we assume that the mm being operated on belongs to + * the current task, and doesn't allow passing of a locked parameter. 
We also + * obviously don't pass FOLL_REMOTE in here. */ long get_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, @@ -1986,7 +1911,7 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, if (WARN_ON_ONCE(gup_flags & FOLL_PIN)) return -EINVAL; - return __gup_longterm_locked(current, current->mm, start, nr_pages, + return __gup_longterm_locked(current->mm, start, nr_pages, pages, vmas, gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages); @@ -1996,7 +1921,7 @@ EXPORT_SYMBOL(get_user_pages); * * mmap_read_lock(mm); * do_something() - * get_user_pages(tsk, mm, ..., pages, NULL); + * get_user_pages(mm, ..., pages, NULL); * mmap_read_unlock(mm); * * to: @@ -2004,7 +1929,7 @@ EXPORT_SYMBOL(get_user_pages); * int locked = 1; * mmap_read_lock(mm); * do_something() - * get_user_pages_locked(tsk, mm, ..., pages, &locked); + * get_user_pages_locked(mm, ..., pages, &locked); * if (locked) * mmap_read_unlock(mm); * @@ -2042,7 +1967,7 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, if (WARN_ON_ONCE(gup_flags & FOLL_PIN)) return -EINVAL; - return __get_user_pages_locked(current, current->mm, start, nr_pages, + return __get_user_pages_locked(current->mm, start, nr_pages, pages, NULL, locked, gup_flags | FOLL_TOUCH); } @@ -2052,12 +1977,12 @@ EXPORT_SYMBOL(get_user_pages_locked); * get_user_pages_unlocked() is suitable to replace the form: * * mmap_read_lock(mm); - * get_user_pages(tsk, mm, ..., pages, NULL); + * get_user_pages(mm, ..., pages, NULL); * mmap_read_unlock(mm); * * with: * - * get_user_pages_unlocked(tsk, mm, ..., pages); + * get_user_pages_unlocked(mm, ..., pages); * * It is functionally equivalent to get_user_pages_fast so * get_user_pages_fast should be used instead if specific gup_flags @@ -2080,7 +2005,7 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, return -EINVAL; mmap_read_lock(mm); - ret = __get_user_pages_locked(current, mm, start, 
nr_pages, pages, NULL, + ret = __get_user_pages_locked(mm, start, nr_pages, pages, NULL, &locked, gup_flags | FOLL_TOUCH); if (locked) mmap_read_unlock(mm); @@ -2725,7 +2650,7 @@ static int __gup_longterm_unlocked(unsigned long start, int nr_pages, */ if (gup_flags & FOLL_LONGTERM) { mmap_read_lock(current->mm); - ret = __gup_longterm_locked(current, current->mm, + ret = __gup_longterm_locked(current->mm, start, nr_pages, pages, NULL, gup_flags); mmap_read_unlock(current->mm); @@ -2968,10 +2893,8 @@ int pin_user_pages_fast_only(unsigned long start, int nr_pages, EXPORT_SYMBOL_GPL(pin_user_pages_fast_only); /** - * pin_user_pages_remote() - pin pages of a remote process (task != current) + * pin_user_pages_remote() - pin pages of a remote process * - * @tsk: the task_struct to use for page fault accounting, or - * NULL if faults are not to be recorded. * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin @@ -2992,7 +2915,7 @@ EXPORT_SYMBOL_GPL(pin_user_pages_fast_only); * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for details. 
*/ -long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, +long pin_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) @@ -3002,7 +2925,7 @@ long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, return -EINVAL; gup_flags |= FOLL_PIN; - return __get_user_pages_remote(tsk, mm, start, nr_pages, gup_flags, + return __get_user_pages_remote(mm, start, nr_pages, gup_flags, pages, vmas, locked); } EXPORT_SYMBOL(pin_user_pages_remote); @@ -3034,7 +2957,7 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages, return -EINVAL; gup_flags |= FOLL_PIN; - return __gup_longterm_locked(current, current->mm, start, nr_pages, + return __gup_longterm_locked(current->mm, start, nr_pages, pages, vmas, gup_flags); } EXPORT_SYMBOL(pin_user_pages); @@ -3079,7 +3002,7 @@ long pin_user_pages_locked(unsigned long start, unsigned long nr_pages, return -EINVAL; gup_flags |= FOLL_PIN; - return __get_user_pages_locked(current, current->mm, start, nr_pages, + return __get_user_pages_locked(current->mm, start, nr_pages, pages, NULL, locked, gup_flags | FOLL_TOUCH); } @@ -75,7 +75,8 @@ static int hmm_vma_fault(unsigned long addr, unsigned long end, } for (; addr < end; addr += PAGE_SIZE) - if (handle_mm_fault(vma, addr, fault_flags) & VM_FAULT_ERROR) + if (handle_mm_fault(vma, addr, fault_flags, NULL) & + VM_FAULT_ERROR) return -EFAULT; return -EBUSY; } @@ -249,7 +250,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, swp_entry_t entry = pte_to_swp_entry(pte); /* - * Never fault in device private pages pages, but just report + * Never fault in device private pages, but just report * the PFN even if not present. 
*/ if (hmm_is_device_private_entry(range, entry)) { diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 206f52b36ffb..2ccff8472cd4 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -303,24 +303,6 @@ static ssize_t hpage_pmd_size_show(struct kobject *kobj, static struct kobj_attribute hpage_pmd_size_attr = __ATTR_RO(hpage_pmd_size); -#ifdef CONFIG_DEBUG_VM -static ssize_t debug_cow_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return single_hugepage_flag_show(kobj, attr, buf, - TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG); -} -static ssize_t debug_cow_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - return single_hugepage_flag_store(kobj, attr, buf, count, - TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG); -} -static struct kobj_attribute debug_cow_attr = - __ATTR(debug_cow, 0644, debug_cow_show, debug_cow_store); -#endif /* CONFIG_DEBUG_VM */ - static struct attribute *hugepage_attr[] = { &enabled_attr.attr, &defrag_attr.attr, @@ -329,9 +311,6 @@ static struct attribute *hugepage_attr[] = { #ifdef CONFIG_SHMEM &shmem_enabled_attr.attr, #endif -#ifdef CONFIG_DEBUG_VM - &debug_cow_attr.attr, -#endif NULL, }; @@ -640,7 +619,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, entry = mk_huge_pmd(page, vma->vm_page_prot); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); page_add_new_anon_rmap(page, vma, haddr, true); - lru_cache_add_active_or_unevictable(page, vma); + lru_cache_add_inactive_or_unevictable(page, vma); pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e52c878940bb..a301c2d672bf 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -19,6 +19,7 @@ #include <linux/memblock.h> #include <linux/sysfs.h> #include <linux/slab.h> +#include <linux/sched/mm.h> #include <linux/mmdebug.h> #include <linux/sched/signal.h> 
#include <linux/rmap.h> @@ -133,7 +134,7 @@ void hugepage_put_subpool(struct hugepage_subpool *spool) /* * Subpool accounting for allocating and reserving pages. * Return -ENOMEM if there are not enough resources to satisfy the - * the request. Otherwise, return the number of pages by which the + * request. Otherwise, return the number of pages by which the * global pools must be adjusted (upward). The returned value may * only be different than the passed value (delta) in the case where * a subpool minimum size must be maintained. @@ -1040,10 +1041,16 @@ static void enqueue_huge_page(struct hstate *h, struct page *page) static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid) { struct page *page; + bool nocma = !!(current->flags & PF_MEMALLOC_NOCMA); + + list_for_each_entry(page, &h->hugepage_freelists[nid], lru) { + if (nocma && is_migrate_cma_page(page)) + continue; - list_for_each_entry(page, &h->hugepage_freelists[nid], lru) if (!PageHWPoison(page)) break; + } + /* * if 'non-isolated free hugepage' not found on the list, * the allocation fails. @@ -1093,15 +1100,6 @@ retry_cpuset: return NULL; } -/* Movability of hugepages depends on migration support. 
*/ -static inline gfp_t htlb_alloc_mask(struct hstate *h) -{ - if (hugepage_movable_supported(h)) - return GFP_HIGHUSER_MOVABLE; - else - return GFP_HIGHUSER; -} - static struct page *dequeue_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address, int avoid_reserve, @@ -1944,7 +1942,7 @@ out_unlock: return page; } -struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask, +static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nmask) { struct page *page; @@ -1986,31 +1984,9 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h, } /* page migration callback function */ -struct page *alloc_huge_page_node(struct hstate *h, int nid) -{ - gfp_t gfp_mask = htlb_alloc_mask(h); - struct page *page = NULL; - - if (nid != NUMA_NO_NODE) - gfp_mask |= __GFP_THISNODE; - - spin_lock(&hugetlb_lock); - if (h->free_huge_pages - h->resv_huge_pages > 0) - page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL); - spin_unlock(&hugetlb_lock); - - if (!page) - page = alloc_migrate_huge_page(h, gfp_mask, nid, NULL); - - return page; -} - -/* page migration callback function */ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, - nodemask_t *nmask) + nodemask_t *nmask, gfp_t gfp_mask) { - gfp_t gfp_mask = htlb_alloc_mask(h); - spin_lock(&hugetlb_lock); if (h->free_huge_pages - h->resv_huge_pages > 0) { struct page *page; @@ -2038,7 +2014,7 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, gfp_mask = htlb_alloc_mask(h); node = huge_node(vma, address, gfp_mask, &mpol, &nodemask); - page = alloc_huge_page_nodemask(h, node, nodemask); + page = alloc_huge_page_nodemask(h, node, nodemask, gfp_mask); mpol_cond_put(mpol); return page; @@ -2167,7 +2143,7 @@ static void return_unused_surplus_pages(struct hstate *h, * evenly across all nodes with memory. Iterate across these nodes * until we can no longer free unreserved surplus pages. 
This occurs * when the nodes with surplus pages have no free pages. - * free_pool_huge_page() will balance the the freed pages across the + * free_pool_huge_page() will balance the freed pages across the * on-line nodes with memory and will handle the hstate accounting. * * Note that we decrement resv_huge_pages as we free the pages. If @@ -3458,13 +3434,21 @@ static int __init default_hugepagesz_setup(char *s) } __setup("default_hugepagesz=", default_hugepagesz_setup); -static unsigned int cpuset_mems_nr(unsigned int *array) +static unsigned int allowed_mems_nr(struct hstate *h) { int node; unsigned int nr = 0; + nodemask_t *mpol_allowed; + unsigned int *array = h->free_huge_pages_node; + gfp_t gfp_mask = htlb_alloc_mask(h); - for_each_node_mask(node, cpuset_current_mems_allowed) - nr += array[node]; + mpol_allowed = policy_nodemask_current(gfp_mask); /* may be NULL; then every cpuset-allowed node counts */ + + for_each_node_mask(node, cpuset_current_mems_allowed) { + if (!mpol_allowed || + node_isset(node, *mpol_allowed)) + nr += array[node]; + } return nr; } @@ -3643,12 +3627,18 @@ static int hugetlb_acct_memory(struct hstate *h, long delta) * we fall back to check against current free page availability as * a best attempt and hopefully to minimize the impact of changing * semantics that cpuset has. + * + * Apart from cpuset, we also have memory policy mechanism that + * also determines from which node the kernel will allocate memory + * in a NUMA system. So similar to cpuset, we also should consider + * the memory policy of the current task. Similar to the description + * above. 
*/ if (delta > 0) { if (gather_surplus_pages(h, delta) < 0) goto out; - if (delta > cpuset_mems_nr(h->free_huge_pages_node)) { + if (delta > allowed_mems_nr(h)) { return_unused_surplus_pages(h, delta); goto out; } @@ -3953,7 +3943,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, continue; ptl = huge_pte_lock(h, mm, ptep); - if (huge_pmd_unshare(mm, &address, ptep)) { + if (huge_pmd_unshare(mm, vma, &address, ptep)) { spin_unlock(ptl); /* * We just unmapped a page of PMDs by clearing a PUD. @@ -4540,10 +4530,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) return VM_FAULT_HWPOISON_LARGE | VM_FAULT_SET_HINDEX(hstate_index(h)); - } else { - ptep = huge_pte_alloc(mm, haddr, huge_page_size(h)); - if (!ptep) - return VM_FAULT_OOM; } /* @@ -5020,7 +5006,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, if (!ptep) continue; ptl = huge_pte_lock(h, mm, ptep); - if (huge_pmd_unshare(mm, &address, ptep)) { + if (huge_pmd_unshare(mm, vma, &address, ptep)) { pages++; spin_unlock(ptl); shared_pmd = true; @@ -5401,12 +5387,14 @@ out: * returns: 1 successfully unmapped a shared pte page * 0 the underlying pte page is not shared, or it is the last user */ -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long *addr, pte_t *ptep) { pgd_t *pgd = pgd_offset(mm, *addr); p4d_t *p4d = p4d_offset(pgd, *addr); pud_t *pud = pud_offset(p4d, *addr); + i_mmap_assert_write_locked(vma->vm_file->f_mapping); BUG_ON(page_count(virt_to_page(ptep)) == 0); if (page_count(virt_to_page(ptep)) == 1) return 0; @@ -5424,7 +5412,8 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) return NULL; } -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +int huge_pmd_unshare(struct mm_struct *mm, struct 
vm_area_struct *vma, + unsigned long *addr, pte_t *ptep) { return 0; } @@ -5694,12 +5683,14 @@ void __init hugetlb_cma_reserve(int order) reserved = 0; for_each_node_state(nid, N_ONLINE) { int res; + char name[20]; /* "hugetlb" + node id, passed to cma_declare_contiguous_nid() below */ size = min(per_node, hugetlb_cma_size - reserved); size = round_up(size, PAGE_SIZE << order); + snprintf(name, sizeof(name), "hugetlb%d", nid); res = cma_declare_contiguous_nid(0, size, 0, PAGE_SIZE << order, - 0, false, "hugetlb", + 0, false, name, &hugetlb_cma[nid], nid); if (res) { pr_warn("hugetlb_cma: reservation failed: err %d, node %d", diff --git a/mm/internal.h b/mm/internal.h index 9886db20d94f..d11a9a8d2135 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -239,6 +239,7 @@ struct compact_control { bool no_set_skip_hint; /* Don't mark blocks for skipping */ bool ignore_block_suitable; /* Scan blocks considered unsuitable */ bool direct_compaction; /* False from kcompactd or /proc/... */ + bool proactive_compaction; /* kcompactd proactive compaction */ bool whole_zone; /* Whole zone should/has been scanned */ bool contended; /* Signal lock or sched contention */ bool rescan; /* Rescanning the same pageblock */ @@ -612,5 +613,11 @@ static inline bool is_migrate_highatomic_page(struct page *page) } void setup_zone_pageset(struct zone *zone); -extern struct page *alloc_new_node_page(struct page *page, unsigned long node); + +struct migration_target_control { + int nid; /* preferred node id */ + nodemask_t *nmask; + gfp_t gfp_mask; +}; + #endif /* __MM_INTERNAL_H */ diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b52bd46ad146..15a9af791014 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1173,7 +1173,7 @@ static void collapse_huge_page(struct mm_struct *mm, spin_lock(pmd_ptl); BUG_ON(!pmd_none(*pmd)); page_add_new_anon_rmap(new_page, vma, address, true); - lru_cache_add_active_or_unevictable(new_page, vma); + lru_cache_add_inactive_or_unevictable(new_page, vma); pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, address, pmd, _pmd); 
update_mmu_cache_pmd(vma, address, pmd); @@ -480,7 +480,8 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) break; if (PageKsm(page)) ret = handle_mm_fault(vma, addr, - FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE); + FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE, + NULL); else ret = VM_FAULT_WRITE; put_page(page); diff --git a/mm/maccess.c b/mm/maccess.c index f98ff91e32c6..3bd70405f2d8 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -205,15 +205,14 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) long copy_from_user_nofault(void *dst, const void __user *src, size_t size) { long ret = -EFAULT; - mm_segment_t old_fs = get_fs(); + mm_segment_t old_fs = force_uaccess_begin(); - set_fs(USER_DS); if (access_ok(src, size)) { pagefault_disable(); ret = __copy_from_user_inatomic(dst, src, size); pagefault_enable(); } - set_fs(old_fs); + force_uaccess_end(old_fs); if (ret) return -EFAULT; @@ -233,15 +232,14 @@ EXPORT_SYMBOL_GPL(copy_from_user_nofault); long copy_to_user_nofault(void __user *dst, const void *src, size_t size) { long ret = -EFAULT; - mm_segment_t old_fs = get_fs(); + mm_segment_t old_fs = force_uaccess_begin(); - set_fs(USER_DS); if (access_ok(dst, size)) { pagefault_disable(); ret = __copy_to_user_inatomic(dst, src, size); pagefault_enable(); } - set_fs(old_fs); + force_uaccess_end(old_fs); if (ret) return -EFAULT; @@ -270,17 +268,17 @@ EXPORT_SYMBOL_GPL(copy_to_user_nofault); long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long count) { - mm_segment_t old_fs = get_fs(); + mm_segment_t old_fs; long ret; if (unlikely(count <= 0)) return 0; - set_fs(USER_DS); + old_fs = force_uaccess_begin(); pagefault_disable(); ret = strncpy_from_user(dst, unsafe_addr, count); pagefault_enable(); - set_fs(old_fs); + force_uaccess_end(old_fs); if (ret >= count) { ret = count; @@ -310,14 +308,14 @@ long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, */ long strnlen_user_nofault(const void 
__user *unsafe_addr, long count) { - mm_segment_t old_fs = get_fs(); + mm_segment_t old_fs; int ret; - set_fs(USER_DS); + old_fs = force_uaccess_begin(); pagefault_disable(); ret = strnlen_user(unsafe_addr, count); pagefault_enable(); - set_fs(old_fs); + force_uaccess_end(old_fs); return ret; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8d9ceea7fe4d..9d87082e64aa 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -781,7 +781,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val) if (mem_cgroup_disabled()) return; - if (vmstat_item_in_bytes(idx)) + if (memcg_stat_item_in_bytes(idx)) threshold <<= PAGE_SHIFT; x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]); @@ -1488,6 +1488,8 @@ static char *memory_stat_format(struct mem_cgroup *memcg) seq_buf_printf(&s, "slab %llu\n", (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) + memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE_B))); + seq_buf_printf(&s, "percpu %llu\n", + (u64)memcg_page_state(memcg, MEMCG_PERCPU_B)); seq_buf_printf(&s, "sock %llu\n", (u64)memcg_page_state(memcg, MEMCG_SOCK) * PAGE_SIZE); @@ -1528,12 +1530,18 @@ static char *memory_stat_format(struct mem_cgroup *memcg) seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGMAJFAULT), memcg_events(memcg, PGMAJFAULT)); - seq_buf_printf(&s, "workingset_refault %lu\n", - memcg_page_state(memcg, WORKINGSET_REFAULT)); - seq_buf_printf(&s, "workingset_activate %lu\n", - memcg_page_state(memcg, WORKINGSET_ACTIVATE)); + seq_buf_printf(&s, "workingset_refault_anon %lu\n", + memcg_page_state(memcg, WORKINGSET_REFAULT_ANON)); + seq_buf_printf(&s, "workingset_refault_file %lu\n", + memcg_page_state(memcg, WORKINGSET_REFAULT_FILE)); + seq_buf_printf(&s, "workingset_activate_anon %lu\n", + memcg_page_state(memcg, WORKINGSET_ACTIVATE_ANON)); + seq_buf_printf(&s, "workingset_activate_file %lu\n", + memcg_page_state(memcg, WORKINGSET_ACTIVATE_FILE)); + seq_buf_printf(&s, "workingset_restore_anon %lu\n", + memcg_page_state(memcg, 
WORKINGSET_RESTORE_ANON)); /* anon and file restore counts need distinct keys */ - seq_buf_printf(&s, "workingset_restore %lu\n", - memcg_page_state(memcg, WORKINGSET_RESTORE)); + seq_buf_printf(&s, "workingset_restore_file %lu\n", + memcg_page_state(memcg, WORKINGSET_RESTORE_FILE)); seq_buf_printf(&s, "workingset_nodereclaim %lu\n", memcg_page_state(memcg, WORKINGSET_NODERECLAIM)); @@ -2414,7 +2422,7 @@ static void high_work_func(struct work_struct *work) * * - MEMCG_DELAY_PRECISION_SHIFT: Extra precision bits while translating the * overage ratio to a delay. - * - MEMCG_DELAY_SCALING_SHIFT: The number of bits to scale down down the + * - MEMCG_DELAY_SCALING_SHIFT: The number of bits to scale down the * proposed penalty in order to reduce to a reasonable number of jiffies, and * to produce a reasonable delay curve. * @@ -5129,13 +5137,15 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) if (!pn) return 1; - pn->lruvec_stat_local = alloc_percpu(struct lruvec_stat); + pn->lruvec_stat_local = alloc_percpu_gfp(struct lruvec_stat, + GFP_KERNEL_ACCOUNT); if (!pn->lruvec_stat_local) { kfree(pn); return 1; } - pn->lruvec_stat_cpu = alloc_percpu(struct lruvec_stat); + pn->lruvec_stat_cpu = alloc_percpu_gfp(struct lruvec_stat, + GFP_KERNEL_ACCOUNT); if (!pn->lruvec_stat_cpu) { free_percpu(pn->lruvec_stat_local); kfree(pn); @@ -5209,11 +5219,13 @@ static struct mem_cgroup *mem_cgroup_alloc(void) goto fail; } - memcg->vmstats_local = alloc_percpu(struct memcg_vmstats_percpu); + memcg->vmstats_local = alloc_percpu_gfp(struct memcg_vmstats_percpu, + GFP_KERNEL_ACCOUNT); if (!memcg->vmstats_local) goto fail; - memcg->vmstats_percpu = alloc_percpu(struct memcg_vmstats_percpu); + memcg->vmstats_percpu = alloc_percpu_gfp(struct memcg_vmstats_percpu, + GFP_KERNEL_ACCOUNT); if (!memcg->vmstats_percpu) goto fail; @@ -5262,7 +5274,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) struct mem_cgroup *memcg; long error = -ENOMEM; + memalloc_use_memcg(parent); memcg = mem_cgroup_alloc(); + memalloc_unuse_memcg(); if (IS_ERR(memcg)) return 
ERR_CAST(memcg); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 47b8ccb1fb9b..f1aa6433f404 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1648,9 +1648,12 @@ EXPORT_SYMBOL(unpoison_memory); static struct page *new_page(struct page *p, unsigned long private) { - int nid = page_to_nid(p); + struct migration_target_control mtc = { + .nid = page_to_nid(p), + .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL, + }; - return new_page_nodemask(p, nid, &node_states[N_MEMORY]); + return alloc_migration_target(p, (unsigned long)&mtc); } /* diff --git a/mm/memory.c b/mm/memory.c index c39a13b09602..228efaca75d3 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -71,6 +71,8 @@ #include <linux/dax.h> #include <linux/oom.h> #include <linux/numa.h> +#include <linux/perf_event.h> +#include <linux/ptrace.h> #include <trace/events/kmem.h> @@ -1800,7 +1802,7 @@ out_unlock: * @pfn: source kernel pfn * @pgprot: pgprot flags for the inserted page * - * This is exactly like vmf_insert_pfn(), except that it allows drivers to + * This is exactly like vmf_insert_pfn(), except that it allows drivers * to override pgprot on a per-page basis. * * This only makes sense for IO mappings, and it makes no sense for @@ -1936,7 +1938,7 @@ static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma, * @pfn: source kernel pfn * @pgprot: pgprot flags for the inserted page * - * This is exactly like vmf_insert_mixed(), except that it allows drivers to + * This is exactly like vmf_insert_mixed(), except that it allows drivers * to override pgprot on a per-page basis. 
* * Typically this function should be used by drivers to set caching- and @@ -2715,7 +2717,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) */ ptep_clear_flush_notify(vma, vmf->address, vmf->pte); page_add_new_anon_rmap(new_page, vma, vmf->address, false); - lru_cache_add_active_or_unevictable(new_page, vma); + lru_cache_add_inactive_or_unevictable(new_page, vma); /* * We call the notify macro here because, when using secondary * mmu page tables (such as kvm shadow page tables), we want the @@ -3098,6 +3100,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) int locked; int exclusive = 0; vm_fault_t ret = 0; + void *shadow = NULL; if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) goto out; @@ -3149,13 +3152,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) goto out_page; } - /* - * XXX: Move to lru_cache_add() when it - * supports new vs putback - */ - spin_lock_irq(&page_pgdat(page)->lru_lock); - lru_note_cost_page(page); - spin_unlock_irq(&page_pgdat(page)->lru_lock); + shadow = get_shadow_from_swap_cache(entry); + if (shadow) + workingset_refault(page, shadow); lru_cache_add(page); swap_readpage(page, true); @@ -3266,10 +3265,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) /* ksm created a completely new copy */ if (unlikely(page != swapcache && swapcache)) { page_add_new_anon_rmap(page, vma, vmf->address, false); - lru_cache_add_active_or_unevictable(page, vma); + lru_cache_add_inactive_or_unevictable(page, vma); } else { do_page_add_anon_rmap(page, vma, vmf->address, exclusive); - activate_page(page); } swap_free(entry); @@ -3414,7 +3412,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, vmf->address, false); - lru_cache_add_active_or_unevictable(page, vma); + lru_cache_add_inactive_or_unevictable(page, vma); setpte: set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); @@ -3672,7 +3670,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, 
struct page *page) if (write && !(vma->vm_flags & VM_SHARED)) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, vmf->address, false); - lru_cache_add_active_or_unevictable(page, vma); + lru_cache_add_inactive_or_unevictable(page, vma); } else { inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); page_add_file_rmap(page, false); @@ -4360,6 +4358,67 @@ retry_pud: return handle_pte_fault(&vmf); } +/** + * mm_account_fault - Do page fault accountings + * + * @regs: the pt_regs struct pointer. When set to NULL, will skip accounting + * of perf event counters, but we'll still do the per-task accounting to + * the task who triggered this page fault. + * @address: the faulted address. + * @flags: the fault flags. + * @ret: the fault retcode. + * + * This will take care of most of the page fault accountings. Meanwhile, it + * will also include the PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN] perf counter + * updates. However note that the handling of PERF_COUNT_SW_PAGE_FAULTS should + * still be in per-arch page fault handlers at the entry of page fault. + */ +static inline void mm_account_fault(struct pt_regs *regs, + unsigned long address, unsigned int flags, + vm_fault_t ret) +{ + bool major; + + /* + * We don't do accounting for some specific faults: + * + * - Unsuccessful faults (e.g. when the address wasn't valid). That + * includes arch_vma_access_permitted() failing before reaching here. + * So this is not a "this many hardware page faults" counter. We + * should use the hw profiling for that. + * + * - Incomplete faults (VM_FAULT_RETRY). They will only be counted + * once they're completed. + */ + if (ret & (VM_FAULT_ERROR | VM_FAULT_RETRY)) + return; + + /* + * We define the fault as a major fault when the final successful fault + * is VM_FAULT_MAJOR, or if it retried (which implies that we couldn't + * handle it immediately previously). 
+ */ + major = (ret & VM_FAULT_MAJOR) || (flags & FAULT_FLAG_TRIED); + + if (major) + current->maj_flt++; + else + current->min_flt++; + + /* + * If the fault is done for GUP, regs will be NULL. We only do the + * accounting for the per thread fault counters who triggered the + * fault, and we skip the perf event updates. + */ + if (!regs) + return; + + if (major) + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); + else + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); +} + /* * By the time we get here, we already hold the mm semaphore * @@ -4367,7 +4426,7 @@ retry_pud: * return value. See filemap_fault() and __lock_page_or_retry(). */ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, - unsigned int flags) + unsigned int flags, struct pt_regs *regs) { vm_fault_t ret; @@ -4408,6 +4467,8 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, mem_cgroup_oom_synchronize(false); } + mm_account_fault(regs, address, flags, ret); + return ret; } EXPORT_SYMBOL_GPL(handle_mm_fault); @@ -4681,7 +4742,7 @@ int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, void *maddr; struct page *page = NULL; - ret = get_user_pages_remote(tsk, mm, addr, 1, + ret = get_user_pages_remote(mm, addr, 1, gup_flags, &page, &vma, NULL); if (ret <= 0) { #ifndef CONFIG_HAVE_IOREMAP_PROT diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index ac6961abaa10..c32ead89c911 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -350,6 +350,16 @@ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages, return err; } +#ifdef CONFIG_NUMA +int __weak memory_add_physaddr_to_nid(u64 start) +{ + pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n", + start); + return 0; +} +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); +#endif + /* find the smallest valid pfn in the range [start_pfn, end_pfn) */ static unsigned long find_smallest_section_pfn(int nid, struct 
zone *zone, unsigned long start_pfn, @@ -844,8 +854,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, node_states_set_node(nid, &arg); if (need_zonelists_rebuild) build_all_zonelists(NULL); - else - zone_pcp_update(zone); + zone_pcp_update(zone); init_per_zone_wmark_min(); @@ -1267,19 +1276,23 @@ found: static struct page *new_node_page(struct page *page, unsigned long private) { - int nid = page_to_nid(page); nodemask_t nmask = node_states[N_MEMORY]; + struct migration_target_control mtc = { + .nid = page_to_nid(page), + .nmask = &nmask, + .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL, + }; /* * try to allocate from a different node but reuse this node if there * are no other online nodes to be used (e.g. we are offlining a part * of the only existing node) */ - node_clear(nid, nmask); + node_clear(mtc.nid, nmask); if (nodes_empty(nmask)) - node_set(nid, nmask); + node_set(mtc.nid, nmask); - return new_page_nodemask(page, nid, &nmask); + return alloc_migration_target(page, (unsigned long)&mtc); } static int @@ -1747,7 +1760,7 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) */ rc = walk_memory_blocks(start, size, NULL, check_memblock_offlined_cb); if (rc) - goto done; + return rc; /* remove memmap entry */ firmware_map_remove(start, start + size, "System RAM"); @@ -1771,9 +1784,8 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) try_offline_node(nid); -done: mem_hotplug_done(); - return rc; + return 0; } /** diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b9e85d467352..afaa09ff9f6c 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -129,7 +129,7 @@ static struct mempolicy preferred_node_policy[MAX_NUMNODES]; /** * numa_map_to_online_node - Find closest online node - * @nid: Node id to start the search + * @node: Node id to start the search * * Lookup the next closest node by distance if @nid is not online. 
*/ @@ -1065,27 +1065,6 @@ static int migrate_page_add(struct page *page, struct list_head *pagelist, return 0; } -/* page allocation callback for NUMA node migration */ -struct page *alloc_new_node_page(struct page *page, unsigned long node) -{ - if (PageHuge(page)) - return alloc_huge_page_node(page_hstate(compound_head(page)), - node); - else if (PageTransHuge(page)) { - struct page *thp; - - thp = alloc_pages_node(node, - (GFP_TRANSHUGE | __GFP_THISNODE), - HPAGE_PMD_ORDER); - if (!thp) - return NULL; - prep_transhuge_page(thp); - return thp; - } else - return __alloc_pages_node(node, GFP_HIGHUSER_MOVABLE | - __GFP_THISNODE, 0); -} - /* * Migrate pages from one node to a target node. * Returns error or the number of pages not migrated. @@ -1096,6 +1075,10 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, nodemask_t nmask; LIST_HEAD(pagelist); int err = 0; + struct migration_target_control mtc = { + .nid = dest, + .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, + }; nodes_clear(nmask); node_set(source, nmask); @@ -1110,8 +1093,8 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, flags | MPOL_MF_DISCONTIG_OK, &pagelist); if (!list_empty(&pagelist)) { - err = migrate_pages(&pagelist, alloc_new_node_page, NULL, dest, - MIGRATE_SYNC, MR_SYSCALL); + err = migrate_pages(&pagelist, alloc_migration_target, NULL, + (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL); if (err) putback_movable_pages(&pagelist); } @@ -1632,11 +1615,11 @@ static int kernel_get_mempolicy(int __user *policy, int pval; nodemask_t nodes; - addr = untagged_addr(addr); - if (nmask != NULL && maxnode < nr_node_ids) return -EINVAL; + addr = untagged_addr(addr); + err = do_get_mempolicy(&pval, &nodes, addr, flags); if (err) @@ -1890,7 +1873,7 @@ static int apply_policy_zone(struct mempolicy *policy, enum zone_type zone) * Return a nodemask representing a mempolicy for filtering nodes for * page allocation */ -static nodemask_t *policy_nodemask(gfp_t gfp, 
struct mempolicy *policy) +nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy) { /* Lower zones don't get a nodemask applied for MPOL_BIND */ if (unlikely(policy->mode == MPOL_BIND) && diff --git a/mm/migrate.c b/mm/migrate.c index d179657f8685..5053439be6ab 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1418,22 +1418,35 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, enum migrate_mode mode, int reason) { int retry = 1; + int thp_retry = 1; int nr_failed = 0; int nr_succeeded = 0; + int nr_thp_succeeded = 0; + int nr_thp_failed = 0; + int nr_thp_split = 0; int pass = 0; + bool is_thp = false; struct page *page; struct page *page2; int swapwrite = current->flags & PF_SWAPWRITE; - int rc; + int rc, nr_subpages; if (!swapwrite) current->flags |= PF_SWAPWRITE; - for(pass = 0; pass < 10 && retry; pass++) { + for (pass = 0; pass < 10 && (retry || thp_retry); pass++) { retry = 0; + thp_retry = 0; list_for_each_entry_safe(page, page2, from, lru) { retry: + /* + * THP statistics is based on the source huge page. + * Capture required information that might get lost + * during migration. + */ + is_thp = PageTransHuge(page); + nr_subpages = hpage_nr_pages(page); cond_resched(); if (PageHuge(page)) @@ -1464,15 +1477,30 @@ retry: unlock_page(page); if (!rc) { list_safe_reset_next(page, page2, lru); + nr_thp_split++; goto retry; } } + if (is_thp) { + nr_thp_failed++; + nr_failed += nr_subpages; + goto out; + } nr_failed++; goto out; case -EAGAIN: + if (is_thp) { + thp_retry++; + break; + } retry++; break; case MIGRATEPAGE_SUCCESS: + if (is_thp) { + nr_thp_succeeded++; + nr_succeeded += nr_subpages; + break; + } nr_succeeded++; break; default: @@ -1482,19 +1510,27 @@ retry: * removed from migration page list and not * retried in the next outer loop. 
*/ + if (is_thp) { + nr_thp_failed++; + nr_failed += nr_subpages; + break; + } nr_failed++; break; } } } - nr_failed += retry; + nr_failed += retry + thp_retry; + nr_thp_failed += thp_retry; rc = nr_failed; out: - if (nr_succeeded) - count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded); - if (nr_failed) - count_vm_events(PGMIGRATE_FAIL, nr_failed); - trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason); + count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded); + count_vm_events(PGMIGRATE_FAIL, nr_failed); + count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded); + count_vm_events(THP_MIGRATION_FAIL, nr_thp_failed); + count_vm_events(THP_MIGRATION_SPLIT, nr_thp_split); + trace_mm_migrate_pages(nr_succeeded, nr_failed, nr_thp_succeeded, + nr_thp_failed, nr_thp_split, mode, reason); if (!swapwrite) current->flags &= ~PF_SWAPWRITE; @@ -1502,6 +1538,49 @@ out: return rc; } +struct page *alloc_migration_target(struct page *page, unsigned long private) +{ + struct migration_target_control *mtc; + gfp_t gfp_mask; + unsigned int order = 0; + struct page *new_page = NULL; + int nid; + int zidx; + + mtc = (struct migration_target_control *)private; + gfp_mask = mtc->gfp_mask; + nid = mtc->nid; + if (nid == NUMA_NO_NODE) + nid = page_to_nid(page); + + if (PageHuge(page)) { + struct hstate *h = page_hstate(compound_head(page)); + + gfp_mask = htlb_modify_alloc_mask(h, gfp_mask); + return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask); + } + + if (PageTransHuge(page)) { + /* + * clear __GFP_RECLAIM to make the migration callback + * consistent with regular THP allocations. 
+ */ + gfp_mask &= ~__GFP_RECLAIM; + gfp_mask |= GFP_TRANSHUGE; + order = HPAGE_PMD_ORDER; + } + zidx = zone_idx(page_zone(page)); + if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE) + gfp_mask |= __GFP_HIGHMEM; + + new_page = __alloc_pages_nodemask(gfp_mask, order, nid, mtc->nmask); + + if (new_page && PageTransHuge(new_page)) + prep_transhuge_page(new_page); + + return new_page; +} + #ifdef CONFIG_NUMA static int store_status(int __user *status, int start, int value, int nr) @@ -1519,9 +1598,13 @@ static int do_move_pages_to_node(struct mm_struct *mm, struct list_head *pagelist, int node) { int err; + struct migration_target_control mtc = { + .nid = node, + .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, + }; - err = migrate_pages(pagelist, alloc_new_node_page, NULL, node, - MIGRATE_SYNC, MR_SYSCALL); + err = migrate_pages(pagelist, alloc_migration_target, NULL, + (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL); if (err) putback_movable_pages(pagelist); return err; @@ -2168,6 +2251,16 @@ static int migrate_vma_collect_hole(unsigned long start, struct migrate_vma *migrate = walk->private; unsigned long addr; + /* Only allow populating anonymous memory. 
*/ + if (!vma_is_anonymous(walk->vma)) { + for (addr = start; addr < end; addr += PAGE_SIZE) { + migrate->src[migrate->npages] = 0; + migrate->dst[migrate->npages] = 0; + migrate->npages++; + } + return 0; + } + for (addr = start; addr < end; addr += PAGE_SIZE) { migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE; migrate->dst[migrate->npages] = 0; @@ -2260,8 +2353,10 @@ again: pte = *ptep; if (pte_none(pte)) { - mpfn = MIGRATE_PFN_MIGRATE; - migrate->cpages++; + if (vma_is_anonymous(vma)) { + mpfn = MIGRATE_PFN_MIGRATE; + migrate->cpages++; + } goto next; } @@ -2619,7 +2714,7 @@ restore: /** * migrate_vma_setup() - prepare to migrate a range of memory - * @args: contains the vma, start, and and pfns arrays for the migration + * @args: contains the vma, start, and pfns arrays for the migration * * Returns: negative errno on failures, 0 when 0 or more pages were migrated * without an error. @@ -2830,7 +2925,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate, inc_mm_counter(mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, addr, false); if (!is_zone_device_page(page)) - lru_cache_add_active_or_unevictable(page, vma); + lru_cache_add_inactive_or_unevictable(page, vma); get_page(page); if (flush) { diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 352bb9f3ecc0..4fc918163dd3 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -166,7 +166,7 @@ static void mn_itree_inv_end(struct mmu_notifier_subscriptions *subscriptions) /** * mmu_interval_read_begin - Begin a read side critical section against a VA * range - * interval_sub: The interval subscription + * @interval_sub: The interval subscription * * mmu_iterval_read_begin()/mmu_iterval_read_retry() implement a * collision-retry scheme similar to seqcount for the VA range under @@ -686,7 +686,7 @@ EXPORT_SYMBOL_GPL(__mmu_notifier_register); /** * mmu_notifier_register - Register a notifier on a mm - * @mn: The notifier to attach + * @subscription: The notifier to attach * @mm: The mm to 
attach the notifier to * * Must not hold mmap_lock nor any other VM related lock when calling @@ -856,7 +856,7 @@ static void mmu_notifier_free_rcu(struct rcu_head *rcu) /** * mmu_notifier_put - Release the reference on the notifier - * @mn: The notifier to act on + * @subscription: The notifier to act on * * This function must be paired with each mmu_notifier_get(), it releases the * reference obtained by the get. If this is the last reference then process @@ -965,7 +965,8 @@ static int __mmu_interval_notifier_insert( * @interval_sub: Interval subscription to register * @start: Starting virtual address to monitor * @length: Length of the range to monitor - * @mm : mm_struct to attach to + * @mm: mm_struct to attach to + * @ops: Interval notifier operations to be called on matching events * * This function subscribes the interval notifier for notifications from the * mm. Upon return the ops related to mmu_interval_notifier will be called diff --git a/mm/nommu.c b/mm/nommu.c index 340ae7774c13..75a327149af1 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1762,8 +1762,8 @@ EXPORT_SYMBOL_GPL(access_process_vm); * @newsize: The proposed filesize of the inode * * Check the shared mappings on an inode on behalf of a shrinking truncate to - * make sure that that any outstanding VMAs aren't broken and then shrink the - * vm_regions that extend that beyond so that do_mmap() doesn't + * make sure that any outstanding VMAs aren't broken and then shrink the + * vm_regions that extend beyond so that do_mmap() doesn't * automatically grant mappings that are too large. */ int nommu_shrink_inode_mappings(struct inode *inode, size_t size, diff --git a/mm/oom_kill.c b/mm/oom_kill.c index d30ce75f23fb..e90f25d6385d 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -196,17 +196,17 @@ static bool is_dump_unreclaim_slabs(void) * predictable as possible. The goal is to return the highest value for the * task consuming the most memory to avoid subsequent oom failures. 
*/ -unsigned long oom_badness(struct task_struct *p, unsigned long totalpages) +long oom_badness(struct task_struct *p, unsigned long totalpages) { long points; long adj; if (oom_unkillable_task(p)) - return 0; + return LONG_MIN; p = find_lock_task_mm(p); if (!p) - return 0; + return LONG_MIN; /* * Do not even consider tasks which are explicitly marked oom @@ -218,7 +218,7 @@ unsigned long oom_badness(struct task_struct *p, unsigned long totalpages) test_bit(MMF_OOM_SKIP, &p->mm->flags) || in_vfork(p)) { task_unlock(p); - return 0; + return LONG_MIN; } /* @@ -233,11 +233,7 @@ unsigned long oom_badness(struct task_struct *p, unsigned long totalpages) adj *= totalpages / 1000; points += adj; - /* - * Never return 0 for an eligible task regardless of the root bonus and - * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here). - */ - return points > 0 ? points : 1; + return points; } static const char * const oom_constraint_text[] = { @@ -310,7 +306,7 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc) static int oom_evaluate_task(struct task_struct *task, void *arg) { struct oom_control *oc = arg; - unsigned long points; + long points; if (oom_unkillable_task(task)) goto next; @@ -336,12 +332,12 @@ static int oom_evaluate_task(struct task_struct *task, void *arg) * killed first if it triggers an oom, then select it. 
*/ if (oom_task_origin(task)) { - points = ULONG_MAX; + points = LONG_MAX; goto select; } points = oom_badness(task, oc->totalpages); - if (!points || points < oc->chosen_points) + if (points == LONG_MIN || points < oc->chosen_points) goto next; select: @@ -365,6 +361,8 @@ abort: */ static void select_bad_process(struct oom_control *oc) { + oc->chosen_points = LONG_MIN; + if (is_memcg_oom(oc)) mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc); else { @@ -863,6 +861,8 @@ static void __oom_kill_process(struct task_struct *victim, const char *message) p = find_lock_task_mm(victim); if (!p) { + pr_info("%s: OOM victim %d (%s) is already exiting. Skip killing the task\n", + message, task_pid_nr(victim), victim->comm); put_task_struct(victim); return; } else if (victim != p) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 167732f4d124..8b7d0ecf30b1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4282,7 +4282,7 @@ retry: /* * If an allocation failed after direct reclaim, it could be because * pages are pinned on the per-cpu lists or in high alloc reserves. - * Shrink them them and try again + * Shrink them and try again */ if (!page && !drained) { unreserve_highatomic_pageblock(ac, false); @@ -6192,7 +6192,7 @@ static int zone_batchsize(struct zone *zone) * locking. * * Any new users of pcp->batch and pcp->high should ensure they can cope with - * those fields changing asynchronously (acording the the above rule). + * those fields changing asynchronously (acording to the above rule). * * mutex_is_locked(&pcp_batch_high_lock) required when calling this function * outside of boot time (or some other assurance that no concurrent updaters @@ -8203,7 +8203,7 @@ void *__init alloc_large_system_hash(const char *tablename, * race condition. So you can't expect this function should be exact. * * Returns a page without holding a reference. 
If the caller wants to - * dereference that page (e.g., dumping), it has to make sure that that it + * dereference that page (e.g., dumping), it has to make sure that it * cannot get removed (e.g., via memory unplug) concurrently. * */ @@ -8347,6 +8347,10 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, unsigned long pfn = start; unsigned int tries = 0; int ret = 0; + struct migration_target_control mtc = { + .nid = zone_to_nid(cc->zone), + .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL, + }; migrate_prep(); @@ -8373,8 +8377,8 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, &cc->migratepages); cc->nr_migratepages -= nr_reclaimed; - ret = migrate_pages(&cc->migratepages, alloc_migrate_target, - NULL, 0, cc->mode, MR_CONTIG_RANGE); + ret = migrate_pages(&cc->migratepages, alloc_migration_target, + NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE); } if (ret < 0) { putback_movable_pages(&cc->migratepages); diff --git a/mm/page_isolation.c b/mm/page_isolation.c index f6d07c5f0d34..242c03121d73 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -306,8 +306,3 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, return pfn < end_pfn ? -EBUSY : 0; } - -struct page *alloc_migrate_target(struct page *page, unsigned long private) -{ - return new_page_nodemask(page, numa_node_id(), &node_states[N_MEMORY]); -} diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h index 0468ba500bd4..18b768ac7dca 100644 --- a/mm/percpu-internal.h +++ b/mm/percpu-internal.h @@ -6,6 +6,25 @@ #include <linux/percpu.h> /* + * There are two chunk types: root and memcg-aware. + * Chunks of each type have separate slots list. + * + * Memcg-aware chunks have an attached vector of obj_cgroup pointers, which is + * used to store memcg membership data of a percpu object. Obj_cgroups are + * ref-counted pointers to a memory cgroup with an ability to switch dynamically + * to the parent memory cgroup. 
This allows to reclaim a deleted memory cgroup + * without reclaiming of all outstanding objects, which hold a reference at it. + */ +enum pcpu_chunk_type { + PCPU_CHUNK_ROOT, +#ifdef CONFIG_MEMCG_KMEM + PCPU_CHUNK_MEMCG, +#endif + PCPU_NR_CHUNK_TYPES, + PCPU_FAIL_ALLOC = PCPU_NR_CHUNK_TYPES +}; + +/* * pcpu_block_md is the metadata block struct. * Each chunk's bitmap is split into a number of full blocks. * All units are in terms of bits. @@ -54,6 +73,9 @@ struct pcpu_chunk { int end_offset; /* additional area required to have the region end page aligned */ +#ifdef CONFIG_MEMCG_KMEM + struct obj_cgroup **obj_cgroups; /* vector of object cgroups */ +#endif int nr_pages; /* # of pages served by this chunk */ int nr_populated; /* # of populated pages */ @@ -63,7 +85,7 @@ struct pcpu_chunk { extern spinlock_t pcpu_lock; -extern struct list_head *pcpu_slot; +extern struct list_head *pcpu_chunk_lists; extern int pcpu_nr_slots; extern int pcpu_nr_empty_pop_pages; @@ -106,6 +128,37 @@ static inline int pcpu_chunk_map_bits(struct pcpu_chunk *chunk) return pcpu_nr_pages_to_map_bits(chunk->nr_pages); } +#ifdef CONFIG_MEMCG_KMEM +static inline enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk) +{ + if (chunk->obj_cgroups) + return PCPU_CHUNK_MEMCG; + return PCPU_CHUNK_ROOT; +} + +static inline bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type) +{ + return chunk_type == PCPU_CHUNK_MEMCG; +} + +#else +static inline enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk) +{ + return PCPU_CHUNK_ROOT; +} + +static inline bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type) +{ + return false; +} +#endif + +static inline struct list_head *pcpu_chunk_list(enum pcpu_chunk_type chunk_type) +{ + return &pcpu_chunk_lists[pcpu_nr_slots * + pcpu_is_memcg_chunk(chunk_type)]; +} + #ifdef CONFIG_PERCPU_STATS #include <linux/spinlock.h> diff --git a/mm/percpu-km.c b/mm/percpu-km.c index 20d2b69a13b0..35c9941077ee 100644 --- a/mm/percpu-km.c +++ b/mm/percpu-km.c 
@@ -44,7 +44,8 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, /* nada */ } -static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp) +static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type, + gfp_t gfp) { const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT; struct pcpu_chunk *chunk; @@ -52,7 +53,7 @@ static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp) unsigned long flags; int i; - chunk = pcpu_alloc_chunk(gfp); + chunk = pcpu_alloc_chunk(type, gfp); if (!chunk) return NULL; diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c index 32558063c3f9..c8400a2adbc2 100644 --- a/mm/percpu-stats.c +++ b/mm/percpu-stats.c @@ -34,11 +34,15 @@ static int find_max_nr_alloc(void) { struct pcpu_chunk *chunk; int slot, max_nr_alloc; + enum pcpu_chunk_type type; max_nr_alloc = 0; - for (slot = 0; slot < pcpu_nr_slots; slot++) - list_for_each_entry(chunk, &pcpu_slot[slot], list) - max_nr_alloc = max(max_nr_alloc, chunk->nr_alloc); + for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) + for (slot = 0; slot < pcpu_nr_slots; slot++) + list_for_each_entry(chunk, &pcpu_chunk_list(type)[slot], + list) + max_nr_alloc = max(max_nr_alloc, + chunk->nr_alloc); return max_nr_alloc; } @@ -129,6 +133,9 @@ static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk, P("cur_min_alloc", cur_min_alloc); P("cur_med_alloc", cur_med_alloc); P("cur_max_alloc", cur_max_alloc); +#ifdef CONFIG_MEMCG_KMEM + P("memcg_aware", pcpu_is_memcg_chunk(pcpu_chunk_type(chunk))); +#endif seq_putc(m, '\n'); } @@ -137,6 +144,7 @@ static int percpu_stats_show(struct seq_file *m, void *v) struct pcpu_chunk *chunk; int slot, max_nr_alloc; int *buffer; + enum pcpu_chunk_type type; alloc_buffer: spin_lock_irq(&pcpu_lock); @@ -202,18 +210,18 @@ alloc_buffer: chunk_map_stats(m, pcpu_reserved_chunk, buffer); } - for (slot = 0; slot < pcpu_nr_slots; slot++) { - list_for_each_entry(chunk, &pcpu_slot[slot], list) { - if (chunk == pcpu_first_chunk) { - seq_puts(m, "Chunk: <- First 
Chunk\n"); - chunk_map_stats(m, chunk, buffer); - - - } else { - seq_puts(m, "Chunk:\n"); - chunk_map_stats(m, chunk, buffer); + for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) { + for (slot = 0; slot < pcpu_nr_slots; slot++) { + list_for_each_entry(chunk, &pcpu_chunk_list(type)[slot], + list) { + if (chunk == pcpu_first_chunk) { + seq_puts(m, "Chunk: <- First Chunk\n"); + chunk_map_stats(m, chunk, buffer); + } else { + seq_puts(m, "Chunk:\n"); + chunk_map_stats(m, chunk, buffer); + } } - } } diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c index a2b395acef89..e46f7a6917f9 100644 --- a/mm/percpu-vm.c +++ b/mm/percpu-vm.c @@ -328,12 +328,13 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, pcpu_free_pages(chunk, pages, page_start, page_end); } -static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp) +static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type, + gfp_t gfp) { struct pcpu_chunk *chunk; struct vm_struct **vms; - chunk = pcpu_alloc_chunk(gfp); + chunk = pcpu_alloc_chunk(type, gfp); if (!chunk) return NULL; diff --git a/mm/percpu.c b/mm/percpu.c index b626766160ce..f4709629e6de 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -37,9 +37,14 @@ * takes care of normal allocations. * * The allocator organizes chunks into lists according to free size and - * tries to allocate from the fullest chunk first. Each chunk is managed - * by a bitmap with metadata blocks. The allocation map is updated on - * every allocation and free to reflect the current state while the boundary + * memcg-awareness. To make a percpu allocation memcg-aware the __GFP_ACCOUNT + * flag should be passed. All memcg-aware allocations are sharing one set + * of chunks and all unaccounted allocations and allocations performed + * by processes belonging to the root memory cgroup are using the second set. + * + * The allocator tries to allocate from the fullest chunk first. Each chunk + * is managed by a bitmap with metadata blocks. 
The allocation map is updated + * on every allocation and free to reflect the current state while the boundary * map is only updated on allocation. Each metadata block contains * information to help mitigate the need to iterate over large portions * of the bitmap. The reverse mapping from page to chunk is stored in @@ -81,6 +86,7 @@ #include <linux/kmemleak.h> #include <linux/sched.h> #include <linux/sched/mm.h> +#include <linux/memcontrol.h> #include <asm/cacheflush.h> #include <asm/sections.h> @@ -160,7 +166,7 @@ struct pcpu_chunk *pcpu_reserved_chunk __ro_after_init; DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */ static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop, map ext */ -struct list_head *pcpu_slot __ro_after_init; /* chunk list slots */ +struct list_head *pcpu_chunk_lists __ro_after_init; /* chunk list slots */ /* chunks which need their map areas extended, protected by pcpu_lock */ static LIST_HEAD(pcpu_map_extend_chunks); @@ -500,6 +506,9 @@ static void __pcpu_chunk_move(struct pcpu_chunk *chunk, int slot, bool move_front) { if (chunk != pcpu_reserved_chunk) { + struct list_head *pcpu_slot; + + pcpu_slot = pcpu_chunk_list(pcpu_chunk_type(chunk)); if (move_front) list_move(&chunk->list, &pcpu_slot[slot]); else @@ -1211,11 +1220,14 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int alloc_bits, * * This function determines the size of an allocation to free using * the boundary bitmap and clears the allocation map. + * + * RETURNS: + * Number of freed bytes. 
*/ -static void pcpu_free_area(struct pcpu_chunk *chunk, int off) +static int pcpu_free_area(struct pcpu_chunk *chunk, int off) { struct pcpu_block_md *chunk_md = &chunk->chunk_md; - int bit_off, bits, end, oslot; + int bit_off, bits, end, oslot, freed; lockdep_assert_held(&pcpu_lock); pcpu_stats_area_dealloc(chunk); @@ -1230,8 +1242,10 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int off) bits = end - bit_off; bitmap_clear(chunk->alloc_map, bit_off, bits); + freed = bits * PCPU_MIN_ALLOC_SIZE; + /* update metadata */ - chunk->free_bytes += bits * PCPU_MIN_ALLOC_SIZE; + chunk->free_bytes += freed; /* update first free bit */ chunk_md->first_free = min(chunk_md->first_free, bit_off); @@ -1239,6 +1253,8 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int off) pcpu_block_update_hint_free(chunk, bit_off, bits); pcpu_chunk_relocate(chunk, oslot); + + return freed; } static void pcpu_init_md_block(struct pcpu_block_md *block, int nr_bits) @@ -1334,6 +1350,10 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, panic("%s: Failed to allocate %zu bytes\n", __func__, alloc_size); +#ifdef CONFIG_MEMCG_KMEM + /* first chunk isn't memcg-aware */ + chunk->obj_cgroups = NULL; +#endif pcpu_init_md_blocks(chunk); /* manage populated page bitmap */ @@ -1373,7 +1393,7 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, return chunk; } -static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp) +static struct pcpu_chunk *pcpu_alloc_chunk(enum pcpu_chunk_type type, gfp_t gfp) { struct pcpu_chunk *chunk; int region_bits; @@ -1401,6 +1421,16 @@ static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp) if (!chunk->md_blocks) goto md_blocks_fail; +#ifdef CONFIG_MEMCG_KMEM + if (pcpu_is_memcg_chunk(type)) { + chunk->obj_cgroups = + pcpu_mem_zalloc(pcpu_chunk_map_bits(chunk) * + sizeof(struct obj_cgroup *), gfp); + if (!chunk->obj_cgroups) + goto objcg_fail; + } +#endif + pcpu_init_md_blocks(chunk); /* init metadata */ 
@@ -1408,6 +1438,10 @@ static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp) return chunk; +#ifdef CONFIG_MEMCG_KMEM +objcg_fail: + pcpu_mem_free(chunk->md_blocks); +#endif md_blocks_fail: pcpu_mem_free(chunk->bound_map); bound_map_fail: @@ -1422,6 +1456,9 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk) { if (!chunk) return; +#ifdef CONFIG_MEMCG_KMEM + pcpu_mem_free(chunk->obj_cgroups); +#endif pcpu_mem_free(chunk->md_blocks); pcpu_mem_free(chunk->bound_map); pcpu_mem_free(chunk->alloc_map); @@ -1498,7 +1535,8 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int page_start, int page_end, gfp_t gfp); static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int page_start, int page_end); -static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp); +static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type, + gfp_t gfp); static void pcpu_destroy_chunk(struct pcpu_chunk *chunk); static struct page *pcpu_addr_to_page(void *addr); static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai); @@ -1540,6 +1578,87 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) return pcpu_get_page_chunk(pcpu_addr_to_page(addr)); } +#ifdef CONFIG_MEMCG_KMEM +static enum pcpu_chunk_type pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp, + struct obj_cgroup **objcgp) +{ + struct obj_cgroup *objcg; + + if (!memcg_kmem_enabled() || !(gfp & __GFP_ACCOUNT) || + memcg_kmem_bypass()) + return PCPU_CHUNK_ROOT; + + objcg = get_obj_cgroup_from_current(); + if (!objcg) + return PCPU_CHUNK_ROOT; + + if (obj_cgroup_charge(objcg, gfp, size * num_possible_cpus())) { + obj_cgroup_put(objcg); + return PCPU_FAIL_ALLOC; + } + + *objcgp = objcg; + return PCPU_CHUNK_MEMCG; +} + +static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg, + struct pcpu_chunk *chunk, int off, + size_t size) +{ + if (!objcg) + return; + + if (chunk) { + chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = objcg; + + rcu_read_lock(); + 
mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B, + size * num_possible_cpus()); + rcu_read_unlock(); + } else { + obj_cgroup_uncharge(objcg, size * num_possible_cpus()); + obj_cgroup_put(objcg); + } +} + +static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size) +{ + struct obj_cgroup *objcg; + + if (!pcpu_is_memcg_chunk(pcpu_chunk_type(chunk))) + return; + + objcg = chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT]; + chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL; + + obj_cgroup_uncharge(objcg, size * num_possible_cpus()); + + rcu_read_lock(); + mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B, + -(size * num_possible_cpus())); + rcu_read_unlock(); + + obj_cgroup_put(objcg); +} + +#else /* CONFIG_MEMCG_KMEM */ +static enum pcpu_chunk_type +pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp, struct obj_cgroup **objcgp) +{ + return PCPU_CHUNK_ROOT; +} + +static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg, + struct pcpu_chunk *chunk, int off, + size_t size) +{ +} + +static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size) +{ +} +#endif /* CONFIG_MEMCG_KMEM */ + /** * pcpu_alloc - the percpu allocator * @size: size of area to allocate in bytes @@ -1561,6 +1680,9 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, gfp_t pcpu_gfp; bool is_atomic; bool do_warn; + enum pcpu_chunk_type type; + struct list_head *pcpu_slot; + struct obj_cgroup *objcg = NULL; static int warn_limit = 10; struct pcpu_chunk *chunk, *next; const char *err; @@ -1595,16 +1717,23 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, return NULL; } + type = pcpu_memcg_pre_alloc_hook(size, gfp, &objcg); + if (unlikely(type == PCPU_FAIL_ALLOC)) + return NULL; + pcpu_slot = pcpu_chunk_list(type); + if (!is_atomic) { /* * pcpu_balance_workfn() allocates memory under this mutex, * and it may wait for memory reclaim. 
Allow current task * to become OOM victim, in case of memory pressure. */ - if (gfp & __GFP_NOFAIL) + if (gfp & __GFP_NOFAIL) { mutex_lock(&pcpu_alloc_mutex); - else if (mutex_lock_killable(&pcpu_alloc_mutex)) + } else if (mutex_lock_killable(&pcpu_alloc_mutex)) { + pcpu_memcg_post_alloc_hook(objcg, NULL, 0, size); return NULL; + } } spin_lock_irqsave(&pcpu_lock, flags); @@ -1659,7 +1788,7 @@ restart: } if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) { - chunk = pcpu_create_chunk(pcpu_gfp); + chunk = pcpu_create_chunk(type, pcpu_gfp); if (!chunk) { err = "failed to allocate new chunk"; goto fail; @@ -1716,6 +1845,8 @@ area_found: trace_percpu_alloc_percpu(reserved, is_atomic, size, align, chunk->base_addr, off, ptr); + pcpu_memcg_post_alloc_hook(objcg, chunk, off, size); + return ptr; fail_unlock: @@ -1737,6 +1868,9 @@ fail: } else { mutex_unlock(&pcpu_alloc_mutex); } + + pcpu_memcg_post_alloc_hook(objcg, NULL, 0, size); + return NULL; } @@ -1796,8 +1930,8 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align) } /** - * pcpu_balance_workfn - manage the amount of free chunks and populated pages - * @work: unused + * __pcpu_balance_workfn - manage the amount of free chunks and populated pages + * @type: chunk type * * Reclaim all fully free chunks except for the first one. This is also * responsible for maintaining the pool of empty populated pages. However, @@ -1806,11 +1940,12 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align) * allocation causes the failure as it is possible that requests can be * serviced from already backed regions. 
*/ -static void pcpu_balance_workfn(struct work_struct *work) +static void __pcpu_balance_workfn(enum pcpu_chunk_type type) { /* gfp flags passed to underlying allocators */ const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; LIST_HEAD(to_free); + struct list_head *pcpu_slot = pcpu_chunk_list(type); struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1]; struct pcpu_chunk *chunk, *next; int slot, nr_to_pop, ret; @@ -1908,7 +2043,7 @@ retry_pop: if (nr_to_pop) { /* ran out of chunks to populate, create a new one and retry */ - chunk = pcpu_create_chunk(gfp); + chunk = pcpu_create_chunk(type, gfp); if (chunk) { spin_lock_irq(&pcpu_lock); pcpu_chunk_relocate(chunk, -1); @@ -1921,6 +2056,20 @@ retry_pop: } /** + * pcpu_balance_workfn - manage the amount of free chunks and populated pages + * @work: unused + * + * Call __pcpu_balance_workfn() for each chunk type. + */ +static void pcpu_balance_workfn(struct work_struct *work) +{ + enum pcpu_chunk_type type; + + for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) + __pcpu_balance_workfn(type); +} + +/** * free_percpu - free percpu area * @ptr: pointer to area to free * @@ -1934,8 +2083,9 @@ void free_percpu(void __percpu *ptr) void *addr; struct pcpu_chunk *chunk; unsigned long flags; - int off; + int size, off; bool need_balance = false; + struct list_head *pcpu_slot; if (!ptr) return; @@ -1949,7 +2099,11 @@ void free_percpu(void __percpu *ptr) chunk = pcpu_chunk_addr_search(addr); off = addr - chunk->base_addr; - pcpu_free_area(chunk, off); + size = pcpu_free_area(chunk, off); + + pcpu_slot = pcpu_chunk_list(pcpu_chunk_type(chunk)); + + pcpu_memcg_free_hook(chunk, off, size); /* if there are more than one fully free chunks, wake up grim reaper */ if (chunk->free_bytes == pcpu_unit_size) { @@ -2260,6 +2414,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, int map_size; unsigned long tmp_addr; size_t alloc_size; + enum pcpu_chunk_type type; #define PCPU_SETUP_BUG_ON(cond) do { \ if 
(unlikely(cond)) { \ @@ -2377,13 +2532,18 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, * empty chunks. */ pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2; - pcpu_slot = memblock_alloc(pcpu_nr_slots * sizeof(pcpu_slot[0]), - SMP_CACHE_BYTES); - if (!pcpu_slot) + pcpu_chunk_lists = memblock_alloc(pcpu_nr_slots * + sizeof(pcpu_chunk_lists[0]) * + PCPU_NR_CHUNK_TYPES, + SMP_CACHE_BYTES); + if (!pcpu_chunk_lists) panic("%s: Failed to allocate %zu bytes\n", __func__, - pcpu_nr_slots * sizeof(pcpu_slot[0])); - for (i = 0; i < pcpu_nr_slots; i++) - INIT_LIST_HEAD(&pcpu_slot[i]); + pcpu_nr_slots * sizeof(pcpu_chunk_lists[0]) * + PCPU_NR_CHUNK_TYPES); + + for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) + for (i = 0; i < pcpu_nr_slots; i++) + INIT_LIST_HEAD(&pcpu_chunk_list(type)[i]); /* * The end of the static region needs to be aligned with the diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index cc85ce81914a..29c052099aff 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -105,7 +105,7 @@ static int process_vm_rw_single_vec(unsigned long addr, * current/current->mm */ mmap_read_lock(mm); - pinned_pages = pin_user_pages_remote(task, mm, pa, pinned_pages, + pinned_pages = pin_user_pages_remote(mm, pa, pinned_pages, flags, process_pages, NULL, &locked); if (locked) diff --git a/mm/rmap.c b/mm/rmap.c index 5fe2dedce1fc..6cce9ef06753 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1469,7 +1469,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * do this outside rmap routines. */ VM_BUG_ON(!(flags & TTU_RMAP_LOCKED)); - if (huge_pmd_unshare(mm, &address, pvmw.pte)) { + if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) { /* * huge_pmd_unshare unmapped an entire PMD * page. 
There is no way of knowing exactly diff --git a/mm/shmem.c b/mm/shmem.c index eb6b36d89722..271548ca20f3 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1434,7 +1434,8 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) list_add(&info->swaplist, &shmem_swaplist); if (add_to_swap_cache(page, swap, - __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN) == 0) { + __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN, + NULL) == 0) { spin_lock_irq(&info->lock); shmem_recalc_inode(inode); info->swapped++; @@ -1685,7 +1686,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, * Swap in the page pointed to by *pagep. * Caller has to make sure that *pagep contains a valid swapped page. * Returns 0 and the page in pagep if success. On failure, returns the - * the error code and NULL in *pagep. + * error code and NULL in *pagep. */ static int shmem_swapin_page(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp, diff --git a/mm/slab_common.c b/mm/slab_common.c index a513f3237155..f9ccd5dc13f3 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -419,7 +419,7 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work) /* * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the * @slab_caches_to_rcu_destroy list. The slab pages are freed - * through RCU and and the associated kmem_cache are dereferenced + * through RCU and the associated kmem_cache are dereferenced * while freeing the pages, so the kmem_caches should be freed only * after the pending RCU operations are finished. 
As rcu_barrier() * is a pretty slow operation, we batch all pending destructions diff --git a/mm/swap.c b/mm/swap.c index de257c0a89b1..9285e60c7d6e 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -476,23 +476,24 @@ void lru_cache_add(struct page *page) EXPORT_SYMBOL(lru_cache_add); /** - * lru_cache_add_active_or_unevictable + * lru_cache_add_inactive_or_unevictable * @page: the page to be added to LRU * @vma: vma in which page is mapped for determining reclaimability * - * Place @page on the active or unevictable LRU list, depending on its + * Place @page on the inactive or unevictable LRU list, depending on its * evictability. Note that if the page is not evictable, it goes * directly back onto it's zone's unevictable list, it does NOT use a * per cpu pagevec. */ -void lru_cache_add_active_or_unevictable(struct page *page, +void lru_cache_add_inactive_or_unevictable(struct page *page, struct vm_area_struct *vma) { + bool unevictable; + VM_BUG_ON_PAGE(PageLRU(page), page); - if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) - SetPageActive(page); - else if (!TestSetPageMlocked(page)) { + unevictable = (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED; + if (unlikely(unevictable) && !TestSetPageMlocked(page)) { /* * We use the irq-unsafe __mod_zone_page_stat because this * counter is not modified from interrupt context, and the pte diff --git a/mm/swap_state.c b/mm/swap_state.c index e82f4f8b1f63..b73aabdfd35a 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -106,16 +106,32 @@ void show_swap_cache_info(void) printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10)); } +void *get_shadow_from_swap_cache(swp_entry_t entry) +{ + struct address_space *address_space = swap_address_space(entry); + pgoff_t idx = swp_offset(entry); + struct page *page; + + page = find_get_entry(address_space, idx); + if (xa_is_value(page)) + return page; + if (page) + put_page(page); + return NULL; +} + /* * add_to_swap_cache resembles 
add_to_page_cache_locked on swapper_space, * but sets SwapCache flag and private instead of mapping and index. */ -int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp) +int add_to_swap_cache(struct page *page, swp_entry_t entry, + gfp_t gfp, void **shadowp) { struct address_space *address_space = swap_address_space(entry); pgoff_t idx = swp_offset(entry); XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page)); unsigned long i, nr = hpage_nr_pages(page); + void *old; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageSwapCache(page), page); @@ -125,16 +141,25 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp) SetPageSwapCache(page); do { + unsigned long nr_shadows = 0; + xas_lock_irq(&xas); xas_create_range(&xas); if (xas_error(&xas)) goto unlock; for (i = 0; i < nr; i++) { VM_BUG_ON_PAGE(xas.xa_index != idx + i, page); + old = xas_load(&xas); + if (xa_is_value(old)) { + nr_shadows++; + if (shadowp) + *shadowp = old; + } set_page_private(page + i, entry.val + i); xas_store(&xas, page); xas_next(&xas); } + address_space->nrexceptional -= nr_shadows; address_space->nrpages += nr; __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr); ADD_CACHE_INFO(add_total, nr); @@ -154,7 +179,8 @@ unlock: * This must be called only on pages that have * been verified to be in the swap cache. 
*/ -void __delete_from_swap_cache(struct page *page, swp_entry_t entry) +void __delete_from_swap_cache(struct page *page, + swp_entry_t entry, void *shadow) { struct address_space *address_space = swap_address_space(entry); int i, nr = hpage_nr_pages(page); @@ -166,12 +192,14 @@ void __delete_from_swap_cache(struct page *page, swp_entry_t entry) VM_BUG_ON_PAGE(PageWriteback(page), page); for (i = 0; i < nr; i++) { - void *entry = xas_store(&xas, NULL); + void *entry = xas_store(&xas, shadow); VM_BUG_ON_PAGE(entry != page, entry); set_page_private(page + i, 0); xas_next(&xas); } ClearPageSwapCache(page); + if (shadow) + address_space->nrexceptional += nr; address_space->nrpages -= nr; __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr); ADD_CACHE_INFO(del_total, nr); @@ -208,7 +236,7 @@ int add_to_swap(struct page *page) * Add it to the swap cache. */ err = add_to_swap_cache(page, entry, - __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN); + __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL); if (err) /* * add_to_swap_cache() doesn't return -EEXIST, so we can safely @@ -246,13 +274,44 @@ void delete_from_swap_cache(struct page *page) struct address_space *address_space = swap_address_space(entry); xa_lock_irq(&address_space->i_pages); - __delete_from_swap_cache(page, entry); + __delete_from_swap_cache(page, entry, NULL); xa_unlock_irq(&address_space->i_pages); put_swap_page(page, entry); page_ref_sub(page, hpage_nr_pages(page)); } +void clear_shadow_from_swap_cache(int type, unsigned long begin, + unsigned long end) +{ + unsigned long curr = begin; + void *old; + + for (;;) { + unsigned long nr_shadows = 0; + swp_entry_t entry = swp_entry(type, curr); + struct address_space *address_space = swap_address_space(entry); + XA_STATE(xas, &address_space->i_pages, curr); + + xa_lock_irq(&address_space->i_pages); + xas_for_each(&xas, old, end) { + if (!xa_is_value(old)) + continue; + xas_store(&xas, NULL); + nr_shadows++; + } + address_space->nrexceptional -= nr_shadows; + 
xa_unlock_irq(&address_space->i_pages); + + /* search the next swapcache until we meet end */ + curr >>= SWAP_ADDRESS_SPACE_SHIFT; + curr++; + curr <<= SWAP_ADDRESS_SPACE_SHIFT; + if (curr > end) + break; + } +} + /* * If we are the only user, then try to free up the swap cache. * @@ -361,6 +420,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, { struct swap_info_struct *si; struct page *page; + void *shadow = NULL; *new_page_allocated = false; @@ -429,7 +489,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, __SetPageSwapBacked(page); /* May fail (-ENOMEM) if XArray node allocation failed. */ - if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK)) { + if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow)) { put_swap_page(page, entry); goto fail_unlock; } @@ -439,10 +499,8 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, goto fail_unlock; } - /* XXX: Move to lru_cache_add() when it supports new vs putback */ - spin_lock_irq(&page_pgdat(page)->lru_lock); - lru_note_cost_page(page); - spin_unlock_irq(&page_pgdat(page)->lru_lock); + if (shadow) + workingset_refault(page, shadow); /* Caller will initiate read into locked page */ SetPageWorkingset(page); diff --git a/mm/swapfile.c b/mm/swapfile.c index 6c26916e95fd..e653eea1eb88 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -696,6 +696,7 @@ static void add_to_avail_list(struct swap_info_struct *p) static void swap_range_free(struct swap_info_struct *si, unsigned long offset, unsigned int nr_entries) { + unsigned long begin = offset; unsigned long end = offset + nr_entries - 1; void (*swap_slot_free_notify)(struct block_device *, unsigned long); @@ -721,6 +722,7 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset, swap_slot_free_notify(si->bdev, offset); offset++; } + clear_shadow_from_swap_cache(si->type, begin, end); } static void set_cluster_next(struct swap_info_struct *si, 
unsigned long next) @@ -1915,7 +1917,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, page_add_anon_rmap(page, vma, addr, false); } else { /* ksm created a completely new copy */ page_add_new_anon_rmap(page, vma, addr, false); - lru_cache_add_active_or_unevictable(page, vma); + lru_cache_add_inactive_or_unevictable(page, vma); } swap_free(entry); /* diff --git a/mm/usercopy.c b/mm/usercopy.c index 660717a1ea5c..b3de3c4eefba 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -43,7 +43,7 @@ static noinline int check_stack_object(const void *obj, unsigned long len) /* * Reject: object partially overlaps the stack (passing the - * the check above means at least one end is within the stack, + * check above means at least one end is within the stack, * so if this check fails, the other end is outside the stack). */ if (obj < stack || stackend < obj + len) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index b80419320c7d..9a3d451402d7 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -123,7 +123,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm, inc_mm_counter(dst_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, dst_vma, dst_addr, false); - lru_cache_add_active_or_unevictable(page, dst_vma); + lru_cache_add_inactive_or_unevictable(page, dst_vma); set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); diff --git a/mm/vmscan.c b/mm/vmscan.c index 72da290b171b..738115ed75e2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -854,6 +854,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, { unsigned long flags; int refcount; + void *shadow = NULL; BUG_ON(!PageLocked(page)); BUG_ON(mapping != page_mapping(page)); @@ -896,13 +897,13 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, if (PageSwapCache(page)) { swp_entry_t swap = { .val = page_private(page) }; mem_cgroup_swapout(page, swap); - __delete_from_swap_cache(page, swap); + if (reclaimed && !mapping_exiting(mapping)) + shadow = 
workingset_eviction(page, target_memcg); + __delete_from_swap_cache(page, swap, shadow); xa_unlock_irqrestore(&mapping->i_pages, flags); put_swap_page(page, swap); - workingset_eviction(page, target_memcg); } else { void (*freepage)(struct page *); - void *shadow = NULL; freepage = mapping->a_ops->freepage; /* @@ -998,8 +999,6 @@ static enum page_references page_check_references(struct page *page, return PAGEREF_RECLAIM; if (referenced_ptes) { - if (PageSwapBacked(page)) - return PAGEREF_ACTIVATE; /* * All mapped pages start out with page table * references from the instantiating fault, so we need @@ -1022,7 +1021,7 @@ static enum page_references page_check_references(struct page *page, /* * Activate file-backed executable pages after first usage. */ - if (vm_flags & VM_EXEC) + if ((vm_flags & VM_EXEC) && !PageSwapBacked(page)) return PAGEREF_ACTIVATE; return PAGEREF_KEEP; @@ -2685,7 +2684,10 @@ again: if (!sc->force_deactivate) { unsigned long refaults; - if (inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) + refaults = lruvec_page_state(target_lruvec, + WORKINGSET_ACTIVATE_ANON); + if (refaults != target_lruvec->refaults[0] || + inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) sc->may_deactivate |= DEACTIVATE_ANON; else sc->may_deactivate &= ~DEACTIVATE_ANON; @@ -2696,8 +2698,8 @@ again: * rid of any stale active pages quickly. */ refaults = lruvec_page_state(target_lruvec, - WORKINGSET_ACTIVATE); - if (refaults != target_lruvec->refaults || + WORKINGSET_ACTIVATE_FILE); + if (refaults != target_lruvec->refaults[1] || inactive_is_low(target_lruvec, LRU_INACTIVE_FILE)) sc->may_deactivate |= DEACTIVATE_FILE; else @@ -2796,7 +2798,7 @@ again: set_bit(PGDAT_DIRTY, &pgdat->flags); /* - * If kswapd scans pages marked marked for immediate + * If kswapd scans pages marked for immediate * reclaim and under writeback (nr_immediate), it * implies that pages are cycling through the LRU * faster than they are written so also forcibly stall. 
@@ -2974,8 +2976,10 @@ static void snapshot_refaults(struct mem_cgroup *target_memcg, pg_data_t *pgdat) unsigned long refaults; target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat); - refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE); - target_lruvec->refaults = refaults; + refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON); + target_lruvec->refaults[0] = refaults; + refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_FILE); + target_lruvec->refaults[1] = refaults; } /* @@ -3369,7 +3373,7 @@ static bool pgdat_watermark_boosted(pg_data_t *pgdat, int highest_zoneidx) /* * Check for watermark boosts top-down as the higher zones * are more likely to be boosted. Both watermarks and boosts - * should not be checked at the time time as reclaim would + * should not be checked at the same time as reclaim would * start prematurely when there is no boosting and a lower * zone is balanced. */ diff --git a/mm/vmstat.c b/mm/vmstat.c index 2b866cbab11d..727a26d1ec1d 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1096,6 +1096,24 @@ static int __fragmentation_index(unsigned int order, struct contig_page_info *in return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total); } +/* + * Calculates external fragmentation within a zone wrt the given order. + * It is defined as the percentage of pages found in blocks of size + * less than 1 << order. It returns values in range [0, 100]. 
+ */ +unsigned int extfrag_for_order(struct zone *zone, unsigned int order) +{ + struct contig_page_info info; + + fill_contig_page_info(zone, order, &info); + if (info.free_pages == 0) + return 0; + + return div_u64((info.free_pages - + (info.free_blocks_suitable << order)) * 100, + info.free_pages); +} + /* Same as __fragmentation index but allocs contig_page_info on stack */ int fragmentation_index(struct zone *zone, unsigned int order) { @@ -1167,9 +1185,12 @@ const char * const vmstat_text[] = { "nr_isolated_anon", "nr_isolated_file", "workingset_nodes", - "workingset_refault", - "workingset_activate", - "workingset_restore", + "workingset_refault_anon", + "workingset_refault_file", + "workingset_activate_anon", + "workingset_activate_file", + "workingset_restore_anon", + "workingset_restore_file", "workingset_nodereclaim", "nr_anon_pages", "nr_mapped", @@ -1256,6 +1277,9 @@ const char * const vmstat_text[] = { #ifdef CONFIG_MIGRATION "pgmigrate_success", "pgmigrate_fail", + "thp_migration_success", + "thp_migration_fail", + "thp_migration_split", #endif #ifdef CONFIG_COMPACTION "compact_migrate_scanned", diff --git a/mm/workingset.c b/mm/workingset.c index b199726924dd..8cbe4e3cbe5c 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -6,6 +6,7 @@ */ #include <linux/memcontrol.h> +#include <linux/mm_inline.h> #include <linux/writeback.h> #include <linux/shmem_fs.h> #include <linux/pagemap.h> @@ -280,6 +281,7 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg) */ void workingset_refault(struct page *page, void *shadow) { + bool file = page_is_file_lru(page); struct mem_cgroup *eviction_memcg; struct lruvec *eviction_lruvec; unsigned long refault_distance; @@ -346,27 +348,34 @@ void workingset_refault(struct page *page, void *shadow) memcg = page_memcg(page); lruvec = mem_cgroup_lruvec(memcg, pgdat); - inc_lruvec_state(lruvec, WORKINGSET_REFAULT); + inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file); /* * Compare the distance 
to the existing workingset size. We * don't activate pages that couldn't stay resident even if - * all the memory was available to the page cache. Whether - * cache can compete with anon or not depends on having swap. + * all the memory was available to the workingset. Whether + * workingset competition needs to consider anon or not depends + * on having swap. */ workingset_size = lruvec_page_state(eviction_lruvec, NR_ACTIVE_FILE); - if (mem_cgroup_get_nr_swap_pages(memcg) > 0) { + if (!file) { workingset_size += lruvec_page_state(eviction_lruvec, - NR_INACTIVE_ANON); + NR_INACTIVE_FILE); + } + if (mem_cgroup_get_nr_swap_pages(memcg) > 0) { workingset_size += lruvec_page_state(eviction_lruvec, NR_ACTIVE_ANON); + if (file) { + workingset_size += lruvec_page_state(eviction_lruvec, + NR_INACTIVE_ANON); + } } if (refault_distance > workingset_size) goto out; SetPageActive(page); workingset_age_nonresident(lruvec, hpage_nr_pages(page)); - inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE); + inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file); /* Page was active prior to eviction */ if (workingset) { @@ -375,7 +384,7 @@ void workingset_refault(struct page *page, void *shadow) spin_lock_irq(&page_pgdat(page)->lru_lock); lru_note_cost_page(page); spin_unlock_irq(&page_pgdat(page)->lru_lock); - inc_lruvec_state(lruvec, WORKINGSET_RESTORE); + inc_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file); } out: rcu_read_unlock(); diff --git a/mm/zpool.c b/mm/zpool.c index 863669212070..3744a2d1a624 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -239,15 +239,15 @@ const char *zpool_get_type(struct zpool *zpool) } /** - * zpool_malloc_support_movable() - Check if the zpool support - * allocate movable memory + * zpool_malloc_support_movable() - Check if the zpool supports + * allocating movable memory * @zpool: The zpool to check * - * This returns if the zpool support allocate movable memory. + * This returns if the zpool supports allocating movable memory. 
* * Implementations must guarantee this to be thread-safe. * - * Returns: true if if the zpool support allocate movable memory, false if not + * Returns: true if the zpool supports allocating movable memory, false if not */ bool zpool_malloc_support_movable(struct zpool *zpool) { diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 952a01e45c6a..c36fdff9a371 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -79,7 +79,7 @@ /* * Object location (<PFN>, <obj_idx>) is encoded as - * as single (unsigned long) handle value. + * a single (unsigned long) handle value. * * Note that object index <obj_idx> starts from 0. * diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig index d7bec7adc267..f36f9a3a4e20 100644 --- a/net/ceph/Kconfig +++ b/net/ceph/Kconfig @@ -13,7 +13,7 @@ config CEPH_LIB common functionality to both the Ceph filesystem and to the rados block device (rbd). - More information at http://ceph.newdream.net/. + More information at https://ceph.io/. If unsure, say N. diff --git a/net/ceph/ceph_hash.c b/net/ceph/ceph_hash.c index 9a5850f264ed..81e1e006c540 100644 --- a/net/ceph/ceph_hash.c +++ b/net/ceph/ceph_hash.c @@ -4,7 +4,7 @@ /* * Robert Jenkin's hash function. - * http://burtleburtle.net/bob/hash/evahash.html + * https://burtleburtle.net/bob/hash/evahash.html * This is in the public domain. 
*/ #define mix(a, b, c) \ diff --git a/net/ceph/crush/hash.c b/net/ceph/crush/hash.c index e5cc603cdb17..fe79f6d2d0db 100644 --- a/net/ceph/crush/hash.c +++ b/net/ceph/crush/hash.c @@ -7,7 +7,7 @@ /* * Robert Jenkins' function for mixing 32-bit values - * http://burtleburtle.net/bob/hash/evahash.html + * https://burtleburtle.net/bob/hash/evahash.html * a, b = random bits, c = input and output */ #define crush_hashmix(a, b, c) do { \ diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 3f323ed9df52..07e5614eb3f1 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -298,7 +298,7 @@ static __u64 crush_ln(unsigned int xin) * * for reference, see: * - * http://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables + * https://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables * */ diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 409d505ff320..2110439f8a24 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -223,6 +223,9 @@ static void dump_request(struct seq_file *s, struct ceph_osd_request *req) if (op->op == CEPH_OSD_OP_WATCH) seq_printf(s, "-%s", ceph_osd_watch_op_name(op->watch.op)); + else if (op->op == CEPH_OSD_OP_CALL) + seq_printf(s, "-%s/%s", op->cls.class_name, + op->cls.method_name); } seq_putc(s, '\n'); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 2db8b44e70c2..e4fbcad6e7d8 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -525,7 +525,7 @@ EXPORT_SYMBOL(ceph_osdc_put_request); static void request_init(struct ceph_osd_request *req) { - /* req only, each op is zeroed in _osd_req_op_init() */ + /* req only, each op is zeroed in osd_req_op_init() */ memset(req, 0, sizeof(*req)); kref_init(&req->r_kref); @@ -746,8 +746,8 @@ EXPORT_SYMBOL(ceph_osdc_alloc_messages); * other information associated with them. 
It also serves as a * common init routine for all the other init functions, below. */ -static struct ceph_osd_req_op * -_osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, +struct ceph_osd_req_op * +osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u32 flags) { struct ceph_osd_req_op *op; @@ -762,12 +762,6 @@ _osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, return op; } - -void osd_req_op_init(struct ceph_osd_request *osd_req, - unsigned int which, u16 opcode, u32 flags) -{ - (void)_osd_req_op_init(osd_req, which, opcode, flags); -} EXPORT_SYMBOL(osd_req_op_init); void osd_req_op_extent_init(struct ceph_osd_request *osd_req, @@ -775,8 +769,8 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req, u64 offset, u64 length, u64 truncate_size, u32 truncate_seq) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, - opcode, 0); + struct ceph_osd_req_op *op = osd_req_op_init(osd_req, which, + opcode, 0); size_t payload_len = 0; BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && @@ -822,7 +816,7 @@ void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req, BUG_ON(which + 1 >= osd_req->r_num_ops); prev_op = &osd_req->r_ops[which]; - op = _osd_req_op_init(osd_req, which + 1, prev_op->op, prev_op->flags); + op = osd_req_op_init(osd_req, which + 1, prev_op->op, prev_op->flags); /* dup previous one */ op->indata_len = prev_op->indata_len; op->outdata_len = prev_op->outdata_len; @@ -845,7 +839,7 @@ int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, size_t size; int ret; - op = _osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0); + op = osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0); pagelist = ceph_pagelist_alloc(GFP_NOFS); if (!pagelist) @@ -883,8 +877,8 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *name, const void *value, size_t size, u8 cmp_op, u8 cmp_mode) { - 
struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, - opcode, 0); + struct ceph_osd_req_op *op = osd_req_op_init(osd_req, which, + opcode, 0); struct ceph_pagelist *pagelist; size_t payload_len; int ret; @@ -928,7 +922,7 @@ static void osd_req_op_watch_init(struct ceph_osd_request *req, int which, { struct ceph_osd_req_op *op; - op = _osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0); + op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0); op->watch.cookie = cookie; op->watch.op = watch_opcode; op->watch.gen = 0; @@ -943,10 +937,9 @@ void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, u64 expected_write_size, u32 flags) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, - CEPH_OSD_OP_SETALLOCHINT, - 0); + struct ceph_osd_req_op *op; + op = osd_req_op_init(osd_req, which, CEPH_OSD_OP_SETALLOCHINT, 0); op->alloc_hint.expected_object_size = expected_object_size; op->alloc_hint.expected_write_size = expected_write_size; op->alloc_hint.flags = flags; @@ -3076,9 +3069,7 @@ static void send_linger(struct ceph_osd_linger_request *lreq) cancel_linger_request(req); request_reinit(req); - ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); - ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); - req->r_flags = lreq->t.flags; + target_copy(&req->r_t, &lreq->t); req->r_mtime = lreq->mtime; mutex_lock(&lreq->lock); @@ -4801,7 +4792,7 @@ static int osd_req_op_notify_ack_init(struct ceph_osd_request *req, int which, struct ceph_pagelist *pl; int ret; - op = _osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY_ACK, 0); + op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY_ACK, 0); pl = ceph_pagelist_alloc(GFP_NOIO); if (!pl) @@ -4870,7 +4861,7 @@ static int osd_req_op_notify_init(struct ceph_osd_request *req, int which, struct ceph_pagelist *pl; int ret; - op = _osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); + op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); op->notify.cookie = cookie; pl = ceph_pagelist_alloc(GFP_NOIO); @@ -5334,8 
+5325,8 @@ static int osd_req_op_copy_from_init(struct ceph_osd_request *req, if (IS_ERR(pages)) return PTR_ERR(pages); - op = _osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM2, - dst_fadvise_flags); + op = osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM2, + dst_fadvise_flags); op->copy_from.snapid = src_snapid; op->copy_from.src_version = src_version; op->copy_from.flags = copy_from_flags; diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index d3377c90a291..b988f48153a4 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -1384,18 +1384,39 @@ static int bpf_iter_init_sk_storage_map(void *priv_data, return 0; } -static int bpf_iter_check_map(struct bpf_prog *prog, - struct bpf_iter_aux_info *aux) +static int bpf_iter_attach_map(struct bpf_prog *prog, + union bpf_iter_link_info *linfo, + struct bpf_iter_aux_info *aux) { - struct bpf_map *map = aux->map; + struct bpf_map *map; + int err = -EINVAL; + + if (!linfo->map.map_fd) + return -EBADF; + + map = bpf_map_get_with_uref(linfo->map.map_fd); + if (IS_ERR(map)) + return PTR_ERR(map); if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) - return -EINVAL; + goto put_map; - if (prog->aux->max_rdonly_access > map->value_size) - return -EACCES; + if (prog->aux->max_rdonly_access > map->value_size) { + err = -EACCES; + goto put_map; + } + aux->map = map; return 0; + +put_map: + bpf_map_put_with_uref(map); + return err; +} + +static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux) +{ + bpf_map_put_with_uref(aux->map); } static const struct seq_operations bpf_sk_storage_map_seq_ops = { @@ -1414,8 +1435,8 @@ static const struct bpf_iter_seq_info iter_seq_info = { static struct bpf_iter_reg bpf_sk_storage_map_reg_info = { .target = "bpf_sk_storage_map", - .check_target = bpf_iter_check_map, - .req_linfo = BPF_ITER_LINK_MAP_FD, + .attach_target = bpf_iter_attach_map, + .detach_target = bpf_iter_detach_map, .ctx_arg_info_size = 2, .ctx_arg_info = { { offsetof(struct 
bpf_iter__bpf_sk_storage_map, sk), diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 9de33b594ff2..efec66fa78b7 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -757,11 +757,13 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, return err; } - hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ; - cpumask_and(mask, mask, housekeeping_cpumask(hk_flags)); - if (cpumask_empty(mask)) { - free_cpumask_var(mask); - return -EINVAL; + if (!cpumask_empty(mask)) { + hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ; + cpumask_and(mask, mask, housekeeping_cpumask(hk_flags)); + if (cpumask_empty(mask)) { + free_cpumask_var(mask); + return -EINVAL; + } } map = kzalloc(max_t(unsigned int, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2828f6d5ba89..7e2e502ef519 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4853,7 +4853,7 @@ static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate) if (err < 0) goto out; - if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF)) + if (ip_is_fragment(ip_hdr(skb))) fragment = true; off = ip_hdrlen(skb); diff --git a/net/core/sock.c b/net/core/sock.c index a2044b4b606b..e4f40b175acb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3414,6 +3414,16 @@ static void sock_inuse_add(struct net *net, int val) } #endif +static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot) +{ + if (!twsk_prot) + return; + kfree(twsk_prot->twsk_slab_name); + twsk_prot->twsk_slab_name = NULL; + kmem_cache_destroy(twsk_prot->twsk_slab); + twsk_prot->twsk_slab = NULL; +} + static void req_prot_cleanup(struct request_sock_ops *rsk_prot) { if (!rsk_prot) @@ -3484,7 +3494,7 @@ int proto_register(struct proto *prot, int alloc_slab) prot->slab_flags, NULL); if (prot->twsk_prot->twsk_slab == NULL) - goto out_free_timewait_sock_slab_name; + goto out_free_timewait_sock_slab; } } @@ -3492,15 +3502,15 @@ int proto_register(struct proto *prot, int alloc_slab) ret = assign_proto_idx(prot); if (ret) { 
mutex_unlock(&proto_list_mutex); - goto out_free_timewait_sock_slab_name; + goto out_free_timewait_sock_slab; } list_add(&prot->node, &proto_list); mutex_unlock(&proto_list_mutex); return ret; -out_free_timewait_sock_slab_name: +out_free_timewait_sock_slab: if (alloc_slab && prot->twsk_prot) - kfree(prot->twsk_prot->twsk_slab_name); + tw_prot_cleanup(prot->twsk_prot); out_free_request_sock_slab: if (alloc_slab) { req_prot_cleanup(prot->rsk_prot); @@ -3524,12 +3534,7 @@ void proto_unregister(struct proto *prot) prot->slab = NULL; req_prot_cleanup(prot->rsk_prot); - - if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { - kmem_cache_destroy(prot->twsk_prot->twsk_slab); - kfree(prot->twsk_prot->twsk_slab_name); - prot->twsk_prot->twsk_slab = NULL; - } + tw_prot_cleanup(prot->twsk_prot); } EXPORT_SYMBOL(proto_unregister); diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index 545b2640f019..1b34cb9a7708 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -57,18 +57,16 @@ int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval, return bpfilter_mbox_request(sk, optname, optval, optlen, true); } -int bpfilter_ip_get_sockopt(struct sock *sk, int optname, - char __user *user_optval, int __user *optlen) +int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, + int __user *optlen) { - sockptr_t optval; - int err, len; + int len; if (get_user(len, optlen)) return -EFAULT; - err = init_user_sockptr(&optval, user_optval, len); - if (err) - return err; - return bpfilter_mbox_request(sk, optname, optval, len, false); + + return bpfilter_mbox_request(sk, optname, USER_SOCKPTR(optval), len, + false); } static int __init bpfilter_sockopt_init(void) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d1a3913eebe0..b457dd2d6c75 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -296,6 +296,57 @@ static inline int 
sk_reuseport_match(struct inet_bind_bucket *tb, ipv6_only_sock(sk), true, false); } +void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, + struct sock *sk) +{ + kuid_t uid = sock_i_uid(sk); + bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; + + if (hlist_empty(&tb->owners)) { + tb->fastreuse = reuse; + if (sk->sk_reuseport) { + tb->fastreuseport = FASTREUSEPORT_ANY; + tb->fastuid = uid; + tb->fast_rcv_saddr = sk->sk_rcv_saddr; + tb->fast_ipv6_only = ipv6_only_sock(sk); + tb->fast_sk_family = sk->sk_family; +#if IS_ENABLED(CONFIG_IPV6) + tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; +#endif + } else { + tb->fastreuseport = 0; + } + } else { + if (!reuse) + tb->fastreuse = 0; + if (sk->sk_reuseport) { + /* We didn't match or we don't have fastreuseport set on + * the tb, but we have sk_reuseport set on this socket + * and we know that there are no bind conflicts with + * this socket in this tb, so reset our tb's reuseport + * settings so that any subsequent sockets that match + * our current socket will be put on the fast path. + * + * If we reset we need to set FASTREUSEPORT_STRICT so we + * do extra checking for all subsequent sk_reuseport + * socks. + */ + if (!sk_reuseport_match(tb, sk)) { + tb->fastreuseport = FASTREUSEPORT_STRICT; + tb->fastuid = uid; + tb->fast_rcv_saddr = sk->sk_rcv_saddr; + tb->fast_ipv6_only = ipv6_only_sock(sk); + tb->fast_sk_family = sk->sk_family; +#if IS_ENABLED(CONFIG_IPV6) + tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; +#endif + } + } else { + tb->fastreuseport = 0; + } + } +} + /* Obtain a reference to a local port for the given sock, * if snum is zero it means select any available local port. 
* We try to allocate an odd port (and leave even ports for connect()) @@ -308,7 +359,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) struct inet_bind_hashbucket *head; struct net *net = sock_net(sk); struct inet_bind_bucket *tb = NULL; - kuid_t uid = sock_i_uid(sk); int l3mdev; l3mdev = inet_sk_bound_l3mdev(sk); @@ -345,49 +395,8 @@ tb_found: goto fail_unlock; } success: - if (hlist_empty(&tb->owners)) { - tb->fastreuse = reuse; - if (sk->sk_reuseport) { - tb->fastreuseport = FASTREUSEPORT_ANY; - tb->fastuid = uid; - tb->fast_rcv_saddr = sk->sk_rcv_saddr; - tb->fast_ipv6_only = ipv6_only_sock(sk); - tb->fast_sk_family = sk->sk_family; -#if IS_ENABLED(CONFIG_IPV6) - tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; -#endif - } else { - tb->fastreuseport = 0; - } - } else { - if (!reuse) - tb->fastreuse = 0; - if (sk->sk_reuseport) { - /* We didn't match or we don't have fastreuseport set on - * the tb, but we have sk_reuseport set on this socket - * and we know that there are no bind conflicts with - * this socket in this tb, so reset our tb's reuseport - * settings so that any subsequent sockets that match - * our current socket will be put on the fast path. - * - * If we reset we need to set FASTREUSEPORT_STRICT so we - * do extra checking for all subsequent sk_reuseport - * socks. 
- */ - if (!sk_reuseport_match(tb, sk)) { - tb->fastreuseport = FASTREUSEPORT_STRICT; - tb->fastuid = uid; - tb->fast_rcv_saddr = sk->sk_rcv_saddr; - tb->fast_ipv6_only = ipv6_only_sock(sk); - tb->fast_sk_family = sk->sk_family; -#if IS_ENABLED(CONFIG_IPV6) - tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; -#endif - } - } else { - tb->fastreuseport = 0; - } - } + inet_csk_update_fastreuse(tb, sk); + if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, port); WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 4eb4cd8d20dd..239e54474b65 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -163,6 +163,7 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child) return -ENOMEM; } } + inet_csk_update_fastreuse(tb, child); } inet_bind_hash(child, tb, port); spin_unlock(&head->lock); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5653e3b011bf..54023a46db04 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -301,24 +301,16 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write, struct ctl_table tbl = { .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2 * TCP_FASTOPEN_KEY_MAX) + (TCP_FASTOPEN_KEY_MAX * 5)) }; - struct tcp_fastopen_context *ctx; - u32 user_key[TCP_FASTOPEN_KEY_MAX * 4]; - __le32 key[TCP_FASTOPEN_KEY_MAX * 4]; + u32 user_key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(u32)]; + __le32 key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(__le32)]; char *backup_data; - int ret, i = 0, off = 0, n_keys = 0; + int ret, i = 0, off = 0, n_keys; tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); if (!tbl.data) return -ENOMEM; - rcu_read_lock(); - ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx); - if (ctx) { - n_keys = tcp_fastopen_context_len(ctx); - memcpy(&key[0], &ctx->key[0], TCP_FASTOPEN_KEY_LENGTH * n_keys); - } - rcu_read_unlock(); - + n_keys = tcp_fastopen_get_cipher(net, NULL, (u64 *)key); if (!n_keys) { memset(&key[0], 0, 
TCP_FASTOPEN_KEY_LENGTH); n_keys = 1; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c06d2bfd2ec4..31f3b858db81 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3685,22 +3685,14 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return 0; case TCP_FASTOPEN_KEY: { - __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH]; - struct tcp_fastopen_context *ctx; - unsigned int key_len = 0; + u64 key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(u64)]; + unsigned int key_len; if (get_user(len, optlen)) return -EFAULT; - rcu_read_lock(); - ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx); - if (ctx) { - key_len = tcp_fastopen_context_len(ctx) * - TCP_FASTOPEN_KEY_LENGTH; - memcpy(&key[0], &ctx->key[0], key_len); - } - rcu_read_unlock(); - + key_len = tcp_fastopen_get_cipher(net, icsk, key) * + TCP_FASTOPEN_KEY_LENGTH; len = min_t(unsigned int, len, key_len); if (put_user(len, optlen)) return -EFAULT; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index c1a54f3d58f5..09b62de04eea 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -108,6 +108,29 @@ out: return err; } +int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk, + u64 *key) +{ + struct tcp_fastopen_context *ctx; + int n_keys = 0, i; + + rcu_read_lock(); + if (icsk) + ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx); + else + ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx); + if (ctx) { + n_keys = tcp_fastopen_context_len(ctx); + for (i = 0; i < n_keys; i++) { + put_unaligned_le64(ctx->key[i].key[0], key + (i * 2)); + put_unaligned_le64(ctx->key[i].key[1], key + (i * 2) + 1); + } + } + rcu_read_unlock(); + + return n_keys; +} + static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req, struct sk_buff *syn, const siphash_key_t *key, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 96f4f2fe50ad..e8cac2655c82 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -423,12 +423,12 @@ static void 
mptcp_sock_destruct(struct sock *sk) * also remove the mptcp socket, via * sock_put(ctx->conn). * - * Problem is that the mptcp socket will not be in - * SYN_RECV state and doesn't have SOCK_DEAD flag. + * Problem is that the mptcp socket will be in + * ESTABLISHED state and will not have the SOCK_DEAD flag. * Both result in warnings from inet_sock_destruct. */ - if (sk->sk_state == TCP_SYN_RECV) { + if (sk->sk_state == TCP_ESTABLISHED) { sk->sk_state = TCP_CLOSE; WARN_ON_ONCE(sk->sk_socket); sock_orphan(sk); diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index b2061b6746ea..955c195ae14b 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -328,10 +328,13 @@ static int rawsock_create(struct net *net, struct socket *sock, if ((sock->type != SOCK_SEQPACKET) && (sock->type != SOCK_RAW)) return -ESOCKTNOSUPPORT; - if (sock->type == SOCK_RAW) + if (sock->type == SOCK_RAW) { + if (!capable(CAP_NET_RAW)) + return -EPERM; sock->ops = &rawsock_raw_ops; - else + } else { sock->ops = &rawsock_ops; + } sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto, kern); if (!sk) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 42f8cc70bb2c..6e47ef7ef036 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1756,6 +1756,7 @@ err: /* Called with ovs_mutex. */ static void __dp_destroy(struct datapath *dp) { + struct flow_table *table = &dp->table; int i; for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { @@ -1774,7 +1775,14 @@ static void __dp_destroy(struct datapath *dp) */ ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); - /* RCU destroy the flow table */ + /* Flush sw_flow in the tables. RCU cb only releases resource + * such as dp, ports and tables. That may avoid some issues + * such as RCU usage warning. + */ + table_instance_flow_flush(table, ovsl_dereference(table->ti), + ovsl_dereference(table->ufid_ti)); + + /* RCU destroy the ports, meters and flow tables. 
*/ call_rcu(&dp->rcu, destroy_dp_rcu); } diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 8c12675cbb67..e2235849a57e 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -473,19 +473,15 @@ static void table_instance_flow_free(struct flow_table *table, flow_mask_remove(table, flow->mask); } -static void table_instance_destroy(struct flow_table *table, - struct table_instance *ti, - struct table_instance *ufid_ti, - bool deferred) +/* Must be called with OVS mutex held. */ +void table_instance_flow_flush(struct flow_table *table, + struct table_instance *ti, + struct table_instance *ufid_ti) { int i; - if (!ti) - return; - - BUG_ON(!ufid_ti); if (ti->keep_flows) - goto skip_flows; + return; for (i = 0; i < ti->n_buckets; i++) { struct sw_flow *flow; @@ -497,18 +493,16 @@ static void table_instance_destroy(struct flow_table *table, table_instance_flow_free(table, ti, ufid_ti, flow, false); - ovs_flow_free(flow, deferred); + ovs_flow_free(flow, true); } } +} -skip_flows: - if (deferred) { - call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); - call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb); - } else { - __table_instance_destroy(ti); - __table_instance_destroy(ufid_ti); - } +static void table_instance_destroy(struct table_instance *ti, + struct table_instance *ufid_ti) +{ + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); + call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb); } /* No need for locking this function is called from RCU callback or @@ -523,7 +517,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table) call_rcu(&mc->rcu, mask_cache_rcu_cb); call_rcu(&ma->rcu, mask_array_rcu_cb); - table_instance_destroy(table, ti, ufid_ti, false); + table_instance_destroy(ti, ufid_ti); } struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, @@ -641,7 +635,8 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table) flow_table->count = 0; flow_table->ufid_count = 0; - table_instance_destroy(flow_table, old_ti, 
old_ufid_ti, true); + table_instance_flow_flush(flow_table, old_ti, old_ufid_ti); + table_instance_destroy(old_ti, old_ufid_ti); return 0; err_free_ti: diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 74ce48fecba9..6e7d4ac59353 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -105,5 +105,8 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, bool full, const struct sw_flow_mask *mask); void ovs_flow_masks_rebalance(struct flow_table *table); +void table_instance_flow_flush(struct flow_table *table, + struct table_instance *ti, + struct table_instance *ufid_ti); #endif /* flow_table.h */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0b8160d1a6e0..479c257ded73 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -941,6 +941,7 @@ static int prb_queue_frozen(struct tpacket_kbdq_core *pkc) } static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb) + __releases(&pkc->blk_fill_in_prog_lock) { struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); @@ -989,6 +990,7 @@ static void prb_fill_curr_block(char *curr, struct tpacket_kbdq_core *pkc, struct tpacket_block_desc *pbd, unsigned int len) + __acquires(&pkc->blk_fill_in_prog_lock) { struct tpacket3_hdr *ppd; @@ -2286,8 +2288,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (do_vnet && virtio_net_hdr_from_skb(skb, h.raw + macoff - sizeof(struct virtio_net_hdr), - vio_le(), true, 0)) + vio_le(), true, 0)) { + if (po->tp_version == TPACKET_V3) + prb_clear_blk_fill_status(&po->rx_ring); goto drop_n_account; + } if (po->tp_version <= TPACKET_V2) { packet_increment_rx_head(po, &po->rx_ring); @@ -2393,7 +2398,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, __clear_bit(slot_id, po->rx_ring.rx_owner_map); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); - } else { + } else if (po->tp_version == TPACKET_V3) { 
prb_clear_blk_fill_status(&po->rx_ring); } diff --git a/net/socket.c b/net/socket.c index aff52e81653c..dbbe8ea7d395 100644 --- a/net/socket.c +++ b/net/socket.c @@ -500,7 +500,7 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) if (f.file) { sock = sock_from_file(f.file, err); if (likely(sock)) { - *fput_needed = f.flags; + *fput_needed = f.flags & FDPUT_FPUT; return sock; } fdput(f); @@ -1325,7 +1325,7 @@ int sock_wake_async(struct socket_wq *wq, int how, int band) case SOCK_WAKE_SPACE: if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags)) break; - /* fall through */ + fallthrough; case SOCK_WAKE_IO: call_kill: kill_fasync(&wq->fasync_list, SIGIO, band); @@ -1804,8 +1804,7 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, ret = __sys_accept4_file(f.file, 0, upeer_sockaddr, upeer_addrlen, flags, rlimit(RLIMIT_NOFILE)); - if (f.flags) - fput(f.file); + fdput(f); } return ret; @@ -1868,8 +1867,7 @@ int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) ret = move_addr_to_kernel(uservaddr, addrlen, &address); if (!ret) ret = __sys_connect_file(f.file, &address, addrlen, 0); - if (f.flags) - fput(f.file); + fdput(f); } return ret; @@ -2097,7 +2095,7 @@ static bool sock_use_custom_sol_socket(const struct socket *sock) int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, int optlen) { - sockptr_t optval; + sockptr_t optval = USER_SOCKPTR(user_optval); char *kernel_optval = NULL; int err, fput_needed; struct socket *sock; @@ -2105,10 +2103,6 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, if (optlen < 0) return -EINVAL; - err = init_user_sockptr(&optval, user_optval, optlen); - if (err) - return err; - sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) return err; @@ -3065,7 +3059,7 @@ static int __init sock_init(void) err = register_filesystem(&sock_fs_type); if (err) - goto out_fs; + goto out; sock_mnt = 
kern_mount(&sock_fs_type); if (IS_ERR(sock_mnt)) { err = PTR_ERR(sock_mnt); @@ -3088,7 +3082,6 @@ out: out_mount: unregister_filesystem(&sock_fs_type); -out_fs: goto out; } @@ -3161,13 +3154,13 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32)) return -ENOMEM; buf_size += rule_cnt * sizeof(u32); - /* fall through */ + fallthrough; case ETHTOOL_GRXRINGS: case ETHTOOL_GRXCLSRLCNT: case ETHTOOL_GRXCLSRULE: case ETHTOOL_SRXCLSRLINS: convert_out = true; - /* fall through */ + fallthrough; case ETHTOOL_SRXCLSRLDEL: buf_size += sizeof(struct ethtool_rxnfc); convert_in = true; diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 18fa6067bb7f..b74e2741f74f 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -561,7 +561,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page, { struct tls_context *tls_ctx = tls_get_ctx(sk); struct iov_iter msg_iter; - char *kaddr = kmap(page); + char *kaddr; struct kvec iov; int rc; @@ -576,6 +576,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page, goto out; } + kaddr = kmap(page); iov.iov_base = kaddr + offset; iov.iov_len = size; iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 710bd44eaa49..9a3d9fedd7aa 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -935,7 +935,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) int ret = 0; int pending; - if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) + if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_CMSG_COMPAT)) return -EOPNOTSUPP; mutex_lock(&tls_ctx->tx_lock); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 27bbcfad9c17..9e93bc201cc0 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1032,7 +1032,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, } /* Connected sockets that can produce data can be 
written. */ - if (sk->sk_state == TCP_ESTABLISHED) { + if (transport && sk->sk_state == TCP_ESTABLISHED) { if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { bool space_avail_now = false; int ret = transport->notify_poll_out( diff --git a/scripts/checkkconfigsymbols.py b/scripts/checkkconfigsymbols.py index 00a10a293f4f..1548f9ce4682 100755 --- a/scripts/checkkconfigsymbols.py +++ b/scripts/checkkconfigsymbols.py @@ -34,7 +34,7 @@ REGEX_SOURCE_SYMBOL = re.compile(SOURCE_SYMBOL) REGEX_KCONFIG_DEF = re.compile(DEF) REGEX_KCONFIG_EXPR = re.compile(EXPR) REGEX_KCONFIG_STMT = re.compile(STMT) -REGEX_KCONFIG_HELP = re.compile(r"^\s+(help|---help---)\s*$") +REGEX_KCONFIG_HELP = re.compile(r"^\s+help\s*$") REGEX_FILTER_SYMBOLS = re.compile(r"[A-Za-z0-9]$") REGEX_NUMERIC = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+") REGEX_QUOTES = re.compile("(\"(.*?)\")") diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 599b8c4933a7..60d4a79674b6 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -59,7 +59,7 @@ my $spelling_file = "$D/spelling.txt"; my $codespell = 0; my $codespellfile = "/usr/share/codespell/dictionary.txt"; my $conststructsfile = "$D/const_structs.checkpatch"; -my $typedefsfile = ""; +my $typedefsfile; my $color = "auto"; my $allow_c99_comments = 1; # Can be overridden by --ignore C99_COMMENT_TOLERANCE # git output parsing needs US English output, so first set backtick child process LANGUAGE @@ -588,6 +588,8 @@ our @mode_permission_funcs = ( ["__ATTR", 2], ); +my $word_pattern = '\b[A-Z]?[a-z]{2,}\b'; + #Create a search pattern for all these functions to speed up a loop below our $mode_perms_search = ""; foreach my $entry (@mode_permission_funcs) { @@ -756,7 +758,7 @@ sub read_words { next; } - $$wordsRef .= '|' if ($$wordsRef ne ""); + $$wordsRef .= '|' if (defined $$wordsRef); $$wordsRef .= $line; } close($file); @@ -766,16 +768,18 @@ sub read_words { return 0; } -my $const_structs = ""; -read_words(\$const_structs, $conststructsfile) - or warn "No 
structs that should be const will be found - file '$conststructsfile': $!\n"; +my $const_structs; +if (show_type("CONST_STRUCT")) { + read_words(\$const_structs, $conststructsfile) + or warn "No structs that should be const will be found - file '$conststructsfile': $!\n"; +} -my $typeOtherTypedefs = ""; -if (length($typedefsfile)) { +if (defined($typedefsfile)) { + my $typeOtherTypedefs; read_words(\$typeOtherTypedefs, $typedefsfile) or warn "No additional types will be considered - file '$typedefsfile': $!\n"; + $typeTypedefs .= '|' . $typeOtherTypedefs if (defined $typeOtherTypedefs); } -$typeTypedefs .= '|' . $typeOtherTypedefs if ($typeOtherTypedefs ne ""); sub build_types { my $mods = "(?x: \n" . join("|\n ", (@modifierList, @modifierListFile)) . "\n)"; @@ -3041,11 +3045,7 @@ sub process { if ($lines[$ln - 1] =~ /^\+\s*(?:bool|tristate|prompt)\s*["']/) { $is_start = 1; - } elsif ($lines[$ln - 1] =~ /^\+\s*(?:help|---help---)\s*$/) { - if ($lines[$ln - 1] =~ "---help---") { - WARN("CONFIG_DESCRIPTION", - "prefer 'help' over '---help---' for new help texts\n" . $herecurr); - } + } elsif ($lines[$ln - 1] =~ /^\+\s*(?:---)?help(?:---)?$/) { $length = -1; } @@ -3310,6 +3310,42 @@ sub process { } } +# check for repeated words separated by a single space + if ($rawline =~ /^\+/) { + while ($rawline =~ /\b($word_pattern) (?=($word_pattern))/g) { + + my $first = $1; + my $second = $2; + + if ($first =~ /(?:struct|union|enum)/) { + pos($rawline) += length($first) + length($second) + 1; + next; + } + + next if ($first ne $second); + next if ($first eq 'long'); + + if (WARN("REPEATED_WORD", + "Possible repeated word: '$first'\n" . 
$herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\b$first $second\b/$first/; + } + } + + # if it's a repeated word on consecutive lines in a comment block + if ($prevline =~ /$;+\s*$/ && + $prevrawline =~ /($word_pattern)\s*$/) { + my $last_word = $1; + if ($rawline =~ /^\+\s*\*\s*$last_word /) { + if (WARN("REPEATED_WORD", + "Possible repeated word: '$last_word'\n" . $hereprev) && + $fix) { + $fixed[$fixlinenr] =~ s/(\+\s*\*\s*)$last_word /$1/; + } + } + } + } + # check for space before tabs. if ($rawline =~ /^\+/ && $rawline =~ / \t/) { my $herevet = "$here\n" . cat_vet($rawline) . "\n"; @@ -5020,8 +5056,30 @@ sub process { my ($s, $c) = ($stat, $cond); if ($c =~ /\bif\s*\(.*[^<>!=]=[^=].*/s) { - ERROR("ASSIGN_IN_IF", - "do not use assignment in if condition\n" . $herecurr); + if (ERROR("ASSIGN_IN_IF", + "do not use assignment in if condition\n" . $herecurr) && + $fix && $perl_version_ok) { + if ($rawline =~ /^\+(\s+)if\s*\(\s*(\!)?\s*\(\s*(($Lval)\s*=\s*$LvalOrFunc)\s*\)\s*(?:($Compare)\s*($FuncArg))?\s*\)\s*(\{)?\s*$/) { + my $space = $1; + my $not = $2; + my $statement = $3; + my $assigned = $4; + my $test = $8; + my $against = $9; + my $brace = $15; + fix_delete_line($fixlinenr, $rawline); + fix_insert_line($fixlinenr, "$space$statement;"); + my $newline = "${space}if ("; + $newline .= '!' 
if defined($not); + $newline .= '(' if (defined $not && defined($test) && defined($against)); + $newline .= "$assigned"; + $newline .= " $test $against" if (defined($test) && defined($against)); + $newline .= ')' if (defined $not && defined($test) && defined($against)); + $newline .= ')'; + $newline .= " {" if (defined($brace)); + fix_insert_line($fixlinenr + 1, $newline); + } + } } # Find out what is on the end of the line after the @@ -6465,6 +6523,12 @@ sub process { } } +# check for IS_ENABLED() without CONFIG_<FOO> ($rawline for comments too) + if ($rawline =~ /\bIS_ENABLED\s*\(\s*(\w+)\s*\)/ && $1 !~ /^CONFIG_/) { + WARN("IS_ENABLED_CONFIG", + "IS_ENABLED($1) is normally used as IS_ENABLED(CONFIG_$1)\n" . $herecurr); + } + # check for #if defined CONFIG_<FOO> || defined CONFIG_<FOO>_MODULE if ($line =~ /^\+\s*#\s*if\s+defined(?:\s*\(?\s*|\s+)(CONFIG_[A-Z_]+)\s*\)?\s*\|\|\s*defined(?:\s*\(?\s*|\s+)\1_MODULE\s*\)?\s*$/) { my $config = $1; @@ -6475,31 +6539,6 @@ sub process { } } -# check for case / default statements not preceded by break/fallthrough/switch - if ($line =~ /^.\s*(?:case\s+(?:$Ident|$Constant)\s*|default):/) { - my $has_break = 0; - my $has_statement = 0; - my $count = 0; - my $prevline = $linenr; - while ($prevline > 1 && ($file || $count < 3) && !$has_break) { - $prevline--; - my $rline = $rawlines[$prevline - 1]; - my $fline = $lines[$prevline - 1]; - last if ($fline =~ /^\@\@/); - next if ($fline =~ /^\-/); - next if ($fline =~ /^.(?:\s*(?:case\s+(?:$Ident|$Constant)[\s$;]*|default):[\s$;]*)*$/); - $has_break = 1 if ($rline =~ /fall[\s_-]*(through|thru)/i); - next if ($fline =~ /^.[\s$;]*$/); - $has_statement = 1; - $count++; - $has_break = 1 if ($fline =~ /\bswitch\b|\b(?:break\s*;[\s$;]*$|exit\s*\(\b|return\b|goto\b|continue\b)/); - } - if (!$has_break && $has_statement) { - WARN("MISSING_BREAK", - "Possible switch case/default not preceded by break or fallthrough comment\n" . 
$herecurr); - } - } - # check for /* fallthrough */ like comment, prefer fallthrough; my @fallthroughs = ( 'fallthrough', @@ -6615,7 +6654,8 @@ sub process { # check for various structs that are normally const (ops, kgdb, device_tree) # and avoid what seem like struct definitions 'struct foo {' - if ($line !~ /\bconst\b/ && + if (defined($const_structs) && + $line !~ /\bconst\b/ && $line =~ /\bstruct\s+($const_structs)\b(?!\s*\{)/) { WARN("CONST_STRUCT", "struct $1 should normally be const\n" . $herecurr); diff --git a/scripts/gdb/linux/rbtree.py b/scripts/gdb/linux/rbtree.py index c4b991607917..fe462855eefd 100644 --- a/scripts/gdb/linux/rbtree.py +++ b/scripts/gdb/linux/rbtree.py @@ -17,7 +17,7 @@ def rb_first(root): raise gdb.GdbError("Must be struct rb_root not {}".format(root.type)) node = root['rb_node'] - if node is 0: + if node == 0: return None while node['rb_left']: @@ -33,7 +33,7 @@ def rb_last(root): raise gdb.GdbError("Must be struct rb_root not {}".format(root.type)) node = root['rb_node'] - if node is 0: + if node == 0: return None while node['rb_right']: diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 6dc3078649fa..0096cd965332 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -109,6 +109,7 @@ static bool is_ignored_symbol(const char *name, char type) ".LASANPC", /* s390 kasan local symbols */ "__crc_", /* modversions */ "__efistub_", /* arm64 EFI stub namespace */ + "__kvm_nvhe_", /* arm64 non-VHE KVM namespace */ NULL }; diff --git a/scripts/kconfig/images.c b/scripts/kconfig/images.c index b4fa0e4a63a5..2f9afffa5d79 100644 --- a/scripts/kconfig/images.c +++ b/scripts/kconfig/images.c @@ -5,7 +5,7 @@ #include "images.h" -const char *xpm_load[] = { +const char * const xpm_load[] = { "22 22 5 1", ". c None", "# c #000000", @@ -35,7 +35,7 @@ const char *xpm_load[] = { "###############.......", "......................"}; -const char *xpm_save[] = { +const char * const xpm_save[] = { "22 22 5 1", ". 
c None", "# c #000000", @@ -65,7 +65,7 @@ const char *xpm_save[] = { "..##################..", "......................"}; -const char *xpm_back[] = { +const char * const xpm_back[] = { "22 22 3 1", ". c None", "# c #000083", @@ -93,7 +93,7 @@ const char *xpm_back[] = { "......................", "......................"}; -const char *xpm_tree_view[] = { +const char * const xpm_tree_view[] = { "22 22 2 1", ". c None", "# c #000000", @@ -120,7 +120,7 @@ const char *xpm_tree_view[] = { "......................", "......................"}; -const char *xpm_single_view[] = { +const char * const xpm_single_view[] = { "22 22 2 1", ". c None", "# c #000000", @@ -147,7 +147,7 @@ const char *xpm_single_view[] = { "......................", "......................"}; -const char *xpm_split_view[] = { +const char * const xpm_split_view[] = { "22 22 2 1", ". c None", "# c #000000", @@ -174,7 +174,7 @@ const char *xpm_split_view[] = { "......................", "......................"}; -const char *xpm_symbol_no[] = { +const char * const xpm_symbol_no[] = { "12 12 2 1", " c white", ". c black", @@ -191,7 +191,7 @@ const char *xpm_symbol_no[] = { " .......... ", " "}; -const char *xpm_symbol_mod[] = { +const char * const xpm_symbol_mod[] = { "12 12 2 1", " c white", ". c black", @@ -208,7 +208,7 @@ const char *xpm_symbol_mod[] = { " .......... ", " "}; -const char *xpm_symbol_yes[] = { +const char * const xpm_symbol_yes[] = { "12 12 2 1", " c white", ". c black", @@ -225,7 +225,7 @@ const char *xpm_symbol_yes[] = { " .......... ", " "}; -const char *xpm_choice_no[] = { +const char * const xpm_choice_no[] = { "12 12 2 1", " c white", ". c black", @@ -242,7 +242,7 @@ const char *xpm_choice_no[] = { " .... ", " "}; -const char *xpm_choice_yes[] = { +const char * const xpm_choice_yes[] = { "12 12 2 1", " c white", ". c black", @@ -259,7 +259,7 @@ const char *xpm_choice_yes[] = { " .... ", " "}; -const char *xpm_menu[] = { +const char * const xpm_menu[] = { "12 12 2 1", " c white", ". 
c black", @@ -276,7 +276,7 @@ const char *xpm_menu[] = { " .......... ", " "}; -const char *xpm_menu_inv[] = { +const char * const xpm_menu_inv[] = { "12 12 2 1", " c white", ". c black", @@ -293,7 +293,7 @@ const char *xpm_menu_inv[] = { " .......... ", " "}; -const char *xpm_menuback[] = { +const char * const xpm_menuback[] = { "12 12 2 1", " c white", ". c black", @@ -310,7 +310,7 @@ const char *xpm_menuback[] = { " .......... ", " "}; -const char *xpm_void[] = { +const char * const xpm_void[] = { "12 12 2 1", " c white", ". c black", diff --git a/scripts/kconfig/images.h b/scripts/kconfig/images.h index d8ff614bd087..7212dec2006c 100644 --- a/scripts/kconfig/images.h +++ b/scripts/kconfig/images.h @@ -10,21 +10,21 @@ extern "C" { #endif -extern const char *xpm_load[]; -extern const char *xpm_save[]; -extern const char *xpm_back[]; -extern const char *xpm_tree_view[]; -extern const char *xpm_single_view[]; -extern const char *xpm_split_view[]; -extern const char *xpm_symbol_no[]; -extern const char *xpm_symbol_mod[]; -extern const char *xpm_symbol_yes[]; -extern const char *xpm_choice_no[]; -extern const char *xpm_choice_yes[]; -extern const char *xpm_menu[]; -extern const char *xpm_menu_inv[]; -extern const char *xpm_menuback[]; -extern const char *xpm_void[]; +extern const char * const xpm_load[]; +extern const char * const xpm_save[]; +extern const char * const xpm_back[]; +extern const char * const xpm_tree_view[]; +extern const char * const xpm_single_view[]; +extern const char * const xpm_split_view[]; +extern const char * const xpm_symbol_no[]; +extern const char * const xpm_symbol_mod[]; +extern const char * const xpm_symbol_yes[]; +extern const char * const xpm_choice_no[]; +extern const char * const xpm_choice_yes[]; +extern const char * const xpm_menu[]; +extern const char * const xpm_menu_inv[]; +extern const char * const xpm_menuback[]; +extern const char * const xpm_void[]; #ifdef __cplusplus } diff --git a/scripts/kconfig/lexer.l 
b/scripts/kconfig/lexer.l index 6354c905b006..240109f965ae 100644 --- a/scripts/kconfig/lexer.l +++ b/scripts/kconfig/lexer.l @@ -36,7 +36,7 @@ struct buffer { YY_BUFFER_STATE state; }; -struct buffer *current_buf; +static struct buffer *current_buf; static int last_ts, first_ts; @@ -105,7 +105,7 @@ n [A-Za-z0-9_-] "endchoice" return T_ENDCHOICE; "endif" return T_ENDIF; "endmenu" return T_ENDMENU; -"help"|"---help---" return T_HELP; +"help" return T_HELP; "hex" return T_HEX; "if" return T_IF; "imply" return T_IMPLY; diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 23d1cb01a41a..bc390df49f1f 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -31,11 +31,6 @@ static ConfigSettings *configSettings; QAction *ConfigMainWindow::saveAction; -static inline QString qgettext(const char* str) -{ - return QString::fromLocal8Bit(str); -} - ConfigSettings::ConfigSettings() : QSettings("kernel.org", "qconf") { @@ -79,6 +74,13 @@ bool ConfigSettings::writeSizes(const QString& key, const QList<int>& value) return true; } +QIcon ConfigItem::symbolYesIcon; +QIcon ConfigItem::symbolModIcon; +QIcon ConfigItem::symbolNoIcon; +QIcon ConfigItem::choiceYesIcon; +QIcon ConfigItem::choiceNoIcon; +QIcon ConfigItem::menuIcon; +QIcon ConfigItem::menubackIcon; /* * set the new data @@ -102,14 +104,14 @@ void ConfigItem::updateMenu(void) list = listView(); if (goParent) { - setPixmap(promptColIdx, list->menuBackPix); + setIcon(promptColIdx, menubackIcon); prompt = ".."; goto set_prompt; } sym = menu->sym; prop = menu->prompt; - prompt = qgettext(menu_get_prompt(menu)); + prompt = menu_get_prompt(menu); if (prop) switch (prop->type) { case P_MENU: @@ -119,15 +121,15 @@ void ConfigItem::updateMenu(void) */ if (sym && list->rootEntry == menu) break; - setPixmap(promptColIdx, list->menuPix); + setIcon(promptColIdx, menuIcon); } else { if (sym) break; - setPixmap(promptColIdx, QIcon()); + setIcon(promptColIdx, QIcon()); } goto set_prompt; case P_COMMENT: - 
setPixmap(promptColIdx, QIcon()); + setIcon(promptColIdx, QIcon()); goto set_prompt; default: ; @@ -135,7 +137,7 @@ void ConfigItem::updateMenu(void) if (!sym) goto set_prompt; - setText(nameColIdx, QString::fromLocal8Bit(sym->name)); + setText(nameColIdx, sym->name); type = sym_get_type(sym); switch (type) { @@ -144,7 +146,7 @@ void ConfigItem::updateMenu(void) char ch; if (!sym_is_changeable(sym) && list->optMode == normalOpt) { - setPixmap(promptColIdx, QIcon()); + setIcon(promptColIdx, QIcon()); setText(noColIdx, QString()); setText(modColIdx, QString()); setText(yesColIdx, QString()); @@ -154,22 +156,22 @@ void ConfigItem::updateMenu(void) switch (expr) { case yes: if (sym_is_choice_value(sym) && type == S_BOOLEAN) - setPixmap(promptColIdx, list->choiceYesPix); + setIcon(promptColIdx, choiceYesIcon); else - setPixmap(promptColIdx, list->symbolYesPix); + setIcon(promptColIdx, symbolYesIcon); setText(yesColIdx, "Y"); ch = 'Y'; break; case mod: - setPixmap(promptColIdx, list->symbolModPix); + setIcon(promptColIdx, symbolModIcon); setText(modColIdx, "M"); ch = 'M'; break; default: if (sym_is_choice_value(sym) && type == S_BOOLEAN) - setPixmap(promptColIdx, list->choiceNoPix); + setIcon(promptColIdx, choiceNoIcon); else - setPixmap(promptColIdx, list->symbolNoPix); + setIcon(promptColIdx, symbolNoIcon); setText(noColIdx, "N"); ch = 'N'; break; @@ -265,7 +267,7 @@ void ConfigLineEdit::show(ConfigItem* i) { item = i; if (sym_get_string_value(item->menu->sym)) - setText(QString::fromLocal8Bit(sym_get_string_value(item->menu->sym))); + setText(sym_get_string_value(item->menu->sym)); else setText(QString()); Parent::show(); @@ -280,7 +282,7 @@ void ConfigLineEdit::keyPressEvent(QKeyEvent* e) case Qt::Key_Return: case Qt::Key_Enter: sym_set_string_value(item->menu->sym, text().toLatin1()); - parent()->updateList(item); + parent()->updateList(); break; default: Parent::keyPressEvent(e); @@ -294,9 +296,6 @@ void ConfigLineEdit::keyPressEvent(QKeyEvent* e) 
ConfigList::ConfigList(ConfigView* p, const char *name) : Parent(p), updateAll(false), - symbolYesPix(xpm_symbol_yes), symbolModPix(xpm_symbol_mod), symbolNoPix(xpm_symbol_no), - choiceYesPix(xpm_choice_yes), choiceNoPix(xpm_choice_no), - menuPix(xpm_menu), menuInvPix(xpm_menu_inv), menuBackPix(xpm_menuback), voidPix(xpm_void), showName(false), showRange(false), showData(false), mode(singleMode), optMode(normalOpt), rootEntry(0), headerPopup(0) { @@ -322,7 +321,7 @@ ConfigList::ConfigList(ConfigView* p, const char *name) connect(configApp, SIGNAL(aboutToQuit()), SLOT(saveSettings())); } - addColumn(promptColIdx); + showColumn(promptColIdx); reinit(); } @@ -340,21 +339,33 @@ bool ConfigList::menuSkip(struct menu *menu) void ConfigList::reinit(void) { - removeColumn(dataColIdx); - removeColumn(yesColIdx); - removeColumn(modColIdx); - removeColumn(noColIdx); - removeColumn(nameColIdx); + hideColumn(dataColIdx); + hideColumn(yesColIdx); + hideColumn(modColIdx); + hideColumn(noColIdx); + hideColumn(nameColIdx); if (showName) - addColumn(nameColIdx); + showColumn(nameColIdx); if (showRange) { - addColumn(noColIdx); - addColumn(modColIdx); - addColumn(yesColIdx); + showColumn(noColIdx); + showColumn(modColIdx); + showColumn(yesColIdx); } if (showData) - addColumn(dataColIdx); + showColumn(dataColIdx); + + updateListAll(); +} + +void ConfigList::setOptionMode(QAction *action) +{ + if (action == showNormalAction) + optMode = normalOpt; + else if (action == showAllAction) + optMode = allOpt; + else + optMode = promptOpt; updateListAll(); } @@ -404,15 +415,15 @@ void ConfigList::updateSelection(void) emit menuSelected(menu); } -void ConfigList::updateList(ConfigItem* item) +void ConfigList::updateList() { ConfigItem* last = 0; + ConfigItem *item; if (!rootEntry) { if (mode != listMode) goto update; QTreeWidgetItemIterator it(this); - ConfigItem* item; while (*it) { item = (ConfigItem*)(*it); @@ -446,7 +457,7 @@ void ConfigList::updateList(ConfigItem* item) return; } update: - 
updateMenuList(this, rootEntry); + updateMenuList(rootEntry); update(); resizeColumnToContents(0); } @@ -471,7 +482,7 @@ void ConfigList::setValue(ConfigItem* item, tristate val) return; if (oldval == no && item->menu->list) item->setExpanded(true); - parent()->updateList(item); + parent()->updateList(); break; } } @@ -505,7 +516,7 @@ void ConfigList::changeValue(ConfigItem* item) item->setExpanded(true); } if (oldexpr != newexpr) - parent()->updateList(item); + parent()->updateList(); break; case S_INT: case S_HEX: @@ -524,7 +535,7 @@ void ConfigList::setRootMenu(struct menu *menu) type = menu && menu->prompt ? menu->prompt->type : P_UNKNOWN; if (type != P_MENU) return; - updateMenuList(this, 0); + updateMenuList(0); rootEntry = menu; updateListAll(); if (currentItem()) { @@ -628,7 +639,7 @@ hide: } } -void ConfigList::updateMenuList(ConfigList *parent, struct menu* menu) +void ConfigList::updateMenuList(struct menu *menu) { struct menu* child; ConfigItem* item; @@ -637,19 +648,19 @@ void ConfigList::updateMenuList(ConfigList *parent, struct menu* menu) enum prop_type type; if (!menu) { - while (parent->topLevelItemCount() > 0) + while (topLevelItemCount() > 0) { - delete parent->takeTopLevelItem(0); + delete takeTopLevelItem(0); } return; } - last = (ConfigItem*)parent->topLevelItem(0); + last = (ConfigItem *)topLevelItem(0); if (last && !last->goParent) last = 0; for (child = menu->list; child; child = child->next) { - item = last ? last->nextSibling() : (ConfigItem*)parent->topLevelItem(0); + item = last ? last->nextSibling() : (ConfigItem *)topLevelItem(0); type = child->prompt ? 
child->prompt->type : P_UNKNOWN; switch (mode) { @@ -670,7 +681,7 @@ void ConfigList::updateMenuList(ConfigList *parent, struct menu* menu) if (!child->sym && !child->list && !child->prompt) continue; if (!item || item->menu != child) - item = new ConfigItem(parent, last, child, visible); + item = new ConfigItem(this, last, child, visible); else item->testUpdateMenu(visible); @@ -683,7 +694,7 @@ void ConfigList::updateMenuList(ConfigList *parent, struct menu* menu) } hide: if (item && item->menu == child) { - last = (ConfigItem*)parent->topLevelItem(0); + last = (ConfigItem *)topLevelItem(0); if (last == item) last = 0; else while (last->nextSibling() != item) @@ -774,7 +785,7 @@ void ConfigList::mouseReleaseEvent(QMouseEvent* e) idx = header()->logicalIndexAt(x); switch (idx) { case promptColIdx: - icon = item->pixmap(promptColIdx); + icon = item->icon(promptColIdx); if (!icon.isNull()) { int off = header()->sectionPosition(0) + visualRect(indexAt(p)).x() + 4; // 4 is Hardcoded image offset. There might be a way to do it properly. if (x >= off && x < off + icon.availableSizes().first().width()) { @@ -785,7 +796,8 @@ void ConfigList::mouseReleaseEvent(QMouseEvent* e) break; ptype = menu->prompt ? menu->prompt->type : P_UNKNOWN; if (ptype == P_MENU && rootEntry != menu && - mode != fullMode && mode != menuMode) + mode != fullMode && mode != menuMode && + mode != listMode) emit menuSelected(menu); else changeValue(item); @@ -835,7 +847,7 @@ void ConfigList::mouseDoubleClickEvent(QMouseEvent* e) if (!menu) goto skip; ptype = menu->prompt ? 
menu->prompt->type : P_UNKNOWN; - if (ptype == P_MENU) { + if (ptype == P_MENU && mode != listMode) { if (mode == singleMode) emit itemSelected(menu); else if (mode == symbolMode) @@ -864,46 +876,46 @@ void ConfigList::focusInEvent(QFocusEvent *e) void ConfigList::contextMenuEvent(QContextMenuEvent *e) { - if (e->y() <= header()->geometry().bottom()) { - if (!headerPopup) { - QAction *action; - - headerPopup = new QMenu(this); - action = new QAction("Show Name", this); - action->setCheckable(true); - connect(action, SIGNAL(toggled(bool)), - parent(), SLOT(setShowName(bool))); - connect(parent(), SIGNAL(showNameChanged(bool)), - action, SLOT(setOn(bool))); - action->setChecked(showName); - headerPopup->addAction(action); - action = new QAction("Show Range", this); - action->setCheckable(true); - connect(action, SIGNAL(toggled(bool)), - parent(), SLOT(setShowRange(bool))); - connect(parent(), SIGNAL(showRangeChanged(bool)), - action, SLOT(setOn(bool))); - action->setChecked(showRange); - headerPopup->addAction(action); - action = new QAction("Show Data", this); - action->setCheckable(true); - connect(action, SIGNAL(toggled(bool)), - parent(), SLOT(setShowData(bool))); - connect(parent(), SIGNAL(showDataChanged(bool)), - action, SLOT(setOn(bool))); - action->setChecked(showData); - headerPopup->addAction(action); - } - headerPopup->exec(e->globalPos()); - e->accept(); - } else - e->ignore(); + if (!headerPopup) { + QAction *action; + + headerPopup = new QMenu(this); + action = new QAction("Show Name", this); + action->setCheckable(true); + connect(action, SIGNAL(toggled(bool)), + parent(), SLOT(setShowName(bool))); + connect(parent(), SIGNAL(showNameChanged(bool)), + action, SLOT(setOn(bool))); + action->setChecked(showName); + headerPopup->addAction(action); + + action = new QAction("Show Range", this); + action->setCheckable(true); + connect(action, SIGNAL(toggled(bool)), + parent(), SLOT(setShowRange(bool))); + connect(parent(), SIGNAL(showRangeChanged(bool)), + 
action, SLOT(setOn(bool))); + action->setChecked(showRange); + headerPopup->addAction(action); + + action = new QAction("Show Data", this); + action->setCheckable(true); + connect(action, SIGNAL(toggled(bool)), + parent(), SLOT(setShowData(bool))); + connect(parent(), SIGNAL(showDataChanged(bool)), + action, SLOT(setOn(bool))); + action->setChecked(showData); + headerPopup->addAction(action); + } + + headerPopup->exec(e->globalPos()); + e->accept(); } ConfigView*ConfigView::viewList; -QAction *ConfigView::showNormalAction; -QAction *ConfigView::showAllAction; -QAction *ConfigView::showPromptAction; +QAction *ConfigList::showNormalAction; +QAction *ConfigList::showAllAction; +QAction *ConfigList::showPromptAction; ConfigView::ConfigView(QWidget* parent, const char *name) : Parent(parent) @@ -934,18 +946,6 @@ ConfigView::~ConfigView(void) } } -void ConfigView::setOptionMode(QAction *act) -{ - if (act == showNormalAction) - list->optMode = normalOpt; - else if (act == showAllAction) - list->optMode = allOpt; - else - list->optMode = promptOpt; - - list->updateListAll(); -} - void ConfigView::setShowName(bool b) { if (list->showName != b) { @@ -984,12 +984,12 @@ void ConfigList::setAllOpen(bool open) } } -void ConfigView::updateList(ConfigItem* item) +void ConfigView::updateList() { ConfigView* v; for (v = viewList; v; v = v->nextView) - v->list->updateList(item); + v->list->updateList(); } void ConfigView::updateListAll(void) @@ -1287,16 +1287,17 @@ void ConfigInfoView::contextMenuEvent(QContextMenuEvent *e) Parent::contextMenuEvent(e); } -ConfigSearchWindow::ConfigSearchWindow(ConfigMainWindow* parent, const char *name) +ConfigSearchWindow::ConfigSearchWindow(ConfigMainWindow *parent) : Parent(parent), result(NULL) { - setObjectName(name); + setObjectName("search"); setWindowTitle("Search Config"); QVBoxLayout* layout1 = new QVBoxLayout(this); layout1->setContentsMargins(11, 11, 11, 11); layout1->setSpacing(6); - QHBoxLayout* layout2 = new QHBoxLayout(0); + + 
QHBoxLayout* layout2 = new QHBoxLayout(); layout2->setContentsMargins(0, 0, 0, 0); layout2->setSpacing(6); layout2->addWidget(new QLabel("Find:", this)); @@ -1311,9 +1312,9 @@ ConfigSearchWindow::ConfigSearchWindow(ConfigMainWindow* parent, const char *nam split = new QSplitter(this); split->setOrientation(Qt::Vertical); - list = new ConfigView(split, name); + list = new ConfigView(split, "search"); list->list->mode = listMode; - info = new ConfigInfoView(split, name); + info = new ConfigInfoView(split, "search"); connect(list->list, SIGNAL(menuChanged(struct menu *)), info, SLOT(setInfo(struct menu *))); connect(list->list, SIGNAL(menuChanged(struct menu *)), @@ -1321,25 +1322,23 @@ ConfigSearchWindow::ConfigSearchWindow(ConfigMainWindow* parent, const char *nam layout1->addWidget(split); - if (name) { - QVariant x, y; - int width, height; - bool ok; + QVariant x, y; + int width, height; + bool ok; - configSettings->beginGroup(name); - width = configSettings->value("/window width", parent->width() / 2).toInt(); - height = configSettings->value("/window height", parent->height() / 2).toInt(); - resize(width, height); - x = configSettings->value("/window x"); - y = configSettings->value("/window y"); - if ((x.isValid())&&(y.isValid())) - move(x.toInt(), y.toInt()); - QList<int> sizes = configSettings->readSizes("/split", &ok); - if (ok) - split->setSizes(sizes); - configSettings->endGroup(); - connect(configApp, SIGNAL(aboutToQuit()), SLOT(saveSettings())); - } + configSettings->beginGroup("search"); + width = configSettings->value("/window width", parent->width() / 2).toInt(); + height = configSettings->value("/window height", parent->height() / 2).toInt(); + resize(width, height); + x = configSettings->value("/window x"); + y = configSettings->value("/window y"); + if (x.isValid() && y.isValid()) + move(x.toInt(), y.toInt()); + QList<int> sizes = configSettings->readSizes("/split", &ok); + if (ok) + split->setSizes(sizes); + configSettings->endGroup(); + 
connect(configApp, SIGNAL(aboutToQuit()), SLOT(saveSettings())); } void ConfigSearchWindow::saveSettings(void) @@ -1381,7 +1380,6 @@ void ConfigSearchWindow::search(void) ConfigMainWindow::ConfigMainWindow(void) : searchWindow(0) { - QMenuBar* menu; bool ok = true; QVariant x, y; int width, height; @@ -1402,6 +1400,15 @@ ConfigMainWindow::ConfigMainWindow(void) if ((x.isValid())&&(y.isValid())) move(x.toInt(), y.toInt()); + // set up icons + ConfigItem::symbolYesIcon = QIcon(QPixmap(xpm_symbol_yes)); + ConfigItem::symbolModIcon = QIcon(QPixmap(xpm_symbol_mod)); + ConfigItem::symbolNoIcon = QIcon(QPixmap(xpm_symbol_no)); + ConfigItem::choiceYesIcon = QIcon(QPixmap(xpm_choice_yes)); + ConfigItem::choiceNoIcon = QIcon(QPixmap(xpm_choice_no)); + ConfigItem::menuIcon = QIcon(QPixmap(xpm_menu)); + ConfigItem::menubackIcon = QIcon(QPixmap(xpm_menuback)); + QWidget *widget = new QWidget(this); QVBoxLayout *layout = new QVBoxLayout(widget); setCentralWidget(widget); @@ -1432,10 +1439,6 @@ ConfigMainWindow::ConfigMainWindow(void) setTabOrder(configList, helpText); configList->setFocus(); - menu = menuBar(); - toolBar = new QToolBar("Tools", this); - addToolBar(toolBar); - backAction = new QAction(QPixmap(xpm_back), "Back", this); connect(backAction, SIGNAL(triggered(bool)), SLOT(goBack())); @@ -1485,17 +1488,17 @@ ConfigMainWindow::ConfigMainWindow(void) QActionGroup *optGroup = new QActionGroup(this); optGroup->setExclusive(true); - connect(optGroup, SIGNAL(triggered(QAction*)), configView, + connect(optGroup, SIGNAL(triggered(QAction*)), configList, SLOT(setOptionMode(QAction *))); - connect(optGroup, SIGNAL(triggered(QAction *)), menuView, + connect(optGroup, SIGNAL(triggered(QAction *)), menuList, SLOT(setOptionMode(QAction *))); - configView->showNormalAction = new QAction("Show Normal Options", optGroup); - configView->showAllAction = new QAction("Show All Options", optGroup); - configView->showPromptAction = new QAction("Show Prompt Options", optGroup); - 
configView->showNormalAction->setCheckable(true); - configView->showAllAction->setCheckable(true); - configView->showPromptAction->setCheckable(true); + ConfigList::showNormalAction = new QAction("Show Normal Options", optGroup); + ConfigList::showNormalAction->setCheckable(true); + ConfigList::showAllAction = new QAction("Show All Options", optGroup); + ConfigList::showAllAction->setCheckable(true); + ConfigList::showPromptAction = new QAction("Show Prompt Options", optGroup); + ConfigList::showPromptAction->setCheckable(true); QAction *showDebugAction = new QAction("Show Debug Info", this); showDebugAction->setCheckable(true); @@ -1508,6 +1511,7 @@ ConfigMainWindow::ConfigMainWindow(void) connect(showAboutAction, SIGNAL(triggered(bool)), SLOT(showAbout())); // init tool bar + QToolBar *toolBar = addToolBar("Tools"); toolBar->addAction(backAction); toolBar->addSeparator(); toolBar->addAction(loadAction); @@ -1517,33 +1521,32 @@ ConfigMainWindow::ConfigMainWindow(void) toolBar->addAction(splitViewAction); toolBar->addAction(fullViewAction); - // create config menu - QMenu* config = menu->addMenu("&File"); - config->addAction(loadAction); - config->addAction(saveAction); - config->addAction(saveAsAction); - config->addSeparator(); - config->addAction(quitAction); + // create file menu + QMenu *menu = menuBar()->addMenu("&File"); + menu->addAction(loadAction); + menu->addAction(saveAction); + menu->addAction(saveAsAction); + menu->addSeparator(); + menu->addAction(quitAction); // create edit menu - QMenu* editMenu = menu->addMenu("&Edit"); - editMenu->addAction(searchAction); + menu = menuBar()->addMenu("&Edit"); + menu->addAction(searchAction); // create options menu - QMenu* optionMenu = menu->addMenu("&Option"); - optionMenu->addAction(showNameAction); - optionMenu->addAction(showRangeAction); - optionMenu->addAction(showDataAction); - optionMenu->addSeparator(); - optionMenu->addActions(optGroup->actions()); - optionMenu->addSeparator(); - 
optionMenu->addAction(showDebugAction); + menu = menuBar()->addMenu("&Option"); + menu->addAction(showNameAction); + menu->addAction(showRangeAction); + menu->addAction(showDataAction); + menu->addSeparator(); + menu->addActions(optGroup->actions()); + menu->addSeparator(); + menu->addAction(showDebugAction); // create help menu - menu->addSeparator(); - QMenu* helpMenu = menu->addMenu("&Help"); - helpMenu->addAction(showIntroAction); - helpMenu->addAction(showAboutAction); + menu = menuBar()->addMenu("&Help"); + menu->addAction(showIntroAction); + menu->addAction(showAboutAction); connect (helpText, SIGNAL (anchorClicked (const QUrl &)), helpText, SLOT (clicked (const QUrl &)) ); @@ -1646,7 +1649,7 @@ void ConfigMainWindow::saveConfigAs(void) void ConfigMainWindow::searchConfig(void) { if (!searchWindow) - searchWindow = new ConfigSearchWindow(this, "search"); + searchWindow = new ConfigSearchWindow(this); searchWindow->show(); } diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h index 5eeab4a8bb43..461df6419f15 100644 --- a/scripts/kconfig/qconf.h +++ b/scripts/kconfig/qconf.h @@ -69,11 +69,13 @@ protected: public slots: void setRootMenu(struct menu *menu); - void updateList(ConfigItem *item); + void updateList(); void setValue(ConfigItem* item, tristate val); void changeValue(ConfigItem* item); void updateSelection(void); void saveSettings(void); + void setOptionMode(QAction *action); + signals: void menuChanged(struct menu *menu); void menuSelected(struct menu *menu); @@ -85,35 +87,19 @@ public: void updateListAll(void) { updateAll = true; - updateList(NULL); + updateList(); updateAll = false; } - ConfigList* listView() - { - return this; - } - void addColumn(colIdx idx) - { - showColumn(idx); - } - void removeColumn(colIdx idx) - { - hideColumn(idx); - } void setAllOpen(bool open); void setParentMenu(void); bool menuSkip(struct menu *); void updateMenuList(ConfigItem *parent, struct menu*); - void updateMenuList(ConfigList *parent, struct menu*); + 
void updateMenuList(struct menu *menu); bool updateAll; - QPixmap symbolYesPix, symbolModPix, symbolNoPix; - QPixmap choiceYesPix, choiceNoPix; - QPixmap menuPix, menuInvPix, menuBackPix, voidPix; - bool showName, showRange, showData; enum listMode mode; enum optionMode optMode; @@ -121,6 +107,8 @@ public: QPalette disabledColorGroup; QPalette inactivedColorGroup; QMenu* headerPopup; + + static QAction *showNormalAction, *showAllAction, *showPromptAction; }; class ConfigItem : public QTreeWidgetItem { @@ -168,28 +156,16 @@ public: return ret; } - void setText(colIdx idx, const QString& text) - { - Parent::setText(idx, text); - } - QString text(colIdx idx) const - { - return Parent::text(idx); - } - void setPixmap(colIdx idx, const QIcon &icon) - { - Parent::setIcon(idx, icon); - } - const QIcon pixmap(colIdx idx) const - { - return icon(idx); - } // TODO: Implement paintCell ConfigItem* nextItem; struct menu *menu; bool visible; bool goParent; + + static QIcon symbolYesIcon, symbolModIcon, symbolNoIcon; + static QIcon choiceYesIcon, choiceNoIcon; + static QIcon menuIcon, menubackIcon; }; class ConfigLineEdit : public QLineEdit { @@ -214,7 +190,7 @@ class ConfigView : public QWidget { public: ConfigView(QWidget* parent, const char *name = 0); ~ConfigView(void); - static void updateList(ConfigItem* item); + static void updateList(); static void updateListAll(void); bool showName(void) const { return list->showName; } @@ -224,7 +200,6 @@ public slots: void setShowName(bool); void setShowRange(bool); void setShowData(bool); - void setOptionMode(QAction *); signals: void showNameChanged(bool); void showRangeChanged(bool); @@ -235,10 +210,6 @@ public: static ConfigView* viewList; ConfigView* nextView; - - static QAction *showNormalAction; - static QAction *showAllAction; - static QAction *showPromptAction; }; class ConfigInfoView : public QTextBrowser { @@ -276,7 +247,7 @@ class ConfigSearchWindow : public QDialog { Q_OBJECT typedef class QDialog Parent; public: - 
ConfigSearchWindow(ConfigMainWindow* parent, const char *name = 0); + ConfigSearchWindow(ConfigMainWindow *parent); public slots: void saveSettings(void); @@ -326,7 +297,6 @@ protected: ConfigView *configView; ConfigList *configList; ConfigInfoView *helpText; - QToolBar *toolBar; QAction *backAction; QAction *singleViewAction; QAction *splitViewAction; diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 9363e37b8870..ffa3ec65cc90 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -15,15 +15,21 @@ struct symbol symbol_yes = { .name = "y", .curr = { "y", yes }, .flags = SYMBOL_CONST|SYMBOL_VALID, -}, symbol_mod = { +}; + +struct symbol symbol_mod = { .name = "m", .curr = { "m", mod }, .flags = SYMBOL_CONST|SYMBOL_VALID, -}, symbol_no = { +}; + +struct symbol symbol_no = { .name = "n", .curr = { "n", no }, .flags = SYMBOL_CONST|SYMBOL_VALID, -}, symbol_empty = { +}; + +static struct symbol symbol_empty = { .name = "", .curr = { "", no }, .flags = SYMBOL_VALID, @@ -31,7 +37,7 @@ struct symbol symbol_yes = { struct symbol *sym_defconfig_list; struct symbol *modules_sym; -tristate modules_val; +static tristate modules_val; enum symbol_type sym_get_type(struct symbol *sym) { diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index 53b3e1f5f227..dc4ecc0b2038 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -914,7 +914,7 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos, * (represented by bprm). 'current' is the process doing * the execve(). 
*/ - if (get_user_pages_remote(current, bprm->mm, pos, 1, + if (get_user_pages_remote(bprm->mm, pos, 1, FOLL_FORCE, &page, NULL, NULL) <= 0) return false; #else diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index c9dba7543dba..3b1aad7535dd 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -11,6 +11,7 @@ static int do_pin(int argc, char **argv) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts); + union bpf_iter_link_info linfo; const char *objfile, *path; struct bpf_program *prog; struct bpf_object *obj; @@ -36,6 +37,11 @@ static int do_pin(int argc, char **argv) map_fd = map_parse_fd(&argc, &argv); if (map_fd < 0) return -1; + + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + iter_opts.link_info = &linfo; + iter_opts.link_info_len = sizeof(linfo); } } @@ -57,9 +63,6 @@ static int do_pin(int argc, char **argv) goto close_obj; } - if (map_fd >= 0) - iter_opts.map_fd = map_fd; - link = bpf_program__attach_iter(prog, &iter_opts); if (IS_ERR(link)) { err = PTR_ERR(link); diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 52d883325a23..4d9ecb975862 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -566,6 +566,7 @@ static int sets_patch(struct object *obj) next = rb_next(next); } + return 0; } static int symbols_patch(struct object *obj) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b134e679e9db..0480f893facd 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key { __u32 attach_type; /* program attach type */ }; +union bpf_iter_link_info { + struct { + __u32 map_fd; + } map; +}; + /* BPF syscall commands, see bpf(2) man-page for details. 
*/ enum bpf_cmd { BPF_MAP_CREATE, @@ -249,13 +255,6 @@ enum bpf_link_type { MAX_BPF_LINK_TYPE, }; -enum bpf_iter_link_info { - BPF_ITER_LINK_UNSPEC = 0, - BPF_ITER_LINK_MAP_FD = 1, - - MAX_BPF_ITER_LINK_INFO, -}; - /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. @@ -623,6 +622,8 @@ union bpf_attr { }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ + __aligned_u64 iter_info; /* extra bpf_iter_link_info */ + __u32 iter_info_len; /* iter_info length */ } link_create; struct { /* struct used by BPF_LINK_UPDATE command */ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index eab14c97c15d..0750681057c2 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -599,6 +599,9 @@ int bpf_link_create(int prog_fd, int target_fd, attr.link_create.target_fd = target_fd; attr.link_create.attach_type = attach_type; attr.link_create.flags = OPTS_GET(opts, flags, 0); + attr.link_create.iter_info = + ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0)); + attr.link_create.iter_info_len = OPTS_GET(opts, iter_info_len, 0); return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 28855fd5b5f4..015d13f25fcc 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -168,11 +168,14 @@ LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); +union bpf_iter_link_info; /* defined in up-to-date linux/bpf.h */ struct bpf_link_create_opts { size_t sz; /* size of this struct for forward/backward compatibility */ __u32 flags; + union bpf_iter_link_info *iter_info; + __u32 iter_info_len; }; -#define bpf_link_create_opts__last_field flags +#define bpf_link_create_opts__last_field iter_info_len LIBBPF_API int bpf_link_create(int prog_fd, int target_fd, enum bpf_attach_type attach_type, diff --git a/tools/lib/bpf/btf.c 
b/tools/lib/bpf/btf.c index 856b09a04563..4843e44916f7 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -564,8 +564,8 @@ done: struct btf *btf__parse_raw(const char *path) { + struct btf *btf = NULL; void *data = NULL; - struct btf *btf; FILE *f = NULL; __u16 magic; int err = 0; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7be04e45d29c..0a06124f7999 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8306,10 +8306,8 @@ bpf_program__attach_iter(struct bpf_program *prog, if (!OPTS_VALID(opts, bpf_iter_attach_opts)) return ERR_PTR(-EINVAL); - if (OPTS_HAS(opts, map_fd)) { - target_fd = opts->map_fd; - link_create_opts.flags = BPF_ITER_LINK_MAP_FD; - } + link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0); + link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0); prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 3ed1399bfbbc..5ecb4069a9f0 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -267,9 +267,10 @@ LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map); struct bpf_iter_attach_opts { size_t sz; /* size of this struct for forward/backward compatibility */ - __u32 map_fd; + union bpf_iter_link_info *link_info; + __u32 link_info_len; }; -#define bpf_iter_attach_opts__last_field map_fd +#define bpf_iter_attach_opts__last_field link_info_len LIBBPF_API struct bpf_link * bpf_program__attach_iter(struct bpf_program *prog, diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e7a8cf83ba48..a83b5827532f 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -102,7 +102,7 @@ endif OVERRIDE_TARGETS := 1 override define CLEAN $(call msg,CLEAN) - $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) + $(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) 
$(EXTRA_CLEAN) endef include ../lib.mk @@ -123,17 +123,21 @@ $(notdir $(TEST_GEN_PROGS) \ $(TEST_GEN_PROGS_EXTENDED) \ $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; +$(OUTPUT)/%.o: %.c + $(call msg,CC,,$@) + $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(OUTPUT)/%:%.c $(call msg,BINARY,,$@) - $(LINK.c) $^ $(LDLIBS) -o $@ + $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ $(OUTPUT)/urandom_read: urandom_read.c $(call msg,BINARY,,$@) - $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id + $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) - $(CC) -c $(CFLAGS) -o $@ $< + $(Q)$(CC) -c $(CFLAGS) -o $@ $< VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ @@ -142,7 +146,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ /boot/vmlinux-$(shell uname -r) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) -$(OUTPUT)/runqslower: $(BPFOBJ) +DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool + +$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \ @@ -164,7 +170,6 @@ $(OUTPUT)/test_netcnt: cgroup_helpers.c $(OUTPUT)/test_sock_fields: cgroup_helpers.c $(OUTPUT)/test_sysctl: cgroup_helpers.c -DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(BPFOBJ) | $(BUILD_DIR)/bpftool @@ -180,15 +185,15 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(BUILD_DIR)/resolve_btfids $(INCLUDE_DIR): $(call msg,MKDIR,,$@) - mkdir -p $@ + $(Q)mkdir -p $@ $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR) ifeq ($(VMLINUX_H),) $(call msg,GEN,,$@) - $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ + $(Q)$(BPFTOOL) btf dump file 
$(VMLINUX_BTF) format c > $@ else $(call msg,CP,,$@) - cp "$(VMLINUX_H)" $@ + $(Q)cp "$(VMLINUX_H)" $@ endif $(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids \ @@ -237,28 +242,28 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h # $4 - LDFLAGS define CLANG_BPF_BUILD_RULE $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) - ($(CLANG) $3 -O2 -target bpf -emit-llvm \ + $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) - ($(CLANG) $3 -O2 -target bpf -emit-llvm \ + $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC define CLANG_NATIVE_BPF_BUILD_RULE $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) - ($(CLANG) $3 -O2 -emit-llvm \ + $(Q)($(CLANG) $3 -O2 -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2) - $(BPF_GCC) $3 $4 -O2 -c $1 -o $2 + $(Q)$(BPF_GCC) $3 $4 -O2 -c $1 -o $2 endef SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c @@ -300,7 +305,7 @@ ifeq ($($(TRUNNER_OUTPUT)-dir),) $(TRUNNER_OUTPUT)-dir := y $(TRUNNER_OUTPUT): $$(call msg,MKDIR,,$$@) - mkdir -p $$@ + $(Q)mkdir -p $$@ endif # ensure we set up BPF objects generation rule just once for a given @@ -320,7 +325,7 @@ $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \ $(TRUNNER_OUTPUT)/%.o \ | $(BPFTOOL) $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) - $$(BPFTOOL) gen skeleton $$< > $$@ + $(Q)$$(BPFTOOL) gen skeleton $$< > $$@ endif # ensure we set up tests.h header generation rule just once 
@@ -344,7 +349,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ $(TRUNNER_BPF_SKELS) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@) - cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) + $(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ %.c \ @@ -352,13 +357,13 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_TESTS_HDR) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@) - $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ + $(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ # only copy extra resources if in flavored build $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT) ifneq ($2,) $$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES)) - cp -a $$^ $(TRUNNER_OUTPUT)/ + $(Q)cp -a $$^ $(TRUNNER_OUTPUT)/ endif $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ @@ -366,8 +371,8 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ $(RESOLVE_BTFIDS) \ | $(TRUNNER_BINARY)-extras $$(call msg,BINARY,,$$@) - $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ - $(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@ + $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ + $(Q)$(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@ endef @@ -420,17 +425,17 @@ verifier/tests.h: verifier/*.c ) > verifier/tests.h) $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) $(call msg,BINARY,,$@) - $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ # Make sure we are able to include and link libbpf against c++. 
$(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(call msg,CXX,,$@) - $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ + $(Q)$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(call msg,CC,,$@) - $(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ @@ -443,7 +448,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ $(OUTPUT)/bench_trigger.o \ $(OUTPUT)/bench_ringbufs.o $(call msg,BINARY,,$@) - $(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) + $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 4ffefdc1130f..7375d9a6d242 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -468,6 +468,7 @@ static void test_bpf_hash_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_hash_map *skel; int err, i, len, map_fd, iter_fd; + union bpf_iter_link_info linfo; __u64 val, expected_val = 0; struct bpf_link *link; struct key_t { @@ -490,13 +491,16 @@ static void test_bpf_hash_map(void) goto out; /* iterator with hashmap2 and hashmap3 should fail */ - opts.map_fd = bpf_map__fd(skel->maps.hashmap2); + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap2); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(!IS_ERR(link), "attach_iter", "attach_iter for hashmap2 unexpected succeeded\n")) goto out; - opts.map_fd = 
bpf_map__fd(skel->maps.hashmap3); + linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(!IS_ERR(link), "attach_iter", "attach_iter for hashmap3 unexpected succeeded\n")) @@ -519,7 +523,7 @@ static void test_bpf_hash_map(void) goto out; } - opts.map_fd = map_fd; + linfo.map.map_fd = map_fd; link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -562,6 +566,7 @@ static void test_bpf_percpu_hash_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_percpu_hash_map *skel; int err, i, j, len, map_fd, iter_fd; + union bpf_iter_link_info linfo; __u32 expected_val = 0; struct bpf_link *link; struct key_t { @@ -606,7 +611,10 @@ static void test_bpf_percpu_hash_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -649,6 +657,7 @@ static void test_bpf_array_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); __u32 expected_key = 0, res_first_key; struct bpf_iter_bpf_array_map *skel; + union bpf_iter_link_info linfo; int err, i, map_fd, iter_fd; struct bpf_link *link; char buf[64] = {}; @@ -673,7 +682,10 @@ static void test_bpf_array_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -730,6 +742,7 @@ static void test_bpf_percpu_array_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_percpu_array_map 
*skel; __u32 expected_key = 0, expected_val = 0; + union bpf_iter_link_info linfo; int err, i, j, map_fd, iter_fd; struct bpf_link *link; char buf[64]; @@ -765,7 +778,10 @@ static void test_bpf_percpu_array_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -803,6 +819,7 @@ static void test_bpf_sk_storage_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); int err, i, len, map_fd, iter_fd, num_sockets; struct bpf_iter_bpf_sk_storage_map *skel; + union bpf_iter_link_info linfo; int sock_fd[3] = {-1, -1, -1}; __u32 val, expected_val = 0; struct bpf_link *link; @@ -829,7 +846,10 @@ static void test_bpf_sk_storage_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -871,6 +891,7 @@ static void test_rdonly_buf_out_of_bound(void) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_test_kern5 *skel; + union bpf_iter_link_info linfo; struct bpf_link *link; skel = bpf_iter_test_kern5__open_and_load(); @@ -878,7 +899,10 @@ static void test_rdonly_buf_out_of_bound(void) "skeleton open_and_load failed\n")) return; - opts.map_fd = bpf_map__fd(skel->maps.hashmap1); + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap1); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n")) bpf_link__destroy(link); diff --git 
a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 504abb7bfb95..7043e6ded0e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -48,21 +48,19 @@ static void test_send_signal_common(struct perf_event_attr *attr, close(pipe_p2c[1]); /* close write */ /* notify parent signal handler is installed */ - write(pipe_c2p[1], buf, 1); + CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); /* make sure parent enabled bpf program to send_signal */ - read(pipe_p2c[0], buf, 1); + CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno); /* wait a little for signal handler */ sleep(1); - if (sigusr1_received) - write(pipe_c2p[1], "2", 1); - else - write(pipe_c2p[1], "0", 1); + buf[0] = sigusr1_received ? '2' : '0'; + CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); /* wait for parent notification and exit */ - read(pipe_p2c[0], buf, 1); + CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno); close(pipe_c2p[1]); close(pipe_p2c[0]); @@ -99,7 +97,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, } /* wait until child signal handler installed */ - read(pipe_c2p[0], buf, 1); + CHECK(read(pipe_c2p[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno); /* trigger the bpf send_signal */ skel->bss->pid = pid; @@ -107,7 +105,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, skel->bss->signal_thread = signal_thread; /* notify child that bpf program can send_signal now */ - write(pipe_p2c[1], buf, 1); + CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); /* wait for result */ err = read(pipe_c2p[0], buf, 1); @@ -121,7 +119,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, CHECK(buf[0] != '2', test_name, "incorrect result\n"); /* notify child safe to exit */ - write(pipe_p2c[1], buf, 1); + 
CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); disable_pmu: close(pmu_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index f002e3090d92..11a769e18f5d 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -6,11 +6,13 @@ static __u64 read_perf_max_sample_freq(void) { __u64 sample_freq = 5000; /* fallback to 5000 on error */ FILE *f; + __u32 duration = 0; f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r"); if (f == NULL) return sample_freq; - fscanf(f, "%llu", &sample_freq); + CHECK(fscanf(f, "%llu", &sample_freq) != 1, "Get max sample rate", + "return default value: 5000,err %d\n", -errno); fclose(f); return sample_freq; } diff --git a/tools/testing/selftests/bpf/settings b/tools/testing/selftests/bpf/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/bpf/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index 8549b31716ab..73da7fe8c152 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -124,17 +124,24 @@ int main(int argc, char **argv) sprintf(test_script, "iptables -A INPUT -p tcp --dport %d -j DROP", TESTPORT); - system(test_script); + if (system(test_script)) { + printf("FAILED: execute command: %s, err %d\n", test_script, -errno); + goto err; + } sprintf(test_script, "nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ", TESTPORT); - system(test_script); + if (system(test_script)) + printf("execute command: %s, err %d\n", test_script, -errno); sprintf(test_script, "iptables -D INPUT -p tcp --dport %d -j DROP", TESTPORT); - system(test_script); + if (system(test_script)) { + printf("FAILED: execute command: %s, 
err %d\n", test_script, -errno); + goto err; + } rv = bpf_map_lookup_elem(bpf_map__fd(global_map), &key, &g); if (rv != 0) { diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index 5224dae216e5..0941aa16157e 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -18,6 +18,15 @@ #include "cgroup_util.h" +/* + * Memory cgroup charging and vmstat data aggregation is performed using + * percpu batches 32 pages big (look at MEMCG_CHARGE_BATCH). So the maximum + * discrepancy between charge and vmstat entries is number of cpus multiplied + * by 32 pages multiplied by 2. + */ +#define MAX_VMSTAT_ERROR (4096 * 32 * 2 * get_nprocs()) + + static int alloc_dcache(const char *cgroup, void *arg) { unsigned long i; @@ -180,7 +189,7 @@ static int test_kmem_memcg_deletion(const char *root) goto cleanup; sum = slab + anon + file + kernel_stack; - if (abs(sum - current) < 4096 * 32 * 2 * get_nprocs()) { + if (abs(sum - current) < MAX_VMSTAT_ERROR) { ret = KSFT_PASS; } else { printf("memory.current = %ld\n", current); @@ -331,6 +340,64 @@ cleanup: return ret; } +/* + * This test creates a sub-tree with 1000 memory cgroups. + * Then it checks that the memory.current on the parent level + * is greater than 0 and approximates matches the percpu value + * from memory.stat. 
+ */ +static int test_percpu_basic(const char *root) +{ + int ret = KSFT_FAIL; + char *parent, *child; + long current, percpu; + int i; + + parent = cg_name(root, "percpu_basic_test"); + if (!parent) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+memory")) + goto cleanup; + + for (i = 0; i < 1000; i++) { + child = cg_name_indexed(parent, "child", i); + if (!child) + return -1; + + if (cg_create(child)) + goto cleanup_children; + + free(child); + } + + current = cg_read_long(parent, "memory.current"); + percpu = cg_read_key_long(parent, "memory.stat", "percpu "); + + if (current > 0 && percpu > 0 && abs(current - percpu) < + MAX_VMSTAT_ERROR) + ret = KSFT_PASS; + else + printf("memory.current %ld\npercpu %ld\n", + current, percpu); + +cleanup_children: + for (i = 0; i < 1000; i++) { + child = cg_name_indexed(parent, "child", i); + cg_destroy(child); + free(child); + } + +cleanup: + cg_destroy(parent); + free(parent); + + return ret; +} + #define T(x) { x, #x } struct kmem_test { int (*fn)(const char *root); @@ -341,6 +408,7 @@ struct kmem_test { T(test_kmem_proc_kpagecgroup), T(test_kmem_kernel_stacks), T(test_kmem_dead_cgroups), + T(test_percpu_basic), }; #undef T diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index ea2147248ebe..afd42387e8b2 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -343,7 +343,7 @@ kmod_test_0001_driver() kmod_defaults_driver config_num_threads 1 - printf '\000' >"$DIR"/config_test_driver + printf $NAME >"$DIR"/config_test_driver config_trigger ${FUNCNAME[0]} config_expect_result ${FUNCNAME[0]} MODULE_NOT_FOUND } @@ -354,7 +354,7 @@ kmod_test_0001_fs() kmod_defaults_fs config_num_threads 1 - printf '\000' >"$DIR"/config_test_fs + printf $NAME >"$DIR"/config_test_fs config_trigger ${FUNCNAME[0]} config_expect_result ${FUNCNAME[0]} -EINVAL } diff --git 
a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 2499824d9e1c..8df5cb8f71ff 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -1,4 +1,6 @@ CONFIG_MPTCP=y CONFIG_MPTCP_IPV6=y +CONFIG_INET_DIAG=m +CONFIG_INET_MPTCP_DIAG=m CONFIG_VETH=y CONFIG_NET_SCH_NETEM=m diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index cad6f73a5fd0..090620c3e10c 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -406,10 +406,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) /* ... but we still receive. * Close our write side, ev. give some time - * for address notification + * for address notification and/or checking + * the current status */ - if (cfg_join) - usleep(400000); + if (cfg_wait) + usleep(cfg_wait); shutdown(peerfd, SHUT_WR); } else { if (errno == EINTR) @@ -427,7 +428,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } /* leave some time for late join/announce */ - if (cfg_wait) + if (cfg_join) usleep(cfg_wait); close(peerfd); diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index 91d38a29956b..93fc5cadce61 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -942,6 +942,41 @@ TEST_F(hmm, migrate_fault) } /* + * Migrate anonymous shared memory to device private memory. 
+ */ +TEST_F(hmm, migrate_shared) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Migrate memory to device. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); + ASSERT_EQ(ret, -ENOENT); + + hmm_buffer_free(buffer); +} + +/* * Try to migrate various memory types to device private memory. */ TEST_F(hmm2, migrate_mixed) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 390f758d5a27..dd777688d14a 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -61,7 +61,7 @@ static void async_pf_execute(struct work_struct *work) * access remotely. */ mmap_read_lock(mm); - get_user_pages_remote(NULL, mm, addr, 1, FOLL_WRITE, NULL, NULL, + get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, NULL, &locked); if (locked) mmap_read_unlock(mm); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2c2c0254c2d8..737666db02de 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1893,7 +1893,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, * not call the fault handler, so do it here. */ bool unlocked = false; - r = fixup_user_fault(current, current->mm, addr, + r = fixup_user_fault(current->mm, addr, (write_fault ? FAULT_FLAG_WRITE : 0), &unlocked); if (unlocked) |