diff options
358 files changed, 9496 insertions, 2303 deletions
diff --git a/Documentation/filesystems/zonefs.txt b/Documentation/filesystems/zonefs.txt new file mode 100644 index 000000000000..935bf22031ca --- /dev/null +++ b/Documentation/filesystems/zonefs.txt @@ -0,0 +1,404 @@ +ZoneFS - Zone filesystem for Zoned block devices + +Introduction +============ + +zonefs is a very simple file system exposing each zone of a zoned block device +as a file. Unlike a regular POSIX-compliant file system with native zoned block +device support (e.g. f2fs), zonefs does not hide the sequential write +constraint of zoned block devices to the user. Files representing sequential +write zones of the device must be written sequentially starting from the end +of the file (append only writes). + +As such, zonefs is in essence closer to a raw block device access interface +than to a full-featured POSIX file system. The goal of zonefs is to simplify +the implementation of zoned block device support in applications by replacing +raw block device file accesses with a richer file API, avoiding relying on +direct block device file ioctls which may be more obscure to developers. One +example of this approach is the implementation of LSM (log-structured merge) +tree structures (such as used in RocksDB and LevelDB) on zoned block devices +by allowing SSTables to be stored in a zone file similarly to a regular file +system rather than as a range of sectors of the entire disk. The introduction +of the higher level construct "one file is one zone" can help reducing the +amount of changes needed in the application as well as introducing support for +different application programming languages. + +Zoned block devices +------------------- + +Zoned storage devices belong to a class of storage devices with an address +space that is divided into zones. A zone is a group of consecutive LBAs and all +zones are contiguous (there are no LBA gaps). Zones may have different types. +* Conventional zones: there are no access constraints to LBAs belonging to + conventional zones. Any read or write access can be executed, similarly to a + regular block device. +* Sequential zones: these zones accept random reads but must be written + sequentially. Each sequential zone has a write pointer maintained by the + device that keeps track of the mandatory start LBA position of the next write + to the device. As a result of this write constraint, LBAs in a sequential zone + cannot be overwritten. Sequential zones must first be erased using a special + command (zone reset) before rewriting. + +Zoned storage devices can be implemented using various recording and media +technologies. The most common form of zoned storage today uses the SCSI Zoned +Block Commands (ZBC) and Zoned ATA Commands (ZAC) interfaces on Shingled +Magnetic Recording (SMR) HDDs. + +Solid State Disks (SSD) storage devices can also implement a zoned interface +to, for instance, reduce internal write amplification due to garbage collection. +The NVMe Zoned NameSpace (ZNS) is a technical proposal of the NVMe standard +committee aiming at adding a zoned storage interface to the NVMe protocol. + +Zonefs Overview +=============== + +Zonefs exposes the zones of a zoned block device as files. The files +representing zones are grouped by zone type, which are themselves represented +by sub-directories. This file structure is built entirely using zone information +provided by the device and so does not require any complex on-disk metadata +structure. + +On-disk metadata +---------------- + +zonefs on-disk metadata is reduced to an immutable super block which +persistently stores a magic number and optional feature flags and values. On +mount, zonefs uses blkdev_report_zones() to obtain the device zone configuration +and populates the mount point with a static file tree solely based on this +information. File sizes come from the device zone type and write pointer +position managed by the device itself. + +The super block is always written on disk at sector 0. The first zone of the +device storing the super block is never exposed as a zone file by zonefs. If +the zone containing the super block is a sequential zone, the mkzonefs format +tool always "finishes" the zone, that is, it transitions the zone to a full +state to make it read-only, preventing any data write. + +Zone type sub-directories +------------------------- + +Files representing zones of the same type are grouped together under the same +sub-directory automatically created on mount. + +For conventional zones, the sub-directory "cnv" is used. This directory is +however created if and only if the device has usable conventional zones. If +the device only has a single conventional zone at sector 0, the zone will not +be exposed as a file as it will be used to store the zonefs super block. For +such devices, the "cnv" sub-directory will not be created. + +For sequential write zones, the sub-directory "seq" is used. + +These two directories are the only directories that exist in zonefs. Users +cannot create other directories and cannot rename nor delete the "cnv" and +"seq" sub-directories. + +The size of the directories indicated by the st_size field of struct stat, +obtained with the stat() or fstat() system calls, indicates the number of files +existing under the directory. + +Zone files +---------- + +Zone files are named using the number of the zone they represent within the set +of zones of a particular type. That is, both the "cnv" and "seq" directories +contain files named "0", "1", "2", ... The file numbers also represent +increasing zone start sector on the device. + +All read and write operations to zone files are not allowed beyond the file +maximum size, that is, beyond the zone size. Any access exceeding the zone +size is failed with the -EFBIG error. + +Creating, deleting, renaming or modifying any attribute of files and +sub-directories is not allowed. + +The number of blocks of a file as reported by stat() and fstat() indicates the +size of the file zone, or in other words, the maximum file size. + +Conventional zone files +----------------------- + +The size of conventional zone files is fixed to the size of the zone they +represent. Conventional zone files cannot be truncated. + +These files can be randomly read and written using any type of I/O operation: +buffered I/Os, direct I/Os, memory mapped I/Os (mmap), etc. There are no I/O +constraint for these files beyond the file size limit mentioned above. + +Sequential zone files +--------------------- + +The size of sequential zone files grouped in the "seq" sub-directory represents +the file's zone write pointer position relative to the zone start sector. + +Sequential zone files can only be written sequentially, starting from the file +end, that is, write operations can only be append writes. Zonefs makes no +attempt at accepting random writes and will fail any write request that has a +start offset not corresponding to the end of the file, or to the end of the last +write issued and still in-flight (for asynchrnous I/O operations). + +Since dirty page writeback by the page cache does not guarantee a sequential +write pattern, zonefs prevents buffered writes and writeable shared mappings +on sequential files. Only direct I/O writes are accepted for these files. +zonefs relies on the sequential delivery of write I/O requests to the device +implemented by the block layer elevator. An elevator implementing the sequential +write feature for zoned block device (ELEVATOR_F_ZBD_SEQ_WRITE elevator feature) +must be used. This type of elevator (e.g. mq-deadline) is the set by default +for zoned block devices on device initialization. + +There are no restrictions on the type of I/O used for read operations in +sequential zone files. Buffered I/Os, direct I/Os and shared read mappings are +all accepted. + +Truncating sequential zone files is allowed only down to 0, in which case, the +zone is reset to rewind the file zone write pointer position to the start of +the zone, or up to the zone size, in which case the file's zone is transitioned +to the FULL state (finish zone operation). + +Format options +-------------- + +Several optional features of zonefs can be enabled at format time. +* Conventional zone aggregation: ranges of contiguous conventional zones can be + aggregated into a single larger file instead of the default one file per zone. +* File ownership: The owner UID and GID of zone files is by default 0 (root) + but can be changed to any valid UID/GID. +* File access permissions: the default 640 access permissions can be changed. + +IO error handling +----------------- + +Zoned block devices may fail I/O requests for reasons similar to regular block +devices, e.g. due to bad sectors. However, in addition to such known I/O +failure pattern, the standards governing zoned block devices behavior define +additional conditions that result in I/O errors. + +* A zone may transition to the read-only condition (BLK_ZONE_COND_READONLY): + While the data already written in the zone is still readable, the zone can + no longer be written. No user action on the zone (zone management command or + read/write access) can change the zone condition back to a normal read/write + state. While the reasons for the device to transition a zone to read-only + state are not defined by the standards, a typical cause for such transition + would be a defective write head on an HDD (all zones under this head are + changed to read-only). + +* A zone may transition to the offline condition (BLK_ZONE_COND_OFFLINE): + An offline zone cannot be read nor written. No user action can transition an + offline zone back to an operational good state. Similarly to zone read-only + transitions, the reasons for a drive to transition a zone to the offline + condition are undefined. A typical cause would be a defective read-write head + on an HDD causing all zones on the platter under the broken head to be + inaccessible. + +* Unaligned write errors: These errors result from the host issuing write + requests with a start sector that does not correspond to a zone write pointer + position when the write request is executed by the device. Even though zonefs + enforces sequential file write for sequential zones, unaligned write errors + may still happen in the case of a partial failure of a very large direct I/O + operation split into multiple BIOs/requests or asynchronous I/O operations. + If one of the write request within the set of sequential write requests + issued to the device fails, all write requests after queued after it will + become unaligned and fail. + +* Delayed write errors: similarly to regular block devices, if the device side + write cache is enabled, write errors may occur in ranges of previously + completed writes when the device write cache is flushed, e.g. on fsync(). + Similarly to the previous immediate unaligned write error case, delayed write + errors can propagate through a stream of cached sequential data for a zone + causing all data to be dropped after the sector that caused the error. + +All I/O errors detected by zonefs are notified to the user with an error code +return for the system call that trigered or detected the error. The recovery +actions taken by zonefs in response to I/O errors depend on the I/O type (read +vs write) and on the reason for the error (bad sector, unaligned writes or zone +condition change). + +* For read I/O errors, zonefs does not execute any particular recovery action, + but only if the file zone is still in a good condition and there is no + inconsistency between the file inode size and its zone write pointer position. + If a problem is detected, I/O error recovery is executed (see below table). + +* For write I/O errors, zonefs I/O error recovery is always executed. + +* A zone condition change to read-only or offline also always triggers zonefs + I/O error recovery. + +Zonefs minimal I/O error recovery may change a file size and a file access +permissions. + +* File size changes: + Immediate or delayed write errors in a sequential zone file may cause the file + inode size to be inconsistent with the amount of data successfully written in + the file zone. For instance, the partial failure of a multi-BIO large write + operation will cause the zone write pointer to advance partially, even though + the entire write operation will be reported as failed to the user. In such + case, the file inode size must be advanced to reflect the zone write pointer + change and eventually allow the user to restart writing at the end of the + file. + A file size may also be reduced to reflect a delayed write error detected on + fsync(): in this case, the amount of data effectively written in the zone may + be less than originally indicated by the file inode size. After such I/O + error, zonefs always fixes a file inode size to reflect the amount of data + persistently stored in the file zone. + +* Access permission changes: + A zone condition change to read-only is indicated with a change in the file + access permissions to render the file read-only. This disables changes to the + file attributes and data modification. For offline zones, all permissions + (read and write) to the file are disabled. + +Further action taken by zonefs I/O error recovery can be controlled by the user +with the "errors=xxx" mount option. The table below summarizes the result of +zonefs I/O error processing depending on the mount option and on the zone +conditions. + + +--------------+-----------+-----------------------------------------+ + | | | Post error state | + | "errors=xxx" | device | access permissions | + | mount | zone | file file device zone | + | option | condition | size read write read write | + +--------------+-----------+-----------------------------------------+ + | | good | fixed yes no yes yes | + | remount-ro | read-only | fixed yes no yes no | + | (default) | offline | 0 no no no no | + +--------------+-----------+-----------------------------------------+ + | | good | fixed yes no yes yes | + | zone-ro | read-only | fixed yes no yes no | + | | offline | 0 no no no no | + +--------------+-----------+-----------------------------------------+ + | | good | 0 no no yes yes | + | zone-offline | read-only | 0 no no yes no | + | | offline | 0 no no no no | + +--------------+-----------+-----------------------------------------+ + | | good | fixed yes yes yes yes | + | repair | read-only | fixed yes no yes no | + | | offline | 0 no no no no | + +--------------+-----------+-----------------------------------------+ + +Further notes: +* The "errors=remount-ro" mount option is the default behavior of zonefs I/O + error processing if no errors mount option is specified. +* With the "errors=remount-ro" mount option, the change of the file access + permissions to read-only applies to all files. The file system is remounted + read-only. +* Access permission and file size changes due to the device transitioning zones + to the offline condition are permanent. Remounting or reformating the device + with mkfs.zonefs (mkzonefs) will not change back offline zone files to a good + state. +* File access permission changes to read-only due to the device transitioning + zones to the read-only condition are permanent. Remounting or reformating + the device will not re-enable file write access. +* File access permission changes implied by the remount-ro, zone-ro and + zone-offline mount options are temporary for zones in a good condition. + Unmounting and remounting the file system will restore the previous default + (format time values) access rights to the files affected. +* The repair mount option triggers only the minimal set of I/O error recovery + actions, that is, file size fixes for zones in a good condition. Zones + indicated as being read-only or offline by the device still imply changes to + the zone file access permissions as noted in the table above. + +Mount options +------------- + +zonefs define the "errors=<behavior>" mount option to allow the user to specify +zonefs behavior in response to I/O errors, inode size inconsistencies or zone +condition chages. The defined behaviors are as follow: +* remount-ro (default) +* zone-ro +* zone-offline +* repair + +The I/O error actions defined for each behavior is detailed in the previous +section. + +Zonefs User Space Tools +======================= + +The mkzonefs tool is used to format zoned block devices for use with zonefs. +This tool is available on Github at: + +https://github.com/damien-lemoal/zonefs-tools + +zonefs-tools also includes a test suite which can be run against any zoned +block device, including null_blk block device created with zoned mode. + +Examples +-------- + +The following formats a 15TB host-managed SMR HDD with 256 MB zones +with the conventional zones aggregation feature enabled. + +# mkzonefs -o aggr_cnv /dev/sdX +# mount -t zonefs /dev/sdX /mnt +# ls -l /mnt/ +total 0 +dr-xr-xr-x 2 root root 1 Nov 25 13:23 cnv +dr-xr-xr-x 2 root root 55356 Nov 25 13:23 seq + +The size of the zone files sub-directories indicate the number of files +existing for each type of zones. In this example, there is only one +conventional zone file (all conventional zones are aggregated under a single +file). + +# ls -l /mnt/cnv +total 137101312 +-rw-r----- 1 root root 140391743488 Nov 25 13:23 0 + +This aggregated conventional zone file can be used as a regular file. + +# mkfs.ext4 /mnt/cnv/0 +# mount -o loop /mnt/cnv/0 /data + +The "seq" sub-directory grouping files for sequential write zones has in this +example 55356 zones. + +# ls -lv /mnt/seq +total 14511243264 +-rw-r----- 1 root root 0 Nov 25 13:23 0 +-rw-r----- 1 root root 0 Nov 25 13:23 1 +-rw-r----- 1 root root 0 Nov 25 13:23 2 +... +-rw-r----- 1 root root 0 Nov 25 13:23 55354 +-rw-r----- 1 root root 0 Nov 25 13:23 55355 + +For sequential write zone files, the file size changes as data is appended at +the end of the file, similarly to any regular file system. + +# dd if=/dev/zero of=/mnt/seq/0 bs=4096 count=1 conv=notrunc oflag=direct +1+0 records in +1+0 records out +4096 bytes (4.1 kB, 4.0 KiB) copied, 0.00044121 s, 9.3 MB/s + +# ls -l /mnt/seq/0 +-rw-r----- 1 root root 4096 Nov 25 13:23 /mnt/seq/0 + +The written file can be truncated to the zone size, preventing any further +write operation. + +# truncate -s 268435456 /mnt/seq/0 +# ls -l /mnt/seq/0 +-rw-r----- 1 root root 268435456 Nov 25 13:49 /mnt/seq/0 + +Truncation to 0 size allows freeing the file zone storage space and restart +append-writes to the file. + +# truncate -s 0 /mnt/seq/0 +# ls -l /mnt/seq/0 +-rw-r----- 1 root root 0 Nov 25 13:49 /mnt/seq/0 + +Since files are statically mapped to zones on the disk, the number of blocks of +a file as reported by stat() and fstat() indicates the size of the file zone. + +# stat /mnt/seq/0 + File: /mnt/seq/0 + Size: 0 Blocks: 524288 IO Block: 4096 regular empty file +Device: 870h/2160d Inode: 50431 Links: 1 +Access: (0640/-rw-r-----) Uid: ( 0/ root) Gid: ( 0/ root) +Access: 2019-11-25 13:23:57.048971997 +0900 +Modify: 2019-11-25 13:52:25.553805765 +0900 +Change: 2019-11-25 13:52:25.553805765 +0900 + Birth: - + +The number of blocks of the file ("Blocks") in units of 512B blocks gives the +maximum file size of 524288 * 512 B = 256 MB, corresponding to the device zone +size in this example. Of note is that the "IO block" field always indicates the +minimum I/O size for writes and corresponds to the device physical sector size. diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index d7e6534a8505..0e0eb2c8da7d 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -28,7 +28,6 @@ This document describes the Linux kernel Makefiles. --- 4.3 Using C++ for host programs --- 4.4 Controlling compiler options for host programs --- 4.5 When host programs are actually built - --- 4.6 Using hostprogs-$(CONFIG_FOO) === 5 Kbuild clean infrastructure @@ -595,11 +594,11 @@ compilation stage. Two steps are required in order to use a host executable. The first step is to tell kbuild that a host program exists. This is -done utilising the variable hostprogs-y. +done utilising the variable "hostprogs". The second step is to add an explicit dependency to the executable. This can be done in two ways. Either add the dependency in a rule, -or utilise the variable $(always). +or utilise the variable "always-y". Both possibilities are described in the following. 4.1 Simple Host Program @@ -612,7 +611,7 @@ Both possibilities are described in the following. Example:: - hostprogs-y := bin2hex + hostprogs := bin2hex Kbuild assumes in the above example that bin2hex is made from a single c-source file named bin2hex.c located in the same directory as @@ -630,7 +629,7 @@ Both possibilities are described in the following. Example:: #scripts/lxdialog/Makefile - hostprogs-y := lxdialog + hostprogs := lxdialog lxdialog-objs := checklist.o lxdialog.o Objects with extension .o are compiled from the corresponding .c @@ -650,7 +649,7 @@ Both possibilities are described in the following. Example:: #scripts/kconfig/Makefile - hostprogs-y := qconf + hostprogs := qconf qconf-cxxobjs := qconf.o In the example above the executable is composed of the C++ file @@ -662,7 +661,7 @@ Both possibilities are described in the following. Example:: #scripts/kconfig/Makefile - hostprogs-y := qconf + hostprogs := qconf qconf-cxxobjs := qconf.o qconf-objs := check.o @@ -710,7 +709,7 @@ Both possibilities are described in the following. Example:: #drivers/pci/Makefile - hostprogs-y := gen-devlist + hostprogs := gen-devlist $(obj)/devlist.h: $(src)/pci.ids $(obj)/gen-devlist ( cd $(obj); ./gen-devlist ) < $< @@ -718,47 +717,31 @@ Both possibilities are described in the following. $(obj)/gen-devlist is updated. Note that references to the host programs in special rules must be prefixed with $(obj). - (2) Use $(always) + (2) Use always-y When there is no suitable special rule, and the host program - shall be built when a makefile is entered, the $(always) + shall be built when a makefile is entered, the always-y variable shall be used. Example:: #scripts/lxdialog/Makefile - hostprogs-y := lxdialog - always := $(hostprogs-y) + hostprogs := lxdialog + always-y := $(hostprogs) This will tell kbuild to build lxdialog even if not referenced in any rule. -4.6 Using hostprogs-$(CONFIG_FOO) ---------------------------------- - - A typical pattern in a Kbuild file looks like this: - - Example:: - - #scripts/Makefile - hostprogs-$(CONFIG_KALLSYMS) += kallsyms - - Kbuild knows about both 'y' for built-in and 'm' for module. - So if a config symbol evaluates to 'm', kbuild will still build - the binary. In other words, Kbuild handles hostprogs-m exactly - like hostprogs-y. But only hostprogs-y is recommended to be used - when no CONFIG symbols are involved. - 5 Kbuild clean infrastructure ============================= "make clean" deletes most generated files in the obj tree where the kernel is compiled. This includes generated files such as host programs. -Kbuild knows targets listed in $(hostprogs-y), $(hostprogs-m), $(always), -$(extra-y) and $(targets). They are all deleted during "make clean". -Files matching the patterns "*.[oas]", "*.ko", plus some additional files -generated by kbuild are deleted all over the kernel src tree when -"make clean" is executed. +Kbuild knows targets listed in $(hostprogs), $(always-y), $(always-m), +$(always-), $(extra-y), $(extra-) and $(targets). They are all deleted +during "make clean". Files matching the patterns "*.[oas]", "*.ko", plus +some additional files generated by kbuild are deleted all over the kernel +source tree when "make clean" is executed. Additional files or directories can be specified in kbuild makefiles by use of $(clean-files). @@ -1269,12 +1252,12 @@ When kbuild executes, the following steps are followed (roughly): Example:: #arch/x86/kernel/Makefile - always := vmlinux.lds + extra-y := vmlinux.lds #Makefile export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH) - The assignment to $(always) is used to tell kbuild to build the + The assignment to extra-y is used to tell kbuild to build the target vmlinux.lds. The assignment to $(CPPFLAGS_vmlinux.lds) tells kbuild to use the specified options when building the target vmlinux.lds. @@ -7,7 +7,7 @@ bounds-file := include/generated/bounds.h -always := $(bounds-file) +always-y := $(bounds-file) targets := kernel/bounds.s $(bounds-file): kernel/bounds.s FORCE @@ -28,7 +28,7 @@ $(timeconst-file): kernel/time/timeconst.bc FORCE offsets-file := include/generated/asm-offsets.h -always += $(offsets-file) +always-y += $(offsets-file) targets += arch/$(SRCARCH)/kernel/asm-offsets.s arch/$(SRCARCH)/kernel/asm-offsets.s: $(timeconst-file) $(bounds-file) @@ -39,7 +39,7 @@ $(offsets-file): arch/$(SRCARCH)/kernel/asm-offsets.s FORCE ##### # Check for missing system calls -always += missing-syscalls +always-y += missing-syscalls quiet_cmd_syscalls = CALL $< cmd_syscalls = $(CONFIG_SHELL) $< $(CC) $(c_flags) $(missing_syscalls_flags) @@ -50,7 +50,7 @@ missing-syscalls: scripts/checksyscalls.sh $(offsets-file) FORCE ##### # Check atomic headers are up-to-date -always += old-atomics +always-y += old-atomics quiet_cmd_atomics = CALL $< cmd_atomics = $(CONFIG_SHELL) $< diff --git a/MAINTAINERS b/MAINTAINERS index c74e4ea714a5..a0d86490c2c6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2796,11 +2796,11 @@ F: drivers/block/aoe/ ATHEROS 71XX/9XXX GPIO DRIVER M: Alban Bedel <[email protected]> +S: Maintained W: https://github.com/AlbanBedel/linux T: git git://github.com/AlbanBedel/linux -S: Maintained -F: drivers/gpio/gpio-ath79.c F: Documentation/devicetree/bindings/gpio/gpio-ath79.txt +F: drivers/gpio/gpio-ath79.c ATHEROS 71XX/9XXX USB PHY DRIVER M: Alban Bedel <[email protected]> @@ -3422,8 +3422,8 @@ BROADCOM BRCMSTB GPIO DRIVER M: Gregory Fong <[email protected]> S: Supported -F: drivers/gpio/gpio-brcmstb.c F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt +F: drivers/gpio/gpio-brcmstb.c BROADCOM BRCMSTB I2C DRIVER M: Kamal Dasu <[email protected]> @@ -3481,8 +3481,8 @@ BROADCOM KONA GPIO DRIVER M: Ray Jui <[email protected]> S: Supported -F: drivers/gpio/gpio-bcm-kona.c F: Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt +F: drivers/gpio/gpio-bcm-kona.c BROADCOM NETXTREME-E ROCE DRIVER M: Selvin Xavier <[email protected]> @@ -3597,8 +3597,8 @@ F: sound/pci/bt87x.c BT8XXGPIO DRIVER M: Michael Buesch <[email protected]> -W: http://bu3sch.de/btgpio.php S: Maintained +W: http://bu3sch.de/btgpio.php F: drivers/gpio/gpio-bt8xx.c BTRFS FILE SYSTEM @@ -7143,18 +7143,18 @@ GPIO SUBSYSTEM M: Linus Walleij <[email protected]> M: Bartosz Golaszewski <[email protected]> -T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git +F: Documentation/ABI/obsolete/sysfs-gpio +F: Documentation/ABI/testing/gpio-cdev +F: Documentation/admin-guide/gpio/ F: Documentation/devicetree/bindings/gpio/ F: Documentation/driver-api/gpio/ -F: Documentation/admin-guide/gpio/ -F: Documentation/ABI/testing/gpio-cdev -F: Documentation/ABI/obsolete/sysfs-gpio F: drivers/gpio/ +F: include/asm-generic/gpio.h F: include/linux/gpio/ F: include/linux/gpio.h F: include/linux/of_gpio.h -F: include/asm-generic/gpio.h F: include/uapi/linux/gpio.h F: tools/gpio/ @@ -8055,8 +8055,8 @@ F: drivers/scsi/ips.* ICH LPC AND GPIO DRIVER M: Peter Tyser <[email protected]> S: Maintained -F: drivers/mfd/lpc_ich.c F: drivers/gpio/gpio-ich.c +F: drivers/mfd/lpc_ich.c ICY I2C DRIVER M: Max Staudt <[email protected]> @@ -13360,7 +13360,7 @@ S: Maintained F: fs/timerfd.c F: include/linux/timer* F: include/linux/time_namespace.h -F: kernel/time_namespace.c +F: kernel/time/namespace.c F: kernel/time/*timer* POWER MANAGEMENT CORE @@ -16075,8 +16075,8 @@ F: Documentation/devicetree/bindings/reset/snps,axs10x-reset.txt SYNOPSYS CREG GPIO DRIVER M: Eugeniy Paltsev <[email protected]> S: Maintained -F: drivers/gpio/gpio-creg-snps.c F: Documentation/devicetree/bindings/gpio/snps,creg-gpio.txt +F: drivers/gpio/gpio-creg-snps.c SYNOPSYS DESIGNWARE 8250 UART DRIVER R: Andy Shevchenko <[email protected]> @@ -16087,8 +16087,8 @@ SYNOPSYS DESIGNWARE APB GPIO DRIVER M: Hoan Tran <[email protected]> S: Maintained -F: drivers/gpio/gpio-dwapb.c F: Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt +F: drivers/gpio/gpio-dwapb.c SYNOPSYS DESIGNWARE AXI DMAC DRIVER M: Eugeniy Paltsev <[email protected]> @@ -17791,6 +17791,12 @@ F: include/linux/vbox_utils.h F: include/uapi/linux/vbox*.h F: drivers/virt/vboxguest/ +VIRTUAL BOX SHARED FOLDER VFS DRIVER: +M: Hans de Goede <[email protected]> +S: Maintained +F: fs/vboxsf/* + VIRTUAL SERIO DEVICE DRIVER M: Stephen Chandler Paul <[email protected]> S: Maintained @@ -18408,8 +18414,8 @@ M: Nandor Han <[email protected]> M: Semi Malinen <[email protected]> S: Maintained -F: drivers/gpio/gpio-xra1403.c F: Documentation/devicetree/bindings/gpio/gpio-xra1403.txt +F: drivers/gpio/gpio-xra1403.c XTENSA XTFPGA PLATFORM SUPPORT M: Max Filippov <[email protected]> @@ -18490,6 +18496,16 @@ L: [email protected] S: Maintained F: arch/x86/kernel/cpu/zhaoxin.c +ZONEFS FILESYSTEM +M: Damien Le Moal <[email protected]> +M: Naohiro Aota <[email protected]> +R: Johannes Thumshirn <[email protected]> +T: git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs.git +S: Maintained +F: fs/zonefs/ +F: Documentation/filesystems/zonefs.txt + ZPOOL COMPRESSED PAGE STORAGE API M: Dan Streetman <[email protected]> @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 -PATCHLEVEL = 5 +PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Kleptomaniac Octopus # *DOCUMENTATION* @@ -1679,7 +1679,7 @@ PHONY += descend $(build-dirs) descend: $(build-dirs) $(build-dirs): prepare $(Q)$(MAKE) $(build)=$@ \ - single-build=$(if $(filter-out $@/, $(single-no-ko)),1) \ + single-build=$(if $(filter-out $@/, $(filter $@/%, $(single-no-ko))),1) \ need-builtin=1 need-modorder=1 clean-dirs := $(addprefix _clean_, $(clean-dirs)) diff --git a/arch/alpha/boot/Makefile b/arch/alpha/boot/Makefile index 991e023a6fc4..d8dba85e606c 100644 --- a/arch/alpha/boot/Makefile +++ b/arch/alpha/boot/Makefile @@ -8,7 +8,7 @@ # Copyright (C) 1994 by Linus Torvalds # -hostprogs-y := tools/mkbb tools/objstrip +hostprogs := tools/mkbb tools/objstrip targets := vmlinux.gz vmlinux \ vmlinux.nh tools/lxboot tools/bootlx tools/bootph \ tools/bootpzh bootloader bootpheader bootpzheader diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index b5752f0e8936..c815477b4303 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -326,16 +326,16 @@ static inline u64 __gic_readq_nonatomic(const volatile void __iomem *addr) #define gits_write_cwriter(v, c) __gic_writeq_nonatomic(v, c) /* - * GITS_VPROPBASER - hi and lo bits may be accessed independently. + * GICR_VPROPBASER - hi and lo bits may be accessed independently. */ -#define gits_read_vpropbaser(c) __gic_readq_nonatomic(c) -#define gits_write_vpropbaser(v, c) __gic_writeq_nonatomic(v, c) +#define gicr_read_vpropbaser(c) __gic_readq_nonatomic(c) +#define gicr_write_vpropbaser(v, c) __gic_writeq_nonatomic(v, c) /* - * GITS_VPENDBASER - the Valid bit must be cleared before changing + * GICR_VPENDBASER - the Valid bit must be cleared before changing * anything else. */ -static inline void gits_write_vpendbaser(u64 val, void __iomem *addr) +static inline void gicr_write_vpendbaser(u64 val, void __iomem *addr) { u32 tmp; @@ -352,7 +352,7 @@ static inline void gits_write_vpendbaser(u64 val, void __iomem *addr) __gic_writeq_nonatomic(val, addr); } -#define gits_read_vpendbaser(c) __gic_readq_nonatomic(c) +#define gicr_read_vpendbaser(c) __gic_readq_nonatomic(c) static inline bool gic_prio_masking_enabled(void) { diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 1babb392e70a..d3c9f03e7e79 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -5,7 +5,7 @@ ARCH_REL_TYPE_ABS := R_ARM_JUMP_SLOT|R_ARM_GLOB_DAT|R_ARM_ABS32 include $(srctree)/lib/vdso/Makefile -hostprogs-y := vdsomunge +hostprogs := vdsomunge obj-vdso := vgettimeofday.o datapage.o note.o diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 4750fc8030c3..25fec4bde43a 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -140,11 +140,11 @@ static inline u32 gic_read_rpr(void) #define gicr_write_pendbaser(v, c) writeq_relaxed(v, c) #define gicr_read_pendbaser(c) readq_relaxed(c) -#define gits_write_vpropbaser(v, c) writeq_relaxed(v, c) -#define gits_read_vpropbaser(c) readq_relaxed(c) +#define gicr_write_vpropbaser(v, c) writeq_relaxed(v, c) +#define gicr_read_vpropbaser(c) readq_relaxed(c) -#define gits_write_vpendbaser(v, c) writeq_relaxed(v, c) -#define gits_read_vpendbaser(c) readq_relaxed(c) +#define gicr_write_vpendbaser(v, c) writeq_relaxed(v, c) +#define gicr_read_vpendbaser(c) readq_relaxed(c) static inline bool gic_prio_masking_enabled(void) { diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index b87c6e276ab1..7a6e81ca23a8 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -33,7 +33,6 @@ static inline u32 disr_to_esr(u64 disr) asmlinkage void enter_from_user_mode(void); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); -void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs); asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr); void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, @@ -47,7 +46,4 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr); void do_cp15instr(unsigned int esr, struct pt_regs *regs); void do_el0_svc(struct pt_regs *regs); void do_el0_svc_compat(struct pt_regs *regs); -void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, - struct pt_regs *regs); - #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 102404dc1e13..9083d6992603 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -18,6 +18,10 @@ * See: * https://lore.kernel.org/lkml/[email protected] */ -#define vcpu_is_preempted(cpu) false +#define vcpu_is_preempted vcpu_is_preempted +static inline bool vcpu_is_preempted(int cpu) +{ + return false; +} #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 53b8a4ee64ff..91a83104c6e8 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -11,6 +11,7 @@ #include <linux/sched.h> #include <linux/types.h> +#include <asm/archrandom.h> #include <asm/cacheflush.h> #include <asm/fixmap.h> #include <asm/kernel-pgtable.h> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index bbb0f0c145f6..00626057a384 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -466,6 +466,13 @@ static void ssbs_thread_switch(struct task_struct *next) if (unlikely(next->flags & PF_KTHREAD)) return; + /* + * If all CPUs implement the SSBS extension, then we just need to + * context-switch the PSTATE field. + */ + if (cpu_have_feature(cpu_feature(SSBS))) + return; + /* If the mitigation is enabled, then we leave SSBS clear. */ if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) || test_tsk_thread_flag(next, TIF_SSBD)) @@ -608,8 +615,6 @@ long get_tagged_addr_ctrl(void) * only prevents the tagged address ABI enabling via prctl() and does not * disable it for tasks that already opted in to the relaxed ABI. */ -static int zero; -static int one = 1; static struct ctl_table tagged_addr_sysctl_table[] = { { @@ -618,8 +623,8 @@ static struct ctl_table tagged_addr_sysctl_table[] = { .data = &tagged_addr_disabled, .maxlen = sizeof(int), .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { } }; diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 73f06d4b3aae..eebbc8d7123e 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -23,7 +23,7 @@ #include <linux/irq.h> #include <linux/delay.h> #include <linux/clocksource.h> -#include <linux/clk-provider.h> +#include <linux/of_clk.h> #include <linux/acpi.h> #include <clocksource/arm_arch_timer.h> diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 76b327f88fbb..04df57b43cb1 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -115,9 +115,9 @@ VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd) # Borrow vdsomunge.c from the arm vDSO # We have to use a relative path because scripts/Makefile.host prefixes -# $(hostprogs-y) with $(obj) +# $(hostprogs) with $(obj) munge := ../../../arm/vdso/vdsomunge -hostprogs-y := $(munge) +hostprogs := $(munge) c-obj-vdso := note.o c-obj-vdso-gettimeofday := vgettimeofday.o diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile index 4ed45ade32a1..a3da2c5d63c2 100644 --- a/arch/mips/boot/Makefile +++ b/arch/mips/boot/Makefile @@ -21,7 +21,7 @@ endif drop-sections := .reginfo .mdebug .comment .note .pdr .options .MIPS.options strip-flags := $(addprefix --remove-section=,$(drop-sections)) -hostprogs-y := elf2ecoff +hostprogs := elf2ecoff suffix-y := bin suffix-$(CONFIG_KERNEL_BZIP2) := bz2 diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index d859f079b771..0df0ee8a298d 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -84,7 +84,7 @@ $(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE HOSTCFLAGS_calc_vmlinuz_load_addr.o += $(LINUXINCLUDE) # Calculate the load address of the compressed kernel image -hostprogs-y := calc_vmlinuz_load_addr +hostprogs := calc_vmlinuz_load_addr ifneq ($(zload-y),) VMLINUZ_LOAD_ADDRESS := $(zload-y) @@ -112,7 +112,7 @@ ifdef CONFIG_MACH_DECSTATION endif # elf2ecoff can only handle 32bit image -hostprogs-y += ../elf2ecoff +hostprogs += ../elf2ecoff ifdef CONFIG_32BIT VMLINUZ = vmlinuz diff --git a/arch/mips/boot/tools/Makefile b/arch/mips/boot/tools/Makefile index 5f8e737348eb..592e05a51a4a 100644 --- a/arch/mips/boot/tools/Makefile +++ b/arch/mips/boot/tools/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -hostprogs-y += relocs +hostprogs += relocs relocs-objs += relocs_32.o relocs-objs += relocs_64.o relocs-objs += relocs_main.o diff --git a/arch/mips/tools/Makefile b/arch/mips/tools/Makefile index aaef688749f5..b851e5dcc65a 100644 --- a/arch/mips/tools/Makefile +++ b/arch/mips/tools/Makefile @@ -1,10 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 -hostprogs-y := elf-entry +hostprogs := elf-entry PHONY += elf-entry elf-entry: $(obj)/elf-entry @: -hostprogs-$(CONFIG_CPU_LOONGSON3_WORKAROUNDS) += loongson3-llsc-check +hostprogs += loongson3-llsc-check PHONY += loongson3-llsc-check loongson3-llsc-check: $(obj)/loongson3-llsc-check @: diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index b2a2e032dc99..aa89a41dc5dd 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -100,7 +100,7 @@ $(obj)/%.so.raw: OBJCOPYFLAGS := -S $(obj)/%.so.raw: $(obj)/%.so.dbg.raw FORCE $(call if_changed,objcopy) -hostprogs-y := genvdso +hostprogs := genvdso quiet_cmd_genvdso = GENVDSO $@ define cmd_genvdso diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index dfbd7f22eef5..0556bf4fc9e9 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -224,7 +224,7 @@ $(patsubst %.S,%.o, $(filter %.S, $(src-boot))): %.o: %.S FORCE $(obj)/wrapper.a: $(obj-wlib) FORCE $(call if_changed,bootar) -hostprogs-y := addnote hack-coff mktree +hostprogs := addnote hack-coff mktree targets += $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a) extra-y := $(obj)/wrapper.a $(obj-plat) $(obj)/empty.o \ @@ -464,7 +464,7 @@ WRAPPER_BINDIR := /usr/sbin INSTALL := install extra-installed := $(patsubst $(obj)/%, $(DESTDIR)$(WRAPPER_OBJDIR)/%, $(extra-y)) -hostprogs-installed := $(patsubst %, $(DESTDIR)$(WRAPPER_BINDIR)/%, $(hostprogs-y)) +hostprogs-installed := $(patsubst %, $(DESTDIR)$(WRAPPER_BINDIR)/%, $(hostprogs)) wrapper-installed := $(DESTDIR)$(WRAPPER_BINDIR)/wrapper dts-installed := $(patsubst $(dtstree)/%, $(DESTDIR)$(WRAPPER_DTSDIR)/%, $(wildcard $(dtstree)/*.dts)) diff --git a/arch/s390/tools/Makefile b/arch/s390/tools/Makefile index b5e35e8f999a..f9dd47ff9ac4 100644 --- a/arch/s390/tools/Makefile +++ b/arch/s390/tools/Makefile @@ -10,8 +10,8 @@ PHONY += kapi kapi: $(kapi-hdrs-y) -hostprogs-y += gen_facilities -hostprogs-y += gen_opcode_table +hostprogs += gen_facilities +hostprogs += gen_opcode_table HOSTCFLAGS_gen_facilities.o += $(LINUXINCLUDE) diff --git a/arch/sparc/boot/Makefile b/arch/sparc/boot/Makefile index ec8cd703b708..380e2b018992 100644 --- a/arch/sparc/boot/Makefile +++ b/arch/sparc/boot/Makefile @@ -7,7 +7,7 @@ ROOT_IMG := /usr/src/root.img ELFTOAOUT := elftoaout -hostprogs-y := piggyback +hostprogs := piggyback targets := tftpboot.img image zImage vmlinux.aout clean-files := System.map diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index 997ffe46e953..708cb6304c2d 100644 --- a/arch/sparc/vdso/Makefile +++ b/arch/sparc/vdso/Makefile @@ -41,7 +41,7 @@ $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) HOST_EXTRACFLAGS += -I$(srctree)/tools/include -hostprogs-y += vdso2c +hostprogs += vdso2c quiet_cmd_vdso2c = VDSO2C $@ cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@ diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 748b6d28a91d..012b82fc8617 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -45,8 +45,8 @@ setup-y += video-vesa.o setup-y += video-bios.o targets += $(setup-y) -hostprogs-y := tools/build -hostprogs-$(CONFIG_X86_FEATURE_NAMES) += mkcpustr +hostprogs := tools/build +hostprogs += mkcpustr HOST_EXTRACFLAGS += -I$(srctree)/tools/include \ -include include/generated/autoconf.h \ diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 56aa5fa0a66b..26050ae0b27e 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -58,7 +58,7 @@ KBUILD_LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \ endif LDFLAGS_vmlinux := -T -hostprogs-y := mkpiggy +hostprogs := mkpiggy HOST_EXTRACFLAGS += -I$(srctree)/tools/include sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 25019d42ae93..ef2ad7253cd5 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -393,7 +393,13 @@ int count_immovable_mem_regions(void) table = table_addr + sizeof(struct acpi_table_srat); while (table + sizeof(struct acpi_subtable_header) < table_end) { + sub_table = (struct acpi_subtable_header *)table; + if (!sub_table->length) { + debug_putstr("Invalid zero length SRAT subtable.\n"); + return 0; + } + if (sub_table->type == ACPI_SRAT_TYPE_MEMORY_AFFINITY) { struct acpi_srat_mem_affinity *ma; diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 2b75e80f6b41..433a1259f61d 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -59,7 +59,7 @@ $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso_and_check) HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi -hostprogs-y += vdso2c +hostprogs += vdso2c quiet_cmd_vdso2c = VDSO2C $@ cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@ diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index caaf4dce99bf..b0da5320bcff 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -21,11 +21,15 @@ #include <linux/hyperv.h> #include <linux/slab.h> #include <linux/cpuhotplug.h> +#include <linux/syscore_ops.h> #include <clocksource/hyperv_timer.h> void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); +/* Storage to save the hypercall page temporarily for hibernation */ +static void *hv_hypercall_pg_saved; + u32 *hv_vp_index; EXPORT_SYMBOL_GPL(hv_vp_index); @@ -246,6 +250,48 @@ static int __init hv_pci_init(void) return 1; } +static int hv_suspend(void) +{ + union hv_x64_msr_hypercall_contents hypercall_msr; + + /* + * Reset the hypercall page as it is going to be invalidated + * accross hibernation. Setting hv_hypercall_pg to NULL ensures + * that any subsequent hypercall operation fails safely instead of + * crashing due to an access of an invalid page. The hypercall page + * pointer is restored on resume. + */ + hv_hypercall_pg_saved = hv_hypercall_pg; + hv_hypercall_pg = NULL; + + /* Disable the hypercall page in the hypervisor */ + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + hypercall_msr.enable = 0; + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + + return 0; +} + +static void hv_resume(void) +{ + union hv_x64_msr_hypercall_contents hypercall_msr; + + /* Re-enable the hypercall page */ + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + hypercall_msr.enable = 1; + hypercall_msr.guest_physical_address = + vmalloc_to_pfn(hv_hypercall_pg_saved); + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + + hv_hypercall_pg = hv_hypercall_pg_saved; + hv_hypercall_pg_saved = NULL; +} + +static struct syscore_ops hv_syscore_ops = { + .suspend = hv_suspend, + .resume = hv_resume, +}; + /* * This function is to be invoked early in the boot sequence after the * hypervisor has been detected. @@ -330,6 +376,8 @@ void __init hyperv_init(void) x86_init.pci.arch_init = hv_pci_init; + register_syscore_ops(&hv_syscore_ops); + return; remove_cpuhp_state: @@ -349,6 +397,8 @@ void hyperv_cleanup(void) { union hv_x64_msr_hypercall_contents hypercall_msr; + unregister_syscore_ops(&hv_syscore_ops); + /* Reset our OS id */ wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 2ebc17d9c72c..19e94af9cc5d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -140,6 +140,7 @@ extern void apic_soft_disable(void); extern void lapic_shutdown(void); extern void sync_Arb_IDs(void); extern void init_bsp_APIC(void); +extern void apic_intr_mode_select(void); extern void apic_intr_mode_init(void); extern void init_apic_mappings(void); void register_lapic_address(unsigned long address); @@ -188,6 +189,7 @@ static inline void disable_local_APIC(void) { } # define setup_secondary_APIC_clock x86_init_noop static inline void lapic_update_tsc_freq(void) { } static inline void init_bsp_APIC(void) { } +static inline void apic_intr_mode_select(void) { } static inline void apic_intr_mode_init(void) { } static inline void lapic_assign_system_vectors(void) { } static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } @@ -452,6 +454,14 @@ static inline void ack_APIC_irq(void) apic_eoi(); } + +static inline bool lapic_vector_set_in_irr(unsigned int vector) +{ + u32 irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + + return !!(irr & (1U << (vector % 32))); +} + static inline unsigned default_get_apic_id(unsigned long x) { unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 19435858df5f..96d9cd208610 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -51,12 +51,14 @@ struct x86_init_resources { * are set up. * @intr_init: interrupt init code * @trap_init: platform specific trap setup + * @intr_mode_select: interrupt delivery mode selection * @intr_mode_init: interrupt delivery mode setup */ struct x86_init_irqs { void (*pre_vector_init)(void); void (*intr_init)(void); void (*trap_init)(void); + void (*intr_mode_select)(void); void (*intr_mode_init)(void); }; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 28446fa6bf18..5f973fed3c9f 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -830,8 +830,17 @@ bool __init apic_needs_pit(void) if (!tsc_khz || !cpu_khz) return true; - /* Is there an APIC at all? */ - if (!boot_cpu_has(X86_FEATURE_APIC)) + /* Is there an APIC at all or is it disabled? */ + if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic) + return true; + + /* + * If interrupt delivery mode is legacy PIC or virtual wire without + * configuration, the local APIC timer wont be set up. Make sure + * that the PIT is initialized. + */ + if (apic_intr_mode == APIC_PIC || + apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG) return true; /* Virt guests may lack ARAT, but still have DEADLINE */ @@ -1322,7 +1331,7 @@ void __init sync_Arb_IDs(void) enum apic_intr_mode_id apic_intr_mode __ro_after_init; -static int __init apic_intr_mode_select(void) +static int __init __apic_intr_mode_select(void) { /* Check kernel option */ if (disable_apic) { @@ -1384,6 +1393,12 @@ static int __init apic_intr_mode_select(void) return APIC_SYMMETRIC_IO; } +/* Select the interrupt delivery mode for the BSP */ +void __init apic_intr_mode_select(void) +{ + apic_intr_mode = __apic_intr_mode_select(); +} + /* * An initial setup of the virtual wire mode. */ @@ -1440,8 +1455,6 @@ void __init apic_intr_mode_init(void) { bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT); - apic_intr_mode = apic_intr_mode_select(); - switch (apic_intr_mode) { case APIC_PIC: pr_info("APIC: Keep in PIC mode(8259)\n"); @@ -2626,6 +2639,13 @@ static int lapic_suspend(void) #endif local_irq_save(flags); + + /* + * Mask IOAPIC before disabling the local APIC to prevent stale IRR + * entries on some implementations. + */ + mask_ioapic_entries(); + disable_local_APIC(); irq_remapping_disable(); diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 7f7533462474..159bd0cb8548 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -23,10 +23,8 @@ static struct irq_domain *msi_default_domain; -static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) +static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg) { - struct irq_cfg *cfg = irqd_cfg(data); - msg->address_hi = MSI_ADDR_BASE_HI; if (x2apic_enabled()) @@ -47,6 +45,127 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) MSI_DATA_VECTOR(cfg->vector); } +static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) +{ + __irq_msi_compose_msg(irqd_cfg(data), msg); +} + +static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg) +{ + struct msi_msg msg[2] = { [1] = { }, }; + + __irq_msi_compose_msg(cfg, msg); + irq_data_get_irq_chip(irqd)->irq_write_msi_msg(irqd, msg); +} + +static int +msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force) +{ + struct irq_cfg old_cfg, *cfg = irqd_cfg(irqd); + struct irq_data *parent = irqd->parent_data; + unsigned int cpu; + int ret; + + /* Save the current configuration */ + cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd)); + old_cfg = *cfg; + + /* Allocate a new target vector */ + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) + return ret; + + /* + * For non-maskable and non-remapped MSI interrupts the migration + * to a different destination CPU and a different vector has to be + * done careful to handle the possible stray interrupt which can be + * caused by the non-atomic update of the address/data pair. + * + * Direct update is possible when: + * - The MSI is maskable (remapped MSI does not use this code path)). + * The quirk bit is not set in this case. + * - The new vector is the same as the old vector + * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up) + * - The new destination CPU is the same as the old destination CPU + */ + if (!irqd_msi_nomask_quirk(irqd) || + cfg->vector == old_cfg.vector || + old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR || + cfg->dest_apicid == old_cfg.dest_apicid) { + irq_msi_update_msg(irqd, cfg); + return ret; + } + + /* + * Paranoia: Validate that the interrupt target is the local + * CPU. + */ + if (WARN_ON_ONCE(cpu != smp_processor_id())) { + irq_msi_update_msg(irqd, cfg); + return ret; + } + + /* + * Redirect the interrupt to the new vector on the current CPU + * first. This might cause a spurious interrupt on this vector if + * the device raises an interrupt right between this update and the + * update to the final destination CPU. + * + * If the vector is in use then the installed device handler will + * denote it as spurious which is no harm as this is a rare event + * and interrupt handlers have to cope with spurious interrupts + * anyway. If the vector is unused, then it is marked so it won't + * trigger the 'No irq handler for vector' warning in do_IRQ(). + * + * This requires to hold vector lock to prevent concurrent updates to + * the affected vector. + */ + lock_vector_lock(); + + /* + * Mark the new target vector on the local CPU if it is currently + * unused. Reuse the VECTOR_RETRIGGERED state which is also used in + * the CPU hotplug path for a similar purpose. This cannot be + * undone here as the current CPU has interrupts disabled and + * cannot handle the interrupt before the whole set_affinity() + * section is done. In the CPU unplug case, the current CPU is + * about to vanish and will not handle any interrupts anymore. The + * vector is cleaned up when the CPU comes online again. + */ + if (IS_ERR_OR_NULL(this_cpu_read(vector_irq[cfg->vector]))) + this_cpu_write(vector_irq[cfg->vector], VECTOR_RETRIGGERED); + + /* Redirect it to the new vector on the local CPU temporarily */ + old_cfg.vector = cfg->vector; + irq_msi_update_msg(irqd, &old_cfg); + + /* Now transition it to the target CPU */ + irq_msi_update_msg(irqd, cfg); + + /* + * All interrupts after this point are now targeted at the new + * vector/CPU. + * + * Drop vector lock before testing whether the temporary assignment + * to the local CPU was hit by an interrupt raised in the device, + * because the retrigger function acquires vector lock again. + */ + unlock_vector_lock(); + + /* + * Check whether the transition raced with a device interrupt and + * is pending in the local APICs IRR. It is safe to do this outside + * of vector lock as the irq_desc::lock of this interrupt is still + * held and interrupts are disabled: The check is not accessing the + * underlying vector store. It's just checking the local APIC's + * IRR. + */ + if (lapic_vector_set_in_irr(cfg->vector)) + irq_data_get_irq_chip(irqd)->irq_retrigger(irqd); + + return ret; +} + /* * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, * which implement the MSI or MSI-X Capability Structure. @@ -58,6 +177,7 @@ static struct irq_chip pci_msi_controller = { .irq_ack = irq_chip_ack_parent, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_compose_msi_msg = irq_msi_compose_msg, + .irq_set_affinity = msi_set_affinity, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -146,6 +266,8 @@ void __init arch_init_msi_domain(struct irq_domain *parent) } if (!msi_default_domain) pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n"); + else + msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK; } #ifdef CONFIG_IRQ_REMAP diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 7ce29cee9f9e..d8673d8a779b 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -91,10 +91,18 @@ void __init hpet_time_init(void) static __init void x86_late_time_init(void) { + /* + * Before PIT/HPET init, select the interrupt mode. This is required + * to make the decision whether PIT should be initialized correct. + */ + x86_init.irqs.intr_mode_select(); + + /* Setup the legacy timers */ x86_init.timers.timer_init(); + /* - * After PIT/HPET timers init, select and setup - * the final interrupt mode for delivering IRQs. + * After PIT/HPET timers init, set up the final interrupt mode for + * delivering IRQs. */ x86_init.irqs.intr_mode_init(); tsc_init(); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 23e25f3034c2..85f1a90c55cd 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -80,6 +80,7 @@ struct x86_init_ops x86_init __initdata = { .pre_vector_init = init_ISA_irqs, .intr_init = native_init_IRQ, .trap_init = x86_init_noop, + .intr_mode_select = apic_intr_mode_select, .intr_mode_init = apic_intr_mode_init }, diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 59f7f6d60cf6..ae923ee8e2b4 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -308,7 +308,7 @@ static void __init efi_clean_memmap(void) .phys_map = efi.memmap.phys_map, .desc_version = efi.memmap.desc_version, .desc_size = efi.memmap.desc_size, - .size = data.desc_size * (efi.memmap.nr_map - n_removal), + .size = efi.memmap.desc_size * (efi.memmap.nr_map - n_removal), .flags = 0, }; diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index f60501a384f9..99b6332ba540 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -12,7 +12,7 @@ OBJECT_FILES_NON_STANDARD := y # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. KCOV_INSTRUMENT := n -always := realmode.bin realmode.relocs +always-y := realmode.bin realmode.relocs wakeup-objs := wakeup_asm.o wakemain.o video-mode.o wakeup-objs += copy.o bioscall.o regs.o diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index 09af7ff53044..55b1ab378974 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -26,7 +26,7 @@ posttest: $(obj)/insn_decoder_test vmlinux $(obj)/insn_sanity $(call cmd,posttest) $(call cmd,sanitytest) -hostprogs-y += insn_decoder_test insn_sanity +hostprogs += insn_decoder_test insn_sanity # -I needed for generated C source and C source which in the kernel tree. HOSTCFLAGS_insn_decoder_test.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/uapi/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/uapi/ @@ -39,7 +39,7 @@ $(obj)/insn_decoder_test.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/l $(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c HOST_EXTRACFLAGS += -I$(srctree)/tools/include -hostprogs-y += relocs +hostprogs += relocs relocs-objs := relocs_32.o relocs_64.o relocs_common.o PHONY += relocs relocs: $(obj)/relocs diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index ae4a41ca19f6..1f756ffffe8b 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1205,6 +1205,7 @@ asmlinkage __visible void __init xen_start_kernel(void) x86_platform.get_nmi_reason = xen_get_nmi_reason; x86_init.resources.memory_setup = xen_memory_setup; + x86_init.irqs.intr_mode_select = x86_init_noop; x86_init.irqs.intr_mode_init = x86_init_noop; x86_init.oem.arch_setup = xen_arch_setup; x86_init.oem.banner = xen_banner; diff --git a/crypto/Kconfig b/crypto/Kconfig index cdb51d4272d0..c24a47406f8f 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -136,8 +136,6 @@ config CRYPTO_USER Userspace configuration for cryptographic instantiations such as cbc(aes). -if CRYPTO_MANAGER2 - config CRYPTO_MANAGER_DISABLE_TESTS bool "Disable run-time self tests" default y @@ -155,8 +153,6 @@ config CRYPTO_MANAGER_EXTRA_TESTS This is intended for developer use only, as these tests take much longer to run than the normal self tests. -endif # if CRYPTO_MANAGER2 - config CRYPTO_GF128MUL tristate diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h index 67f282e9e0af..6ad0517553d5 100644 --- a/drivers/acpi/acpica/achware.h +++ b/drivers/acpi/acpica/achware.h @@ -101,6 +101,8 @@ acpi_status acpi_hw_enable_all_runtime_gpes(void); acpi_status acpi_hw_enable_all_wakeup_gpes(void); +u8 acpi_hw_check_all_gpes(void); + acpi_status acpi_hw_enable_runtime_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, struct acpi_gpe_block_info *gpe_block, diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c index 2c39ff2a7406..f2de66bfd8a7 100644 --- a/drivers/acpi/acpica/evxfgpe.c +++ b/drivers/acpi/acpica/evxfgpe.c @@ -795,6 +795,38 @@ acpi_status acpi_enable_all_wakeup_gpes(void) ACPI_EXPORT_SYMBOL(acpi_enable_all_wakeup_gpes) +/****************************************************************************** + * + * FUNCTION: acpi_any_gpe_status_set + * + * PARAMETERS: None + * + * RETURN: Whether or not the status bit is set for any GPE + * + * DESCRIPTION: Check the status bits of all enabled GPEs and return TRUE if any + * of them is set or FALSE otherwise. + * + ******************************************************************************/ +u32 acpi_any_gpe_status_set(void) +{ + acpi_status status; + u8 ret; + + ACPI_FUNCTION_TRACE(acpi_any_gpe_status_set); + + status = acpi_ut_acquire_mutex(ACPI_MTX_EVENTS); + if (ACPI_FAILURE(status)) { + return (FALSE); + } + + ret = acpi_hw_check_all_gpes(); + (void)acpi_ut_release_mutex(ACPI_MTX_EVENTS); + + return (ret); +} + +ACPI_EXPORT_SYMBOL(acpi_any_gpe_status_set) + /******************************************************************************* * * FUNCTION: acpi_install_gpe_block diff --git a/drivers/acpi/acpica/hwgpe.c b/drivers/acpi/acpica/hwgpe.c index 1b4252bdcd0b..f4c285c2f595 100644 --- a/drivers/acpi/acpica/hwgpe.c +++ b/drivers/acpi/acpica/hwgpe.c @@ -446,6 +446,53 @@ acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, /****************************************************************************** * + * FUNCTION: acpi_hw_get_gpe_block_status + * + * PARAMETERS: gpe_xrupt_info - GPE Interrupt info + * gpe_block - Gpe Block info + * + * RETURN: Success + * + * DESCRIPTION: Produce a combined GPE status bits mask for the given block. + * + ******************************************************************************/ + +static acpi_status +acpi_hw_get_gpe_block_status(struct acpi_gpe_xrupt_info *gpe_xrupt_info, + struct acpi_gpe_block_info *gpe_block, + void *ret_ptr) +{ + struct acpi_gpe_register_info *gpe_register_info; + u64 in_enable, in_status; + acpi_status status; + u8 *ret = ret_ptr; + u32 i; + + /* Examine each GPE Register within the block */ + + for (i = 0; i < gpe_block->register_count; i++) { + gpe_register_info = &gpe_block->register_info[i]; + + status = acpi_hw_read(&in_enable, + &gpe_register_info->enable_address); + if (ACPI_FAILURE(status)) { + continue; + } + + status = acpi_hw_read(&in_status, + &gpe_register_info->status_address); + if (ACPI_FAILURE(status)) { + continue; + } + + *ret |= in_enable & in_status; + } + + return (AE_OK); +} + +/****************************************************************************** + * * FUNCTION: acpi_hw_disable_all_gpes * * PARAMETERS: None @@ -510,4 +557,28 @@ acpi_status acpi_hw_enable_all_wakeup_gpes(void) return_ACPI_STATUS(status); } +/****************************************************************************** + * + * FUNCTION: acpi_hw_check_all_gpes + * + * PARAMETERS: None + * + * RETURN: Combined status of all GPEs + * + * DESCRIPTION: Check all enabled GPEs in all GPE blocks and return TRUE if the + * status bit is set for at least one of them of FALSE otherwise. + * + ******************************************************************************/ + +u8 acpi_hw_check_all_gpes(void) +{ + u8 ret = 0; + + ACPI_FUNCTION_TRACE(acpi_hw_check_all_gpes); + + (void)acpi_ev_walk_gpe_list(acpi_hw_get_gpe_block_status, &ret); + + return (ret != 0); +} + #endif /* !ACPI_REDUCED_HARDWARE */ diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 08bc9751fe66..d1f1cf5d4bf0 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -179,6 +179,7 @@ EXPORT_SYMBOL(first_ec); static struct acpi_ec *boot_ec; static bool boot_ec_is_ecdt = false; +static struct workqueue_struct *ec_wq; static struct workqueue_struct *ec_query_wq; static int EC_FLAGS_QUERY_HANDSHAKE; /* Needs QR_EC issued when SCI_EVT set */ @@ -469,7 +470,7 @@ static void acpi_ec_submit_query(struct acpi_ec *ec) ec_dbg_evt("Command(%s) submitted/blocked", acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY)); ec->nr_pending_queries++; - schedule_work(&ec->work); + queue_work(ec_wq, &ec->work); } } @@ -535,7 +536,7 @@ static void acpi_ec_enable_event(struct acpi_ec *ec) #ifdef CONFIG_PM_SLEEP static void __acpi_ec_flush_work(void) { - flush_scheduled_work(); /* flush ec->work */ + drain_workqueue(ec_wq); /* flush ec->work */ flush_workqueue(ec_query_wq); /* flush queries */ } @@ -556,8 +557,8 @@ static void acpi_ec_disable_event(struct acpi_ec *ec) void acpi_ec_flush_work(void) { - /* Without ec_query_wq there is nothing to flush. */ - if (!ec_query_wq) + /* Without ec_wq there is nothing to flush. */ + if (!ec_wq) return; __acpi_ec_flush_work(); @@ -2107,25 +2108,33 @@ static struct acpi_driver acpi_ec_driver = { .drv.pm = &acpi_ec_pm, }; -static inline int acpi_ec_query_init(void) +static void acpi_ec_destroy_workqueues(void) { - if (!ec_query_wq) { - ec_query_wq = alloc_workqueue("kec_query", 0, - ec_max_queries); - if (!ec_query_wq) - return -ENODEV; + if (ec_wq) { + destroy_workqueue(ec_wq); + ec_wq = NULL; } - return 0; -} - -static inline void acpi_ec_query_exit(void) -{ if (ec_query_wq) { destroy_workqueue(ec_query_wq); ec_query_wq = NULL; } } +static int acpi_ec_init_workqueues(void) +{ + if (!ec_wq) + ec_wq = alloc_ordered_workqueue("kec", 0); + + if (!ec_query_wq) + ec_query_wq = alloc_workqueue("kec_query", 0, ec_max_queries); + + if (!ec_wq || !ec_query_wq) { + acpi_ec_destroy_workqueues(); + return -ENODEV; + } + return 0; +} + static const struct dmi_system_id acpi_ec_no_wakeup[] = { { .ident = "Thinkpad X1 Carbon 6th", @@ -2156,8 +2165,7 @@ int __init acpi_ec_init(void) int result; int ecdt_fail, dsdt_fail; - /* register workqueue for _Qxx evaluations */ - result = acpi_ec_query_init(); + result = acpi_ec_init_workqueues(); if (result) return result; @@ -2188,6 +2196,6 @@ static void __exit acpi_ec_exit(void) { acpi_bus_unregister_driver(&acpi_ec_driver); - acpi_ec_query_exit(); + acpi_ec_destroy_workqueues(); } #endif /* 0 */ diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 439880629839..152f7fc0b200 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -990,21 +990,34 @@ static void acpi_s2idle_sync(void) acpi_os_wait_events_complete(); /* synchronize Notify handling */ } -static void acpi_s2idle_wake(void) +static bool acpi_s2idle_wake(void) { - /* - * If IRQD_WAKEUP_ARMED is set for the SCI at this point, the SCI has - * not triggered while suspended, so bail out. - */ - if (!acpi_sci_irq_valid() || - irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq))) - return; + if (!acpi_sci_irq_valid()) + return pm_wakeup_pending(); + + while (pm_wakeup_pending()) { + /* + * If IRQD_WAKEUP_ARMED is set for the SCI at this point, the + * SCI has not triggered while suspended, so bail out (the + * wakeup is pending anyway and the SCI is not the source of + * it). + */ + if (irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq))) + return true; + + /* + * If there are no EC events to process and at least one of the + * other enabled GPEs is active, the wakeup is regarded as a + * genuine one. + * + * Note that the checks below must be carried out in this order + * to avoid returning prematurely due to a change of the EC GPE + * status bit from unset to set between the checks with the + * status bits of all the other GPEs unset. + */ + if (acpi_any_gpe_status_set() && !acpi_ec_dispatch_gpe()) + return true; - /* - * If there are EC events to process, the wakeup may be a spurious one - * coming from the EC. - */ - if (acpi_ec_dispatch_gpe()) { /* * Cancel the wakeup and process all pending events in case * there are any wakeup ones in there. @@ -1017,8 +1030,19 @@ static void acpi_s2idle_wake(void) acpi_s2idle_sync(); + /* + * The SCI is in the "suspended" state now and it cannot produce + * new wakeup events till the rearming below, so if any of them + * are pending here, they must be resulting from the processing + * of EC events above or coming from somewhere else. + */ + if (pm_wakeup_pending()) + return true; + rearm_wake_irq(acpi_sci_irq); } + + return false; } static void acpi_s2idle_restore_early(void) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 4adac3a8c265..cbe6c94bf158 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -105,6 +105,8 @@ bool have_governor_per_policy(void) } EXPORT_SYMBOL_GPL(have_governor_per_policy); +static struct kobject *cpufreq_global_kobject; + struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy) { if (have_governor_per_policy()) @@ -2745,9 +2747,6 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) } EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); -struct kobject *cpufreq_global_kobject; -EXPORT_SYMBOL(cpufreq_global_kobject); - static int __init cpufreq_core_init(void) { if (cpufreq_disabled()) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 26a654dbc69a..0aa4b6bc5101 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -61,7 +61,7 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) { if (!blk_queue_dax(bdev->bd_queue)) return NULL; - return fs_dax_get_by_host(bdev->bd_disk->disk_name); + return dax_get_by_host(bdev->bd_disk->disk_name); } EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev); #endif diff --git a/drivers/gpio/gpio-bd71828.c b/drivers/gpio/gpio-bd71828.c index 04aade9e0a4d..3dbbc638e9a9 100644 --- a/drivers/gpio/gpio-bd71828.c +++ b/drivers/gpio/gpio-bd71828.c @@ -10,16 +10,6 @@ #define GPIO_OUT_REG(off) (BD71828_REG_GPIO_CTRL1 + (off)) #define HALL_GPIO_OFFSET 3 -/* - * These defines can be removed when - * "gpio: Add definition for GPIO direction" - * (9208b1e77d6e8e9776f34f46ef4079ecac9c3c25 in GPIO tree) gets merged, - */ -#ifndef GPIO_LINE_DIRECTION_IN - #define GPIO_LINE_DIRECTION_IN 1 - #define GPIO_LINE_DIRECTION_OUT 0 -#endif - struct bd71828_gpio { struct rohm_regmap_dev chip; struct gpio_chip gpio; diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c index 147a1bd04515..c54dd08f2cbf 100644 --- a/drivers/gpio/gpio-sifive.c +++ b/drivers/gpio/gpio-sifive.c @@ -35,7 +35,7 @@ struct sifive_gpio { void __iomem *base; struct gpio_chip gc; struct regmap *regs; - u32 irq_state; + unsigned long irq_state; unsigned int trigger[SIFIVE_GPIO_MAX]; unsigned int irq_parent[SIFIVE_GPIO_MAX]; }; @@ -94,7 +94,7 @@ static void sifive_gpio_irq_enable(struct irq_data *d) spin_unlock_irqrestore(&gc->bgpio_lock, flags); /* Enable interrupts */ - assign_bit(offset, (unsigned long *)&chip->irq_state, 1); + assign_bit(offset, &chip->irq_state, 1); sifive_gpio_set_ie(chip, offset); } @@ -104,7 +104,7 @@ static void sifive_gpio_irq_disable(struct irq_data *d) struct sifive_gpio *chip = gpiochip_get_data(gc); int offset = irqd_to_hwirq(d) % SIFIVE_GPIO_MAX; - assign_bit(offset, (unsigned long *)&chip->irq_state, 0); + assign_bit(offset, &chip->irq_state, 0); sifive_gpio_set_ie(chip, offset); irq_chip_disable_parent(d); } diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c index a9748b5198e6..67f9f82e0db0 100644 --- a/drivers/gpio/gpio-xilinx.c +++ b/drivers/gpio/gpio-xilinx.c @@ -147,9 +147,10 @@ static void xgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask, for (i = 0; i < gc->ngpio; i++) { if (*mask == 0) break; + /* Once finished with an index write it out to the register */ if (index != xgpio_index(chip, i)) { xgpio_writereg(chip->regs + XGPIO_DATA_OFFSET + - xgpio_regoffset(chip, i), + index * XGPIO_CHANNEL_OFFSET, chip->gpio_state[index]); spin_unlock_irqrestore(&chip->gpio_lock[index], flags); index = xgpio_index(chip, i); @@ -165,7 +166,7 @@ static void xgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask, } xgpio_writereg(chip->regs + XGPIO_DATA_OFFSET + - xgpio_regoffset(chip, i), chip->gpio_state[index]); + index * XGPIO_CHANNEL_OFFSET, chip->gpio_state[index]); spin_unlock_irqrestore(&chip->gpio_lock[index], flags); } diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 753283486037..4d0106ceeba7 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -3035,13 +3035,33 @@ EXPORT_SYMBOL_GPL(gpiochip_free_own_desc); * rely on gpio_request() having been called beforehand. */ -static int gpio_set_config(struct gpio_chip *gc, unsigned int offset, - enum pin_config_param mode) +static int gpio_do_set_config(struct gpio_chip *gc, unsigned int offset, + unsigned long config) { if (!gc->set_config) return -ENOTSUPP; - return gc->set_config(gc, offset, mode); + return gc->set_config(gc, offset, config); +} + +static int gpio_set_config(struct gpio_chip *gc, unsigned int offset, + enum pin_config_param mode) +{ + unsigned long config; + unsigned arg; + + switch (mode) { + case PIN_CONFIG_BIAS_PULL_DOWN: + case PIN_CONFIG_BIAS_PULL_UP: + arg = 1; + break; + + default: + arg = 0; + } + + config = PIN_CONF_PACKED(mode, arg); + return gpio_do_set_config(gc, offset, config); } static int gpio_set_bias(struct gpio_chip *chip, struct gpio_desc *desc) @@ -3277,7 +3297,7 @@ int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce) chip = desc->gdev->chip; config = pinconf_to_config_packed(PIN_CONFIG_INPUT_DEBOUNCE, debounce); - return gpio_set_config(chip, gpio_chip_hwgpio(desc), config); + return gpio_do_set_config(chip, gpio_chip_hwgpio(desc), config); } EXPORT_SYMBOL_GPL(gpiod_set_debounce); @@ -3311,7 +3331,7 @@ int gpiod_set_transitory(struct gpio_desc *desc, bool transitory) packed = pinconf_to_config_packed(PIN_CONFIG_PERSIST_STATE, !transitory); gpio = gpio_chip_hwgpio(desc); - rc = gpio_set_config(chip, gpio, packed); + rc = gpio_do_set_config(chip, gpio, packed); if (rc == -ENOTSUPP) { dev_dbg(&desc->gdev->dev, "Persistence not supported for GPIO %d\n", gpio); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c index 07914e34bc25..1311d6aec5d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c @@ -52,7 +52,7 @@ static int amdgpu_perf_event_init(struct perf_event *event) return -ENOENT; /* update the hw_perf_event struct with config data */ - hwc->conf = event->attr.config; + hwc->config = event->attr.config; return 0; } @@ -74,9 +74,9 @@ static void amdgpu_perf_start(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: if (!(flags & PERF_EF_RELOAD)) - pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1); + pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, 1); - pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 0); + pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, 0); break; default: break; @@ -101,7 +101,7 @@ static void amdgpu_perf_read(struct perf_event *event) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->conf, + pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->config, &count); break; default: @@ -126,7 +126,7 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 0); + pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, 0); break; default: break; @@ -156,7 +156,8 @@ static int amdgpu_perf_add(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - retval = pe->adev->df.funcs->pmc_start(pe->adev, hwc->conf, 1); + retval = pe->adev->df.funcs->pmc_start(pe->adev, + hwc->config, 1); break; default: return 0; @@ -184,7 +185,7 @@ static void amdgpu_perf_del(struct perf_event *event, int flags) switch (pe->pmu_perf_type) { case PERF_TYPE_AMDGPU_DF: - pe->adev->df.funcs->pmc_stop(pe->adev, hwc->conf, 1); + pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, 1); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index d6deb0eb1e15..6fe057329de2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -179,6 +179,7 @@ struct amdgpu_vcn_inst { struct amdgpu_irq_src irq; struct amdgpu_vcn_reg external; struct amdgpu_bo *dpg_sram_bo; + struct dpg_pause_state pause_state; void *dpg_sram_cpu_addr; uint64_t dpg_sram_gpu_addr; uint32_t *dpg_sram_curr_addr; @@ -190,8 +191,6 @@ struct amdgpu_vcn { const struct firmware *fw; /* VCN firmware */ unsigned num_enc_rings; enum amd_powergating_state cur_state; - struct dpg_pause_state pause_state; - bool indirect_sram; uint8_t num_vcn_inst; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 90f64b8bc358..b33a4eb39193 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4374,9 +4374,17 @@ static int gfx_v9_0_ecc_late_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - r = gfx_v9_0_do_edc_gds_workarounds(adev); - if (r) - return r; + /* + * Temp workaround to fix the issue that CP firmware fails to + * update read pointer when CPDMA is writing clearing operation + * to GDS in suspend/resume sequence on several cards. So just + * limit this operation in cold boot sequence. + */ + if (!adev->in_suspend) { + r = gfx_v9_0_do_edc_gds_workarounds(adev); + if (r) + return r; + } /* requires IBs so do in late init after IB pool is initialized */ r = gfx_v9_0_do_edc_gpr_workarounds(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 1a24fadd30e2..71f61afdc655 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1207,9 +1207,10 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, struct amdgpu_ring *ring; /* pause/unpause if state is changed */ - if (adev->vcn.pause_state.fw_based != new_state->fw_based) { + if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) { DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", - adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, + adev->vcn.inst[inst_idx].pause_state.fw_based, + adev->vcn.inst[inst_idx].pause_state.jpeg, new_state->fw_based, new_state->jpeg); reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & @@ -1258,13 +1259,14 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); } - adev->vcn.pause_state.fw_based = new_state->fw_based; + adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; } /* pause/unpause if state is changed */ - if (adev->vcn.pause_state.jpeg != new_state->jpeg) { + if (adev->vcn.inst[inst_idx].pause_state.jpeg != new_state->jpeg) { DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", - adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, + adev->vcn.inst[inst_idx].pause_state.fw_based, + adev->vcn.inst[inst_idx].pause_state.jpeg, new_state->fw_based, new_state->jpeg); reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & @@ -1318,7 +1320,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); } - adev->vcn.pause_state.jpeg = new_state->jpeg; + adev->vcn.inst[inst_idx].pause_state.jpeg = new_state->jpeg; } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 4f7216788f11..c387c81f8695 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -1137,9 +1137,9 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, int ret_code; /* pause/unpause if state is changed */ - if (adev->vcn.pause_state.fw_based != new_state->fw_based) { + if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) { DRM_DEBUG("dpg pause state changed %d -> %d", - adev->vcn.pause_state.fw_based, new_state->fw_based); + adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based); reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); @@ -1185,7 +1185,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); } - adev->vcn.pause_state.fw_based = new_state->fw_based; + adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 70fae7977f8f..2d64ba1adf99 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -1367,9 +1367,9 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, int ret_code; /* pause/unpause if state is changed */ - if (adev->vcn.pause_state.fw_based != new_state->fw_based) { + if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) { DRM_DEBUG("dpg pause state changed %d -> %d", - adev->vcn.pause_state.fw_based, new_state->fw_based); + adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based); reg_data = RREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE) & (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); @@ -1407,14 +1407,14 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, RREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF); SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, - 0x0, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); } } else { /* unpause dpg, no need to wait */ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data); } - adev->vcn.pause_state.fw_based = new_state->fw_based; + adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; } return 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 279541517a99..63e8a12a74bc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8408,7 +8408,6 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream) /* Calculate number of static frames before generating interrupt to * enter PSR. */ - unsigned int frame_time_microsec = 1000000 / vsync_rate_hz; // Init fail safe of 2 frames static unsigned int num_frames_static = 2; @@ -8423,8 +8422,10 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream) * Calculate number of frames such that at least 30 ms of time has * passed. */ - if (vsync_rate_hz != 0) + if (vsync_rate_hz != 0) { + unsigned int frame_time_microsec = 1000000 / vsync_rate_hz; num_frames_static = (30000 / frame_time_microsec) + 1; + } params.triggers.cursor_update = true; params.triggers.overlay_update = true; diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index 629a07a2719b..c4ba6e84db65 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -711,10 +711,6 @@ static void enable_disp_power_gating_dmcub( power_gating.header.sub_type = DMUB_CMD__VBIOS_ENABLE_DISP_POWER_GATING; power_gating.power_gating.pwr = *pwr; - /* ATOM_ENABLE is old API in DMUB */ - if (power_gating.power_gating.pwr.enable == ATOM_ENABLE) - power_gating.power_gating.pwr.enable = ATOM_INIT; - dc_dmub_srv_cmd_queue(dmcub, &power_gating.header); dc_dmub_srv_cmd_execute(dmcub); dc_dmub_srv_wait_idle(dmcub); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index 3cd283195091..c0f6a8c7de7d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -87,6 +87,12 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN20) ############################################################################### CLK_MGR_DCN21 = rn_clk_mgr.o rn_clk_mgr_vbios_smu.o +# prevent build errors regarding soft-float vs hard-float FP ABI tags +# this code is currently unused on ppc64, as it applies to Renoir APUs only +ifdef CONFIG_PPC64 +CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) +endif + AMD_DAL_CLK_MGR_DCN21 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn21/,$(CLK_MGR_DCN21)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c index 495f01e9f2ca..49ce46b543ea 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c @@ -117,7 +117,7 @@ void dcn20_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr, prev_dppclk_khz = clk_mgr->base.ctx->dc->current_state->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz; - if (safe_to_lower || prev_dppclk_khz < dppclk_khz) { + if ((prev_dppclk_khz > dppclk_khz && safe_to_lower) || prev_dppclk_khz < dppclk_khz) { clk_mgr->dccg->funcs->update_dpp_dto( clk_mgr->dccg, dpp_inst, dppclk_khz); } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index 7ae4c06232dd..9ef3f7b91a1d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -151,6 +151,12 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base, rn_vbios_smu_set_min_deep_sleep_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_deep_sleep_khz); } + // workaround: Limit dppclk to 100Mhz to avoid lower eDP panel switch to plus 4K monitor underflow. + if (!IS_DIAG_DC(dc->ctx->dce_environment)) { + if (new_clocks->dppclk_khz < 100000) + new_clocks->dppclk_khz = 100000; + } + if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) { if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz) dpp_clock_lowered = true; @@ -412,19 +418,19 @@ void build_watermark_ranges(struct clk_bw_params *bw_params, struct pp_smu_wm_ra ranges->reader_wm_sets[num_valid_sets].wm_inst = bw_params->wm_table.entries[i].wm_inst; ranges->reader_wm_sets[num_valid_sets].wm_type = bw_params->wm_table.entries[i].wm_type; - /* We will not select WM based on dcfclk, so leave it as unconstrained */ - ranges->reader_wm_sets[num_valid_sets].min_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; - ranges->reader_wm_sets[num_valid_sets].max_drain_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; - /* fclk wil be used to select WM*/ + /* We will not select WM based on fclk, so leave it as unconstrained */ + ranges->reader_wm_sets[num_valid_sets].min_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN; + ranges->reader_wm_sets[num_valid_sets].max_fill_clk_mhz = PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX; + /* dcfclk wil be used to select WM*/ if (ranges->reader_wm_sets[num_valid_sets].wm_type == WM_TYPE_PSTATE_CHG) { if (i == 0) - ranges->reader_wm_sets[num_valid_sets].min_fill_clk_mhz = 0; + ranges->reader_wm_sets[num_valid_sets].min_drain_clk_mhz = 0; else { /* add 1 to make it non-overlapping with next lvl */ - ranges->reader_wm_sets[num_valid_sets].min_fill_clk_mhz = bw_params->clk_table.entries[i - 1].fclk_mhz + 1; + ranges->reader_wm_sets[num_valid_sets].min_drain_clk_mhz = bw_params->clk_table.entries[i - 1].dcfclk_mhz + 1; } - ranges->reader_wm_sets[num_valid_sets].max_fill_clk_mhz = bw_params->clk_table.entries[i].fclk_mhz; + ranges->reader_wm_sets[num_valid_sets].max_drain_clk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; } else { /* unconstrained for memory retraining */ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index f1a5d2c6aa37..68c4049cbc2a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -400,7 +400,7 @@ static bool acquire( { enum gpio_result result; - if (!is_engine_available(engine)) + if ((engine == NULL) || !is_engine_available(engine)) return false; result = dal_ddc_open(ddc, GPIO_MODE_HARDWARE, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index cfbbaffa8654..a444fed94184 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -572,7 +572,6 @@ void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) dpp->funcs->dpp_dppclk_control(dpp, false, false); hubp->power_gated = true; - dc->optimized_required = false; /* We're powering off, no need to optimize */ hws->funcs.plane_atomic_power_down(dc, pipe_ctx->plane_res.dpp, diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 0d506d30d6b6..33d0a176841a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -60,6 +60,7 @@ #include "dcn20/dcn20_dccg.h" #include "dcn21_hubbub.h" #include "dcn10/dcn10_resource.h" +#include "dce110/dce110_resource.h" #include "dcn20/dcn20_dwb.h" #include "dcn20/dcn20_mmhubbub.h" @@ -856,6 +857,7 @@ static const struct dc_debug_options debug_defaults_diags = { enum dcn20_clk_src_array_id { DCN20_CLK_SRC_PLL0, DCN20_CLK_SRC_PLL1, + DCN20_CLK_SRC_PLL2, DCN20_CLK_SRC_TOTAL_DCN21 }; @@ -1718,6 +1720,10 @@ static bool dcn21_resource_construct( dcn21_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL1, &clk_src_regs[1], false); + pool->base.clock_sources[DCN20_CLK_SRC_PLL2] = + dcn21_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs[2], false); pool->base.clk_src_count = DCN20_CLK_SRC_TOTAL_DCN21; diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h index b2f96a101124..7a63cf8e85ed 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h @@ -39,21 +39,39 @@ #define SMU_11_0_PP_OVERDRIVE_VERSION 0x0800 #define SMU_11_0_PP_POWERSAVINGCLOCK_VERSION 0x0100 +enum SMU_11_0_ODFEATURE_CAP { + SMU_11_0_ODCAP_GFXCLK_LIMITS = 0, + SMU_11_0_ODCAP_GFXCLK_CURVE, + SMU_11_0_ODCAP_UCLK_MAX, + SMU_11_0_ODCAP_POWER_LIMIT, + SMU_11_0_ODCAP_FAN_ACOUSTIC_LIMIT, + SMU_11_0_ODCAP_FAN_SPEED_MIN, + SMU_11_0_ODCAP_TEMPERATURE_FAN, + SMU_11_0_ODCAP_TEMPERATURE_SYSTEM, + SMU_11_0_ODCAP_MEMORY_TIMING_TUNE, + SMU_11_0_ODCAP_FAN_ZERO_RPM_CONTROL, + SMU_11_0_ODCAP_AUTO_UV_ENGINE, + SMU_11_0_ODCAP_AUTO_OC_ENGINE, + SMU_11_0_ODCAP_AUTO_OC_MEMORY, + SMU_11_0_ODCAP_FAN_CURVE, + SMU_11_0_ODCAP_COUNT, +}; + enum SMU_11_0_ODFEATURE_ID { - SMU_11_0_ODFEATURE_GFXCLK_LIMITS = 1 << 0, //GFXCLK Limit feature - SMU_11_0_ODFEATURE_GFXCLK_CURVE = 1 << 1, //GFXCLK Curve feature - SMU_11_0_ODFEATURE_UCLK_MAX = 1 << 2, //UCLK Limit feature - SMU_11_0_ODFEATURE_POWER_LIMIT = 1 << 3, //Power Limit feature - SMU_11_0_ODFEATURE_FAN_ACOUSTIC_LIMIT = 1 << 4, //Fan Acoustic RPM feature - SMU_11_0_ODFEATURE_FAN_SPEED_MIN = 1 << 5, //Minimum Fan Speed feature - SMU_11_0_ODFEATURE_TEMPERATURE_FAN = 1 << 6, //Fan Target Temperature Limit feature - SMU_11_0_ODFEATURE_TEMPERATURE_SYSTEM = 1 << 7, //Operating Temperature Limit feature - SMU_11_0_ODFEATURE_MEMORY_TIMING_TUNE = 1 << 8, //AC Timing Tuning feature - SMU_11_0_ODFEATURE_FAN_ZERO_RPM_CONTROL = 1 << 9, //Zero RPM feature - SMU_11_0_ODFEATURE_AUTO_UV_ENGINE = 1 << 10, //Auto Under Volt GFXCLK feature - SMU_11_0_ODFEATURE_AUTO_OC_ENGINE = 1 << 11, //Auto Over Clock GFXCLK feature - SMU_11_0_ODFEATURE_AUTO_OC_MEMORY = 1 << 12, //Auto Over Clock MCLK feature - SMU_11_0_ODFEATURE_FAN_CURVE = 1 << 13, //VICTOR TODO + SMU_11_0_ODFEATURE_GFXCLK_LIMITS = 1 << SMU_11_0_ODCAP_GFXCLK_LIMITS, //GFXCLK Limit feature + SMU_11_0_ODFEATURE_GFXCLK_CURVE = 1 << SMU_11_0_ODCAP_GFXCLK_CURVE, //GFXCLK Curve feature + SMU_11_0_ODFEATURE_UCLK_MAX = 1 << SMU_11_0_ODCAP_UCLK_MAX, //UCLK Limit feature + SMU_11_0_ODFEATURE_POWER_LIMIT = 1 << SMU_11_0_ODCAP_POWER_LIMIT, //Power Limit feature + SMU_11_0_ODFEATURE_FAN_ACOUSTIC_LIMIT = 1 << SMU_11_0_ODCAP_FAN_ACOUSTIC_LIMIT, //Fan Acoustic RPM feature + SMU_11_0_ODFEATURE_FAN_SPEED_MIN = 1 << SMU_11_0_ODCAP_FAN_SPEED_MIN, //Minimum Fan Speed feature + SMU_11_0_ODFEATURE_TEMPERATURE_FAN = 1 << SMU_11_0_ODCAP_TEMPERATURE_FAN, //Fan Target Temperature Limit feature + SMU_11_0_ODFEATURE_TEMPERATURE_SYSTEM = 1 << SMU_11_0_ODCAP_TEMPERATURE_SYSTEM, //Operating Temperature Limit feature + SMU_11_0_ODFEATURE_MEMORY_TIMING_TUNE = 1 << SMU_11_0_ODCAP_MEMORY_TIMING_TUNE, //AC Timing Tuning feature + SMU_11_0_ODFEATURE_FAN_ZERO_RPM_CONTROL = 1 << SMU_11_0_ODCAP_FAN_ZERO_RPM_CONTROL, //Zero RPM feature + SMU_11_0_ODFEATURE_AUTO_UV_ENGINE = 1 << SMU_11_0_ODCAP_AUTO_UV_ENGINE, //Auto Under Volt GFXCLK feature + SMU_11_0_ODFEATURE_AUTO_OC_ENGINE = 1 << SMU_11_0_ODCAP_AUTO_OC_ENGINE, //Auto Over Clock GFXCLK feature + SMU_11_0_ODFEATURE_AUTO_OC_MEMORY = 1 << SMU_11_0_ODCAP_AUTO_OC_MEMORY, //Auto Over Clock MCLK feature + SMU_11_0_ODFEATURE_FAN_CURVE = 1 << SMU_11_0_ODCAP_FAN_CURVE, //Fan Curve feature SMU_11_0_ODFEATURE_COUNT = 14, }; #define SMU_11_0_MAX_ODFEATURE 32 //Maximum Number of OD Features diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 19a9846b730e..0d73a49166af 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -736,9 +736,9 @@ static bool navi10_is_support_fine_grained_dpm(struct smu_context *smu, enum smu return dpm_desc->SnapToDiscrete == 0 ? true : false; } -static inline bool navi10_od_feature_is_supported(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODFEATURE_ID feature) +static inline bool navi10_od_feature_is_supported(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODFEATURE_CAP cap) { - return od_table->cap[feature]; + return od_table->cap[cap]; } static void navi10_od_setting_get_range(struct smu_11_0_overdrive_table *od_table, @@ -846,7 +846,7 @@ static int navi10_print_clk_levels(struct smu_context *smu, case SMU_OD_SCLK: if (!smu->od_enabled || !od_table || !od_settings) break; - if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS)) break; size += sprintf(buf + size, "OD_SCLK:\n"); size += sprintf(buf + size, "0: %uMhz\n1: %uMhz\n", od_table->GfxclkFmin, od_table->GfxclkFmax); @@ -854,7 +854,7 @@ static int navi10_print_clk_levels(struct smu_context *smu, case SMU_OD_MCLK: if (!smu->od_enabled || !od_table || !od_settings) break; - if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_UCLK_MAX)) break; size += sprintf(buf + size, "OD_MCLK:\n"); size += sprintf(buf + size, "1: %uMHz\n", od_table->UclkFmax); @@ -862,7 +862,7 @@ static int navi10_print_clk_levels(struct smu_context *smu, case SMU_OD_VDDC_CURVE: if (!smu->od_enabled || !od_table || !od_settings) break; - if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_CURVE)) break; size += sprintf(buf + size, "OD_VDDC_CURVE:\n"); for (i = 0; i < 3; i++) { @@ -887,7 +887,7 @@ static int navi10_print_clk_levels(struct smu_context *smu, break; size = sprintf(buf, "%s:\n", "OD_RANGE"); - if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) { + if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS)) { navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_GFXCLKFMIN, &min_value, NULL); navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_GFXCLKFMAX, @@ -896,14 +896,14 @@ static int navi10_print_clk_levels(struct smu_context *smu, min_value, max_value); } - if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) { + if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_UCLK_MAX)) { navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_UCLKFMAX, &min_value, &max_value); size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n", min_value, max_value); } - if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) { + if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_CURVE)) { navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P1, &min_value, &max_value); size += sprintf(buf + size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n", @@ -2056,7 +2056,7 @@ static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABL switch (type) { case PP_OD_EDIT_SCLK_VDDC_TABLE: - if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) { + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS)) { pr_warn("GFXCLK_LIMITS not supported!\n"); return -ENOTSUPP; } @@ -2102,7 +2102,7 @@ static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABL } break; case PP_OD_EDIT_MCLK_VDDC_TABLE: - if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) { + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_UCLK_MAX)) { pr_warn("UCLK_MAX not supported!\n"); return -ENOTSUPP; } @@ -2143,7 +2143,7 @@ static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABL } break; case PP_OD_EDIT_VDDC_CURVE: - if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) { + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_CURVE)) { pr_warn("GFXCLK_CURVE not supported!\n"); return -ENOTSUPP; } diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 20cdaf3146b8..cce0b1bba591 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -3838,7 +3838,8 @@ drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr, else if (msg->req_type == DP_RESOURCE_STATUS_NOTIFY) guid = msg->u.resource_stat.guid; - mstb = drm_dp_get_mst_branch_device_by_guid(mgr, guid); + if (guid) + mstb = drm_dp_get_mst_branch_device_by_guid(mgr, guid); } else { mstb = drm_dp_get_mst_branch_device(mgr, hdr->lct, hdr->rad); } diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 99769d6c9f84..805fb004c8eb 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -3211,7 +3211,7 @@ static u8 *drm_find_cea_extension(const struct edid *edid) return cea; } -static const struct drm_display_mode *cea_mode_for_vic(u8 vic) +static __always_inline const struct drm_display_mode *cea_mode_for_vic(u8 vic) { BUILD_BUG_ON(1 + ARRAY_SIZE(edid_cea_modes_1) - 1 != 127); BUILD_BUG_ON(193 + ARRAY_SIZE(edid_cea_modes_193) - 1 != 219); diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 8beac06e3f10..ef4017a1baba 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -357,14 +357,16 @@ parse_generic_dtd(struct drm_i915_private *dev_priv, panel_fixed_mode->hdisplay + dtd->hfront_porch; panel_fixed_mode->hsync_end = panel_fixed_mode->hsync_start + dtd->hsync; - panel_fixed_mode->htotal = panel_fixed_mode->hsync_end; + panel_fixed_mode->htotal = + panel_fixed_mode->hdisplay + dtd->hblank; panel_fixed_mode->vdisplay = dtd->vactive; panel_fixed_mode->vsync_start = panel_fixed_mode->vdisplay + dtd->vfront_porch; panel_fixed_mode->vsync_end = panel_fixed_mode->vsync_start + dtd->vsync; - panel_fixed_mode->vtotal = panel_fixed_mode->vsync_end; + panel_fixed_mode->vtotal = + panel_fixed_mode->vdisplay + dtd->vblank; panel_fixed_mode->clock = dtd->pixel_clock; panel_fixed_mode->width_mm = dtd->width_mm; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 19ea842cfd84..064dd99bbc49 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -12366,6 +12366,7 @@ static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state) /* Copy parameters to slave plane */ linked_state->ctl = plane_state->ctl | PLANE_CTL_YUV420_Y_PLANE; linked_state->color_ctl = plane_state->color_ctl; + linked_state->view = plane_state->view; memcpy(linked_state->color_plane, plane_state->color_plane, sizeof(linked_state->color_plane)); @@ -14476,37 +14477,23 @@ static int intel_atomic_check_crtcs(struct intel_atomic_state *state) return 0; } -static bool intel_cpu_transcoder_needs_modeset(struct intel_atomic_state *state, - enum transcoder transcoder) +static bool intel_cpu_transcoders_need_modeset(struct intel_atomic_state *state, + u8 transcoders) { - struct intel_crtc_state *new_crtc_state; + const struct intel_crtc_state *new_crtc_state; struct intel_crtc *crtc; int i; - for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) - if (new_crtc_state->cpu_transcoder == transcoder) - return needs_modeset(new_crtc_state); + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { + if (new_crtc_state->hw.enable && + transcoders & BIT(new_crtc_state->cpu_transcoder) && + needs_modeset(new_crtc_state)) + return true; + } return false; } -static void -intel_modeset_synced_crtcs(struct intel_atomic_state *state, - u8 transcoders) -{ - struct intel_crtc_state *new_crtc_state; - struct intel_crtc *crtc; - int i; - - for_each_new_intel_crtc_in_state(state, crtc, - new_crtc_state, i) { - if (transcoders & BIT(new_crtc_state->cpu_transcoder)) { - new_crtc_state->uapi.mode_changed = true; - new_crtc_state->update_pipe = false; - } - } -} - static int intel_modeset_all_tiles(struct intel_atomic_state *state, int tile_grp_id) { @@ -14662,15 +14649,20 @@ static int intel_atomic_check(struct drm_device *dev, if (intel_dp_mst_is_slave_trans(new_crtc_state)) { enum transcoder master = new_crtc_state->mst_master_transcoder; - if (intel_cpu_transcoder_needs_modeset(state, master)) { + if (intel_cpu_transcoders_need_modeset(state, BIT(master))) { new_crtc_state->uapi.mode_changed = true; new_crtc_state->update_pipe = false; } - } else if (is_trans_port_sync_mode(new_crtc_state)) { + } + + if (is_trans_port_sync_mode(new_crtc_state)) { u8 trans = new_crtc_state->sync_mode_slaves_mask | BIT(new_crtc_state->master_transcoder); - intel_modeset_synced_crtcs(state, trans); + if (intel_cpu_transcoders_need_modeset(state, trans)) { + new_crtc_state->uapi.mode_changed = true; + new_crtc_state->update_pipe = false; + } } } diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 89fb0d90b694..04f953ba8f00 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -384,6 +384,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) return data; } +#ifdef CONFIG_ACPI static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) { struct i2c_adapter_lookup *lookup = data; @@ -393,8 +394,7 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) acpi_handle adapter_handle; acpi_status status; - if (intel_dsi->i2c_bus_num >= 0 || - !i2c_acpi_get_i2c_resource(ares, &sb)) + if (!i2c_acpi_get_i2c_resource(ares, &sb)) return 1; if (lookup->slave_addr != sb->slave_address) @@ -413,14 +413,41 @@ static int i2c_adapter_lookup(struct acpi_resource *ares, void *data) return 1; } -static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) +static void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi, + const u16 slave_addr) { struct drm_device *drm_dev = intel_dsi->base.base.dev; struct device *dev = &drm_dev->pdev->dev; - struct i2c_adapter *adapter; struct acpi_device *acpi_dev; struct list_head resource_list; struct i2c_adapter_lookup lookup; + + acpi_dev = ACPI_COMPANION(dev); + if (acpi_dev) { + memset(&lookup, 0, sizeof(lookup)); + lookup.slave_addr = slave_addr; + lookup.intel_dsi = intel_dsi; + lookup.dev_handle = acpi_device_handle(acpi_dev); + + INIT_LIST_HEAD(&resource_list); + acpi_dev_get_resources(acpi_dev, &resource_list, + i2c_adapter_lookup, + &lookup); + acpi_dev_free_resource_list(&resource_list); + } +} +#else +static inline void i2c_acpi_find_adapter(struct intel_dsi *intel_dsi, + const u16 slave_addr) +{ +} +#endif + +static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) +{ + struct drm_device *drm_dev = intel_dsi->base.base.dev; + struct device *dev = &drm_dev->pdev->dev; + struct i2c_adapter *adapter; struct i2c_msg msg; int ret; u8 vbt_i2c_bus_num = *(data + 2); @@ -431,20 +458,7 @@ static const u8 *mipi_exec_i2c(struct intel_dsi *intel_dsi, const u8 *data) if (intel_dsi->i2c_bus_num < 0) { intel_dsi->i2c_bus_num = vbt_i2c_bus_num; - - acpi_dev = ACPI_COMPANION(dev); - if (acpi_dev) { - memset(&lookup, 0, sizeof(lookup)); - lookup.slave_addr = slave_addr; - lookup.intel_dsi = intel_dsi; - lookup.dev_handle = acpi_device_handle(acpi_dev); - - INIT_LIST_HEAD(&resource_list); - acpi_dev_get_resources(acpi_dev, &resource_list, - i2c_adapter_lookup, - &lookup); - acpi_dev_free_resource_list(&resource_list); - } + i2c_acpi_find_adapter(intel_dsi, slave_addr); } adapter = i2c_get_adapter(intel_dsi->i2c_bus_num); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index d5a0f5ae4a8b..60c984e10c4a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1981,9 +1981,20 @@ static int __eb_parse(struct dma_fence_work *work) pw->trampoline); } +static void __eb_parse_release(struct dma_fence_work *work) +{ + struct eb_parse_work *pw = container_of(work, typeof(*pw), base); + + if (pw->trampoline) + i915_active_release(&pw->trampoline->active); + i915_active_release(&pw->shadow->active); + i915_active_release(&pw->batch->active); +} + static const struct dma_fence_work_ops eb_parse_ops = { .name = "eb_parse", .work = __eb_parse, + .release = __eb_parse_release, }; static int eb_parse_pipeline(struct i915_execbuffer *eb, @@ -1997,6 +2008,20 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, if (!pw) return -ENOMEM; + err = i915_active_acquire(&eb->batch->active); + if (err) + goto err_free; + + err = i915_active_acquire(&shadow->active); + if (err) + goto err_batch; + + if (trampoline) { + err = i915_active_acquire(&trampoline->active); + if (err) + goto err_shadow; + } + dma_fence_work_init(&pw->base, &eb_parse_ops); pw->engine = eb->engine; @@ -2006,7 +2031,9 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, pw->shadow = shadow; pw->trampoline = trampoline; - dma_resv_lock(pw->batch->resv, NULL); + err = dma_resv_lock_interruptible(pw->batch->resv, NULL); + if (err) + goto err_trampoline; err = dma_resv_reserve_shared(pw->batch->resv, 1); if (err) @@ -2034,6 +2061,14 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, err_batch_unlock: dma_resv_unlock(pw->batch->resv); +err_trampoline: + if (trampoline) + i915_active_release(&trampoline->active); +err_shadow: + i915_active_release(&shadow->active); +err_batch: + i915_active_release(&eb->batch->active); +err_free: kfree(pw); return err; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index b9fdac2f9003..0b6a442108de 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -455,10 +455,11 @@ out: void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj) { - struct i915_mmap_offset *mmo; + struct i915_mmap_offset *mmo, *mn; spin_lock(&obj->mmo.lock); - list_for_each_entry(mmo, &obj->mmo.offsets, offset) { + rbtree_postorder_for_each_entry_safe(mmo, mn, + &obj->mmo.offsets, offset) { /* * vma_node_unmap for GTT mmaps handled already in * __i915_gem_object_release_mmap_gtt @@ -488,6 +489,67 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) } static struct i915_mmap_offset * +lookup_mmo(struct drm_i915_gem_object *obj, + enum i915_mmap_type mmap_type) +{ + struct rb_node *rb; + + spin_lock(&obj->mmo.lock); + rb = obj->mmo.offsets.rb_node; + while (rb) { + struct i915_mmap_offset *mmo = + rb_entry(rb, typeof(*mmo), offset); + + if (mmo->mmap_type == mmap_type) { + spin_unlock(&obj->mmo.lock); + return mmo; + } + + if (mmo->mmap_type < mmap_type) + rb = rb->rb_right; + else + rb = rb->rb_left; + } + spin_unlock(&obj->mmo.lock); + + return NULL; +} + +static struct i915_mmap_offset * +insert_mmo(struct drm_i915_gem_object *obj, struct i915_mmap_offset *mmo) +{ + struct rb_node *rb, **p; + + spin_lock(&obj->mmo.lock); + rb = NULL; + p = &obj->mmo.offsets.rb_node; + while (*p) { + struct i915_mmap_offset *pos; + + rb = *p; + pos = rb_entry(rb, typeof(*pos), offset); + + if (pos->mmap_type == mmo->mmap_type) { + spin_unlock(&obj->mmo.lock); + drm_vma_offset_remove(obj->base.dev->vma_offset_manager, + &mmo->vma_node); + kfree(mmo); + return pos; + } + + if (pos->mmap_type < mmo->mmap_type) + p = &rb->rb_right; + else + p = &rb->rb_left; + } + rb_link_node(&mmo->offset, rb, p); + rb_insert_color(&mmo->offset, &obj->mmo.offsets); + spin_unlock(&obj->mmo.lock); + + return mmo; +} + +static struct i915_mmap_offset * mmap_offset_attach(struct drm_i915_gem_object *obj, enum i915_mmap_type mmap_type, struct drm_file *file) @@ -496,20 +558,22 @@ mmap_offset_attach(struct drm_i915_gem_object *obj, struct i915_mmap_offset *mmo; int err; + mmo = lookup_mmo(obj, mmap_type); + if (mmo) + goto out; + mmo = kmalloc(sizeof(*mmo), GFP_KERNEL); if (!mmo) return ERR_PTR(-ENOMEM); mmo->obj = obj; - mmo->dev = obj->base.dev; - mmo->file = file; mmo->mmap_type = mmap_type; drm_vma_node_reset(&mmo->vma_node); - err = drm_vma_offset_add(mmo->dev->vma_offset_manager, &mmo->vma_node, - obj->base.size / PAGE_SIZE); + err = drm_vma_offset_add(obj->base.dev->vma_offset_manager, + &mmo->vma_node, obj->base.size / PAGE_SIZE); if (likely(!err)) - goto out; + goto insert; /* Attempt to reap some mmap space from dead objects */ err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT); @@ -517,19 +581,17 @@ mmap_offset_attach(struct drm_i915_gem_object *obj, goto err; i915_gem_drain_freed_objects(i915); - err = drm_vma_offset_add(mmo->dev->vma_offset_manager, &mmo->vma_node, - obj->base.size / PAGE_SIZE); + err = drm_vma_offset_add(obj->base.dev->vma_offset_manager, + &mmo->vma_node, obj->base.size / PAGE_SIZE); if (err) goto err; +insert: + mmo = insert_mmo(obj, mmo); + GEM_BUG_ON(lookup_mmo(obj, mmap_type) != mmo); out: if (file) drm_vma_node_allow(&mmo->vma_node, file); - - spin_lock(&obj->mmo.lock); - list_add(&mmo->offset, &obj->mmo.offsets); - spin_unlock(&obj->mmo.lock); - return mmo; err: @@ -745,60 +807,43 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) struct drm_vma_offset_node *node; struct drm_file *priv = filp->private_data; struct drm_device *dev = priv->minor->dev; + struct drm_i915_gem_object *obj = NULL; struct i915_mmap_offset *mmo = NULL; - struct drm_gem_object *obj = NULL; struct file *anon; if (drm_dev_is_unplugged(dev)) return -ENODEV; + rcu_read_lock(); drm_vma_offset_lock_lookup(dev->vma_offset_manager); node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager, vma->vm_pgoff, vma_pages(vma)); - if (likely(node)) { - mmo = container_of(node, struct i915_mmap_offset, - vma_node); - /* - * In our dependency chain, the drm_vma_offset_node - * depends on the validity of the mmo, which depends on - * the gem object. However the only reference we have - * at this point is the mmo (as the parent of the node). - * Try to check if the gem object was at least cleared. - */ - if (!mmo || !mmo->obj) { - drm_vma_offset_unlock_lookup(dev->vma_offset_manager); - return -EINVAL; - } + if (node && drm_vma_node_is_allowed(node, priv)) { /* * Skip 0-refcnted objects as it is in the process of being * destroyed and will be invalid when the vma manager lock * is released. */ - obj = &mmo->obj->base; - if (!kref_get_unless_zero(&obj->refcount)) - obj = NULL; + mmo = container_of(node, struct i915_mmap_offset, vma_node); + obj = i915_gem_object_get_rcu(mmo->obj); } drm_vma_offset_unlock_lookup(dev->vma_offset_manager); + rcu_read_unlock(); if (!obj) - return -EINVAL; - - if (!drm_vma_node_is_allowed(node, priv)) { - drm_gem_object_put_unlocked(obj); - return -EACCES; - } + return node ? -EACCES : -EINVAL; - if (i915_gem_object_is_readonly(to_intel_bo(obj))) { + if (i915_gem_object_is_readonly(obj)) { if (vma->vm_flags & VM_WRITE) { - drm_gem_object_put_unlocked(obj); + i915_gem_object_put(obj); return -EINVAL; } vma->vm_flags &= ~VM_MAYWRITE; } - anon = mmap_singleton(to_i915(obj->dev)); + anon = mmap_singleton(to_i915(dev)); if (IS_ERR(anon)) { - drm_gem_object_put_unlocked(obj); + i915_gem_object_put(obj); return PTR_ERR(anon); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 46bacc82ddc4..35985218bd85 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -63,7 +63,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&obj->lut_list); spin_lock_init(&obj->mmo.lock); - INIT_LIST_HEAD(&obj->mmo.offsets); + obj->mmo.offsets = RB_ROOT; init_rcu_head(&obj->rcu); @@ -100,8 +100,8 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) { struct drm_i915_gem_object *obj = to_intel_bo(gem); struct drm_i915_file_private *fpriv = file->driver_priv; + struct i915_mmap_offset *mmo, *mn; struct i915_lut_handle *lut, *ln; - struct i915_mmap_offset *mmo; LIST_HEAD(close); i915_gem_object_lock(obj); @@ -117,14 +117,8 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) i915_gem_object_unlock(obj); spin_lock(&obj->mmo.lock); - list_for_each_entry(mmo, &obj->mmo.offsets, offset) { - if (mmo->file != file) - continue; - - spin_unlock(&obj->mmo.lock); + rbtree_postorder_for_each_entry_safe(mmo, mn, &obj->mmo.offsets, offset) drm_vma_node_revoke(&mmo->vma_node, file); - spin_lock(&obj->mmo.lock); - } spin_unlock(&obj->mmo.lock); list_for_each_entry_safe(lut, ln, &close, obj_link) { @@ -203,12 +197,14 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, i915_gem_object_release_mmap(obj); - list_for_each_entry_safe(mmo, mn, &obj->mmo.offsets, offset) { + rbtree_postorder_for_each_entry_safe(mmo, mn, + &obj->mmo.offsets, + offset) { drm_vma_offset_remove(obj->base.dev->vma_offset_manager, &mmo->vma_node); kfree(mmo); } - INIT_LIST_HEAD(&obj->mmo.offsets); + obj->mmo.offsets = RB_ROOT; GEM_BUG_ON(atomic_read(&obj->bind_count)); GEM_BUG_ON(obj->userfault_count); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index db70a3306e59..9c86f2dea947 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -70,14 +70,22 @@ i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle) } static inline struct drm_i915_gem_object * +i915_gem_object_get_rcu(struct drm_i915_gem_object *obj) +{ + if (obj && !kref_get_unless_zero(&obj->base.refcount)) + obj = NULL; + + return obj; +} + +static inline struct drm_i915_gem_object * i915_gem_object_lookup(struct drm_file *file, u32 handle) { struct drm_i915_gem_object *obj; rcu_read_lock(); obj = i915_gem_object_lookup_rcu(file, handle); - if (obj && !kref_get_unless_zero(&obj->base.refcount)) - obj = NULL; + obj = i915_gem_object_get_rcu(obj); rcu_read_unlock(); return obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 88e268633fdc..f64ad77e6b1e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -71,13 +71,11 @@ enum i915_mmap_type { }; struct i915_mmap_offset { - struct drm_device *dev; struct drm_vma_offset_node vma_node; struct drm_i915_gem_object *obj; - struct drm_file *file; enum i915_mmap_type mmap_type; - struct list_head offset; + struct rb_node offset; }; struct drm_i915_gem_object { @@ -137,7 +135,7 @@ struct drm_i915_gem_object { struct { spinlock_t lock; /* Protects access to mmo offsets */ - struct list_head offsets; + struct rb_root offsets; } mmo; I915_SELFTEST_DECLARE(struct list_head st_link); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 23137b2a8689..57e8a051ddc2 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -67,21 +67,18 @@ static int intel_context_active_acquire(struct intel_context *ce) { int err; - err = i915_active_acquire(&ce->active); - if (err) - return err; + __i915_active_acquire(&ce->active); + + if (intel_context_is_barrier(ce)) + return 0; /* Preallocate tracking nodes */ - if (!intel_context_is_barrier(ce)) { - err = i915_active_acquire_preallocate_barrier(&ce->active, - ce->engine); - if (err) { - i915_active_release(&ce->active); - return err; - } - } + err = i915_active_acquire_preallocate_barrier(&ce->active, + ce->engine); + if (err) + i915_active_release(&ce->active); - return 0; + return err; } static void intel_context_active_release(struct intel_context *ce) @@ -101,13 +98,19 @@ int __intel_context_do_pin(struct intel_context *ce) return err; } - if (mutex_lock_interruptible(&ce->pin_mutex)) - return -EINTR; + err = i915_active_acquire(&ce->active); + if (err) + return err; + + if (mutex_lock_interruptible(&ce->pin_mutex)) { + err = -EINTR; + goto out_release; + } - if (likely(!atomic_read(&ce->pin_count))) { + if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) { err = intel_context_active_acquire(ce); if (unlikely(err)) - goto err; + goto out_unlock; err = ce->ops->pin(ce); if (unlikely(err)) @@ -117,18 +120,19 @@ int __intel_context_do_pin(struct intel_context *ce) ce->ring->head, ce->ring->tail); smp_mb__before_atomic(); /* flush pin before it is visible */ + atomic_inc(&ce->pin_count); } - atomic_inc(&ce->pin_count); GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */ - - mutex_unlock(&ce->pin_mutex); - return 0; + GEM_BUG_ON(i915_active_is_idle(&ce->active)); + goto out_unlock; err_active: intel_context_active_release(ce); -err: +out_unlock: mutex_unlock(&ce->pin_mutex); +out_release: + i915_active_release(&ce->active); return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index f451ef376548..06ff7695fa29 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -671,6 +671,7 @@ void intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) { INIT_LIST_HEAD(&engine->active.requests); + INIT_LIST_HEAD(&engine->active.hold); spin_lock_init(&engine->active.lock); lockdep_set_subclass(&engine->active.lock, subclass); @@ -1422,6 +1423,17 @@ static void print_request_ring(struct drm_printer *m, struct i915_request *rq) } } +static unsigned long list_count(struct list_head *list) +{ + struct list_head *pos; + unsigned long count = 0; + + list_for_each(pos, list) + count++; + + return count; +} + void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...) @@ -1491,6 +1503,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE); } } + drm_printf(m, "\tOn hold?: %lu\n", list_count(&engine->active.hold)); spin_unlock_irqrestore(&engine->active.lock, flags); drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 350da59e605b..92be41a6903c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -295,6 +295,7 @@ struct intel_engine_cs { struct { spinlock_t lock; struct list_head requests; + struct list_head hold; /* ready requests, but on hold */ } active; struct llist_head barrier_tasks; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 0cf0f6fae675..a13a8c4b65ab 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -985,6 +985,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); list_move(&rq->sched.link, pl); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + active = rq; } else { struct intel_engine_cs *owner = rq->context->engine; @@ -1535,7 +1537,8 @@ static bool can_merge_rq(const struct i915_request *prev, return true; if (unlikely((prev->fence.flags ^ next->fence.flags) & - (I915_FENCE_FLAG_NOPREEMPT | I915_FENCE_FLAG_SENTINEL))) + (BIT(I915_FENCE_FLAG_NOPREEMPT) | + BIT(I915_FENCE_FLAG_SENTINEL)))) return false; if (!can_merge_ctx(prev->context, next->context)) @@ -1632,8 +1635,8 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl) !i915_request_completed(rq)); GEM_BUG_ON(i915_request_is_active(w)); - if (list_empty(&w->sched.link)) - continue; /* Not yet submitted; unready */ + if (!i915_request_is_ready(w)) + continue; if (rq_prio(w) < rq_prio(rq)) continue; @@ -2351,6 +2354,310 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) } } +static void __execlists_hold(struct i915_request *rq) +{ + LIST_HEAD(list); + + do { + struct i915_dependency *p; + + if (i915_request_is_active(rq)) + __i915_request_unsubmit(rq); + + RQ_TRACE(rq, "on hold\n"); + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + list_move_tail(&rq->sched.link, &rq->engine->active.hold); + i915_request_set_hold(rq); + + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + /* Leave semaphores spinning on the other engines */ + if (w->engine != rq->engine) + continue; + + if (!i915_request_is_ready(w)) + continue; + + if (i915_request_completed(w)) + continue; + + if (i915_request_on_hold(rq)) + continue; + + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while (rq); +} + +static bool execlists_hold(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + spin_lock_irq(&engine->active.lock); + + if (i915_request_completed(rq)) { /* too late! */ + rq = NULL; + goto unlock; + } + + if (rq->engine != engine) { /* preempted virtual engine */ + struct virtual_engine *ve = to_virtual_engine(rq->engine); + + /* + * intel_context_inflight() is only protected by virtue + * of process_csb() being called only by the tasklet (or + * directly from inside reset while the tasklet is suspended). + * Assert that neither of those are allowed to run while we + * poke at the request queues. + */ + GEM_BUG_ON(!reset_in_progress(&engine->execlists)); + + /* + * An unsubmitted request along a virtual engine will + * remain on the active (this) engine until we are able + * to process the context switch away (and so mark the + * context as no longer in flight). That cannot have happened + * yet, otherwise we would not be hanging! + */ + spin_lock(&ve->base.active.lock); + GEM_BUG_ON(intel_context_inflight(rq->context) != engine); + GEM_BUG_ON(ve->request != rq); + ve->request = NULL; + spin_unlock(&ve->base.active.lock); + i915_request_put(rq); + + rq->engine = engine; + } + + /* + * Transfer this request onto the hold queue to prevent it + * being resumbitted to HW (and potentially completed) before we have + * released it. Since we may have already submitted following + * requests, we need to remove those as well. + */ + GEM_BUG_ON(i915_request_on_hold(rq)); + GEM_BUG_ON(rq->engine != engine); + __execlists_hold(rq); + +unlock: + spin_unlock_irq(&engine->active.lock); + return rq; +} + +static bool hold_request(const struct i915_request *rq) +{ + struct i915_dependency *p; + + /* + * If one of our ancestors is on hold, we must also be on hold, + * otherwise we will bypass it and execute before it. + */ + list_for_each_entry(p, &rq->sched.signalers_list, signal_link) { + const struct i915_request *s = + container_of(p->signaler, typeof(*s), sched); + + if (s->engine != rq->engine) + continue; + + if (i915_request_on_hold(s)) + return true; + } + + return false; +} + +static void __execlists_unhold(struct i915_request *rq) +{ + LIST_HEAD(list); + + do { + struct i915_dependency *p; + + GEM_BUG_ON(!i915_request_on_hold(rq)); + GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); + + i915_request_clear_hold(rq); + list_move_tail(&rq->sched.link, + i915_sched_lookup_priolist(rq->engine, + rq_prio(rq))); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + RQ_TRACE(rq, "hold release\n"); + + /* Also release any children on this engine that are ready */ + list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + if (w->engine != rq->engine) + continue; + + if (!i915_request_on_hold(rq)) + continue; + + /* Check that no other parents are also on hold */ + if (hold_request(rq)) + continue; + + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while (rq); +} + +static void execlists_unhold(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + spin_lock_irq(&engine->active.lock); + + /* + * Move this request back to the priority queue, and all of its + * children and grandchildren that were suspended along with it. + */ + __execlists_unhold(rq); + + if (rq_prio(rq) > engine->execlists.queue_priority_hint) { + engine->execlists.queue_priority_hint = rq_prio(rq); + tasklet_hi_schedule(&engine->execlists.tasklet); + } + + spin_unlock_irq(&engine->active.lock); +} + +struct execlists_capture { + struct work_struct work; + struct i915_request *rq; + struct i915_gpu_coredump *error; +}; + +static void execlists_capture_work(struct work_struct *work) +{ + struct execlists_capture *cap = container_of(work, typeof(*cap), work); + const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN; + struct intel_engine_cs *engine = cap->rq->engine; + struct intel_gt_coredump *gt = cap->error->gt; + struct intel_engine_capture_vma *vma; + + /* Compress all the objects attached to the request, slow! */ + vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp); + if (vma) { + struct i915_vma_compress *compress = + i915_vma_capture_prepare(gt); + + intel_engine_coredump_add_vma(gt->engine, vma, compress); + i915_vma_capture_finish(gt, compress); + } + + gt->simulated = gt->engine->simulated; + cap->error->simulated = gt->simulated; + + /* Publish the error state, and announce it to the world */ + i915_error_state_store(cap->error); + i915_gpu_coredump_put(cap->error); + + /* Return this request and all that depend upon it for signaling */ + execlists_unhold(engine, cap->rq); + i915_request_put(cap->rq); + + kfree(cap); +} + +static struct execlists_capture *capture_regs(struct intel_engine_cs *engine) +{ + const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; + struct execlists_capture *cap; + + cap = kmalloc(sizeof(*cap), gfp); + if (!cap) + return NULL; + + cap->error = i915_gpu_coredump_alloc(engine->i915, gfp); + if (!cap->error) + goto err_cap; + + cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp); + if (!cap->error->gt) + goto err_gpu; + + cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp); + if (!cap->error->gt->engine) + goto err_gt; + + return cap; + +err_gt: + kfree(cap->error->gt); +err_gpu: + kfree(cap->error); +err_cap: + kfree(cap); + return NULL; +} + +static bool execlists_capture(struct intel_engine_cs *engine) +{ + struct execlists_capture *cap; + + if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) + return true; + + /* + * We need to _quickly_ capture the engine state before we reset. + * We are inside an atomic section (softirq) here and we are delaying + * the forced preemption event. + */ + cap = capture_regs(engine); + if (!cap) + return true; + + cap->rq = execlists_active(&engine->execlists); + GEM_BUG_ON(!cap->rq); + + rcu_read_lock(); + cap->rq = active_request(cap->rq->context->timeline, cap->rq); + cap->rq = i915_request_get_rcu(cap->rq); + rcu_read_unlock(); + if (!cap->rq) + goto err_free; + + /* + * Remove the request from the execlists queue, and take ownership + * of the request. We pass it to our worker who will _slowly_ compress + * all the pages the _user_ requested for debugging their batch, after + * which we return it to the queue for signaling. + * + * By removing them from the execlists queue, we also remove the + * requests from being processed by __unwind_incomplete_requests() + * during the intel_engine_reset(), and so they will *not* be replayed + * afterwards. + * + * Note that because we have not yet reset the engine at this point, + * it is possible for the request that we have identified as being + * guilty, did in fact complete and we will then hit an arbitration + * point allowing the outstanding preemption to succeed. The likelihood + * of that is very low (as capturing of the engine registers should be + * fast enough to run inside an irq-off atomic section!), so we will + * simply hold that request accountable for being non-preemptible + * long enough to force the reset. + */ + if (!execlists_hold(engine, cap->rq)) + goto err_rq; + + INIT_WORK(&cap->work, execlists_capture_work); + schedule_work(&cap->work); + return true; + +err_rq: + i915_request_put(cap->rq); +err_free: + i915_gpu_coredump_put(cap->error); + kfree(cap); + return false; +} + static noinline void preempt_reset(struct intel_engine_cs *engine) { const unsigned int bit = I915_RESET_ENGINE + engine->id; @@ -2368,7 +2675,12 @@ static noinline void preempt_reset(struct intel_engine_cs *engine) ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n", READ_ONCE(engine->props.preempt_timeout_ms), jiffies_to_msecs(jiffies - engine->execlists.preempt.expires)); - intel_engine_reset(engine, "preemption time out"); + + ring_set_paused(engine, 1); /* Freeze the current request in place */ + if (execlists_capture(engine)) + intel_engine_reset(engine, "preemption time out"); + else + ring_set_paused(engine, 0); tasklet_enable(&engine->execlists.tasklet); clear_and_wake_up_bit(bit, lock); @@ -2430,11 +2742,12 @@ static void execlists_preempt(struct timer_list *timer) } static void queue_request(struct intel_engine_cs *engine, - struct i915_sched_node *node, - int prio) + struct i915_request *rq) { - GEM_BUG_ON(!list_empty(&node->link)); - list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio)); + GEM_BUG_ON(!list_empty(&rq->sched.link)); + list_add_tail(&rq->sched.link, + i915_sched_lookup_priolist(engine, rq_prio(rq))); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } static void __submit_queue_imm(struct intel_engine_cs *engine) @@ -2462,6 +2775,13 @@ static void submit_queue(struct intel_engine_cs *engine, __submit_queue_imm(engine); } +static bool ancestor_on_hold(const struct intel_engine_cs *engine, + const struct i915_request *rq) +{ + GEM_BUG_ON(i915_request_on_hold(rq)); + return !list_empty(&engine->active.hold) && hold_request(rq); +} + static void execlists_submit_request(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; @@ -2470,12 +2790,17 @@ static void execlists_submit_request(struct i915_request *request) /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&engine->active.lock, flags); - queue_request(engine, &request->sched, rq_prio(request)); + if (unlikely(ancestor_on_hold(engine, request))) { + list_add_tail(&request->sched.link, &engine->active.hold); + i915_request_set_hold(request); + } else { + queue_request(engine, request); - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); - GEM_BUG_ON(list_empty(&request->sched.link)); + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(list_empty(&request->sched.link)); - submit_queue(engine, request); + submit_queue(engine, request); + } spin_unlock_irqrestore(&engine->active.lock, flags); } @@ -2531,7 +2856,6 @@ static void execlists_context_unpin(struct intel_context *ce) ce->engine); i915_gem_object_unpin_map(ce->state->obj); - intel_ring_reset(ce->ring, ce->ring->tail); } static void @@ -3325,6 +3649,10 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) i915_priolist_free(p); } + /* On-hold requests will be flushed to timeline upon their release */ + list_for_each_entry(rq, &engine->active.hold, sched.link) + mark_eio(rq); + /* Cancel all attached virtual engines */ while ((rb = rb_first_cached(&execlists->virtual))) { struct virtual_engine *ve = diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index a560b7eee2cd..f2806381733f 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -59,11 +59,26 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) ring->vaddr = (void *)(ring + 1); atomic_set(&ring->pin_count, 1); + ring->vma = i915_vma_alloc(); + if (!ring->vma) { + kfree(ring); + return NULL; + } + i915_active_init(&ring->vma->active, NULL, NULL); + intel_ring_update_space(ring); return ring; } +static void mock_ring_free(struct intel_ring *ring) +{ + i915_active_fini(&ring->vma->active); + i915_vma_free(ring->vma); + + kfree(ring); +} + static struct i915_request *first_request(struct mock_engine *engine) { return list_first_entry_or_null(&engine->hw_queue, @@ -121,7 +136,7 @@ static void mock_context_destroy(struct kref *ref) GEM_BUG_ON(intel_context_is_pinned(ce)); if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { - kfree(ce->ring); + mock_ring_free(ce->ring); mock_timeline_unpin(ce->timeline); } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 15cda024e3e4..65718ca2326e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -285,6 +285,107 @@ static int live_unlite_preempt(void *arg) return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); } +static int live_hold_reset(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + /* + * In order to support offline error capture for fast preempt reset, + * we need to decouple the guilty request and ensure that it and its + * descendents are not executed while the capture is in progress. + */ + + if (!intel_has_reset_engine(gt)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + unsigned long heartbeat; + struct i915_request *rq; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + engine_heartbeat_disable(engine, &heartbeat); + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + intel_gt_set_wedged(gt); + err = -ETIME; + goto out; + } + + /* We have our request executing, now remove it and reset */ + + if (test_and_set_bit(I915_RESET_ENGINE + id, + >->reset.flags)) { + intel_gt_set_wedged(gt); + err = -EBUSY; + goto out; + } + tasklet_disable(&engine->execlists.tasklet); + + engine->execlists.tasklet.func(engine->execlists.tasklet.data); + GEM_BUG_ON(execlists_active(&engine->execlists) != rq); + + i915_request_get(rq); + execlists_hold(engine, rq); + GEM_BUG_ON(!i915_request_on_hold(rq)); + + intel_engine_reset(engine, NULL); + GEM_BUG_ON(rq->fence.error != -EIO); + + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(I915_RESET_ENGINE + id, + >->reset.flags); + + /* Check that we do not resubmit the held request */ + if (!i915_request_wait(rq, 0, HZ / 5)) { + pr_err("%s: on hold request completed!\n", + engine->name); + i915_request_put(rq); + err = -EIO; + goto out; + } + GEM_BUG_ON(!i915_request_on_hold(rq)); + + /* But is resubmitted on release */ + execlists_unhold(engine, rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + pr_err("%s: held request did not complete!\n", + engine->name); + intel_gt_set_wedged(gt); + err = -ETIME; + } + i915_request_put(rq); + +out: + engine_heartbeat_enable(engine, heartbeat); + intel_context_put(ce); + if (err) + break; + } + + igt_spinner_fini(&spin); + return err; +} + static int emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) { @@ -3309,12 +3410,168 @@ static int live_virtual_bond(void *arg) return 0; } +static int reset_virtual_engine(struct intel_gt *gt, + struct intel_engine_cs **siblings, + unsigned int nsibling) +{ + struct intel_engine_cs *engine; + struct intel_context *ve; + unsigned long *heartbeat; + struct igt_spinner spin; + struct i915_request *rq; + unsigned int n; + int err = 0; + + /* + * In order to support offline error capture for fast preempt reset, + * we need to decouple the guilty request and ensure that it and its + * descendents are not executed while the capture is in progress. + */ + + heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL); + if (!heartbeat) + return -ENOMEM; + + if (igt_spinner_init(&spin, gt)) { + err = -ENOMEM; + goto out_free; + } + + ve = intel_execlists_create_virtual(siblings, nsibling); + if (IS_ERR(ve)) { + err = PTR_ERR(ve); + goto out_spin; + } + + for (n = 0; n < nsibling; n++) + engine_heartbeat_disable(siblings[n], &heartbeat[n]); + + rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_heartbeat; + } + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + intel_gt_set_wedged(gt); + err = -ETIME; + goto out_heartbeat; + } + + engine = rq->engine; + GEM_BUG_ON(engine == ve->engine); + + /* Take ownership of the reset and tasklet */ + if (test_and_set_bit(I915_RESET_ENGINE + engine->id, + >->reset.flags)) { + intel_gt_set_wedged(gt); + err = -EBUSY; + goto out_heartbeat; + } + tasklet_disable(&engine->execlists.tasklet); + + engine->execlists.tasklet.func(engine->execlists.tasklet.data); + GEM_BUG_ON(execlists_active(&engine->execlists) != rq); + + /* Fake a preemption event; failed of course */ + spin_lock_irq(&engine->active.lock); + __unwind_incomplete_requests(engine); + spin_unlock_irq(&engine->active.lock); + GEM_BUG_ON(rq->engine != ve->engine); + + /* Reset the engine while keeping our active request on hold */ + execlists_hold(engine, rq); + GEM_BUG_ON(!i915_request_on_hold(rq)); + + intel_engine_reset(engine, NULL); + GEM_BUG_ON(rq->fence.error != -EIO); + + /* Release our grasp on the engine, letting CS flow again */ + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags); + + /* Check that we do not resubmit the held request */ + i915_request_get(rq); + if (!i915_request_wait(rq, 0, HZ / 5)) { + pr_err("%s: on hold request completed!\n", + engine->name); + intel_gt_set_wedged(gt); + err = -EIO; + goto out_rq; + } + GEM_BUG_ON(!i915_request_on_hold(rq)); + + /* But is resubmitted on release */ + execlists_unhold(engine, rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + pr_err("%s: held request did not complete!\n", + engine->name); + intel_gt_set_wedged(gt); + err = -ETIME; + } + +out_rq: + i915_request_put(rq); +out_heartbeat: + for (n = 0; n < nsibling; n++) + engine_heartbeat_enable(siblings[n], heartbeat[n]); + + intel_context_put(ve); +out_spin: + igt_spinner_fini(&spin); +out_free: + kfree(heartbeat); + return err; +} + +static int live_virtual_reset(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + unsigned int class, inst; + + /* + * Check that we handle a reset event within a virtual engine. + * Only the physical engine is reset, but we have to check the flow + * of the virtual requests around the reset, and make sure it is not + * forgotten. + */ + + if (USES_GUC_SUBMISSION(gt->i915)) + return 0; + + if (!intel_has_reset_engine(gt)) + return 0; + + for (class = 0; class <= MAX_ENGINE_CLASS; class++) { + int nsibling, err; + + nsibling = 0; + for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { + if (!gt->engine_class[class][inst]) + continue; + + siblings[nsibling++] = gt->engine_class[class][inst]; + } + if (nsibling < 2) + continue; + + err = reset_virtual_engine(gt, siblings, nsibling); + if (err) + return err; + } + + return 0; +} + int intel_execlists_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_sanitycheck), SUBTEST(live_unlite_switch), SUBTEST(live_unlite_preempt), + SUBTEST(live_hold_reset), SUBTEST(live_timeslice_preempt), SUBTEST(live_timeslice_queue), SUBTEST(live_busywait_preempt), @@ -3333,6 +3590,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_virtual_mask), SUBTEST(live_virtual_preserved), SUBTEST(live_virtual_bond), + SUBTEST(live_virtual_reset), }; if (!HAS_EXECLISTS(i915)) diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c index 049775e8e350..b0c1fda32977 100644 --- a/drivers/gpu/drm/i915/gvt/firmware.c +++ b/drivers/gpu/drm/i915/gvt/firmware.c @@ -146,7 +146,7 @@ void intel_gvt_free_firmware(struct intel_gvt *gvt) clean_firmware_sysfs(gvt); kfree(gvt->firmware.cfg_space); - kfree(gvt->firmware.mmio); + vfree(gvt->firmware.mmio); } static int verify_firmware(struct intel_gvt *gvt, @@ -229,7 +229,7 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt) firmware->cfg_space = mem; - mem = kmalloc(info->mmio_size, GFP_KERNEL); + mem = vmalloc(info->mmio_size); if (!mem) { kfree(path); kfree(firmware->cfg_space); diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 34cb404ba4b7..4a4828074cb7 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1956,7 +1956,11 @@ void _intel_vgpu_mm_release(struct kref *mm_ref) if (mm->type == INTEL_GVT_MM_PPGTT) { list_del(&mm->ppgtt_mm.list); + + mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); list_del(&mm->ppgtt_mm.lru_list); + mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); + invalidate_ppgtt_mm(mm); } else { vfree(mm->ggtt_mm.virtual_ggtt); diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index f3da5c06f331..b0a499753526 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -416,13 +416,15 @@ int i915_active_acquire(struct i915_active *ref) if (err) return err; - if (!atomic_read(&ref->count) && ref->active) - err = ref->active(ref); - if (!err) { - spin_lock_irq(&ref->tree_lock); /* vs __active_retire() */ - debug_active_activate(ref); - atomic_inc(&ref->count); - spin_unlock_irq(&ref->tree_lock); + if (likely(!i915_active_acquire_if_busy(ref))) { + if (ref->active) + err = ref->active(ref); + if (!err) { + spin_lock_irq(&ref->tree_lock); /* __active_retire() */ + debug_active_activate(ref); + atomic_inc(&ref->count); + spin_unlock_irq(&ref->tree_lock); + } } mutex_unlock(&ref->mutex); @@ -605,7 +607,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, struct intel_engine_cs *engine) { intel_engine_mask_t tmp, mask = engine->mask; - struct llist_node *pos = NULL, *next; + struct llist_node *first = NULL, *last = NULL; struct intel_gt *gt = engine->gt; int err; @@ -623,6 +625,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, */ for_each_engine_masked(engine, gt, mask, tmp) { u64 idx = engine->kernel_context->timeline->fence_context; + struct llist_node *prev = first; struct active_node *node; node = reuse_idle_barrier(ref, idx); @@ -656,23 +659,23 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN)); GEM_BUG_ON(barrier_to_engine(node) != engine); - next = barrier_to_ll(node); - next->next = pos; - if (!pos) - pos = next; + first = barrier_to_ll(node); + first->next = prev; + if (!last) + last = first; intel_engine_pm_get(engine); } GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers)); - llist_add_batch(next, pos, &ref->preallocated_barriers); + llist_add_batch(first, last, &ref->preallocated_barriers); return 0; unwind: - while (pos) { - struct active_node *node = barrier_from_ll(pos); + while (first) { + struct active_node *node = barrier_from_ll(first); - pos = pos->next; + first = first->next; atomic_dec(&ref->count); intel_engine_pm_put(barrier_to_engine(node)); diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index b571f675c795..51e1e854ca55 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -188,6 +188,12 @@ int i915_active_acquire(struct i915_active *ref); bool i915_active_acquire_if_busy(struct i915_active *ref); void i915_active_release(struct i915_active *ref); +static inline void __i915_active_acquire(struct i915_active *ref) +{ + GEM_BUG_ON(!atomic_read(&ref->count)); + atomic_inc(&ref->count); +} + static inline bool i915_active_is_idle(const struct i915_active *ref) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 94f993e4c12f..c2de2f45b459 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -265,7 +265,10 @@ i915_gem_dumb_create(struct drm_file *file, DRM_FORMAT_MOD_LINEAR)) args->pitch = ALIGN(args->pitch, 4096); - args->size = args->pitch * args->height; + if (args->pitch < args->width) + return -EINVAL; + + args->size = mul_u32_u32(args->pitch, args->height); mem_type = INTEL_MEMORY_SYSTEM; if (HAS_LMEM(to_i915(dev))) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 4c1836f0a991..594341e27a47 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1681,7 +1681,7 @@ static const char *error_msg(struct i915_gpu_coredump *error) "GPU HANG: ecode %d:%x:%08x", INTEL_GEN(error->i915), engines, generate_ecode(first)); - if (first) { + if (first && first->context.pid) { /* Just show the first executing process, more is confusing */ len += scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 9109004956bd..e4a6afed3bbf 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -314,8 +314,11 @@ i915_vma_capture_finish(struct intel_gt_coredump *gt, } static inline void -i915_error_state_store(struct drm_i915_private *i915, - struct i915_gpu_coredump *error) +i915_error_state_store(struct i915_gpu_coredump *error) +{ +} + +static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu) { } diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 28a82c849bac..ec0299490dd4 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -637,8 +637,10 @@ static void i915_pmu_enable(struct perf_event *event) container_of(event->pmu, typeof(*i915), pmu.base); unsigned int bit = event_enabled_bit(event); struct i915_pmu *pmu = &i915->pmu; + intel_wakeref_t wakeref; unsigned long flags; + wakeref = intel_runtime_pm_get(&i915->runtime_pm); spin_lock_irqsave(&pmu->lock, flags); /* @@ -648,6 +650,14 @@ static void i915_pmu_enable(struct perf_event *event) BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS); GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count)); GEM_BUG_ON(pmu->enable_count[bit] == ~0); + + if (pmu->enable_count[bit] == 0 && + config_enabled_mask(I915_PMU_RC6_RESIDENCY) & BIT_ULL(bit)) { + pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = 0; + pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); + pmu->sleep_last = ktime_get(); + } + pmu->enable |= BIT_ULL(bit); pmu->enable_count[bit]++; @@ -688,6 +698,8 @@ static void i915_pmu_enable(struct perf_event *event) * an existing non-zero value. */ local64_set(&event->hw.prev_count, __i915_pmu_event_read(event)); + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); } static void i915_pmu_disable(struct perf_event *event) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index be185886e4fc..78a5f5d3c070 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -221,6 +221,8 @@ static void remove_from_engine(struct i915_request *rq) locked = engine; } list_del_init(&rq->sched.link); + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); spin_unlock_irq(&locked->active.lock); } @@ -408,8 +410,10 @@ bool __i915_request_submit(struct i915_request *request) xfer: /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); - if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) + if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) { list_move_tail(&request->sched.link, &engine->active.requests); + clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); + } if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) && diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 031433691a06..f57eadcf3583 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -71,6 +71,18 @@ enum { I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS, /* + * I915_FENCE_FLAG_PQUEUE - this request is ready for execution + * + * Using the scheduler, when a request is ready for execution it is put + * into the priority queue, and removed from that queue when transferred + * to the HW runlists. We want to track its membership within the + * priority queue so that we can easily check before rescheduling. + * + * See i915_request_in_priority_queue() + */ + I915_FENCE_FLAG_PQUEUE, + + /* * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list * * Internal bookkeeping used by the breadcrumb code to track when @@ -79,6 +91,13 @@ enum { I915_FENCE_FLAG_SIGNAL, /* + * I915_FENCE_FLAG_HOLD - this request is currently on hold + * + * This request has been suspended, pending an ongoing investigation. + */ + I915_FENCE_FLAG_HOLD, + + /* * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted * * The execution of some requests should not be interrupted. This is @@ -361,6 +380,11 @@ static inline bool i915_request_is_active(const struct i915_request *rq) return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); } +static inline bool i915_request_in_priority_queue(const struct i915_request *rq) +{ + return test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); +} + /** * Returns true if seq1 is later than seq2. */ @@ -454,6 +478,27 @@ static inline bool i915_request_is_running(const struct i915_request *rq) return __i915_request_has_started(rq); } +/** + * i915_request_is_running - check if the request is ready for execution + * @rq: the request + * + * Upon construction, the request is instructed to wait upon various + * signals before it is ready to be executed by the HW. That is, we do + * not want to start execution and read data before it is written. In practice, + * this is controlled with a mixture of interrupts and semaphores. Once + * the submit fence is completed, the backend scheduler will place the + * request into its queue and from there submit it for execution. So we + * can detect when a request is eligible for execution (and is under control + * of the scheduler) by querying where it is in any of the scheduler's lists. + * + * Returns true if the request is ready for execution (it may be inflight), + * false otherwise. + */ +static inline bool i915_request_is_ready(const struct i915_request *rq) +{ + return !list_empty(&rq->sched.link); +} + static inline bool i915_request_completed(const struct i915_request *rq) { if (i915_request_signaled(rq)) @@ -483,6 +528,21 @@ static inline bool i915_request_has_sentinel(const struct i915_request *rq) return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags)); } +static inline bool i915_request_on_hold(const struct i915_request *rq) +{ + return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags)); +} + +static inline void i915_request_set_hold(struct i915_request *rq) +{ + set_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); +} + +static inline void i915_request_clear_hold(struct i915_request *rq) +{ + clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); +} + static inline struct intel_timeline * i915_request_timeline(struct i915_request *rq) { diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index bf87c70bfdd9..5d96cfba40f8 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -326,20 +326,18 @@ static void __i915_schedule(struct i915_sched_node *node, node->attr.priority = prio; - if (list_empty(&node->link)) { - /* - * If the request is not in the priolist queue because - * it is not yet runnable, then it doesn't contribute - * to our preemption decisions. On the other hand, - * if the request is on the HW, it too is not in the - * queue; but in that case we may still need to reorder - * the inflight requests. - */ + /* + * Once the request is ready, it will be placed into the + * priority lists and then onto the HW runlist. Before the + * request is ready, it does not contribute to our preemption + * decisions and we can safely ignore it, as it will, and + * any preemption required, be dealt with upon submission. + * See engine->submit_request() + */ + if (list_empty(&node->link)) continue; - } - if (!intel_engine_is_virtual(engine) && - !i915_request_is_active(node_to_request(node))) { + if (i915_request_in_priority_queue(node_to_request(node))) { if (!cache.priolist) cache.priolist = i915_sched_lookup_priolist(engine, diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 17d7c525ea5c..4ff380770b32 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1202,16 +1202,26 @@ int __i915_vma_unbind(struct i915_vma *vma) if (ret) return ret; - GEM_BUG_ON(i915_vma_is_active(vma)); if (i915_vma_is_pinned(vma)) { vma_print_allocator(vma, "is pinned"); return -EAGAIN; } - GEM_BUG_ON(i915_vma_is_active(vma)); + /* + * After confirming that no one else is pinning this vma, wait for + * any laggards who may have crept in during the wait (through + * a residual pin skipping the vm->mutex) to complete. + */ + ret = i915_vma_sync(vma); + if (ret) + return ret; + if (!drm_mm_node_allocated(&vma->node)) return 0; + GEM_BUG_ON(i915_vma_is_pinned(vma)); + GEM_BUG_ON(i915_vma_is_active(vma)); + if (i915_vma_is_map_and_fenceable(vma)) { /* * Check that we have flushed all writes through the GGTT diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index c26219c7a49f..e4b750b0c2d3 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -441,6 +441,14 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) if (ret) goto err_msm_uninit; + if (!dev->dma_parms) { + dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms), + GFP_KERNEL); + if (!dev->dma_parms) + return -ENOMEM; + } + dma_set_max_seg_size(dev, DMA_BIT_MASK(32)); + msm_gem_shrinker_init(ddev); switch (get_mdp_ver(pdev)) { diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 6da59f476aba..b7a618db3ee2 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -166,6 +166,7 @@ panfrost_lookup_bos(struct drm_device *dev, break; } + atomic_inc(&bo->gpu_usecount); job->mappings[i] = mapping; } diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index ca1bc9019600..b3517ff9630c 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -30,6 +30,12 @@ struct panfrost_gem_object { struct mutex lock; } mappings; + /* + * Count the number of jobs referencing this BO so we don't let the + * shrinker reclaim this object prematurely. + */ + atomic_t gpu_usecount; + bool noexec :1; bool is_heap :1; }; diff --git a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c index f5dd7b29bc95..288e46c40673 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c @@ -41,6 +41,9 @@ static bool panfrost_gem_purge(struct drm_gem_object *obj) struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); struct panfrost_gem_object *bo = to_panfrost_bo(obj); + if (atomic_read(&bo->gpu_usecount)) + return false; + if (!mutex_trylock(&shmem->pages_lock)) return false; diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 7c36ec675b73..7157dfd7dea3 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -269,8 +269,13 @@ static void panfrost_job_cleanup(struct kref *ref) dma_fence_put(job->render_done_fence); if (job->mappings) { - for (i = 0; i < job->bo_count; i++) + for (i = 0; i < job->bo_count; i++) { + if (!job->mappings[i]) + break; + + atomic_dec(&job->mappings[i]->obj->gpu_usecount); panfrost_gem_mapping_put(job->mappings[i]); + } kvfree(job->mappings); } diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 92ccd7aed0d4..c693b2ca0329 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -5,7 +5,7 @@ ccflags-y := -Idrivers/gpu/drm/amd/include -hostprogs-y := mkregtable +hostprogs := mkregtable clean-files := rn50_reg_safe.h r100_reg_safe.h r200_reg_safe.h rv515_reg_safe.h r300_reg_safe.h r420_reg_safe.h rs600_reg_safe.h r600_reg_safe.h evergreen_reg_safe.h cayman_reg_safe.h quiet_cmd_mkregtable = MKREGTABLE $@ diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 5ae67d526b1d..328272ff77d8 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -85,7 +85,6 @@ static int sun4i_drv_bind(struct device *dev) } drm_mode_config_init(drm); - drm->mode_config.allow_fb_modifiers = true; ret = component_bind_all(drm->dev, drm); if (ret) { diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 5bd60ded3d81..909eba43664a 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -196,9 +196,10 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev, return ERR_CAST(obj); ret = drm_gem_handle_create(file, &obj->base, handle); - drm_gem_object_put_unlocked(&obj->base); - if (ret) + if (ret) { + drm_gem_object_put_unlocked(&obj->base); return ERR_PTR(ret); + } return &obj->base; } @@ -221,7 +222,9 @@ static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev, args->size = gem_object->size; args->pitch = pitch; - DRM_DEBUG("Created object of size %lld\n", size); + drm_gem_object_put_unlocked(gem_object); + + DRM_DEBUG("Created object of size %llu\n", args->size); return 0; } diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 1006c694d9fb..6d397732138d 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -438,7 +438,7 @@ config CSKY_MPINTC help Say yes here to enable C-SKY SMP interrupt controller driver used for C-SKY SMP system. - In fact it's not mmio map in hw and it use ld/st to visit the + In fact it's not mmio map in hardware and it uses ld/st to visit the controller's register inside CPU. config CSKY_APB_INTC @@ -446,7 +446,7 @@ config CSKY_APB_INTC depends on CSKY help Say yes here to enable C-SKY APB interrupt controller driver used - by C-SKY single core SOC system. It use mmio map apb-bus to visit + by C-SKY single core SOC system. It uses mmio map apb-bus to visit the controller's register. config IMX_IRQSTEER diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index f71758632f8d..83b1186ffcad 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -661,7 +661,7 @@ static struct its_collection *its_build_invall_cmd(struct its_node *its, struct its_cmd_desc *desc) { its_encode_cmd(cmd, GITS_CMD_INVALL); - its_encode_collection(cmd, desc->its_mapc_cmd.col->col_id); + its_encode_collection(cmd, desc->its_invall_cmd.col->col_id); its_fixup_cmd(cmd); @@ -2376,6 +2376,8 @@ static u64 inherit_vpe_l1_table_from_its(void) continue; /* We have a winner! */ + gic_data_rdist()->vpe_l1_base = its->tables[2].base; + val = GICR_VPROPBASER_4_1_VALID; if (baser & GITS_BASER_INDIRECT) val |= GICR_VPROPBASER_4_1_INDIRECT; @@ -2413,14 +2415,12 @@ static u64 inherit_vpe_l1_table_from_rd(cpumask_t **mask) for_each_possible_cpu(cpu) { void __iomem *base = gic_data_rdist_cpu(cpu)->rd_base; - u32 tmp; if (!base || cpu == smp_processor_id()) continue; val = gic_read_typer(base + GICR_TYPER); - tmp = compute_common_aff(val); - if (tmp != aff) + if (aff != compute_common_aff(val)) continue; /* @@ -2429,9 +2429,10 @@ static u64 inherit_vpe_l1_table_from_rd(cpumask_t **mask) * ours wrt CommonLPIAff. Let's use its own VPROPBASER. * Make sure we don't write the Z bit in that case. */ - val = gits_read_vpropbaser(base + SZ_128K + GICR_VPROPBASER); + val = gicr_read_vpropbaser(base + SZ_128K + GICR_VPROPBASER); val &= ~GICR_VPROPBASER_4_1_Z; + gic_data_rdist()->vpe_l1_base = gic_data_rdist_cpu(cpu)->vpe_l1_base; *mask = gic_data_rdist_cpu(cpu)->vpe_table_mask; return val; @@ -2440,6 +2441,72 @@ static u64 inherit_vpe_l1_table_from_rd(cpumask_t **mask) return 0; } +static bool allocate_vpe_l2_table(int cpu, u32 id) +{ + void __iomem *base = gic_data_rdist_cpu(cpu)->rd_base; + unsigned int psz, esz, idx, npg, gpsz; + u64 val; + struct page *page; + __le64 *table; + + if (!gic_rdists->has_rvpeid) + return true; + + val = gicr_read_vpropbaser(base + SZ_128K + GICR_VPROPBASER); + + esz = FIELD_GET(GICR_VPROPBASER_4_1_ENTRY_SIZE, val) + 1; + gpsz = FIELD_GET(GICR_VPROPBASER_4_1_PAGE_SIZE, val); + npg = FIELD_GET(GICR_VPROPBASER_4_1_SIZE, val) + 1; + + switch (gpsz) { + default: + WARN_ON(1); + /* fall through */ + case GIC_PAGE_SIZE_4K: + psz = SZ_4K; + break; + case GIC_PAGE_SIZE_16K: + psz = SZ_16K; + break; + case GIC_PAGE_SIZE_64K: + psz = SZ_64K; + break; + } + + /* Don't allow vpe_id that exceeds single, flat table limit */ + if (!(val & GICR_VPROPBASER_4_1_INDIRECT)) + return (id < (npg * psz / (esz * SZ_8))); + + /* Compute 1st level table index & check if that exceeds table limit */ + idx = id >> ilog2(psz / (esz * SZ_8)); + if (idx >= (npg * psz / GITS_LVL1_ENTRY_SIZE)) + return false; + + table = gic_data_rdist_cpu(cpu)->vpe_l1_base; + + /* Allocate memory for 2nd level table */ + if (!table[idx]) { + page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(psz)); + if (!page) + return false; + + /* Flush Lvl2 table to PoC if hw doesn't support coherency */ + if (!(val & GICR_VPROPBASER_SHAREABILITY_MASK)) + gic_flush_dcache_to_poc(page_address(page), psz); + + table[idx] = cpu_to_le64(page_to_phys(page) | GITS_BASER_VALID); + + /* Flush Lvl1 entry to PoC if hw doesn't support coherency */ + if (!(val & GICR_VPROPBASER_SHAREABILITY_MASK)) + gic_flush_dcache_to_poc(table + idx, GITS_LVL1_ENTRY_SIZE); + + /* Ensure updated table contents are visible to RD hardware */ + dsb(sy); + } + + return true; +} + static int allocate_vpe_l1_table(void) { void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); @@ -2457,8 +2524,8 @@ static int allocate_vpe_l1_table(void) * effect of making sure no doorbell will be generated and we can * then safely clear VPROPBASER.Valid. */ - if (gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER) & GICR_VPENDBASER_Valid) - gits_write_vpendbaser(GICR_VPENDBASER_PendingLast, + if (gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER) & GICR_VPENDBASER_Valid) + gicr_write_vpendbaser(GICR_VPENDBASER_PendingLast, vlpi_base + GICR_VPENDBASER); /* @@ -2481,8 +2548,8 @@ static int allocate_vpe_l1_table(void) /* First probe the page size */ val = FIELD_PREP(GICR_VPROPBASER_4_1_PAGE_SIZE, GIC_PAGE_SIZE_64K); - gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); - val = gits_read_vpropbaser(vlpi_base + GICR_VPROPBASER); + gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); + val = gicr_read_vpropbaser(vlpi_base + GICR_VPROPBASER); gpsz = FIELD_GET(GICR_VPROPBASER_4_1_PAGE_SIZE, val); esz = FIELD_GET(GICR_VPROPBASER_4_1_ENTRY_SIZE, val); @@ -2531,7 +2598,7 @@ static int allocate_vpe_l1_table(void) npg = 1; } - val |= FIELD_PREP(GICR_VPROPBASER_4_1_SIZE, npg); + val |= FIELD_PREP(GICR_VPROPBASER_4_1_SIZE, npg - 1); /* Right, that's the number of CPU pages we need for L1 */ np = DIV_ROUND_UP(npg * psz, PAGE_SIZE); @@ -2542,7 +2609,7 @@ static int allocate_vpe_l1_table(void) if (!page) return -ENOMEM; - gic_data_rdist()->vpe_l1_page = page; + gic_data_rdist()->vpe_l1_base = page_address(page); pa = virt_to_phys(page_address(page)); WARN_ON(!IS_ALIGNED(pa, psz)); @@ -2553,7 +2620,7 @@ static int allocate_vpe_l1_table(void) val |= GICR_VPROPBASER_4_1_VALID; out: - gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); + gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); cpumask_set_cpu(smp_processor_id(), gic_data_rdist()->vpe_table_mask); pr_debug("CPU%d: VPROPBASER = %llx %*pbl\n", @@ -2660,14 +2727,14 @@ static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set) bool clean; u64 val; - val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); + val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER); val &= ~GICR_VPENDBASER_Valid; val &= ~clr; val |= set; - gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); + gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); do { - val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); + val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER); clean = !(val & GICR_VPENDBASER_Dirty); if (!clean) { count--; @@ -2782,7 +2849,7 @@ static void its_cpu_init_lpis(void) val = (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK; pr_debug("GICv4: CPU%d: Init IDbits to 0x%llx for GICR_VPROPBASER\n", smp_processor_id(), val); - gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); + gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); /* * Also clear Valid bit of GICR_VPENDBASER, in case some @@ -2790,7 +2857,6 @@ static void its_cpu_init_lpis(void) * corrupting memory. */ val = its_clear_vpend_valid(vlpi_base, 0, 0); - WARN_ON(val & GICR_VPENDBASER_Dirty); } if (allocate_vpe_l1_table()) { @@ -2954,6 +3020,7 @@ static bool its_alloc_device_table(struct its_node *its, u32 dev_id) static bool its_alloc_vpe_table(u32 vpe_id) { struct its_node *its; + int cpu; /* * Make sure the L2 tables are allocated on *all* v4 ITSs. We @@ -2976,6 +3043,19 @@ static bool its_alloc_vpe_table(u32 vpe_id) return false; } + /* Non v4.1? No need to iterate RDs and go back early. */ + if (!gic_rdists->has_rvpeid) + return true; + + /* + * Make sure the L2 tables are allocated for all copies of + * the L1 table on *all* v4.1 RDs. + */ + for_each_possible_cpu(cpu) { + if (!allocate_vpe_l2_table(cpu, vpe_id)) + return false; + } + return true; } @@ -3443,7 +3523,7 @@ static void its_vpe_schedule(struct its_vpe *vpe) val |= (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK; val |= GICR_VPROPBASER_RaWb; val |= GICR_VPROPBASER_InnerShareable; - gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); + gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); val = virt_to_phys(page_address(vpe->vpt_page)) & GENMASK_ULL(51, 16); @@ -3461,7 +3541,7 @@ static void its_vpe_schedule(struct its_vpe *vpe) val |= GICR_VPENDBASER_PendingLast; val |= vpe->idai ? GICR_VPENDBASER_IDAI : 0; val |= GICR_VPENDBASER_Valid; - gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); + gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); } static void its_vpe_deschedule(struct its_vpe *vpe) @@ -3661,7 +3741,7 @@ static void its_vpe_4_1_schedule(struct its_vpe *vpe, val |= info->g1en ? GICR_VPENDBASER_4_1_VGRP1EN : 0; val |= FIELD_PREP(GICR_VPENDBASER_4_1_VPEID, vpe->vpe_id); - gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); + gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); } static void its_vpe_4_1_deschedule(struct its_vpe *vpe, diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 286f98222878..c1f7af9d9ae7 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1839,6 +1839,7 @@ static struct struct redist_region *redist_regs; u32 nr_redist_regions; bool single_redist; + int enabled_rdists; u32 maint_irq; int maint_irq_mode; phys_addr_t vcpu_base; @@ -1933,8 +1934,10 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header, * If GICC is enabled and has valid gicr base address, then it means * GICR base is presented via GICC */ - if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) + if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) { + acpi_data.enabled_rdists++; return 0; + } /* * It's perfectly valid firmware can pass disabled GICC entry, driver @@ -1964,8 +1967,10 @@ static int __init gic_acpi_count_gicr_regions(void) count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, gic_acpi_match_gicc, 0); - if (count > 0) + if (count > 0) { acpi_data.single_redist = true; + count = acpi_data.enabled_rdists; + } return count; } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 48d5ec770b94..d10805e5e623 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3526,6 +3526,47 @@ static void bond_fold_stats(struct rtnl_link_stats64 *_res, } } +#ifdef CONFIG_LOCKDEP +static int bond_get_lowest_level_rcu(struct net_device *dev) +{ + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int cur = 0, max = 0; + + now = dev; + iter = &dev->adj_list.lower; + + while (1) { + next = NULL; + while (1) { + ldev = netdev_next_lower_dev_rcu(now, &iter); + if (!ldev) + break; + + next = ldev; + niter = &ldev->adj_list.lower; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + if (max <= cur) + max = cur; + break; + } + + if (!next) { + if (!cur) + return max; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; + } + + return max; +} +#endif + static void bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 *stats) { @@ -3533,11 +3574,17 @@ static void bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 temp; struct list_head *iter; struct slave *slave; + int nest_level = 0; - spin_lock(&bond->stats_lock); - memcpy(stats, &bond->bond_stats, sizeof(*stats)); rcu_read_lock(); +#ifdef CONFIG_LOCKDEP + nest_level = bond_get_lowest_level_rcu(bond_dev); +#endif + + spin_lock_nested(&bond->stats_lock, nest_level); + memcpy(stats, &bond->bond_stats, sizeof(*stats)); + bond_for_each_slave_rcu(bond, slave, iter) { const struct rtnl_link_stats64 *new = dev_get_stats(slave->dev, &temp); @@ -3547,10 +3594,10 @@ static void bond_get_stats(struct net_device *bond_dev, /* save off the slave stats for the next run */ memcpy(&slave->slave_stats, new, sizeof(*new)); } - rcu_read_unlock(); memcpy(&bond->bond_stats, stats, sizeof(*stats)); spin_unlock(&bond->stats_lock); + rcu_read_unlock(); } static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) @@ -3640,6 +3687,8 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd case BOND_RELEASE_OLD: case SIOCBONDRELEASE: res = bond_release(bond_dev, slave_dev); + if (!res) + netdev_update_lockdep_key(slave_dev); break; case BOND_SETHWADDR_OLD: case SIOCBONDSETHWADDR: diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index ddb3916d3506..215c10923289 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1398,6 +1398,8 @@ static int bond_option_slaves_set(struct bonding *bond, case '-': slave_dbg(bond->dev, dev, "Releasing interface\n"); ret = bond_release(bond->dev, dev); + if (!ret) + netdev_update_lockdep_key(dev); break; default: diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 449a22172e07..1a69286daa8d 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1366,6 +1366,9 @@ void b53_vlan_add(struct dsa_switch *ds, int port, b53_get_vlan_entry(dev, vid, vl); + if (vid == 0 && vid == b53_default_pvid(dev)) + untagged = true; + vl->members |= BIT(port); if (untagged && !dsa_is_cpu_port(ds, port)) vl->untag |= BIT(port); diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index ea62604fdf8c..1fb58f9ad80b 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -200,6 +200,11 @@ static void comp_ctxt_release(struct ena_com_admin_queue *queue, static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *queue, u16 command_id, bool capture) { + if (unlikely(!queue->comp_ctx)) { + pr_err("Completion context is NULL\n"); + return NULL; + } + if (unlikely(command_id >= queue->q_depth)) { pr_err("command id is larger than the queue size. cmd_id: %u queue size %d\n", command_id, queue->q_depth); @@ -1041,9 +1046,41 @@ static int ena_com_get_feature(struct ena_com_dev *ena_dev, feature_ver); } +int ena_com_get_current_hash_function(struct ena_com_dev *ena_dev) +{ + return ena_dev->rss.hash_func; +} + +static void ena_com_hash_key_fill_default_key(struct ena_com_dev *ena_dev) +{ + struct ena_admin_feature_rss_flow_hash_control *hash_key = + (ena_dev->rss).hash_key; + + netdev_rss_key_fill(&hash_key->key, sizeof(hash_key->key)); + /* The key is stored in the device in u32 array + * as well as the API requires the key to be passed in this + * format. Thus the size of our array should be divided by 4 + */ + hash_key->keys_num = sizeof(hash_key->key) / sizeof(u32); +} + static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev) { struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_feature_rss_flow_hash_control *hash_key; + struct ena_admin_get_feat_resp get_resp; + int rc; + + hash_key = (ena_dev->rss).hash_key; + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_FUNCTION, + ena_dev->rss.hash_key_dma_addr, + sizeof(ena_dev->rss.hash_key), 0); + if (unlikely(rc)) { + hash_key = NULL; + return -EOPNOTSUPP; + } rss->hash_key = dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), @@ -1254,30 +1291,6 @@ static int ena_com_ind_tbl_convert_to_device(struct ena_com_dev *ena_dev) return 0; } -static int ena_com_ind_tbl_convert_from_device(struct ena_com_dev *ena_dev) -{ - u16 dev_idx_to_host_tbl[ENA_TOTAL_NUM_QUEUES] = { (u16)-1 }; - struct ena_rss *rss = &ena_dev->rss; - u8 idx; - u16 i; - - for (i = 0; i < ENA_TOTAL_NUM_QUEUES; i++) - dev_idx_to_host_tbl[ena_dev->io_sq_queues[i].idx] = i; - - for (i = 0; i < 1 << rss->tbl_log_size; i++) { - if (rss->rss_ind_tbl[i].cq_idx > ENA_TOTAL_NUM_QUEUES) - return -EINVAL; - idx = (u8)rss->rss_ind_tbl[i].cq_idx; - - if (dev_idx_to_host_tbl[idx] > ENA_TOTAL_NUM_QUEUES) - return -EINVAL; - - rss->host_rss_ind_tbl[i] = dev_idx_to_host_tbl[idx]; - } - - return 0; -} - static void ena_com_update_intr_delay_resolution(struct ena_com_dev *ena_dev, u16 intr_delay_resolution) { @@ -2297,15 +2310,16 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, switch (func) { case ENA_ADMIN_TOEPLITZ: - if (key_len > sizeof(hash_key->key)) { - pr_err("key len (%hu) is bigger than the max supported (%zu)\n", - key_len, sizeof(hash_key->key)); - return -EINVAL; + if (key) { + if (key_len != sizeof(hash_key->key)) { + pr_err("key len (%hu) doesn't equal the supported size (%zu)\n", + key_len, sizeof(hash_key->key)); + return -EINVAL; + } + memcpy(hash_key->key, key, key_len); + rss->hash_init_val = init_val; + hash_key->keys_num = key_len >> 2; } - - memcpy(hash_key->key, key, key_len); - rss->hash_init_val = init_val; - hash_key->keys_num = key_len >> 2; break; case ENA_ADMIN_CRC32: rss->hash_init_val = init_val; @@ -2342,7 +2356,11 @@ int ena_com_get_hash_function(struct ena_com_dev *ena_dev, if (unlikely(rc)) return rc; - rss->hash_func = get_resp.u.flow_hash_func.selected_func; + /* ffs() returns 1 in case the lsb is set */ + rss->hash_func = ffs(get_resp.u.flow_hash_func.selected_func); + if (rss->hash_func) + rss->hash_func--; + if (func) *func = rss->hash_func; @@ -2606,10 +2624,6 @@ int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl) if (!ind_tbl) return 0; - rc = ena_com_ind_tbl_convert_from_device(ena_dev); - if (unlikely(rc)) - return rc; - for (i = 0; i < (1 << rss->tbl_log_size); i++) ind_tbl[i] = rss->host_rss_ind_tbl[i]; @@ -2626,9 +2640,15 @@ int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 indr_tbl_log_size) if (unlikely(rc)) goto err_indr_tbl; + /* The following function might return unsupported in case the + * device doesn't support setting the key / hash function. We can safely + * ignore this error and have indirection table support only. + */ rc = ena_com_hash_key_allocate(ena_dev); - if (unlikely(rc)) + if (unlikely(rc) && rc != -EOPNOTSUPP) goto err_hash_key; + else if (rc != -EOPNOTSUPP) + ena_com_hash_key_fill_default_key(ena_dev); rc = ena_com_hash_ctrl_init(ena_dev); if (unlikely(rc)) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h index 0ce37d54ed10..469f298199a7 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -44,6 +44,7 @@ #include <linux/spinlock.h> #include <linux/types.h> #include <linux/wait.h> +#include <linux/netdevice.h> #include "ena_common_defs.h" #include "ena_admin_defs.h" @@ -655,6 +656,14 @@ int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 log_size); */ void ena_com_rss_destroy(struct ena_com_dev *ena_dev); +/* ena_com_get_current_hash_function - Get RSS hash function + * @ena_dev: ENA communication layer struct + * + * Return the current hash function. + * @return: 0 or one of the ena_admin_hash_functions values. + */ +int ena_com_get_current_hash_function(struct ena_com_dev *ena_dev); + /* ena_com_fill_hash_function - Fill RSS hash function * @ena_dev: ENA communication layer struct * @func: The hash function (Toeplitz or crc) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index b4e891d49a94..ced1d577b62a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -636,6 +636,28 @@ static u32 ena_get_rxfh_key_size(struct net_device *netdev) return ENA_HASH_KEY_SIZE; } +static int ena_indirection_table_get(struct ena_adapter *adapter, u32 *indir) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + int i, rc; + + if (!indir) + return 0; + + rc = ena_com_indirect_table_get(ena_dev, indir); + if (rc) + return rc; + + /* Our internal representation of the indices is: even indices + * for Tx and uneven indices for Rx. We need to convert the Rx + * indices to be consecutive + */ + for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) + indir[i] = ENA_IO_RXQ_IDX_TO_COMBINED_IDX(indir[i]); + + return rc; +} + static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { @@ -644,11 +666,25 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 func; int rc; - rc = ena_com_indirect_table_get(adapter->ena_dev, indir); + rc = ena_indirection_table_get(adapter, indir); if (rc) return rc; + /* We call this function in order to check if the device + * supports getting/setting the hash function. + */ rc = ena_com_get_hash_function(adapter->ena_dev, &ena_func, key); + + if (rc) { + if (rc == -EOPNOTSUPP) { + key = NULL; + hfunc = NULL; + rc = 0; + } + + return rc; + } + if (rc) return rc; @@ -657,7 +693,7 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, func = ETH_RSS_HASH_TOP; break; case ENA_ADMIN_CRC32: - func = ETH_RSS_HASH_XOR; + func = ETH_RSS_HASH_CRC32; break; default: netif_err(adapter, drv, netdev, @@ -700,10 +736,13 @@ static int ena_set_rxfh(struct net_device *netdev, const u32 *indir, } switch (hfunc) { + case ETH_RSS_HASH_NO_CHANGE: + func = ena_com_get_current_hash_function(ena_dev); + break; case ETH_RSS_HASH_TOP: func = ENA_ADMIN_TOEPLITZ; break; - case ETH_RSS_HASH_XOR: + case ETH_RSS_HASH_CRC32: func = ENA_ADMIN_CRC32; break; default: @@ -814,6 +853,7 @@ static const struct ethtool_ops ena_ethtool_ops = { .set_channels = ena_set_channels, .get_tunable = ena_get_tunable, .set_tunable = ena_set_tunable, + .get_ts_info = ethtool_op_get_ts_info, }; void ena_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 894e8c1a8cf1..0b2fd96b93d7 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3706,8 +3706,8 @@ static void check_for_missing_keep_alive(struct ena_adapter *adapter) if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) return; - keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies + - adapter->keep_alive_timeout); + keep_alive_expired = adapter->last_keep_alive_jiffies + + adapter->keep_alive_timeout; if (unlikely(time_is_before_jiffies(keep_alive_expired))) { netif_err(adapter, drv, adapter->netdev, "Keep alive watchdog timeout.\n"); @@ -3809,7 +3809,7 @@ static void ena_timer_service(struct timer_list *t) } /* Reset the timer */ - mod_timer(&adapter->timer_service, jiffies + HZ); + mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); } static int ena_calc_max_io_queue_num(struct pci_dev *pdev, diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 094324fd0edc..8795e0b1dc3c 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -130,6 +130,8 @@ #define ENA_IO_TXQ_IDX(q) (2 * (q)) #define ENA_IO_RXQ_IDX(q) (2 * (q) + 1) +#define ENA_IO_TXQ_IDX_TO_COMBINED_IDX(q) ((q) / 2) +#define ENA_IO_RXQ_IDX_TO_COMBINED_IDX(q) (((q) - 1) / 2) #define ENA_MGMNT_IRQ_IDX 0 #define ENA_IO_IRQ_FIRST_IDX 1 diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index a1f99bef4a68..7b55633d2cb9 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -722,6 +722,11 @@ static int aq_ethtool_set_priv_flags(struct net_device *ndev, u32 flags) if (flags & ~AQ_PRIV_FLAGS_MASK) return -EOPNOTSUPP; + if (hweight32((flags | priv_flags) & AQ_HW_LOOPBACK_MASK) > 1) { + netdev_info(ndev, "Can't enable more than one loopback simultaneously\n"); + return -EINVAL; + } + cfg->priv_flags = flags; if ((priv_flags ^ flags) & BIT(AQ_HW_LOOPBACK_DMA_NET)) { diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c index 6102251bb909..03ff92bc4a7f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c @@ -163,7 +163,7 @@ aq_check_approve_fvlan(struct aq_nic_s *aq_nic, } if ((aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) && - (!test_bit(be16_to_cpu(fsp->h_ext.vlan_tci), + (!test_bit(be16_to_cpu(fsp->h_ext.vlan_tci) & VLAN_VID_MASK, aq_nic->active_vlans))) { netdev_err(aq_nic->ndev, "ethtool: unknown vlan-id specified"); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index cc70c606b6ef..251767c31f7e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -337,6 +337,8 @@ struct aq_fw_ops { void (*enable_ptp)(struct aq_hw_s *self, int enable); + void (*adjust_ptp)(struct aq_hw_s *self, uint64_t adj); + int (*set_eee_rate)(struct aq_hw_s *self, u32 speed); int (*get_eee_rate)(struct aq_hw_s *self, u32 *rate, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index c85e3e29012c..e95f6a6bef73 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -533,8 +533,10 @@ unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb, dx_buff->len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) + if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) { + ret = 0; goto exit; + } first = dx_buff; dx_buff->len_pkt = skb->len; @@ -655,10 +657,6 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb) if (likely(frags)) { err = self->aq_hw_ops->hw_ring_tx_xmit(self->aq_hw, ring, frags); - if (err >= 0) { - ++ring->stats.tx.packets; - ring->stats.tx.bytes += skb->len; - } } else { err = NETDEV_TX_BUSY; } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 6b27af0db499..78b6f3248756 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -359,7 +359,8 @@ static int aq_suspend_common(struct device *dev, bool deep) netif_device_detach(nic->ndev); netif_tx_stop_all_queues(nic->ndev); - aq_nic_stop(nic); + if (netif_running(nic->ndev)) + aq_nic_stop(nic); if (deep) { aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); @@ -375,7 +376,7 @@ static int atl_resume_common(struct device *dev, bool deep) { struct pci_dev *pdev = to_pci_dev(dev); struct aq_nic_s *nic; - int ret; + int ret = 0; nic = pci_get_drvdata(pdev); @@ -390,9 +391,11 @@ static int atl_resume_common(struct device *dev, bool deep) goto err_exit; } - ret = aq_nic_start(nic); - if (ret) - goto err_exit; + if (netif_running(nic->ndev)) { + ret = aq_nic_start(nic); + if (ret) + goto err_exit; + } netif_device_attach(nic->ndev); netif_tx_start_all_queues(nic->ndev); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 951d86f8b66e..bae95a618560 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -272,9 +272,12 @@ bool aq_ring_tx_clean(struct aq_ring_s *self) } } - if (unlikely(buff->is_eop)) - dev_kfree_skb_any(buff->skb); + if (unlikely(buff->is_eop)) { + ++self->stats.rx.packets; + self->stats.tx.bytes += buff->skb->len; + dev_kfree_skb_any(buff->skb); + } buff->pa = 0U; buff->eop_index = 0xffffU; self->sw_head = aq_ring_next_dx(self, self->sw_head); @@ -351,7 +354,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self, err = 0; goto err_exit; } - if (buff->is_error || buff->is_cso_err) { + if (buff->is_error || + (buff->is_lro && buff->is_cso_err)) { buff_ = buff; do { next_ = buff_->next, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 991e4d31b094..2c96f20f6289 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -78,7 +78,8 @@ struct __packed aq_ring_buff_s { u32 is_cleaned:1; u32 is_error:1; u32 is_vlan:1; - u32 rsvd3:4; + u32 is_lro:1; + u32 rsvd3:3; u16 eop_index; u16 rsvd4; }; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index ec041f78d063..d20d91cdece8 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -823,6 +823,8 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, } } + buff->is_lro = !!(HW_ATL_B0_RXD_WB_STAT2_RSCCNT & + rxd_wb->status); if (HW_ATL_B0_RXD_WB_STAT2_EOP & rxd_wb->status) { buff->len = rxd_wb->pkt_len % AQ_CFG_RX_FRAME_MAX; @@ -835,8 +837,7 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, rxd_wb->pkt_len > AQ_CFG_RX_FRAME_MAX ? AQ_CFG_RX_FRAME_MAX : rxd_wb->pkt_len; - if (HW_ATL_B0_RXD_WB_STAT2_RSCCNT & - rxd_wb->status) { + if (buff->is_lro) { /* LRO */ buff->next = rxd_wb->next_desc_ptr; ++ring->stats.rx.lro_packets; @@ -884,13 +885,16 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self, { struct aq_nic_cfg_s *cfg = self->aq_nic_cfg; unsigned int i = 0U; + u32 vlan_promisc; + u32 l2_promisc; - hw_atl_rpfl2promiscuous_mode_en_set(self, - IS_FILTER_ENABLED(IFF_PROMISC)); + l2_promisc = IS_FILTER_ENABLED(IFF_PROMISC) || + !!(cfg->priv_flags & BIT(AQ_HW_LOOPBACK_DMA_NET)); + vlan_promisc = l2_promisc || cfg->is_vlan_force_promisc; - hw_atl_rpf_vlan_prom_mode_en_set(self, - IS_FILTER_ENABLED(IFF_PROMISC) || - cfg->is_vlan_force_promisc); + hw_atl_rpfl2promiscuous_mode_en_set(self, l2_promisc); + + hw_atl_rpf_vlan_prom_mode_en_set(self, vlan_promisc); hw_atl_rpfl2multicast_flr_en_set(self, IS_FILTER_ENABLED(IFF_ALLMULTI) && @@ -1161,6 +1165,8 @@ static int hw_atl_b0_adj_sys_clock(struct aq_hw_s *self, s64 delta) { self->ptp_clk_offset += delta; + self->aq_fw_ops->adjust_ptp(self, self->ptp_clk_offset); + return 0; } @@ -1211,7 +1217,7 @@ static int hw_atl_b0_gpio_pulse(struct aq_hw_s *self, u32 index, fwreq.ptp_gpio_ctrl.index = index; fwreq.ptp_gpio_ctrl.period = period; /* Apply time offset */ - fwreq.ptp_gpio_ctrl.start = start - self->ptp_clk_offset; + fwreq.ptp_gpio_ctrl.start = start; size = sizeof(fwreq.msg_id) + sizeof(fwreq.ptp_gpio_ctrl); return self->aq_fw_ops->send_fw_request(self, &fwreq, size); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index f547baa6c954..354705f9bc49 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -22,6 +22,7 @@ #define HW_ATL_MIF_ADDR 0x0208U #define HW_ATL_MIF_VAL 0x020CU +#define HW_ATL_MPI_RPC_ADDR 0x0334U #define HW_ATL_RPC_CONTROL_ADR 0x0338U #define HW_ATL_RPC_STATE_ADR 0x033CU @@ -53,15 +54,14 @@ enum mcp_area { }; static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual); - static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self, enum hal_atl_utils_fw_state_e state); - static u32 hw_atl_utils_get_mpi_mbox_tid(struct aq_hw_s *self); static u32 hw_atl_utils_mpi_get_state(struct aq_hw_s *self); static u32 hw_atl_utils_mif_cmd_get(struct aq_hw_s *self); static u32 hw_atl_utils_mif_addr_get(struct aq_hw_s *self); static u32 hw_atl_utils_rpc_state_get(struct aq_hw_s *self); +static u32 aq_fw1x_rpc_get(struct aq_hw_s *self); int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops) { @@ -476,6 +476,10 @@ static int hw_atl_utils_init_ucp(struct aq_hw_s *self, self, self->mbox_addr, self->mbox_addr != 0U, 1000U, 10000U); + err = readx_poll_timeout_atomic(aq_fw1x_rpc_get, self, + self->rpc_addr, + self->rpc_addr != 0U, + 1000U, 100000U); return err; } @@ -531,6 +535,12 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self, self, fw.val, sw.tid == fw.tid, 1000U, 100000U); + if (err < 0) + goto err_exit; + + err = aq_hw_err_from_flags(self); + if (err < 0) + goto err_exit; if (fw.len == 0xFFFFU) { err = hw_atl_utils_fw_rpc_call(self, sw.len); @@ -1025,6 +1035,11 @@ static u32 hw_atl_utils_rpc_state_get(struct aq_hw_s *self) return aq_hw_read_reg(self, HW_ATL_RPC_STATE_ADR); } +static u32 aq_fw1x_rpc_get(struct aq_hw_s *self) +{ + return aq_hw_read_reg(self, HW_ATL_MPI_RPC_ADDR); +} + const struct aq_fw_ops aq_fw_1x_ops = { .init = hw_atl_utils_mpi_create, .deinit = hw_atl_fw1x_deinit, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c index 97ebf849695f..77a4ed64830f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -30,6 +30,9 @@ #define HW_ATL_FW3X_EXT_CONTROL_ADDR 0x378 #define HW_ATL_FW3X_EXT_STATE_ADDR 0x37c +#define HW_ATL_FW3X_PTP_ADJ_LSW_ADDR 0x50a0 +#define HW_ATL_FW3X_PTP_ADJ_MSW_ADDR 0x50a4 + #define HW_ATL_FW2X_CAP_PAUSE BIT(CAPS_HI_PAUSE) #define HW_ATL_FW2X_CAP_ASYM_PAUSE BIT(CAPS_HI_ASYMMETRIC_PAUSE) #define HW_ATL_FW2X_CAP_SLEEP_PROXY BIT(CAPS_HI_SLEEP_PROXY) @@ -475,6 +478,14 @@ static void aq_fw3x_enable_ptp(struct aq_hw_s *self, int enable) aq_hw_write_reg(self, HW_ATL_FW3X_EXT_CONTROL_ADDR, ptp_opts); } +static void aq_fw3x_adjust_ptp(struct aq_hw_s *self, uint64_t adj) +{ + aq_hw_write_reg(self, HW_ATL_FW3X_PTP_ADJ_LSW_ADDR, + (adj >> 0) & 0xffffffff); + aq_hw_write_reg(self, HW_ATL_FW3X_PTP_ADJ_MSW_ADDR, + (adj >> 32) & 0xffffffff); +} + static int aq_fw2x_led_control(struct aq_hw_s *self, u32 mode) { if (self->fw_ver_actual < HW_ATL_FW_VER_LED) @@ -633,4 +644,5 @@ const struct aq_fw_ops aq_fw_2x_ops = { .enable_ptp = aq_fw3x_enable_ptp, .led_control = aq_fw2x_led_control, .set_phyloopback = aq_fw2x_set_phyloopback, + .adjust_ptp = aq_fw3x_adjust_ptp, }; diff --git a/drivers/net/ethernet/broadcom/cnic_defs.h b/drivers/net/ethernet/broadcom/cnic_defs.h index b38499774071..99e2c6d4d8c3 100644 --- a/drivers/net/ethernet/broadcom/cnic_defs.h +++ b/drivers/net/ethernet/broadcom/cnic_defs.h @@ -543,13 +543,13 @@ struct l4_kwq_update_pg { #define L4_KWQ_UPDATE_PG_RESERVERD2_SHIFT 2 #endif #if defined(__BIG_ENDIAN) - u16 reserverd3; + u16 reserved3; u8 da0; u8 da1; #elif defined(__LITTLE_ENDIAN) u8 da1; u8 da0; - u16 reserverd3; + u16 reserved3; #endif #if defined(__BIG_ENDIAN) u8 da2; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 4508f0d150da..def94e91883a 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3790,6 +3790,10 @@ static int at91ether_open(struct net_device *dev) u32 ctl; int ret; + ret = pm_runtime_get_sync(&lp->pdev->dev); + if (ret < 0) + return ret; + /* Clear internal statistics */ ctl = macb_readl(lp, NCR); macb_writel(lp, NCR, ctl | MACB_BIT(CLRSTAT)); @@ -3854,7 +3858,7 @@ static int at91ether_close(struct net_device *dev) q->rx_buffers, q->rx_buffers_dma); q->rx_buffers = NULL; - return 0; + return pm_runtime_put(&lp->pdev->dev); } /* Transmit packet */ diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index bbd7b3175f09..ddf60dc9ad16 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2013,10 +2013,10 @@ static int enic_stop(struct net_device *netdev) napi_disable(&enic->napi[i]); netif_carrier_off(netdev); - netif_tx_disable(netdev); if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) for (i = 0; i < enic->wq_count; i++) napi_disable(&enic->napi[enic_cq_wq(enic, i)]); + netif_tx_disable(netdev); if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic)) enic_dev_del_station_addr(enic); diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 1ea3372775e6..e94ae9b94dbf 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1405,6 +1405,8 @@ static struct dm9000_plat_data *dm9000_parse_dt(struct device *dev) mac_addr = of_get_mac_address(np); if (!IS_ERR(mac_addr)) ether_addr_copy(pdata->dev_addr, mac_addr); + else if (PTR_ERR(mac_addr) == -EPROBE_DEFER) + return ERR_CAST(mac_addr); return pdata; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index ec5f6eeb639b..492bc9446463 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -6113,6 +6113,9 @@ static int hclge_get_all_rules(struct hnae3_handle *handle, static void hclge_fd_get_flow_tuples(const struct flow_keys *fkeys, struct hclge_fd_rule_tuples *tuples) { +#define flow_ip6_src fkeys->addrs.v6addrs.src.in6_u.u6_addr32 +#define flow_ip6_dst fkeys->addrs.v6addrs.dst.in6_u.u6_addr32 + tuples->ether_proto = be16_to_cpu(fkeys->basic.n_proto); tuples->ip_proto = fkeys->basic.ip_proto; tuples->dst_port = be16_to_cpu(fkeys->ports.dst); @@ -6121,12 +6124,12 @@ static void hclge_fd_get_flow_tuples(const struct flow_keys *fkeys, tuples->src_ip[3] = be32_to_cpu(fkeys->addrs.v4addrs.src); tuples->dst_ip[3] = be32_to_cpu(fkeys->addrs.v4addrs.dst); } else { - memcpy(tuples->src_ip, - fkeys->addrs.v6addrs.src.in6_u.u6_addr32, - sizeof(tuples->src_ip)); - memcpy(tuples->dst_ip, - fkeys->addrs.v6addrs.dst.in6_u.u6_addr32, - sizeof(tuples->dst_ip)); + int i; + + for (i = 0; i < IPV6_SIZE; i++) { + tuples->src_ip[i] = be32_to_cpu(flow_ip6_src[i]); + tuples->dst_ip[i] = be32_to_cpu(flow_ip6_dst[i]); + } } } @@ -9834,6 +9837,13 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } + ret = init_mgr_tbl(hdev); + if (ret) { + dev_err(&pdev->dev, + "failed to reinit manager table, ret = %d\n", ret); + return ret; + } + ret = hclge_init_fd_config(hdev); if (ret) { dev_err(&pdev->dev, "fd table init fail, ret=%d\n", ret); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 180224eab1ca..28db13253a5e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -566,7 +566,7 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) */ kinfo->num_tc = vport->vport_id ? 1 : min_t(u16, vport->alloc_tqps, hdev->tm_info.num_tc); - vport->qs_offset = (vport->vport_id ? hdev->tm_info.num_tc : 0) + + vport->qs_offset = (vport->vport_id ? HNAE3_MAX_TC : 0) + (vport->vport_id ? (vport->vport_id - 1) : 0); max_rss_size = min_t(u16, hdev->rss_size_max, diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 69523ac85639..56b9e445732b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2362,7 +2362,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg) goto error_param; } - if (i40e_vc_validate_vqs_bitmaps(vqs)) { + if (!i40e_vc_validate_vqs_bitmaps(vqs)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2424,7 +2424,7 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg) goto error_param; } - if (i40e_vc_validate_vqs_bitmaps(vqs)) { + if (!i40e_vc_validate_vqs_bitmaps(vqs)) { aq_ret = I40E_ERR_PARAM; goto error_param; } diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 4459bc564b11..6873998cf145 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1660,6 +1660,7 @@ struct ice_aqc_get_pkg_info_resp { __le32 count; struct ice_aqc_get_pkg_info pkg_info[1]; }; + /** * struct ice_aq_desc - Admin Queue (AQ) descriptor * @flags: ICE_AQ_FLAG_* flags diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index d8e975cceb21..81885efadc7a 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -324,7 +324,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring) if (err) return err; - dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n", + dev_info(ice_pf_to_dev(vsi->back), "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n", ring->q_index); } else { ring->zca.free = NULL; @@ -405,8 +405,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring) /* Absolute queue number out of 2K needs to be passed */ err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q); if (err) { - dev_err(&vsi->back->pdev->dev, - "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n", + dev_err(ice_pf_to_dev(vsi->back), "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n", pf_q, err); return -EIO; } @@ -428,8 +427,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring) ice_alloc_rx_bufs_slow_zc(ring, ICE_DESC_UNUSED(ring)) : ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring)); if (err) - dev_info(&vsi->back->pdev->dev, - "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n", + dev_info(ice_pf_to_dev(vsi->back), "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n", ring->xsk_umem ? "UMEM enabled " : "", ring->q_index, pf_q); @@ -490,8 +488,7 @@ int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx) /* wait for the change to finish */ ret = ice_pf_rxq_wait(pf, pf_q, ena); if (ret) - dev_err(ice_pf_to_dev(pf), - "VSI idx %d Rx ring %d %sable timeout\n", + dev_err(ice_pf_to_dev(pf), "VSI idx %d Rx ring %d %sable timeout\n", vsi->idx, pf_q, (ena ? "en" : "dis")); return ret; @@ -506,20 +503,15 @@ int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx) */ int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi) { - struct ice_pf *pf = vsi->back; - int v_idx = 0, num_q_vectors; - struct device *dev; - int err; + struct device *dev = ice_pf_to_dev(vsi->back); + int v_idx, err; - dev = ice_pf_to_dev(pf); if (vsi->q_vectors[0]) { dev_dbg(dev, "VSI %d has existing q_vectors\n", vsi->vsi_num); return -EEXIST; } - num_q_vectors = vsi->num_q_vectors; - - for (v_idx = 0; v_idx < num_q_vectors; v_idx++) { + for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++) { err = ice_vsi_alloc_q_vector(vsi, v_idx); if (err) goto err_out; @@ -648,8 +640,7 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle, 1, qg_buf, buf_len, NULL); if (status) { - dev_err(ice_pf_to_dev(pf), - "Failed to set LAN Tx queue context, error: %d\n", + dev_err(ice_pf_to_dev(pf), "Failed to set LAN Tx queue context, error: %d\n", status); return -ENODEV; } @@ -815,14 +806,12 @@ ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, * queues at the hardware level anyway. */ if (status == ICE_ERR_RESET_ONGOING) { - dev_dbg(&vsi->back->pdev->dev, - "Reset in progress. LAN Tx queues already disabled\n"); + dev_dbg(ice_pf_to_dev(vsi->back), "Reset in progress. LAN Tx queues already disabled\n"); } else if (status == ICE_ERR_DOES_NOT_EXIST) { - dev_dbg(&vsi->back->pdev->dev, - "LAN Tx queues do not exist, nothing to disable\n"); + dev_dbg(ice_pf_to_dev(vsi->back), "LAN Tx queues do not exist, nothing to disable\n"); } else if (status) { - dev_err(&vsi->back->pdev->dev, - "Failed to disable LAN Tx queues, error: %d\n", status); + dev_err(ice_pf_to_dev(vsi->back), "Failed to disable LAN Tx queues, error: %d\n", + status); return -ENODEV; } diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 0207e28c2682..04d5db0a25bf 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -25,20 +25,6 @@ static enum ice_status ice_set_mac_type(struct ice_hw *hw) } /** - * ice_dev_onetime_setup - Temporary HW/FW workarounds - * @hw: pointer to the HW structure - * - * This function provides temporary workarounds for certain issues - * that are expected to be fixed in the HW/FW. - */ -void ice_dev_onetime_setup(struct ice_hw *hw) -{ -#define MBX_PF_VT_PFALLOC 0x00231E80 - /* set VFs per PF */ - wr32(hw, MBX_PF_VT_PFALLOC, rd32(hw, PF_VT_PFALLOC_HIF)); -} - -/** * ice_clear_pf_cfg - Clear PF configuration * @hw: pointer to the hardware structure * @@ -602,10 +588,10 @@ void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf) } /** - * ice_get_itr_intrl_gran - determine int/intrl granularity + * ice_get_itr_intrl_gran * @hw: pointer to the HW struct * - * Determines the ITR/intrl granularities based on the maximum aggregate + * Determines the ITR/INTRL granularities based on the maximum aggregate * bandwidth according to the device's configuration during power-on. */ static void ice_get_itr_intrl_gran(struct ice_hw *hw) @@ -763,8 +749,6 @@ enum ice_status ice_init_hw(struct ice_hw *hw) if (status) goto err_unroll_sched; - ice_dev_onetime_setup(hw); - /* Get MAC information */ /* A single port can report up to two (LAN and WoL) addresses */ mac_buf = devm_kcalloc(ice_hw_to_dev(hw), 2, @@ -834,7 +818,7 @@ void ice_deinit_hw(struct ice_hw *hw) */ enum ice_status ice_check_reset(struct ice_hw *hw) { - u32 cnt, reg = 0, grst_delay; + u32 cnt, reg = 0, grst_delay, uld_mask; /* Poll for Device Active state in case a recent CORER, GLOBR, * or EMPR has occurred. The grst delay value is in 100ms units. @@ -856,13 +840,20 @@ enum ice_status ice_check_reset(struct ice_hw *hw) return ICE_ERR_RESET_FAILED; } -#define ICE_RESET_DONE_MASK (GLNVM_ULD_CORER_DONE_M | \ - GLNVM_ULD_GLOBR_DONE_M) +#define ICE_RESET_DONE_MASK (GLNVM_ULD_PCIER_DONE_M |\ + GLNVM_ULD_PCIER_DONE_1_M |\ + GLNVM_ULD_CORER_DONE_M |\ + GLNVM_ULD_GLOBR_DONE_M |\ + GLNVM_ULD_POR_DONE_M |\ + GLNVM_ULD_POR_DONE_1_M |\ + GLNVM_ULD_PCIER_DONE_2_M) + + uld_mask = ICE_RESET_DONE_MASK; /* Device is Active; check Global Reset processes are done */ for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) { - reg = rd32(hw, GLNVM_ULD) & ICE_RESET_DONE_MASK; - if (reg == ICE_RESET_DONE_MASK) { + reg = rd32(hw, GLNVM_ULD) & uld_mask; + if (reg == uld_mask) { ice_debug(hw, ICE_DBG_INIT, "Global reset processes done. %d\n", cnt); break; diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index b5c013fdaaf9..f9fc005d35a7 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -54,8 +54,6 @@ enum ice_status ice_get_caps(struct ice_hw *hw); void ice_set_safe_mode_caps(struct ice_hw *hw); -void ice_dev_onetime_setup(struct ice_hw *hw); - enum ice_status ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, u32 rxq_index); diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c index 713e8a892e14..adb8dab765c8 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@ -1323,13 +1323,13 @@ enum ice_status ice_set_dcb_cfg(struct ice_port_info *pi) } /** - * ice_aq_query_port_ets - query port ets configuration + * ice_aq_query_port_ets - query port ETS configuration * @pi: port information structure * @buf: pointer to buffer * @buf_size: buffer size in bytes * @cd: pointer to command details structure or NULL * - * query current port ets configuration + * query current port ETS configuration */ static enum ice_status ice_aq_query_port_ets(struct ice_port_info *pi, @@ -1416,13 +1416,13 @@ ice_update_port_tc_tree_cfg(struct ice_port_info *pi, } /** - * ice_query_port_ets - query port ets configuration + * ice_query_port_ets - query port ETS configuration * @pi: port information structure * @buf: pointer to buffer * @buf_size: buffer size in bytes * @cd: pointer to command details structure or NULL * - * query current port ets configuration and update the + * query current port ETS configuration and update the * SW DB with the TC changes */ enum ice_status diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 0664e5b8d130..7108fb41b604 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -315,9 +315,9 @@ ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg, */ void ice_dcb_rebuild(struct ice_pf *pf) { - struct ice_dcbx_cfg *local_dcbx_cfg, *desired_dcbx_cfg, *prev_cfg; struct ice_aqc_port_ets_elem buf = { 0 }; struct device *dev = ice_pf_to_dev(pf); + struct ice_dcbx_cfg *err_cfg; enum ice_status ret; ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL); @@ -330,53 +330,25 @@ void ice_dcb_rebuild(struct ice_pf *pf) if (!test_bit(ICE_FLAG_DCB_ENA, pf->flags)) return; - local_dcbx_cfg = &pf->hw.port_info->local_dcbx_cfg; - desired_dcbx_cfg = &pf->hw.port_info->desired_dcbx_cfg; + mutex_lock(&pf->tc_mutex); - /* Save current willing state and force FW to unwilling */ - local_dcbx_cfg->etscfg.willing = 0x0; - local_dcbx_cfg->pfc.willing = 0x0; - local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING; + if (!pf->hw.port_info->is_sw_lldp) + ice_cfg_etsrec_defaults(pf->hw.port_info); - ice_cfg_etsrec_defaults(pf->hw.port_info); ret = ice_set_dcb_cfg(pf->hw.port_info); if (ret) { - dev_err(dev, "Failed to set DCB to unwilling\n"); + dev_err(dev, "Failed to set DCB config in rebuild\n"); goto dcb_error; } - /* Retrieve DCB config and ensure same as current in SW */ - prev_cfg = kmemdup(local_dcbx_cfg, sizeof(*prev_cfg), GFP_KERNEL); - if (!prev_cfg) - goto dcb_error; - - ice_init_dcb(&pf->hw, true); - if (pf->hw.port_info->dcbx_status == ICE_DCBX_STATUS_DIS) - pf->hw.port_info->is_sw_lldp = true; - else - pf->hw.port_info->is_sw_lldp = false; - - if (ice_dcb_need_recfg(pf, prev_cfg, local_dcbx_cfg)) { - /* difference in cfg detected - disable DCB till next MIB */ - dev_err(dev, "Set local MIB not accurate\n"); - kfree(prev_cfg); - goto dcb_error; + if (!pf->hw.port_info->is_sw_lldp) { + ret = ice_cfg_lldp_mib_change(&pf->hw, true); + if (ret && !pf->hw.port_info->is_sw_lldp) { + dev_err(dev, "Failed to register for MIB changes\n"); + goto dcb_error; + } } - /* fetched config congruent to previous configuration */ - kfree(prev_cfg); - - /* Set the local desired config */ - if (local_dcbx_cfg->dcbx_mode == ICE_DCBX_MODE_CEE) - memcpy(local_dcbx_cfg, desired_dcbx_cfg, - sizeof(*local_dcbx_cfg)); - - ice_cfg_etsrec_defaults(pf->hw.port_info); - ret = ice_set_dcb_cfg(pf->hw.port_info); - if (ret) { - dev_err(dev, "Failed to set desired config\n"); - goto dcb_error; - } dev_info(dev, "DCB restored after reset\n"); ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL); if (ret) { @@ -384,26 +356,32 @@ void ice_dcb_rebuild(struct ice_pf *pf) goto dcb_error; } + mutex_unlock(&pf->tc_mutex); + return; dcb_error: dev_err(dev, "Disabling DCB until new settings occur\n"); - prev_cfg = kzalloc(sizeof(*prev_cfg), GFP_KERNEL); - if (!prev_cfg) + err_cfg = kzalloc(sizeof(*err_cfg), GFP_KERNEL); + if (!err_cfg) { + mutex_unlock(&pf->tc_mutex); return; + } - prev_cfg->etscfg.willing = true; - prev_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW; - prev_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS; - memcpy(&prev_cfg->etsrec, &prev_cfg->etscfg, sizeof(prev_cfg->etsrec)); + err_cfg->etscfg.willing = true; + err_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW; + err_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS; + memcpy(&err_cfg->etsrec, &err_cfg->etscfg, sizeof(err_cfg->etsrec)); /* Coverity warns the return code of ice_pf_dcb_cfg() is not checked * here as is done for other calls to that function. That check is * not necessary since this is in this function's error cleanup path. * Suppress the Coverity warning with the following comment... */ /* coverity[check_return] */ - ice_pf_dcb_cfg(pf, prev_cfg, false); - kfree(prev_cfg); + ice_pf_dcb_cfg(pf, err_cfg, false); + kfree(err_cfg); + + mutex_unlock(&pf->tc_mutex); } /** @@ -434,9 +412,9 @@ static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked) } /** - * ice_dcb_sw_default_config - Apply a default DCB config + * ice_dcb_sw_dflt_cfg - Apply a default DCB config * @pf: PF to apply config to - * @ets_willing: configure ets willing + * @ets_willing: configure ETS willing * @locked: was this function called with RTNL held */ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked) @@ -599,8 +577,7 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked) goto dcb_init_err; } - dev_info(dev, - "DCB is enabled in the hardware, max number of TCs supported on this port are %d\n", + dev_info(dev, "DCB is enabled in the hardware, max number of TCs supported on this port are %d\n", pf->hw.func_caps.common_cap.maxtc); if (err) { struct ice_vsi *pf_vsi; @@ -610,8 +587,8 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked) clear_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags); err = ice_dcb_sw_dflt_cfg(pf, true, locked); if (err) { - dev_err(dev, - "Failed to set local DCB config %d\n", err); + dev_err(dev, "Failed to set local DCB config %d\n", + err); err = -EIO; goto dcb_init_err; } @@ -777,6 +754,8 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, } } + mutex_lock(&pf->tc_mutex); + /* store the old configuration */ tmp_dcbx_cfg = pf->hw.port_info->local_dcbx_cfg; @@ -787,20 +766,20 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, ret = ice_get_dcb_cfg(pf->hw.port_info); if (ret) { dev_err(dev, "Failed to get DCB config\n"); - return; + goto out; } /* No change detected in DCBX configs */ if (!memcmp(&tmp_dcbx_cfg, &pi->local_dcbx_cfg, sizeof(tmp_dcbx_cfg))) { dev_dbg(dev, "No change detected in DCBX configuration.\n"); - return; + goto out; } need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg, &pi->local_dcbx_cfg); ice_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &pi->local_dcbx_cfg); if (!need_reconfig) - return; + goto out; /* Enable DCB tagging only when more than one TC */ if (ice_dcb_get_num_tc(&pi->local_dcbx_cfg) > 1) { @@ -814,7 +793,7 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, pf_vsi = ice_get_main_vsi(pf); if (!pf_vsi) { dev_dbg(dev, "PF VSI doesn't exist\n"); - return; + goto out; } rtnl_lock(); @@ -823,13 +802,15 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL); if (ret) { dev_err(dev, "Query Port ETS failed\n"); - rtnl_unlock(); - return; + goto unlock_rtnl; } /* changes in configuration update VSI */ ice_pf_dcb_recfg(pf); ice_ena_vsi(pf_vsi, true); +unlock_rtnl: rtnl_unlock(); +out: + mutex_unlock(&pf->tc_mutex); } diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c index d870c1aedc17..b61aba428adb 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c @@ -297,8 +297,7 @@ ice_dcbnl_get_pfc_cfg(struct net_device *netdev, int prio, u8 *setting) return; *setting = (pi->local_dcbx_cfg.pfc.pfcena >> prio) & 0x1; - dev_dbg(ice_pf_to_dev(pf), - "Get PFC Config up=%d, setting=%d, pfcenable=0x%x\n", + dev_dbg(ice_pf_to_dev(pf), "Get PFC Config up=%d, setting=%d, pfcenable=0x%x\n", prio, *setting, pi->local_dcbx_cfg.pfc.pfcena); } @@ -418,8 +417,8 @@ ice_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int prio, return; *pgid = pi->local_dcbx_cfg.etscfg.prio_table[prio]; - dev_dbg(ice_pf_to_dev(pf), - "Get PG config prio=%d tc=%d\n", prio, *pgid); + dev_dbg(ice_pf_to_dev(pf), "Get PG config prio=%d tc=%d\n", prio, + *pgid); } /** @@ -713,13 +712,13 @@ static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app) return -EINVAL; mutex_lock(&pf->tc_mutex); - ret = dcb_ieee_delapp(netdev, app); - if (ret) - goto delapp_out; - old_cfg = &pf->hw.port_info->local_dcbx_cfg; - if (old_cfg->numapps == 1) + if (old_cfg->numapps <= 1) + goto delapp_out; + + ret = dcb_ieee_delapp(netdev, app); + if (ret) goto delapp_out; new_cfg = &pf->hw.port_info->desired_dcbx_cfg; @@ -882,8 +881,7 @@ ice_dcbnl_vsi_del_app(struct ice_vsi *vsi, sapp.protocol = app->prot_id; sapp.priority = app->priority; err = ice_dcbnl_delapp(vsi->netdev, &sapp); - dev_dbg(&vsi->back->pdev->dev, - "Deleting app for VSI idx=%d err=%d sel=%d proto=0x%x, prio=%d\n", + dev_dbg(ice_pf_to_dev(vsi->back), "Deleting app for VSI idx=%d err=%d sel=%d proto=0x%x, prio=%d\n", vsi->idx, err, app->selector, app->prot_id, app->priority); } diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 90c6a3ca20c9..77c412a7e7a4 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -166,13 +166,24 @@ static void ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { struct ice_netdev_priv *np = netdev_priv(netdev); + u8 oem_ver, oem_patch, nvm_ver_hi, nvm_ver_lo; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + u16 oem_build; strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); strlcpy(drvinfo->version, ice_drv_ver, sizeof(drvinfo->version)); - strlcpy(drvinfo->fw_version, ice_nvm_version_str(&pf->hw), - sizeof(drvinfo->fw_version)); + + /* Display NVM version (from which the firmware version can be + * determined) which contains more pertinent information. + */ + ice_get_nvm_version(hw, &oem_ver, &oem_build, &oem_patch, + &nvm_ver_hi, &nvm_ver_lo); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%x.%02x 0x%x %d.%d.%d", nvm_ver_hi, nvm_ver_lo, + hw->nvm.eetrack, oem_ver, oem_build, oem_patch); + strlcpy(drvinfo->bus_info, pci_name(pf->pdev), sizeof(drvinfo->bus_info)); drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE; @@ -363,8 +374,7 @@ static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask) val = rd32(hw, reg); if (val == pattern) continue; - dev_err(dev, - "%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n" + dev_err(dev, "%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n" , __func__, reg, pattern, val); return 1; } @@ -372,8 +382,7 @@ static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask) wr32(hw, reg, orig_val); val = rd32(hw, reg); if (val != orig_val) { - dev_err(dev, - "%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n" + dev_err(dev, "%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n" , __func__, reg, orig_val, val); return 1; } @@ -791,8 +800,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test, set_bit(__ICE_TESTING, pf->state); if (ice_active_vfs(pf)) { - dev_warn(dev, - "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n"); + dev_warn(dev, "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n"); data[ICE_ETH_TEST_REG] = 1; data[ICE_ETH_TEST_EEPROM] = 1; data[ICE_ETH_TEST_INTR] = 1; @@ -1047,7 +1055,7 @@ ice_set_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam) fec = ICE_FEC_NONE; break; default: - dev_warn(&vsi->back->pdev->dev, "Unsupported FEC mode: %d\n", + dev_warn(ice_pf_to_dev(vsi->back), "Unsupported FEC mode: %d\n", fecparam->fec); return -EINVAL; } @@ -1200,8 +1208,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) * events to respond to. */ if (status) - dev_info(dev, - "Failed to unreg for LLDP events\n"); + dev_info(dev, "Failed to unreg for LLDP events\n"); /* The AQ call to stop the FW LLDP agent will generate * an error if the agent is already stopped. @@ -1256,8 +1263,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) /* Register for MIB change events */ status = ice_cfg_lldp_mib_change(&pf->hw, true); if (status) - dev_dbg(dev, - "Fail to enable MIB change events\n"); + dev_dbg(dev, "Fail to enable MIB change events\n"); } } if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) { @@ -1710,291 +1716,13 @@ ice_get_settings_link_up(struct ethtool_link_ksettings *ks, { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_port_info *pi = np->vsi->port_info; - struct ethtool_link_ksettings cap_ksettings; struct ice_link_status *link_info; struct ice_vsi *vsi = np->vsi; - bool unrecog_phy_high = false; - bool unrecog_phy_low = false; link_info = &vsi->port_info->phy.link_info; - /* Initialize supported and advertised settings based on PHY settings */ - switch (link_info->phy_type_low) { - case ICE_PHY_TYPE_LOW_100BASE_TX: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 100baseT_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 100baseT_Full); - break; - case ICE_PHY_TYPE_LOW_100M_SGMII: - ethtool_link_ksettings_add_link_mode(ks, supported, - 100baseT_Full); - break; - case ICE_PHY_TYPE_LOW_1000BASE_T: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 1000baseT_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 1000baseT_Full); - break; - case ICE_PHY_TYPE_LOW_1G_SGMII: - ethtool_link_ksettings_add_link_mode(ks, supported, - 1000baseT_Full); - break; - case ICE_PHY_TYPE_LOW_1000BASE_SX: - case ICE_PHY_TYPE_LOW_1000BASE_LX: - ethtool_link_ksettings_add_link_mode(ks, supported, - 1000baseX_Full); - break; - case ICE_PHY_TYPE_LOW_1000BASE_KX: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 1000baseKX_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 1000baseKX_Full); - break; - case ICE_PHY_TYPE_LOW_2500BASE_T: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 2500baseT_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 2500baseT_Full); - break; - case ICE_PHY_TYPE_LOW_2500BASE_X: - ethtool_link_ksettings_add_link_mode(ks, supported, - 2500baseX_Full); - break; - case ICE_PHY_TYPE_LOW_2500BASE_KX: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 2500baseX_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 2500baseX_Full); - break; - case ICE_PHY_TYPE_LOW_5GBASE_T: - case ICE_PHY_TYPE_LOW_5GBASE_KR: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 5000baseT_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 5000baseT_Full); - break; - case ICE_PHY_TYPE_LOW_10GBASE_T: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 10000baseT_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 10000baseT_Full); - break; - case ICE_PHY_TYPE_LOW_10G_SFI_DA: - case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC: - case ICE_PHY_TYPE_LOW_10G_SFI_C2C: - ethtool_link_ksettings_add_link_mode(ks, supported, - 10000baseT_Full); - break; - case ICE_PHY_TYPE_LOW_10GBASE_SR: - ethtool_link_ksettings_add_link_mode(ks, supported, - 10000baseSR_Full); - break; - case ICE_PHY_TYPE_LOW_10GBASE_LR: - ethtool_link_ksettings_add_link_mode(ks, supported, - 10000baseLR_Full); - break; - case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 10000baseKR_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 10000baseKR_Full); - break; - case ICE_PHY_TYPE_LOW_25GBASE_T: - case ICE_PHY_TYPE_LOW_25GBASE_CR: - case ICE_PHY_TYPE_LOW_25GBASE_CR_S: - case ICE_PHY_TYPE_LOW_25GBASE_CR1: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 25000baseCR_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 25000baseCR_Full); - break; - case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC: - case ICE_PHY_TYPE_LOW_25G_AUI_C2C: - ethtool_link_ksettings_add_link_mode(ks, supported, - 25000baseCR_Full); - break; - case ICE_PHY_TYPE_LOW_25GBASE_SR: - case ICE_PHY_TYPE_LOW_25GBASE_LR: - ethtool_link_ksettings_add_link_mode(ks, supported, - 25000baseSR_Full); - break; - case ICE_PHY_TYPE_LOW_25GBASE_KR: - case ICE_PHY_TYPE_LOW_25GBASE_KR1: - case ICE_PHY_TYPE_LOW_25GBASE_KR_S: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 25000baseKR_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 25000baseKR_Full); - break; - case ICE_PHY_TYPE_LOW_40GBASE_CR4: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 40000baseCR4_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 40000baseCR4_Full); - break; - case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC: - case ICE_PHY_TYPE_LOW_40G_XLAUI: - ethtool_link_ksettings_add_link_mode(ks, supported, - 40000baseCR4_Full); - break; - case ICE_PHY_TYPE_LOW_40GBASE_SR4: - ethtool_link_ksettings_add_link_mode(ks, supported, - 40000baseSR4_Full); - break; - case ICE_PHY_TYPE_LOW_40GBASE_LR4: - ethtool_link_ksettings_add_link_mode(ks, supported, - 40000baseLR4_Full); - break; - case ICE_PHY_TYPE_LOW_40GBASE_KR4: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 40000baseKR4_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 40000baseKR4_Full); - break; - case ICE_PHY_TYPE_LOW_50GBASE_CR2: - case ICE_PHY_TYPE_LOW_50GBASE_CP: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 50000baseCR2_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 50000baseCR2_Full); - break; - case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC: - case ICE_PHY_TYPE_LOW_50G_LAUI2: - case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC: - case ICE_PHY_TYPE_LOW_50G_AUI2: - case ICE_PHY_TYPE_LOW_50GBASE_SR: - case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC: - case ICE_PHY_TYPE_LOW_50G_AUI1: - ethtool_link_ksettings_add_link_mode(ks, supported, - 50000baseCR2_Full); - break; - case ICE_PHY_TYPE_LOW_50GBASE_KR2: - case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 50000baseKR2_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 50000baseKR2_Full); - break; - case ICE_PHY_TYPE_LOW_50GBASE_SR2: - case ICE_PHY_TYPE_LOW_50GBASE_LR2: - case ICE_PHY_TYPE_LOW_50GBASE_FR: - case ICE_PHY_TYPE_LOW_50GBASE_LR: - ethtool_link_ksettings_add_link_mode(ks, supported, - 50000baseSR2_Full); - break; - case ICE_PHY_TYPE_LOW_100GBASE_CR4: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseCR4_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 100000baseCR4_Full); - break; - case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC: - case ICE_PHY_TYPE_LOW_100G_CAUI4: - case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC: - case ICE_PHY_TYPE_LOW_100G_AUI4: - case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4: - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseCR4_Full); - break; - case ICE_PHY_TYPE_LOW_100GBASE_CP2: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseCR4_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 100000baseCR4_Full); - break; - case ICE_PHY_TYPE_LOW_100GBASE_SR4: - case ICE_PHY_TYPE_LOW_100GBASE_SR2: - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseSR4_Full); - break; - case ICE_PHY_TYPE_LOW_100GBASE_LR4: - case ICE_PHY_TYPE_LOW_100GBASE_DR: - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseLR4_ER4_Full); - break; - case ICE_PHY_TYPE_LOW_100GBASE_KR4: - case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseKR4_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 100000baseKR4_Full); - break; - default: - unrecog_phy_low = true; - } - - switch (link_info->phy_type_high) { - case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4: - ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseKR4_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); - ethtool_link_ksettings_add_link_mode(ks, advertising, - 100000baseKR4_Full); - break; - case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC: - case ICE_PHY_TYPE_HIGH_100G_CAUI2: - case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC: - case ICE_PHY_TYPE_HIGH_100G_AUI2: - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseCR4_Full); - break; - default: - unrecog_phy_high = true; - } - - if (unrecog_phy_low && unrecog_phy_high) { - /* if we got here and link is up something bad is afoot */ - netdev_info(netdev, - "WARNING: Unrecognized PHY_Low (0x%llx).\n", - (u64)link_info->phy_type_low); - netdev_info(netdev, - "WARNING: Unrecognized PHY_High (0x%llx).\n", - (u64)link_info->phy_type_high); - } - - /* Now that we've worked out everything that could be supported by the - * current PHY type, get what is supported by the NVM and intersect - * them to get what is truly supported - */ - memset(&cap_ksettings, 0, sizeof(cap_ksettings)); - ice_phy_type_to_ethtool(netdev, &cap_ksettings); - ethtool_intersect_link_masks(ks, &cap_ksettings); + /* Get supported and advertised settings from PHY ability with media */ + ice_phy_type_to_ethtool(netdev, ks); switch (link_info->link_speed) { case ICE_AQ_LINK_SPEED_100GB: @@ -2028,8 +1756,7 @@ ice_get_settings_link_up(struct ethtool_link_ksettings *ks, ks->base.speed = SPEED_100; break; default: - netdev_info(netdev, - "WARNING: Unrecognized link_speed (0x%x).\n", + netdev_info(netdev, "WARNING: Unrecognized link_speed (0x%x).\n", link_info->link_speed); break; } @@ -2845,13 +2572,11 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) new_tx_cnt = ALIGN(ring->tx_pending, ICE_REQ_DESC_MULTIPLE); if (new_tx_cnt != ring->tx_pending) - netdev_info(netdev, - "Requested Tx descriptor count rounded up to %d\n", + netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n", new_tx_cnt); new_rx_cnt = ALIGN(ring->rx_pending, ICE_REQ_DESC_MULTIPLE); if (new_rx_cnt != ring->rx_pending) - netdev_info(netdev, - "Requested Rx descriptor count rounded up to %d\n", + netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n", new_rx_cnt); /* if nothing to do return success */ @@ -3211,13 +2936,6 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) else return -EINVAL; - /* Tell the OS link is going down, the link will go back up when fw - * says it is ready asynchronously - */ - ice_print_link_msg(vsi, false); - netif_carrier_off(netdev); - netif_tx_stop_all_queues(netdev); - /* Set the FC mode and only restart AN if link is up */ status = ice_set_fc(pi, &aq_failures, link_up); @@ -3718,8 +3436,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL || (ec->rx_coalesce_usecs_high && ec->rx_coalesce_usecs_high < pf->hw.intrl_gran)) { - netdev_info(vsi->netdev, - "Invalid value, %s-usecs-high valid values are 0 (disabled), %d-%d\n", + netdev_info(vsi->netdev, "Invalid value, %s-usecs-high valid values are 0 (disabled), %d-%d\n", c_type_str, pf->hw.intrl_gran, ICE_MAX_INTRL); return -EINVAL; @@ -3737,8 +3454,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, break; case ICE_TX_CONTAINER: if (ec->tx_coalesce_usecs_high) { - netdev_info(vsi->netdev, - "setting %s-usecs-high is not supported\n", + netdev_info(vsi->netdev, "setting %s-usecs-high is not supported\n", c_type_str); return -EINVAL; } @@ -3755,35 +3471,24 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, itr_setting = rc->itr_setting & ~ICE_ITR_DYNAMIC; if (coalesce_usecs != itr_setting && use_adaptive_coalesce) { - netdev_info(vsi->netdev, - "%s interrupt throttling cannot be changed if adaptive-%s is enabled\n", + netdev_info(vsi->netdev, "%s interrupt throttling cannot be changed if adaptive-%s is enabled\n", c_type_str, c_type_str); return -EINVAL; } if (coalesce_usecs > ICE_ITR_MAX) { - netdev_info(vsi->netdev, - "Invalid value, %s-usecs range is 0-%d\n", + netdev_info(vsi->netdev, "Invalid value, %s-usecs range is 0-%d\n", c_type_str, ICE_ITR_MAX); return -EINVAL; } - /* hardware only supports an ITR granularity of 2us */ - if (coalesce_usecs % 2 != 0) { - netdev_info(vsi->netdev, - "Invalid value, %s-usecs must be even\n", - c_type_str); - return -EINVAL; - } - if (use_adaptive_coalesce) { rc->itr_setting |= ICE_ITR_DYNAMIC; } else { - /* store user facing value how it was set */ + /* save the user set usecs */ rc->itr_setting = coalesce_usecs; - /* set to static and convert to value HW understands */ - rc->target_itr = - ITR_TO_REG(ITR_REG_ALIGN(rc->itr_setting)); + /* device ITR granularity is in 2 usec increments */ + rc->target_itr = ITR_REG_ALIGN(rc->itr_setting); } return 0; @@ -3877,6 +3582,30 @@ ice_is_coalesce_param_invalid(struct net_device *netdev, } /** + * ice_print_if_odd_usecs - print message if user tries to set odd [tx|rx]-usecs + * @netdev: netdev used for print + * @itr_setting: previous user setting + * @use_adaptive_coalesce: if adaptive coalesce is enabled or being enabled + * @coalesce_usecs: requested value of [tx|rx]-usecs + * @c_type_str: either "rx" or "tx" to match user set field of [tx|rx]-usecs + */ +static void +ice_print_if_odd_usecs(struct net_device *netdev, u16 itr_setting, + u32 use_adaptive_coalesce, u32 coalesce_usecs, + const char *c_type_str) +{ + if (use_adaptive_coalesce) + return; + + itr_setting = ITR_TO_REG(itr_setting); + + if (itr_setting != coalesce_usecs && (coalesce_usecs % 2)) + netdev_info(netdev, "User set %s-usecs to %d, device only supports even values. Rounding down and attempting to set %s-usecs to %d\n", + c_type_str, coalesce_usecs, c_type_str, + ITR_REG_ALIGN(coalesce_usecs)); +} + +/** * __ice_set_coalesce - set ITR/INTRL values for the device * @netdev: pointer to the netdev associated with this query * @ec: ethtool structure to fill with driver's coalesce settings @@ -3896,8 +3625,19 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, return -EINVAL; if (q_num < 0) { + struct ice_q_vector *q_vector = vsi->q_vectors[0]; int v_idx; + if (q_vector) { + ice_print_if_odd_usecs(netdev, q_vector->rx.itr_setting, + ec->use_adaptive_rx_coalesce, + ec->rx_coalesce_usecs, "rx"); + + ice_print_if_odd_usecs(netdev, q_vector->tx.itr_setting, + ec->use_adaptive_tx_coalesce, + ec->tx_coalesce_usecs, "tx"); + } + ice_for_each_q_vector(vsi, v_idx) { /* In some cases if DCB is configured the num_[rx|tx]q * can be less than vsi->num_q_vectors. This check @@ -4012,8 +3752,7 @@ ice_get_module_info(struct net_device *netdev, } break; default: - netdev_warn(netdev, - "SFF Module Type not recognized.\n"); + netdev_warn(netdev, "SFF Module Type not recognized.\n"); return -EINVAL; } return 0; @@ -4081,11 +3820,11 @@ ice_get_module_eeprom(struct net_device *netdev, static const struct ethtool_ops ice_ethtool_ops = { .get_link_ksettings = ice_get_link_ksettings, .set_link_ksettings = ice_set_link_ksettings, - .get_drvinfo = ice_get_drvinfo, - .get_regs_len = ice_get_regs_len, - .get_regs = ice_get_regs, - .get_msglevel = ice_get_msglevel, - .set_msglevel = ice_set_msglevel, + .get_drvinfo = ice_get_drvinfo, + .get_regs_len = ice_get_regs_len, + .get_regs = ice_get_regs, + .get_msglevel = ice_get_msglevel, + .set_msglevel = ice_set_msglevel, .self_test = ice_self_test, .get_link = ethtool_op_get_link, .get_eeprom_len = ice_get_eeprom_len, @@ -4112,8 +3851,8 @@ static const struct ethtool_ops ice_ethtool_ops = { .get_channels = ice_get_channels, .set_channels = ice_set_channels, .get_ts_info = ethtool_op_get_ts_info, - .get_per_queue_coalesce = ice_get_per_q_coalesce, - .set_per_queue_coalesce = ice_set_per_q_coalesce, + .get_per_queue_coalesce = ice_get_per_q_coalesce, + .set_per_queue_coalesce = ice_set_per_q_coalesce, .get_fecparam = ice_get_fecparam, .set_fecparam = ice_set_fecparam, .get_module_info = ice_get_module_info, diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index f2cababf2561..6db3d0494127 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -267,8 +267,14 @@ #define GLNVM_GENS_SR_SIZE_S 5 #define GLNVM_GENS_SR_SIZE_M ICE_M(0x7, 5) #define GLNVM_ULD 0x000B6008 +#define GLNVM_ULD_PCIER_DONE_M BIT(0) +#define GLNVM_ULD_PCIER_DONE_1_M BIT(1) #define GLNVM_ULD_CORER_DONE_M BIT(3) #define GLNVM_ULD_GLOBR_DONE_M BIT(4) +#define GLNVM_ULD_POR_DONE_M BIT(5) +#define GLNVM_ULD_POR_DONE_1_M BIT(8) +#define GLNVM_ULD_PCIER_DONE_2_M BIT(9) +#define GLNVM_ULD_PE_DONE_M BIT(10) #define GLPCI_CNF2 0x000BE004 #define GLPCI_CNF2_CACHELINE_SIZE_M BIT(1) #define PF_FUNC_RID 0x0009E880 @@ -331,7 +337,6 @@ #define GLV_TEPC(_VSI) (0x00312000 + ((_VSI) * 4)) #define GLV_UPRCL(_i) (0x003B2000 + ((_i) * 8)) #define GLV_UPTCL(_i) (0x0030A000 + ((_i) * 8)) -#define PF_VT_PFALLOC_HIF 0x0009DD80 #define VSIQF_HKEY_MAX_INDEX 12 #define VSIQF_HLUT_MAX_INDEX 15 #define VFINT_DYN_CTLN(_i) (0x00003800 + ((_i) * 4)) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 1874c9f51a32..d974e2fa3e63 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -117,8 +117,7 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi) vsi->num_tx_desc = ICE_DFLT_NUM_TX_DESC; break; default: - dev_dbg(&vsi->back->pdev->dev, - "Not setting number of Tx/Rx descriptors for VSI type %d\n", + dev_dbg(ice_pf_to_dev(vsi->back), "Not setting number of Tx/Rx descriptors for VSI type %d\n", vsi->type); break; } @@ -724,7 +723,7 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) vsi->num_txq = tx_count; if (vsi->type == ICE_VSI_VF && vsi->num_txq != vsi->num_rxq) { - dev_dbg(&vsi->back->pdev->dev, "VF VSI should have same number of Tx and Rx queues. Hence making them equal\n"); + dev_dbg(ice_pf_to_dev(vsi->back), "VF VSI should have same number of Tx and Rx queues. Hence making them equal\n"); /* since there is a chance that num_rxq could have been changed * in the above for loop, make num_txq equal to num_rxq. */ @@ -929,8 +928,7 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) vsi->base_vector = ice_get_res(pf, pf->irq_tracker, num_q_vectors, vsi->idx); if (vsi->base_vector < 0) { - dev_err(dev, - "Failed to get tracking for %d vectors for VSI %d, err=%d\n", + dev_err(dev, "Failed to get tracking for %d vectors for VSI %d, err=%d\n", num_q_vectors, vsi->vsi_num, vsi->base_vector); return -ENOENT; } @@ -1232,8 +1230,9 @@ static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi) * * Returns 0 on success or ENOMEM on failure. */ -int ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list, - const u8 *macaddr) +int +ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list, + const u8 *macaddr) { struct ice_fltr_list_entry *tmp; struct ice_pf *pf = vsi->back; @@ -1392,12 +1391,10 @@ int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid) status = ice_remove_vlan(&pf->hw, &tmp_add_list); if (status == ICE_ERR_DOES_NOT_EXIST) { - dev_dbg(dev, - "Failed to remove VLAN %d on VSI %i, it does not exist, status: %d\n", + dev_dbg(dev, "Failed to remove VLAN %d on VSI %i, it does not exist, status: %d\n", vid, vsi->vsi_num, status); } else if (status) { - dev_err(dev, - "Error removing VLAN %d on vsi %i error: %d\n", + dev_err(dev, "Error removing VLAN %d on vsi %i error: %d\n", vid, vsi->vsi_num, status); err = -EIO; } @@ -1453,8 +1450,7 @@ setup_rings: err = ice_setup_rx_ctx(vsi->rx_rings[i]); if (err) { - dev_err(&vsi->back->pdev->dev, - "ice_setup_rx_ctx failed for RxQ %d, err %d\n", + dev_err(ice_pf_to_dev(vsi->back), "ice_setup_rx_ctx failed for RxQ %d, err %d\n", i, err); return err; } @@ -1623,7 +1619,7 @@ int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_err(&vsi->back->pdev->dev, "update VSI for VLAN insert failed, err %d aq_err %d\n", + dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN insert failed, err %d aq_err %d\n", status, hw->adminq.sq_last_status); ret = -EIO; goto out; @@ -1669,7 +1665,7 @@ int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_err(&vsi->back->pdev->dev, "update VSI for VLAN strip failed, ena = %d err %d aq_err %d\n", + dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN strip failed, ena = %d err %d aq_err %d\n", ena, status, hw->adminq.sq_last_status); ret = -EIO; goto out; @@ -1834,8 +1830,7 @@ ice_vsi_set_q_vectors_reg_idx(struct ice_vsi *vsi) struct ice_q_vector *q_vector = vsi->q_vectors[i]; if (!q_vector) { - dev_err(&vsi->back->pdev->dev, - "Failed to set reg_idx on q_vector %d VSI %d\n", + dev_err(ice_pf_to_dev(vsi->back), "Failed to set reg_idx on q_vector %d VSI %d\n", i, vsi->vsi_num); goto clear_reg_idx; } @@ -1898,8 +1893,7 @@ ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule) status = ice_remove_eth_mac(&pf->hw, &tmp_add_list); if (status) - dev_err(dev, - "Failure Adding or Removing Ethertype on VSI %i error: %d\n", + dev_err(dev, "Failure Adding or Removing Ethertype on VSI %i error: %d\n", vsi->vsi_num, status); ice_free_fltr_list(dev, &tmp_add_list); @@ -2384,8 +2378,7 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id) return -EINVAL; if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) { - dev_err(ice_pf_to_dev(pf), - "param err: needed=%d, num_entries = %d id=0x%04x\n", + dev_err(ice_pf_to_dev(pf), "param err: needed=%d, num_entries = %d id=0x%04x\n", needed, res->num_entries, id); return -EINVAL; } @@ -2686,7 +2679,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) ice_vsi_put_qs(vsi); ice_vsi_clear_rings(vsi); ice_vsi_free_arrays(vsi); - ice_dev_onetime_setup(&pf->hw); if (vsi->type == ICE_VSI_VF) ice_vsi_set_num_qs(vsi, vf->vf_id); else @@ -2765,8 +2757,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, max_txqs); if (status) { - dev_err(ice_pf_to_dev(pf), - "VSI %d failed lan queue config, error %d\n", + dev_err(ice_pf_to_dev(pf), "VSI %d failed lan queue config, error %d\n", vsi->vsi_num, status); if (init_vsi) { ret = -EIO; @@ -2834,8 +2825,8 @@ static void ice_vsi_update_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx) int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; - struct ice_vsi_ctx *ctx; struct ice_pf *pf = vsi->back; + struct ice_vsi_ctx *ctx; enum ice_status status; struct device *dev; int i, ret = 0; @@ -2892,25 +2883,6 @@ out: #endif /* CONFIG_DCB */ /** - * ice_nvm_version_str - format the NVM version strings - * @hw: ptr to the hardware info - */ -char *ice_nvm_version_str(struct ice_hw *hw) -{ - u8 oem_ver, oem_patch, ver_hi, ver_lo; - static char buf[ICE_NVM_VER_LEN]; - u16 oem_build; - - ice_get_nvm_version(hw, &oem_ver, &oem_build, &oem_patch, &ver_hi, - &ver_lo); - - snprintf(buf, sizeof(buf), "%x.%02x 0x%x %d.%d.%d", ver_hi, ver_lo, - hw->nvm.eetrack, oem_ver, oem_build, oem_patch); - - return buf; -} - -/** * ice_update_ring_stats - Update ring statistics * @ring: ring to update * @cont: used to increment per-vector counters @@ -2981,7 +2953,7 @@ ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set) status = ice_remove_mac(&vsi->back->hw, &tmp_add_list); cfg_mac_fltr_exit: - ice_free_fltr_list(&vsi->back->pdev->dev, &tmp_add_list); + ice_free_fltr_list(ice_pf_to_dev(vsi->back), &tmp_add_list); return status; } @@ -3043,16 +3015,14 @@ int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi) /* another VSI is already the default VSI for this switch */ if (ice_is_dflt_vsi_in_use(sw)) { - dev_err(dev, - "Default forwarding VSI %d already in use, disable it and try again\n", + dev_err(dev, "Default forwarding VSI %d already in use, disable it and try again\n", sw->dflt_vsi->vsi_num); return -EEXIST; } status = ice_cfg_dflt_vsi(&vsi->back->hw, vsi->idx, true, ICE_FLTR_RX); if (status) { - dev_err(dev, - "Failed to set VSI %d as the default forwarding VSI, error %d\n", + dev_err(dev, "Failed to set VSI %d as the default forwarding VSI, error %d\n", vsi->vsi_num, status); return -EIO; } @@ -3091,8 +3061,7 @@ int ice_clear_dflt_vsi(struct ice_sw *sw) status = ice_cfg_dflt_vsi(&dflt_vsi->back->hw, dflt_vsi->idx, false, ICE_FLTR_RX); if (status) { - dev_err(dev, - "Failed to clear the default forwarding VSI %d, error %d\n", + dev_err(dev, "Failed to clear the default forwarding VSI %d, error %d\n", dflt_vsi->vsi_num, status); return -EIO; } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 68fd0d4505c2..e2c0dadce920 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -97,8 +97,6 @@ void ice_vsi_cfg_frame_size(struct ice_vsi *vsi); u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran); -char *ice_nvm_version_str(struct ice_hw *hw); - enum ice_status ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 5ae671609f98..5ef28052c0f8 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -162,8 +162,7 @@ unregister: * had an error */ if (status && vsi->netdev->reg_state == NETREG_REGISTERED) { - dev_err(ice_pf_to_dev(pf), - "Could not add MAC filters error %d. Unregistering device\n", + dev_err(ice_pf_to_dev(pf), "Could not add MAC filters error %d. Unregistering device\n", status); unregister_netdev(vsi->netdev); free_netdev(vsi->netdev); @@ -269,7 +268,7 @@ static int ice_cfg_promisc(struct ice_vsi *vsi, u8 promisc_m, bool set_promisc) */ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) { - struct device *dev = &vsi->back->pdev->dev; + struct device *dev = ice_pf_to_dev(vsi->back); struct net_device *netdev = vsi->netdev; bool promisc_forced_on = false; struct ice_pf *pf = vsi->back; @@ -335,8 +334,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) !test_and_set_bit(__ICE_FLTR_OVERFLOW_PROMISC, vsi->state)) { promisc_forced_on = true; - netdev_warn(netdev, - "Reached MAC filter limit, forcing promisc mode on VSI %d\n", + netdev_warn(netdev, "Reached MAC filter limit, forcing promisc mode on VSI %d\n", vsi->vsi_num); } else { err = -EIO; @@ -382,8 +380,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) if (!ice_is_dflt_vsi_in_use(pf->first_sw)) { err = ice_set_dflt_vsi(pf->first_sw, vsi); if (err && err != -EEXIST) { - netdev_err(netdev, - "Error %d setting default VSI %i Rx rule\n", + netdev_err(netdev, "Error %d setting default VSI %i Rx rule\n", err, vsi->vsi_num); vsi->current_netdev_flags &= ~IFF_PROMISC; @@ -395,8 +392,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) if (ice_is_vsi_dflt_vsi(pf->first_sw, vsi)) { err = ice_clear_dflt_vsi(pf->first_sw); if (err) { - netdev_err(netdev, - "Error %d clearing default VSI %i Rx rule\n", + netdev_err(netdev, "Error %d clearing default VSI %i Rx rule\n", err, vsi->vsi_num); vsi->current_netdev_flags |= IFF_PROMISC; @@ -752,7 +748,7 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) kfree(caps); done: - netdev_info(vsi->netdev, "NIC Link is up %sbps, Requested FEC: %s, FEC: %s, Autoneg: %s, Flow Control: %s\n", + netdev_info(vsi->netdev, "NIC Link is up %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n", speed, fec_req, fec, an, fc); ice_print_topo_conflict(vsi); } @@ -815,8 +811,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, */ result = ice_update_link_info(pi); if (result) - dev_dbg(dev, - "Failed to update link status and re-enable link events for port %d\n", + dev_dbg(dev, "Failed to update link status and re-enable link events for port %d\n", pi->lport); /* if the old link up/down and speed is the same as the new */ @@ -834,13 +829,13 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, result = ice_aq_set_link_restart_an(pi, false, NULL); if (result) { - dev_dbg(dev, - "Failed to set link down, VSI %d error %d\n", + dev_dbg(dev, "Failed to set link down, VSI %d error %d\n", vsi->vsi_num, result); return result; } } + ice_dcb_rebuild(pf); ice_vsi_link_event(vsi, link_up); ice_print_link_msg(vsi, link_up); @@ -892,15 +887,13 @@ static int ice_init_link_events(struct ice_port_info *pi) ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL)); if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) { - dev_dbg(ice_hw_to_dev(pi->hw), - "Failed to set link event mask for port %d\n", + dev_dbg(ice_hw_to_dev(pi->hw), "Failed to set link event mask for port %d\n", pi->lport); return -EIO; } if (ice_aq_get_link_info(pi, true, NULL, NULL)) { - dev_dbg(ice_hw_to_dev(pi->hw), - "Failed to enable link events for port %d\n", + dev_dbg(ice_hw_to_dev(pi->hw), "Failed to enable link events for port %d\n", pi->lport); return -EIO; } @@ -929,8 +922,8 @@ ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event) !!(link_data->link_info & ICE_AQ_LINK_UP), le16_to_cpu(link_data->link_speed)); if (status) - dev_dbg(ice_pf_to_dev(pf), - "Could not process link event, error %d\n", status); + dev_dbg(ice_pf_to_dev(pf), "Could not process link event, error %d\n", + status); return status; } @@ -979,13 +972,11 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) dev_dbg(dev, "%s Receive Queue VF Error detected\n", qtype); if (val & PF_FW_ARQLEN_ARQOVFL_M) { - dev_dbg(dev, - "%s Receive Queue Overflow Error detected\n", + dev_dbg(dev, "%s Receive Queue Overflow Error detected\n", qtype); } if (val & PF_FW_ARQLEN_ARQCRIT_M) - dev_dbg(dev, - "%s Receive Queue Critical Error detected\n", + dev_dbg(dev, "%s Receive Queue Critical Error detected\n", qtype); val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M | PF_FW_ARQLEN_ARQCRIT_M); @@ -998,8 +989,8 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) PF_FW_ATQLEN_ATQCRIT_M)) { oldval = val; if (val & PF_FW_ATQLEN_ATQVFE_M) - dev_dbg(dev, - "%s Send Queue VF Error detected\n", qtype); + dev_dbg(dev, "%s Send Queue VF Error detected\n", + qtype); if (val & PF_FW_ATQLEN_ATQOVFL_M) { dev_dbg(dev, "%s Send Queue Overflow Error detected\n", qtype); @@ -1048,8 +1039,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) ice_dcb_process_lldp_set_mib_change(pf, &event); break; default: - dev_dbg(dev, - "%s Receive Queue unknown event 0x%04x ignored\n", + dev_dbg(dev, "%s Receive Queue unknown event 0x%04x ignored\n", qtype, opcode); break; } @@ -1238,7 +1228,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) u16 queue = ((reg & GL_MDET_TX_TCLAN_QNUM_M) >> GL_MDET_TX_TCLAN_QNUM_S); - if (netif_msg_rx_err(pf)) + if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", event, queue, pf_num, vf_num); wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff); @@ -1335,8 +1325,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) vf->num_mdd_events++; if (vf->num_mdd_events && vf->num_mdd_events <= ICE_MDD_EVENTS_THRESHOLD) - dev_info(dev, - "VF %d has had %llu MDD events since last boot, Admin might need to reload AVF driver with this number of events\n", + dev_info(dev, "VF %d has had %llu MDD events since last boot, Admin might need to reload AVF driver with this number of events\n", i, vf->num_mdd_events); } } @@ -1367,7 +1356,7 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up) if (vsi->type != ICE_VSI_PF) return 0; - dev = &vsi->back->pdev->dev; + dev = ice_pf_to_dev(vsi->back); pi = vsi->port_info; @@ -1378,8 +1367,7 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up) retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, NULL); if (retcode) { - dev_err(dev, - "Failed to get phy capabilities, VSI %d error %d\n", + dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n", vsi->vsi_num, retcode); retcode = -EIO; goto out; @@ -1649,8 +1637,8 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) err = devm_request_irq(dev, irq_num, vsi->irq_handler, 0, q_vector->name, q_vector); if (err) { - netdev_err(vsi->netdev, - "MSIX request_irq failed, error: %d\n", err); + netdev_err(vsi->netdev, "MSIX request_irq failed, error: %d\n", + err); goto free_q_irqs; } @@ -1685,7 +1673,7 @@ free_q_irqs: */ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) { - struct device *dev = &vsi->back->pdev->dev; + struct device *dev = ice_pf_to_dev(vsi->back); int i; for (i = 0; i < vsi->num_xdp_txq; i++) { @@ -2664,14 +2652,12 @@ static void ice_set_pf_caps(struct ice_pf *pf) clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); if (func_caps->common_cap.dcb) set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); -#ifdef CONFIG_PCI_IOV clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); if (func_caps->common_cap.sr_iov_1_1) { set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); pf->num_vfs_supported = min_t(int, func_caps->num_allocd_vfs, ICE_MAX_VF_COUNT); } -#endif /* CONFIG_PCI_IOV */ clear_bit(ICE_FLAG_RSS_ENA, pf->flags); if (func_caps->common_cap.rss_table_size) set_bit(ICE_FLAG_RSS_ENA, pf->flags); @@ -2764,8 +2750,7 @@ static int ice_ena_msix_range(struct ice_pf *pf) } if (v_actual < v_budget) { - dev_warn(dev, - "not enough OS MSI-X vectors. requested = %d, obtained = %d\n", + dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n", v_budget, v_actual); /* 2 vectors for LAN (traffic + OICR) */ #define ICE_MIN_LAN_VECS 2 @@ -2787,8 +2772,7 @@ msix_err: goto exit_err; no_hw_vecs_left_err: - dev_err(dev, - "not enough device MSI-X vectors. requested = %d, available = %d\n", + dev_err(dev, "not enough device MSI-X vectors. requested = %d, available = %d\n", needed, v_left); err = -ERANGE; exit_err: @@ -2921,16 +2905,14 @@ ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status) !memcmp(hw->pkg_name, hw->active_pkg_name, sizeof(hw->pkg_name))) { if (hw->pkg_dwnld_status == ICE_AQ_RC_EEXIST) - dev_info(dev, - "DDP package already present on device: %s version %d.%d.%d.%d\n", + dev_info(dev, "DDP package already present on device: %s version %d.%d.%d.%d\n", hw->active_pkg_name, hw->active_pkg_ver.major, hw->active_pkg_ver.minor, hw->active_pkg_ver.update, hw->active_pkg_ver.draft); else - dev_info(dev, - "The DDP package was successfully loaded: %s version %d.%d.%d.%d\n", + dev_info(dev, "The DDP package was successfully loaded: %s version %d.%d.%d.%d\n", hw->active_pkg_name, hw->active_pkg_ver.major, hw->active_pkg_ver.minor, @@ -2938,8 +2920,7 @@ ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status) hw->active_pkg_ver.draft); } else if (hw->active_pkg_ver.major != ICE_PKG_SUPP_VER_MAJ || hw->active_pkg_ver.minor != ICE_PKG_SUPP_VER_MNR) { - dev_err(dev, - "The device has a DDP package that is not supported by the driver. The device has package '%s' version %d.%d.x.x. The driver requires version %d.%d.x.x. Entering Safe Mode.\n", + dev_err(dev, "The device has a DDP package that is not supported by the driver. The device has package '%s' version %d.%d.x.x. The driver requires version %d.%d.x.x. Entering Safe Mode.\n", hw->active_pkg_name, hw->active_pkg_ver.major, hw->active_pkg_ver.minor, @@ -2947,8 +2928,7 @@ ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status) *status = ICE_ERR_NOT_SUPPORTED; } else if (hw->active_pkg_ver.major == ICE_PKG_SUPP_VER_MAJ && hw->active_pkg_ver.minor == ICE_PKG_SUPP_VER_MNR) { - dev_info(dev, - "The driver could not load the DDP package file because a compatible DDP package is already present on the device. The device has package '%s' version %d.%d.%d.%d. The package file found by the driver: '%s' version %d.%d.%d.%d.\n", + dev_info(dev, "The driver could not load the DDP package file because a compatible DDP package is already present on the device. The device has package '%s' version %d.%d.%d.%d. The package file found by the driver: '%s' version %d.%d.%d.%d.\n", hw->active_pkg_name, hw->active_pkg_ver.major, hw->active_pkg_ver.minor, @@ -2960,54 +2940,46 @@ ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status) hw->pkg_ver.update, hw->pkg_ver.draft); } else { - dev_err(dev, - "An unknown error occurred when loading the DDP package, please reboot the system. If the problem persists, update the NVM. Entering Safe Mode.\n"); + dev_err(dev, "An unknown error occurred when loading the DDP package, please reboot the system. If the problem persists, update the NVM. Entering Safe Mode.\n"); *status = ICE_ERR_NOT_SUPPORTED; } break; case ICE_ERR_BUF_TOO_SHORT: /* fall-through */ case ICE_ERR_CFG: - dev_err(dev, - "The DDP package file is invalid. Entering Safe Mode.\n"); + dev_err(dev, "The DDP package file is invalid. Entering Safe Mode.\n"); break; case ICE_ERR_NOT_SUPPORTED: /* Package File version not supported */ if (hw->pkg_ver.major > ICE_PKG_SUPP_VER_MAJ || (hw->pkg_ver.major == ICE_PKG_SUPP_VER_MAJ && hw->pkg_ver.minor > ICE_PKG_SUPP_VER_MNR)) - dev_err(dev, - "The DDP package file version is higher than the driver supports. Please use an updated driver. Entering Safe Mode.\n"); + dev_err(dev, "The DDP package file version is higher than the driver supports. Please use an updated driver. Entering Safe Mode.\n"); else if (hw->pkg_ver.major < ICE_PKG_SUPP_VER_MAJ || (hw->pkg_ver.major == ICE_PKG_SUPP_VER_MAJ && hw->pkg_ver.minor < ICE_PKG_SUPP_VER_MNR)) - dev_err(dev, - "The DDP package file version is lower than the driver supports. The driver requires version %d.%d.x.x. Please use an updated DDP Package file. Entering Safe Mode.\n", + dev_err(dev, "The DDP package file version is lower than the driver supports. The driver requires version %d.%d.x.x. Please use an updated DDP Package file. Entering Safe Mode.\n", ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR); break; case ICE_ERR_AQ_ERROR: switch (hw->pkg_dwnld_status) { case ICE_AQ_RC_ENOSEC: case ICE_AQ_RC_EBADSIG: - dev_err(dev, - "The DDP package could not be loaded because its signature is not valid. Please use a valid DDP Package. Entering Safe Mode.\n"); + dev_err(dev, "The DDP package could not be loaded because its signature is not valid. Please use a valid DDP Package. Entering Safe Mode.\n"); return; case ICE_AQ_RC_ESVN: - dev_err(dev, - "The DDP Package could not be loaded because its security revision is too low. Please use an updated DDP Package. Entering Safe Mode.\n"); + dev_err(dev, "The DDP Package could not be loaded because its security revision is too low. Please use an updated DDP Package. Entering Safe Mode.\n"); return; case ICE_AQ_RC_EBADMAN: case ICE_AQ_RC_EBADBUF: - dev_err(dev, - "An error occurred on the device while loading the DDP package. The device will be reset.\n"); + dev_err(dev, "An error occurred on the device while loading the DDP package. The device will be reset.\n"); return; default: break; } /* fall-through */ default: - dev_err(dev, - "An unknown error (%d) occurred when loading the DDP package. Entering Safe Mode.\n", + dev_err(dev, "An unknown error (%d) occurred when loading the DDP package. Entering Safe Mode.\n", *status); break; } @@ -3038,8 +3010,7 @@ ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf) status = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size); ice_log_pkg_init(hw, &status); } else { - dev_err(dev, - "The DDP package file failed to load. Entering Safe Mode.\n"); + dev_err(dev, "The DDP package file failed to load. Entering Safe Mode.\n"); } if (status) { @@ -3065,8 +3036,7 @@ ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf) static void ice_verify_cacheline_size(struct ice_pf *pf) { if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M) - dev_warn(ice_pf_to_dev(pf), - "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n", + dev_warn(ice_pf_to_dev(pf), "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n", ICE_CACHE_LINE_BYTES); } @@ -3159,8 +3129,7 @@ static void ice_request_fw(struct ice_pf *pf) dflt_pkg_load: err = request_firmware(&firmware, ICE_DDP_PKG_FILE, dev); if (err) { - dev_err(dev, - "The DDP package file was not found or could not be read. Entering Safe Mode\n"); + dev_err(dev, "The DDP package file was not found or could not be read. Entering Safe Mode\n"); return; } @@ -3184,7 +3153,9 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) struct ice_hw *hw; int err; - /* this driver uses devres, see Documentation/driver-api/driver-model/devres.rst */ + /* this driver uses devres, see + * Documentation/driver-api/driver-model/devres.rst + */ err = pcim_enable_device(pdev); if (err) return err; @@ -3245,11 +3216,6 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) goto err_exit_unroll; } - dev_info(dev, "firmware %d.%d.%d api %d.%d.%d nvm %s build 0x%08x\n", - hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch, - hw->api_maj_ver, hw->api_min_ver, hw->api_patch, - ice_nvm_version_str(hw), hw->fw_build); - ice_request_fw(pf); /* if ice_request_fw fails, ICE_FLAG_ADV_FEATURES bit won't be @@ -3257,8 +3223,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) * true */ if (ice_is_safe_mode(pf)) { - dev_err(dev, - "Package download failed. Advanced features disabled - Device now in Safe Mode\n"); + dev_err(dev, "Package download failed. Advanced features disabled - Device now in Safe Mode\n"); /* we already got function/device capabilities but these don't * reflect what the driver needs to do in safe mode. Instead of * adding conditional logic everywhere to ignore these @@ -3335,8 +3300,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) /* tell the firmware we are up */ err = ice_send_version(pf); if (err) { - dev_err(dev, - "probe failed sending driver version %s. error: %d\n", + dev_err(dev, "probe failed sending driver version %s. error: %d\n", ice_drv_ver, err); goto err_alloc_sw_unroll; } @@ -3477,8 +3441,7 @@ static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev) err = pci_enable_device_mem(pdev); if (err) { - dev_err(&pdev->dev, - "Cannot re-enable PCI device after reset, error %d\n", + dev_err(&pdev->dev, "Cannot re-enable PCI device after reset, error %d\n", err); result = PCI_ERS_RESULT_DISCONNECT; } else { @@ -3497,8 +3460,7 @@ static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev) err = pci_cleanup_aer_uncorrect_error_status(pdev); if (err) - dev_dbg(&pdev->dev, - "pci_cleanup_aer_uncorrect_error_status failed, error %d\n", + dev_dbg(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status failed, error %d\n", err); /* non-fatal, continue */ @@ -3517,8 +3479,8 @@ static void ice_pci_err_resume(struct pci_dev *pdev) struct ice_pf *pf = pci_get_drvdata(pdev); if (!pf) { - dev_err(&pdev->dev, - "%s failed, device is unrecoverable\n", __func__); + dev_err(&pdev->dev, "%s failed, device is unrecoverable\n", + __func__); return; } @@ -3766,8 +3728,7 @@ ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate) /* Validate maxrate requested is within permitted range */ if (maxrate && (maxrate > (ICE_SCHED_MAX_BW / 1000))) { - netdev_err(netdev, - "Invalid max rate %d specified for the queue %d\n", + netdev_err(netdev, "Invalid max rate %d specified for the queue %d\n", maxrate, queue_index); return -EINVAL; } @@ -3783,8 +3744,8 @@ ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate) status = ice_cfg_q_bw_lmt(vsi->port_info, vsi->idx, tc, q_handle, ICE_MAX_BW, maxrate * 1000); if (status) { - netdev_err(netdev, - "Unable to set Tx max rate, error %d\n", status); + netdev_err(netdev, "Unable to set Tx max rate, error %d\n", + status); return -EIO; } @@ -3876,15 +3837,13 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) /* Don't set any netdev advanced features with device in Safe Mode */ if (ice_is_safe_mode(vsi->back)) { - dev_err(&vsi->back->pdev->dev, - "Device is in Safe Mode - not enabling advanced netdev features\n"); + dev_err(ice_pf_to_dev(vsi->back), "Device is in Safe Mode - not enabling advanced netdev features\n"); return ret; } /* Do not change setting during reset */ if (ice_is_reset_in_progress(pf->state)) { - dev_err(&vsi->back->pdev->dev, - "Device is resetting, changing advanced netdev features temporarily unavailable.\n"); + dev_err(ice_pf_to_dev(vsi->back), "Device is resetting, changing advanced netdev features temporarily unavailable.\n"); return -EBUSY; } @@ -4372,21 +4331,18 @@ int ice_down(struct ice_vsi *vsi) tx_err = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0); if (tx_err) - netdev_err(vsi->netdev, - "Failed stop Tx rings, VSI %d error %d\n", + netdev_err(vsi->netdev, "Failed stop Tx rings, VSI %d error %d\n", vsi->vsi_num, tx_err); if (!tx_err && ice_is_xdp_ena_vsi(vsi)) { tx_err = ice_vsi_stop_xdp_tx_rings(vsi); if (tx_err) - netdev_err(vsi->netdev, - "Failed stop XDP rings, VSI %d error %d\n", + netdev_err(vsi->netdev, "Failed stop XDP rings, VSI %d error %d\n", vsi->vsi_num, tx_err); } rx_err = ice_vsi_stop_rx_rings(vsi); if (rx_err) - netdev_err(vsi->netdev, - "Failed stop Rx rings, VSI %d error %d\n", + netdev_err(vsi->netdev, "Failed stop Rx rings, VSI %d error %d\n", vsi->vsi_num, rx_err); ice_napi_disable_all(vsi); @@ -4394,8 +4350,7 @@ int ice_down(struct ice_vsi *vsi) if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) { link_err = ice_force_phys_link_state(vsi, false); if (link_err) - netdev_err(vsi->netdev, - "Failed to set physical link down, VSI %d error %d\n", + netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n", vsi->vsi_num, link_err); } @@ -4406,8 +4361,7 @@ int ice_down(struct ice_vsi *vsi) ice_clean_rx_ring(vsi->rx_rings[i]); if (tx_err || rx_err || link_err) { - netdev_err(vsi->netdev, - "Failed to close VSI 0x%04X on switch 0x%04X\n", + netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n", vsi->vsi_num, vsi->vsw->sw_id); return -EIO; } @@ -4426,7 +4380,7 @@ int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) int i, err = 0; if (!vsi->num_txq) { - dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Tx queues\n", + dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Tx queues\n", vsi->vsi_num); return -EINVAL; } @@ -4457,7 +4411,7 @@ int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) int i, err = 0; if (!vsi->num_rxq) { - dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Rx queues\n", + dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Rx queues\n", vsi->vsi_num); return -EINVAL; } @@ -4554,8 +4508,7 @@ static void ice_vsi_release_all(struct ice_pf *pf) err = ice_vsi_release(pf->vsi[i]); if (err) - dev_dbg(ice_pf_to_dev(pf), - "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n", + dev_dbg(ice_pf_to_dev(pf), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n", i, err, pf->vsi[i]->vsi_num); } } @@ -4582,8 +4535,7 @@ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type) /* rebuild the VSI */ err = ice_vsi_rebuild(vsi, true); if (err) { - dev_err(dev, - "rebuild VSI failed, err %d, VSI index %d, type %s\n", + dev_err(dev, "rebuild VSI failed, err %d, VSI index %d, type %s\n", err, vsi->idx, ice_vsi_type_str(type)); return err; } @@ -4591,8 +4543,7 @@ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type) /* replay filters for the VSI */ status = ice_replay_vsi(&pf->hw, vsi->idx); if (status) { - dev_err(dev, - "replay VSI failed, status %d, VSI index %d, type %s\n", + dev_err(dev, "replay VSI failed, status %d, VSI index %d, type %s\n", status, vsi->idx, ice_vsi_type_str(type)); return -EIO; } @@ -4605,8 +4556,7 @@ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type) /* enable the VSI */ err = ice_ena_vsi(vsi, false); if (err) { - dev_err(dev, - "enable VSI failed, err %d, VSI index %d, type %s\n", + dev_err(dev, "enable VSI failed, err %d, VSI index %d, type %s\n", err, vsi->idx, ice_vsi_type_str(type)); return err; } @@ -4684,8 +4634,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) } if (pf->first_sw->dflt_vsi_ena) - dev_info(dev, - "Clearing default VSI, re-enable after reset completes\n"); + dev_info(dev, "Clearing default VSI, re-enable after reset completes\n"); /* clear the default VSI configuration if it exists */ pf->first_sw->dflt_vsi = NULL; pf->first_sw->dflt_vsi_ena = false; @@ -4736,8 +4685,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) /* tell the firmware we are up */ ret = ice_send_version(pf); if (ret) { - dev_err(dev, - "Rebuild failed due to error sending driver version: %d\n", + dev_err(dev, "Rebuild failed due to error sending driver version: %d\n", ret); goto err_vsi_rebuild; } @@ -4993,7 +4941,7 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_err(&vsi->back->pdev->dev, "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n", + dev_err(ice_pf_to_dev(vsi->back), "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n", bmode, status, hw->adminq.sq_last_status); ret = -EIO; goto out; @@ -5185,8 +5133,7 @@ int ice_open(struct net_device *netdev) if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { err = ice_force_phys_link_state(vsi, true); if (err) { - netdev_err(netdev, - "Failed to set physical link up, error %d\n", + netdev_err(netdev, "Failed to set physical link up, error %d\n", err); return err; } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index fd17ace6b226..4de61dbedd36 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -644,7 +644,7 @@ static bool ice_page_is_reserved(struct page *page) * Update the offset within page so that Rx buf will be ready to be reused. * For systems with PAGE_SIZE < 8192 this function will flip the page offset * so the second half of page assigned to Rx buffer will be used, otherwise - * the offset is moved by the @size bytes + * the offset is moved by "size" bytes */ static void ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size) @@ -1078,8 +1078,6 @@ construct_skb: skb = ice_build_skb(rx_ring, rx_buf, &xdp); else skb = ice_construct_skb(rx_ring, rx_buf, &xdp); - } else { - skb = ice_construct_skb(rx_ring, rx_buf, &xdp); } /* exit if we failed to retrieve a buffer */ if (!skb) { @@ -1621,11 +1619,11 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first, { u64 td_offset, td_tag, td_cmd; u16 i = tx_ring->next_to_use; - skb_frag_t *frag; unsigned int data_len, size; struct ice_tx_desc *tx_desc; struct ice_tx_buf *tx_buf; struct sk_buff *skb; + skb_frag_t *frag; dma_addr_t dma; td_tag = off->td_l2tag1; @@ -1738,9 +1736,8 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first, ice_maybe_stop_tx(tx_ring, DESC_NEEDED); /* notify HW of packet */ - if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) { + if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) writel(i, tx_ring->tail); - } return; @@ -2078,7 +2075,7 @@ static bool __ice_chk_linearize(struct sk_buff *skb) frag = &skb_shinfo(skb)->frags[0]; /* Initialize size to the negative value of gso_size minus 1. We - * use this as the worst case scenerio in which the frag ahead + * use this as the worst case scenario in which the frag ahead * of us only provides one byte which is why we are limited to 6 * descriptors for a single transmit as the header and previous * fragment are already consuming 2 descriptors. diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index a86270696df1..7ee00a128663 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -33,8 +33,8 @@ * frame. * * Note: For cache line sizes 256 or larger this value is going to end - * up negative. In these cases we should fall back to the legacy - * receive path. + * up negative. In these cases we should fall back to the legacy + * receive path. */ #if (PAGE_SIZE < 8192) #define ICE_2K_TOO_SMALL_WITH_PADDING \ @@ -222,7 +222,7 @@ enum ice_rx_dtype { #define ICE_ITR_GRAN_S 1 /* ITR granularity is always 2us */ #define ICE_ITR_GRAN_US BIT(ICE_ITR_GRAN_S) #define ICE_ITR_MASK 0x1FFE /* ITR register value alignment mask */ -#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~ICE_ITR_MASK) +#define ITR_REG_ALIGN(setting) ((setting) & ICE_ITR_MASK) #define ICE_ITR_ADAPTIVE_MIN_INC 0x0002 #define ICE_ITR_ADAPTIVE_MIN_USECS 0x0002 diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 35bbc4ff603c..6da048a6ca7c 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -10,7 +10,7 @@ */ void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val) { - u16 prev_ntu = rx_ring->next_to_use; + u16 prev_ntu = rx_ring->next_to_use & ~0x7; rx_ring->next_to_use = val; diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index b361ffabb0ca..db0ef6ba907f 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -517,7 +517,7 @@ struct ice_hw { struct ice_fw_log_cfg fw_log; /* Device max aggregate bandwidths corresponding to the GL_PWR_MODE_CTL - * register. Used for determining the ITR/intrl granularity during + * register. Used for determining the ITR/INTRL granularity during * initialization. */ #define ICE_MAX_AGG_BW_200G 0x0 diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 82b1e7a4cb92..75c70d432c72 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -199,8 +199,7 @@ static void ice_dis_vf_mappings(struct ice_vf *vf) if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG) wr32(hw, VPLAN_RX_QBASE(vf->vf_id), 0); else - dev_err(dev, - "Scattered mode for VF Rx queues is not yet implemented\n"); + dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n"); } /** @@ -402,8 +401,7 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr) if ((reg & VF_TRANS_PENDING_M) == 0) break; - dev_err(dev, - "VF %d PCI transactions stuck\n", vf->vf_id); + dev_err(dev, "VF %d PCI transactions stuck\n", vf->vf_id); udelay(ICE_PCI_CIAD_WAIT_DELAY_US); } } @@ -462,7 +460,7 @@ static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 vid, bool enable) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_info(&vsi->back->pdev->dev, "update VSI for port VLAN failed, err %d aq_err %d\n", + dev_info(ice_pf_to_dev(vsi->back), "update VSI for port VLAN failed, err %d aq_err %d\n", status, hw->adminq.sq_last_status); ret = -EIO; goto out; @@ -1095,7 +1093,6 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) * finished resetting. */ for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) { - /* Check each VF in sequence */ while (v < pf->num_alloc_vfs) { u32 reg; @@ -1553,8 +1550,7 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, dev_info(dev, "VF %d failed opcode %d, retval: %d\n", vf->vf_id, v_opcode, v_retval); if (vf->num_inval_msgs > ICE_DFLT_NUM_INVAL_MSGS_ALLOWED) { - dev_err(dev, - "Number of invalid messages exceeded for VF %d\n", + dev_err(dev, "Number of invalid messages exceeded for VF %d\n", vf->vf_id); dev_err(dev, "Use PF Control I/F to enable the VF\n"); set_bit(ICE_VF_STATE_DIS, vf->vf_states); @@ -1569,8 +1565,7 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, aq_ret = ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, v_opcode, v_retval, msg, msglen, NULL); if (aq_ret && pf->hw.mailboxq.sq_last_status != ICE_AQ_RC_ENOSYS) { - dev_info(dev, - "Unable to send the message to VF %d ret %d aq_err %d\n", + dev_info(dev, "Unable to send the message to VF %d ret %d aq_err %d\n", vf->vf_id, aq_ret, pf->hw.mailboxq.sq_last_status); return -EIO; } @@ -1879,6 +1874,48 @@ error_param: } /** + * ice_wait_on_vf_reset - poll to make sure a given VF is ready after reset + * @vf: The VF being resseting + * + * The max poll time is about ~800ms, which is about the maximum time it takes + * for a VF to be reset and/or a VF driver to be removed. + */ +static void ice_wait_on_vf_reset(struct ice_vf *vf) +{ + int i; + + for (i = 0; i < ICE_MAX_VF_RESET_TRIES; i++) { + if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) + break; + msleep(ICE_MAX_VF_RESET_SLEEP_MS); + } +} + +/** + * ice_check_vf_ready_for_cfg - check if VF is ready to be configured/queried + * @vf: VF to check if it's ready to be configured/queried + * + * The purpose of this function is to make sure the VF is not in reset, not + * disabled, and initialized so it can be configured and/or queried by a host + * administrator. + */ +static int ice_check_vf_ready_for_cfg(struct ice_vf *vf) +{ + struct ice_pf *pf; + + ice_wait_on_vf_reset(vf); + + if (ice_is_vf_disabled(vf)) + return -EINVAL; + + pf = vf->pf; + if (ice_check_vf_init(pf, vf)) + return -EBUSY; + + return 0; +} + +/** * ice_set_vf_spoofchk * @netdev: network interface device structure * @vf_id: VF identifier @@ -1895,16 +1932,16 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) enum ice_status status; struct device *dev; struct ice_vf *vf; - int ret = 0; + int ret; dev = ice_pf_to_dev(pf); if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; vf = &pf->vf[vf_id]; - - if (ice_check_vf_init(pf, vf)) - return -EBUSY; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; vf_vsi = pf->vsi[vf->lan_vsi_idx]; if (!vf_vsi) { @@ -1914,8 +1951,7 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) } if (vf_vsi->type != ICE_VSI_VF) { - netdev_err(netdev, - "Type %d of VSI %d for VF %d is no ICE_VSI_VF\n", + netdev_err(netdev, "Type %d of VSI %d for VF %d is no ICE_VSI_VF\n", vf_vsi->type, vf_vsi->vsi_num, vf->vf_id); return -ENODEV; } @@ -1945,8 +1981,7 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) status = ice_update_vsi(&pf->hw, vf_vsi->idx, ctx, NULL); if (status) { - dev_err(dev, - "Failed to %sable spoofchk on VF %d VSI %d\n error %d", + dev_err(dev, "Failed to %sable spoofchk on VF %d VSI %d\n error %d", ena ? "en" : "dis", vf->vf_id, vf_vsi->vsi_num, status); ret = -EIO; goto out; @@ -2063,8 +2098,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) continue; if (ice_vsi_ctrl_rx_ring(vsi, true, vf_q_id)) { - dev_err(&vsi->back->pdev->dev, - "Failed to enable Rx ring %d on VSI %d\n", + dev_err(ice_pf_to_dev(vsi->back), "Failed to enable Rx ring %d on VSI %d\n", vf_q_id, vsi->vsi_num); v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -2166,8 +2200,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) if (ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, ring, &txq_meta)) { - dev_err(&vsi->back->pdev->dev, - "Failed to stop Tx ring %d on VSI %d\n", + dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n", vf_q_id, vsi->vsi_num); v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -2193,8 +2226,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) continue; if (ice_vsi_ctrl_rx_ring(vsi, false, vf_q_id)) { - dev_err(&vsi->back->pdev->dev, - "Failed to stop Rx ring %d on VSI %d\n", + dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Rx ring %d on VSI %d\n", vf_q_id, vsi->vsi_num); v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -2357,8 +2389,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) if (qci->num_queue_pairs > ICE_MAX_BASE_QS_PER_VF || qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) { - dev_err(ice_pf_to_dev(pf), - "VF-%d requesting more than supported number of queues: %d\n", + dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n", vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)); v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -2570,8 +2601,7 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) */ if (set && !ice_is_vf_trusted(vf) && (vf->num_mac + al->num_elements) > ICE_MAX_MACADDR_PER_VF) { - dev_err(ice_pf_to_dev(pf), - "Can't add more MAC addresses, because VF-%d is not trusted, switch the VF to trusted mode in order to add more functionalities\n", + dev_err(ice_pf_to_dev(pf), "Can't add more MAC addresses, because VF-%d is not trusted, switch the VF to trusted mode in order to add more functionalities\n", vf->vf_id); v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto handle_mac_exit; @@ -2648,8 +2678,8 @@ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg) struct ice_pf *pf = vf->pf; u16 max_allowed_vf_queues; u16 tx_rx_queue_left; - u16 cur_queues; struct device *dev; + u16 cur_queues; dev = ice_pf_to_dev(pf); if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { @@ -2670,8 +2700,7 @@ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg) vfres->num_queue_pairs = ICE_MAX_BASE_QS_PER_VF; } else if (req_queues > cur_queues && req_queues - cur_queues > tx_rx_queue_left) { - dev_warn(dev, - "VF %d requested %u more queues, but only %u left.\n", + dev_warn(dev, "VF %d requested %u more queues, but only %u left.\n", vf->vf_id, req_queues - cur_queues, tx_rx_queue_left); vfres->num_queue_pairs = min_t(u16, max_allowed_vf_queues, ICE_MAX_BASE_QS_PER_VF); @@ -2709,7 +2738,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, struct ice_vsi *vsi; struct device *dev; struct ice_vf *vf; - int ret = 0; + int ret; dev = ice_pf_to_dev(pf); if (ice_validate_vf_id(pf, vf_id)) @@ -2727,13 +2756,15 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, vf = &pf->vf[vf_id]; vsi = pf->vsi[vf->lan_vsi_idx]; - if (ice_check_vf_init(pf, vf)) - return -EBUSY; + + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; if (le16_to_cpu(vsi->info.pvid) == vlanprio) { /* duplicate request, so just return success */ dev_dbg(dev, "Duplicate pvid %d request\n", vlanprio); - return ret; + return 0; } /* If PVID, then remove all filters on the old VLAN */ @@ -2744,7 +2775,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, if (vlan_id || qos) { ret = ice_vsi_manage_pvid(vsi, vlanprio, true); if (ret) - goto error_set_pvid; + return ret; } else { ice_vsi_manage_pvid(vsi, 0, false); vsi->info.pvid = 0; @@ -2757,7 +2788,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, /* add new VLAN filter for each MAC */ ret = ice_vsi_add_vlan(vsi, vlan_id); if (ret) - goto error_set_pvid; + return ret; } /* The Port VLAN needs to be saved across resets the same as the @@ -2765,8 +2796,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, */ vf->port_vlan_id = le16_to_cpu(vsi->info.pvid); -error_set_pvid: - return ret; + return 0; } /** @@ -2821,8 +2851,8 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) for (i = 0; i < vfl->num_elements; i++) { if (vfl->vlan_id[i] > ICE_MAX_VLANID) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; - dev_err(dev, - "invalid VF VLAN id %d\n", vfl->vlan_id[i]); + dev_err(dev, "invalid VF VLAN id %d\n", + vfl->vlan_id[i]); goto error_param; } } @@ -2836,8 +2866,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) if (add_v && !ice_is_vf_trusted(vf) && vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) { - dev_info(dev, - "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n", + dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n", vf->vf_id); /* There is no need to let VF know about being not trusted, * so we can just return success message here @@ -2860,8 +2889,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) if (!ice_is_vf_trusted(vf) && vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) { - dev_info(dev, - "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n", + dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n", vf->vf_id); /* There is no need to let VF know about being * not trusted, so we can just return success @@ -2889,8 +2917,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) status = ice_cfg_vlan_pruning(vsi, true, false); if (status) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; - dev_err(dev, - "Enable VLAN pruning on VLAN ID: %d failed error-%d\n", + dev_err(dev, "Enable VLAN pruning on VLAN ID: %d failed error-%d\n", vid, status); goto error_param; } @@ -2903,8 +2930,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) promisc_m, vid); if (status) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; - dev_err(dev, - "Enable Unicast/multicast promiscuous mode on VLAN ID:%d failed error-%d\n", + dev_err(dev, "Enable Unicast/multicast promiscuous mode on VLAN ID:%d failed error-%d\n", vid, status); } } @@ -3140,8 +3166,7 @@ error_handler: case VIRTCHNL_OP_GET_VF_RESOURCES: err = ice_vc_get_vf_res_msg(vf, msg); if (ice_vf_init_vlan_stripping(vf)) - dev_err(dev, - "Failed to initialize VLAN stripping for VF %d\n", + dev_err(dev, "Failed to initialize VLAN stripping for VF %d\n", vf->vf_id); ice_vc_notify_vf_link_state(vf); break; @@ -3255,23 +3280,6 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) } /** - * ice_wait_on_vf_reset - * @vf: The VF being resseting - * - * Poll to make sure a given VF is ready after reset - */ -static void ice_wait_on_vf_reset(struct ice_vf *vf) -{ - int i; - - for (i = 0; i < ICE_MAX_VF_RESET_WAIT; i++) { - if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) - break; - msleep(20); - } -} - -/** * ice_set_vf_mac * @netdev: network interface device structure * @vf_id: VF identifier @@ -3283,29 +3291,21 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) { struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_vf *vf; - int ret = 0; + int ret; if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; - vf = &pf->vf[vf_id]; - /* Don't set MAC on disabled VF */ - if (ice_is_vf_disabled(vf)) - return -EINVAL; - - /* In case VF is in reset mode, wait until it is completed. Depending - * on factors like queue disabling routine, this could take ~250ms - */ - ice_wait_on_vf_reset(vf); - - if (ice_check_vf_init(pf, vf)) - return -EBUSY; - if (is_zero_ether_addr(mac) || is_multicast_ether_addr(mac)) { netdev_err(netdev, "%pM not a valid unicast address\n", mac); return -EINVAL; } + vf = &pf->vf[vf_id]; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; + /* copy MAC into dflt_lan_addr and trigger a VF reset. The reset * flow will use the updated dflt_lan_addr and add a MAC filter * using ice_add_mac. Also set pf_set_mac to indicate that the PF has @@ -3313,12 +3313,11 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) */ ether_addr_copy(vf->dflt_lan_addr.addr, mac); vf->pf_set_mac = true; - netdev_info(netdev, - "MAC on VF %d set to %pM. VF driver will be reinitialized\n", + netdev_info(netdev, "MAC on VF %d set to %pM. VF driver will be reinitialized\n", vf_id, mac); ice_vc_reset_vf(vf); - return ret; + return 0; } /** @@ -3332,25 +3331,16 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) { struct ice_pf *pf = ice_netdev_to_pf(netdev); - struct device *dev; struct ice_vf *vf; + int ret; - dev = ice_pf_to_dev(pf); if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; vf = &pf->vf[vf_id]; - /* Don't set Trusted Mode on disabled VF */ - if (ice_is_vf_disabled(vf)) - return -EINVAL; - - /* In case VF is in reset mode, wait until it is completed. Depending - * on factors like queue disabling routine, this could take ~250ms - */ - ice_wait_on_vf_reset(vf); - - if (ice_check_vf_init(pf, vf)) - return -EBUSY; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; /* Check if already trusted */ if (trusted == vf->trusted) @@ -3358,7 +3348,7 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) vf->trusted = trusted; ice_vc_reset_vf(vf); - dev_info(dev, "VF %u is now %strusted\n", + dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n", vf_id, trusted ? "" : "un"); return 0; @@ -3376,13 +3366,15 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) { struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_vf *vf; + int ret; if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; vf = &pf->vf[vf_id]; - if (ice_check_vf_init(pf, vf)) - return -EBUSY; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; switch (link_state) { case IFLA_VF_LINK_STATE_AUTO: @@ -3418,14 +3410,15 @@ int ice_get_vf_stats(struct net_device *netdev, int vf_id, struct ice_eth_stats *stats; struct ice_vsi *vsi; struct ice_vf *vf; + int ret; if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; vf = &pf->vf[vf_id]; - - if (ice_check_vf_init(pf, vf)) - return -EBUSY; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; vsi = pf->vsi[vf->lan_vsi_idx]; if (!vsi) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 4647d636ed36..ac67982751df 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -38,7 +38,8 @@ #define ICE_MAX_POLICY_INTR_PER_VF 33 #define ICE_MIN_INTR_PER_VF (ICE_MIN_QS_PER_VF + 1) #define ICE_DFLT_INTR_PER_VF (ICE_DFLT_QS_PER_VF + 1) -#define ICE_MAX_VF_RESET_WAIT 15 +#define ICE_MAX_VF_RESET_TRIES 40 +#define ICE_MAX_VF_RESET_SLEEP_MS 20 #define ice_for_each_vf(pf, i) \ for ((i) = 0; (i) < (pf)->num_alloc_vfs; (i)++) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 149dca0012ba..4d3407bbd4c4 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -338,8 +338,8 @@ static int ice_xsk_umem_dma_map(struct ice_vsi *vsi, struct xdp_umem *umem) DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR); if (dma_mapping_error(dev, dma)) { - dev_dbg(dev, - "XSK UMEM DMA mapping error on page num %d", i); + dev_dbg(dev, "XSK UMEM DMA mapping error on page num %d\n", + i); goto out_unmap; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 3a975641f902..20b907dc1e29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -200,7 +200,7 @@ int mlx5e_health_report(struct mlx5e_priv *priv, netdev_err(priv->netdev, err_str); if (!reporter) - return err_ctx->recover(&err_ctx->ctx); + return err_ctx->recover(err_ctx->ctx); return devlink_health_report(reporter, err_str, err_ctx); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 7c8796d9743f..a226277b0980 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -179,6 +179,14 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) } } +static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) +{ + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + mlx5_wq_ll_reset(&rq->mpwqe.wq); + else + mlx5_wq_cyc_reset(&rq->wqe.wq); +} + /* SW parser related functions */ struct mlx5e_swp_spec { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 454d3459bd8b..966983674663 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -712,6 +712,9 @@ int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) if (!in) return -ENOMEM; + if (curr_state == MLX5_RQC_STATE_RST && next_state == MLX5_RQC_STATE_RDY) + mlx5e_rqwq_reset(rq); + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); MLX5_SET(modify_rq_in, in, rq_state, curr_state); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 5acf60b1bbfe..e49acd0c5da5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -459,12 +459,16 @@ static void esw_destroy_legacy_table(struct mlx5_eswitch *esw) static int esw_legacy_enable(struct mlx5_eswitch *esw) { - int ret; + struct mlx5_vport *vport; + int ret, i; ret = esw_create_legacy_table(esw); if (ret) return ret; + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS); if (ret) esw_destroy_legacy_table(esw); @@ -2452,25 +2456,17 @@ out: int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting) { - int err = 0; - if (!esw) return -EOPNOTSUPP; if (!ESW_ALLOWED(esw)) return -EPERM; - mutex_lock(&esw->state_lock); - if (esw->mode != MLX5_ESWITCH_LEGACY) { - err = -EOPNOTSUPP; - goto out; - } + if (esw->mode != MLX5_ESWITCH_LEGACY) + return -EOPNOTSUPP; *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0; - -out: - mutex_unlock(&esw->state_lock); - return err; + return 0; } int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 979f13bdc203..1a57b2bd74b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1172,7 +1172,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, return -EINVAL; } - mlx5_eswitch_disable(esw, true); + mlx5_eswitch_disable(esw, false); mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs); err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); if (err) { @@ -2065,7 +2065,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, { int err, err1; - mlx5_eswitch_disable(esw, true); + mlx5_eswitch_disable(esw, false); err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c index c5a446e295aa..4276194b633f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c @@ -35,7 +35,7 @@ static const unsigned int ESW_POOLS[] = { 4 * 1024 * 1024, 1 * 1024 * 1024, 64 * 1024, - 4 * 1024, }; + 128 }; struct mlx5_esw_chains_priv { struct rhashtable chains_ht; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index c6c7d1defbd7..aade62a9ee5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -2307,7 +2307,9 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, struct mlx5dr_cmd_vport_cap *vport_cap; struct mlx5dr_domain *dmn = sb->dmn; struct mlx5dr_cmd_caps *caps; + u8 *bit_mask = sb->bit_mask; u8 *tag = hw_ste->tag; + bool source_gvmi_set; DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn); @@ -2328,7 +2330,8 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, if (!vport_cap) return -EINVAL; - if (vport_cap->vport_gvmi) + source_gvmi_set = MLX5_GET(ste_src_gvmi_qp, bit_mask, source_gvmi); + if (vport_cap->vport_gvmi && source_gvmi_set) MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi); misc->source_eswitch_owner_vhca_id = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 3abfc8125926..c2027192e21e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -66,15 +66,20 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *next_ft) { struct mlx5dr_table *tbl; + u32 flags; int err; if (mlx5_dr_is_fw_table(ft->flags)) return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft, log_size, next_ft); + flags = ft->flags; + /* turn off encap/decap if not supported for sw-str by fw */ + if (!MLX5_CAP_FLOWTABLE(ns->dev, sw_owner_reformat_supported)) + flags = ft->flags & ~(MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, - ft->level, ft->flags); + tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, ft->level, flags); if (!tbl) { mlx5_core_err(ns->dev, "Failed creating dr flow_table\n"); return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 02f7e4a39578..01f075fac276 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -94,6 +94,13 @@ void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides) print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, wqe, len, false); } +void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq) +{ + wq->wqe_ctr = 0; + wq->cur_sz = 0; + mlx5_wq_cyc_update_db_record(wq); +} + int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, struct mlx5_wq_qp *wq, struct mlx5_wq_ctrl *wq_ctrl) @@ -192,6 +199,19 @@ err_db_free: return err; } +static void mlx5_wq_ll_init_list(struct mlx5_wq_ll *wq) +{ + struct mlx5_wqe_srq_next_seg *next_seg; + int i; + + for (i = 0; i < wq->fbc.sz_m1; i++) { + next_seg = mlx5_wq_ll_get_wqe(wq, i); + next_seg->next_wqe_index = cpu_to_be16(i + 1); + } + next_seg = mlx5_wq_ll_get_wqe(wq, i); + wq->tail_next = &next_seg->next_wqe_index; +} + int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_ll *wq, struct mlx5_wq_ctrl *wq_ctrl) @@ -199,9 +219,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride); u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz); struct mlx5_frag_buf_ctrl *fbc = &wq->fbc; - struct mlx5_wqe_srq_next_seg *next_seg; int err; - int i; err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { @@ -220,13 +238,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc); - for (i = 0; i < fbc->sz_m1; i++) { - next_seg = mlx5_wq_ll_get_wqe(wq, i); - next_seg->next_wqe_index = cpu_to_be16(i + 1); - } - next_seg = mlx5_wq_ll_get_wqe(wq, i); - wq->tail_next = &next_seg->next_wqe_index; - + mlx5_wq_ll_init_list(wq); wq_ctrl->mdev = mdev; return 0; @@ -237,6 +249,15 @@ err_db_free: return err; } +void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq) +{ + wq->head = 0; + wq->wqe_ctr = 0; + wq->cur_sz = 0; + mlx5_wq_ll_init_list(wq); + mlx5_wq_ll_update_db_record(wq); +} + void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl) { mlx5_frag_buf_free(wq_ctrl->mdev, &wq_ctrl->buf); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index d9a94bc223c0..4cadc336593f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -80,6 +80,7 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_cyc *wq, struct mlx5_wq_ctrl *wq_ctrl); void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides); +void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq); int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, struct mlx5_wq_qp *wq, @@ -92,6 +93,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_ll *wq, struct mlx5_wq_ctrl *wq_ctrl); +void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq); void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl); diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index a41a90c589db..1c9e70c8cc30 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -157,24 +157,6 @@ static int msg_enable; */ /** - * ks_rdreg8 - read 8 bit register from device - * @ks : The chip information - * @offset: The register address - * - * Read a 8bit register from the chip, returning the result - */ -static u8 ks_rdreg8(struct ks_net *ks, int offset) -{ - u16 data; - u8 shift_bit = offset & 0x03; - u8 shift_data = (offset & 1) << 3; - ks->cmd_reg_cache = (u16) offset | (u16)(BE0 << shift_bit); - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); - data = ioread16(ks->hw_addr); - return (u8)(data >> shift_data); -} - -/** * ks_rdreg16 - read 16 bit register from device * @ks : The chip information * @offset: The register address @@ -184,28 +166,12 @@ static u8 ks_rdreg8(struct ks_net *ks, int offset) static u16 ks_rdreg16(struct ks_net *ks, int offset) { - ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); + ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02)); iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); return ioread16(ks->hw_addr); } /** - * ks_wrreg8 - write 8bit register value to chip - * @ks: The chip information - * @offset: The register address - * @value: The value to write - * - */ -static void ks_wrreg8(struct ks_net *ks, int offset, u8 value) -{ - u8 shift_bit = (offset & 0x03); - u16 value_write = (u16)(value << ((offset & 1) << 3)); - ks->cmd_reg_cache = (u16)offset | (BE0 << shift_bit); - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); - iowrite16(value_write, ks->hw_addr); -} - -/** * ks_wrreg16 - write 16bit register value to chip * @ks: The chip information * @offset: The register address @@ -215,7 +181,7 @@ static void ks_wrreg8(struct ks_net *ks, int offset, u8 value) static void ks_wrreg16(struct ks_net *ks, int offset, u16 value) { - ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); + ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02)); iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); iowrite16(value, ks->hw_addr); } @@ -231,7 +197,7 @@ static inline void ks_inblk(struct ks_net *ks, u16 *wptr, u32 len) { len >>= 1; while (len--) - *wptr++ = (u16)ioread16(ks->hw_addr); + *wptr++ = be16_to_cpu(ioread16(ks->hw_addr)); } /** @@ -245,7 +211,7 @@ static inline void ks_outblk(struct ks_net *ks, u16 *wptr, u32 len) { len >>= 1; while (len--) - iowrite16(*wptr++, ks->hw_addr); + iowrite16(cpu_to_be16(*wptr++), ks->hw_addr); } static void ks_disable_int(struct ks_net *ks) @@ -324,8 +290,7 @@ static void ks_read_config(struct ks_net *ks) u16 reg_data = 0; /* Regardless of bus width, 8 bit read should always work.*/ - reg_data = ks_rdreg8(ks, KS_CCR) & 0x00FF; - reg_data |= ks_rdreg8(ks, KS_CCR+1) << 8; + reg_data = ks_rdreg16(ks, KS_CCR); /* addr/data bus are multiplexed */ ks->sharedbus = (reg_data & CCR_SHARED) == CCR_SHARED; @@ -429,7 +394,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len) /* 1. set sudo DMA mode */ ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI); - ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); /* 2. read prepend data */ /** @@ -446,7 +411,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len) ks_inblk(ks, buf, ALIGN(len, 4)); /* 4. reset sudo DMA Mode */ - ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); } /** @@ -679,13 +644,13 @@ static void ks_write_qmu(struct ks_net *ks, u8 *pdata, u16 len) ks->txh.txw[1] = cpu_to_le16(len); /* 1. set sudo-DMA mode */ - ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); /* 2. write status/lenth info */ ks_outblk(ks, ks->txh.txw, 4); /* 3. write pkt data */ ks_outblk(ks, (u16 *)pdata, ALIGN(len, 4)); /* 4. reset sudo-DMA mode */ - ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); /* 5. Enqueue Tx(move the pkt from TX buffer into TXQ) */ ks_wrreg16(ks, KS_TXQCR, TXQCR_METFE); /* 6. wait until TXQCR_METFE is auto-cleared */ diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c index b38820849faa..1135a18019c7 100644 --- a/drivers/net/ethernet/mscc/ocelot_board.c +++ b/drivers/net/ethernet/mscc/ocelot_board.c @@ -114,6 +114,14 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg) if (err != 4) break; + /* At this point the IFH was read correctly, so it is safe to + * presume that there is no error. The err needs to be reset + * otherwise a frame could come in CPU queue between the while + * condition and the check for error later on. And in that case + * the new frame is just removed and not processed. + */ + err = 0; + ocelot_parse_ifh(ifh, &info); ocelot_port = ocelot->ports[info.port]; diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index e8a1b27db84d..234c6f30effb 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -163,6 +163,8 @@ struct qede_rdma_dev { struct list_head entry; struct list_head rdma_event_list; struct workqueue_struct *rdma_wq; + struct kref refcnt; + struct completion event_comp; bool exp_recovery; }; diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c index ffabc2d2f082..2d873ae8a234 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c +++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c @@ -59,6 +59,9 @@ static void _qede_rdma_dev_add(struct qede_dev *edev) static int qede_rdma_create_wq(struct qede_dev *edev) { INIT_LIST_HEAD(&edev->rdma_info.rdma_event_list); + kref_init(&edev->rdma_info.refcnt); + init_completion(&edev->rdma_info.event_comp); + edev->rdma_info.rdma_wq = create_singlethread_workqueue("rdma_wq"); if (!edev->rdma_info.rdma_wq) { DP_NOTICE(edev, "qedr: Could not create workqueue\n"); @@ -83,8 +86,23 @@ static void qede_rdma_cleanup_event(struct qede_dev *edev) } } +static void qede_rdma_complete_event(struct kref *ref) +{ + struct qede_rdma_dev *rdma_dev = + container_of(ref, struct qede_rdma_dev, refcnt); + + /* no more events will be added after this */ + complete(&rdma_dev->event_comp); +} + static void qede_rdma_destroy_wq(struct qede_dev *edev) { + /* Avoid race with add_event flow, make sure it finishes before + * we start accessing the list and cleaning up the work + */ + kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event); + wait_for_completion(&edev->rdma_info.event_comp); + qede_rdma_cleanup_event(edev); destroy_workqueue(edev->rdma_info.rdma_wq); } @@ -310,15 +328,24 @@ static void qede_rdma_add_event(struct qede_dev *edev, if (!edev->rdma_info.qedr_dev) return; + /* We don't want the cleanup flow to start while we're allocating and + * scheduling the work + */ + if (!kref_get_unless_zero(&edev->rdma_info.refcnt)) + return; /* already being destroyed */ + event_node = qede_rdma_get_free_event_node(edev); if (!event_node) - return; + goto out; event_node->event = event; event_node->ptr = edev; INIT_WORK(&event_node->work, qede_rdma_handle_event); queue_work(edev->rdma_info.rdma_wq, &event_node->work); + +out: + kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event); } void qede_rdma_dev_event_open(struct qede_dev *edev) diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c index b7032422393f..67ddf782d98a 100644 --- a/drivers/net/ethernet/socionext/sni_ave.c +++ b/drivers/net/ethernet/socionext/sni_ave.c @@ -1810,6 +1810,9 @@ static int ave_pro4_get_pinmode(struct ave_private *priv, break; case PHY_INTERFACE_MODE_MII: case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: priv->pinmode_val = 0; break; default: @@ -1854,6 +1857,9 @@ static int ave_ld20_get_pinmode(struct ave_private *priv, priv->pinmode_val = SG_ETPINMODE_RMII(0); break; case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: priv->pinmode_val = 0; break; default: @@ -1876,6 +1882,9 @@ static int ave_pxs3_get_pinmode(struct ave_private *priv, priv->pinmode_val = SG_ETPINMODE_RMII(arg); break; case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: priv->pinmode_val = 0; break; default: diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index c23ce838ff63..8dc6c9ff22e1 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -1350,27 +1350,12 @@ sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, if (vio_version_after_eq(&port->vio, 1, 3)) localmtu -= VLAN_HLEN; - if (skb->protocol == htons(ETH_P_IP)) { - struct flowi4 fl4; - struct rtable *rt = NULL; - - memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_oif = dev->ifindex; - fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); - fl4.daddr = ip_hdr(skb)->daddr; - fl4.saddr = ip_hdr(skb)->saddr; - - rt = ip_route_output_key(dev_net(dev), &fl4); - if (!IS_ERR(rt)) { - skb_dst_set(skb, &rt->dst); - icmp_send(skb, ICMP_DEST_UNREACH, - ICMP_FRAG_NEEDED, - htonl(localmtu)); - } - } + if (skb->protocol == htons(ETH_P_IP)) + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(localmtu)); #if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, localmtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, localmtu); #endif goto out_dropped; } diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index af07ea760b35..672cd2caf2fb 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -546,8 +546,8 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, mtu < ntohs(iph->tot_len)) { netdev_dbg(dev, "packet too big, fragmentation needed\n"); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); goto err_rt; } diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 7d68b28bb893..a62229a8b1a4 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -410,7 +410,7 @@ static int bcm5481_config_aneg(struct phy_device *phydev) struct device_node *np = phydev->mdio.dev.of_node; int ret; - /* Aneg firsly. */ + /* Aneg firstly. */ ret = genphy_config_aneg(phydev); /* Then we can set up the delay. */ @@ -463,7 +463,7 @@ static int bcm54616s_config_aneg(struct phy_device *phydev) { int ret; - /* Aneg firsly. */ + /* Aneg firstly. */ if (phydev->dev_flags & PHY_BCM_FLAGS_MODE_1000BX) ret = genphy_c37_config_aneg(phydev); else diff --git a/drivers/net/phy/mdio-bcm-iproc.c b/drivers/net/phy/mdio-bcm-iproc.c index 7e9975d25066..f1ded03f0229 100644 --- a/drivers/net/phy/mdio-bcm-iproc.c +++ b/drivers/net/phy/mdio-bcm-iproc.c @@ -178,6 +178,23 @@ static int iproc_mdio_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP +int iproc_mdio_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct iproc_mdio_priv *priv = platform_get_drvdata(pdev); + + /* restore the mii clock configuration */ + iproc_mdio_config_clk(priv->base); + + return 0; +} + +static const struct dev_pm_ops iproc_mdio_pm_ops = { + .resume = iproc_mdio_resume +}; +#endif /* CONFIG_PM_SLEEP */ + static const struct of_device_id iproc_mdio_of_match[] = { { .compatible = "brcm,iproc-mdio", }, { /* sentinel */ }, @@ -188,6 +205,9 @@ static struct platform_driver iproc_mdio_driver = { .driver = { .name = "iproc-mdio", .of_match_table = iproc_mdio_of_match, +#ifdef CONFIG_PM_SLEEP + .pm = &iproc_mdio_pm_ops, +#endif }, .probe = iproc_mdio_probe, .remove = iproc_mdio_remove, diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 16b19824b9ad..cdc96968b0f4 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -203,9 +203,9 @@ err_peer: err: ++dev->stats.tx_errors; if (skb->protocol == htons(ETH_P_IP)) - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); else if (skb->protocol == htons(ETH_P_IPV6)) - icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); + icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); kfree_skb(skb); return ret; } @@ -258,6 +258,8 @@ static void wg_setup(struct net_device *dev) enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA }; + const int overhead = MESSAGE_MINIMUM_LENGTH + sizeof(struct udphdr) + + max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); dev->netdev_ops = &netdev_ops; dev->hard_header_len = 0; @@ -271,9 +273,8 @@ static void wg_setup(struct net_device *dev) dev->features |= WG_NETDEV_FEATURES; dev->hw_features |= WG_NETDEV_FEATURES; dev->hw_enc_features |= WG_NETDEV_FEATURES; - dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH - - sizeof(struct udphdr) - - max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); + dev->mtu = ETH_DATA_LEN - overhead; + dev->max_mtu = round_down(INT_MAX, MESSAGE_PADDING_MULTIPLE) - overhead; SET_NETDEV_DEVTYPE(dev, &device_type); diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 9c6bab9c981f..4a153894cee2 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -118,10 +118,13 @@ static void wg_receive_handshake_packet(struct wg_device *wg, under_load = skb_queue_len(&wg->incoming_handshakes) >= MAX_QUEUED_INCOMING_HANDSHAKES / 8; - if (under_load) + if (under_load) { last_under_load = ktime_get_coarse_boottime_ns(); - else if (last_under_load) + } else if (last_under_load) { under_load = !wg_birthdate_has_expired(last_under_load, 1); + if (!under_load) + last_under_load = 0; + } mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb, under_load); if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) || diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c index c13260563446..7348c10cbae3 100644 --- a/drivers/net/wireguard/send.c +++ b/drivers/net/wireguard/send.c @@ -143,16 +143,22 @@ static void keep_key_fresh(struct wg_peer *peer) static unsigned int calculate_skb_padding(struct sk_buff *skb) { + unsigned int padded_size, last_unit = skb->len; + + if (unlikely(!PACKET_CB(skb)->mtu)) + return ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE) - last_unit; + /* We do this modulo business with the MTU, just in case the networking * layer gives us a packet that's bigger than the MTU. In that case, we * wouldn't want the final subtraction to overflow in the case of the - * padded_size being clamped. + * padded_size being clamped. Fortunately, that's very rarely the case, + * so we optimize for that not happening. */ - unsigned int last_unit = skb->len % PACKET_CB(skb)->mtu; - unsigned int padded_size = ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE); + if (unlikely(last_unit > PACKET_CB(skb)->mtu)) + last_unit %= PACKET_CB(skb)->mtu; - if (padded_size > PACKET_CB(skb)->mtu) - padded_size = PACKET_CB(skb)->mtu; + padded_size = min(PACKET_CB(skb)->mtu, + ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE)); return padded_size - last_unit; } diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c index 262f3b5c819d..b0d6541582d3 100644 --- a/drivers/net/wireguard/socket.c +++ b/drivers/net/wireguard/socket.c @@ -432,7 +432,6 @@ void wg_socket_reinit(struct wg_device *wg, struct sock *new4, wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); mutex_unlock(&wg->socket_update_lock); synchronize_rcu(); - synchronize_net(); sock_free(old4); sock_free(old6); } diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c index 720c89d6066e..4ac8cb262559 100644 --- a/drivers/nfc/pn544/i2c.c +++ b/drivers/nfc/pn544/i2c.c @@ -225,6 +225,7 @@ static void pn544_hci_i2c_platform_init(struct pn544_i2c_phy *phy) out: gpiod_set_value_cansleep(phy->gpiod_en, !phy->en_polarity); + usleep_range(10000, 15000); } static void pn544_hci_i2c_enable_mode(struct pn544_i2c_phy *phy, int run_mode) diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c index 2b83156efe3f..b788870473e8 100644 --- a/drivers/nfc/pn544/pn544.c +++ b/drivers/nfc/pn544/pn544.c @@ -682,7 +682,7 @@ static int pn544_hci_tm_send(struct nfc_hci_dev *hdev, struct sk_buff *skb) static int pn544_hci_check_presence(struct nfc_hci_dev *hdev, struct nfc_target *target) { - pr_debug("supported protocol %d\b", target->supported_protocols); + pr_debug("supported protocol %d\n", target->supported_protocols); if (target->supported_protocols & (NFC_PROTO_ISO14443_MASK | NFC_PROTO_ISO14443_B_MASK)) { return nfc_hci_send_cmd(hdev, target->hci_reader_gate, diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index d704eccc548f..f01a57e5a5f3 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -771,7 +771,7 @@ static int smmu_pmu_probe(struct platform_device *pdev) smmu_pmu->reloc_base = smmu_pmu->reg_base; } - irq = platform_get_irq(pdev, 0); + irq = platform_get_irq_optional(pdev, 0); if (irq > 0) smmu_pmu->irq = irq; diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c index 97acc2ba2912..de844b412110 100644 --- a/drivers/spmi/spmi-pmic-arb.c +++ b/drivers/spmi/spmi-pmic-arb.c @@ -731,6 +731,7 @@ static int qpnpint_irq_domain_translate(struct irq_domain *d, return 0; } +static struct lock_class_key qpnpint_irq_lock_class, qpnpint_irq_request_class; static void qpnpint_irq_domain_map(struct spmi_pmic_arb *pmic_arb, struct irq_domain *domain, unsigned int virq, @@ -746,6 +747,9 @@ static void qpnpint_irq_domain_map(struct spmi_pmic_arb *pmic_arb, else handler = handle_level_irq; + + irq_set_lockdep_class(virq, &qpnpint_irq_lock_class, + &qpnpint_irq_request_class); irq_domain_set_info(domain, virq, hwirq, &pmic_arb_irqchip, pmic_arb, handler, NULL, NULL); } diff --git a/drivers/tty/vt/Makefile b/drivers/tty/vt/Makefile index 329ca336b8ee..fe30ce512819 100644 --- a/drivers/tty/vt/Makefile +++ b/drivers/tty/vt/Makefile @@ -12,7 +12,7 @@ obj-$(CONFIG_HW_CONSOLE) += vt.o defkeymap.o # Files generated that shall be removed upon make clean clean-files := consolemap_deftbl.c defkeymap.c -hostprogs-y += conmakehash +hostprogs += conmakehash quiet_cmd_conmk = CONMK $@ cmd_conmk = $(obj)/conmakehash $< > $@ diff --git a/drivers/video/logo/Makefile b/drivers/video/logo/Makefile index bcda657493a4..895c60b8402e 100644 --- a/drivers/video/logo/Makefile +++ b/drivers/video/logo/Makefile @@ -18,7 +18,7 @@ obj-$(CONFIG_SPU_BASE) += logo_spe_clut224.o # How to generate logo's -hostprogs-y := pnmtologo +hostprogs := pnmtologo # Create commands like "pnmtologo -t mono -n logo_mac_mono -o ..." quiet_cmd_logo = LOGO $@ diff --git a/drivers/zorro/Makefile b/drivers/zorro/Makefile index b360ac4ea846..91ba82e633e7 100644 --- a/drivers/zorro/Makefile +++ b/drivers/zorro/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_ZORRO) += zorro.o zorro-driver.o zorro-sysfs.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_ZORRO_NAMES) += names.o -hostprogs-y := gen-devlist +hostprogs := gen-devlist # Files generated that shall be removed upon make clean clean-files := devlist.h diff --git a/fs/Kconfig b/fs/Kconfig index 7b623e9fc1b0..708ba336e689 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -40,6 +40,7 @@ source "fs/ocfs2/Kconfig" source "fs/btrfs/Kconfig" source "fs/nilfs2/Kconfig" source "fs/f2fs/Kconfig" +source "fs/zonefs/Kconfig" config FS_DAX bool "Direct Access (DAX) support" @@ -264,6 +265,7 @@ source "fs/pstore/Kconfig" source "fs/sysv/Kconfig" source "fs/ufs/Kconfig" source "fs/erofs/Kconfig" +source "fs/vboxsf/Kconfig" endif # MISC_FILESYSTEMS diff --git a/fs/Makefile b/fs/Makefile index 98be354fdb61..505e51166973 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -133,3 +133,5 @@ obj-$(CONFIG_CEPH_FS) += ceph/ obj-$(CONFIG_PSTORE) += pstore/ obj-$(CONFIG_EFIVAR_FS) += efivarfs/ obj-$(CONFIG_EROFS_FS) += erofs/ +obj-$(CONFIG_VBOXSF_FS) += vboxsf/ +obj-$(CONFIG_ZONEFS_FS) += zonefs/ diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h index 0f0dc1c1fe41..153d5c842a9b 100644 --- a/fs/cifs/cifs_ioctl.h +++ b/fs/cifs/cifs_ioctl.h @@ -65,6 +65,11 @@ struct smb3_key_debug_info { __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE]; } __packed; +struct smb3_notify { + __u32 completion_filter; + bool watch_tree; +} __packed; + #define CIFS_IOCTL_MAGIC 0xCF #define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int) #define CIFS_IOC_SET_INTEGRITY _IO(CIFS_IOCTL_MAGIC, 4) @@ -72,3 +77,4 @@ struct smb3_key_debug_info { #define CIFS_ENUMERATE_SNAPSHOTS _IOR(CIFS_IOCTL_MAGIC, 6, struct smb_snapshot_array) #define CIFS_QUERY_INFO _IOWR(CIFS_IOCTL_MAGIC, 7, struct smb_query_info) #define CIFS_DUMP_KEY _IOWR(CIFS_IOCTL_MAGIC, 8, struct smb3_key_debug_info) +#define CIFS_IOC_NOTIFY _IOW(CIFS_IOCTL_MAGIC, 9, struct smb3_notify) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index fb41e51dd574..440828afcdde 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1084,7 +1084,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, struct cifs_ntsd *pntsd = NULL; int oplock = 0; unsigned int xid; - int rc, create_options = 0; + int rc; struct cifs_tcon *tcon; struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); struct cifs_fid fid; @@ -1096,13 +1096,10 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, tcon = tlink_tcon(tlink); xid = get_xid(); - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = READ_CONTROL; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; @@ -1147,7 +1144,7 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, { int oplock = 0; unsigned int xid; - int rc, access_flags, create_options = 0; + int rc, access_flags; struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); @@ -1160,9 +1157,6 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, tcon = tlink_tcon(tlink); xid = get_xid(); - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - if (aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP) access_flags = WRITE_OWNER; else @@ -1171,7 +1165,7 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = access_flags; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 5492b9860baa..febab27cd838 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -275,7 +275,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_ffree = 0; /* unlimited */ if (server->ops->queryfs) - rc = server->ops->queryfs(xid, tcon, buf); + rc = server->ops->queryfs(xid, tcon, cifs_sb, buf); free_xid(xid); return 0; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 239338d57086..de82cfa44b1a 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -298,7 +298,8 @@ struct smb_version_operations { const char *, struct dfs_info3_param **, unsigned int *, const struct nls_table *, int); /* informational QFS call */ - void (*qfs_tcon)(const unsigned int, struct cifs_tcon *); + void (*qfs_tcon)(const unsigned int, struct cifs_tcon *, + struct cifs_sb_info *); /* check if a path is accessible or not */ int (*is_path_accessible)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const char *); @@ -409,7 +410,7 @@ struct smb_version_operations { struct cifsInodeInfo *); /* query remote filesystem */ int (*queryfs)(const unsigned int, struct cifs_tcon *, - struct kstatfs *); + struct cifs_sb_info *, struct kstatfs *); /* send mandatory brlock to the server */ int (*mand_lock)(const unsigned int, struct cifsFileInfo *, __u64, __u64, __u32, int, int, bool); @@ -430,6 +431,8 @@ struct smb_version_operations { struct cifsFileInfo *src_file); int (*enum_snapshots)(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *src_file, void __user *); + int (*notify)(const unsigned int xid, struct file *pfile, + void __user *pbuf); int (*query_mf_symlink)(unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const unsigned char *, char *, unsigned int *); @@ -490,6 +493,7 @@ struct smb_version_operations { /* ioctl passthrough for query_info */ int (*ioctl_query_info)(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, __le16 *path, int is_dir, unsigned long p); /* make unix special files (block, char, fifo, socket) */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 948bf3474db1..89eaaf46d1ca 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -612,4 +612,12 @@ static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, } #endif +static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options) +{ + if (cifs_sb && (backup_cred(cifs_sb))) + return options | CREATE_OPEN_BACKUP_INTENT; + else + return options; +} + #endif /* _CIFSPROTO_H */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a481296f417f..3c89569e7210 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -260,7 +260,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) if (server->tcpStatus != CifsNeedReconnect) break; - if (--retries) + if (retries && --retries) continue; /* diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0aa3623ae0e1..a941ac7a659d 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -4365,7 +4365,7 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, /* do not care if a following call succeed - informational */ if (!tcon->pipe && server->ops->qfs_tcon) { - server->ops->qfs_tcon(*xid, tcon); + server->ops->qfs_tcon(*xid, tcon, cifs_sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) { if (tcon->fsDevInfo.DeviceCharacteristics & cpu_to_le32(FILE_READ_ONLY_DEVICE)) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index f3b79012ff29..0ef099442f20 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -355,13 +355,10 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, if (!tcon->unix_ext && (mode & S_IWUGO) == 0) create_options |= CREATE_OPTION_READONLY; - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = desired_access; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.disposition = disposition; oparms.path = full_path; oparms.fid = fid; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index a4e8f7d445ac..bc9516ab4b34 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -222,9 +222,6 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, if (!buf) return -ENOMEM; - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - /* O_SYNC also has bit for O_DSYNC so following check picks up either */ if (f_flags & O_SYNC) create_options |= CREATE_WRITE_THROUGH; @@ -235,7 +232,7 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = desired_access; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.disposition = disposition; oparms.path = full_path; oparms.fid = fid; @@ -752,9 +749,6 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) desired_access = cifs_convert_flags(cfile->f_flags); - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - /* O_SYNC also has bit for O_DSYNC so following check picks up either */ if (cfile->f_flags & O_SYNC) create_options |= CREATE_WRITE_THROUGH; @@ -768,7 +762,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = desired_access; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.disposition = disposition; oparms.path = full_path; oparms.fid = &cfile->fid; @@ -2599,8 +2593,10 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); rc = file_write_and_wait_range(file, start, end); - if (rc) + if (rc) { + trace_cifs_fsync_err(inode->i_ino, rc); return rc; + } xid = get_xid(); @@ -2638,8 +2634,10 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); rc = file_write_and_wait_range(file, start, end); - if (rc) + if (rc) { + trace_cifs_fsync_err(file_inode(file)->i_ino, rc); return rc; + } xid = get_xid(); @@ -2672,7 +2670,8 @@ int cifs_flush(struct file *file, fl_owner_t id) rc = filemap_write_and_wait(inode->i_mapping); cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc); - + if (rc) + trace_cifs_flush_err(inode->i_ino, rc); return rc; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 676e96a7a9f0..9ba623b601ec 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -475,9 +475,7 @@ cifs_sfu_type(struct cifs_fattr *fattr, const char *path, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_READ; - oparms.create_options = CREATE_NOT_DIR; - if (backup_cred(cifs_sb)) - oparms.create_options |= CREATE_OPEN_BACKUP_INTENT; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; @@ -1285,7 +1283,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = DELETE | FILE_WRITE_ATTRIBUTES; - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = full_path; oparms.fid = &fid; @@ -1823,7 +1821,7 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, oparms.cifs_sb = cifs_sb; /* open the file to be renamed -- we need DELETE perms */ oparms.desired_access = DELETE; - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = from_path; oparms.fid = &fid; diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 1a01e108d75e..4a73e63c4d43 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -65,7 +65,7 @@ static long cifs_ioctl_query_info(unsigned int xid, struct file *filep, if (tcon->ses->server->ops->ioctl_query_info) rc = tcon->ses->server->ops->ioctl_query_info( - xid, tcon, utf16_path, + xid, tcon, cifs_sb, utf16_path, filep->private_data ? 0 : 1, p); else rc = -EOPNOTSUPP; @@ -169,6 +169,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) unsigned int xid; struct cifsFileInfo *pSMBFile = filep->private_data; struct cifs_tcon *tcon; + struct cifs_sb_info *cifs_sb; __u64 ExtAttrBits = 0; __u64 caps; @@ -299,6 +300,21 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) else rc = 0; break; + case CIFS_IOC_NOTIFY: + if (!S_ISDIR(inode->i_mode)) { + /* Notify can only be done on directories */ + rc = -EOPNOTSUPP; + break; + } + cifs_sb = CIFS_SB(inode->i_sb); + tcon = tlink_tcon(cifs_sb_tlink(cifs_sb)); + if (tcon && tcon->ses->server->ops->notify) { + rc = tcon->ses->server->ops->notify(xid, + filep, (void __user *)arg); + cifs_dbg(FYI, "ioctl notify rc %d\n", rc); + } else + rc = -EOPNOTSUPP; + break; default: cifs_dbg(FYI, "unsupported ioctl\n"); break; diff --git a/fs/cifs/link.c b/fs/cifs/link.c index b736acd3917b..852aa00ec729 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -315,7 +315,7 @@ cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_READ; - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; @@ -353,15 +353,11 @@ cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid fid; struct cifs_open_parms oparms; struct cifs_io_parms io_parms; - int create_options = CREATE_NOT_DIR; - - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_CREATE; oparms.path = path; oparms.fid = &fid; @@ -402,9 +398,7 @@ smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_READ; - oparms.create_options = CREATE_NOT_DIR; - if (backup_cred(cifs_sb)) - oparms.create_options |= CREATE_OPEN_BACKUP_INTENT; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.fid = &fid; oparms.reconnect = false; @@ -457,14 +451,10 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid fid; struct cifs_open_parms oparms; struct cifs_io_parms io_parms; - int create_options = CREATE_NOT_DIR; __le16 *utf16_path; __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; struct kvec iov[2]; - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - cifs_dbg(FYI, "%s: path: %s\n", __func__, path); utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); @@ -474,7 +464,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_CREATE; oparms.fid = &fid; oparms.reconnect = false; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index d17587c2c4ab..ba9dadf3be24 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -196,7 +196,8 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) * may look wrong since the inodes may not have timed out by the time * "ls" does a stat() call on them. */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) || + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)) fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL && diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index f0795c856d8f..43a88e26d26b 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -101,7 +101,7 @@ int cifs_try_adding_channels(struct cifs_ses *ses) iface_count = ses->iface_count; if (iface_count <= 0) { spin_unlock(&ses->iface_lock); - cifs_dbg(FYI, "no iface list available to open channels\n"); + cifs_dbg(VFS, "no iface list available to open channels\n"); return 0; } ifaces = kmemdup(ses->iface_list, iface_count*sizeof(*ifaces), diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index d70a2bb062df..eb994e313c6a 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -504,7 +504,8 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) } static void -cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) +cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb) { CIFSSMBQFSDeviceInfo(xid, tcon); CIFSSMBQFSAttributeInfo(xid, tcon); @@ -565,7 +566,7 @@ cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = FILE_READ_ATTRIBUTES; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.disposition = FILE_OPEN; oparms.path = full_path; oparms.fid = &fid; @@ -793,7 +794,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = SYNCHRONIZE | FILE_WRITE_ATTRIBUTES; - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = full_path; oparms.fid = &fid; @@ -872,7 +873,7 @@ cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, static int cifs_queryfs(const unsigned int xid, struct cifs_tcon *tcon, - struct kstatfs *buf) + struct cifs_sb_info *cifs_sb, struct kstatfs *buf) { int rc = -EOPNOTSUPP; @@ -970,7 +971,8 @@ cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = FILE_READ_ATTRIBUTES; - oparms.create_options = OPEN_REPARSE_POINT; + oparms.create_options = cifs_create_options(cifs_sb, + OPEN_REPARSE_POINT); oparms.disposition = FILE_OPEN; oparms.path = full_path; oparms.fid = &fid; @@ -1029,7 +1031,6 @@ cifs_make_node(unsigned int xid, struct inode *inode, struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct inode *newinode = NULL; int rc = -EPERM; - int create_options = CREATE_NOT_DIR | CREATE_OPTION_SPECIAL; FILE_ALL_INFO *buf = NULL; struct cifs_io_parms io_parms; __u32 oplock = 0; @@ -1090,13 +1091,11 @@ cifs_make_node(unsigned int xid, struct inode *inode, goto out; } - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR | + CREATE_OPTION_SPECIAL); oparms.disposition = FILE_CREATE; oparms.path = full_path; oparms.fid = &fid; diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 5ef5e97a6d13..1cf207564ff9 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -99,9 +99,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = desired_access; oparms.disposition = create_disposition; - oparms.create_options = create_options; - if (backup_cred(cifs_sb)) - oparms.create_options |= CREATE_OPEN_BACKUP_INTENT; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.fid = &fid; oparms.reconnect = false; oparms.mode = mode; @@ -457,7 +455,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, /* If it is a root and its handle is cached then use it */ if (!strlen(full_path) && !no_cached_open) { - rc = open_shroot(xid, tcon, &fid); + rc = open_shroot(xid, tcon, cifs_sb, &fid); if (rc) goto out; @@ -474,9 +472,6 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, goto out; } - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 6787fce26f20..baa825f4cec0 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -655,7 +655,8 @@ smb2_cached_lease_break(struct work_struct *work) /* * Open the directory at the root of a share */ -int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) +int open_shroot(unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, struct cifs_fid *pfid) { struct cifs_ses *ses = tcon->ses; struct TCP_Server_Info *server = ses->server; @@ -702,7 +703,7 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) rqst[0].rq_nvec = SMB2_CREATE_IOV_SIZE; oparms.tcon = tcon; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; oparms.fid = pfid; @@ -818,7 +819,8 @@ oshr_free: } static void -smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) +smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb) { int rc; __le16 srch_path = 0; /* Null - open root of share */ @@ -830,7 +832,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -838,7 +840,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL, NULL); else - rc = open_shroot(xid, tcon, &fid); + rc = open_shroot(xid, tcon, cifs_sb, &fid); if (rc) return; @@ -860,7 +862,8 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) } static void -smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) +smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb) { int rc; __le16 srch_path = 0; /* Null - open root of share */ @@ -871,7 +874,7 @@ smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -906,10 +909,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -1151,10 +1151,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_WRITE_EA; oparms.disposition = FILE_OPEN; - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -1422,6 +1419,7 @@ req_res_key_exit: static int smb2_ioctl_query_info(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, __le16 *path, int is_dir, unsigned long p) { @@ -1447,6 +1445,7 @@ smb2_ioctl_query_info(const unsigned int xid, struct kvec close_iov[1]; unsigned int size[2]; void *data[2]; + int create_options = is_dir ? CREATE_NOT_FILE : CREATE_NOT_DIR; memset(rqst, 0, sizeof(rqst)); resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER; @@ -1477,10 +1476,7 @@ smb2_ioctl_query_info(const unsigned int xid, memset(&oparms, 0, sizeof(oparms)); oparms.tcon = tcon; oparms.disposition = FILE_OPEN; - if (is_dir) - oparms.create_options = CREATE_NOT_FILE; - else - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.fid = &fid; oparms.reconnect = false; @@ -2049,6 +2045,66 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon, return rc; } + + +static int +smb3_notify(const unsigned int xid, struct file *pfile, + void __user *ioc_buf) +{ + struct smb3_notify notify; + struct dentry *dentry = pfile->f_path.dentry; + struct inode *inode = file_inode(pfile); + struct cifs_sb_info *cifs_sb; + struct cifs_open_parms oparms; + struct cifs_fid fid; + struct cifs_tcon *tcon; + unsigned char *path = NULL; + __le16 *utf16_path = NULL; + u8 oplock = SMB2_OPLOCK_LEVEL_NONE; + int rc = 0; + + path = build_path_from_dentry(dentry); + if (path == NULL) + return -ENOMEM; + + cifs_sb = CIFS_SB(inode->i_sb); + + utf16_path = cifs_convert_path_to_utf16(path + 1, cifs_sb); + if (utf16_path == NULL) { + rc = -ENOMEM; + goto notify_exit; + } + + if (copy_from_user(¬ify, ioc_buf, sizeof(struct smb3_notify))) { + rc = -EFAULT; + goto notify_exit; + } + + tcon = cifs_sb_master_tcon(cifs_sb); + oparms.tcon = tcon; + oparms.desired_access = FILE_READ_ATTRIBUTES; + oparms.disposition = FILE_OPEN; + oparms.create_options = cifs_create_options(cifs_sb, 0); + oparms.fid = &fid; + oparms.reconnect = false; + + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL); + if (rc) + goto notify_exit; + + rc = SMB2_change_notify(xid, tcon, fid.persistent_fid, fid.volatile_fid, + notify.watch_tree, notify.completion_filter); + + SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); + + cifs_dbg(FYI, "change notify for path %s rc %d\n", path, rc); + +notify_exit: + kfree(path); + kfree(utf16_path); + return rc; +} + static int smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, const char *path, struct cifs_sb_info *cifs_sb, @@ -2086,10 +2142,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES | FILE_READ_DATA; oparms.disposition = FILE_OPEN; - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = fid; oparms.reconnect = false; @@ -2343,10 +2396,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = desired_access; oparms.disposition = FILE_OPEN; - if (cifs_sb && backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -2402,7 +2452,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, static int smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, - struct kstatfs *buf) + struct cifs_sb_info *cifs_sb, struct kstatfs *buf) { struct smb2_query_info_rsp *rsp; struct smb2_fs_full_size_info *info = NULL; @@ -2417,7 +2467,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, FS_FULL_SIZE_INFORMATION, SMB2_O_INFO_FILESYSTEM, sizeof(struct smb2_fs_full_size_info), - &rsp_iov, &buftype, NULL); + &rsp_iov, &buftype, cifs_sb); if (rc) goto qfs_exit; @@ -2439,7 +2489,7 @@ qfs_exit: static int smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon, - struct kstatfs *buf) + struct cifs_sb_info *cifs_sb, struct kstatfs *buf) { int rc; __le16 srch_path = 0; /* Null - open root of share */ @@ -2448,12 +2498,12 @@ smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid fid; if (!tcon->posix_extensions) - return smb2_queryfs(xid, tcon, buf); + return smb2_queryfs(xid, tcon, cifs_sb, buf); oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -2722,6 +2772,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, struct smb2_create_rsp *create_rsp; struct smb2_ioctl_rsp *ioctl_rsp; struct reparse_data_buffer *reparse_buf; + int create_options = is_reparse_point ? OPEN_REPARSE_POINT : 0; u32 plen; cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); @@ -2748,14 +2799,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; - if (is_reparse_point) - oparms.create_options = OPEN_REPARSE_POINT; - + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.fid = &fid; oparms.reconnect = false; @@ -2934,11 +2978,6 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, tcon = tlink_tcon(tlink); xid = get_xid(); - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; - utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) { rc = -ENOMEM; @@ -2949,6 +2988,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, oparms.tcon = tcon; oparms.desired_access = READ_CONTROL; oparms.disposition = FILE_OPEN; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -2990,11 +3030,6 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen, tcon = tlink_tcon(tlink); xid = get_xid(); - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; - if (aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP) access_flags = WRITE_OWNER; else @@ -3009,6 +3044,7 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen, oparms.tcon = tcon; oparms.desired_access = access_flags; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; @@ -4491,7 +4527,6 @@ smb2_make_node(unsigned int xid, struct inode *inode, { struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); int rc = -EPERM; - int create_options = CREATE_NOT_DIR | CREATE_OPTION_SPECIAL; FILE_ALL_INFO *buf = NULL; struct cifs_io_parms io_parms; __u32 oplock = 0; @@ -4527,13 +4562,11 @@ smb2_make_node(unsigned int xid, struct inode *inode, goto out; } - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR | + CREATE_OPTION_SPECIAL); oparms.disposition = FILE_CREATE; oparms.path = full_path; oparms.fid = &fid; @@ -4868,6 +4901,7 @@ struct smb_version_operations smb30_operations = { .dir_needs_close = smb2_dir_needs_close, .fallocate = smb3_fallocate, .enum_snapshots = smb3_enum_snapshots, + .notify = smb3_notify, .init_transform_rq = smb3_init_transform_rq, .is_transform_hdr = smb3_is_transform_hdr, .receive_transform = smb3_receive_transform, @@ -4978,6 +5012,7 @@ struct smb_version_operations smb311_operations = { .dir_needs_close = smb2_dir_needs_close, .fallocate = smb3_fallocate, .enum_snapshots = smb3_enum_snapshots, + .notify = smb3_notify, .init_transform_rq = smb3_init_transform_rq, .is_transform_hdr = smb3_is_transform_hdr, .receive_transform = smb3_receive_transform, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 14f209f7376f..1234f9ccab03 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -350,9 +350,14 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) } rc = cifs_negotiate_protocol(0, tcon->ses); - if (!rc && tcon->ses->need_reconnect) + if (!rc && tcon->ses->need_reconnect) { rc = cifs_setup_session(0, tcon->ses, nls_codepage); - + if ((rc == -EACCES) && !tcon->retry) { + rc = -EHOSTDOWN; + mutex_unlock(&tcon->ses->session_mutex); + goto failed; + } + } if (rc || !tcon->need_reconnect) { mutex_unlock(&tcon->ses->session_mutex); goto out; @@ -397,6 +402,7 @@ out: case SMB2_SET_INFO: rc = -EAGAIN; } +failed: unload_nls(nls_codepage); return rc; } @@ -1933,6 +1939,16 @@ parse_query_id_ctxt(struct create_context *cc, struct smb2_file_all_info *buf) buf->IndexNumber = pdisk_id->DiskFileId; } +static void +parse_posix_ctxt(struct create_context *cc, struct smb_posix_info *pposix_inf) +{ + /* struct smb_posix_info *ppinf = (struct smb_posix_info *)cc; */ + + /* TODO: Need to add parsing for the context and return */ + printk_once(KERN_WARNING + "SMB3 3.11 POSIX response context not completed yet\n"); +} + void smb2_parse_contexts(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp, @@ -1944,6 +1960,9 @@ smb2_parse_contexts(struct TCP_Server_Info *server, unsigned int next; unsigned int remaining; char *name; + const char smb3_create_tag_posix[] = {0x93, 0xAD, 0x25, 0x50, 0x9C, + 0xB4, 0x11, 0xE7, 0xB4, 0x23, 0x83, + 0xDE, 0x96, 0x8B, 0xCD, 0x7C}; *oplock = 0; data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset); @@ -1963,6 +1982,15 @@ smb2_parse_contexts(struct TCP_Server_Info *server, else if (buf && (le16_to_cpu(cc->NameLength) == 4) && strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4) == 0) parse_query_id_ctxt(cc, buf); + else if ((le16_to_cpu(cc->NameLength) == 16)) { + if (memcmp(name, smb3_create_tag_posix, 16) == 0) + parse_posix_ctxt(cc, NULL); + } + /* else { + cifs_dbg(FYI, "Context not matched with len %d\n", + le16_to_cpu(cc->NameLength)); + cifs_dump_mem("Cctxt name: ", name, 4); + } */ next = le32_to_cpu(cc->Next); if (!next) @@ -3357,6 +3385,7 @@ SMB2_notify_init(const unsigned int xid, struct smb_rqst *rqst, req->PersistentFileId = persistent_fid; req->VolatileFileId = volatile_fid; + /* See note 354 of MS-SMB2, 64K max */ req->OutputBufferLength = cpu_to_le32(SMB2_MAX_BUFFER_SIZE - MAX_SMB2_HDR_SIZE); req->CompletionFilter = cpu_to_le32(completion_filter); @@ -4023,6 +4052,9 @@ smb2_writev_callback(struct mid_q_entry *mid) wdata->cfile->fid.persistent_fid, tcon->tid, tcon->ses->Suid, wdata->offset, wdata->bytes, wdata->result); + if (wdata->result == -ENOSPC) + printk_once(KERN_WARNING "Out of space writing to %s\n", + tcon->treeName); } else trace_smb3_write_done(0 /* no xid */, wdata->cfile->fid.persistent_fid, diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 4c43dbd1e089..fa03df130f1a 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -1519,6 +1519,7 @@ struct smb3_fs_vol_info { #define FILE_NORMALIZED_NAME_INFORMATION 48 #define FILEID_GLOBAL_TX_DIRECTORY_INFORMATION 50 #define FILE_STANDARD_LINK_INFORMATION 54 +#define FILE_ID_INFORMATION 59 struct smb2_file_internal_info { __le64 IndexNumber; @@ -1593,6 +1594,21 @@ struct smb2_file_network_open_info { __le32 Reserved; } __packed; /* level 34 Query also similar returned in close rsp and open rsp */ +/* See MS-FSCC 2.4.43 */ +struct smb2_file_id_information { + __le64 VolumeSerialNumber; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ +} __packed; /* level 59 */ + extern char smb2_padding[7]; +/* equivalent of the contents of SMB3.1.1 POSIX open context response */ +struct smb_posix_info { + __le32 nlink; + __le32 reparse_tag; + __le32 mode; + kuid_t uid; + kuid_t gid; +}; #endif /* _SMB2PDU_H */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 6c678e00046f..de6388ef344f 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -68,7 +68,7 @@ extern int smb3_handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid); extern int open_shroot(unsigned int xid, struct cifs_tcon *tcon, - struct cifs_fid *pfid); + struct cifs_sb_info *cifs_sb, struct cifs_fid *pfid); extern void close_shroot(struct cached_fid *cfid); extern void close_shroot_lease(struct cached_fid *cfid); extern void close_shroot_lease_locked(struct cached_fid *cfid); diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index fe6acfce3390..08b703b7a15e 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -104,13 +104,14 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) { struct cifs_chan *chan; struct cifs_ses *ses = NULL; + struct TCP_Server_Info *it = NULL; int i; int rc = 0; spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + list_for_each_entry(it, &cifs_tcp_ses_list, tcp_ses_list) { + list_for_each_entry(ses, &it->smb_ses_list, smb_ses_list) { if (ses->Suid == ses_id) goto found; } diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h index e7e350b13d6a..4cb0d5f7ce45 100644 --- a/fs/cifs/trace.h +++ b/fs/cifs/trace.h @@ -547,6 +547,33 @@ DEFINE_EVENT(smb3_exit_err_class, smb3_##name, \ DEFINE_SMB3_EXIT_ERR_EVENT(exit_err); + +DECLARE_EVENT_CLASS(smb3_sync_err_class, + TP_PROTO(unsigned long ino, + int rc), + TP_ARGS(ino, rc), + TP_STRUCT__entry( + __field(unsigned long, ino) + __field(int, rc) + ), + TP_fast_assign( + __entry->ino = ino; + __entry->rc = rc; + ), + TP_printk("\tino=%lu rc=%d", + __entry->ino, __entry->rc) +) + +#define DEFINE_SMB3_SYNC_ERR_EVENT(name) \ +DEFINE_EVENT(smb3_sync_err_class, cifs_##name, \ + TP_PROTO(unsigned long ino, \ + int rc), \ + TP_ARGS(ino, rc)) + +DEFINE_SMB3_SYNC_ERR_EVENT(fsync_err); +DEFINE_SMB3_SYNC_ERR_EVENT(flush_err); + + DECLARE_EVENT_CLASS(smb3_enter_exit_class, TP_PROTO(unsigned int xid, const char *func_name), @@ -937,12 +937,11 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev, * on persistent storage prior to completion of the operation. */ int dax_writeback_mapping_range(struct address_space *mapping, - struct block_device *bdev, struct writeback_control *wbc) + struct dax_device *dax_dev, struct writeback_control *wbc) { XA_STATE(xas, &mapping->i_pages, wbc->range_start >> PAGE_SHIFT); struct inode *inode = mapping->host; pgoff_t end_index = wbc->range_end >> PAGE_SHIFT; - struct dax_device *dax_dev; void *entry; int ret = 0; unsigned int scanned = 0; @@ -953,10 +952,6 @@ int dax_writeback_mapping_range(struct address_space *mapping, if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL) return 0; - dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); - if (!dax_dev) - return -EIO; - trace_dax_writeback_range(inode, xas.xa_index, end_index); tag_pages_for_writeback(mapping, xas.xa_index, end_index); @@ -977,7 +972,6 @@ int dax_writeback_mapping_range(struct address_space *mapping, xas_lock_irq(&xas); } xas_unlock_irq(&xas); - put_dax(dax_dev); trace_dax_writeback_range_done(inode, xas.xa_index, end_index); return ret; } @@ -1207,6 +1201,9 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, lockdep_assert_held(&inode->i_rwsem); } + if (iocb->ki_flags & IOCB_NOWAIT) + flags |= IOMAP_NOWAIT; + while (iov_iter_count(iter)) { ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops, iter, dax_iomap_actor); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 119667e65890..c885cf7d724b 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -960,8 +960,9 @@ ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) static int ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc) { - return dax_writeback_mapping_range(mapping, - mapping->host->i_sb->s_bdev, wbc); + struct ext2_sb_info *sbi = EXT2_SB(mapping->host->i_sb); + + return dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc); } const struct address_space_operations ext2_aops = { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3313168b680f..1305b810c44a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2867,7 +2867,7 @@ static int ext4_dax_writepages(struct address_space *mapping, percpu_down_read(&sbi->s_journal_flag_rwsem); trace_ext4_writepages(inode, wbc); - ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, wbc); + ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc); trace_ext4_writepages_result(inode, wbc, ret, nr_to_write - wbc->nr_to_write); percpu_up_read(&sbi->s_journal_flag_rwsem); diff --git a/fs/unicode/Makefile b/fs/unicode/Makefile index d46e9baee285..b88aecc86550 100644 --- a/fs/unicode/Makefile +++ b/fs/unicode/Makefile @@ -35,4 +35,4 @@ $(obj)/utf8data.h: $(src)/utf8data.h_shipped FORCE endif targets += utf8data.h -hostprogs-y += mkutf8data +hostprogs += mkutf8data diff --git a/fs/vboxsf/Kconfig b/fs/vboxsf/Kconfig new file mode 100644 index 000000000000..b84586ae08b3 --- /dev/null +++ b/fs/vboxsf/Kconfig @@ -0,0 +1,10 @@ +config VBOXSF_FS + tristate "VirtualBox guest shared folder (vboxsf) support" + depends on X86 && VBOXGUEST + select NLS + help + VirtualBox hosts can share folders with guests, this driver + implements the Linux-guest side of this allowing folders exported + by the host to be mounted under Linux. + + If you want to use shared folders in VirtualBox guests, answer Y or M. diff --git a/fs/vboxsf/Makefile b/fs/vboxsf/Makefile new file mode 100644 index 000000000000..9e4328e79623 --- /dev/null +++ b/fs/vboxsf/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: MIT + +obj-$(CONFIG_VBOXSF_FS) += vboxsf.o + +vboxsf-y := dir.o file.o utils.o vboxsf_wrappers.o super.o diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c new file mode 100644 index 000000000000..dd147b490982 --- /dev/null +++ b/fs/vboxsf/dir.c @@ -0,0 +1,427 @@ +// SPDX-License-Identifier: MIT +/* + * VirtualBox Guest Shared Folders support: Directory inode and file operations + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include <linux/namei.h> +#include <linux/vbox_utils.h> +#include "vfsmod.h" + +static int vboxsf_dir_open(struct inode *inode, struct file *file) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb); + struct shfl_createparms params = {}; + struct vboxsf_dir_info *sf_d; + int err; + + sf_d = vboxsf_dir_info_alloc(); + if (!sf_d) + return -ENOMEM; + + params.handle = SHFL_HANDLE_NIL; + params.create_flags = SHFL_CF_DIRECTORY | SHFL_CF_ACT_OPEN_IF_EXISTS | + SHFL_CF_ACT_FAIL_IF_NEW | SHFL_CF_ACCESS_READ; + + err = vboxsf_create_at_dentry(file_dentry(file), ¶ms); + if (err) + goto err_free_dir_info; + + if (params.result != SHFL_FILE_EXISTS) { + err = -ENOENT; + goto err_close; + } + + err = vboxsf_dir_read_all(sbi, sf_d, params.handle); + if (err) + goto err_close; + + vboxsf_close(sbi->root, params.handle); + file->private_data = sf_d; + return 0; + +err_close: + vboxsf_close(sbi->root, params.handle); +err_free_dir_info: + vboxsf_dir_info_free(sf_d); + return err; +} + +static int vboxsf_dir_release(struct inode *inode, struct file *file) +{ + if (file->private_data) + vboxsf_dir_info_free(file->private_data); + + return 0; +} + +static unsigned int vboxsf_get_d_type(u32 mode) +{ + unsigned int d_type; + + switch (mode & SHFL_TYPE_MASK) { + case SHFL_TYPE_FIFO: + d_type = DT_FIFO; + break; + case SHFL_TYPE_DEV_CHAR: + d_type = DT_CHR; + break; + case SHFL_TYPE_DIRECTORY: + d_type = DT_DIR; + break; + case SHFL_TYPE_DEV_BLOCK: + d_type = DT_BLK; + break; + case SHFL_TYPE_FILE: + d_type = DT_REG; + break; + case SHFL_TYPE_SYMLINK: + d_type = DT_LNK; + break; + case SHFL_TYPE_SOCKET: + d_type = DT_SOCK; + break; + case SHFL_TYPE_WHITEOUT: + d_type = DT_WHT; + break; + default: + d_type = DT_UNKNOWN; + break; + } + return d_type; +} + +static bool vboxsf_dir_emit(struct file *dir, struct dir_context *ctx) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(file_inode(dir)->i_sb); + struct vboxsf_dir_info *sf_d = dir->private_data; + struct shfl_dirinfo *info; + struct vboxsf_dir_buf *b; + unsigned int d_type; + loff_t i, cur = 0; + ino_t fake_ino; + void *end; + int err; + + list_for_each_entry(b, &sf_d->info_list, head) { +try_next_entry: + if (ctx->pos >= cur + b->entries) { + cur += b->entries; + continue; + } + + /* + * Note the vboxsf_dir_info objects we are iterating over here + * are variable sized, so the info pointer may end up being + * unaligned. This is how we get the data from the host. + * Since vboxsf is only supported on x86 machines this is not + * a problem. + */ + for (i = 0, info = b->buf; i < ctx->pos - cur; i++) { + end = &info->name.string.utf8[info->name.size]; + /* Only happens if the host gives us corrupt data */ + if (WARN_ON(end > (b->buf + b->used))) + return false; + info = end; + } + + end = &info->name.string.utf8[info->name.size]; + if (WARN_ON(end > (b->buf + b->used))) + return false; + + /* Info now points to the right entry, emit it. */ + d_type = vboxsf_get_d_type(info->info.attr.mode); + + /* + * On 32 bit systems pos is 64 signed, while ino is 32 bit + * unsigned so fake_ino may overflow, check for this. + */ + if ((ino_t)(ctx->pos + 1) != (u64)(ctx->pos + 1)) { + vbg_err("vboxsf: fake ino overflow, truncating dir\n"); + return false; + } + fake_ino = ctx->pos + 1; + + if (sbi->nls) { + char d_name[NAME_MAX]; + + err = vboxsf_nlscpy(sbi, d_name, NAME_MAX, + info->name.string.utf8, + info->name.length); + if (err) { + /* skip erroneous entry and proceed */ + ctx->pos += 1; + goto try_next_entry; + } + + return dir_emit(ctx, d_name, strlen(d_name), + fake_ino, d_type); + } + + return dir_emit(ctx, info->name.string.utf8, info->name.length, + fake_ino, d_type); + } + + return false; +} + +static int vboxsf_dir_iterate(struct file *dir, struct dir_context *ctx) +{ + bool emitted; + + do { + emitted = vboxsf_dir_emit(dir, ctx); + if (emitted) + ctx->pos += 1; + } while (emitted); + + return 0; +} + +const struct file_operations vboxsf_dir_fops = { + .open = vboxsf_dir_open, + .iterate = vboxsf_dir_iterate, + .release = vboxsf_dir_release, + .read = generic_read_dir, + .llseek = generic_file_llseek, +}; + +/* + * This is called during name resolution/lookup to check if the @dentry in + * the cache is still valid. the job is handled by vboxsf_inode_revalidate. + */ +static int vboxsf_dentry_revalidate(struct dentry *dentry, unsigned int flags) +{ + if (flags & LOOKUP_RCU) + return -ECHILD; + + if (d_really_is_positive(dentry)) + return vboxsf_inode_revalidate(dentry) == 0; + else + return vboxsf_stat_dentry(dentry, NULL) == -ENOENT; +} + +const struct dentry_operations vboxsf_dentry_ops = { + .d_revalidate = vboxsf_dentry_revalidate +}; + +/* iops */ + +static struct dentry *vboxsf_dir_lookup(struct inode *parent, + struct dentry *dentry, + unsigned int flags) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + struct shfl_fsobjinfo fsinfo; + struct inode *inode; + int err; + + dentry->d_time = jiffies; + + err = vboxsf_stat_dentry(dentry, &fsinfo); + if (err) { + inode = (err == -ENOENT) ? NULL : ERR_PTR(err); + } else { + inode = vboxsf_new_inode(parent->i_sb); + if (!IS_ERR(inode)) + vboxsf_init_inode(sbi, inode, &fsinfo); + } + + return d_splice_alias(inode, dentry); +} + +static int vboxsf_dir_instantiate(struct inode *parent, struct dentry *dentry, + struct shfl_fsobjinfo *info) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + struct vboxsf_inode *sf_i; + struct inode *inode; + + inode = vboxsf_new_inode(parent->i_sb); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + sf_i = VBOXSF_I(inode); + /* The host may have given us different attr then requested */ + sf_i->force_restat = 1; + vboxsf_init_inode(sbi, inode, info); + + d_instantiate(dentry, inode); + + return 0; +} + +static int vboxsf_dir_create(struct inode *parent, struct dentry *dentry, + umode_t mode, int is_dir) +{ + struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent); + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + struct shfl_createparms params = {}; + int err; + + params.handle = SHFL_HANDLE_NIL; + params.create_flags = SHFL_CF_ACT_CREATE_IF_NEW | + SHFL_CF_ACT_FAIL_IF_EXISTS | + SHFL_CF_ACCESS_READWRITE | + (is_dir ? SHFL_CF_DIRECTORY : 0); + params.info.attr.mode = (mode & 0777) | + (is_dir ? SHFL_TYPE_DIRECTORY : SHFL_TYPE_FILE); + params.info.attr.additional = SHFLFSOBJATTRADD_NOTHING; + + err = vboxsf_create_at_dentry(dentry, ¶ms); + if (err) + return err; + + if (params.result != SHFL_FILE_CREATED) + return -EPERM; + + vboxsf_close(sbi->root, params.handle); + + err = vboxsf_dir_instantiate(parent, dentry, ¶ms.info); + if (err) + return err; + + /* parent directory access/change time changed */ + sf_parent_i->force_restat = 1; + + return 0; +} + +static int vboxsf_dir_mkfile(struct inode *parent, struct dentry *dentry, + umode_t mode, bool excl) +{ + return vboxsf_dir_create(parent, dentry, mode, 0); +} + +static int vboxsf_dir_mkdir(struct inode *parent, struct dentry *dentry, + umode_t mode) +{ + return vboxsf_dir_create(parent, dentry, mode, 1); +} + +static int vboxsf_dir_unlink(struct inode *parent, struct dentry *dentry) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent); + struct inode *inode = d_inode(dentry); + struct shfl_string *path; + u32 flags; + int err; + + if (S_ISDIR(inode->i_mode)) + flags = SHFL_REMOVE_DIR; + else + flags = SHFL_REMOVE_FILE; + + if (S_ISLNK(inode->i_mode)) + flags |= SHFL_REMOVE_SYMLINK; + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return PTR_ERR(path); + + err = vboxsf_remove(sbi->root, path, flags); + __putname(path); + if (err) + return err; + + /* parent directory access/change time changed */ + sf_parent_i->force_restat = 1; + + return 0; +} + +static int vboxsf_dir_rename(struct inode *old_parent, + struct dentry *old_dentry, + struct inode *new_parent, + struct dentry *new_dentry, + unsigned int flags) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(old_parent->i_sb); + struct vboxsf_inode *sf_old_parent_i = VBOXSF_I(old_parent); + struct vboxsf_inode *sf_new_parent_i = VBOXSF_I(new_parent); + u32 shfl_flags = SHFL_RENAME_FILE | SHFL_RENAME_REPLACE_IF_EXISTS; + struct shfl_string *old_path, *new_path; + int err; + + if (flags) + return -EINVAL; + + old_path = vboxsf_path_from_dentry(sbi, old_dentry); + if (IS_ERR(old_path)) + return PTR_ERR(old_path); + + new_path = vboxsf_path_from_dentry(sbi, new_dentry); + if (IS_ERR(new_path)) { + err = PTR_ERR(new_path); + goto err_put_old_path; + } + + if (d_inode(old_dentry)->i_mode & S_IFDIR) + shfl_flags = 0; + + err = vboxsf_rename(sbi->root, old_path, new_path, shfl_flags); + if (err == 0) { + /* parent directories access/change time changed */ + sf_new_parent_i->force_restat = 1; + sf_old_parent_i->force_restat = 1; + } + + __putname(new_path); +err_put_old_path: + __putname(old_path); + return err; +} + +static int vboxsf_dir_symlink(struct inode *parent, struct dentry *dentry, + const char *symname) +{ + struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent); + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + int symname_size = strlen(symname) + 1; + struct shfl_string *path, *ssymname; + struct shfl_fsobjinfo info; + int err; + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return PTR_ERR(path); + + ssymname = kmalloc(SHFLSTRING_HEADER_SIZE + symname_size, GFP_KERNEL); + if (!ssymname) { + __putname(path); + return -ENOMEM; + } + ssymname->length = symname_size - 1; + ssymname->size = symname_size; + memcpy(ssymname->string.utf8, symname, symname_size); + + err = vboxsf_symlink(sbi->root, path, ssymname, &info); + kfree(ssymname); + __putname(path); + if (err) { + /* -EROFS means symlinks are note support -> -EPERM */ + return (err == -EROFS) ? -EPERM : err; + } + + err = vboxsf_dir_instantiate(parent, dentry, &info); + if (err) + return err; + + /* parent directory access/change time changed */ + sf_parent_i->force_restat = 1; + return 0; +} + +const struct inode_operations vboxsf_dir_iops = { + .lookup = vboxsf_dir_lookup, + .create = vboxsf_dir_mkfile, + .mkdir = vboxsf_dir_mkdir, + .rmdir = vboxsf_dir_unlink, + .unlink = vboxsf_dir_unlink, + .rename = vboxsf_dir_rename, + .symlink = vboxsf_dir_symlink, + .getattr = vboxsf_getattr, + .setattr = vboxsf_setattr, +}; diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c new file mode 100644 index 000000000000..c4ab5996d97a --- /dev/null +++ b/fs/vboxsf/file.c @@ -0,0 +1,379 @@ +// SPDX-License-Identifier: MIT +/* + * VirtualBox Guest Shared Folders support: Regular file inode and file ops. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include <linux/mm.h> +#include <linux/page-flags.h> +#include <linux/pagemap.h> +#include <linux/highmem.h> +#include <linux/sizes.h> +#include "vfsmod.h" + +struct vboxsf_handle { + u64 handle; + u32 root; + u32 access_flags; + struct kref refcount; + struct list_head head; +}; + +static int vboxsf_file_open(struct inode *inode, struct file *file) +{ + struct vboxsf_inode *sf_i = VBOXSF_I(inode); + struct shfl_createparms params = {}; + struct vboxsf_handle *sf_handle; + u32 access_flags = 0; + int err; + + sf_handle = kmalloc(sizeof(*sf_handle), GFP_KERNEL); + if (!sf_handle) + return -ENOMEM; + + /* + * We check the value of params.handle afterwards to find out if + * the call succeeded or failed, as the API does not seem to cleanly + * distinguish error and informational messages. + * + * Furthermore, we must set params.handle to SHFL_HANDLE_NIL to + * make the shared folders host service use our mode parameter. + */ + params.handle = SHFL_HANDLE_NIL; + if (file->f_flags & O_CREAT) { + params.create_flags |= SHFL_CF_ACT_CREATE_IF_NEW; + /* + * We ignore O_EXCL, as the Linux kernel seems to call create + * beforehand itself, so O_EXCL should always fail. + */ + if (file->f_flags & O_TRUNC) + params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS; + else + params.create_flags |= SHFL_CF_ACT_OPEN_IF_EXISTS; + } else { + params.create_flags |= SHFL_CF_ACT_FAIL_IF_NEW; + if (file->f_flags & O_TRUNC) + params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS; + } + + switch (file->f_flags & O_ACCMODE) { + case O_RDONLY: + access_flags |= SHFL_CF_ACCESS_READ; + break; + + case O_WRONLY: + access_flags |= SHFL_CF_ACCESS_WRITE; + break; + + case O_RDWR: + access_flags |= SHFL_CF_ACCESS_READWRITE; + break; + + default: + WARN_ON(1); + } + + if (file->f_flags & O_APPEND) + access_flags |= SHFL_CF_ACCESS_APPEND; + + params.create_flags |= access_flags; + params.info.attr.mode = inode->i_mode; + + err = vboxsf_create_at_dentry(file_dentry(file), ¶ms); + if (err == 0 && params.handle == SHFL_HANDLE_NIL) + err = (params.result == SHFL_FILE_EXISTS) ? -EEXIST : -ENOENT; + if (err) { + kfree(sf_handle); + return err; + } + + /* the host may have given us different attr then requested */ + sf_i->force_restat = 1; + + /* init our handle struct and add it to the inode's handles list */ + sf_handle->handle = params.handle; + sf_handle->root = VBOXSF_SBI(inode->i_sb)->root; + sf_handle->access_flags = access_flags; + kref_init(&sf_handle->refcount); + + mutex_lock(&sf_i->handle_list_mutex); + list_add(&sf_handle->head, &sf_i->handle_list); + mutex_unlock(&sf_i->handle_list_mutex); + + file->private_data = sf_handle; + return 0; +} + +static void vboxsf_handle_release(struct kref *refcount) +{ + struct vboxsf_handle *sf_handle = + container_of(refcount, struct vboxsf_handle, refcount); + + vboxsf_close(sf_handle->root, sf_handle->handle); + kfree(sf_handle); +} + +static int vboxsf_file_release(struct inode *inode, struct file *file) +{ + struct vboxsf_inode *sf_i = VBOXSF_I(inode); + struct vboxsf_handle *sf_handle = file->private_data; + + /* + * When a file is closed on our (the guest) side, we want any subsequent + * accesses done on the host side to see all changes done from our side. + */ + filemap_write_and_wait(inode->i_mapping); + + mutex_lock(&sf_i->handle_list_mutex); + list_del(&sf_handle->head); + mutex_unlock(&sf_i->handle_list_mutex); + + kref_put(&sf_handle->refcount, vboxsf_handle_release); + return 0; +} + +/* + * Write back dirty pages now, because there may not be any suitable + * open files later + */ +static void vboxsf_vma_close(struct vm_area_struct *vma) +{ + filemap_write_and_wait(vma->vm_file->f_mapping); +} + +static const struct vm_operations_struct vboxsf_file_vm_ops = { + .close = vboxsf_vma_close, + .fault = filemap_fault, + .map_pages = filemap_map_pages, +}; + +static int vboxsf_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + int err; + + err = generic_file_mmap(file, vma); + if (!err) + vma->vm_ops = &vboxsf_file_vm_ops; + + return err; +} + +/* + * Note that since we are accessing files on the host's filesystem, files + * may always be changed underneath us by the host! + * + * The vboxsf API between the guest and the host does not offer any functions + * to deal with this. There is no inode-generation to check for changes, no + * events / callback on changes and no way to lock files. + * + * To avoid returning stale data when a file gets *opened* on our (the guest) + * side, we do a "stat" on the host side, then compare the mtime with the + * last known mtime and invalidate the page-cache if they differ. + * This is done from vboxsf_inode_revalidate(). + * + * When reads are done through the read_iter fop, it is possible to do + * further cache revalidation then, there are 3 options to deal with this: + * + * 1) Rely solely on the revalidation done at open time + * 2) Do another "stat" and compare mtime again. Unfortunately the vboxsf + * host API does not allow stat on handles, so we would need to use + * file->f_path.dentry and the stat will then fail if the file was unlinked + * or renamed (and there is no thing like NFS' silly-rename). So we get: + * 2a) "stat" and compare mtime, on stat failure invalidate the cache + * 2b) "stat" and compare mtime, on stat failure do nothing + * 3) Simply always call invalidate_inode_pages2_range on the range of the read + * + * Currently we are keeping things KISS and using option 1. this allows + * directly using generic_file_read_iter without wrapping it. + * + * This means that only data written on the host side before open() on + * the guest side is guaranteed to be seen by the guest. If necessary + * we may provide other read-cache strategies in the future and make this + * configurable through a mount option. + */ +const struct file_operations vboxsf_reg_fops = { + .llseek = generic_file_llseek, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, + .mmap = vboxsf_file_mmap, + .open = vboxsf_file_open, + .release = vboxsf_file_release, + .fsync = noop_fsync, + .splice_read = generic_file_splice_read, +}; + +const struct inode_operations vboxsf_reg_iops = { + .getattr = vboxsf_getattr, + .setattr = vboxsf_setattr +}; + +static int vboxsf_readpage(struct file *file, struct page *page) +{ + struct vboxsf_handle *sf_handle = file->private_data; + loff_t off = page_offset(page); + u32 nread = PAGE_SIZE; + u8 *buf; + int err; + + buf = kmap(page); + + err = vboxsf_read(sf_handle->root, sf_handle->handle, off, &nread, buf); + if (err == 0) { + memset(&buf[nread], 0, PAGE_SIZE - nread); + flush_dcache_page(page); + SetPageUptodate(page); + } else { + SetPageError(page); + } + + kunmap(page); + unlock_page(page); + return err; +} + +static struct vboxsf_handle *vboxsf_get_write_handle(struct vboxsf_inode *sf_i) +{ + struct vboxsf_handle *h, *sf_handle = NULL; + + mutex_lock(&sf_i->handle_list_mutex); + list_for_each_entry(h, &sf_i->handle_list, head) { + if (h->access_flags == SHFL_CF_ACCESS_WRITE || + h->access_flags == SHFL_CF_ACCESS_READWRITE) { + kref_get(&h->refcount); + sf_handle = h; + break; + } + } + mutex_unlock(&sf_i->handle_list_mutex); + + return sf_handle; +} + +static int vboxsf_writepage(struct page *page, struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct vboxsf_inode *sf_i = VBOXSF_I(inode); + struct vboxsf_handle *sf_handle; + loff_t off = page_offset(page); + loff_t size = i_size_read(inode); + u32 nwrite = PAGE_SIZE; + u8 *buf; + int err; + + if (off + PAGE_SIZE > size) + nwrite = size & ~PAGE_MASK; + + sf_handle = vboxsf_get_write_handle(sf_i); + if (!sf_handle) + return -EBADF; + + buf = kmap(page); + err = vboxsf_write(sf_handle->root, sf_handle->handle, + off, &nwrite, buf); + kunmap(page); + + kref_put(&sf_handle->refcount, vboxsf_handle_release); + + if (err == 0) { + ClearPageError(page); + /* mtime changed */ + sf_i->force_restat = 1; + } else { + ClearPageUptodate(page); + } + + unlock_page(page); + return err; +} + +static int vboxsf_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned int len, unsigned int copied, + struct page *page, void *fsdata) +{ + struct inode *inode = mapping->host; + struct vboxsf_handle *sf_handle = file->private_data; + unsigned int from = pos & ~PAGE_MASK; + u32 nwritten = len; + u8 *buf; + int err; + + /* zero the stale part of the page if we did a short copy */ + if (!PageUptodate(page) && copied < len) + zero_user(page, from + copied, len - copied); + + buf = kmap(page); + err = vboxsf_write(sf_handle->root, sf_handle->handle, + pos, &nwritten, buf + from); + kunmap(page); + + if (err) { + nwritten = 0; + goto out; + } + + /* mtime changed */ + VBOXSF_I(inode)->force_restat = 1; + + if (!PageUptodate(page) && nwritten == PAGE_SIZE) + SetPageUptodate(page); + + pos += nwritten; + if (pos > inode->i_size) + i_size_write(inode, pos); + +out: + unlock_page(page); + put_page(page); + + return nwritten; +} + +/* + * Note simple_write_begin does not read the page from disk on partial writes + * this is ok since vboxsf_write_end only writes the written parts of the + * page and it does not call SetPageUptodate for partial writes. + */ +const struct address_space_operations vboxsf_reg_aops = { + .readpage = vboxsf_readpage, + .writepage = vboxsf_writepage, + .set_page_dirty = __set_page_dirty_nobuffers, + .write_begin = simple_write_begin, + .write_end = vboxsf_write_end, +}; + +static const char *vboxsf_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *done) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb); + struct shfl_string *path; + char *link; + int err; + + if (!dentry) + return ERR_PTR(-ECHILD); + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return ERR_CAST(path); + + link = kzalloc(PATH_MAX, GFP_KERNEL); + if (!link) { + __putname(path); + return ERR_PTR(-ENOMEM); + } + + err = vboxsf_readlink(sbi->root, path, PATH_MAX, link); + __putname(path); + if (err) { + kfree(link); + return ERR_PTR(err); + } + + set_delayed_call(done, kfree_link, link); + return link; +} + +const struct inode_operations vboxsf_lnk_iops = { + .get_link = vboxsf_get_link +}; diff --git a/fs/vboxsf/shfl_hostintf.h b/fs/vboxsf/shfl_hostintf.h new file mode 100644 index 000000000000..aca829062c12 --- /dev/null +++ b/fs/vboxsf/shfl_hostintf.h @@ -0,0 +1,901 @@ +/* SPDX-License-Identifier: MIT */ +/* + * VirtualBox Shared Folders: host interface definition. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#ifndef SHFL_HOSTINTF_H +#define SHFL_HOSTINTF_H + +#include <linux/vbox_vmmdev_types.h> + +/* The max in/out buffer size for a FN_READ or FN_WRITE call */ +#define SHFL_MAX_RW_COUNT (16 * SZ_1M) + +/* + * Structures shared between guest and the service + * can be relocated and use offsets to point to variable + * length parts. + * + * Shared folders protocol works with handles. + * Before doing any action on a file system object, + * one have to obtain the object handle via a SHFL_FN_CREATE + * request. A handle must be closed with SHFL_FN_CLOSE. + */ + +enum { + SHFL_FN_QUERY_MAPPINGS = 1, /* Query mappings changes. */ + SHFL_FN_QUERY_MAP_NAME = 2, /* Query map name. */ + SHFL_FN_CREATE = 3, /* Open/create object. */ + SHFL_FN_CLOSE = 4, /* Close object handle. */ + SHFL_FN_READ = 5, /* Read object content. */ + SHFL_FN_WRITE = 6, /* Write new object content. */ + SHFL_FN_LOCK = 7, /* Lock/unlock a range in the object. */ + SHFL_FN_LIST = 8, /* List object content. */ + SHFL_FN_INFORMATION = 9, /* Query/set object information. */ + /* Note function number 10 is not used! */ + SHFL_FN_REMOVE = 11, /* Remove object */ + SHFL_FN_MAP_FOLDER_OLD = 12, /* Map folder (legacy) */ + SHFL_FN_UNMAP_FOLDER = 13, /* Unmap folder */ + SHFL_FN_RENAME = 14, /* Rename object */ + SHFL_FN_FLUSH = 15, /* Flush file */ + SHFL_FN_SET_UTF8 = 16, /* Select UTF8 filename encoding */ + SHFL_FN_MAP_FOLDER = 17, /* Map folder */ + SHFL_FN_READLINK = 18, /* Read symlink dest (as of VBox 4.0) */ + SHFL_FN_SYMLINK = 19, /* Create symlink (as of VBox 4.0) */ + SHFL_FN_SET_SYMLINKS = 20, /* Ask host to show symlinks (4.0+) */ +}; + +/* Root handles for a mapping are of type u32, Root handles are unique. */ +#define SHFL_ROOT_NIL UINT_MAX + +/* Shared folders handle for an opened object are of type u64. */ +#define SHFL_HANDLE_NIL ULLONG_MAX + +/* Hardcoded maximum length (in chars) of a shared folder name. */ +#define SHFL_MAX_LEN (256) +/* Hardcoded maximum number of shared folder mapping available to the guest. */ +#define SHFL_MAX_MAPPINGS (64) + +/** Shared folder string buffer structure. */ +struct shfl_string { + /** Allocated size of the string member in bytes. */ + u16 size; + + /** Length of string without trailing nul in bytes. */ + u16 length; + + /** UTF-8 or UTF-16 string. Nul terminated. */ + union { + u8 utf8[2]; + u16 utf16[1]; + u16 ucs2[1]; /* misnomer, use utf16. */ + } string; +}; +VMMDEV_ASSERT_SIZE(shfl_string, 6); + +/* The size of shfl_string w/o the string part. */ +#define SHFLSTRING_HEADER_SIZE 4 + +/* Calculate size of the string. */ +static inline u32 shfl_string_buf_size(const struct shfl_string *string) +{ + return string ? SHFLSTRING_HEADER_SIZE + string->size : 0; +} + +/* Set user id on execution (S_ISUID). */ +#define SHFL_UNIX_ISUID 0004000U +/* Set group id on execution (S_ISGID). */ +#define SHFL_UNIX_ISGID 0002000U +/* Sticky bit (S_ISVTX / S_ISTXT). */ +#define SHFL_UNIX_ISTXT 0001000U + +/* Owner readable (S_IRUSR). */ +#define SHFL_UNIX_IRUSR 0000400U +/* Owner writable (S_IWUSR). */ +#define SHFL_UNIX_IWUSR 0000200U +/* Owner executable (S_IXUSR). */ +#define SHFL_UNIX_IXUSR 0000100U + +/* Group readable (S_IRGRP). */ +#define SHFL_UNIX_IRGRP 0000040U +/* Group writable (S_IWGRP). */ +#define SHFL_UNIX_IWGRP 0000020U +/* Group executable (S_IXGRP). */ +#define SHFL_UNIX_IXGRP 0000010U + +/* Other readable (S_IROTH). */ +#define SHFL_UNIX_IROTH 0000004U +/* Other writable (S_IWOTH). */ +#define SHFL_UNIX_IWOTH 0000002U +/* Other executable (S_IXOTH). */ +#define SHFL_UNIX_IXOTH 0000001U + +/* Named pipe (fifo) (S_IFIFO). */ +#define SHFL_TYPE_FIFO 0010000U +/* Character device (S_IFCHR). */ +#define SHFL_TYPE_DEV_CHAR 0020000U +/* Directory (S_IFDIR). */ +#define SHFL_TYPE_DIRECTORY 0040000U +/* Block device (S_IFBLK). */ +#define SHFL_TYPE_DEV_BLOCK 0060000U +/* Regular file (S_IFREG). */ +#define SHFL_TYPE_FILE 0100000U +/* Symbolic link (S_IFLNK). */ +#define SHFL_TYPE_SYMLINK 0120000U +/* Socket (S_IFSOCK). */ +#define SHFL_TYPE_SOCKET 0140000U +/* Whiteout (S_IFWHT). */ +#define SHFL_TYPE_WHITEOUT 0160000U +/* Type mask (S_IFMT). */ +#define SHFL_TYPE_MASK 0170000U + +/* Checks the mode flags indicate a directory (S_ISDIR). */ +#define SHFL_IS_DIRECTORY(m) (((m) & SHFL_TYPE_MASK) == SHFL_TYPE_DIRECTORY) +/* Checks the mode flags indicate a symbolic link (S_ISLNK). */ +#define SHFL_IS_SYMLINK(m) (((m) & SHFL_TYPE_MASK) == SHFL_TYPE_SYMLINK) + +/** The available additional information in a shfl_fsobjattr object. */ +enum shfl_fsobjattr_add { + /** No additional information is available / requested. */ + SHFLFSOBJATTRADD_NOTHING = 1, + /** + * The additional unix attributes (shfl_fsobjattr::u::unix_attr) are + * available / requested. + */ + SHFLFSOBJATTRADD_UNIX, + /** + * The additional extended attribute size (shfl_fsobjattr::u::size) is + * available / requested. + */ + SHFLFSOBJATTRADD_EASIZE, + /** + * The last valid item (inclusive). + * The valid range is SHFLFSOBJATTRADD_NOTHING thru + * SHFLFSOBJATTRADD_LAST. + */ + SHFLFSOBJATTRADD_LAST = SHFLFSOBJATTRADD_EASIZE, + + /** The usual 32-bit hack. */ + SHFLFSOBJATTRADD_32BIT_SIZE_HACK = 0x7fffffff +}; + +/** + * Additional unix Attributes, these are available when + * shfl_fsobjattr.additional == SHFLFSOBJATTRADD_UNIX. + */ +struct shfl_fsobjattr_unix { + /** + * The user owning the filesystem object (st_uid). + * This field is ~0U if not supported. + */ + u32 uid; + + /** + * The group the filesystem object is assigned (st_gid). + * This field is ~0U if not supported. + */ + u32 gid; + + /** + * Number of hard links to this filesystem object (st_nlink). + * This field is 1 if the filesystem doesn't support hardlinking or + * the information isn't available. + */ + u32 hardlinks; + + /** + * The device number of the device which this filesystem object resides + * on (st_dev). This field is 0 if this information is not available. + */ + u32 inode_id_device; + + /** + * The unique identifier (within the filesystem) of this filesystem + * object (st_ino). Together with inode_id_device, this field can be + * used as a OS wide unique id, when both their values are not 0. + * This field is 0 if the information is not available. + */ + u64 inode_id; + + /** + * User flags (st_flags). + * This field is 0 if this information is not available. + */ + u32 flags; + + /** + * The current generation number (st_gen). + * This field is 0 if this information is not available. + */ + u32 generation_id; + + /** + * The device number of a char. or block device type object (st_rdev). + * This field is 0 if the file isn't a char. or block device or when + * the OS doesn't use the major+minor device idenfication scheme. + */ + u32 device; +} __packed; + +/** Extended attribute size. */ +struct shfl_fsobjattr_easize { + /** Size of EAs. */ + s64 cb; +} __packed; + +/** Shared folder filesystem object attributes. */ +struct shfl_fsobjattr { + /** Mode flags (st_mode). SHFL_UNIX_*, SHFL_TYPE_*, and SHFL_DOS_*. */ + u32 mode; + + /** The additional attributes available. */ + enum shfl_fsobjattr_add additional; + + /** + * Additional attributes. + * + * Unless explicitly specified to an API, the API can provide additional + * data as it is provided by the underlying OS. + */ + union { + struct shfl_fsobjattr_unix unix_attr; + struct shfl_fsobjattr_easize size; + } __packed u; +} __packed; +VMMDEV_ASSERT_SIZE(shfl_fsobjattr, 44); + +struct shfl_timespec { + s64 ns_relative_to_unix_epoch; +}; + +/** Filesystem object information structure. */ +struct shfl_fsobjinfo { + /** + * Logical size (st_size). + * For normal files this is the size of the file. + * For symbolic links, this is the length of the path name contained + * in the symbolic link. + * For other objects this fields needs to be specified. + */ + s64 size; + + /** Disk allocation size (st_blocks * DEV_BSIZE). */ + s64 allocated; + + /** Time of last access (st_atime). */ + struct shfl_timespec access_time; + + /** Time of last data modification (st_mtime). */ + struct shfl_timespec modification_time; + + /** + * Time of last status change (st_ctime). + * If not available this is set to modification_time. + */ + struct shfl_timespec change_time; + + /** + * Time of file birth (st_birthtime). + * If not available this is set to change_time. + */ + struct shfl_timespec birth_time; + + /** Attributes. */ + struct shfl_fsobjattr attr; + +} __packed; +VMMDEV_ASSERT_SIZE(shfl_fsobjinfo, 92); + +/** + * result of an open/create request. + * Along with handle value the result code + * identifies what has happened while + * trying to open the object. + */ +enum shfl_create_result { + SHFL_NO_RESULT, + /** Specified path does not exist. */ + SHFL_PATH_NOT_FOUND, + /** Path to file exists, but the last component does not. */ + SHFL_FILE_NOT_FOUND, + /** File already exists and either has been opened or not. */ + SHFL_FILE_EXISTS, + /** New file was created. */ + SHFL_FILE_CREATED, + /** Existing file was replaced or overwritten. */ + SHFL_FILE_REPLACED +}; + +/* No flags. Initialization value. */ +#define SHFL_CF_NONE (0x00000000) + +/* + * Only lookup the object, do not return a handle. When this is set all other + * flags are ignored. + */ +#define SHFL_CF_LOOKUP (0x00000001) + +/* + * Open parent directory of specified object. + * Useful for the corresponding Windows FSD flag + * and for opening paths like \\dir\\*.* to search the 'dir'. + */ +#define SHFL_CF_OPEN_TARGET_DIRECTORY (0x00000002) + +/* Create/open a directory. */ +#define SHFL_CF_DIRECTORY (0x00000004) + +/* + * Open/create action to do if object exists + * and if the object does not exists. + * REPLACE file means atomically DELETE and CREATE. + * OVERWRITE file means truncating the file to 0 and + * setting new size. + * When opening an existing directory REPLACE and OVERWRITE + * actions are considered invalid, and cause returning + * FILE_EXISTS with NIL handle. + */ +#define SHFL_CF_ACT_MASK_IF_EXISTS (0x000000f0) +#define SHFL_CF_ACT_MASK_IF_NEW (0x00000f00) + +/* What to do if object exists. */ +#define SHFL_CF_ACT_OPEN_IF_EXISTS (0x00000000) +#define SHFL_CF_ACT_FAIL_IF_EXISTS (0x00000010) +#define SHFL_CF_ACT_REPLACE_IF_EXISTS (0x00000020) +#define SHFL_CF_ACT_OVERWRITE_IF_EXISTS (0x00000030) + +/* What to do if object does not exist. */ +#define SHFL_CF_ACT_CREATE_IF_NEW (0x00000000) +#define SHFL_CF_ACT_FAIL_IF_NEW (0x00000100) + +/* Read/write requested access for the object. */ +#define SHFL_CF_ACCESS_MASK_RW (0x00003000) + +/* No access requested. */ +#define SHFL_CF_ACCESS_NONE (0x00000000) +/* Read access requested. */ +#define SHFL_CF_ACCESS_READ (0x00001000) +/* Write access requested. */ +#define SHFL_CF_ACCESS_WRITE (0x00002000) +/* Read/Write access requested. */ +#define SHFL_CF_ACCESS_READWRITE (0x00003000) + +/* Requested share access for the object. */ +#define SHFL_CF_ACCESS_MASK_DENY (0x0000c000) + +/* Allow any access. */ +#define SHFL_CF_ACCESS_DENYNONE (0x00000000) +/* Do not allow read. */ +#define SHFL_CF_ACCESS_DENYREAD (0x00004000) +/* Do not allow write. */ +#define SHFL_CF_ACCESS_DENYWRITE (0x00008000) +/* Do not allow access. */ +#define SHFL_CF_ACCESS_DENYALL (0x0000c000) + +/* Requested access to attributes of the object. */ +#define SHFL_CF_ACCESS_MASK_ATTR (0x00030000) + +/* No access requested. */ +#define SHFL_CF_ACCESS_ATTR_NONE (0x00000000) +/* Read access requested. */ +#define SHFL_CF_ACCESS_ATTR_READ (0x00010000) +/* Write access requested. */ +#define SHFL_CF_ACCESS_ATTR_WRITE (0x00020000) +/* Read/Write access requested. */ +#define SHFL_CF_ACCESS_ATTR_READWRITE (0x00030000) + +/* + * The file is opened in append mode. + * Ignored if SHFL_CF_ACCESS_WRITE is not set. + */ +#define SHFL_CF_ACCESS_APPEND (0x00040000) + +/** Create parameters buffer struct for SHFL_FN_CREATE call */ +struct shfl_createparms { + /** Returned handle of opened object. */ + u64 handle; + + /** Returned result of the operation */ + enum shfl_create_result result; + + /** SHFL_CF_* */ + u32 create_flags; + + /** + * Attributes of object to create and + * returned actual attributes of opened/created object. + */ + struct shfl_fsobjinfo info; +} __packed; + +/** Shared Folder directory information */ +struct shfl_dirinfo { + /** Full information about the object. */ + struct shfl_fsobjinfo info; + /** + * The length of the short field (number of UTF16 chars). + * It is 16-bit for reasons of alignment. + */ + u16 short_name_len; + /** + * The short name for 8.3 compatibility. + * Empty string if not available. + */ + u16 short_name[14]; + struct shfl_string name; +}; + +/** Shared folder filesystem properties. */ +struct shfl_fsproperties { + /** + * The maximum size of a filesystem object name. + * This does not include the '\\0'. + */ + u32 max_component_len; + + /** + * True if the filesystem is remote. + * False if the filesystem is local. + */ + bool remote; + + /** + * True if the filesystem is case sensitive. + * False if the filesystem is case insensitive. + */ + bool case_sensitive; + + /** + * True if the filesystem is mounted read only. + * False if the filesystem is mounted read write. + */ + bool read_only; + + /** + * True if the filesystem can encode unicode object names. + * False if it can't. + */ + bool supports_unicode; + + /** + * True if the filesystem is compresses. + * False if it isn't or we don't know. + */ + bool compressed; + + /** + * True if the filesystem compresses of individual files. + * False if it doesn't or we don't know. + */ + bool file_compression; +}; +VMMDEV_ASSERT_SIZE(shfl_fsproperties, 12); + +struct shfl_volinfo { + s64 total_allocation_bytes; + s64 available_allocation_bytes; + u32 bytes_per_allocation_unit; + u32 bytes_per_sector; + u32 serial; + struct shfl_fsproperties properties; +}; + + +/** SHFL_FN_MAP_FOLDER Parameters structure. */ +struct shfl_map_folder { + /** + * pointer, in: + * Points to struct shfl_string buffer. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * pointer, out: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: UTF16 + * Path delimiter + */ + struct vmmdev_hgcm_function_parameter delimiter; + + /** + * pointer, in: SHFLROOT (u32) + * Case senstive flag + */ + struct vmmdev_hgcm_function_parameter case_sensitive; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_MAP_FOLDER (4) + + +/** SHFL_FN_UNMAP_FOLDER Parameters structure. */ +struct shfl_unmap_folder { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_UNMAP_FOLDER (1) + + +/** SHFL_FN_CREATE Parameters structure. */ +struct shfl_create { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string buffer. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * pointer, in/out: + * Points to struct shfl_createparms buffer. + */ + struct vmmdev_hgcm_function_parameter parms; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_CREATE (3) + + +/** SHFL_FN_CLOSE Parameters structure. */ +struct shfl_close { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to close. + */ + struct vmmdev_hgcm_function_parameter handle; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_CLOSE (2) + + +/** SHFL_FN_READ Parameters structure. */ +struct shfl_read { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to read from. + */ + struct vmmdev_hgcm_function_parameter handle; + + /** + * value64, in: + * Offset to read from. + */ + struct vmmdev_hgcm_function_parameter offset; + + /** + * value64, in/out: + * Bytes to read/How many were read. + */ + struct vmmdev_hgcm_function_parameter cb; + + /** + * pointer, out: + * Buffer to place data to. + */ + struct vmmdev_hgcm_function_parameter buffer; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_READ (5) + + +/** SHFL_FN_WRITE Parameters structure. */ +struct shfl_write { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to write to. + */ + struct vmmdev_hgcm_function_parameter handle; + + /** + * value64, in: + * Offset to write to. + */ + struct vmmdev_hgcm_function_parameter offset; + + /** + * value64, in/out: + * Bytes to write/How many were written. + */ + struct vmmdev_hgcm_function_parameter cb; + + /** + * pointer, in: + * Data to write. + */ + struct vmmdev_hgcm_function_parameter buffer; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_WRITE (5) + + +/* + * SHFL_FN_LIST + * Listing information includes variable length RTDIRENTRY[EX] structures. + */ + +#define SHFL_LIST_NONE 0 +#define SHFL_LIST_RETURN_ONE 1 + +/** SHFL_FN_LIST Parameters structure. */ +struct shfl_list { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to be listed. + */ + struct vmmdev_hgcm_function_parameter handle; + + /** + * value32, in: + * List flags SHFL_LIST_*. + */ + struct vmmdev_hgcm_function_parameter flags; + + /** + * value32, in/out: + * Bytes to be used for listing information/How many bytes were used. + */ + struct vmmdev_hgcm_function_parameter cb; + + /** + * pointer, in/optional + * Points to struct shfl_string buffer that specifies a search path. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * pointer, out: + * Buffer to place listing information to. (struct shfl_dirinfo) + */ + struct vmmdev_hgcm_function_parameter buffer; + + /** + * value32, in/out: + * Indicates a key where the listing must be resumed. + * in: 0 means start from begin of object. + * out: 0 means listing completed. + */ + struct vmmdev_hgcm_function_parameter resume_point; + + /** + * pointer, out: + * Number of files returned + */ + struct vmmdev_hgcm_function_parameter file_count; +}; + +/* Number of parameters */ +#define SHFL_CPARMS_LIST (8) + + +/** SHFL_FN_READLINK Parameters structure. */ +struct shfl_readLink { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string buffer. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * pointer, out: + * Buffer to place data to. + */ + struct vmmdev_hgcm_function_parameter buffer; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_READLINK (3) + + +/* SHFL_FN_INFORMATION */ + +/* Mask of Set/Get bit. */ +#define SHFL_INFO_MODE_MASK (0x1) +/* Get information */ +#define SHFL_INFO_GET (0x0) +/* Set information */ +#define SHFL_INFO_SET (0x1) + +/* Get name of the object. */ +#define SHFL_INFO_NAME (0x2) +/* Set size of object (extend/trucate); only applies to file objects */ +#define SHFL_INFO_SIZE (0x4) +/* Get/Set file object info. */ +#define SHFL_INFO_FILE (0x8) +/* Get volume information. */ +#define SHFL_INFO_VOLUME (0x10) + +/** SHFL_FN_INFORMATION Parameters structure. */ +struct shfl_information { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to be listed. + */ + struct vmmdev_hgcm_function_parameter handle; + + /** + * value32, in: + * SHFL_INFO_* + */ + struct vmmdev_hgcm_function_parameter flags; + + /** + * value32, in/out: + * Bytes to be used for information/How many bytes were used. + */ + struct vmmdev_hgcm_function_parameter cb; + + /** + * pointer, in/out: + * Information to be set/get (shfl_fsobjinfo or shfl_string). Do not + * forget to set the shfl_fsobjinfo::attr::additional for a get + * operation as well. + */ + struct vmmdev_hgcm_function_parameter info; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_INFORMATION (5) + + +/* SHFL_FN_REMOVE */ + +#define SHFL_REMOVE_FILE (0x1) +#define SHFL_REMOVE_DIR (0x2) +#define SHFL_REMOVE_SYMLINK (0x4) + +/** SHFL_FN_REMOVE Parameters structure. */ +struct shfl_remove { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string buffer. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * value32, in: + * remove flags (file/directory) + */ + struct vmmdev_hgcm_function_parameter flags; + +}; + +#define SHFL_CPARMS_REMOVE (3) + + +/* SHFL_FN_RENAME */ + +#define SHFL_RENAME_FILE (0x1) +#define SHFL_RENAME_DIR (0x2) +#define SHFL_RENAME_REPLACE_IF_EXISTS (0x4) + +/** SHFL_FN_RENAME Parameters structure. */ +struct shfl_rename { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string src. + */ + struct vmmdev_hgcm_function_parameter src; + + /** + * pointer, in: + * Points to struct shfl_string dest. + */ + struct vmmdev_hgcm_function_parameter dest; + + /** + * value32, in: + * rename flags (file/directory) + */ + struct vmmdev_hgcm_function_parameter flags; + +}; + +#define SHFL_CPARMS_RENAME (4) + + +/** SHFL_FN_SYMLINK Parameters structure. */ +struct shfl_symlink { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string of path for the new symlink. + */ + struct vmmdev_hgcm_function_parameter new_path; + + /** + * pointer, in: + * Points to struct shfl_string of destination for symlink. + */ + struct vmmdev_hgcm_function_parameter old_path; + + /** + * pointer, out: + * Information about created symlink. + */ + struct vmmdev_hgcm_function_parameter info; + +}; + +#define SHFL_CPARMS_SYMLINK (4) + +#endif diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c new file mode 100644 index 000000000000..675e26989376 --- /dev/null +++ b/fs/vboxsf/super.c @@ -0,0 +1,491 @@ +// SPDX-License-Identifier: MIT +/* + * VirtualBox Guest Shared Folders support: Virtual File System. + * + * Module initialization/finalization + * File system registration/deregistration + * Superblock reading + * Few utility functions + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include <linux/idr.h> +#include <linux/fs_parser.h> +#include <linux/magic.h> +#include <linux/module.h> +#include <linux/nls.h> +#include <linux/statfs.h> +#include <linux/vbox_utils.h> +#include "vfsmod.h" + +#define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */ + +#define VBSF_MOUNT_SIGNATURE_BYTE_0 ('\000') +#define VBSF_MOUNT_SIGNATURE_BYTE_1 ('\377') +#define VBSF_MOUNT_SIGNATURE_BYTE_2 ('\376') +#define VBSF_MOUNT_SIGNATURE_BYTE_3 ('\375') + +static int follow_symlinks; +module_param(follow_symlinks, int, 0444); +MODULE_PARM_DESC(follow_symlinks, + "Let host resolve symlinks rather than showing them"); + +static DEFINE_IDA(vboxsf_bdi_ida); +static DEFINE_MUTEX(vboxsf_setup_mutex); +static bool vboxsf_setup_done; +static struct super_operations vboxsf_super_ops; /* forward declaration */ +static struct kmem_cache *vboxsf_inode_cachep; + +static char * const vboxsf_default_nls = CONFIG_NLS_DEFAULT; + +enum { opt_nls, opt_uid, opt_gid, opt_ttl, opt_dmode, opt_fmode, + opt_dmask, opt_fmask }; + +static const struct fs_parameter_spec vboxsf_fs_parameters[] = { + fsparam_string ("nls", opt_nls), + fsparam_u32 ("uid", opt_uid), + fsparam_u32 ("gid", opt_gid), + fsparam_u32 ("ttl", opt_ttl), + fsparam_u32oct ("dmode", opt_dmode), + fsparam_u32oct ("fmode", opt_fmode), + fsparam_u32oct ("dmask", opt_dmask), + fsparam_u32oct ("fmask", opt_fmask), + {} +}; + +static int vboxsf_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct vboxsf_fs_context *ctx = fc->fs_private; + struct fs_parse_result result; + kuid_t uid; + kgid_t gid; + int opt; + + opt = fs_parse(fc, vboxsf_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case opt_nls: + if (ctx->nls_name || fc->purpose != FS_CONTEXT_FOR_MOUNT) { + vbg_err("vboxsf: Cannot reconfigure nls option\n"); + return -EINVAL; + } + ctx->nls_name = param->string; + param->string = NULL; + break; + case opt_uid: + uid = make_kuid(current_user_ns(), result.uint_32); + if (!uid_valid(uid)) + return -EINVAL; + ctx->o.uid = uid; + break; + case opt_gid: + gid = make_kgid(current_user_ns(), result.uint_32); + if (!gid_valid(gid)) + return -EINVAL; + ctx->o.gid = gid; + break; + case opt_ttl: + ctx->o.ttl = msecs_to_jiffies(result.uint_32); + break; + case opt_dmode: + if (result.uint_32 & ~0777) + return -EINVAL; + ctx->o.dmode = result.uint_32; + ctx->o.dmode_set = true; + break; + case opt_fmode: + if (result.uint_32 & ~0777) + return -EINVAL; + ctx->o.fmode = result.uint_32; + ctx->o.fmode_set = true; + break; + case opt_dmask: + if (result.uint_32 & ~07777) + return -EINVAL; + ctx->o.dmask = result.uint_32; + break; + case opt_fmask: + if (result.uint_32 & ~07777) + return -EINVAL; + ctx->o.fmask = result.uint_32; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) +{ + struct vboxsf_fs_context *ctx = fc->fs_private; + struct shfl_string *folder_name, root_path; + struct vboxsf_sbi *sbi; + struct dentry *droot; + struct inode *iroot; + char *nls_name; + size_t size; + int err; + + if (!fc->source) + return -EINVAL; + + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + + sbi->o = ctx->o; + idr_init(&sbi->ino_idr); + spin_lock_init(&sbi->ino_idr_lock); + sbi->next_generation = 1; + sbi->bdi_id = -1; + + /* Load nls if not utf8 */ + nls_name = ctx->nls_name ? ctx->nls_name : vboxsf_default_nls; + if (strcmp(nls_name, "utf8") != 0) { + if (nls_name == vboxsf_default_nls) + sbi->nls = load_nls_default(); + else + sbi->nls = load_nls(nls_name); + + if (!sbi->nls) { + vbg_err("vboxsf: Count not load '%s' nls\n", nls_name); + err = -EINVAL; + goto fail_free; + } + } + + sbi->bdi_id = ida_simple_get(&vboxsf_bdi_ida, 0, 0, GFP_KERNEL); + if (sbi->bdi_id < 0) { + err = sbi->bdi_id; + goto fail_free; + } + + err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id); + if (err) + goto fail_free; + + /* Turn source into a shfl_string and map the folder */ + size = strlen(fc->source) + 1; + folder_name = kmalloc(SHFLSTRING_HEADER_SIZE + size, GFP_KERNEL); + if (!folder_name) { + err = -ENOMEM; + goto fail_free; + } + folder_name->size = size; + folder_name->length = size - 1; + strlcpy(folder_name->string.utf8, fc->source, size); + err = vboxsf_map_folder(folder_name, &sbi->root); + kfree(folder_name); + if (err) { + vbg_err("vboxsf: Host rejected mount of '%s' with error %d\n", + fc->source, err); + goto fail_free; + } + + root_path.length = 1; + root_path.size = 2; + root_path.string.utf8[0] = '/'; + root_path.string.utf8[1] = 0; + err = vboxsf_stat(sbi, &root_path, &sbi->root_info); + if (err) + goto fail_unmap; + + sb->s_magic = VBOXSF_SUPER_MAGIC; + sb->s_blocksize = 1024; + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_op = &vboxsf_super_ops; + sb->s_d_op = &vboxsf_dentry_ops; + + iroot = iget_locked(sb, 0); + if (!iroot) { + err = -ENOMEM; + goto fail_unmap; + } + vboxsf_init_inode(sbi, iroot, &sbi->root_info); + unlock_new_inode(iroot); + + droot = d_make_root(iroot); + if (!droot) { + err = -ENOMEM; + goto fail_unmap; + } + + sb->s_root = droot; + sb->s_fs_info = sbi; + return 0; + +fail_unmap: + vboxsf_unmap_folder(sbi->root); +fail_free: + if (sbi->bdi_id >= 0) + ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id); + if (sbi->nls) + unload_nls(sbi->nls); + idr_destroy(&sbi->ino_idr); + kfree(sbi); + return err; +} + +static void vboxsf_inode_init_once(void *data) +{ + struct vboxsf_inode *sf_i = data; + + mutex_init(&sf_i->handle_list_mutex); + inode_init_once(&sf_i->vfs_inode); +} + +static struct inode *vboxsf_alloc_inode(struct super_block *sb) +{ + struct vboxsf_inode *sf_i; + + sf_i = kmem_cache_alloc(vboxsf_inode_cachep, GFP_NOFS); + if (!sf_i) + return NULL; + + sf_i->force_restat = 0; + INIT_LIST_HEAD(&sf_i->handle_list); + + return &sf_i->vfs_inode; +} + +static void vboxsf_free_inode(struct inode *inode) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb); + unsigned long flags; + + spin_lock_irqsave(&sbi->ino_idr_lock, flags); + idr_remove(&sbi->ino_idr, inode->i_ino); + spin_unlock_irqrestore(&sbi->ino_idr_lock, flags); + kmem_cache_free(vboxsf_inode_cachep, VBOXSF_I(inode)); +} + +static void vboxsf_put_super(struct super_block *sb) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(sb); + + vboxsf_unmap_folder(sbi->root); + if (sbi->bdi_id >= 0) + ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id); + if (sbi->nls) + unload_nls(sbi->nls); + + /* + * vboxsf_free_inode uses the idr, make sure all delayed rcu free + * inodes are flushed. + */ + rcu_barrier(); + idr_destroy(&sbi->ino_idr); + kfree(sbi); +} + +static int vboxsf_statfs(struct dentry *dentry, struct kstatfs *stat) +{ + struct super_block *sb = dentry->d_sb; + struct shfl_volinfo shfl_volinfo; + struct vboxsf_sbi *sbi; + u32 buf_len; + int err; + + sbi = VBOXSF_SBI(sb); + buf_len = sizeof(shfl_volinfo); + err = vboxsf_fsinfo(sbi->root, 0, SHFL_INFO_GET | SHFL_INFO_VOLUME, + &buf_len, &shfl_volinfo); + if (err) + return err; + + stat->f_type = VBOXSF_SUPER_MAGIC; + stat->f_bsize = shfl_volinfo.bytes_per_allocation_unit; + + do_div(shfl_volinfo.total_allocation_bytes, + shfl_volinfo.bytes_per_allocation_unit); + stat->f_blocks = shfl_volinfo.total_allocation_bytes; + + do_div(shfl_volinfo.available_allocation_bytes, + shfl_volinfo.bytes_per_allocation_unit); + stat->f_bfree = shfl_volinfo.available_allocation_bytes; + stat->f_bavail = shfl_volinfo.available_allocation_bytes; + + stat->f_files = 1000; + /* + * Don't return 0 here since the guest may then think that it is not + * possible to create any more files. + */ + stat->f_ffree = 1000000; + stat->f_fsid.val[0] = 0; + stat->f_fsid.val[1] = 0; + stat->f_namelen = 255; + return 0; +} + +static struct super_operations vboxsf_super_ops = { + .alloc_inode = vboxsf_alloc_inode, + .free_inode = vboxsf_free_inode, + .put_super = vboxsf_put_super, + .statfs = vboxsf_statfs, +}; + +static int vboxsf_setup(void) +{ + int err; + + mutex_lock(&vboxsf_setup_mutex); + + if (vboxsf_setup_done) + goto success; + + vboxsf_inode_cachep = + kmem_cache_create("vboxsf_inode_cache", + sizeof(struct vboxsf_inode), 0, + (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | + SLAB_ACCOUNT), + vboxsf_inode_init_once); + if (!vboxsf_inode_cachep) { + err = -ENOMEM; + goto fail_nomem; + } + + err = vboxsf_connect(); + if (err) { + vbg_err("vboxsf: err %d connecting to guest PCI-device\n", err); + vbg_err("vboxsf: make sure you are inside a VirtualBox VM\n"); + vbg_err("vboxsf: and check dmesg for vboxguest errors\n"); + goto fail_free_cache; + } + + err = vboxsf_set_utf8(); + if (err) { + vbg_err("vboxsf_setutf8 error %d\n", err); + goto fail_disconnect; + } + + if (!follow_symlinks) { + err = vboxsf_set_symlinks(); + if (err) + vbg_warn("vboxsf: Unable to show symlinks: %d\n", err); + } + + vboxsf_setup_done = true; +success: + mutex_unlock(&vboxsf_setup_mutex); + return 0; + +fail_disconnect: + vboxsf_disconnect(); +fail_free_cache: + kmem_cache_destroy(vboxsf_inode_cachep); +fail_nomem: + mutex_unlock(&vboxsf_setup_mutex); + return err; +} + +static int vboxsf_parse_monolithic(struct fs_context *fc, void *data) +{ + char *options = data; + + if (options && options[0] == VBSF_MOUNT_SIGNATURE_BYTE_0 && + options[1] == VBSF_MOUNT_SIGNATURE_BYTE_1 && + options[2] == VBSF_MOUNT_SIGNATURE_BYTE_2 && + options[3] == VBSF_MOUNT_SIGNATURE_BYTE_3) { + vbg_err("vboxsf: Old binary mount data not supported, remove obsolete mount.vboxsf and/or update your VBoxService.\n"); + return -EINVAL; + } + + return generic_parse_monolithic(fc, data); +} + +static int vboxsf_get_tree(struct fs_context *fc) +{ + int err; + + err = vboxsf_setup(); + if (err) + return err; + + return get_tree_nodev(fc, vboxsf_fill_super); +} + +static int vboxsf_reconfigure(struct fs_context *fc) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(fc->root->d_sb); + struct vboxsf_fs_context *ctx = fc->fs_private; + struct inode *iroot = fc->root->d_sb->s_root->d_inode; + + /* Apply changed options to the root inode */ + sbi->o = ctx->o; + vboxsf_init_inode(sbi, iroot, &sbi->root_info); + + return 0; +} + +static void vboxsf_free_fc(struct fs_context *fc) +{ + struct vboxsf_fs_context *ctx = fc->fs_private; + + kfree(ctx->nls_name); + kfree(ctx); +} + +static const struct fs_context_operations vboxsf_context_ops = { + .free = vboxsf_free_fc, + .parse_param = vboxsf_parse_param, + .parse_monolithic = vboxsf_parse_monolithic, + .get_tree = vboxsf_get_tree, + .reconfigure = vboxsf_reconfigure, +}; + +static int vboxsf_init_fs_context(struct fs_context *fc) +{ + struct vboxsf_fs_context *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + current_uid_gid(&ctx->o.uid, &ctx->o.gid); + + fc->fs_private = ctx; + fc->ops = &vboxsf_context_ops; + return 0; +} + +static struct file_system_type vboxsf_fs_type = { + .owner = THIS_MODULE, + .name = "vboxsf", + .init_fs_context = vboxsf_init_fs_context, + .kill_sb = kill_anon_super +}; + +/* Module initialization/finalization handlers */ +static int __init vboxsf_init(void) +{ + return register_filesystem(&vboxsf_fs_type); +} + +static void __exit vboxsf_fini(void) +{ + unregister_filesystem(&vboxsf_fs_type); + + mutex_lock(&vboxsf_setup_mutex); + if (vboxsf_setup_done) { + vboxsf_disconnect(); + /* + * Make sure all delayed rcu free inodes are flushed + * before we destroy the cache. + */ + rcu_barrier(); + kmem_cache_destroy(vboxsf_inode_cachep); + } + mutex_unlock(&vboxsf_setup_mutex); +} + +module_init(vboxsf_init); +module_exit(vboxsf_fini); + +MODULE_DESCRIPTION("Oracle VM VirtualBox Module for Host File System Access"); +MODULE_AUTHOR("Oracle Corporation"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_FS("vboxsf"); diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c new file mode 100644 index 000000000000..96bd160da48b --- /dev/null +++ b/fs/vboxsf/utils.c @@ -0,0 +1,551 @@ +// SPDX-License-Identifier: MIT +/* + * VirtualBox Guest Shared Folders support: Utility functions. + * Mainly conversion from/to VirtualBox/Linux data structures. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include <linux/namei.h> +#include <linux/nls.h> +#include <linux/sizes.h> +#include <linux/vfs.h> +#include "vfsmod.h" + +struct inode *vboxsf_new_inode(struct super_block *sb) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(sb); + struct inode *inode; + unsigned long flags; + int cursor, ret; + u32 gen; + + inode = new_inode(sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + idr_preload(GFP_KERNEL); + spin_lock_irqsave(&sbi->ino_idr_lock, flags); + cursor = idr_get_cursor(&sbi->ino_idr); + ret = idr_alloc_cyclic(&sbi->ino_idr, inode, 1, 0, GFP_ATOMIC); + if (ret >= 0 && ret < cursor) + sbi->next_generation++; + gen = sbi->next_generation; + spin_unlock_irqrestore(&sbi->ino_idr_lock, flags); + idr_preload_end(); + + if (ret < 0) { + iput(inode); + return ERR_PTR(ret); + } + + inode->i_ino = ret; + inode->i_generation = gen; + return inode; +} + +/* set [inode] attributes based on [info], uid/gid based on [sbi] */ +void vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode, + const struct shfl_fsobjinfo *info) +{ + const struct shfl_fsobjattr *attr; + s64 allocated; + int mode; + + attr = &info->attr; + +#define mode_set(r) ((attr->mode & (SHFL_UNIX_##r)) ? (S_##r) : 0) + + mode = mode_set(IRUSR); + mode |= mode_set(IWUSR); + mode |= mode_set(IXUSR); + + mode |= mode_set(IRGRP); + mode |= mode_set(IWGRP); + mode |= mode_set(IXGRP); + + mode |= mode_set(IROTH); + mode |= mode_set(IWOTH); + mode |= mode_set(IXOTH); + +#undef mode_set + + /* We use the host-side values for these */ + inode->i_flags |= S_NOATIME | S_NOCMTIME; + inode->i_mapping->a_ops = &vboxsf_reg_aops; + + if (SHFL_IS_DIRECTORY(attr->mode)) { + inode->i_mode = sbi->o.dmode_set ? sbi->o.dmode : mode; + inode->i_mode &= ~sbi->o.dmask; + inode->i_mode |= S_IFDIR; + inode->i_op = &vboxsf_dir_iops; + inode->i_fop = &vboxsf_dir_fops; + /* + * XXX: this probably should be set to the number of entries + * in the directory plus two (. ..) + */ + set_nlink(inode, 1); + } else if (SHFL_IS_SYMLINK(attr->mode)) { + inode->i_mode = sbi->o.fmode_set ? sbi->o.fmode : mode; + inode->i_mode &= ~sbi->o.fmask; + inode->i_mode |= S_IFLNK; + inode->i_op = &vboxsf_lnk_iops; + set_nlink(inode, 1); + } else { + inode->i_mode = sbi->o.fmode_set ? sbi->o.fmode : mode; + inode->i_mode &= ~sbi->o.fmask; + inode->i_mode |= S_IFREG; + inode->i_op = &vboxsf_reg_iops; + inode->i_fop = &vboxsf_reg_fops; + set_nlink(inode, 1); + } + + inode->i_uid = sbi->o.uid; + inode->i_gid = sbi->o.gid; + + inode->i_size = info->size; + inode->i_blkbits = 12; + /* i_blocks always in units of 512 bytes! */ + allocated = info->allocated + 511; + do_div(allocated, 512); + inode->i_blocks = allocated; + + inode->i_atime = ns_to_timespec64( + info->access_time.ns_relative_to_unix_epoch); + inode->i_ctime = ns_to_timespec64( + info->change_time.ns_relative_to_unix_epoch); + inode->i_mtime = ns_to_timespec64( + info->modification_time.ns_relative_to_unix_epoch); +} + +int vboxsf_create_at_dentry(struct dentry *dentry, + struct shfl_createparms *params) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb); + struct shfl_string *path; + int err; + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return PTR_ERR(path); + + err = vboxsf_create(sbi->root, path, params); + __putname(path); + + return err; +} + +int vboxsf_stat(struct vboxsf_sbi *sbi, struct shfl_string *path, + struct shfl_fsobjinfo *info) +{ + struct shfl_createparms params = {}; + int err; + + params.handle = SHFL_HANDLE_NIL; + params.create_flags = SHFL_CF_LOOKUP | SHFL_CF_ACT_FAIL_IF_NEW; + + err = vboxsf_create(sbi->root, path, ¶ms); + if (err) + return err; + + if (params.result != SHFL_FILE_EXISTS) + return -ENOENT; + + if (info) + *info = params.info; + + return 0; +} + +int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb); + struct shfl_string *path; + int err; + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return PTR_ERR(path); + + err = vboxsf_stat(sbi, path, info); + __putname(path); + return err; +} + +int vboxsf_inode_revalidate(struct dentry *dentry) +{ + struct vboxsf_sbi *sbi; + struct vboxsf_inode *sf_i; + struct shfl_fsobjinfo info; + struct timespec64 prev_mtime; + struct inode *inode; + int err; + + if (!dentry || !d_really_is_positive(dentry)) + return -EINVAL; + + inode = d_inode(dentry); + prev_mtime = inode->i_mtime; + sf_i = VBOXSF_I(inode); + sbi = VBOXSF_SBI(dentry->d_sb); + if (!sf_i->force_restat) { + if (time_before(jiffies, dentry->d_time + sbi->o.ttl)) + return 0; + } + + err = vboxsf_stat_dentry(dentry, &info); + if (err) + return err; + + dentry->d_time = jiffies; + sf_i->force_restat = 0; + vboxsf_init_inode(sbi, inode, &info); + + /* + * If the file was changed on the host side we need to invalidate the + * page-cache for it. Note this also gets triggered by our own writes, + * this is unavoidable. + */ + if (timespec64_compare(&inode->i_mtime, &prev_mtime) > 0) + invalidate_inode_pages2(inode->i_mapping); + + return 0; +} + +int vboxsf_getattr(const struct path *path, struct kstat *kstat, + u32 request_mask, unsigned int flags) +{ + int err; + struct dentry *dentry = path->dentry; + struct inode *inode = d_inode(dentry); + struct vboxsf_inode *sf_i = VBOXSF_I(inode); + + switch (flags & AT_STATX_SYNC_TYPE) { + case AT_STATX_DONT_SYNC: + err = 0; + break; + case AT_STATX_FORCE_SYNC: + sf_i->force_restat = 1; + /* fall-through */ + default: + err = vboxsf_inode_revalidate(dentry); + } + if (err) + return err; + + generic_fillattr(d_inode(dentry), kstat); + return 0; +} + +int vboxsf_setattr(struct dentry *dentry, struct iattr *iattr) +{ + struct vboxsf_inode *sf_i = VBOXSF_I(d_inode(dentry)); + struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb); + struct shfl_createparms params = {}; + struct shfl_fsobjinfo info = {}; + u32 buf_len; + int err; + + params.handle = SHFL_HANDLE_NIL; + params.create_flags = SHFL_CF_ACT_OPEN_IF_EXISTS | + SHFL_CF_ACT_FAIL_IF_NEW | + SHFL_CF_ACCESS_ATTR_WRITE; + + /* this is at least required for Posix hosts */ + if (iattr->ia_valid & ATTR_SIZE) + params.create_flags |= SHFL_CF_ACCESS_WRITE; + + err = vboxsf_create_at_dentry(dentry, ¶ms); + if (err || params.result != SHFL_FILE_EXISTS) + return err ? err : -ENOENT; + +#define mode_set(r) ((iattr->ia_mode & (S_##r)) ? SHFL_UNIX_##r : 0) + + /* + * Setting the file size and setting the other attributes has to + * be handled separately. + */ + if (iattr->ia_valid & (ATTR_MODE | ATTR_ATIME | ATTR_MTIME)) { + if (iattr->ia_valid & ATTR_MODE) { + info.attr.mode = mode_set(IRUSR); + info.attr.mode |= mode_set(IWUSR); + info.attr.mode |= mode_set(IXUSR); + info.attr.mode |= mode_set(IRGRP); + info.attr.mode |= mode_set(IWGRP); + info.attr.mode |= mode_set(IXGRP); + info.attr.mode |= mode_set(IROTH); + info.attr.mode |= mode_set(IWOTH); + info.attr.mode |= mode_set(IXOTH); + + if (iattr->ia_mode & S_IFDIR) + info.attr.mode |= SHFL_TYPE_DIRECTORY; + else + info.attr.mode |= SHFL_TYPE_FILE; + } + + if (iattr->ia_valid & ATTR_ATIME) + info.access_time.ns_relative_to_unix_epoch = + timespec64_to_ns(&iattr->ia_atime); + + if (iattr->ia_valid & ATTR_MTIME) + info.modification_time.ns_relative_to_unix_epoch = + timespec64_to_ns(&iattr->ia_mtime); + + /* + * Ignore ctime (inode change time) as it can't be set + * from userland anyway. + */ + + buf_len = sizeof(info); + err = vboxsf_fsinfo(sbi->root, params.handle, + SHFL_INFO_SET | SHFL_INFO_FILE, &buf_len, + &info); + if (err) { + vboxsf_close(sbi->root, params.handle); + return err; + } + + /* the host may have given us different attr then requested */ + sf_i->force_restat = 1; + } + +#undef mode_set + + if (iattr->ia_valid & ATTR_SIZE) { + memset(&info, 0, sizeof(info)); + info.size = iattr->ia_size; + buf_len = sizeof(info); + err = vboxsf_fsinfo(sbi->root, params.handle, + SHFL_INFO_SET | SHFL_INFO_SIZE, &buf_len, + &info); + if (err) { + vboxsf_close(sbi->root, params.handle); + return err; + } + + /* the host may have given us different attr then requested */ + sf_i->force_restat = 1; + } + + vboxsf_close(sbi->root, params.handle); + + /* Update the inode with what the host has actually given us. */ + if (sf_i->force_restat) + vboxsf_inode_revalidate(dentry); + + return 0; +} + +/* + * [dentry] contains string encoded in coding system that corresponds + * to [sbi]->nls, we must convert it to UTF8 here. + * Returns a shfl_string allocated through __getname (must be freed using + * __putname), or an ERR_PTR on error. + */ +struct shfl_string *vboxsf_path_from_dentry(struct vboxsf_sbi *sbi, + struct dentry *dentry) +{ + struct shfl_string *shfl_path; + int path_len, out_len, nb; + char *buf, *path; + wchar_t uni; + u8 *out; + + buf = __getname(); + if (!buf) + return ERR_PTR(-ENOMEM); + + path = dentry_path_raw(dentry, buf, PATH_MAX); + if (IS_ERR(path)) { + __putname(buf); + return ERR_CAST(path); + } + path_len = strlen(path); + + if (sbi->nls) { + shfl_path = __getname(); + if (!shfl_path) { + __putname(buf); + return ERR_PTR(-ENOMEM); + } + + out = shfl_path->string.utf8; + out_len = PATH_MAX - SHFLSTRING_HEADER_SIZE - 1; + + while (path_len) { + nb = sbi->nls->char2uni(path, path_len, &uni); + if (nb < 0) { + __putname(shfl_path); + __putname(buf); + return ERR_PTR(-EINVAL); + } + path += nb; + path_len -= nb; + + nb = utf32_to_utf8(uni, out, out_len); + if (nb < 0) { + __putname(shfl_path); + __putname(buf); + return ERR_PTR(-ENAMETOOLONG); + } + out += nb; + out_len -= nb; + } + *out = 0; + shfl_path->length = out - shfl_path->string.utf8; + shfl_path->size = shfl_path->length + 1; + __putname(buf); + } else { + if ((SHFLSTRING_HEADER_SIZE + path_len + 1) > PATH_MAX) { + __putname(buf); + return ERR_PTR(-ENAMETOOLONG); + } + /* + * dentry_path stores the name at the end of buf, but the + * shfl_string string we return must be properly aligned. + */ + shfl_path = (struct shfl_string *)buf; + memmove(shfl_path->string.utf8, path, path_len); + shfl_path->string.utf8[path_len] = 0; + shfl_path->length = path_len; + shfl_path->size = path_len + 1; + } + + return shfl_path; +} + +int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len, + const unsigned char *utf8_name, size_t utf8_len) +{ + const char *in; + char *out; + size_t out_len; + size_t out_bound_len; + size_t in_bound_len; + + in = utf8_name; + in_bound_len = utf8_len; + + out = name; + out_len = 0; + /* Reserve space for terminating 0 */ + out_bound_len = name_bound_len - 1; + + while (in_bound_len) { + int nb; + unicode_t uni; + + nb = utf8_to_utf32(in, in_bound_len, &uni); + if (nb < 0) + return -EINVAL; + + in += nb; + in_bound_len -= nb; + + nb = sbi->nls->uni2char(uni, out, out_bound_len); + if (nb < 0) + return nb; + + out += nb; + out_bound_len -= nb; + out_len += nb; + } + + *out = 0; + + return 0; +} + +static struct vboxsf_dir_buf *vboxsf_dir_buf_alloc(struct list_head *list) +{ + struct vboxsf_dir_buf *b; + + b = kmalloc(sizeof(*b), GFP_KERNEL); + if (!b) + return NULL; + + b->buf = kmalloc(DIR_BUFFER_SIZE, GFP_KERNEL); + if (!b->buf) { + kfree(b); + return NULL; + } + + b->entries = 0; + b->used = 0; + b->free = DIR_BUFFER_SIZE; + list_add(&b->head, list); + + return b; +} + +static void vboxsf_dir_buf_free(struct vboxsf_dir_buf *b) +{ + list_del(&b->head); + kfree(b->buf); + kfree(b); +} + +struct vboxsf_dir_info *vboxsf_dir_info_alloc(void) +{ + struct vboxsf_dir_info *p; + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return NULL; + + INIT_LIST_HEAD(&p->info_list); + return p; +} + +void vboxsf_dir_info_free(struct vboxsf_dir_info *p) +{ + struct list_head *list, *pos, *tmp; + + list = &p->info_list; + list_for_each_safe(pos, tmp, list) { + struct vboxsf_dir_buf *b; + + b = list_entry(pos, struct vboxsf_dir_buf, head); + vboxsf_dir_buf_free(b); + } + kfree(p); +} + +int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d, + u64 handle) +{ + struct vboxsf_dir_buf *b; + u32 entries, size; + int err = 0; + void *buf; + + /* vboxsf_dirinfo returns 1 on end of dir */ + while (err == 0) { + b = vboxsf_dir_buf_alloc(&sf_d->info_list); + if (!b) { + err = -ENOMEM; + break; + } + + buf = b->buf; + size = b->free; + + err = vboxsf_dirinfo(sbi->root, handle, NULL, 0, 0, + &size, buf, &entries); + if (err < 0) + break; + + b->entries += entries; + b->free -= size; + b->used += size; + } + + if (b && b->used == 0) + vboxsf_dir_buf_free(b); + + /* -EILSEQ means the host could not translate a filename, ignore */ + if (err > 0 || err == -EILSEQ) + err = 0; + + return err; +} diff --git a/fs/vboxsf/vboxsf_wrappers.c b/fs/vboxsf/vboxsf_wrappers.c new file mode 100644 index 000000000000..bfc78a097dae --- /dev/null +++ b/fs/vboxsf/vboxsf_wrappers.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: MIT +/* + * Wrapper functions for the shfl host calls. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/vbox_err.h> +#include <linux/vbox_utils.h> +#include "vfsmod.h" + +#define SHFL_REQUEST \ + (VMMDEV_REQUESTOR_KERNEL | VMMDEV_REQUESTOR_USR_DRV_OTHER | \ + VMMDEV_REQUESTOR_CON_DONT_KNOW | VMMDEV_REQUESTOR_TRUST_NOT_GIVEN) + +static u32 vboxsf_client_id; + +int vboxsf_connect(void) +{ + struct vbg_dev *gdev; + struct vmmdev_hgcm_service_location loc; + int err, vbox_status; + + loc.type = VMMDEV_HGCM_LOC_LOCALHOST_EXISTING; + strcpy(loc.u.localhost.service_name, "VBoxSharedFolders"); + + gdev = vbg_get_gdev(); + if (IS_ERR(gdev)) + return -ENODEV; /* No guest-device */ + + err = vbg_hgcm_connect(gdev, SHFL_REQUEST, &loc, + &vboxsf_client_id, &vbox_status); + vbg_put_gdev(gdev); + + return err ? err : vbg_status_code_to_errno(vbox_status); +} + +void vboxsf_disconnect(void) +{ + struct vbg_dev *gdev; + int vbox_status; + + gdev = vbg_get_gdev(); + if (IS_ERR(gdev)) + return; /* guest-device is gone, already disconnected */ + + vbg_hgcm_disconnect(gdev, SHFL_REQUEST, vboxsf_client_id, &vbox_status); + vbg_put_gdev(gdev); +} + +static int vboxsf_call(u32 function, void *parms, u32 parm_count, int *status) +{ + struct vbg_dev *gdev; + int err, vbox_status; + + gdev = vbg_get_gdev(); + if (IS_ERR(gdev)) + return -ESHUTDOWN; /* guest-dev removed underneath us */ + + err = vbg_hgcm_call(gdev, SHFL_REQUEST, vboxsf_client_id, function, + U32_MAX, parms, parm_count, &vbox_status); + vbg_put_gdev(gdev); + + if (err < 0) + return err; + + if (status) + *status = vbox_status; + + return vbg_status_code_to_errno(vbox_status); +} + +int vboxsf_map_folder(struct shfl_string *folder_name, u32 *root) +{ + struct shfl_map_folder parms; + int err, status; + + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL; + parms.path.u.pointer.size = shfl_string_buf_size(folder_name); + parms.path.u.pointer.u.linear_addr = (uintptr_t)folder_name; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = 0; + + parms.delimiter.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.delimiter.u.value32 = '/'; + + parms.case_sensitive.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.case_sensitive.u.value32 = 1; + + err = vboxsf_call(SHFL_FN_MAP_FOLDER, &parms, SHFL_CPARMS_MAP_FOLDER, + &status); + if (err == -ENOSYS && status == VERR_NOT_IMPLEMENTED) + vbg_err("%s: Error host is too old\n", __func__); + + *root = parms.root.u.value32; + return err; +} + +int vboxsf_unmap_folder(u32 root) +{ + struct shfl_unmap_folder parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + return vboxsf_call(SHFL_FN_UNMAP_FOLDER, &parms, + SHFL_CPARMS_UNMAP_FOLDER, NULL); +} + +/** + * vboxsf_create - Create a new file or folder + * @root: Root of the shared folder in which to create the file + * @parsed_path: The path of the file or folder relative to the shared folder + * @param: create_parms Parameters for file/folder creation. + * + * Create a new file or folder or open an existing one in a shared folder. + * Note this function always returns 0 / success unless an exceptional condition + * occurs - out of memory, invalid arguments, etc. If the file or folder could + * not be opened or created, create_parms->handle will be set to + * SHFL_HANDLE_NIL on return. In this case the value in create_parms->result + * provides information as to why (e.g. SHFL_FILE_EXISTS), create_parms->result + * is also set on success as additional information. + * + * Returns: + * 0 or negative errno value. + */ +int vboxsf_create(u32 root, struct shfl_string *parsed_path, + struct shfl_createparms *create_parms) +{ + struct shfl_create parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL; + parms.path.u.pointer.size = shfl_string_buf_size(parsed_path); + parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path; + + parms.parms.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL; + parms.parms.u.pointer.size = sizeof(struct shfl_createparms); + parms.parms.u.pointer.u.linear_addr = (uintptr_t)create_parms; + + return vboxsf_call(SHFL_FN_CREATE, &parms, SHFL_CPARMS_CREATE, NULL); +} + +int vboxsf_close(u32 root, u64 handle) +{ + struct shfl_close parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + + return vboxsf_call(SHFL_FN_CLOSE, &parms, SHFL_CPARMS_CLOSE, NULL); +} + +int vboxsf_remove(u32 root, struct shfl_string *parsed_path, u32 flags) +{ + struct shfl_remove parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.path.u.pointer.size = shfl_string_buf_size(parsed_path); + parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path; + + parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.flags.u.value32 = flags; + + return vboxsf_call(SHFL_FN_REMOVE, &parms, SHFL_CPARMS_REMOVE, NULL); +} + +int vboxsf_rename(u32 root, struct shfl_string *src_path, + struct shfl_string *dest_path, u32 flags) +{ + struct shfl_rename parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.src.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.src.u.pointer.size = shfl_string_buf_size(src_path); + parms.src.u.pointer.u.linear_addr = (uintptr_t)src_path; + + parms.dest.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.dest.u.pointer.size = shfl_string_buf_size(dest_path); + parms.dest.u.pointer.u.linear_addr = (uintptr_t)dest_path; + + parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.flags.u.value32 = flags; + + return vboxsf_call(SHFL_FN_RENAME, &parms, SHFL_CPARMS_RENAME, NULL); +} + +int vboxsf_read(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf) +{ + struct shfl_read parms; + int err; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + parms.offset.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.offset.u.value64 = offset; + parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.cb.u.value32 = *buf_len; + parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT; + parms.buffer.u.pointer.size = *buf_len; + parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf; + + err = vboxsf_call(SHFL_FN_READ, &parms, SHFL_CPARMS_READ, NULL); + + *buf_len = parms.cb.u.value32; + return err; +} + +int vboxsf_write(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf) +{ + struct shfl_write parms; + int err; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + parms.offset.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.offset.u.value64 = offset; + parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.cb.u.value32 = *buf_len; + parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.buffer.u.pointer.size = *buf_len; + parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf; + + err = vboxsf_call(SHFL_FN_WRITE, &parms, SHFL_CPARMS_WRITE, NULL); + + *buf_len = parms.cb.u.value32; + return err; +} + +/* Returns 0 on success, 1 on end-of-dir, negative errno otherwise */ +int vboxsf_dirinfo(u32 root, u64 handle, + struct shfl_string *parsed_path, u32 flags, u32 index, + u32 *buf_len, struct shfl_dirinfo *buf, u32 *file_count) +{ + struct shfl_list parms; + int err, status; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.flags.u.value32 = flags; + parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.cb.u.value32 = *buf_len; + if (parsed_path) { + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.path.u.pointer.size = shfl_string_buf_size(parsed_path); + parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path; + } else { + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_IN; + parms.path.u.pointer.size = 0; + parms.path.u.pointer.u.linear_addr = 0; + } + + parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT; + parms.buffer.u.pointer.size = *buf_len; + parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf; + + parms.resume_point.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.resume_point.u.value32 = index; + parms.file_count.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.file_count.u.value32 = 0; /* out parameter only */ + + err = vboxsf_call(SHFL_FN_LIST, &parms, SHFL_CPARMS_LIST, &status); + if (err == -ENODATA && status == VERR_NO_MORE_FILES) + err = 1; + + *buf_len = parms.cb.u.value32; + *file_count = parms.file_count.u.value32; + return err; +} + +int vboxsf_fsinfo(u32 root, u64 handle, u32 flags, + u32 *buf_len, void *buf) +{ + struct shfl_information parms; + int err; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.flags.u.value32 = flags; + parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.cb.u.value32 = *buf_len; + parms.info.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL; + parms.info.u.pointer.size = *buf_len; + parms.info.u.pointer.u.linear_addr = (uintptr_t)buf; + + err = vboxsf_call(SHFL_FN_INFORMATION, &parms, SHFL_CPARMS_INFORMATION, + NULL); + + *buf_len = parms.cb.u.value32; + return err; +} + +int vboxsf_readlink(u32 root, struct shfl_string *parsed_path, + u32 buf_len, u8 *buf) +{ + struct shfl_readLink parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.path.u.pointer.size = shfl_string_buf_size(parsed_path); + parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path; + + parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT; + parms.buffer.u.pointer.size = buf_len; + parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf; + + return vboxsf_call(SHFL_FN_READLINK, &parms, SHFL_CPARMS_READLINK, + NULL); +} + +int vboxsf_symlink(u32 root, struct shfl_string *new_path, + struct shfl_string *old_path, struct shfl_fsobjinfo *buf) +{ + struct shfl_symlink parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.new_path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.new_path.u.pointer.size = shfl_string_buf_size(new_path); + parms.new_path.u.pointer.u.linear_addr = (uintptr_t)new_path; + + parms.old_path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.old_path.u.pointer.size = shfl_string_buf_size(old_path); + parms.old_path.u.pointer.u.linear_addr = (uintptr_t)old_path; + + parms.info.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT; + parms.info.u.pointer.size = sizeof(struct shfl_fsobjinfo); + parms.info.u.pointer.u.linear_addr = (uintptr_t)buf; + + return vboxsf_call(SHFL_FN_SYMLINK, &parms, SHFL_CPARMS_SYMLINK, NULL); +} + +int vboxsf_set_utf8(void) +{ + return vboxsf_call(SHFL_FN_SET_UTF8, NULL, 0, NULL); +} + +int vboxsf_set_symlinks(void) +{ + return vboxsf_call(SHFL_FN_SET_SYMLINKS, NULL, 0, NULL); +} diff --git a/fs/vboxsf/vfsmod.h b/fs/vboxsf/vfsmod.h new file mode 100644 index 000000000000..18f95b00fc33 --- /dev/null +++ b/fs/vboxsf/vfsmod.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: MIT */ +/* + * VirtualBox Guest Shared Folders support: module header. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#ifndef VFSMOD_H +#define VFSMOD_H + +#include <linux/backing-dev.h> +#include <linux/idr.h> +#include "shfl_hostintf.h" + +#define DIR_BUFFER_SIZE SZ_16K + +/* The cast is to prevent assignment of void * to pointers of arbitrary type */ +#define VBOXSF_SBI(sb) ((struct vboxsf_sbi *)(sb)->s_fs_info) +#define VBOXSF_I(i) container_of(i, struct vboxsf_inode, vfs_inode) + +struct vboxsf_options { + unsigned long ttl; + kuid_t uid; + kgid_t gid; + bool dmode_set; + bool fmode_set; + umode_t dmode; + umode_t fmode; + umode_t dmask; + umode_t fmask; +}; + +struct vboxsf_fs_context { + struct vboxsf_options o; + char *nls_name; +}; + +/* per-shared folder information */ +struct vboxsf_sbi { + struct vboxsf_options o; + struct shfl_fsobjinfo root_info; + struct idr ino_idr; + spinlock_t ino_idr_lock; /* This protects ino_idr */ + struct nls_table *nls; + u32 next_generation; + u32 root; + int bdi_id; +}; + +/* per-inode information */ +struct vboxsf_inode { + /* some information was changed, update data on next revalidate */ + int force_restat; + /* list of open handles for this inode + lock protecting it */ + struct list_head handle_list; + /* This mutex protects handle_list accesses */ + struct mutex handle_list_mutex; + /* The VFS inode struct */ + struct inode vfs_inode; +}; + +struct vboxsf_dir_info { + struct list_head info_list; +}; + +struct vboxsf_dir_buf { + size_t entries; + size_t free; + size_t used; + void *buf; + struct list_head head; +}; + +/* globals */ +extern const struct inode_operations vboxsf_dir_iops; +extern const struct inode_operations vboxsf_lnk_iops; +extern const struct inode_operations vboxsf_reg_iops; +extern const struct file_operations vboxsf_dir_fops; +extern const struct file_operations vboxsf_reg_fops; +extern const struct address_space_operations vboxsf_reg_aops; +extern const struct dentry_operations vboxsf_dentry_ops; + +/* from utils.c */ +struct inode *vboxsf_new_inode(struct super_block *sb); +void vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode, + const struct shfl_fsobjinfo *info); +int vboxsf_create_at_dentry(struct dentry *dentry, + struct shfl_createparms *params); +int vboxsf_stat(struct vboxsf_sbi *sbi, struct shfl_string *path, + struct shfl_fsobjinfo *info); +int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info); +int vboxsf_inode_revalidate(struct dentry *dentry); +int vboxsf_getattr(const struct path *path, struct kstat *kstat, + u32 request_mask, unsigned int query_flags); +int vboxsf_setattr(struct dentry *dentry, struct iattr *iattr); +struct shfl_string *vboxsf_path_from_dentry(struct vboxsf_sbi *sbi, + struct dentry *dentry); +int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len, + const unsigned char *utf8_name, size_t utf8_len); +struct vboxsf_dir_info *vboxsf_dir_info_alloc(void); +void vboxsf_dir_info_free(struct vboxsf_dir_info *p); +int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d, + u64 handle); + +/* from vboxsf_wrappers.c */ +int vboxsf_connect(void); +void vboxsf_disconnect(void); + +int vboxsf_create(u32 root, struct shfl_string *parsed_path, + struct shfl_createparms *create_parms); + +int vboxsf_close(u32 root, u64 handle); +int vboxsf_remove(u32 root, struct shfl_string *parsed_path, u32 flags); +int vboxsf_rename(u32 root, struct shfl_string *src_path, + struct shfl_string *dest_path, u32 flags); + +int vboxsf_read(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf); +int vboxsf_write(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf); + +int vboxsf_dirinfo(u32 root, u64 handle, + struct shfl_string *parsed_path, u32 flags, u32 index, + u32 *buf_len, struct shfl_dirinfo *buf, u32 *file_count); +int vboxsf_fsinfo(u32 root, u64 handle, u32 flags, + u32 *buf_len, void *buf); + +int vboxsf_map_folder(struct shfl_string *folder_name, u32 *root); +int vboxsf_unmap_folder(u32 root); + +int vboxsf_readlink(u32 root, struct shfl_string *parsed_path, + u32 buf_len, u8 *buf); +int vboxsf_symlink(u32 root, struct shfl_string *new_path, + struct shfl_string *old_path, struct shfl_fsobjinfo *buf); + +int vboxsf_set_utf8(void); +int vboxsf_set_symlinks(void); + +#endif diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 3a688eb5c5ae..58e937be24ce 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -587,7 +587,7 @@ xfs_dax_writepages( xfs_iflags_clear(ip, XFS_ITRUNCATED); return dax_writeback_mapping_range(mapping, - xfs_inode_buftarg(ip)->bt_bdev, wbc); + xfs_inode_buftarg(ip)->bt_daxdev, wbc); } STATIC sector_t diff --git a/fs/zonefs/Kconfig b/fs/zonefs/Kconfig new file mode 100644 index 000000000000..fb87ad372e29 --- /dev/null +++ b/fs/zonefs/Kconfig @@ -0,0 +1,9 @@ +config ZONEFS_FS + tristate "zonefs filesystem support" + depends on BLOCK + depends on BLK_DEV_ZONED + help + zonefs is a simple file system which exposes zones of a zoned block + device (e.g. host-managed or host-aware SMR disk drives) as files. + + If unsure, say N. diff --git a/fs/zonefs/Makefile b/fs/zonefs/Makefile new file mode 100644 index 000000000000..75a380aa1ae1 --- /dev/null +++ b/fs/zonefs/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_ZONEFS_FS) += zonefs.o + +zonefs-y := super.o diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c new file mode 100644 index 000000000000..8bc6ef82d693 --- /dev/null +++ b/fs/zonefs/super.c @@ -0,0 +1,1439 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Simple file system for zoned block devices exposing zones as files. + * + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + */ +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/magic.h> +#include <linux/iomap.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/blkdev.h> +#include <linux/statfs.h> +#include <linux/writeback.h> +#include <linux/quotaops.h> +#include <linux/seq_file.h> +#include <linux/parser.h> +#include <linux/uio.h> +#include <linux/mman.h> +#include <linux/sched/mm.h> +#include <linux/crc32.h> + +#include "zonefs.h" + +static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, + unsigned int flags, struct iomap *iomap, + struct iomap *srcmap) +{ + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + loff_t isize; + + /* All I/Os should always be within the file maximum size */ + if (WARN_ON_ONCE(offset + length > zi->i_max_size)) + return -EIO; + + /* + * Sequential zones can only accept direct writes. This is already + * checked when writes are issued, so warn if we see a page writeback + * operation. + */ + if (WARN_ON_ONCE(zi->i_ztype == ZONEFS_ZTYPE_SEQ && + (flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT))) + return -EIO; + + /* + * For conventional zones, all blocks are always mapped. For sequential + * zones, all blocks after always mapped below the inode size (zone + * write pointer) and unwriten beyond. + */ + mutex_lock(&zi->i_truncate_mutex); + isize = i_size_read(inode); + if (offset >= isize) + iomap->type = IOMAP_UNWRITTEN; + else + iomap->type = IOMAP_MAPPED; + if (flags & IOMAP_WRITE) + length = zi->i_max_size - offset; + else + length = min(length, isize - offset); + mutex_unlock(&zi->i_truncate_mutex); + + iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); + iomap->length = ALIGN(offset + length, sb->s_blocksize) - iomap->offset; + iomap->bdev = inode->i_sb->s_bdev; + iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; + + return 0; +} + +static const struct iomap_ops zonefs_iomap_ops = { + .iomap_begin = zonefs_iomap_begin, +}; + +static int zonefs_readpage(struct file *unused, struct page *page) +{ + return iomap_readpage(page, &zonefs_iomap_ops); +} + +static int zonefs_readpages(struct file *unused, struct address_space *mapping, + struct list_head *pages, unsigned int nr_pages) +{ + return iomap_readpages(mapping, pages, nr_pages, &zonefs_iomap_ops); +} + +/* + * Map blocks for page writeback. This is used only on conventional zone files, + * which implies that the page range can only be within the fixed inode size. + */ +static int zonefs_map_blocks(struct iomap_writepage_ctx *wpc, + struct inode *inode, loff_t offset) +{ + struct zonefs_inode_info *zi = ZONEFS_I(inode); + + if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV)) + return -EIO; + if (WARN_ON_ONCE(offset >= i_size_read(inode))) + return -EIO; + + /* If the mapping is already OK, nothing needs to be done */ + if (offset >= wpc->iomap.offset && + offset < wpc->iomap.offset + wpc->iomap.length) + return 0; + + return zonefs_iomap_begin(inode, offset, zi->i_max_size - offset, + IOMAP_WRITE, &wpc->iomap, NULL); +} + +static const struct iomap_writeback_ops zonefs_writeback_ops = { + .map_blocks = zonefs_map_blocks, +}; + +static int zonefs_writepage(struct page *page, struct writeback_control *wbc) +{ + struct iomap_writepage_ctx wpc = { }; + + return iomap_writepage(page, wbc, &wpc, &zonefs_writeback_ops); +} + +static int zonefs_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct iomap_writepage_ctx wpc = { }; + + return iomap_writepages(mapping, wbc, &wpc, &zonefs_writeback_ops); +} + +static const struct address_space_operations zonefs_file_aops = { + .readpage = zonefs_readpage, + .readpages = zonefs_readpages, + .writepage = zonefs_writepage, + .writepages = zonefs_writepages, + .set_page_dirty = iomap_set_page_dirty, + .releasepage = iomap_releasepage, + .invalidatepage = iomap_invalidatepage, + .migratepage = iomap_migrate_page, + .is_partially_uptodate = iomap_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, + .direct_IO = noop_direct_IO, +}; + +static void zonefs_update_stats(struct inode *inode, loff_t new_isize) +{ + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + loff_t old_isize = i_size_read(inode); + loff_t nr_blocks; + + if (new_isize == old_isize) + return; + + spin_lock(&sbi->s_lock); + + /* + * This may be called for an update after an IO error. + * So beware of the values seen. + */ + if (new_isize < old_isize) { + nr_blocks = (old_isize - new_isize) >> sb->s_blocksize_bits; + if (sbi->s_used_blocks > nr_blocks) + sbi->s_used_blocks -= nr_blocks; + else + sbi->s_used_blocks = 0; + } else { + sbi->s_used_blocks += + (new_isize - old_isize) >> sb->s_blocksize_bits; + if (sbi->s_used_blocks > sbi->s_blocks) + sbi->s_used_blocks = sbi->s_blocks; + } + + spin_unlock(&sbi->s_lock); +} + +/* + * Check a zone condition and adjust its file inode access permissions for + * offline and readonly zones. Return the inode size corresponding to the + * amount of readable data in the zone. + */ +static loff_t zonefs_check_zone_condition(struct inode *inode, + struct blk_zone *zone, bool warn) +{ + struct zonefs_inode_info *zi = ZONEFS_I(inode); + + switch (zone->cond) { + case BLK_ZONE_COND_OFFLINE: + /* + * Dead zone: make the inode immutable, disable all accesses + * and set the file size to 0 (zone wp set to zone start). + */ + if (warn) + zonefs_warn(inode->i_sb, "inode %lu: offline zone\n", + inode->i_ino); + inode->i_flags |= S_IMMUTABLE; + inode->i_mode &= ~0777; + zone->wp = zone->start; + return 0; + case BLK_ZONE_COND_READONLY: + /* Do not allow writes in read-only zones */ + if (warn) + zonefs_warn(inode->i_sb, "inode %lu: read-only zone\n", + inode->i_ino); + inode->i_flags |= S_IMMUTABLE; + inode->i_mode &= ~0222; + /* fallthrough */ + default: + if (zi->i_ztype == ZONEFS_ZTYPE_CNV) + return zi->i_max_size; + return (zone->wp - zone->start) << SECTOR_SHIFT; + } +} + +struct zonefs_ioerr_data { + struct inode *inode; + bool write; +}; + +static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + void *data) +{ + struct zonefs_ioerr_data *err = data; + struct inode *inode = err->inode; + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + loff_t isize, data_size; + + /* + * Check the zone condition: if the zone is not "bad" (offline or + * read-only), read errors are simply signaled to the IO issuer as long + * as there is no inconsistency between the inode size and the amount of + * data writen in the zone (data_size). + */ + data_size = zonefs_check_zone_condition(inode, zone, true); + isize = i_size_read(inode); + if (zone->cond != BLK_ZONE_COND_OFFLINE && + zone->cond != BLK_ZONE_COND_READONLY && + !err->write && isize == data_size) + return 0; + + /* + * At this point, we detected either a bad zone or an inconsistency + * between the inode size and the amount of data written in the zone. + * For the latter case, the cause may be a write IO error or an external + * action on the device. Two error patterns exist: + * 1) The inode size is lower than the amount of data in the zone: + * a write operation partially failed and data was writen at the end + * of the file. This can happen in the case of a large direct IO + * needing several BIOs and/or write requests to be processed. + * 2) The inode size is larger than the amount of data in the zone: + * this can happen with a deferred write error with the use of the + * device side write cache after getting successful write IO + * completions. Other possibilities are (a) an external corruption, + * e.g. an application reset the zone directly, or (b) the device + * has a serious problem (e.g. firmware bug). + * + * In all cases, warn about inode size inconsistency and handle the + * IO error according to the zone condition and to the mount options. + */ + if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && isize != data_size) + zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n", + inode->i_ino, isize, data_size); + + /* + * First handle bad zones signaled by hardware. The mount options + * errors=zone-ro and errors=zone-offline result in changing the + * zone condition to read-only and offline respectively, as if the + * condition was signaled by the hardware. + */ + if (zone->cond == BLK_ZONE_COND_OFFLINE || + sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL) { + zonefs_warn(sb, "inode %lu: read/write access disabled\n", + inode->i_ino); + if (zone->cond != BLK_ZONE_COND_OFFLINE) { + zone->cond = BLK_ZONE_COND_OFFLINE; + data_size = zonefs_check_zone_condition(inode, zone, + false); + } + } else if (zone->cond == BLK_ZONE_COND_READONLY || + sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) { + zonefs_warn(sb, "inode %lu: write access disabled\n", + inode->i_ino); + if (zone->cond != BLK_ZONE_COND_READONLY) { + zone->cond = BLK_ZONE_COND_READONLY; + data_size = zonefs_check_zone_condition(inode, zone, + false); + } + } + + /* + * If error=remount-ro was specified, any error result in remounting + * the volume as read-only. + */ + if ((sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) && !sb_rdonly(sb)) { + zonefs_warn(sb, "remounting filesystem read-only\n"); + sb->s_flags |= SB_RDONLY; + } + + /* + * Update block usage stats and the inode size to prevent access to + * invalid data. + */ + zonefs_update_stats(inode, data_size); + i_size_write(inode, data_size); + zi->i_wpoffset = data_size; + + return 0; +} + +/* + * When an file IO error occurs, check the file zone to see if there is a change + * in the zone condition (e.g. offline or read-only). For a failed write to a + * sequential zone, the zone write pointer position must also be checked to + * eventually correct the file size and zonefs inode write pointer offset + * (which can be out of sync with the drive due to partial write failures). + */ +static void zonefs_io_error(struct inode *inode, bool write) +{ + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + unsigned int noio_flag; + unsigned int nr_zones = + zi->i_max_size >> (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + struct zonefs_ioerr_data err = { + .inode = inode, + .write = write, + }; + int ret; + + mutex_lock(&zi->i_truncate_mutex); + + /* + * Memory allocations in blkdev_report_zones() can trigger a memory + * reclaim which may in turn cause a recursion into zonefs as well as + * struct request allocations for the same device. The former case may + * end up in a deadlock on the inode truncate mutex, while the latter + * may prevent IO forward progress. Executing the report zones under + * the GFP_NOIO context avoids both problems. + */ + noio_flag = memalloc_noio_save(); + ret = blkdev_report_zones(sb->s_bdev, zi->i_zsector, nr_zones, + zonefs_io_error_cb, &err); + if (ret != nr_zones) + zonefs_err(sb, "Get inode %lu zone information failed %d\n", + inode->i_ino, ret); + memalloc_noio_restore(noio_flag); + + mutex_unlock(&zi->i_truncate_mutex); +} + +static int zonefs_file_truncate(struct inode *inode, loff_t isize) +{ + struct zonefs_inode_info *zi = ZONEFS_I(inode); + loff_t old_isize; + enum req_opf op; + int ret = 0; + + /* + * Only sequential zone files can be truncated and truncation is allowed + * only down to a 0 size, which is equivalent to a zone reset, and to + * the maximum file size, which is equivalent to a zone finish. + */ + if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) + return -EPERM; + + if (!isize) + op = REQ_OP_ZONE_RESET; + else if (isize == zi->i_max_size) + op = REQ_OP_ZONE_FINISH; + else + return -EPERM; + + inode_dio_wait(inode); + + /* Serialize against page faults */ + down_write(&zi->i_mmap_sem); + + /* Serialize against zonefs_iomap_begin() */ + mutex_lock(&zi->i_truncate_mutex); + + old_isize = i_size_read(inode); + if (isize == old_isize) + goto unlock; + + ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector, + zi->i_max_size >> SECTOR_SHIFT, GFP_NOFS); + if (ret) { + zonefs_err(inode->i_sb, + "Zone management operation at %llu failed %d", + zi->i_zsector, ret); + goto unlock; + } + + zonefs_update_stats(inode, isize); + truncate_setsize(inode, isize); + zi->i_wpoffset = isize; + +unlock: + mutex_unlock(&zi->i_truncate_mutex); + up_write(&zi->i_mmap_sem); + + return ret; +} + +static int zonefs_inode_setattr(struct dentry *dentry, struct iattr *iattr) +{ + struct inode *inode = d_inode(dentry); + int ret; + + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + + ret = setattr_prepare(dentry, iattr); + if (ret) + return ret; + + /* + * Since files and directories cannot be created nor deleted, do not + * allow setting any write attributes on the sub-directories grouping + * files by zone type. + */ + if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) && + (iattr->ia_mode & 0222)) + return -EPERM; + + if (((iattr->ia_valid & ATTR_UID) && + !uid_eq(iattr->ia_uid, inode->i_uid)) || + ((iattr->ia_valid & ATTR_GID) && + !gid_eq(iattr->ia_gid, inode->i_gid))) { + ret = dquot_transfer(inode, iattr); + if (ret) + return ret; + } + + if (iattr->ia_valid & ATTR_SIZE) { + ret = zonefs_file_truncate(inode, iattr->ia_size); + if (ret) + return ret; + } + + setattr_copy(inode, iattr); + + return 0; +} + +static const struct inode_operations zonefs_file_inode_operations = { + .setattr = zonefs_inode_setattr, +}; + +static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + struct inode *inode = file_inode(file); + int ret = 0; + + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + + /* + * Since only direct writes are allowed in sequential files, page cache + * flush is needed only for conventional zone files. + */ + if (ZONEFS_I(inode)->i_ztype == ZONEFS_ZTYPE_CNV) + ret = file_write_and_wait_range(file, start, end); + if (!ret) + ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + + if (ret) + zonefs_io_error(inode, true); + + return ret; +} + +static vm_fault_t zonefs_filemap_fault(struct vm_fault *vmf) +{ + struct zonefs_inode_info *zi = ZONEFS_I(file_inode(vmf->vma->vm_file)); + vm_fault_t ret; + + down_read(&zi->i_mmap_sem); + ret = filemap_fault(vmf); + up_read(&zi->i_mmap_sem); + + return ret; +} + +static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vmf->vma->vm_file); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + vm_fault_t ret; + + if (unlikely(IS_IMMUTABLE(inode))) + return VM_FAULT_SIGBUS; + + /* + * Sanity check: only conventional zone files can have shared + * writeable mappings. + */ + if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV)) + return VM_FAULT_NOPAGE; + + sb_start_pagefault(inode->i_sb); + file_update_time(vmf->vma->vm_file); + + /* Serialize against truncates */ + down_read(&zi->i_mmap_sem); + ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops); + up_read(&zi->i_mmap_sem); + + sb_end_pagefault(inode->i_sb); + return ret; +} + +static const struct vm_operations_struct zonefs_file_vm_ops = { + .fault = zonefs_filemap_fault, + .map_pages = filemap_map_pages, + .page_mkwrite = zonefs_filemap_page_mkwrite, +}; + +static int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + /* + * Conventional zones accept random writes, so their files can support + * shared writable mappings. For sequential zone files, only read + * mappings are possible since there are no guarantees for write + * ordering between msync() and page cache writeback. + */ + if (ZONEFS_I(file_inode(file))->i_ztype == ZONEFS_ZTYPE_SEQ && + (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) + return -EINVAL; + + file_accessed(file); + vma->vm_ops = &zonefs_file_vm_ops; + + return 0; +} + +static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence) +{ + loff_t isize = i_size_read(file_inode(file)); + + /* + * Seeks are limited to below the zone size for conventional zones + * and below the zone write pointer for sequential zones. In both + * cases, this limit is the inode size. + */ + return generic_file_llseek_size(file, offset, whence, isize, isize); +} + +static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, + int error, unsigned int flags) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + + if (error) { + zonefs_io_error(inode, true); + return error; + } + + if (size && zi->i_ztype != ZONEFS_ZTYPE_CNV) { + /* + * Note that we may be seeing completions out of order, + * but that is not a problem since a write completed + * successfully necessarily means that all preceding writes + * were also successful. So we can safely increase the inode + * size to the write end location. + */ + mutex_lock(&zi->i_truncate_mutex); + if (i_size_read(inode) < iocb->ki_pos + size) { + zonefs_update_stats(inode, iocb->ki_pos + size); + i_size_write(inode, iocb->ki_pos + size); + } + mutex_unlock(&zi->i_truncate_mutex); + } + + return 0; +} + +static const struct iomap_dio_ops zonefs_write_dio_ops = { + .end_io = zonefs_file_write_dio_end_io, +}; + +/* + * Handle direct writes. For sequential zone files, this is the only possible + * write path. For these files, check that the user is issuing writes + * sequentially from the end of the file. This code assumes that the block layer + * delivers write requests to the device in sequential order. This is always the + * case if a block IO scheduler implementing the ELEVATOR_F_ZBD_SEQ_WRITE + * elevator feature is being used (e.g. mq-deadline). The block layer always + * automatically select such an elevator for zoned block devices during the + * device initialization. + */ +static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + size_t count; + ssize_t ret; + + /* + * For async direct IOs to sequential zone files, ignore IOCB_NOWAIT + * as this can cause write reordering (e.g. the first aio gets EAGAIN + * on the inode lock but the second goes through but is now unaligned). + */ + if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb(iocb) + && (iocb->ki_flags & IOCB_NOWAIT)) + iocb->ki_flags &= ~IOCB_NOWAIT; + + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock(inode)) + return -EAGAIN; + } else { + inode_lock(inode); + } + + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto inode_unlock; + + iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos); + count = iov_iter_count(from); + + if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) { + ret = -EINVAL; + goto inode_unlock; + } + + /* Enforce sequential writes (append only) in sequential zones */ + mutex_lock(&zi->i_truncate_mutex); + if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && iocb->ki_pos != zi->i_wpoffset) { + mutex_unlock(&zi->i_truncate_mutex); + ret = -EINVAL; + goto inode_unlock; + } + mutex_unlock(&zi->i_truncate_mutex); + + ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops, + &zonefs_write_dio_ops, is_sync_kiocb(iocb)); + if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && + (ret > 0 || ret == -EIOCBQUEUED)) { + if (ret > 0) + count = ret; + mutex_lock(&zi->i_truncate_mutex); + zi->i_wpoffset += count; + mutex_unlock(&zi->i_truncate_mutex); + } + +inode_unlock: + inode_unlock(inode); + + return ret; +} + +static ssize_t zonefs_file_buffered_write(struct kiocb *iocb, + struct iov_iter *from) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + ssize_t ret; + + /* + * Direct IO writes are mandatory for sequential zone files so that the + * write IO issuing order is preserved. + */ + if (zi->i_ztype != ZONEFS_ZTYPE_CNV) + return -EIO; + + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock(inode)) + return -EAGAIN; + } else { + inode_lock(inode); + } + + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto inode_unlock; + + iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos); + + ret = iomap_file_buffered_write(iocb, from, &zonefs_iomap_ops); + if (ret > 0) + iocb->ki_pos += ret; + else if (ret == -EIO) + zonefs_io_error(inode, true); + +inode_unlock: + inode_unlock(inode); + if (ret > 0) + ret = generic_write_sync(iocb, ret); + + return ret; +} + +static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct inode *inode = file_inode(iocb->ki_filp); + + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + + if (sb_rdonly(inode->i_sb)) + return -EROFS; + + /* Write operations beyond the zone size are not allowed */ + if (iocb->ki_pos >= ZONEFS_I(inode)->i_max_size) + return -EFBIG; + + if (iocb->ki_flags & IOCB_DIRECT) + return zonefs_file_dio_write(iocb, from); + + return zonefs_file_buffered_write(iocb, from); +} + +static int zonefs_file_read_dio_end_io(struct kiocb *iocb, ssize_t size, + int error, unsigned int flags) +{ + if (error) { + zonefs_io_error(file_inode(iocb->ki_filp), false); + return error; + } + + return 0; +} + +static const struct iomap_dio_ops zonefs_read_dio_ops = { + .end_io = zonefs_file_read_dio_end_io, +}; + +static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + loff_t isize; + ssize_t ret; + + /* Offline zones cannot be read */ + if (unlikely(IS_IMMUTABLE(inode) && !(inode->i_mode & 0777))) + return -EPERM; + + if (iocb->ki_pos >= zi->i_max_size) + return 0; + + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock_shared(inode)) + return -EAGAIN; + } else { + inode_lock_shared(inode); + } + + /* Limit read operations to written data */ + mutex_lock(&zi->i_truncate_mutex); + isize = i_size_read(inode); + if (iocb->ki_pos >= isize) { + mutex_unlock(&zi->i_truncate_mutex); + ret = 0; + goto inode_unlock; + } + iov_iter_truncate(to, isize - iocb->ki_pos); + mutex_unlock(&zi->i_truncate_mutex); + + if (iocb->ki_flags & IOCB_DIRECT) { + size_t count = iov_iter_count(to); + + if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) { + ret = -EINVAL; + goto inode_unlock; + } + file_accessed(iocb->ki_filp); + ret = iomap_dio_rw(iocb, to, &zonefs_iomap_ops, + &zonefs_read_dio_ops, is_sync_kiocb(iocb)); + } else { + ret = generic_file_read_iter(iocb, to); + if (ret == -EIO) + zonefs_io_error(inode, false); + } + +inode_unlock: + inode_unlock_shared(inode); + + return ret; +} + +static const struct file_operations zonefs_file_operations = { + .open = generic_file_open, + .fsync = zonefs_file_fsync, + .mmap = zonefs_file_mmap, + .llseek = zonefs_file_llseek, + .read_iter = zonefs_file_read_iter, + .write_iter = zonefs_file_write_iter, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, + .iopoll = iomap_dio_iopoll, +}; + +static struct kmem_cache *zonefs_inode_cachep; + +static struct inode *zonefs_alloc_inode(struct super_block *sb) +{ + struct zonefs_inode_info *zi; + + zi = kmem_cache_alloc(zonefs_inode_cachep, GFP_KERNEL); + if (!zi) + return NULL; + + inode_init_once(&zi->i_vnode); + mutex_init(&zi->i_truncate_mutex); + init_rwsem(&zi->i_mmap_sem); + + return &zi->i_vnode; +} + +static void zonefs_free_inode(struct inode *inode) +{ + kmem_cache_free(zonefs_inode_cachep, ZONEFS_I(inode)); +} + +/* + * File system stat. + */ +static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + enum zonefs_ztype t; + u64 fsid; + + buf->f_type = ZONEFS_MAGIC; + buf->f_bsize = sb->s_blocksize; + buf->f_namelen = ZONEFS_NAME_MAX; + + spin_lock(&sbi->s_lock); + + buf->f_blocks = sbi->s_blocks; + if (WARN_ON(sbi->s_used_blocks > sbi->s_blocks)) + buf->f_bfree = 0; + else + buf->f_bfree = buf->f_blocks - sbi->s_used_blocks; + buf->f_bavail = buf->f_bfree; + + for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) { + if (sbi->s_nr_files[t]) + buf->f_files += sbi->s_nr_files[t] + 1; + } + buf->f_ffree = 0; + + spin_unlock(&sbi->s_lock); + + fsid = le64_to_cpup((void *)sbi->s_uuid.b) ^ + le64_to_cpup((void *)sbi->s_uuid.b + sizeof(u64)); + buf->f_fsid.val[0] = (u32)fsid; + buf->f_fsid.val[1] = (u32)(fsid >> 32); + + return 0; +} + +enum { + Opt_errors_ro, Opt_errors_zro, Opt_errors_zol, Opt_errors_repair, + Opt_err, +}; + +static const match_table_t tokens = { + { Opt_errors_ro, "errors=remount-ro"}, + { Opt_errors_zro, "errors=zone-ro"}, + { Opt_errors_zol, "errors=zone-offline"}, + { Opt_errors_repair, "errors=repair"}, + { Opt_err, NULL} +}; + +static int zonefs_parse_options(struct super_block *sb, char *options) +{ + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + substring_t args[MAX_OPT_ARGS]; + char *p; + + if (!options) + return 0; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_errors_ro: + sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; + sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_RO; + break; + case Opt_errors_zro: + sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; + sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZRO; + break; + case Opt_errors_zol: + sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; + sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZOL; + break; + case Opt_errors_repair: + sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; + sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_REPAIR; + break; + default: + return -EINVAL; + } + } + + return 0; +} + +static int zonefs_show_options(struct seq_file *seq, struct dentry *root) +{ + struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb); + + if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) + seq_puts(seq, ",errors=remount-ro"); + if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) + seq_puts(seq, ",errors=zone-ro"); + if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL) + seq_puts(seq, ",errors=zone-offline"); + if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR) + seq_puts(seq, ",errors=repair"); + + return 0; +} + +static int zonefs_remount(struct super_block *sb, int *flags, char *data) +{ + sync_filesystem(sb); + + return zonefs_parse_options(sb, data); +} + +static const struct super_operations zonefs_sops = { + .alloc_inode = zonefs_alloc_inode, + .free_inode = zonefs_free_inode, + .statfs = zonefs_statfs, + .remount_fs = zonefs_remount, + .show_options = zonefs_show_options, +}; + +static const struct inode_operations zonefs_dir_inode_operations = { + .lookup = simple_lookup, + .setattr = zonefs_inode_setattr, +}; + +static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode, + enum zonefs_ztype type) +{ + struct super_block *sb = parent->i_sb; + + inode->i_ino = blkdev_nr_zones(sb->s_bdev->bd_disk) + type + 1; + inode_init_owner(inode, parent, S_IFDIR | 0555); + inode->i_op = &zonefs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + set_nlink(inode, 2); + inc_nlink(parent); +} + +static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, + enum zonefs_ztype type) +{ + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + + inode->i_ino = zone->start >> sbi->s_zone_sectors_shift; + inode->i_mode = S_IFREG | sbi->s_perm; + + zi->i_ztype = type; + zi->i_zsector = zone->start; + zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE, + zone->len << SECTOR_SHIFT); + zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true); + + inode->i_uid = sbi->s_uid; + inode->i_gid = sbi->s_gid; + inode->i_size = zi->i_wpoffset; + inode->i_blocks = zone->len; + + inode->i_op = &zonefs_file_inode_operations; + inode->i_fop = &zonefs_file_operations; + inode->i_mapping->a_ops = &zonefs_file_aops; + + sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes); + sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits; + sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits; +} + +static struct dentry *zonefs_create_inode(struct dentry *parent, + const char *name, struct blk_zone *zone, + enum zonefs_ztype type) +{ + struct inode *dir = d_inode(parent); + struct dentry *dentry; + struct inode *inode; + + dentry = d_alloc_name(parent, name); + if (!dentry) + return NULL; + + inode = new_inode(parent->d_sb); + if (!inode) + goto dput; + + inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime; + if (zone) + zonefs_init_file_inode(inode, zone, type); + else + zonefs_init_dir_inode(dir, inode, type); + d_add(dentry, inode); + dir->i_size++; + + return dentry; + +dput: + dput(dentry); + + return NULL; +} + +struct zonefs_zone_data { + struct super_block *sb; + unsigned int nr_zones[ZONEFS_ZTYPE_MAX]; + struct blk_zone *zones; +}; + +/* + * Create a zone group and populate it with zone files. + */ +static int zonefs_create_zgroup(struct zonefs_zone_data *zd, + enum zonefs_ztype type) +{ + struct super_block *sb = zd->sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + struct blk_zone *zone, *next, *end; + const char *zgroup_name; + char *file_name; + struct dentry *dir; + unsigned int n = 0; + int ret = -ENOMEM; + + /* If the group is empty, there is nothing to do */ + if (!zd->nr_zones[type]) + return 0; + + file_name = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL); + if (!file_name) + return -ENOMEM; + + if (type == ZONEFS_ZTYPE_CNV) + zgroup_name = "cnv"; + else + zgroup_name = "seq"; + + dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type); + if (!dir) + goto free; + + /* + * The first zone contains the super block: skip it. + */ + end = zd->zones + blkdev_nr_zones(sb->s_bdev->bd_disk); + for (zone = &zd->zones[1]; zone < end; zone = next) { + + next = zone + 1; + if (zonefs_zone_type(zone) != type) + continue; + + /* + * For conventional zones, contiguous zones can be aggregated + * together to form larger files. Note that this overwrites the + * length of the first zone of the set of contiguous zones + * aggregated together. If one offline or read-only zone is + * found, assume that all zones aggregated have the same + * condition. + */ + if (type == ZONEFS_ZTYPE_CNV && + (sbi->s_features & ZONEFS_F_AGGRCNV)) { + for (; next < end; next++) { + if (zonefs_zone_type(next) != type) + break; + zone->len += next->len; + if (next->cond == BLK_ZONE_COND_READONLY && + zone->cond != BLK_ZONE_COND_OFFLINE) + zone->cond = BLK_ZONE_COND_READONLY; + else if (next->cond == BLK_ZONE_COND_OFFLINE) + zone->cond = BLK_ZONE_COND_OFFLINE; + } + } + + /* + * Use the file number within its group as file name. + */ + snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n); + if (!zonefs_create_inode(dir, file_name, zone, type)) + goto free; + + n++; + } + + zonefs_info(sb, "Zone group \"%s\" has %u file%s\n", + zgroup_name, n, n > 1 ? "s" : ""); + + sbi->s_nr_files[type] = n; + ret = 0; + +free: + kfree(file_name); + + return ret; +} + +static int zonefs_get_zone_info_cb(struct blk_zone *zone, unsigned int idx, + void *data) +{ + struct zonefs_zone_data *zd = data; + + /* + * Count the number of usable zones: the first zone at index 0 contains + * the super block and is ignored. + */ + switch (zone->type) { + case BLK_ZONE_TYPE_CONVENTIONAL: + zone->wp = zone->start + zone->len; + if (idx) + zd->nr_zones[ZONEFS_ZTYPE_CNV]++; + break; + case BLK_ZONE_TYPE_SEQWRITE_REQ: + case BLK_ZONE_TYPE_SEQWRITE_PREF: + if (idx) + zd->nr_zones[ZONEFS_ZTYPE_SEQ]++; + break; + default: + zonefs_err(zd->sb, "Unsupported zone type 0x%x\n", + zone->type); + return -EIO; + } + + memcpy(&zd->zones[idx], zone, sizeof(struct blk_zone)); + + return 0; +} + +static int zonefs_get_zone_info(struct zonefs_zone_data *zd) +{ + struct block_device *bdev = zd->sb->s_bdev; + int ret; + + zd->zones = kvcalloc(blkdev_nr_zones(bdev->bd_disk), + sizeof(struct blk_zone), GFP_KERNEL); + if (!zd->zones) + return -ENOMEM; + + /* Get zones information from the device */ + ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, + zonefs_get_zone_info_cb, zd); + if (ret < 0) { + zonefs_err(zd->sb, "Zone report failed %d\n", ret); + return ret; + } + + if (ret != blkdev_nr_zones(bdev->bd_disk)) { + zonefs_err(zd->sb, "Invalid zone report (%d/%u zones)\n", + ret, blkdev_nr_zones(bdev->bd_disk)); + return -EIO; + } + + return 0; +} + +static inline void zonefs_cleanup_zone_info(struct zonefs_zone_data *zd) +{ + kvfree(zd->zones); +} + +/* + * Read super block information from the device. + */ +static int zonefs_read_super(struct super_block *sb) +{ + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + struct zonefs_super *super; + u32 crc, stored_crc; + struct page *page; + struct bio_vec bio_vec; + struct bio bio; + int ret; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + bio_init(&bio, &bio_vec, 1); + bio.bi_iter.bi_sector = 0; + bio.bi_opf = REQ_OP_READ; + bio_set_dev(&bio, sb->s_bdev); + bio_add_page(&bio, page, PAGE_SIZE, 0); + + ret = submit_bio_wait(&bio); + if (ret) + goto free_page; + + super = kmap(page); + + ret = -EINVAL; + if (le32_to_cpu(super->s_magic) != ZONEFS_MAGIC) + goto unmap; + + stored_crc = le32_to_cpu(super->s_crc); + super->s_crc = 0; + crc = crc32(~0U, (unsigned char *)super, sizeof(struct zonefs_super)); + if (crc != stored_crc) { + zonefs_err(sb, "Invalid checksum (Expected 0x%08x, got 0x%08x)", + crc, stored_crc); + goto unmap; + } + + sbi->s_features = le64_to_cpu(super->s_features); + if (sbi->s_features & ~ZONEFS_F_DEFINED_FEATURES) { + zonefs_err(sb, "Unknown features set 0x%llx\n", + sbi->s_features); + goto unmap; + } + + if (sbi->s_features & ZONEFS_F_UID) { + sbi->s_uid = make_kuid(current_user_ns(), + le32_to_cpu(super->s_uid)); + if (!uid_valid(sbi->s_uid)) { + zonefs_err(sb, "Invalid UID feature\n"); + goto unmap; + } + } + + if (sbi->s_features & ZONEFS_F_GID) { + sbi->s_gid = make_kgid(current_user_ns(), + le32_to_cpu(super->s_gid)); + if (!gid_valid(sbi->s_gid)) { + zonefs_err(sb, "Invalid GID feature\n"); + goto unmap; + } + } + + if (sbi->s_features & ZONEFS_F_PERM) + sbi->s_perm = le32_to_cpu(super->s_perm); + + if (memchr_inv(super->s_reserved, 0, sizeof(super->s_reserved))) { + zonefs_err(sb, "Reserved area is being used\n"); + goto unmap; + } + + uuid_copy(&sbi->s_uuid, (uuid_t *)super->s_uuid); + ret = 0; + +unmap: + kunmap(page); +free_page: + __free_page(page); + + return ret; +} + +/* + * Check that the device is zoned. If it is, get the list of zones and create + * sub-directories and files according to the device zone configuration and + * format options. + */ +static int zonefs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct zonefs_zone_data zd; + struct zonefs_sb_info *sbi; + struct inode *inode; + enum zonefs_ztype t; + int ret; + + if (!bdev_is_zoned(sb->s_bdev)) { + zonefs_err(sb, "Not a zoned block device\n"); + return -EINVAL; + } + + /* + * Initialize super block information: the maximum file size is updated + * when the zone files are created so that the format option + * ZONEFS_F_AGGRCNV which increases the maximum file size of a file + * beyond the zone size is taken into account. + */ + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + + spin_lock_init(&sbi->s_lock); + sb->s_fs_info = sbi; + sb->s_magic = ZONEFS_MAGIC; + sb->s_maxbytes = 0; + sb->s_op = &zonefs_sops; + sb->s_time_gran = 1; + + /* + * The block size is set to the device physical sector size to ensure + * that write operations on 512e devices (512B logical block and 4KB + * physical block) are always aligned to the device physical blocks, + * as mandated by the ZBC/ZAC specifications. + */ + sb_set_blocksize(sb, bdev_physical_block_size(sb->s_bdev)); + sbi->s_zone_sectors_shift = ilog2(bdev_zone_sectors(sb->s_bdev)); + sbi->s_uid = GLOBAL_ROOT_UID; + sbi->s_gid = GLOBAL_ROOT_GID; + sbi->s_perm = 0640; + sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO; + + ret = zonefs_read_super(sb); + if (ret) + return ret; + + ret = zonefs_parse_options(sb, data); + if (ret) + return ret; + + memset(&zd, 0, sizeof(struct zonefs_zone_data)); + zd.sb = sb; + ret = zonefs_get_zone_info(&zd); + if (ret) + goto cleanup; + + zonefs_info(sb, "Mounting %u zones", + blkdev_nr_zones(sb->s_bdev->bd_disk)); + + /* Create root directory inode */ + ret = -ENOMEM; + inode = new_inode(sb); + if (!inode) + goto cleanup; + + inode->i_ino = blkdev_nr_zones(sb->s_bdev->bd_disk); + inode->i_mode = S_IFDIR | 0555; + inode->i_ctime = inode->i_mtime = inode->i_atime = current_time(inode); + inode->i_op = &zonefs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + set_nlink(inode, 2); + + sb->s_root = d_make_root(inode); + if (!sb->s_root) + goto cleanup; + + /* Create and populate files in zone groups directories */ + for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) { + ret = zonefs_create_zgroup(&zd, t); + if (ret) + break; + } + +cleanup: + zonefs_cleanup_zone_info(&zd); + + return ret; +} + +static struct dentry *zonefs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_bdev(fs_type, flags, dev_name, data, zonefs_fill_super); +} + +static void zonefs_kill_super(struct super_block *sb) +{ + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + + if (sb->s_root) + d_genocide(sb->s_root); + kill_block_super(sb); + kfree(sbi); +} + +/* + * File system definition and registration. + */ +static struct file_system_type zonefs_type = { + .owner = THIS_MODULE, + .name = "zonefs", + .mount = zonefs_mount, + .kill_sb = zonefs_kill_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init zonefs_init_inodecache(void) +{ + zonefs_inode_cachep = kmem_cache_create("zonefs_inode_cache", + sizeof(struct zonefs_inode_info), 0, + (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT), + NULL); + if (zonefs_inode_cachep == NULL) + return -ENOMEM; + return 0; +} + +static void zonefs_destroy_inodecache(void) +{ + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy the inode cache. + */ + rcu_barrier(); + kmem_cache_destroy(zonefs_inode_cachep); +} + +static int __init zonefs_init(void) +{ + int ret; + + BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE); + + ret = zonefs_init_inodecache(); + if (ret) + return ret; + + ret = register_filesystem(&zonefs_type); + if (ret) { + zonefs_destroy_inodecache(); + return ret; + } + + return 0; +} + +static void __exit zonefs_exit(void) +{ + zonefs_destroy_inodecache(); + unregister_filesystem(&zonefs_type); +} + +MODULE_AUTHOR("Damien Le Moal"); +MODULE_DESCRIPTION("Zone file system for zoned block devices"); +MODULE_LICENSE("GPL"); +module_init(zonefs_init); +module_exit(zonefs_exit); diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h new file mode 100644 index 000000000000..ad17fef7ce91 --- /dev/null +++ b/fs/zonefs/zonefs.h @@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Simple zone file system for zoned block devices. + * + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + */ +#ifndef __ZONEFS_H__ +#define __ZONEFS_H__ + +#include <linux/fs.h> +#include <linux/magic.h> +#include <linux/uuid.h> +#include <linux/mutex.h> +#include <linux/rwsem.h> + +/* + * Maximum length of file names: this only needs to be large enough to fit + * the zone group directory names and a decimal zone number for file names. + * 16 characters is plenty. + */ +#define ZONEFS_NAME_MAX 16 + +/* + * Zone types: ZONEFS_ZTYPE_SEQ is used for all sequential zone types + * defined in linux/blkzoned.h, that is, BLK_ZONE_TYPE_SEQWRITE_REQ and + * BLK_ZONE_TYPE_SEQWRITE_PREF. + */ +enum zonefs_ztype { + ZONEFS_ZTYPE_CNV, + ZONEFS_ZTYPE_SEQ, + ZONEFS_ZTYPE_MAX, +}; + +static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone) +{ + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + return ZONEFS_ZTYPE_CNV; + return ZONEFS_ZTYPE_SEQ; +} + +/* + * In-memory inode data. + */ +struct zonefs_inode_info { + struct inode i_vnode; + + /* File zone type */ + enum zonefs_ztype i_ztype; + + /* File zone start sector (512B unit) */ + sector_t i_zsector; + + /* File zone write pointer position (sequential zones only) */ + loff_t i_wpoffset; + + /* File maximum size */ + loff_t i_max_size; + + /* + * To serialise fully against both syscall and mmap based IO and + * sequential file truncation, two locks are used. For serializing + * zonefs_seq_file_truncate() against zonefs_iomap_begin(), that is, + * file truncate operations against block mapping, i_truncate_mutex is + * used. i_truncate_mutex also protects against concurrent accesses + * and changes to the inode private data, and in particular changes to + * a sequential file size on completion of direct IO writes. + * Serialization of mmap read IOs with truncate and syscall IO + * operations is done with i_mmap_sem in addition to i_truncate_mutex. + * Only zonefs_seq_file_truncate() takes both lock (i_mmap_sem first, + * i_truncate_mutex second). + */ + struct mutex i_truncate_mutex; + struct rw_semaphore i_mmap_sem; +}; + +static inline struct zonefs_inode_info *ZONEFS_I(struct inode *inode) +{ + return container_of(inode, struct zonefs_inode_info, i_vnode); +} + +/* + * On-disk super block (block 0). + */ +#define ZONEFS_LABEL_LEN 64 +#define ZONEFS_UUID_SIZE 16 +#define ZONEFS_SUPER_SIZE 4096 + +struct zonefs_super { + + /* Magic number */ + __le32 s_magic; + + /* Checksum */ + __le32 s_crc; + + /* Volume label */ + char s_label[ZONEFS_LABEL_LEN]; + + /* 128-bit uuid */ + __u8 s_uuid[ZONEFS_UUID_SIZE]; + + /* Features */ + __le64 s_features; + + /* UID/GID to use for files */ + __le32 s_uid; + __le32 s_gid; + + /* File permissions */ + __le32 s_perm; + + /* Padding to ZONEFS_SUPER_SIZE bytes */ + __u8 s_reserved[3988]; + +} __packed; + +/* + * Feature flags: specified in the s_features field of the on-disk super + * block struct zonefs_super and in-memory in the s_feartures field of + * struct zonefs_sb_info. + */ +enum zonefs_features { + /* + * Aggregate contiguous conventional zones into a single file. + */ + ZONEFS_F_AGGRCNV = 1ULL << 0, + /* + * Use super block specified UID for files instead of default 0. + */ + ZONEFS_F_UID = 1ULL << 1, + /* + * Use super block specified GID for files instead of default 0. + */ + ZONEFS_F_GID = 1ULL << 2, + /* + * Use super block specified file permissions instead of default 640. + */ + ZONEFS_F_PERM = 1ULL << 3, +}; + +#define ZONEFS_F_DEFINED_FEATURES \ + (ZONEFS_F_AGGRCNV | ZONEFS_F_UID | ZONEFS_F_GID | ZONEFS_F_PERM) + +/* + * Mount options for zone write pointer error handling. + */ +#define ZONEFS_MNTOPT_ERRORS_RO (1 << 0) /* Make zone file readonly */ +#define ZONEFS_MNTOPT_ERRORS_ZRO (1 << 1) /* Make zone file offline */ +#define ZONEFS_MNTOPT_ERRORS_ZOL (1 << 2) /* Make zone file offline */ +#define ZONEFS_MNTOPT_ERRORS_REPAIR (1 << 3) /* Remount read-only */ +#define ZONEFS_MNTOPT_ERRORS_MASK \ + (ZONEFS_MNTOPT_ERRORS_RO | ZONEFS_MNTOPT_ERRORS_ZRO | \ + ZONEFS_MNTOPT_ERRORS_ZOL | ZONEFS_MNTOPT_ERRORS_REPAIR) + +/* + * In-memory Super block information. + */ +struct zonefs_sb_info { + + unsigned long s_mount_opts; + + spinlock_t s_lock; + + unsigned long long s_features; + kuid_t s_uid; + kgid_t s_gid; + umode_t s_perm; + uuid_t s_uuid; + unsigned int s_zone_sectors_shift; + + unsigned int s_nr_files[ZONEFS_ZTYPE_MAX]; + + loff_t s_blocks; + loff_t s_used_blocks; +}; + +static inline struct zonefs_sb_info *ZONEFS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +#define zonefs_info(sb, format, args...) \ + pr_info("zonefs (%s): " format, sb->s_id, ## args) +#define zonefs_err(sb, format, args...) \ + pr_err("zonefs (%s) ERROR: " format, sb->s_id, ## args) +#define zonefs_warn(sb, format, args...) \ + pr_warn("zonefs (%s) WARNING: " format, sb->s_id, ## args) + +#endif diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 00994b1b8681..5867777bb7d0 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -752,6 +752,7 @@ ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_dispatch_gpe(acpi_handle gpe_device, u3 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_disable_all_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_runtime_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_wakeup_gpes(void)) +ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_any_gpe_status_set(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_get_gpe_device(u32 gpe_index, diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 018dce868de6..0fb561d1b524 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -201,9 +201,6 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy) return cpumask_weight(policy->cpus) > 1; } -/* /sys/devices/system/cpu/cpufreq: entry point for global variables */ -extern struct kobject *cpufreq_global_kobject; - #ifdef CONFIG_CPU_FREQ unsigned int cpufreq_get(unsigned int cpu); unsigned int cpufreq_quick_get(unsigned int cpu); diff --git a/include/linux/dax.h b/include/linux/dax.h index 9bd8528bd305..328c2dbb4409 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -129,11 +129,6 @@ static inline bool generic_fsdax_supported(struct dax_device *dax_dev, sectors); } -static inline struct dax_device *fs_dax_get_by_host(const char *host) -{ - return dax_get_by_host(host); -} - static inline void fs_put_dax(struct dax_device *dax_dev) { put_dax(dax_dev); @@ -141,7 +136,7 @@ static inline void fs_put_dax(struct dax_device *dax_dev) struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev); int dax_writeback_mapping_range(struct address_space *mapping, - struct block_device *bdev, struct writeback_control *wbc); + struct dax_device *dax_dev, struct writeback_control *wbc); struct page *dax_layout_busy_page(struct address_space *mapping); dax_entry_t dax_lock_page(struct page *page); @@ -160,11 +155,6 @@ static inline bool generic_fsdax_supported(struct dax_device *dax_dev, return false; } -static inline struct dax_device *fs_dax_get_by_host(const char *host) -{ - return NULL; -} - static inline void fs_put_dax(struct dax_device *dax_dev) { } @@ -180,7 +170,7 @@ static inline struct page *dax_layout_busy_page(struct address_space *mapping) } static inline int dax_writeback_mapping_range(struct address_space *mapping, - struct block_device *bdev, struct writeback_control *wbc) + struct dax_device *dax_dev, struct writeback_control *wbc) { return -EOPNOTSUPP; } diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index ef1cbb5f454f..93338fd54af8 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -31,6 +31,12 @@ static inline void icmpv6_send(struct sk_buff *skb, } #endif +#if IS_ENABLED(CONFIG_NF_NAT) +void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info); +#else +#define icmpv6_ndo_send icmpv6_send +#endif + extern int icmpv6_init(void); extern int icmpv6_err_convert(u8 type, u8 code, int *err); diff --git a/include/linux/irq.h b/include/linux/irq.h index 7853eb9301f2..3ed5a055b5f4 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -209,6 +209,8 @@ struct irq_data { * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set * IRQD_CAN_RESERVE - Can use reservation mode + * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change + * required */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -231,6 +233,7 @@ enum { IRQD_SINGLE_TARGET = (1 << 24), IRQD_DEFAULT_TRIGGER_SET = (1 << 25), IRQD_CAN_RESERVE = (1 << 26), + IRQD_MSI_NOMASK_QUIRK = (1 << 27), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -390,6 +393,21 @@ static inline bool irqd_can_reserve(struct irq_data *d) return __irqd_to_state(d) & IRQD_CAN_RESERVE; } +static inline void irqd_set_msi_nomask_quirk(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_MSI_NOMASK_QUIRK; +} + +static inline void irqd_clr_msi_nomask_quirk(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_MSI_NOMASK_QUIRK; +} + +static inline bool irqd_msi_nomask_quirk(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index f0b8ca766e7d..83439bfb6c5b 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -652,10 +652,10 @@ struct rdists { struct { void __iomem *rd_base; struct page *pend_page; - struct page *vpe_l1_page; phys_addr_t phys_base; bool lpi_enabled; cpumask_t *vpe_table_mask; + void *vpe_l1_base; } __percpu *rdist; phys_addr_t prop_table_pa; void *prop_table_va; diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 698749f42ced..b2d47571ab67 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -207,6 +207,13 @@ enum { IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5), /* + * Quirk to handle MSI implementations which do not provide + * masking. Currently known to affect x86, but partially + * handled in core code. + */ + IRQ_DOMAIN_MSI_NOMASK_QUIRK = (1 << 6), + + /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the * core code. diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ff8c9d527bb4..bfdf41537cf1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -688,7 +688,10 @@ struct mlx5_ifc_flow_table_nic_cap_bits { u8 nic_rx_multi_path_tirs[0x1]; u8 nic_rx_multi_path_tirs_fts[0x1]; u8 allow_sniffer_and_nic_rx_shared_tir[0x1]; - u8 reserved_at_3[0x1d]; + u8 reserved_at_3[0x4]; + u8 sw_owner_reformat_supported[0x1]; + u8 reserved_at_8[0x18]; + u8 encap_general_header[0x1]; u8 reserved_at_21[0xa]; u8 log_max_packet_reformat_context[0x5]; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a9c6b5c61d27..6c3f7032e8d9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -72,6 +72,8 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ #define NET_RX_DROP 1 /* packet dropped */ +#define MAX_NEST_DEV 8 + /* * Transmit return codes: transmit return codes originate from three different * namespaces: @@ -1616,6 +1618,7 @@ enum netdev_priv_flags { * and drivers will need to set them appropriately. * * @mpls_features: Mask of features inheritable by MPLS + * @gso_partial_features: value(s) from NETIF_F_GSO\* * * @ifindex: interface index * @group: The group the device belongs to @@ -1640,8 +1643,11 @@ enum netdev_priv_flags { * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions * @ethtool_ops: Management operations + * @l3mdev_ops: Layer 3 master device operations * @ndisc_ops: Includes callbacks for different IPv6 neighbour * discovery handling. Necessary for e.g. 6LoWPAN. + * @xfrmdev_ops: Transformation offload operations + * @tlsdev_ops: Transport Layer Security offload operations * @header_ops: Includes callbacks for creating,parsing,caching,etc * of Layer 2 headers. * @@ -1680,6 +1686,7 @@ enum netdev_priv_flags { * @dev_port: Used to differentiate devices that share * the same function * @addr_list_lock: XXX: need comments on this one + * @name_assign_type: network interface name assignment type * @uc_promisc: Counter that indicates promiscuous mode * has been enabled due to the need to listen to * additional unicast addresses in a device that @@ -1702,6 +1709,9 @@ enum netdev_priv_flags { * @ip6_ptr: IPv6 specific data * @ax25_ptr: AX.25 specific data * @ieee80211_ptr: IEEE 802.11 specific data, assign before registering + * @ieee802154_ptr: IEEE 802.15.4 low-rate Wireless Personal Area Network + * device struct + * @mpls_ptr: mpls_dev struct pointer * * @dev_addr: Hw address (before bcast, * because most packets are unicast) @@ -1710,6 +1720,8 @@ enum netdev_priv_flags { * @num_rx_queues: Number of RX queues * allocated at register_netdev() time * @real_num_rx_queues: Number of RX queues currently active in device + * @xdp_prog: XDP sockets filter program pointer + * @gro_flush_timeout: timeout for GRO layer in NAPI * * @rx_handler: handler for received packets * @rx_handler_data: XXX: need comments on this one @@ -1731,10 +1743,14 @@ enum netdev_priv_flags { * @qdisc: Root qdisc from userspace point of view * @tx_queue_len: Max frames per queue allowed * @tx_global_lock: XXX: need comments on this one + * @xdp_bulkq: XDP device bulk queue + * @xps_cpus_map: all CPUs map for XPS device + * @xps_rxqs_map: all RXQs map for XPS device * * @xps_maps: XXX: need comments on this one * @miniq_egress: clsact qdisc specific data for * egress processing + * @qdisc_hash: qdisc hash table * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers @@ -3548,7 +3564,7 @@ static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp, } /** - * netif_attrmask_next_and - get the next CPU/Rx queue in *src1p & *src2p + * netif_attrmask_next_and - get the next CPU/Rx queue in \*src1p & \*src2p * @n: CPU/Rx queue index * @src1p: the first CPUs/Rx queues mask pointer * @src2p: the second CPUs/Rx queues mask pointer @@ -4375,11 +4391,8 @@ void *netdev_lower_get_next(struct net_device *dev, ldev; \ ldev = netdev_lower_get_next(dev, &(iter))) -struct net_device *netdev_all_lower_get_next(struct net_device *dev, +struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, struct list_head **iter); -struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, - struct list_head **iter); - int netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *lower_dev, void *data), diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index cf65763af0cb..547773f5894e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1544,4 +1544,8 @@ int perf_event_exit_cpu(unsigned int cpu); #define perf_event_exit_cpu NULL #endif +extern void __weak arch_perf_update_userpage(struct perf_event *event, + struct perf_event_mmap_page *userpg, + u64 now); + #endif /* _LINUX_PERF_EVENT_H */ diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index d5765039652a..ae58fad7f1e0 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -29,7 +29,8 @@ struct pipe_buffer { /** * struct pipe_inode_info - a linux kernel pipe * @mutex: mutex protecting the whole thing - * @wait: reader/writer wait point in case of empty/full pipe + * @rd_wait: reader wait point in case of empty pipe + * @wr_wait: writer wait point in case of full pipe * @head: The point of buffer production * @tail: The point of buffer consumption * @max_usage: The maximum number of slots that may be used in the ring diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index e5b752027a03..9670b54b484a 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -145,6 +145,13 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, } } +/* after that hlist_nulls_del will work */ +static inline void hlist_nulls_add_fake(struct hlist_nulls_node *n) +{ + n->pprev = &n->next; + n->next = (struct hlist_nulls_node *)NULLS_MARKER(NULL); +} + /** * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ca8806b69388..5b50278c4bc8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -611,9 +611,15 @@ typedef unsigned char *sk_buff_data_t; * @next: Next buffer in list * @prev: Previous buffer in list * @tstamp: Time we arrived/left + * @skb_mstamp_ns: (aka @tstamp) earliest departure time; start point + * for retransmit timer * @rbnode: RB tree node, alternative to next/prev for netem/tcp + * @list: queue head * @sk: Socket we are owned by + * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in + * fragmentation management * @dev: Device we arrived on/are leaving by + * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. Put private vars here * @_skb_refdst: destination entry (with norefcount bit) * @sp: the security path, used for xfrm @@ -632,6 +638,9 @@ typedef unsigned char *sk_buff_data_t; * @pkt_type: Packet class * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs + * @inner_protocol_type: whether the inner protocol is + * ENCAP_TYPE_ETHER or ENCAP_TYPE_IPPROTO + * @remcsum_offload: remote checksum offload is enabled * @offload_fwd_mark: Packet was L2-forwarded in hardware * @offload_l3_fwd_mark: Packet was L3-forwarded in hardware * @tc_skip_classify: do not classify packet. set by IFB device @@ -650,6 +659,8 @@ typedef unsigned char *sk_buff_data_t; * @tc_index: Traffic control index * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices + * @head_frag: skb was allocated from page fragments, + * not allocated by kmalloc() or vmalloc(). * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves * @active_extensions: active extensions (skb_ext_id types) * @ndisc_nodetype: router type (from link layer) @@ -660,15 +671,28 @@ typedef unsigned char *sk_buff_data_t; * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS + * @encapsulation: indicates the inner headers in the skbuff are valid + * @encap_hdr_csum: software checksum is needed + * @csum_valid: checksum is already valid * @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL + * @csum_complete_sw: checksum was completed by software + * @csum_level: indicates the number of consecutive checksums found in + * the packet minus one that have been verified as + * CHECKSUM_UNNECESSARY (max 3) * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @napi_id: id of the NAPI struct this skb came from + * @sender_cpu: (aka @napi_id) source CPU in XPS * @secmark: security marking * @mark: Generic packet mark + * @reserved_tailroom: (aka @mark) number of bytes of free space available + * at the tail of an sk_buff + * @vlan_present: VLAN tag is present * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information * @inner_protocol: Protocol (encapsulation) + * @inner_ipproto: (aka @inner_protocol) stores ipproto when + * skb->inner_protocol_type == ENCAP_TYPE_IPPROTO; * @inner_transport_header: Inner transport layer header (encapsulation) * @inner_network_header: Network layer header (encapsulation) * @inner_mac_header: Link layer header (encapsulation) @@ -750,7 +774,9 @@ struct sk_buff { #endif #define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset) + /* private: */ __u8 __cloned_offset[0]; + /* public: */ __u8 cloned:1, nohdr:1, fclone:2, @@ -775,7 +801,9 @@ struct sk_buff { #endif #define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) + /* private: */ __u8 __pkt_type_offset[0]; + /* public: */ __u8 pkt_type:3; __u8 ignore_df:1; __u8 nf_trace:1; @@ -798,7 +826,9 @@ struct sk_buff { #define PKT_VLAN_PRESENT_BIT 0 #endif #define PKT_VLAN_PRESENT_OFFSET() offsetof(struct sk_buff, __pkt_vlan_present_offset) + /* private: */ __u8 __pkt_vlan_present_offset[0]; + /* public: */ __u8 vlan_present:1; __u8 csum_complete_sw:1; __u8 csum_level:2; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 4a230c2f1c31..2b2055b035ee 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -191,7 +191,7 @@ struct platform_s2idle_ops { int (*begin)(void); int (*prepare)(void); int (*prepare_late)(void); - void (*wake)(void); + bool (*wake)(void); void (*restore_early)(void); void (*restore)(void); void (*end)(void); diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index af2c85d3a1dd..6c7a10a6d71e 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -440,7 +440,7 @@ struct synth_event_trace_state { struct synth_event *event; unsigned int cur_field; unsigned int n_u64; - bool enabled; + bool disabled; bool add_next; bool add_name; }; diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index d93017a7ce5c..628383915827 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -5,6 +5,7 @@ #include <linux/types.h> #include <linux/in6.h> #include <linux/siphash.h> +#include <linux/string.h> #include <uapi/linux/if_ether.h> struct sk_buff; @@ -33,7 +34,6 @@ enum flow_dissect_ret { /** * struct flow_dissector_key_basic: - * @thoff: Transport header offset * @n_proto: Network header protocol (eg. IPv4/IPv6) * @ip_proto: Transport header protocol (eg. TCP/UDP) */ @@ -349,4 +349,12 @@ struct bpf_flow_dissector { void *data_end; }; +static inline void +flow_dissector_init_keys(struct flow_dissector_key_control *key_control, + struct flow_dissector_key_basic *key_basic) +{ + memset(key_control, 0, sizeof(*key_control)); + memset(key_basic, 0, sizeof(*key_basic)); +} + #endif diff --git a/include/net/icmp.h b/include/net/icmp.h index 5d4bfdba9adf..9ac2d2672a93 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -43,6 +43,12 @@ static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt); } +#if IS_ENABLED(CONFIG_NF_NAT) +void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info); +#else +#define icmp_ndo_send icmp_send +#endif + int icmp_rcv(struct sk_buff *skb); int icmp_err(struct sk_buff *skb, u32 info); int icmp_init(void); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index aa145808e57a..77e6b5a83b06 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1004,12 +1004,11 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate) struct ieee80211_tx_info { /* common information */ u32 flags; - u8 band; - - u8 hw_queue; - - u16 ack_frame_id:6; - u16 tx_time_est:10; + u32 band:3, + ack_frame_id:13, + hw_queue:4, + tx_time_est:10; + /* 2 free bits */ union { struct { diff --git a/include/net/sock.h b/include/net/sock.h index 02162b0378f7..328564525526 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -117,19 +117,26 @@ typedef __u64 __bitwise __addrpair; * struct sock_common - minimal network layer representation of sockets * @skc_daddr: Foreign IPv4 addr * @skc_rcv_saddr: Bound local IPv4 addr + * @skc_addrpair: 8-byte-aligned __u64 union of @skc_daddr & @skc_rcv_saddr * @skc_hash: hash value used with various protocol lookup tables * @skc_u16hashes: two u16 hash values used by UDP lookup tables * @skc_dport: placeholder for inet_dport/tw_dport * @skc_num: placeholder for inet_num/tw_num + * @skc_portpair: __u32 union of @skc_dport & @skc_num * @skc_family: network address family * @skc_state: Connection state * @skc_reuse: %SO_REUSEADDR setting * @skc_reuseport: %SO_REUSEPORT setting + * @skc_ipv6only: socket is IPV6 only + * @skc_net_refcnt: socket is using net ref counting * @skc_bound_dev_if: bound device index if != 0 * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol * @skc_prot: protocol handlers inside a network family * @skc_net: reference to the network namespace of this socket + * @skc_v6_daddr: IPV6 destination address + * @skc_v6_rcv_saddr: IPV6 source address + * @skc_cookie: socket's cookie value * @skc_node: main hash linkage for various protocol lookup tables * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol * @skc_tx_queue_mapping: tx queue number for this connection @@ -137,7 +144,15 @@ typedef __u64 __bitwise __addrpair; * @skc_flags: place holder for sk_flags * %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings + * @skc_listener: connection request listener socket (aka rsk_listener) + * [union with @skc_flags] + * @skc_tw_dr: (aka tw_dr) ptr to &struct inet_timewait_death_row + * [union with @skc_flags] * @skc_incoming_cpu: record/match cpu processing incoming packets + * @skc_rcv_wnd: (aka rsk_rcv_wnd) TCP receive window size (possibly scaled) + * [union with @skc_incoming_cpu] + * @skc_tw_rcv_nxt: (aka tw_rcv_nxt) TCP window next expected seq number + * [union with @skc_incoming_cpu] * @skc_refcnt: reference count * * This is the minimal network layer representation of sockets, the header @@ -245,6 +260,7 @@ struct bpf_sk_storage; * @sk_dst_cache: destination cache * @sk_dst_pending_confirm: need to confirm neighbour * @sk_policy: flow policy + * @sk_rx_skb_cache: cache copy of recently accessed RX skb * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed * @sk_tsq_flags: TCP Small Queues flags @@ -265,6 +281,8 @@ struct bpf_sk_storage; * @sk_no_check_rx: allow zero checksum in RX packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK) + * @sk_route_forced_caps: static, forced route capabilities + * (set in tcp_init_sock()) * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) * @sk_gso_max_size: Maximum GSO segment size to build * @sk_gso_max_segs: Maximum number of GSO segments @@ -303,6 +321,8 @@ struct bpf_sk_storage; * @sk_frag: cached page frag * @sk_peek_off: current peek_offset value * @sk_send_head: front of stuff to transmit + * @tcp_rtx_queue: TCP re-transmit queue [union with @sk_send_head] + * @sk_tx_skb_cache: cache copy of recently accessed TX skb * @sk_security: used by security modules * @sk_mark: generic packet mark * @sk_cgrp_data: cgroup data for this cgroup @@ -313,11 +333,14 @@ struct bpf_sk_storage; * @sk_write_space: callback to indicate there is bf sending space available * @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE) * @sk_backlog_rcv: callback to process the backlog + * @sk_validate_xmit_skb: ptr to an optional validate function * @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0 * @sk_reuseport_cb: reuseport group container + * @sk_bpf_storage: ptr to cache and control for bpf_sk_storage * @sk_rcu: used during RCU grace period * @sk_clockid: clockid used by time-based scheduling (SO_TXTIME) * @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME + * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags */ struct sock { @@ -393,7 +416,9 @@ struct sock { struct sk_filter __rcu *sk_filter; union { struct socket_wq __rcu *sk_wq; + /* private: */ struct socket_wq *sk_wq_raw; + /* public: */ }; #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; @@ -2017,7 +2042,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro * sk_wmem_alloc_get - returns write allocations * @sk: socket * - * Returns sk_wmem_alloc minus initial offset of one + * Return: sk_wmem_alloc minus initial offset of one */ static inline int sk_wmem_alloc_get(const struct sock *sk) { @@ -2028,7 +2053,7 @@ static inline int sk_wmem_alloc_get(const struct sock *sk) * sk_rmem_alloc_get - returns read allocations * @sk: socket * - * Returns sk_rmem_alloc + * Return: sk_rmem_alloc */ static inline int sk_rmem_alloc_get(const struct sock *sk) { @@ -2039,7 +2064,7 @@ static inline int sk_rmem_alloc_get(const struct sock *sk) * sk_has_allocations - check if allocations are outstanding * @sk: socket * - * Returns true if socket has write or read allocations + * Return: true if socket has write or read allocations */ static inline bool sk_has_allocations(const struct sock *sk) { @@ -2050,7 +2075,7 @@ static inline bool sk_has_allocations(const struct sock *sk) * skwq_has_sleeper - check if there are any waiting processes * @wq: struct socket_wq * - * Returns true if socket_wq has waiting processes + * Return: true if socket_wq has waiting processes * * The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory * barrier call. They were added due to the race found within the tcp code. @@ -2238,6 +2263,9 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest * inside other socket operations and end up recursing into sk_page_frag() * while it's already in use. + * + * Return: a per task page_frag if context allows that, + * otherwise a per socket one. */ static inline struct page_frag *sk_page_frag(struct sock *sk) { @@ -2432,6 +2460,7 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) &skb_shinfo(skb)->tskey); } +DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); /** * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from @@ -2440,7 +2469,6 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) * This routine must be called with interrupts disabled or with the socket * locked so that the sk_buff queue operation is ok. */ -DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 3ac436376d79..d78064007b17 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -87,6 +87,7 @@ #define NSFS_MAGIC 0x6e736673 #define BPF_FS_MAGIC 0xcafe4a11 #define AAFS_MAGIC 0x5a3c69f0 +#define ZONEFS_MAGIC 0x5a4f4653 /* Since UDF 2.01 is ISO 13346 based... */ #define UDF_SUPER_MAGIC 0x15013346 diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 336014bf8868..b6f0bb1dc799 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -97,6 +97,15 @@ enum ip_conntrack_status { IPS_UNTRACKED_BIT = 12, IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT), +#ifdef __KERNEL__ + /* Re-purposed for in-kernel use: + * Tags a conntrack entry that clashed with an existing entry + * on insert. + */ + IPS_NAT_CLASH_BIT = IPS_UNTRACKED_BIT, + IPS_NAT_CLASH = IPS_UNTRACKED, +#endif + /* Conntrack got a helper explicitly attached via CT target. */ IPS_HELPER_BIT = 13, IPS_HELPER = (1 << IPS_HELPER_BIT), @@ -110,7 +119,8 @@ enum ip_conntrack_status { */ IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING | - IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD), + IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_UNTRACKED | + IPS_OFFLOAD), __IPS_MAX_BIT = 15, }; diff --git a/init/Kconfig b/init/Kconfig index cfee56c151f1..452bc1835cd4 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1227,7 +1227,6 @@ endif config BOOT_CONFIG bool "Boot config support" depends on BLK_DEV_INITRD - select LIBXBC default y help Extra boot config allows system admin to pass a config file as diff --git a/init/main.c b/init/main.c index cc0ee4873419..f95b014a5479 100644 --- a/init/main.c +++ b/init/main.c @@ -142,6 +142,15 @@ static char *extra_command_line; /* Extra init arguments */ static char *extra_init_args; +#ifdef CONFIG_BOOT_CONFIG +/* Is bootconfig on command line? */ +static bool bootconfig_found; +static bool initargs_found; +#else +# define bootconfig_found false +# define initargs_found false +#endif + static char *execute_command; static char *ramdisk_execute_command; @@ -336,17 +345,30 @@ u32 boot_config_checksum(unsigned char *p, u32 size) return ret; } +static int __init bootconfig_params(char *param, char *val, + const char *unused, void *arg) +{ + if (strcmp(param, "bootconfig") == 0) { + bootconfig_found = true; + } else if (strcmp(param, "--") == 0) { + initargs_found = true; + } + return 0; +} + static void __init setup_boot_config(const char *cmdline) { + static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata; u32 size, csum; char *data, *copy; - const char *p; u32 *hdr; int ret; - p = strstr(cmdline, "bootconfig"); - if (!p || (p != cmdline && !isspace(*(p-1))) || - (p[10] && !isspace(p[10]))) + strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE); + parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL, + bootconfig_params); + + if (!bootconfig_found) return; if (!initrd_end) @@ -562,11 +584,12 @@ static void __init setup_command_line(char *command_line) * to init. */ len = strlen(saved_command_line); - if (!strstr(boot_command_line, " -- ")) { + if (initargs_found) { + saved_command_line[len++] = ' '; + } else { strcpy(saved_command_line + len, " -- "); len += 4; - } else - saved_command_line[len++] = ' '; + } strcpy(saved_command_line + len, extra_init_args); } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index db552b9f9377..75f687301bbf 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5927,11 +5927,14 @@ void cgroup_post_fork(struct task_struct *child) spin_lock_irq(&css_set_lock); - WARN_ON_ONCE(!list_empty(&child->cg_list)); - cset = task_css_set(current); /* current is @child's parent */ - get_css_set(cset); - cset->nr_tasks++; - css_set_move_task(child, NULL, cset, false); + /* init tasks are special, only link regular threads */ + if (likely(child->pid)) { + WARN_ON_ONCE(!list_empty(&child->cg_list)); + cset = task_css_set(current); /* current is @child's parent */ + get_css_set(cset); + cset->nr_tasks++; + css_set_move_task(child, NULL, cset, false); + } /* * If the cgroup has to be frozen, the new task has too. Let's set diff --git a/kernel/events/core.c b/kernel/events/core.c index 17f9a4a909eb..e453589da97c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -951,9 +951,9 @@ list_update_cgroup_event(struct perf_event *event, /* * Because cgroup events are always per-cpu events, - * this will always be called from the right CPU. + * @ctx == &cpuctx->ctx. */ - cpuctx = __get_cpu_context(ctx); + cpuctx = container_of(ctx, struct perf_cpu_context, ctx); /* * Since setting cpuctx->cgrp is conditional on the current @cgrp @@ -979,7 +979,8 @@ list_update_cgroup_event(struct perf_event *event, cpuctx_entry = &cpuctx->cgrp_cpuctx_entry; if (add) - list_add(cpuctx_entry, this_cpu_ptr(&cgrp_cpuctx_list)); + list_add(cpuctx_entry, + per_cpu_ptr(&cgrp_cpuctx_list, event->cpu)); else list_del(cpuctx_entry); } @@ -5916,7 +5917,15 @@ accounting: */ user_lock_limit *= num_online_cpus(); - user_locked = atomic_long_read(&user->locked_vm) + user_extra; + user_locked = atomic_long_read(&user->locked_vm); + + /* + * sysctl_perf_event_mlock may have changed, so that + * user->locked_vm > user_lock_limit + */ + if (user_locked > user_lock_limit) + user_locked = user_lock_limit; + user_locked += user_extra; if (user_locked > user_lock_limit) { /* diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index c1eccd4f6520..a949bd39e343 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -114,6 +114,7 @@ static const struct irq_bit_descr irqdata_states[] = { BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED), BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), BIT_MASK_DESCR(IRQD_CAN_RESERVE), + BIT_MASK_DESCR(IRQD_MSI_NOMASK_QUIRK), BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 818b2802d3e7..3089a60ea8f9 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -731,6 +731,13 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on) * * Wakeup mode lets this IRQ wake the system from sleep * states like "suspend to RAM". + * + * Note: irq enable/disable state is completely orthogonal + * to the enable/disable state of irq wake. An irq can be + * disabled with disable_irq() and still wake the system as + * long as the irq has wake enabled. If this does not hold, + * then the underlying irq chip and the related driver need + * to be investigated. */ int irq_set_irq_wake(unsigned int irq, unsigned int on) { diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index ad26fbcfbfc8..eb95f6106a1e 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -453,8 +453,11 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, continue; irq_data = irq_domain_get_irq_data(domain, desc->irq); - if (!can_reserve) + if (!can_reserve) { irqd_clr_can_reserve(irq_data); + if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK) + irqd_set_msi_nomask_quirk(irq_data); + } ret = irq_domain_activate_irq(irq_data, can_reserve); if (ret) goto cleanup; diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index d812b90f4c86..a9b3f660dee7 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -44,7 +44,7 @@ __attribute__((weak, section(".rodata"))); extern const unsigned long kallsyms_relative_base __attribute__((weak, section(".rodata"))); -extern const u8 kallsyms_token_table[] __weak; +extern const char kallsyms_token_table[] __weak; extern const u16 kallsyms_token_index[] __weak; extern const unsigned int kallsyms_markers[] __weak; @@ -58,7 +58,8 @@ static unsigned int kallsyms_expand_symbol(unsigned int off, char *result, size_t maxlen) { int len, skipped_first = 0; - const u8 *tptr, *data; + const char *tptr; + const u8 *data; /* Get the compressed symbol length from the first symbol byte. */ data = &kallsyms_names[off]; diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 2c47280fbfc7..8b1bb5ee7e5d 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -131,11 +131,12 @@ static void s2idle_loop(void) * to avoid them upfront. */ for (;;) { - if (s2idle_ops && s2idle_ops->wake) - s2idle_ops->wake(); - - if (pm_wakeup_pending()) + if (s2idle_ops && s2idle_ops->wake) { + if (s2idle_ops->wake()) + break; + } else if (pm_wakeup_pending()) { break; + } pm_wakeup_clear(false); diff --git a/kernel/smp.c b/kernel/smp.c index 3b7bedc97af3..d0ada39eb4d4 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -435,7 +435,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask, /* Fastpath: do that cpu by itself. */ if (next_cpu >= nr_cpu_ids) { - if (!cond_func || (cond_func && cond_func(cpu, info))) + if (!cond_func || cond_func(cpu, info)) smp_call_function_single(cpu, func, info, wait); return; } diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index fff5f64981c6..428beb69426a 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -293,8 +293,15 @@ static void clocksource_watchdog(struct timer_list *unused) next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); if (next_cpu >= nr_cpu_ids) next_cpu = cpumask_first(cpu_online_mask); - watchdog_timer.expires += WATCHDOG_INTERVAL; - add_timer_on(&watchdog_timer, next_cpu); + + /* + * Arm timer if not already pending: could race with concurrent + * pair clocksource_stop_watchdog() clocksource_start_watchdog(). + */ + if (!timer_pending(&watchdog_timer)) { + watchdog_timer.expires += WATCHDOG_INTERVAL; + add_timer_on(&watchdog_timer, next_cpu); + } out: spin_unlock(&watchdog_lock); } diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index e7ce7cdac62f..483b3fd1094f 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1798,6 +1798,60 @@ void synth_event_cmd_init(struct dynevent_cmd *cmd, char *buf, int maxlen) } EXPORT_SYMBOL_GPL(synth_event_cmd_init); +static inline int +__synth_event_trace_start(struct trace_event_file *file, + struct synth_event_trace_state *trace_state) +{ + int entry_size, fields_size = 0; + int ret = 0; + + /* + * Normal event tracing doesn't get called at all unless the + * ENABLED bit is set (which attaches the probe thus allowing + * this code to be called, etc). Because this is called + * directly by the user, we don't have that but we still need + * to honor not logging when disabled. For the the iterated + * trace case, we save the enabed state upon start and just + * ignore the following data calls. + */ + if (!(file->flags & EVENT_FILE_FL_ENABLED) || + trace_trigger_soft_disabled(file)) { + trace_state->disabled = true; + ret = -ENOENT; + goto out; + } + + trace_state->event = file->event_call->data; + + fields_size = trace_state->event->n_u64 * sizeof(u64); + + /* + * Avoid ring buffer recursion detection, as this event + * is being performed within another event. + */ + trace_state->buffer = file->tr->array_buffer.buffer; + ring_buffer_nest_start(trace_state->buffer); + + entry_size = sizeof(*trace_state->entry) + fields_size; + trace_state->entry = trace_event_buffer_reserve(&trace_state->fbuffer, + file, + entry_size); + if (!trace_state->entry) { + ring_buffer_nest_end(trace_state->buffer); + ret = -EINVAL; + } +out: + return ret; +} + +static inline void +__synth_event_trace_end(struct synth_event_trace_state *trace_state) +{ + trace_event_buffer_commit(&trace_state->fbuffer); + + ring_buffer_nest_end(trace_state->buffer); +} + /** * synth_event_trace - Trace a synthetic event * @file: The trace_event_file representing the synthetic event @@ -1819,71 +1873,38 @@ EXPORT_SYMBOL_GPL(synth_event_cmd_init); */ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...) { - struct trace_event_buffer fbuffer; - struct synth_trace_event *entry; - struct trace_buffer *buffer; - struct synth_event *event; + struct synth_event_trace_state state; unsigned int i, n_u64; - int fields_size = 0; va_list args; - int ret = 0; - - /* - * Normal event generation doesn't get called at all unless - * the ENABLED bit is set (which attaches the probe thus - * allowing this code to be called, etc). Because this is - * called directly by the user, we don't have that but we - * still need to honor not logging when disabled. - */ - if (!(file->flags & EVENT_FILE_FL_ENABLED)) - return 0; - - event = file->event_call->data; - - if (n_vals != event->n_fields) - return -EINVAL; - - if (trace_trigger_soft_disabled(file)) - return -EINVAL; - - fields_size = event->n_u64 * sizeof(u64); - - /* - * Avoid ring buffer recursion detection, as this event - * is being performed within another event. - */ - buffer = file->tr->array_buffer.buffer; - ring_buffer_nest_start(buffer); + int ret; - entry = trace_event_buffer_reserve(&fbuffer, file, - sizeof(*entry) + fields_size); - if (!entry) { - ret = -EINVAL; - goto out; + ret = __synth_event_trace_start(file, &state); + if (ret) { + if (ret == -ENOENT) + ret = 0; /* just disabled, not really an error */ + return ret; } va_start(args, n_vals); - for (i = 0, n_u64 = 0; i < event->n_fields; i++) { + for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) { u64 val; val = va_arg(args, u64); - if (event->fields[i]->is_string) { + if (state.event->fields[i]->is_string) { char *str_val = (char *)(long)val; - char *str_field = (char *)&entry->fields[n_u64]; + char *str_field = (char *)&state.entry->fields[n_u64]; strscpy(str_field, str_val, STR_VAR_LEN_MAX); n_u64 += STR_VAR_LEN_MAX / sizeof(u64); } else { - entry->fields[n_u64] = val; + state.entry->fields[n_u64] = val; n_u64++; } } va_end(args); - trace_event_buffer_commit(&fbuffer); -out: - ring_buffer_nest_end(buffer); + __synth_event_trace_end(&state); return ret; } @@ -1910,64 +1931,31 @@ EXPORT_SYMBOL_GPL(synth_event_trace); int synth_event_trace_array(struct trace_event_file *file, u64 *vals, unsigned int n_vals) { - struct trace_event_buffer fbuffer; - struct synth_trace_event *entry; - struct trace_buffer *buffer; - struct synth_event *event; + struct synth_event_trace_state state; unsigned int i, n_u64; - int fields_size = 0; - int ret = 0; - - /* - * Normal event generation doesn't get called at all unless - * the ENABLED bit is set (which attaches the probe thus - * allowing this code to be called, etc). Because this is - * called directly by the user, we don't have that but we - * still need to honor not logging when disabled. - */ - if (!(file->flags & EVENT_FILE_FL_ENABLED)) - return 0; - - event = file->event_call->data; - - if (n_vals != event->n_fields) - return -EINVAL; - - if (trace_trigger_soft_disabled(file)) - return -EINVAL; - - fields_size = event->n_u64 * sizeof(u64); - - /* - * Avoid ring buffer recursion detection, as this event - * is being performed within another event. - */ - buffer = file->tr->array_buffer.buffer; - ring_buffer_nest_start(buffer); + int ret; - entry = trace_event_buffer_reserve(&fbuffer, file, - sizeof(*entry) + fields_size); - if (!entry) { - ret = -EINVAL; - goto out; + ret = __synth_event_trace_start(file, &state); + if (ret) { + if (ret == -ENOENT) + ret = 0; /* just disabled, not really an error */ + return ret; } - for (i = 0, n_u64 = 0; i < event->n_fields; i++) { - if (event->fields[i]->is_string) { + for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) { + if (state.event->fields[i]->is_string) { char *str_val = (char *)(long)vals[i]; - char *str_field = (char *)&entry->fields[n_u64]; + char *str_field = (char *)&state.entry->fields[n_u64]; strscpy(str_field, str_val, STR_VAR_LEN_MAX); n_u64 += STR_VAR_LEN_MAX / sizeof(u64); } else { - entry->fields[n_u64] = vals[i]; + state.entry->fields[n_u64] = vals[i]; n_u64++; } } - trace_event_buffer_commit(&fbuffer); -out: - ring_buffer_nest_end(buffer); + __synth_event_trace_end(&state); return ret; } @@ -2004,58 +1992,17 @@ EXPORT_SYMBOL_GPL(synth_event_trace_array); int synth_event_trace_start(struct trace_event_file *file, struct synth_event_trace_state *trace_state) { - struct synth_trace_event *entry; - int fields_size = 0; - int ret = 0; + int ret; - if (!trace_state) { - ret = -EINVAL; - goto out; - } + if (!trace_state) + return -EINVAL; memset(trace_state, '\0', sizeof(*trace_state)); - /* - * Normal event tracing doesn't get called at all unless the - * ENABLED bit is set (which attaches the probe thus allowing - * this code to be called, etc). Because this is called - * directly by the user, we don't have that but we still need - * to honor not logging when disabled. For the the iterated - * trace case, we save the enabed state upon start and just - * ignore the following data calls. - */ - if (!(file->flags & EVENT_FILE_FL_ENABLED)) { - trace_state->enabled = false; - goto out; - } - - trace_state->enabled = true; - - trace_state->event = file->event_call->data; - - if (trace_trigger_soft_disabled(file)) { - ret = -EINVAL; - goto out; - } + ret = __synth_event_trace_start(file, trace_state); + if (ret == -ENOENT) + ret = 0; /* just disabled, not really an error */ - fields_size = trace_state->event->n_u64 * sizeof(u64); - - /* - * Avoid ring buffer recursion detection, as this event - * is being performed within another event. - */ - trace_state->buffer = file->tr->array_buffer.buffer; - ring_buffer_nest_start(trace_state->buffer); - - entry = trace_event_buffer_reserve(&trace_state->fbuffer, file, - sizeof(*entry) + fields_size); - if (!entry) { - ret = -EINVAL; - goto out; - } - - trace_state->entry = entry; -out: return ret; } EXPORT_SYMBOL_GPL(synth_event_trace_start); @@ -2088,7 +2035,7 @@ static int __synth_event_add_val(const char *field_name, u64 val, trace_state->add_next = true; } - if (!trace_state->enabled) + if (trace_state->disabled) goto out; event = trace_state->event; @@ -2223,9 +2170,7 @@ int synth_event_trace_end(struct synth_event_trace_state *trace_state) if (!trace_state) return -EINVAL; - trace_event_buffer_commit(&trace_state->fbuffer); - - ring_buffer_nest_end(trace_state->buffer); + __synth_event_trace_end(trace_state); return 0; } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d8264ebb9581..362cca52f5de 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1012,7 +1012,7 @@ int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...) { struct dynevent_arg arg; va_list args; - int ret; + int ret = 0; if (cmd->type != DYNEVENT_TYPE_KPROBE) return -EINVAL; diff --git a/kernel/up.c b/kernel/up.c index 53144d056252..c6f323dcd45b 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -14,7 +14,8 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, { unsigned long flags; - WARN_ON(cpu != 0); + if (cpu != 0) + return -ENXIO; local_irq_save(flags); func(info); diff --git a/lib/Kconfig b/lib/Kconfig index 0cf875fd627c..bc7e56370129 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -573,9 +573,6 @@ config DIMLIB config LIBFDT bool -config LIBXBC - bool - config OID_REGISTRY tristate help diff --git a/lib/Makefile b/lib/Makefile index 8bb91176c2a1..611872c06926 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -230,7 +230,7 @@ $(foreach file, $(libfdt_files), \ $(eval CFLAGS_$(file) = -I $(srctree)/scripts/dtc/libfdt)) lib-$(CONFIG_LIBFDT) += $(libfdt_files) -lib-$(CONFIG_LIBXBC) += bootconfig.o +lib-$(CONFIG_BOOT_CONFIG) += bootconfig.o obj-$(CONFIG_RBTREE_TEST) += rbtree_test.o obj-$(CONFIG_INTERVAL_TREE_TEST) += interval_tree_test.o @@ -241,8 +241,8 @@ obj-$(CONFIG_ASN1) += asn1_decoder.o obj-$(CONFIG_FONT_SUPPORT) += fonts/ -hostprogs-y := gen_crc32table -hostprogs-y += gen_crc64table +hostprogs := gen_crc32table +hostprogs += gen_crc64table clean-files := crc32table.h clean-files += crc64table.h diff --git a/lib/bootconfig.c b/lib/bootconfig.c index afb2e767e6fe..3ea601a2eba5 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -6,12 +6,13 @@ #define pr_fmt(fmt) "bootconfig: " fmt +#include <linux/bootconfig.h> #include <linux/bug.h> #include <linux/ctype.h> #include <linux/errno.h> #include <linux/kernel.h> +#include <linux/memblock.h> #include <linux/printk.h> -#include <linux/bootconfig.h> #include <linux/string.h> /* @@ -23,7 +24,7 @@ * node (for array). */ -static struct xbc_node xbc_nodes[XBC_NODE_MAX] __initdata; +static struct xbc_node *xbc_nodes __initdata; static int xbc_node_num __initdata; static char *xbc_data __initdata; static size_t xbc_data_size __initdata; @@ -719,7 +720,8 @@ void __init xbc_destroy_all(void) xbc_data = NULL; xbc_data_size = 0; xbc_node_num = 0; - memset(xbc_nodes, 0, sizeof(xbc_nodes)); + memblock_free(__pa(xbc_nodes), sizeof(struct xbc_node) * XBC_NODE_MAX); + xbc_nodes = NULL; } /** @@ -748,6 +750,13 @@ int __init xbc_init(char *buf) return -ERANGE; } + xbc_nodes = memblock_alloc(sizeof(struct xbc_node) * XBC_NODE_MAX, + SMP_CACHE_BYTES); + if (!xbc_nodes) { + pr_err("Failed to allocate memory for bootconfig nodes.\n"); + return -ENOMEM; + } + memset(xbc_nodes, 0, sizeof(struct xbc_node) * XBC_NODE_MAX); xbc_data = buf; xbc_data_size = ret + 1; last_parent = NULL; diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 0083b5cc646c..b4c0df6d706d 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -10,7 +10,7 @@ raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \ raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o -hostprogs-y += mktables +hostprogs += mktables ifeq ($(CONFIG_ALTIVEC),y) altivec_flags := -maltivec $(call cc-option,-mabi=altivec) diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile index aa945ab5b655..36580301da70 100644 --- a/net/bpfilter/Makefile +++ b/net/bpfilter/Makefile @@ -3,7 +3,7 @@ # Makefile for the Linux BPFILTER layer. # -hostprogs-y := bpfilter_umh +hostprogs := bpfilter_umh bpfilter_umh-objs := main.o KBUILD_HOSTCFLAGS += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi HOSTCC := $(CC) diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 6856a6d9282b..1f14b8455345 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -63,7 +63,8 @@ struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no) { struct net_bridge_port *p; - list_for_each_entry_rcu(p, &br->port_list, list) { + list_for_each_entry_rcu(p, &br->port_list, list, + lockdep_is_held(&br->lock)) { if (p->port_no == port_no) return p; } diff --git a/net/core/dev.c b/net/core/dev.c index a69e8bd7ed74..e10bd680dc03 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -146,7 +146,6 @@ #include "net-sysfs.h" #define MAX_GRO_SKBS 8 -#define MAX_NEST_DEV 8 /* This should be increased if a protocol with a bigger head is added. */ #define GRO_MAX_HEAD (MAX_HEADER + 128) @@ -331,6 +330,12 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name) name_node = netdev_name_node_lookup(net, name); if (!name_node) return -ENOENT; + /* lookup might have found our primary name or a name belonging + * to another device. + */ + if (name_node == dev->name_node || name_node->dev != dev) + return -EINVAL; + __netdev_name_node_alt_destroy(name_node); return 0; @@ -3657,26 +3662,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); if (q->flags & TCQ_F_NOLOCK) { - if ((q->flags & TCQ_F_CAN_BYPASS) && READ_ONCE(q->empty) && - qdisc_run_begin(q)) { - if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, - &q->state))) { - __qdisc_drop(skb, &to_free); - rc = NET_XMIT_DROP; - goto end_run; - } - qdisc_bstats_cpu_update(q, skb); - - rc = NET_XMIT_SUCCESS; - if (sch_direct_xmit(skb, q, dev, txq, NULL, true)) - __qdisc_run(q); - -end_run: - qdisc_run_end(q); - } else { - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; - qdisc_run(q); - } + rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + qdisc_run(q); if (unlikely(to_free)) kfree_skb_list(to_free); @@ -4527,14 +4514,14 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, /* Reinjected packets coming from act_mirred or similar should * not get XDP generic processing. */ - if (skb_cloned(skb) || skb_is_tc_redirected(skb)) + if (skb_is_tc_redirected(skb)) return XDP_PASS; /* XDP packets must be linear and must have sufficient headroom * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also * native XDP provides, thus we need to do it here as well. */ - if (skb_is_nonlinear(skb) || + if (skb_cloned(skb) || skb_is_nonlinear(skb) || skb_headroom(skb) < XDP_PACKET_HEADROOM) { int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb); int troom = skb->tail + skb->data_len - skb->end; @@ -7201,8 +7188,8 @@ static int __netdev_walk_all_lower_dev(struct net_device *dev, return 0; } -static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, - struct list_head **iter) +struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, + struct list_head **iter) { struct netdev_adjacent *lower; @@ -7214,6 +7201,7 @@ static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, return lower->dev; } +EXPORT_SYMBOL(netdev_next_lower_dev_rcu); static u8 __netdev_upper_depth(struct net_device *dev) { diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 3e7e15278c46..bd7eba9066f8 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -974,7 +974,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh = nlmsg_data(nlh); frh->family = ops->family; - frh->table = rule->table; + frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT; if (nla_put_u32(skb, FRA_TABLE, rule->table)) goto nla_put_failure; if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen)) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 9b7cbe35df37..10d2b255df5e 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -99,8 +99,7 @@ EXPORT_SYMBOL(page_pool_create); static void __page_pool_return_page(struct page_pool *pool, struct page *page); noinline -static struct page *page_pool_refill_alloc_cache(struct page_pool *pool, - bool refill) +static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) { struct ptr_ring *r = &pool->ring; struct page *page; @@ -141,8 +140,7 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool, page = NULL; break; } - } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL && - refill); + } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL); /* Return last page */ if (likely(pool->alloc.count > 0)) @@ -155,20 +153,16 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool, /* fast path */ static struct page *__page_pool_get_cached(struct page_pool *pool) { - bool refill = false; struct page *page; - /* Test for safe-context, caller should provide this guarantee */ - if (likely(in_serving_softirq())) { - if (likely(pool->alloc.count)) { - /* Fast-path */ - page = pool->alloc.cache[--pool->alloc.count]; - return page; - } - refill = true; + /* Caller MUST guarantee safe non-concurrent access, e.g. softirq */ + if (likely(pool->alloc.count)) { + /* Fast-path */ + page = pool->alloc.cache[--pool->alloc.count]; + } else { + page = page_pool_refill_alloc_cache(pool); } - page = page_pool_refill_alloc_cache(pool, refill); return page; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 09c44bf2e1d2..e1152f4ffe33 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3504,27 +3504,25 @@ static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr, if (err) return err; - alt_ifname = nla_data(attr); + alt_ifname = nla_strdup(attr, GFP_KERNEL); + if (!alt_ifname) + return -ENOMEM; + if (cmd == RTM_NEWLINKPROP) { - alt_ifname = kstrdup(alt_ifname, GFP_KERNEL); - if (!alt_ifname) - return -ENOMEM; err = netdev_name_node_alt_create(dev, alt_ifname); - if (err) { - kfree(alt_ifname); - return err; - } + if (!err) + alt_ifname = NULL; } else if (cmd == RTM_DELLINKPROP) { err = netdev_name_node_alt_destroy(dev, alt_ifname); - if (err) - return err; } else { - WARN_ON(1); - return 0; + WARN_ON_ONCE(1); + err = -EINVAL; } - *changed = true; - return 0; + kfree(alt_ifname); + if (!err) + *changed = true; + return err; } static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 864cb9e9622f..1365a556152c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -467,7 +467,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, return NULL; } - /* use OR instead of assignment to avoid clearing of bits in mask */ if (pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; @@ -527,7 +526,6 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, return NULL; } - /* use OR instead of assignment to avoid clearing of bits in mask */ if (nc->page.pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c index 466ffa92a474..55b00694cdba 100644 --- a/net/dsa/tag_ar9331.c +++ b/net/dsa/tag_ar9331.c @@ -31,7 +31,7 @@ static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb, __le16 *phdr; u16 hdr; - if (skb_cow_head(skb, 0) < 0) + if (skb_cow_head(skb, AR9331_HDR_LEN) < 0) return NULL; phdr = skb_push(skb, AR9331_HDR_LEN); diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index c8a128c9e5e0..70db7c909f74 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -33,7 +33,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) struct dsa_port *dp = dsa_slave_to_port(dev); u16 *phdr, hdr; - if (skb_cow_head(skb, 0) < 0) + if (skb_cow_head(skb, QCA_HDR_LEN) < 0) return NULL; skb_push(skb, QCA_HDR_LEN); diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c index fce45dac4205..8977fe1f3946 100644 --- a/net/ethtool/bitset.c +++ b/net/ethtool/bitset.c @@ -447,7 +447,10 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits, "mask only allowed in compact bitset"); return -EINVAL; } + no_mask = tb[ETHTOOL_A_BITSET_NOMASK]; + if (no_mask) + ethnl_bitmap32_clear(bitmap, 0, nbits, mod); nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) { bool old_val, new_val; diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 364ea2cc028e..3ba7f61be107 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -155,7 +155,8 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, new_node->seq_out[i] = seq_out; spin_lock_bh(&hsr->list_lock); - list_for_each_entry_rcu(node, node_db, mac_list) { + list_for_each_entry_rcu(node, node_db, mac_list, + lockdep_is_held(&hsr->list_lock)) { if (ether_addr_equal(node->macaddress_A, addr)) goto out; if (ether_addr_equal(node->macaddress_B, addr)) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 18068ed42f25..f369e7ce685b 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -748,6 +748,39 @@ out:; } EXPORT_SYMBOL(__icmp_send); +#if IS_ENABLED(CONFIG_NF_NAT) +#include <net/netfilter/nf_conntrack.h> +void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) +{ + struct sk_buff *cloned_skb = NULL; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + __be32 orig_ip; + + ct = nf_ct_get(skb_in, &ctinfo); + if (!ct || !(ct->status & IPS_SRC_NAT)) { + icmp_send(skb_in, type, code, info); + return; + } + + if (skb_shared(skb_in)) + skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC); + + if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head || + (skb_network_header(skb_in) + sizeof(struct iphdr)) > + skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in, + skb_network_offset(skb_in) + sizeof(struct iphdr)))) + goto out; + + orig_ip = ip_hdr(skb_in)->saddr; + ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip; + icmp_send(skb_in, type, code, info); + ip_hdr(skb_in)->saddr = orig_ip; +out: + consume_skb(cloned_skb); +} +EXPORT_SYMBOL(icmp_ndo_send); +#endif static void icmp_socket_deliver(struct sk_buff *skb, u32 info) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index db76b9609299..08a41f1e1cd2 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1857,8 +1857,12 @@ int __udp_disconnect(struct sock *sk, int flags) inet->inet_dport = 0; sock_rps_reset_rxhash(sk); sk->sk_bound_dev_if = 0; - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) { inet_reset_saddr(sk); + if (sk->sk_prot->rehash && + (sk->sk_userlocks & SOCK_BINDPORT_LOCK)) + sk->sk_prot->rehash(sk); + } if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { sk->sk_prot->unhash(sk); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 58fbde244381..72abf892302f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1102,8 +1102,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, found++; break; } - if (rt_can_ecmp) - fallback_ins = fallback_ins ?: ins; + fallback_ins = fallback_ins ?: ins; goto next_iter; } @@ -1146,7 +1145,9 @@ next_iter: } if (fallback_ins && !found) { - /* No ECMP-able route found, replace first non-ECMP one */ + /* No matching route with same ecmp-able-ness found, replace + * first matching route + */ ins = fallback_ins; iter = rcu_dereference_protected(*ins, lockdep_is_held(&rt->fib6_table->tb6_lock)); diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c index 02045494c24c..e0086758b6ee 100644 --- a/net/ipv6/ip6_icmp.c +++ b/net/ipv6/ip6_icmp.c @@ -45,4 +45,38 @@ out: rcu_read_unlock(); } EXPORT_SYMBOL(icmpv6_send); + +#if IS_ENABLED(CONFIG_NF_NAT) +#include <net/netfilter/nf_conntrack.h> +void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) +{ + struct sk_buff *cloned_skb = NULL; + enum ip_conntrack_info ctinfo; + struct in6_addr orig_ip; + struct nf_conn *ct; + + ct = nf_ct_get(skb_in, &ctinfo); + if (!ct || !(ct->status & IPS_SRC_NAT)) { + icmpv6_send(skb_in, type, code, info); + return; + } + + if (skb_shared(skb_in)) + skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC); + + if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head || + (skb_network_header(skb_in) + sizeof(struct ipv6hdr)) > + skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in, + skb_network_offset(skb_in) + sizeof(struct ipv6hdr)))) + goto out; + + orig_ip = ipv6_hdr(skb_in)->saddr; + ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6; + icmpv6_send(skb_in, type, code, info); + ipv6_hdr(skb_in)->saddr = orig_ip; +out: + consume_skb(cloned_skb); +} +EXPORT_SYMBOL(icmpv6_ndo_send); +#endif #endif diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b5dd20c4599b..5d65436ad5ad 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -121,6 +121,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) /** * ip6_tnl_lookup - fetch tunnel matching the end-point addresses + * @link: ifindex of underlying interface * @remote: the address of the tunnel exit-point * @local: the address of the tunnel entry-point * @@ -134,37 +135,56 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) static struct ip6_tnl * -ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local) +ip6_tnl_lookup(struct net *net, int link, + const struct in6_addr *remote, const struct in6_addr *local) { unsigned int hash = HASH(remote, local); - struct ip6_tnl *t; + struct ip6_tnl *t, *cand = NULL; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct in6_addr any; for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { - if (ipv6_addr_equal(local, &t->parms.laddr) && - ipv6_addr_equal(remote, &t->parms.raddr) && - (t->dev->flags & IFF_UP)) + if (!ipv6_addr_equal(local, &t->parms.laddr) || + !ipv6_addr_equal(remote, &t->parms.raddr) || + !(t->dev->flags & IFF_UP)) + continue; + + if (link == t->parms.link) return t; + else + cand = t; } memset(&any, 0, sizeof(any)); hash = HASH(&any, local); for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { - if (ipv6_addr_equal(local, &t->parms.laddr) && - ipv6_addr_any(&t->parms.raddr) && - (t->dev->flags & IFF_UP)) + if (!ipv6_addr_equal(local, &t->parms.laddr) || + !ipv6_addr_any(&t->parms.raddr) || + !(t->dev->flags & IFF_UP)) + continue; + + if (link == t->parms.link) return t; + else if (!cand) + cand = t; } hash = HASH(remote, &any); for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { - if (ipv6_addr_equal(remote, &t->parms.raddr) && - ipv6_addr_any(&t->parms.laddr) && - (t->dev->flags & IFF_UP)) + if (!ipv6_addr_equal(remote, &t->parms.raddr) || + !ipv6_addr_any(&t->parms.laddr) || + !(t->dev->flags & IFF_UP)) + continue; + + if (link == t->parms.link) return t; + else if (!cand) + cand = t; } + if (cand) + return cand; + t = rcu_dereference(ip6n->collect_md_tun); if (t && t->dev->flags & IFF_UP) return t; @@ -351,7 +371,8 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net, (t = rtnl_dereference(*tp)) != NULL; tp = &t->next) { if (ipv6_addr_equal(local, &t->parms.laddr) && - ipv6_addr_equal(remote, &t->parms.raddr)) { + ipv6_addr_equal(remote, &t->parms.raddr) && + p->link == t->parms.link) { if (create) return ERR_PTR(-EEXIST); @@ -485,7 +506,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, processing of the error. */ rcu_read_lock(); - t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr); + t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->daddr, &ipv6h->saddr); if (!t) goto out; @@ -887,7 +908,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, int ret = -1; rcu_read_lock(); - t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); + t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->saddr, &ipv6h->daddr); if (t) { u8 tproto = READ_ONCE(t->parms.proto); @@ -1420,8 +1441,10 @@ tx_err: static void ip6_tnl_link_config(struct ip6_tnl *t) { struct net_device *dev = t->dev; + struct net_device *tdev = NULL; struct __ip6_tnl_parm *p = &t->parms; struct flowi6 *fl6 = &t->fl.u.ip6; + unsigned int mtu; int t_hlen; memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); @@ -1457,22 +1480,25 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, p->link, NULL, strict); + if (rt) { + tdev = rt->dst.dev; + ip6_rt_put(rt); + } - if (!rt) - return; + if (!tdev && p->link) + tdev = __dev_get_by_index(t->net, p->link); - if (rt->dst.dev) { - dev->hard_header_len = rt->dst.dev->hard_header_len + - t_hlen; + if (tdev) { + dev->hard_header_len = tdev->hard_header_len + t_hlen; + mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU); - dev->mtu = rt->dst.dev->mtu - t_hlen; + dev->mtu = mtu - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; } - ip6_rt_put(rt); } } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4fbdc60b4e07..2931224b674e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5198,6 +5198,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, */ cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_REPLACE); + cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE; nhn++; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 000c742d0527..6aee699deb28 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3450,7 +3450,7 @@ int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, spin_lock_irqsave(&local->ack_status_lock, spin_flags); id = idr_alloc(&local->ack_status_frames, ack_skb, - 1, 0x40, GFP_ATOMIC); + 1, 0x2000, GFP_ATOMIC); spin_unlock_irqrestore(&local->ack_status_lock, spin_flags); if (id < 0) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5fa13176036f..e041af2f021a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -8,7 +8,7 @@ * Copyright 2007, Michael Wu <[email protected]> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2019 Intel Corporation + * Copyright (C) 2018 - 2020 Intel Corporation */ #include <linux/delay.h> @@ -1311,7 +1311,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (!res) { ch_switch.timestamp = timestamp; ch_switch.device_timestamp = device_timestamp; - ch_switch.block_tx = beacon ? csa_ie.mode : 0; + ch_switch.block_tx = csa_ie.mode; ch_switch.chandef = csa_ie.chandef; ch_switch.count = csa_ie.count; ch_switch.delay = csa_ie.max_switch_time; @@ -1404,7 +1404,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, sdata->vif.csa_active = true; sdata->csa_chandef = csa_ie.chandef; - sdata->csa_block_tx = ch_switch.block_tx; + sdata->csa_block_tx = csa_ie.mode; ifmgd->csa_ignored_same_chan = false; if (sdata->csa_block_tx) @@ -1438,7 +1438,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, * reset when the disconnection worker runs. */ sdata->vif.csa_active = true; - sdata->csa_block_tx = ch_switch.block_tx; + sdata->csa_block_tx = csa_ie.mode; ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); mutex_unlock(&local->chanctx_mtx); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 4bd1faf4f779..87def9cb91ff 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2442,7 +2442,7 @@ static int ieee80211_store_ack_skb(struct ieee80211_local *local, spin_lock_irqsave(&local->ack_status_lock, flags); id = idr_alloc(&local->ack_status_frames, ack_skb, - 1, 0x40, GFP_ATOMIC); + 1, 0x2000, GFP_ATOMIC); spin_unlock_irqrestore(&local->ack_status_lock, flags); if (id >= 0) { diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 32a7a53833c0..decd46b38393 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1063,16 +1063,22 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elem_parse_failed = true; break; case WLAN_EID_VHT_OPERATION: - if (elen >= sizeof(struct ieee80211_vht_operation)) + if (elen >= sizeof(struct ieee80211_vht_operation)) { elems->vht_operation = (void *)pos; - else - elem_parse_failed = true; + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + break; + } + elem_parse_failed = true; break; case WLAN_EID_OPMODE_NOTIF: - if (elen > 0) + if (elen > 0) { elems->opmode_notif = pos; - else - elem_parse_failed = true; + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + break; + } + elem_parse_failed = true; break; case WLAN_EID_MESH_ID: elems->mesh_id = pos; @@ -2987,10 +2993,22 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, int cf0, cf1; int ccfs0, ccfs1, ccfs2; int ccf0, ccf1; + u32 vht_cap; + bool support_80_80 = false; + bool support_160 = false; if (!oper || !htop) return false; + vht_cap = hw->wiphy->bands[chandef->chan->band]->vht_cap.cap; + support_160 = (vht_cap & (IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK | + IEEE80211_VHT_CAP_EXT_NSS_BW_MASK)); + support_80_80 = ((vht_cap & + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) || + (vht_cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ && + vht_cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) || + ((vht_cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) >> + IEEE80211_VHT_CAP_EXT_NSS_BW_SHIFT > 1)); ccfs0 = oper->center_freq_seg0_idx; ccfs1 = oper->center_freq_seg1_idx; ccfs2 = (le16_to_cpu(htop->operation_mode) & @@ -3018,10 +3036,10 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, unsigned int diff; diff = abs(ccf1 - ccf0); - if (diff == 8) { + if ((diff == 8) && support_160) { new.width = NL80211_CHAN_WIDTH_160; new.center_freq1 = cf1; - } else if (diff > 8) { + } else if ((diff > 8) && support_80_80) { new.width = NL80211_CHAN_WIDTH_80P80; new.center_freq2 = cf1; } diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig index 49f6054e7f4e..a9ed3bf1d93f 100644 --- a/net/mptcp/Kconfig +++ b/net/mptcp/Kconfig @@ -4,6 +4,7 @@ config MPTCP depends on INET select SKB_EXTENSIONS select CRYPTO_LIB_SHA256 + select CRYPTO help Multipath TCP (MPTCP) connections send and receive data over multiple subflows in order to utilize multiple network paths. Each subflow diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 030dee668e0a..e9aa6807b5be 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -755,60 +755,50 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { struct mptcp_sock *msk = mptcp_sk(sk); - int ret = -EOPNOTSUPP; struct socket *ssock; - struct sock *ssk; pr_debug("msk=%p", msk); /* @@ the meaning of setsockopt() when the socket is connected and - * there are multiple subflows is not defined. + * there are multiple subflows is not yet defined. It is up to the + * MPTCP-level socket to configure the subflows until the subflow + * is in TCP fallback, when TCP socket options are passed through + * to the one remaining subflow. */ lock_sock(sk); - ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE); - if (IS_ERR(ssock)) { - release_sock(sk); - return ret; - } + ssock = __mptcp_tcp_fallback(msk); + if (ssock) + return tcp_setsockopt(ssock->sk, level, optname, optval, + optlen); - ssk = ssock->sk; - sock_hold(ssk); release_sock(sk); - ret = tcp_setsockopt(ssk, level, optname, optval, optlen); - sock_put(ssk); - - return ret; + return -EOPNOTSUPP; } static int mptcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *option) { struct mptcp_sock *msk = mptcp_sk(sk); - int ret = -EOPNOTSUPP; struct socket *ssock; - struct sock *ssk; pr_debug("msk=%p", msk); - /* @@ the meaning of getsockopt() when the socket is connected and - * there are multiple subflows is not defined. + /* @@ the meaning of setsockopt() when the socket is connected and + * there are multiple subflows is not yet defined. It is up to the + * MPTCP-level socket to configure the subflows until the subflow + * is in TCP fallback, when socket options are passed through + * to the one remaining subflow. */ lock_sock(sk); - ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE); - if (IS_ERR(ssock)) { - release_sock(sk); - return ret; - } + ssock = __mptcp_tcp_fallback(msk); + if (ssock) + return tcp_getsockopt(ssock->sk, level, optname, optval, + option); - ssk = ssock->sk; - sock_hold(ssk); release_sock(sk); - ret = tcp_getsockopt(ssk, level, optname, optval, option); - sock_put(ssk); - - return ret; + return -EOPNOTSUPP; } static int mptcp_get_port(struct sock *sk, unsigned short snum) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 8a99a2930284..9f8663b30456 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -56,8 +56,8 @@ #define MPTCP_DSS_FLAG_MASK (0x1F) /* MPTCP socket flags */ -#define MPTCP_DATA_READY BIT(0) -#define MPTCP_SEND_SPACE BIT(1) +#define MPTCP_DATA_READY 0 +#define MPTCP_SEND_SPACE 1 /* MPTCP connection sock */ struct mptcp_sock { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index d1305423640f..1927fc296f95 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -894,32 +894,175 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo, } } -/* Resolve race on insertion if this protocol allows this. */ +static void __nf_conntrack_insert_prepare(struct nf_conn *ct) +{ + struct nf_conn_tstamp *tstamp; + + atomic_inc(&ct->ct_general.use); + ct->status |= IPS_CONFIRMED; + + /* set conntrack timestamp, if enabled. */ + tstamp = nf_conn_tstamp_find(ct); + if (tstamp) + tstamp->start = ktime_get_real_ns(); +} + +static int __nf_ct_resolve_clash(struct sk_buff *skb, + struct nf_conntrack_tuple_hash *h) +{ + /* This is the conntrack entry already in hashes that won race. */ + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + enum ip_conntrack_info ctinfo; + struct nf_conn *loser_ct; + + loser_ct = nf_ct_get(skb, &ctinfo); + + if (nf_ct_is_dying(ct)) + return NF_DROP; + + if (!atomic_inc_not_zero(&ct->ct_general.use)) + return NF_DROP; + + if (((ct->status & IPS_NAT_DONE_MASK) == 0) || + nf_ct_match(ct, loser_ct)) { + struct net *net = nf_ct_net(ct); + + nf_ct_acct_merge(ct, ctinfo, loser_ct); + nf_ct_add_to_dying_list(loser_ct); + nf_conntrack_put(&loser_ct->ct_general); + nf_ct_set(skb, ct, ctinfo); + + NF_CT_STAT_INC(net, insert_failed); + return NF_ACCEPT; + } + + nf_ct_put(ct); + return NF_DROP; +} + +/** + * nf_ct_resolve_clash_harder - attempt to insert clashing conntrack entry + * + * @skb: skb that causes the collision + * @repl_idx: hash slot for reply direction + * + * Called when origin or reply direction had a clash. + * The skb can be handled without packet drop provided the reply direction + * is unique or there the existing entry has the identical tuple in both + * directions. + * + * Caller must hold conntrack table locks to prevent concurrent updates. + * + * Returns NF_DROP if the clash could not be handled. + */ +static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) +{ + struct nf_conn *loser_ct = (struct nf_conn *)skb_nfct(skb); + const struct nf_conntrack_zone *zone; + struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_node *n; + struct net *net; + + zone = nf_ct_zone(loser_ct); + net = nf_ct_net(loser_ct); + + /* Reply direction must never result in a clash, unless both origin + * and reply tuples are identical. + */ + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[repl_idx], hnnode) { + if (nf_ct_key_equal(h, + &loser_ct->tuplehash[IP_CT_DIR_REPLY].tuple, + zone, net)) + return __nf_ct_resolve_clash(skb, h); + } + + /* We want the clashing entry to go away real soon: 1 second timeout. */ + loser_ct->timeout = nfct_time_stamp + HZ; + + /* IPS_NAT_CLASH removes the entry automatically on the first + * reply. Also prevents UDP tracker from moving the entry to + * ASSURED state, i.e. the entry can always be evicted under + * pressure. + */ + loser_ct->status |= IPS_FIXED_TIMEOUT | IPS_NAT_CLASH; + + __nf_conntrack_insert_prepare(loser_ct); + + /* fake add for ORIGINAL dir: we want lookups to only find the entry + * already in the table. This also hides the clashing entry from + * ctnetlink iteration, i.e. conntrack -L won't show them. + */ + hlist_nulls_add_fake(&loser_ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); + + hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode, + &nf_conntrack_hash[repl_idx]); + return NF_ACCEPT; +} + +/** + * nf_ct_resolve_clash - attempt to handle clash without packet drop + * + * @skb: skb that causes the clash + * @h: tuplehash of the clashing entry already in table + * @hash_reply: hash slot for reply direction + * + * A conntrack entry can be inserted to the connection tracking table + * if there is no existing entry with an identical tuple. + * + * If there is one, @skb (and the assocated, unconfirmed conntrack) has + * to be dropped. In case @skb is retransmitted, next conntrack lookup + * will find the already-existing entry. + * + * The major problem with such packet drop is the extra delay added by + * the packet loss -- it will take some time for a retransmit to occur + * (or the sender to time out when waiting for a reply). + * + * This function attempts to handle the situation without packet drop. + * + * If @skb has no NAT transformation or if the colliding entries are + * exactly the same, only the to-be-confirmed conntrack entry is discarded + * and @skb is associated with the conntrack entry already in the table. + * + * Failing that, the new, unconfirmed conntrack is still added to the table + * provided that the collision only occurs in the ORIGINAL direction. + * The new entry will be added after the existing one in the hash list, + * so packets in the ORIGINAL direction will continue to match the existing + * entry. The new entry will also have a fixed timeout so it expires -- + * due to the collision, it will not see bidirectional traffic. + * + * Returns NF_DROP if the clash could not be resolved. + */ static __cold noinline int -nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - struct nf_conntrack_tuple_hash *h) +nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h, + u32 reply_hash) { /* This is the conntrack entry already in hashes that won race. */ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); const struct nf_conntrack_l4proto *l4proto; - enum ip_conntrack_info oldinfo; - struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); + enum ip_conntrack_info ctinfo; + struct nf_conn *loser_ct; + struct net *net; + int ret; + + loser_ct = nf_ct_get(skb, &ctinfo); + net = nf_ct_net(loser_ct); l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); - if (l4proto->allow_clash && - !nf_ct_is_dying(ct) && - atomic_inc_not_zero(&ct->ct_general.use)) { - if (((ct->status & IPS_NAT_DONE_MASK) == 0) || - nf_ct_match(ct, loser_ct)) { - nf_ct_acct_merge(ct, ctinfo, loser_ct); - nf_conntrack_put(&loser_ct->ct_general); - nf_ct_set(skb, ct, oldinfo); - return NF_ACCEPT; - } - nf_ct_put(ct); - } + if (!l4proto->allow_clash) + goto drop; + + ret = __nf_ct_resolve_clash(skb, h); + if (ret == NF_ACCEPT) + return ret; + + ret = nf_ct_resolve_clash_harder(skb, reply_hash); + if (ret == NF_ACCEPT) + return ret; + +drop: + nf_ct_add_to_dying_list(loser_ct); NF_CT_STAT_INC(net, drop); + NF_CT_STAT_INC(net, insert_failed); return NF_DROP; } @@ -932,7 +1075,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; struct nf_conn_help *help; - struct nf_conn_tstamp *tstamp; struct hlist_nulls_node *n; enum ip_conntrack_info ctinfo; struct net *net; @@ -989,6 +1131,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) if (unlikely(nf_ct_is_dying(ct))) { nf_ct_add_to_dying_list(ct); + NF_CT_STAT_INC(net, insert_failed); goto dying; } @@ -1009,13 +1152,8 @@ __nf_conntrack_confirm(struct sk_buff *skb) setting time, otherwise we'd get timer wrap in weird delay cases. */ ct->timeout += nfct_time_stamp; - atomic_inc(&ct->ct_general.use); - ct->status |= IPS_CONFIRMED; - /* set conntrack timestamp, if enabled. */ - tstamp = nf_conn_tstamp_find(ct); - if (tstamp) - tstamp->start = ktime_get_real_ns(); + __nf_conntrack_insert_prepare(ct); /* Since the lookup is lockless, hash insertion must be done after * starting the timer and setting the CONFIRMED bit. The RCU barriers @@ -1035,11 +1173,9 @@ __nf_conntrack_confirm(struct sk_buff *skb) return NF_ACCEPT; out: - nf_ct_add_to_dying_list(ct); - ret = nf_ct_resolve_clash(net, skb, ctinfo, h); + ret = nf_ct_resolve_clash(skb, h, reply_hash); dying: nf_conntrack_double_unlock(hash, reply_hash); - NF_CT_STAT_INC(net, insert_failed); local_bh_enable(); return ret; } diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 7365b43f8f98..760ca2422816 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -81,6 +81,18 @@ static bool udp_error(struct sk_buff *skb, return false; } +static void nf_conntrack_udp_refresh_unreplied(struct nf_conn *ct, + struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + u32 extra_jiffies) +{ + if (unlikely(ctinfo == IP_CT_ESTABLISHED_REPLY && + ct->status & IPS_NAT_CLASH)) + nf_ct_kill(ct); + else + nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies); +} + /* Returns verdict for packet, and may modify conntracktype */ int nf_conntrack_udp_packet(struct nf_conn *ct, struct sk_buff *skb, @@ -116,8 +128,8 @@ int nf_conntrack_udp_packet(struct nf_conn *ct, if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); } else { - nf_ct_refresh_acct(ct, ctinfo, skb, - timeouts[UDP_CT_UNREPLIED]); + nf_conntrack_udp_refresh_unreplied(ct, skb, ctinfo, + timeouts[UDP_CT_UNREPLIED]); } return NF_ACCEPT; } @@ -198,8 +210,8 @@ int nf_conntrack_udplite_packet(struct nf_conn *ct, if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); } else { - nf_ct_refresh_acct(ct, ctinfo, skb, - timeouts[UDP_CT_UNREPLIED]); + nf_conntrack_udp_refresh_unreplied(ct, skb, ctinfo, + timeouts[UDP_CT_UNREPLIED]); } return NF_ACCEPT; } diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 83e1db37c3b0..06f00cdc3891 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -847,9 +847,6 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, { int err; - if (!nf_flowtable_hw_offload(flowtable)) - return 0; - if (!dev->netdev_ops->ndo_setup_tc) return -EOPNOTSUPP; @@ -876,6 +873,9 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, struct flow_block_offload bo; int err; + if (!nf_flowtable_hw_offload(flowtable)) + return 0; + err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, &extack); if (err < 0) return err; diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index f0cb1e13af50..feac8553f6d9 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -203,7 +203,7 @@ * :: * * rule indices in last field: 0 1 - * map to elements: 0x42 0x66 + * map to elements: 0x66 0x42 * * * Matching @@ -298,7 +298,7 @@ * :: * * rule indices in last field: 0 1 - * map to elements: 0x42 0x66 + * map to elements: 0x66 0x42 * * the matching element is at 0x42. * @@ -503,7 +503,7 @@ static int pipapo_refill(unsigned long *map, int len, int rules, return -1; } - if (unlikely(match_only)) { + if (match_only) { bitmap_clear(map, i, 1); return i; } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index bccd47cd7190..7a2c4b8408c4 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -36,6 +36,7 @@ #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/mutex.h> #include <linux/kernel.h> +#include <linux/refcount.h> #include <uapi/linux/netfilter/xt_hashlimit.h> #define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \ @@ -114,7 +115,7 @@ struct dsthash_ent { struct xt_hashlimit_htable { struct hlist_node node; /* global list of all htables */ - int use; + refcount_t use; u_int8_t family; bool rnd_initialized; @@ -315,7 +316,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg, for (i = 0; i < hinfo->cfg.size; i++) INIT_HLIST_HEAD(&hinfo->hash[i]); - hinfo->use = 1; + refcount_set(&hinfo->use, 1); hinfo->count = 0; hinfo->family = family; hinfo->rnd_initialized = false; @@ -420,7 +421,7 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net, hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) { if (!strcmp(name, hinfo->name) && hinfo->family == family) { - hinfo->use++; + refcount_inc(&hinfo->use); return hinfo; } } @@ -429,12 +430,11 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net, static void htable_put(struct xt_hashlimit_htable *hinfo) { - mutex_lock(&hashlimit_mutex); - if (--hinfo->use == 0) { + if (refcount_dec_and_mutex_lock(&hinfo->use, &hashlimit_mutex)) { hlist_del(&hinfo->node); + mutex_unlock(&hashlimit_mutex); htable_destroy(hinfo); } - mutex_unlock(&hashlimit_mutex); } /* The algorithm used is the Simple Token Bucket Filter (TBF) @@ -837,6 +837,8 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3); } +#define HASHLIMIT_MAX_SIZE 1048576 + static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, struct xt_hashlimit_htable **hinfo, struct hashlimit_cfg3 *cfg, @@ -847,6 +849,14 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, if (cfg->gc_interval == 0 || cfg->expire == 0) return -EINVAL; + if (cfg->size > HASHLIMIT_MAX_SIZE) { + cfg->size = HASHLIMIT_MAX_SIZE; + pr_info_ratelimited("size too large, truncated to %u\n", cfg->size); + } + if (cfg->max > HASHLIMIT_MAX_SIZE) { + cfg->max = HASHLIMIT_MAX_SIZE; + pr_info_ratelimited("max too large, truncated to %u\n", cfg->max); + } if (par->family == NFPROTO_IPV4) { if (cfg->srcmask > 32 || cfg->dstmask > 32) return -EINVAL; diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index f5d34da0646e..a1f2320ecc16 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -143,7 +143,8 @@ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, if (domain != NULL) { bkt = netlbl_domhsh_hash(domain); bkt_list = &netlbl_domhsh_rcu_deref(netlbl_domhsh)->tbl[bkt]; - list_for_each_entry_rcu(iter, bkt_list, list) + list_for_each_entry_rcu(iter, bkt_list, list, + lockdep_is_held(&netlbl_domhsh_lock)) if (iter->valid && netlbl_family_match(iter->family, family) && strcmp(iter->domain, domain) == 0) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index d2e4ab8d1cb1..77bb1bb22c3b 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -207,7 +207,8 @@ static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex) bkt = netlbl_unlhsh_hash(ifindex); bkt_list = &netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->tbl[bkt]; - list_for_each_entry_rcu(iter, bkt_list, list) + list_for_each_entry_rcu(iter, bkt_list, list, + lockdep_is_held(&netlbl_unlhsh_lock)) if (iter->valid && iter->ifindex == ifindex) return iter; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 659c2a790fe7..c047afd12116 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -179,7 +179,8 @@ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) struct hlist_head *head; head = vport_hash_bucket(dp, port_no); - hlist_for_each_entry_rcu(vport, head, dp_hash_node) { + hlist_for_each_entry_rcu(vport, head, dp_hash_node, + lockdep_ovsl_is_held()) { if (vport->port_no == port_no) return vport; } @@ -2042,7 +2043,8 @@ static unsigned int ovs_get_max_headroom(struct datapath *dp) int i; for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { - hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { + hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node, + lockdep_ovsl_is_held()) { dev = vport->dev; dev_headroom = netdev_get_fwd_headroom(dev); if (dev_headroom > max_headroom) @@ -2061,7 +2063,8 @@ static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom) dp->max_headroom = new_headroom; for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) - hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) + hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node, + lockdep_ovsl_is_held()) netdev_set_rx_headroom(vport->dev, new_headroom); } diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 5904e93e5765..fd8a01ca7a2d 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -585,7 +585,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, head = find_bucket(ti, hash); (*n_mask_hit)++; - hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { + hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver], + lockdep_ovsl_is_held()) { if (flow->mask == mask && flow->flow_table.hash == hash && flow_cmp_masked_key(flow, &masked_key, &mask->range)) return flow; @@ -769,7 +770,8 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl, hash = ufid_hash(ufid); head = find_bucket(ti, hash); - hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) { + hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver], + lockdep_ovsl_is_held()) { if (flow->ufid_table.hash == hash && ovs_flow_cmp_ufid(flow, ufid)) return flow; diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 3323b79ff548..5010d1ddd4bd 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -61,7 +61,8 @@ static struct dp_meter *lookup_meter(const struct datapath *dp, struct hlist_head *head; head = meter_hash_bucket(dp, meter_id); - hlist_for_each_entry_rcu(meter, head, dp_hash_node) { + hlist_for_each_entry_rcu(meter, head, dp_hash_node, + lockdep_ovsl_is_held()) { if (meter->id == meter_id) return meter; } diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 5da9392b03d6..47febb4504f0 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -96,7 +96,8 @@ struct vport *ovs_vport_locate(const struct net *net, const char *name) struct hlist_head *bucket = hash_bucket(net, name); struct vport *vport; - hlist_for_each_entry_rcu(vport, bucket, hash_node) + hlist_for_each_entry_rcu(vport, bucket, hash_node, + lockdep_ovsl_is_held()) if (!strcmp(name, ovs_vport_name(vport)) && net_eq(ovs_dp_get_net(vport->dp), net)) return vport; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 3341eee87bf9..585e6b3b69ce 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -162,10 +162,9 @@ static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages, if (write) gup_flags |= FOLL_WRITE; - ret = get_user_pages_fast(user_addr, nr_pages, gup_flags, pages); + ret = pin_user_pages_fast(user_addr, nr_pages, gup_flags, pages); if (ret >= 0 && ret < nr_pages) { - while (ret--) - put_page(pages[ret]); + unpin_user_pages(pages, ret); ret = -EFAULT; } @@ -300,8 +299,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, * to release anything. */ if (!need_odp) { - for (i = 0 ; i < nents; i++) - put_page(sg_page(&sg[i])); + unpin_user_pages(pages, nr_pages); kfree(sg); } ret = PTR_ERR(trans_private); @@ -325,7 +323,12 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, if (cookie_ret) *cookie_ret = cookie; - if (args->cookie_addr && put_user(cookie, (u64 __user *)(unsigned long) args->cookie_addr)) { + if (args->cookie_addr && + put_user(cookie, (u64 __user *)(unsigned long)args->cookie_addr)) { + if (!need_odp) { + unpin_user_pages(pages, nr_pages); + kfree(sg); + } ret = -EFAULT; goto out; } @@ -496,9 +499,7 @@ void rds_rdma_free_op(struct rm_rdma_op *ro) * is the case for a RDMA_READ which copies from remote * to local memory */ - if (!ro->op_write) - set_page_dirty(page); - put_page(page); + unpin_user_pages_dirty_lock(&page, 1, !ro->op_write); } } @@ -515,8 +516,7 @@ void rds_atomic_free_op(struct rm_atomic_op *ao) /* Mark page dirty if it was possibly modified, which * is the case for a RDMA_READ which copies from remote * to local memory */ - set_page_dirty(page); - put_page(page); + unpin_user_pages_dirty_lock(&page, 1, true); kfree(ao->op_notifier); ao->op_notifier = NULL; @@ -944,7 +944,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, return ret; err: if (page) - put_page(page); + unpin_user_page(page); rm->atomic.op_active = 0; kfree(rm->atomic.op_notifier); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index f9c0d1e8d380..d32d4233d337 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -305,6 +305,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_fl_filter *f; list_for_each_entry_rcu(mask, &head->masks, list) { + flow_dissector_init_keys(&skb_key.control, &skb_key.basic); fl_clear_masked_range(&skb_key, mask); skb_flow_dissect_meta(skb, &mask->dissector, &skb_key); @@ -691,6 +692,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { .len = 128 / BITS_PER_BYTE }, [TCA_FLOWER_KEY_CT_LABELS_MASK] = { .type = NLA_BINARY, .len = 128 / BITS_PER_BYTE }, + [TCA_FLOWER_FLAGS] = { .type = NLA_U32 }, }; static const struct nla_policy diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 039cc86974f4..610a0b728161 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -157,6 +157,7 @@ static void *mall_get(struct tcf_proto *tp, u32 handle) static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { [TCA_MATCHALL_UNSPEC] = { .type = NLA_UNSPEC }, [TCA_MATCHALL_CLASSID] = { .type = NLA_U32 }, + [TCA_MATCHALL_FLAGS] = { .type = NLA_U32 }, }; static int mall_set_parms(struct net *net, struct tcf_proto *tp, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 748e3b19ec1d..6a16af4b1ef6 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -170,6 +170,16 @@ static inline bool sctp_chunk_length_valid(struct sctp_chunk *chunk, return true; } +/* Check for format error in an ABORT chunk */ +static inline bool sctp_err_chunk_valid(struct sctp_chunk *chunk) +{ + struct sctp_errhdr *err; + + sctp_walk_errors(err, chunk->chunk_hdr); + + return (void *)err == (void *)chunk->chunk_end; +} + /********************************************************** * These are the state functions for handling chunk events. **********************************************************/ @@ -2255,6 +2265,9 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } @@ -2298,6 +2311,9 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Stop the T2-shutdown timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); @@ -2565,6 +2581,9 @@ enum sctp_disposition sctp_sf_do_9_1_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } @@ -2582,16 +2601,8 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort( /* See if we have an error cause code in the chunk. */ len = ntohs(chunk->chunk_hdr->length); - if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) { - struct sctp_errhdr *err; - - sctp_walk_errors(err, chunk->chunk_hdr); - if ((void *)err != (void *)chunk->chunk_end) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, - commands); - + if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) error = ((struct sctp_errhdr *)chunk->skb->data)->cause; - } sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET)); /* ASSOC_FAILED will DELETE_TCB. */ diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index cee5bf4a9bb9..90988a511cd5 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -470,6 +470,8 @@ static void smc_switch_to_fallback(struct smc_sock *smc) if (smc->sk.sk_socket && smc->sk.sk_socket->file) { smc->clcsock->file = smc->sk.sk_socket->file; smc->clcsock->file->private_data = smc->clcsock; + smc->clcsock->wq.fasync_list = + smc->sk.sk_socket->wq.fasync_list; } } diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 0879f7bed967..86cccc24e52e 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -372,7 +372,9 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); dclc.hdr.version = SMC_CLC_V1; dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0; - memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid)); + if (smc->conn.lgr && !smc->conn.lgr->is_smcd) + memcpy(dclc.id_for_peer, local_systemid, + sizeof(local_systemid)); dclc.peer_diagnosis = htonl(peer_diag_info); memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index f38727ecf8b2..e1f64f4ba236 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -39,16 +39,15 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) { struct smc_sock *smc = smc_sk(sk); + memset(r, 0, sizeof(*r)); r->diag_family = sk->sk_family; + sock_diag_save_cookie(sk, r->id.idiag_cookie); if (!smc->clcsock) return; r->id.idiag_sport = htons(smc->clcsock->sk->sk_num); r->id.idiag_dport = smc->clcsock->sk->sk_dport; r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if; - sock_diag_save_cookie(sk, r->id.idiag_cookie); if (sk->sk_protocol == SMCPROTO_SMC) { - memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); - memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr; r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 1ba5a92832bb..1c5574e2e058 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -593,7 +593,7 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, u32 seq, u64 *p_record_sn) { u64 record_sn = context->hint_record_sn; - struct tls_record_info *info; + struct tls_record_info *info, *last; info = context->retransmit_hint; if (!info || @@ -605,6 +605,24 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, struct tls_record_info, list); if (!info) return NULL; + /* send the start_marker record if seq number is before the + * tls offload start marker sequence number. This record is + * required to handle TCP packets which are before TLS offload + * started. + * And if it's not start marker, look if this seq number + * belongs to the list. + */ + if (likely(!tls_record_is_start_marker(info))) { + /* we have the first record, get the last record to see + * if this seq number belongs to the list. + */ + last = list_last_entry(&context->records_list, + struct tls_record_info, list); + + if (!between(seq, tls_record_start_seq(info), + last->end_seq)) + return NULL; + } record_sn = context->unacked_record_sn; } diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c index a9c0f368db5d..24e18405cdb4 100644 --- a/net/wireless/ethtool.c +++ b/net/wireless/ethtool.c @@ -7,9 +7,13 @@ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct wireless_dev *wdev = dev->ieee80211_ptr; + struct device *pdev = wiphy_dev(wdev->wiphy); - strlcpy(info->driver, wiphy_dev(wdev->wiphy)->driver->name, - sizeof(info->driver)); + if (pdev->driver) + strlcpy(info->driver, pdev->driver->name, + sizeof(info->driver)); + else + strlcpy(info->driver, "N/A", sizeof(info->driver)); strlcpy(info->version, init_utsname()->release, sizeof(info->version)); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 123b8d720a59..cedf17d4933f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -437,6 +437,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG }, [NL80211_ATTR_CONTROL_PORT_OVER_NL80211] = { .type = NLA_FLAG }, [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, + [NL80211_ATTR_STATUS_CODE] = { .type = NLA_U16 }, [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, [NL80211_ATTR_PID] = { .type = NLA_U32 }, diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index dc651a628dcf..3361e3ac5714 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -300,10 +300,10 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); } else { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); } dst_release(dst); diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index b0e8adf7eb01..79b0fee6943b 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -111,65 +111,65 @@ ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS) hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS) # Tell kbuild to always build the programs -always := $(tprogs-y) -always += sockex1_kern.o -always += sockex2_kern.o -always += sockex3_kern.o -always += tracex1_kern.o -always += tracex2_kern.o -always += tracex3_kern.o -always += tracex4_kern.o -always += tracex5_kern.o -always += tracex6_kern.o -always += tracex7_kern.o -always += sock_flags_kern.o -always += test_probe_write_user_kern.o -always += trace_output_kern.o -always += tcbpf1_kern.o -always += tc_l2_redirect_kern.o -always += lathist_kern.o -always += offwaketime_kern.o -always += spintest_kern.o -always += map_perf_test_kern.o -always += test_overhead_tp_kern.o -always += test_overhead_raw_tp_kern.o -always += test_overhead_kprobe_kern.o -always += parse_varlen.o parse_simple.o parse_ldabs.o -always += test_cgrp2_tc_kern.o -always += xdp1_kern.o -always += xdp2_kern.o -always += xdp_router_ipv4_kern.o -always += test_current_task_under_cgroup_kern.o -always += trace_event_kern.o -always += sampleip_kern.o -always += lwt_len_hist_kern.o -always += xdp_tx_iptunnel_kern.o -always += test_map_in_map_kern.o -always += tcp_synrto_kern.o -always += tcp_rwnd_kern.o -always += tcp_bufs_kern.o -always += tcp_cong_kern.o -always += tcp_iw_kern.o -always += tcp_clamp_kern.o -always += tcp_basertt_kern.o -always += tcp_tos_reflect_kern.o -always += tcp_dumpstats_kern.o -always += xdp_redirect_kern.o -always += xdp_redirect_map_kern.o -always += xdp_redirect_cpu_kern.o -always += xdp_monitor_kern.o -always += xdp_rxq_info_kern.o -always += xdp2skb_meta_kern.o -always += syscall_tp_kern.o -always += cpustat_kern.o -always += xdp_adjust_tail_kern.o -always += xdp_fwd_kern.o -always += task_fd_query_kern.o -always += xdp_sample_pkts_kern.o -always += ibumad_kern.o -always += hbm_out_kern.o -always += hbm_edt_kern.o -always += xdpsock_kern.o +always-y := $(tprogs-y) +always-y += sockex1_kern.o +always-y += sockex2_kern.o +always-y += sockex3_kern.o +always-y += tracex1_kern.o +always-y += tracex2_kern.o +always-y += tracex3_kern.o +always-y += tracex4_kern.o +always-y += tracex5_kern.o +always-y += tracex6_kern.o +always-y += tracex7_kern.o +always-y += sock_flags_kern.o +always-y += test_probe_write_user_kern.o +always-y += trace_output_kern.o +always-y += tcbpf1_kern.o +always-y += tc_l2_redirect_kern.o +always-y += lathist_kern.o +always-y += offwaketime_kern.o +always-y += spintest_kern.o +always-y += map_perf_test_kern.o +always-y += test_overhead_tp_kern.o +always-y += test_overhead_raw_tp_kern.o +always-y += test_overhead_kprobe_kern.o +always-y += parse_varlen.o parse_simple.o parse_ldabs.o +always-y += test_cgrp2_tc_kern.o +always-y += xdp1_kern.o +always-y += xdp2_kern.o +always-y += xdp_router_ipv4_kern.o +always-y += test_current_task_under_cgroup_kern.o +always-y += trace_event_kern.o +always-y += sampleip_kern.o +always-y += lwt_len_hist_kern.o +always-y += xdp_tx_iptunnel_kern.o +always-y += test_map_in_map_kern.o +always-y += tcp_synrto_kern.o +always-y += tcp_rwnd_kern.o +always-y += tcp_bufs_kern.o +always-y += tcp_cong_kern.o +always-y += tcp_iw_kern.o +always-y += tcp_clamp_kern.o +always-y += tcp_basertt_kern.o +always-y += tcp_tos_reflect_kern.o +always-y += tcp_dumpstats_kern.o +always-y += xdp_redirect_kern.o +always-y += xdp_redirect_map_kern.o +always-y += xdp_redirect_cpu_kern.o +always-y += xdp_monitor_kern.o +always-y += xdp_rxq_info_kern.o +always-y += xdp2skb_meta_kern.o +always-y += syscall_tp_kern.o +always-y += cpustat_kern.o +always-y += xdp_adjust_tail_kern.o +always-y += xdp_fwd_kern.o +always-y += task_fd_query_kern.o +always-y += xdp_sample_pkts_kern.o +always-y += ibumad_kern.o +always-y += hbm_out_kern.o +always-y += hbm_edt_kern.o +always-y += xdpsock_kern.o ifeq ($(ARCH), arm) # Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux diff --git a/samples/connector/Makefile b/samples/connector/Makefile index 6ad71620e503..b785cbde5ffa 100644 --- a/samples/connector/Makefile +++ b/samples/connector/Makefile @@ -2,12 +2,8 @@ obj-$(CONFIG_SAMPLE_CONNECTOR) += cn_test.o # List of programs to build -ifdef CONFIG_SAMPLE_CONNECTOR -hostprogs-y := ucon -endif - -# Tell kbuild to always build the programs -always := $(hostprogs-y) +hostprogs := ucon +always-y := $(hostprogs) HOSTCFLAGS_ucon.o += -I$(objtree)/usr/include diff --git a/samples/hidraw/Makefile b/samples/hidraw/Makefile index dec1b22adf54..8bd25f77671f 100644 --- a/samples/hidraw/Makefile +++ b/samples/hidraw/Makefile @@ -1,9 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # List of programs to build -hostprogs-y := hid-example - -# Tell kbuild to always build the programs -always := $(hostprogs-y) +hostprogs := hid-example +always-y := $(hostprogs) HOSTCFLAGS_hid-example.o += -I$(objtree)/usr/include diff --git a/samples/mei/Makefile b/samples/mei/Makefile index 27f37efdadb4..f5b9d02be2cd 100644 --- a/samples/mei/Makefile +++ b/samples/mei/Makefile @@ -1,10 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright (c) 2012-2019, Intel Corporation. All rights reserved. -hostprogs-y := mei-amt-version +hostprogs := mei-amt-version HOSTCFLAGS_mei-amt-version.o += -I$(objtree)/usr/include -always := $(hostprogs-y) +always-y := $(hostprogs) all: mei-amt-version diff --git a/samples/pidfd/Makefile b/samples/pidfd/Makefile index 0ff97784177a..ee2979849d92 100644 --- a/samples/pidfd/Makefile +++ b/samples/pidfd/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -hostprogs-y := pidfd-metadata -always := $(hostprogs-y) +hostprogs := pidfd-metadata +always-y := $(hostprogs) HOSTCFLAGS_pidfd-metadata.o += -I$(objtree)/usr/include all: pidfd-metadata diff --git a/samples/seccomp/Makefile b/samples/seccomp/Makefile index 009775b52538..89279e8b87df 100644 --- a/samples/seccomp/Makefile +++ b/samples/seccomp/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 ifndef CROSS_COMPILE -hostprogs-y := bpf-fancy dropper bpf-direct user-trap +hostprogs := bpf-fancy dropper bpf-direct user-trap HOSTCFLAGS_bpf-fancy.o += -I$(objtree)/usr/include HOSTCFLAGS_bpf-fancy.o += -idirafter $(objtree)/include @@ -40,5 +40,5 @@ HOSTLDLIBS_bpf-fancy += $(MFLAG) HOSTLDLIBS_dropper += $(MFLAG) HOSTLDLIBS_user-trap += $(MFLAG) endif -always := $(hostprogs-y) +always-y := $(hostprogs) endif diff --git a/samples/uhid/Makefile b/samples/uhid/Makefile index 8c9bc9f98d37..5f44ea40d6d5 100644 --- a/samples/uhid/Makefile +++ b/samples/uhid/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only # List of programs to build -hostprogs-y := uhid-example +hostprogs := uhid-example # Tell kbuild to always build the programs -always := $(hostprogs-y) +always-y := $(hostprogs) HOSTCFLAGS_uhid-example.o += -I$(objtree)/usr/include diff --git a/samples/vfs/Makefile b/samples/vfs/Makefile index e21c9f6fe9be..65acdde5c117 100644 --- a/samples/vfs/Makefile +++ b/samples/vfs/Makefile @@ -1,11 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only # List of programs to build -hostprogs-y := \ +hostprogs := \ test-fsmount \ test-statx -# Tell kbuild to always build the programs -always := $(hostprogs-y) +always-y := $(hostprogs) HOSTCFLAGS_test-fsmount.o += -I$(objtree)/usr/include HOSTCFLAGS_test-statx.o += -I$(objtree)/usr/include diff --git a/scripts/Makefile b/scripts/Makefile index 4d41f48e7376..5e75802b1a44 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -7,14 +7,14 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include -hostprogs-$(CONFIG_BUILD_BIN2C) += bin2c -hostprogs-$(CONFIG_KALLSYMS) += kallsyms -hostprogs-$(BUILD_C_RECORDMCOUNT) += recordmcount -hostprogs-$(CONFIG_BUILDTIME_TABLE_SORT) += sorttable -hostprogs-$(CONFIG_ASN1) += asn1_compiler -hostprogs-$(CONFIG_MODULE_SIG_FORMAT) += sign-file -hostprogs-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += extract-cert -hostprogs-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert +always-$(CONFIG_BUILD_BIN2C) += bin2c +always-$(CONFIG_KALLSYMS) += kallsyms +always-$(BUILD_C_RECORDMCOUNT) += recordmcount +always-$(CONFIG_BUILDTIME_TABLE_SORT) += sorttable +always-$(CONFIG_ASN1) += asn1_compiler +always-$(CONFIG_MODULE_SIG_FORMAT) += sign-file +always-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += extract-cert +always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include @@ -30,10 +30,10 @@ HOSTCFLAGS_sorttable.o += -DUNWINDER_ORC_ENABLED HOSTLDLIBS_sorttable = -lpthread endif -always := $(hostprogs-y) $(hostprogs-m) +hostprogs := $(always-y) $(always-m) -# The following hostprogs-y programs are only build on demand -hostprogs-y += unifdef +# The following programs are only built on demand +hostprogs += unifdef subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins subdir-$(CONFIG_MODVERSIONS) += genksyms diff --git a/scripts/Makefile.build b/scripts/Makefile.build index a562d695f0fa..a1730d42e5f3 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -16,6 +16,8 @@ obj-m := lib-y := lib-m := always := +always-y := +always-m := targets := subdir-y := subdir-m := @@ -44,7 +46,7 @@ include $(kbuild-file) include scripts/Makefile.lib # Do not include host rules unless needed -ifneq ($(hostprogs-y)$(hostprogs-m)$(hostlibs-y)$(hostlibs-m)$(hostcxxlibs-y)$(hostcxxlibs-m),) +ifneq ($(hostprogs)$(hostlibs-y)$(hostlibs-m)$(hostcxxlibs-y)$(hostcxxlibs-m),) include scripts/Makefile.host endif @@ -348,7 +350,7 @@ $(obj)/%.o: $(src)/%.S $(objtool_dep) FORCE $(call if_changed_rule,as_o_S) targets += $(filter-out $(subdir-obj-y), $(real-obj-y)) $(real-obj-m) $(lib-y) -targets += $(extra-y) $(MAKECMDGOALS) $(always) +targets += $(extra-y) $(always-y) $(MAKECMDGOALS) # Linker scripts preprocessor (.lds.S -> .lds) # --------------------------------------------------------------------------- @@ -490,7 +492,7 @@ else __build: $(if $(KBUILD_BUILTIN),$(builtin-target) $(lib-target) $(extra-y)) \ $(if $(KBUILD_MODULES),$(obj-m) $(mod-targets) $(modorder-target)) \ - $(subdir-ym) $(always) + $(subdir-ym) $(always-y) @: endif diff --git a/scripts/Makefile.clean b/scripts/Makefile.clean index e367eb95c5c0..1e4206566a82 100644 --- a/scripts/Makefile.clean +++ b/scripts/Makefile.clean @@ -28,8 +28,8 @@ subdir-ymn := $(addprefix $(obj)/,$(subdir-ymn)) # directory __clean-files := $(extra-y) $(extra-m) $(extra-) \ - $(always) $(targets) $(clean-files) \ - $(hostprogs-y) $(hostprogs-m) $(hostprogs-) \ + $(always) $(always-y) $(always-m) $(always-) $(targets) $(clean-files) \ + $(hostprogs) $(hostprogs-y) $(hostprogs-m) $(hostprogs-) \ $(hostlibs-y) $(hostlibs-m) $(hostlibs-) \ $(hostcxxlibs-y) $(hostcxxlibs-m) diff --git a/scripts/Makefile.host b/scripts/Makefile.host index 4c51c95d40f4..3b7121d43324 100644 --- a/scripts/Makefile.host +++ b/scripts/Makefile.host @@ -24,21 +24,21 @@ $(obj)/%.tab.c $(obj)/%.tab.h: $(src)/%.y FORCE # Both C and C++ are supported, but preferred language is C for such utilities. # # Sample syntax (see Documentation/kbuild/makefiles.rst for reference) -# hostprogs-y := bin2hex +# hostprogs := bin2hex # Will compile bin2hex.c and create an executable named bin2hex # -# hostprogs-y := lxdialog +# hostprogs := lxdialog # lxdialog-objs := checklist.o lxdialog.o # Will compile lxdialog.c and checklist.c, and then link the executable # lxdialog, based on checklist.o and lxdialog.o # -# hostprogs-y := qconf +# hostprogs := qconf # qconf-cxxobjs := qconf.o # qconf-objs := menu.o # Will compile qconf as a C++ program, and menu as a C program. # They are linked as C++ code to the executable qconf -__hostprogs := $(sort $(hostprogs-y) $(hostprogs-m)) +__hostprogs := $(sort $(hostprogs)) host-cshlib := $(sort $(hostlibs-y) $(hostlibs-m)) host-cxxshlib := $(sort $(hostcxxlibs-y) $(hostcxxlibs-m)) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index d10f7a03e0ee..bae62549e3d2 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -4,6 +4,8 @@ asflags-y += $(EXTRA_AFLAGS) ccflags-y += $(EXTRA_CFLAGS) cppflags-y += $(EXTRA_CPPFLAGS) ldflags-y += $(EXTRA_LDFLAGS) +always-y += $(always) +hostprogs += $(hostprogs-y) $(hostprogs-m) # flags that take effect in current and sub directories KBUILD_AFLAGS += $(subdir-asflags-y) @@ -59,6 +61,8 @@ subdir-obj-y := $(filter %/built-in.a, $(obj-y)) real-obj-y := $(foreach m, $(obj-y), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-))),$($(m:.o=-objs)) $($(m:.o=-y)),$(m))) real-obj-m := $(foreach m, $(obj-m), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-m)) $($(m:.o=-))),$($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-m)),$(m))) +always-y += $(always-m) + # DTB # If CONFIG_OF_ALL_DTBS is enabled, all DT blobs are built extra-y += $(dtb-y) @@ -72,7 +76,7 @@ endif # Add subdir path extra-y := $(addprefix $(obj)/,$(extra-y)) -always := $(addprefix $(obj)/,$(always)) +always-y := $(addprefix $(obj)/,$(always-y)) targets := $(addprefix $(obj)/,$(targets)) modorder := $(addprefix $(obj)/,$(modorder)) obj-m := $(addprefix $(obj)/,$(obj-m)) diff --git a/scripts/basic/Makefile b/scripts/basic/Makefile index 7c9cb80d097b..290dd27d2809 100644 --- a/scripts/basic/Makefile +++ b/scripts/basic/Makefile @@ -2,5 +2,5 @@ # # fixdep: used to generate dependency information during build process -hostprogs-y := fixdep -always := $(hostprogs-y) +hostprogs := fixdep +always-y := $(hostprogs) diff --git a/scripts/dtc/Makefile b/scripts/dtc/Makefile index b5a5b1c548c9..3acbb410904c 100644 --- a/scripts/dtc/Makefile +++ b/scripts/dtc/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 # scripts/dtc makefile -hostprogs-$(CONFIG_DTC) := dtc -always := $(hostprogs-y) +hostprogs := dtc +always-$(CONFIG_DTC) := $(hostprogs) dtc-objs := dtc.o flattree.o fstree.o data.o livetree.o treesource.o \ srcpos.o checks.o util.o diff --git a/scripts/gcc-plugins/Makefile b/scripts/gcc-plugins/Makefile index aa0d0ec6936d..f2ee8bd7abc6 100644 --- a/scripts/gcc-plugins/Makefile +++ b/scripts/gcc-plugins/Makefile @@ -23,7 +23,7 @@ $(objtree)/$(obj)/randomize_layout_seed.h: FORCE targets = randomize_layout_seed.h randomize_layout_hash.h $(HOSTLIBS)-y := $(foreach p,$(GCC_PLUGIN),$(if $(findstring /,$(p)),,$(p))) -always := $($(HOSTLIBS)-y) +always-y := $($(HOSTLIBS)-y) $(foreach p,$($(HOSTLIBS)-y:%.so=%),$(eval $(p)-objs := $(p).o)) diff --git a/scripts/genksyms/Makefile b/scripts/genksyms/Makefile index 78629f515e78..d328de1e10ee 100644 --- a/scripts/genksyms/Makefile +++ b/scripts/genksyms/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -hostprogs-y := genksyms -always := $(hostprogs-y) +hostprogs := genksyms +always-y := $(hostprogs) genksyms-objs := genksyms.o parse.tab.o lex.lex.o diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 94153732ec00..0133dfaaf352 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -33,8 +33,8 @@ struct sym_entry { unsigned long long addr; unsigned int len; unsigned int start_pos; - unsigned char *sym; unsigned int percpu_absolute; + unsigned char sym[0]; }; struct addr_range { @@ -55,7 +55,7 @@ static struct addr_range percpu_range = { "__per_cpu_start", "__per_cpu_end", -1ULL, 0 }; -static struct sym_entry *table; +static struct sym_entry **table; static unsigned int table_size, table_cnt; static int all_symbols; static int absolute_percpu; @@ -174,49 +174,55 @@ static void check_symbol_range(const char *sym, unsigned long long addr, } } -static int read_symbol(FILE *in, struct sym_entry *s) +static struct sym_entry *read_symbol(FILE *in) { - char sym[500], stype; + char name[500], type; + unsigned long long addr; + unsigned int len; + struct sym_entry *sym; int rc; - rc = fscanf(in, "%llx %c %499s\n", &s->addr, &stype, sym); + rc = fscanf(in, "%llx %c %499s\n", &addr, &type, name); if (rc != 3) { - if (rc != EOF && fgets(sym, 500, in) == NULL) + if (rc != EOF && fgets(name, 500, in) == NULL) fprintf(stderr, "Read error or end of file.\n"); - return -1; + return NULL; } - if (strlen(sym) >= KSYM_NAME_LEN) { + if (strlen(name) >= KSYM_NAME_LEN) { fprintf(stderr, "Symbol %s too long for kallsyms (%zu >= %d).\n" "Please increase KSYM_NAME_LEN both in kernel and kallsyms.c\n", - sym, strlen(sym), KSYM_NAME_LEN); - return -1; + name, strlen(name), KSYM_NAME_LEN); + return NULL; } - if (is_ignored_symbol(sym, stype)) - return -1; + if (is_ignored_symbol(name, type)) + return NULL; /* Ignore most absolute/undefined (?) symbols. */ - if (strcmp(sym, "_text") == 0) - _text = s->addr; + if (strcmp(name, "_text") == 0) + _text = addr; - check_symbol_range(sym, s->addr, text_ranges, ARRAY_SIZE(text_ranges)); - check_symbol_range(sym, s->addr, &percpu_range, 1); + check_symbol_range(name, addr, text_ranges, ARRAY_SIZE(text_ranges)); + check_symbol_range(name, addr, &percpu_range, 1); /* include the type field in the symbol name, so that it gets * compressed together */ - s->len = strlen(sym) + 1; - s->sym = malloc(s->len + 1); - if (!s->sym) { + + len = strlen(name) + 1; + + sym = malloc(sizeof(*sym) + len + 1); + if (!sym) { fprintf(stderr, "kallsyms failure: " "unable to allocate required amount of memory\n"); exit(EXIT_FAILURE); } - strcpy(sym_name(s), sym); - s->sym[0] = stype; - - s->percpu_absolute = 0; + sym->addr = addr; + sym->len = len; + sym->sym[0] = type; + strcpy(sym_name(sym), name); + sym->percpu_absolute = 0; - return 0; + return sym; } static int symbol_in_range(const struct sym_entry *s, @@ -268,12 +274,12 @@ static void shrink_table(void) pos = 0; for (i = 0; i < table_cnt; i++) { - if (symbol_valid(&table[i])) { + if (symbol_valid(table[i])) { if (pos != i) table[pos] = table[i]; pos++; } else { - free(table[i].sym); + free(table[i]); } } table_cnt = pos; @@ -287,7 +293,15 @@ static void shrink_table(void) static void read_map(FILE *in) { + struct sym_entry *sym; + while (!feof(in)) { + sym = read_symbol(in); + if (!sym) + continue; + + sym->start_pos = table_cnt; + if (table_cnt >= table_size) { table_size += 10000; table = realloc(table, sizeof(*table) * table_size); @@ -296,10 +310,8 @@ static void read_map(FILE *in) exit (1); } } - if (read_symbol(in, &table[table_cnt]) == 0) { - table[table_cnt].start_pos = table_cnt; - table_cnt++; - } + + table[table_cnt++] = sym; } } @@ -387,27 +399,27 @@ static void write_src(void) int overflow; if (!absolute_percpu) { - offset = table[i].addr - relative_base; + offset = table[i]->addr - relative_base; overflow = (offset < 0 || offset > UINT_MAX); - } else if (symbol_absolute(&table[i])) { - offset = table[i].addr; + } else if (symbol_absolute(table[i])) { + offset = table[i]->addr; overflow = (offset < 0 || offset > INT_MAX); } else { - offset = relative_base - table[i].addr - 1; + offset = relative_base - table[i]->addr - 1; overflow = (offset < INT_MIN || offset >= 0); } if (overflow) { fprintf(stderr, "kallsyms failure: " "%s symbol value %#llx out of range in relative mode\n", - symbol_absolute(&table[i]) ? "absolute" : "relative", - table[i].addr); + symbol_absolute(table[i]) ? "absolute" : "relative", + table[i]->addr); exit(EXIT_FAILURE); } printf("\t.long\t%#x\n", (int)offset); - } else if (!symbol_absolute(&table[i])) { - output_address(table[i].addr); + } else if (!symbol_absolute(table[i])) { + output_address(table[i]->addr); } else { - printf("\tPTR\t%#llx\n", table[i].addr); + printf("\tPTR\t%#llx\n", table[i]->addr); } } printf("\n"); @@ -437,12 +449,12 @@ static void write_src(void) if ((i & 0xFF) == 0) markers[i >> 8] = off; - printf("\t.byte 0x%02x", table[i].len); - for (k = 0; k < table[i].len; k++) - printf(", 0x%02x", table[i].sym[k]); + printf("\t.byte 0x%02x", table[i]->len); + for (k = 0; k < table[i]->len; k++) + printf(", 0x%02x", table[i]->sym[k]); printf("\n"); - off += table[i].len + 1; + off += table[i]->len + 1; } printf("\n"); @@ -496,7 +508,7 @@ static void build_initial_tok_table(void) unsigned int i; for (i = 0; i < table_cnt; i++) - learn_symbol(table[i].sym, table[i].len); + learn_symbol(table[i]->sym, table[i]->len); } static unsigned char *find_token(unsigned char *str, int len, @@ -520,15 +532,15 @@ static void compress_symbols(const unsigned char *str, int idx) for (i = 0; i < table_cnt; i++) { - len = table[i].len; - p1 = table[i].sym; + len = table[i]->len; + p1 = table[i]->sym; /* find the token on the symbol */ p2 = find_token(p1, len, str); if (!p2) continue; /* decrease the counts for this symbol's tokens */ - forget_symbol(table[i].sym, len); + forget_symbol(table[i]->sym, len); size = len; @@ -547,10 +559,10 @@ static void compress_symbols(const unsigned char *str, int idx) } while (p2); - table[i].len = len; + table[i]->len = len; /* increase the counts for this symbol's new tokens */ - learn_symbol(table[i].sym, len); + learn_symbol(table[i]->sym, len); } } @@ -606,8 +618,8 @@ static void insert_real_symbols_in_table(void) unsigned int i, j, c; for (i = 0; i < table_cnt; i++) { - for (j = 0; j < table[i].len; j++) { - c = table[i].sym[j]; + for (j = 0; j < table[i]->len; j++) { + c = table[i]->sym[j]; best_table[c][0]=c; best_table_len[c]=1; } @@ -660,13 +672,10 @@ static int may_be_linker_script_provide_symbol(const struct sym_entry *se) static int compare_symbols(const void *a, const void *b) { - const struct sym_entry *sa; - const struct sym_entry *sb; + const struct sym_entry *sa = *(const struct sym_entry **)a; + const struct sym_entry *sb = *(const struct sym_entry **)b; int wa, wb; - sa = a; - sb = b; - /* sort by address first */ if (sa->addr > sb->addr) return 1; @@ -697,7 +706,7 @@ static int compare_symbols(const void *a, const void *b) static void sort_symbols(void) { - qsort(table, table_cnt, sizeof(struct sym_entry), compare_symbols); + qsort(table, table_cnt, sizeof(table[0]), compare_symbols); } static void make_percpus_absolute(void) @@ -705,14 +714,14 @@ static void make_percpus_absolute(void) unsigned int i; for (i = 0; i < table_cnt; i++) - if (symbol_in_range(&table[i], &percpu_range, 1)) { + if (symbol_in_range(table[i], &percpu_range, 1)) { /* * Keep the 'A' override for percpu symbols to * ensure consistent behavior compared to older * versions of this tool. */ - table[i].sym[0] = 'A'; - table[i].percpu_absolute = 1; + table[i]->sym[0] = 'A'; + table[i]->percpu_absolute = 1; } } @@ -722,12 +731,12 @@ static void record_relative_base(void) unsigned int i; for (i = 0; i < table_cnt; i++) - if (!symbol_absolute(&table[i])) { + if (!symbol_absolute(table[i])) { /* * The table is sorted by address. * Take the first non-absolute symbol value. */ - relative_base = table[i].addr; + relative_base = table[i]->addr; return; } } diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index fbeb62ae3401..5887ceb6229e 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -157,11 +157,11 @@ HOSTCFLAGS_lexer.lex.o := -I $(srctree)/$(src) HOSTCFLAGS_parser.tab.o := -I $(srctree)/$(src) # conf: Used for defconfig, oldconfig and related targets -hostprogs-y += conf +hostprogs += conf conf-objs := conf.o $(common-objs) # nconf: Used for the nconfig target based on ncurses -hostprogs-y += nconf +hostprogs += nconf nconf-objs := nconf.o nconf.gui.o $(common-objs) HOSTLDLIBS_nconf = $(shell . $(obj)/nconf-cfg && echo $$libs) @@ -171,7 +171,7 @@ HOSTCFLAGS_nconf.gui.o = $(shell . $(obj)/nconf-cfg && echo $$cflags) $(obj)/nconf.o $(obj)/nconf.gui.o: $(obj)/nconf-cfg # mconf: Used for the menuconfig target based on lxdialog -hostprogs-y += mconf +hostprogs += mconf lxdialog := $(addprefix lxdialog/, \ checklist.o inputbox.o menubox.o textbox.o util.o yesno.o) mconf-objs := mconf.o $(lxdialog) $(common-objs) @@ -183,7 +183,7 @@ $(foreach f, mconf.o $(lxdialog), \ $(addprefix $(obj)/, mconf.o $(lxdialog)): $(obj)/mconf-cfg # qconf: Used for the xconfig target based on Qt -hostprogs-y += qconf +hostprogs += qconf qconf-cxxobjs := qconf.o qconf-objs := images.o $(common-objs) @@ -199,7 +199,7 @@ $(obj)/%.moc: $(src)/%.h $(obj)/qconf-cfg $(call cmd,moc) # gconf: Used for the gconfig target based on GTK+ -hostprogs-y += gconf +hostprogs += gconf gconf-objs := gconf.o images.o $(common-objs) HOSTLDLIBS_gconf = $(shell . $(obj)/gconf-cfg && echo $$libs) diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 11f6c72c2eee..a39d93e3c6ae 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -1312,7 +1312,7 @@ bool conf_set_all_new_symbols(enum conf_def_mode mode) sym_calc_value(csym); if (mode == def_random) - has_changed = randomize_choice_values(csym); + has_changed |= randomize_choice_values(csym); else { set_all_choice_values(csym); has_changed = true; @@ -1331,9 +1331,8 @@ void conf_rewrite_mod_or_yes(enum conf_def_mode mode) for_all_symbols(i, sym) { if (sym_get_type(sym) == S_TRISTATE && - sym->def[S_DEF_USER].tri == old_val) { + sym->def[S_DEF_USER].tri == old_val) sym->def[S_DEF_USER].tri = new_val; - sym_add_change_count(1); - } } + sym_clear_all_valid(); } diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 1919c311c149..dd484e92752e 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -239,7 +239,7 @@ else fi; # final build of init/ -${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init +${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init need-builtin=1 #link vmlinux.o info LD vmlinux.o diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile index 42c5d50f2bcc..296b6a3878b2 100644 --- a/scripts/mod/Makefile +++ b/scripts/mod/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 OBJECT_FILES_NON_STANDARD := y -hostprogs-y := modpost mk_elfconfig -always := $(hostprogs-y) empty.o +hostprogs := modpost mk_elfconfig +always-y := $(hostprogs) empty.o modpost-objs := modpost.o file2alias.o sumversion.o diff --git a/scripts/selinux/genheaders/Makefile b/scripts/selinux/genheaders/Makefile index e8c533140981..70cf8d95d07c 100644 --- a/scripts/selinux/genheaders/Makefile +++ b/scripts/selinux/genheaders/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -hostprogs-y := genheaders +hostprogs := genheaders HOST_EXTRACFLAGS += \ -I$(srctree)/include/uapi -I$(srctree)/include \ -I$(srctree)/security/selinux/include -always := $(hostprogs-y) +always-y := $(hostprogs) diff --git a/scripts/selinux/mdp/Makefile b/scripts/selinux/mdp/Makefile index 8a1269a9d0ba..3026f3c2aa2b 100644 --- a/scripts/selinux/mdp/Makefile +++ b/scripts/selinux/mdp/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 -hostprogs-y := mdp +hostprogs := mdp HOST_EXTRACFLAGS += \ -I$(srctree)/include/uapi -I$(srctree)/include \ -I$(srctree)/security/selinux/include -I$(objtree)/include -always := $(hostprogs-y) +always-y := $(hostprogs) clean-files := policy.* file_contexts diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4b6991e178d3..1659b59fb5d7 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -698,7 +698,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, if (!strcmp(sb->s_type->name, "debugfs") || !strcmp(sb->s_type->name, "tracefs") || - !strcmp(sb->s_type->name, "binderfs") || + !strcmp(sb->s_type->name, "binder") || !strcmp(sb->s_type->name, "pstore")) sbsec->flags |= SE_SBGENFS; diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c index a308ce1e6a13..f511ffccb131 100644 --- a/security/selinux/ss/sidtab.c +++ b/security/selinux/ss/sidtab.c @@ -518,19 +518,13 @@ void sidtab_sid2str_put(struct sidtab *s, struct sidtab_entry *entry, const char *str, u32 str_len) { struct sidtab_str_cache *cache, *victim = NULL; + unsigned long flags; /* do not cache invalid contexts */ if (entry->context.len) return; - /* - * Skip the put operation when in non-task context to avoid the need - * to disable interrupts while holding s->cache_lock. - */ - if (!in_task()) - return; - - spin_lock(&s->cache_lock); + spin_lock_irqsave(&s->cache_lock, flags); cache = rcu_dereference_protected(entry->cache, lockdep_is_held(&s->cache_lock)); @@ -561,7 +555,7 @@ void sidtab_sid2str_put(struct sidtab *s, struct sidtab_entry *entry, rcu_assign_pointer(entry->cache, cache); out_unlock: - spin_unlock(&s->cache_lock); + spin_unlock_irqrestore(&s->cache_lock, flags); kfree_rcu(victim, rcu_member); } diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 336406bcb59e..d5443eeb8b63 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -2594,7 +2594,8 @@ void snd_pcm_release_substream(struct snd_pcm_substream *substream) snd_pcm_drop(substream); if (substream->hw_opened) { - do_hw_free(substream); + if (substream->runtime->status->state != SNDRV_PCM_STATE_OPEN) + do_hw_free(substream); substream->ops->close(substream); substream->hw_opened = 0; } diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4770fb3f51fb..6c8cb4ce517e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2447,6 +2447,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE), SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), + SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD), SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), @@ -5701,8 +5702,11 @@ static void alc_fixup_headset_jack(struct hda_codec *codec, break; case HDA_FIXUP_ACT_INIT: switch (codec->core.vendor_id) { + case 0x10ec0215: case 0x10ec0225: + case 0x10ec0285: case 0x10ec0295: + case 0x10ec0289: case 0x10ec0299: alc_write_coef_idx(codec, 0x48, 0xd011); alc_update_coef_idx(codec, 0x49, 0x007f, 0x0045); diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 018b1ecb5404..a48313dfa967 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -151,8 +151,34 @@ static int uac_clock_selector_set_val(struct snd_usb_audio *chip, int selector_i return ret; } +/* + * Assume the clock is valid if clock source supports only one single sample + * rate, the terminal is connected directly to it (there is no clock selector) + * and clock type is internal. This is to deal with some Denon DJ controllers + * that always reports that clock is invalid. + */ +static bool uac_clock_source_is_valid_quirk(struct snd_usb_audio *chip, + struct audioformat *fmt, + int source_id) +{ + if (fmt->protocol == UAC_VERSION_2) { + struct uac_clock_source_descriptor *cs_desc = + snd_usb_find_clock_source(chip->ctrl_intf, source_id); + + if (!cs_desc) + return false; + + return (fmt->nr_rates == 1 && + (fmt->clock & 0xff) == cs_desc->bClockID && + (cs_desc->bmAttributes & 0x3) != + UAC_CLOCK_SOURCE_TYPE_EXT); + } + + return false; +} + static bool uac_clock_source_is_valid(struct snd_usb_audio *chip, - int protocol, + struct audioformat *fmt, int source_id) { int err; @@ -160,7 +186,7 @@ static bool uac_clock_source_is_valid(struct snd_usb_audio *chip, struct usb_device *dev = chip->dev; u32 bmControls; - if (protocol == UAC_VERSION_3) { + if (fmt->protocol == UAC_VERSION_3) { struct uac3_clock_source_descriptor *cs_desc = snd_usb_find_clock_source_v3(chip->ctrl_intf, source_id); @@ -194,10 +220,14 @@ static bool uac_clock_source_is_valid(struct snd_usb_audio *chip, return false; } - return data ? true : false; + if (data) + return true; + else + return uac_clock_source_is_valid_quirk(chip, fmt, source_id); } -static int __uac_clock_find_source(struct snd_usb_audio *chip, int entity_id, +static int __uac_clock_find_source(struct snd_usb_audio *chip, + struct audioformat *fmt, int entity_id, unsigned long *visited, bool validate) { struct uac_clock_source_descriptor *source; @@ -217,7 +247,7 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, int entity_id, source = snd_usb_find_clock_source(chip->ctrl_intf, entity_id); if (source) { entity_id = source->bClockID; - if (validate && !uac_clock_source_is_valid(chip, UAC_VERSION_2, + if (validate && !uac_clock_source_is_valid(chip, fmt, entity_id)) { usb_audio_err(chip, "clock source %d is not valid, cannot use\n", @@ -248,8 +278,9 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, int entity_id, } cur = ret; - ret = __uac_clock_find_source(chip, selector->baCSourceID[ret - 1], - visited, validate); + ret = __uac_clock_find_source(chip, fmt, + selector->baCSourceID[ret - 1], + visited, validate); if (!validate || ret > 0 || !chip->autoclock) return ret; @@ -260,8 +291,9 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, int entity_id, if (i == cur) continue; - ret = __uac_clock_find_source(chip, selector->baCSourceID[i - 1], - visited, true); + ret = __uac_clock_find_source(chip, fmt, + selector->baCSourceID[i - 1], + visited, true); if (ret < 0) continue; @@ -281,14 +313,16 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, int entity_id, /* FIXME: multipliers only act as pass-thru element for now */ multiplier = snd_usb_find_clock_multiplier(chip->ctrl_intf, entity_id); if (multiplier) - return __uac_clock_find_source(chip, multiplier->bCSourceID, - visited, validate); + return __uac_clock_find_source(chip, fmt, + multiplier->bCSourceID, + visited, validate); return -EINVAL; } -static int __uac3_clock_find_source(struct snd_usb_audio *chip, int entity_id, - unsigned long *visited, bool validate) +static int __uac3_clock_find_source(struct snd_usb_audio *chip, + struct audioformat *fmt, int entity_id, + unsigned long *visited, bool validate) { struct uac3_clock_source_descriptor *source; struct uac3_clock_selector_descriptor *selector; @@ -307,7 +341,7 @@ static int __uac3_clock_find_source(struct snd_usb_audio *chip, int entity_id, source = snd_usb_find_clock_source_v3(chip->ctrl_intf, entity_id); if (source) { entity_id = source->bClockID; - if (validate && !uac_clock_source_is_valid(chip, UAC_VERSION_3, + if (validate && !uac_clock_source_is_valid(chip, fmt, entity_id)) { usb_audio_err(chip, "clock source %d is not valid, cannot use\n", @@ -338,7 +372,8 @@ static int __uac3_clock_find_source(struct snd_usb_audio *chip, int entity_id, } cur = ret; - ret = __uac3_clock_find_source(chip, selector->baCSourceID[ret - 1], + ret = __uac3_clock_find_source(chip, fmt, + selector->baCSourceID[ret - 1], visited, validate); if (!validate || ret > 0 || !chip->autoclock) return ret; @@ -350,8 +385,9 @@ static int __uac3_clock_find_source(struct snd_usb_audio *chip, int entity_id, if (i == cur) continue; - ret = __uac3_clock_find_source(chip, selector->baCSourceID[i - 1], - visited, true); + ret = __uac3_clock_find_source(chip, fmt, + selector->baCSourceID[i - 1], + visited, true); if (ret < 0) continue; @@ -372,7 +408,8 @@ static int __uac3_clock_find_source(struct snd_usb_audio *chip, int entity_id, multiplier = snd_usb_find_clock_multiplier_v3(chip->ctrl_intf, entity_id); if (multiplier) - return __uac3_clock_find_source(chip, multiplier->bCSourceID, + return __uac3_clock_find_source(chip, fmt, + multiplier->bCSourceID, visited, validate); return -EINVAL; @@ -389,18 +426,18 @@ static int __uac3_clock_find_source(struct snd_usb_audio *chip, int entity_id, * * Returns the clock source UnitID (>=0) on success, or an error. */ -int snd_usb_clock_find_source(struct snd_usb_audio *chip, int protocol, - int entity_id, bool validate) +int snd_usb_clock_find_source(struct snd_usb_audio *chip, + struct audioformat *fmt, bool validate) { DECLARE_BITMAP(visited, 256); memset(visited, 0, sizeof(visited)); - switch (protocol) { + switch (fmt->protocol) { case UAC_VERSION_2: - return __uac_clock_find_source(chip, entity_id, visited, + return __uac_clock_find_source(chip, fmt, fmt->clock, visited, validate); case UAC_VERSION_3: - return __uac3_clock_find_source(chip, entity_id, visited, + return __uac3_clock_find_source(chip, fmt, fmt->clock, visited, validate); default: return -EINVAL; @@ -501,8 +538,7 @@ static int set_sample_rate_v2v3(struct snd_usb_audio *chip, int iface, * automatic clock selection if the current clock is not * valid. */ - clock = snd_usb_clock_find_source(chip, fmt->protocol, - fmt->clock, true); + clock = snd_usb_clock_find_source(chip, fmt, true); if (clock < 0) { /* We did not find a valid clock, but that might be * because the current sample rate does not match an @@ -510,8 +546,7 @@ static int set_sample_rate_v2v3(struct snd_usb_audio *chip, int iface, * and we will do another validation after setting the * rate. */ - clock = snd_usb_clock_find_source(chip, fmt->protocol, - fmt->clock, false); + clock = snd_usb_clock_find_source(chip, fmt, false); if (clock < 0) return clock; } @@ -577,7 +612,7 @@ static int set_sample_rate_v2v3(struct snd_usb_audio *chip, int iface, validation: /* validate clock after rate change */ - if (!uac_clock_source_is_valid(chip, fmt->protocol, clock)) + if (!uac_clock_source_is_valid(chip, fmt, clock)) return -ENXIO; return 0; } diff --git a/sound/usb/clock.h b/sound/usb/clock.h index 076e31b79ee0..68df0fbe09d0 100644 --- a/sound/usb/clock.h +++ b/sound/usb/clock.h @@ -6,7 +6,7 @@ int snd_usb_init_sample_rate(struct snd_usb_audio *chip, int iface, struct usb_host_interface *alts, struct audioformat *fmt, int rate); -int snd_usb_clock_find_source(struct snd_usb_audio *chip, int protocol, - int entity_id, bool validate); +int snd_usb_clock_find_source(struct snd_usb_audio *chip, + struct audioformat *fmt, bool validate); #endif /* __USBAUDIO_CLOCK_H */ diff --git a/sound/usb/format.c b/sound/usb/format.c index 9260136e4c9b..9f5cb4ed3a0c 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -151,6 +151,19 @@ static u64 parse_audio_format_i_type(struct snd_usb_audio *chip, return pcm_formats; } +static int set_fixed_rate(struct audioformat *fp, int rate, int rate_bits) +{ + kfree(fp->rate_table); + fp->rate_table = kmalloc(sizeof(int), GFP_KERNEL); + if (!fp->rate_table) + return -ENOMEM; + fp->nr_rates = 1; + fp->rate_min = rate; + fp->rate_max = rate; + fp->rates = rate_bits; + fp->rate_table[0] = rate; + return 0; +} /* * parse the format descriptor and stores the possible sample rates @@ -223,6 +236,14 @@ static int parse_audio_format_rates_v1(struct snd_usb_audio *chip, struct audiof fp->rate_min = combine_triple(&fmt[offset + 1]); fp->rate_max = combine_triple(&fmt[offset + 4]); } + + /* Jabra Evolve 65 headset */ + if (chip->usb_id == USB_ID(0x0b0e, 0x030b)) { + /* only 48kHz for playback while keeping 16kHz for capture */ + if (fp->nr_rates != 1) + return set_fixed_rate(fp, 48000, SNDRV_PCM_RATE_48000); + } + return 0; } @@ -299,17 +320,7 @@ static int line6_parse_audio_format_rates_quirk(struct snd_usb_audio *chip, case USB_ID(0x0e41, 0x4248): /* Line6 Helix >= fw 2.82 */ case USB_ID(0x0e41, 0x4249): /* Line6 Helix Rack >= fw 2.82 */ case USB_ID(0x0e41, 0x424a): /* Line6 Helix LT >= fw 2.82 */ - /* supported rates: 48Khz */ - kfree(fp->rate_table); - fp->rate_table = kmalloc(sizeof(int), GFP_KERNEL); - if (!fp->rate_table) - return -ENOMEM; - fp->nr_rates = 1; - fp->rate_min = 48000; - fp->rate_max = 48000; - fp->rates = SNDRV_PCM_RATE_48000; - fp->rate_table[0] = 48000; - return 0; + return set_fixed_rate(fp, 48000, SNDRV_PCM_RATE_48000); } return -ENODEV; @@ -325,8 +336,7 @@ static int parse_audio_format_rates_v2v3(struct snd_usb_audio *chip, struct usb_device *dev = chip->dev; unsigned char tmp[2], *data; int nr_triplets, data_size, ret = 0, ret_l6; - int clock = snd_usb_clock_find_source(chip, fp->protocol, - fp->clock, false); + int clock = snd_usb_clock_find_source(chip, fp, false); if (clock < 0) { dev_err(&dev->dev, diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index d659fdb475e2..81b2db0edd5f 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -897,6 +897,15 @@ static int parse_term_proc_unit(struct mixer_build *state, return 0; } +static int parse_term_effect_unit(struct mixer_build *state, + struct usb_audio_term *term, + void *p1, int id) +{ + term->type = UAC3_EFFECT_UNIT << 16; /* virtual type */ + term->id = id; + return 0; +} + static int parse_term_uac2_clock_source(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id) @@ -981,8 +990,7 @@ static int __check_input_term(struct mixer_build *state, int id, UAC3_PROCESSING_UNIT); case PTYPE(UAC_VERSION_2, UAC2_EFFECT_UNIT): case PTYPE(UAC_VERSION_3, UAC3_EFFECT_UNIT): - return parse_term_proc_unit(state, term, p1, id, - UAC3_EFFECT_UNIT); + return parse_term_effect_unit(state, term, p1, id); case PTYPE(UAC_VERSION_1, UAC1_EXTENSION_UNIT): case PTYPE(UAC_VERSION_2, UAC2_EXTENSION_UNIT_V2): case PTYPE(UAC_VERSION_3, UAC3_EXTENSION_UNIT): diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 3a5242e383b2..7f558f4b4520 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1440,6 +1440,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x1395, 0x740a): /* Sennheiser DECT */ case USB_ID(0x1901, 0x0191): /* GE B850V3 CP2114 audio interface */ case USB_ID(0x21b4, 0x0081): /* AudioQuest DragonFly */ + case USB_ID(0x2912, 0x30c8): /* Audioengine D1 */ return true; } diff --git a/tools/bootconfig/include/linux/memblock.h b/tools/bootconfig/include/linux/memblock.h new file mode 100644 index 000000000000..7862f217d85d --- /dev/null +++ b/tools/bootconfig/include/linux/memblock.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _XBC_LINUX_MEMBLOCK_H +#define _XBC_LINUX_MEMBLOCK_H + +#include <stdlib.h> + +#define __pa(addr) (addr) +#define SMP_CACHE_BYTES 0 +#define memblock_alloc(size, align) malloc(size) +#define memblock_free(paddr, size) free(paddr) + +#endif diff --git a/tools/bootconfig/include/linux/printk.h b/tools/bootconfig/include/linux/printk.h index 017bcd6912a5..e978a63d3222 100644 --- a/tools/bootconfig/include/linux/printk.h +++ b/tools/bootconfig/include/linux/printk.h @@ -7,7 +7,7 @@ /* controllable printf */ extern int pr_output; #define printk(fmt, ...) \ - (pr_output ? printf(fmt, __VA_ARGS__) : 0) + (pr_output ? printf(fmt, ##__VA_ARGS__) : 0) #define pr_err printk #define pr_warn printk diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 47f488458328..e18eeb070562 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -140,7 +140,7 @@ int load_xbc_from_initrd(int fd, char **buf) return 0; if (lseek(fd, -8, SEEK_END) < 0) { - printf("Failed to lseek: %d\n", -errno); + pr_err("Failed to lseek: %d\n", -errno); return -errno; } @@ -155,7 +155,7 @@ int load_xbc_from_initrd(int fd, char **buf) return 0; if (lseek(fd, stat.st_size - 8 - size, SEEK_SET) < 0) { - printf("Failed to lseek: %d\n", -errno); + pr_err("Failed to lseek: %d\n", -errno); return -errno; } @@ -166,7 +166,7 @@ int load_xbc_from_initrd(int fd, char **buf) /* Wrong Checksum, maybe no boot config here */ rcsum = checksum((unsigned char *)*buf, size); if (csum != rcsum) { - printf("checksum error: %d != %d\n", csum, rcsum); + pr_err("checksum error: %d != %d\n", csum, rcsum); return 0; } @@ -185,13 +185,13 @@ int show_xbc(const char *path) fd = open(path, O_RDONLY); if (fd < 0) { - printf("Failed to open initrd %s: %d\n", path, fd); + pr_err("Failed to open initrd %s: %d\n", path, fd); return -errno; } ret = load_xbc_from_initrd(fd, &buf); if (ret < 0) - printf("Failed to load a boot config from initrd: %d\n", ret); + pr_err("Failed to load a boot config from initrd: %d\n", ret); else xbc_show_compact_tree(); @@ -209,7 +209,7 @@ int delete_xbc(const char *path) fd = open(path, O_RDWR); if (fd < 0) { - printf("Failed to open initrd %s: %d\n", path, fd); + pr_err("Failed to open initrd %s: %d\n", path, fd); return -errno; } @@ -222,7 +222,7 @@ int delete_xbc(const char *path) pr_output = 1; if (size < 0) { ret = size; - printf("Failed to load a boot config from initrd: %d\n", ret); + pr_err("Failed to load a boot config from initrd: %d\n", ret); } else if (size > 0) { ret = fstat(fd, &stat); if (!ret) @@ -245,7 +245,7 @@ int apply_xbc(const char *path, const char *xbc_path) ret = load_xbc_file(xbc_path, &buf); if (ret < 0) { - printf("Failed to load %s : %d\n", xbc_path, ret); + pr_err("Failed to load %s : %d\n", xbc_path, ret); return ret; } size = strlen(buf) + 1; @@ -262,7 +262,7 @@ int apply_xbc(const char *path, const char *xbc_path) /* Check the data format */ ret = xbc_init(buf); if (ret < 0) { - printf("Failed to parse %s: %d\n", xbc_path, ret); + pr_err("Failed to parse %s: %d\n", xbc_path, ret); free(data); free(buf); return ret; @@ -279,20 +279,20 @@ int apply_xbc(const char *path, const char *xbc_path) /* Remove old boot config if exists */ ret = delete_xbc(path); if (ret < 0) { - printf("Failed to delete previous boot config: %d\n", ret); + pr_err("Failed to delete previous boot config: %d\n", ret); return ret; } /* Apply new one */ fd = open(path, O_RDWR | O_APPEND); if (fd < 0) { - printf("Failed to open %s: %d\n", path, fd); + pr_err("Failed to open %s: %d\n", path, fd); return fd; } /* TODO: Ensure the @path is initramfs/initrd image */ ret = write(fd, data, size + 8); if (ret < 0) { - printf("Failed to apply a boot config: %d\n", ret); + pr_err("Failed to apply a boot config: %d\n", ret); return ret; } close(fd); @@ -334,12 +334,12 @@ int main(int argc, char **argv) } if (apply && delete) { - printf("Error: You can not specify both -a and -d at once.\n"); + pr_err("Error: You can not specify both -a and -d at once.\n"); return usage(); } if (optind >= argc) { - printf("Error: No initrd is specified.\n"); + pr_err("Error: No initrd is specified.\n"); return usage(); } diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh index 87725e8723f8..1de06de328e2 100755 --- a/tools/bootconfig/test-bootconfig.sh +++ b/tools/bootconfig/test-bootconfig.sh @@ -64,6 +64,15 @@ echo "File size check" new_size=$(stat -c %s $INITRD) xpass test $new_size -eq $initrd_size +echo "No error messge while applying" +OUTFILE=`mktemp tempout-XXXX` +dd if=/dev/zero of=$INITRD bs=4096 count=1 +printf " \0\0\0 \0\0\0" >> $INITRD +$BOOTCONF -a $TEMPCONF $INITRD > $OUTFILE 2>&1 +xfail grep -i "failed" $OUTFILE +xfail grep -i "error" $OUTFILE +rm $OUTFILE + echo "Max node number check" echo -n > $TEMPCONF diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index ede040cf82ad..2898cfdf8fe1 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -226,7 +226,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu, if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG) continue; - sink = term->val.drv_cfg; + sink = term->val.str; snprintf(path, PATH_MAX, "sinks/%s", sink); ret = perf_pmu__scan_file(pmu, path, "%x", &hash); diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 4ac56741ac5f..29c793ac7d10 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -131,7 +131,6 @@ int test__cpu_map_merge(struct test *test __maybe_unused, int subtest __maybe_un TEST_ASSERT_VAL("failed to merge map: bad nr", c->nr == 5); cpu_map__snprint(c, buf, sizeof(buf)); TEST_ASSERT_VAL("failed to merge map: bad result", !strcmp(buf, "1-2,4-5,7")); - perf_cpu_map__put(a); perf_cpu_map__put(b); perf_cpu_map__put(c); return 0; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a69e64236120..c8dc4450884c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -808,12 +808,12 @@ static void apply_config_terms(struct evsel *evsel, perf_evsel__reset_sample_bit(evsel, TIME); break; case PERF_EVSEL__CONFIG_TERM_CALLGRAPH: - callgraph_buf = term->val.callgraph; + callgraph_buf = term->val.str; break; case PERF_EVSEL__CONFIG_TERM_BRANCH: - if (term->val.branch && strcmp(term->val.branch, "no")) { + if (term->val.str && strcmp(term->val.str, "no")) { perf_evsel__set_sample_bit(evsel, BRANCH_STACK); - parse_branch_str(term->val.branch, + parse_branch_str(term->val.str, &attr->branch_sample_type); } else perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); @@ -1265,6 +1265,8 @@ static void perf_evsel__free_config_terms(struct evsel *evsel) list_for_each_entry_safe(term, h, &evsel->config_terms, list) { list_del_init(&term->list); + if (term->free_str) + zfree(&term->val.str); free(term); } } diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h index 1f8d2fe0b66e..e026ab67b008 100644 --- a/tools/perf/util/evsel_config.h +++ b/tools/perf/util/evsel_config.h @@ -32,22 +32,21 @@ enum evsel_term_type { struct perf_evsel_config_term { struct list_head list; enum evsel_term_type type; + bool free_str; union { u64 period; u64 freq; bool time; - char *callgraph; - char *drv_cfg; u64 stack_user; int max_stack; bool inherit; bool overwrite; - char *branch; unsigned long max_events; bool percore; bool aux_output; u32 aux_sample_size; u64 cfg_chg; + char *str; } val; bool weak; }; diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index fdd5bddb3075..f67960bedebb 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -549,6 +549,7 @@ void maps__insert(struct maps *maps, struct map *map) if (maps_by_name == NULL) { __maps__free_maps_by_name(maps); + up_write(&maps->lock); return; } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index ed7c008b9c8b..c01ba6f8fdad 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1219,8 +1219,7 @@ static int config_attr(struct perf_event_attr *attr, static int get_config_terms(struct list_head *head_config, struct list_head *head_terms __maybe_unused) { -#define ADD_CONFIG_TERM(__type, __name, __val) \ -do { \ +#define ADD_CONFIG_TERM(__type) \ struct perf_evsel_config_term *__t; \ \ __t = zalloc(sizeof(*__t)); \ @@ -1229,9 +1228,24 @@ do { \ \ INIT_LIST_HEAD(&__t->list); \ __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \ - __t->val.__name = __val; \ __t->weak = term->weak; \ - list_add_tail(&__t->list, head_terms); \ + list_add_tail(&__t->list, head_terms) + +#define ADD_CONFIG_TERM_VAL(__type, __name, __val) \ +do { \ + ADD_CONFIG_TERM(__type); \ + __t->val.__name = __val; \ +} while (0) + +#define ADD_CONFIG_TERM_STR(__type, __val) \ +do { \ + ADD_CONFIG_TERM(__type); \ + __t->val.str = strdup(__val); \ + if (!__t->val.str) { \ + zfree(&__t); \ + return -ENOMEM; \ + } \ + __t->free_str = true; \ } while (0) struct parse_events_term *term; @@ -1239,53 +1253,62 @@ do { \ list_for_each_entry(term, head_config, list) { switch (term->type_term) { case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: - ADD_CONFIG_TERM(PERIOD, period, term->val.num); + ADD_CONFIG_TERM_VAL(PERIOD, period, term->val.num); break; case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: - ADD_CONFIG_TERM(FREQ, freq, term->val.num); + ADD_CONFIG_TERM_VAL(FREQ, freq, term->val.num); break; case PARSE_EVENTS__TERM_TYPE_TIME: - ADD_CONFIG_TERM(TIME, time, term->val.num); + ADD_CONFIG_TERM_VAL(TIME, time, term->val.num); break; case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: - ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str); + ADD_CONFIG_TERM_STR(CALLGRAPH, term->val.str); break; case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE: - ADD_CONFIG_TERM(BRANCH, branch, term->val.str); + ADD_CONFIG_TERM_STR(BRANCH, term->val.str); break; case PARSE_EVENTS__TERM_TYPE_STACKSIZE: - ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num); + ADD_CONFIG_TERM_VAL(STACK_USER, stack_user, + term->val.num); break; case PARSE_EVENTS__TERM_TYPE_INHERIT: - ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 1 : 0); + ADD_CONFIG_TERM_VAL(INHERIT, inherit, + term->val.num ? 1 : 0); break; case PARSE_EVENTS__TERM_TYPE_NOINHERIT: - ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 0 : 1); + ADD_CONFIG_TERM_VAL(INHERIT, inherit, + term->val.num ? 0 : 1); break; case PARSE_EVENTS__TERM_TYPE_MAX_STACK: - ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num); + ADD_CONFIG_TERM_VAL(MAX_STACK, max_stack, + term->val.num); break; case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: - ADD_CONFIG_TERM(MAX_EVENTS, max_events, term->val.num); + ADD_CONFIG_TERM_VAL(MAX_EVENTS, max_events, + term->val.num); break; case PARSE_EVENTS__TERM_TYPE_OVERWRITE: - ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0); + ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite, + term->val.num ? 1 : 0); break; case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: - ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 0 : 1); + ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite, + term->val.num ? 0 : 1); break; case PARSE_EVENTS__TERM_TYPE_DRV_CFG: - ADD_CONFIG_TERM(DRV_CFG, drv_cfg, term->val.str); + ADD_CONFIG_TERM_STR(DRV_CFG, term->val.str); break; case PARSE_EVENTS__TERM_TYPE_PERCORE: - ADD_CONFIG_TERM(PERCORE, percore, - term->val.num ? true : false); + ADD_CONFIG_TERM_VAL(PERCORE, percore, + term->val.num ? true : false); break; case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: - ADD_CONFIG_TERM(AUX_OUTPUT, aux_output, term->val.num ? 1 : 0); + ADD_CONFIG_TERM_VAL(AUX_OUTPUT, aux_output, + term->val.num ? 1 : 0); break; case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: - ADD_CONFIG_TERM(AUX_SAMPLE_SIZE, aux_sample_size, term->val.num); + ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size, + term->val.num); break; default: break; @@ -1322,7 +1345,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config, } if (bits) - ADD_CONFIG_TERM(CFG_CHG, cfg_chg, bits); + ADD_CONFIG_TERM_VAL(CFG_CHG, cfg_chg, bits); #undef ADD_CONFIG_TERM return 0; diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index c470c49a804f..1c817add6ca4 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -303,7 +303,8 @@ static int convert_variable_type(Dwarf_Die *vr_die, char prefix; /* TODO: check all types */ - if (cast && strcmp(cast, "string") != 0 && strcmp(cast, "x") != 0 && + if (cast && strcmp(cast, "string") != 0 && strcmp(cast, "ustring") && + strcmp(cast, "x") != 0 && strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) { /* Non string type is OK */ /* and respect signedness/hexadecimal cast */ diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 6ccf6f6d09df..5b7d6c16d33f 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -193,16 +193,30 @@ static void find_address_in_section(bfd *abfd, asection *section, void *data) bfd_vma pc, vma; bfd_size_type size; struct a2l_data *a2l = data; + flagword flags; if (a2l->found) return; - if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0) +#ifdef bfd_get_section_flags + flags = bfd_get_section_flags(abfd, section); +#else + flags = bfd_section_flags(section); +#endif + if ((flags & SEC_ALLOC) == 0) return; pc = a2l->addr; +#ifdef bfd_get_section_vma vma = bfd_get_section_vma(abfd, section); +#else + vma = bfd_section_vma(section); +#endif +#ifdef bfd_get_section_size size = bfd_get_section_size(section); +#else + size = bfd_section_size(section); +#endif if (pc < vma || pc >= vma + size) return; diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 6dd403103800..60273f1bc7d9 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -910,6 +910,12 @@ ipv6_rt_replace_mpath() check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024" log_test $? 0 "Multipath with single path via multipath attribute" + # multipath with dev-only + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 dev veth1" + check_route6 "2001:db8:104::/64 dev veth1 metric 1024" + log_test $? 0 "Multipath with dev-only" + # route replace fails - invalid nexthop 1 add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3" diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh index e6fd7a18c655..0266443601bc 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh @@ -63,22 +63,23 @@ test_span_gre_mac() { local tundev=$1; shift local direction=$1; shift - local prot=$1; shift local what=$1; shift - local swp3mac=$(mac_get $swp3) - local h3mac=$(mac_get $h3) + case "$direction" in + ingress) local src_mac=$(mac_get $h1); local dst_mac=$(mac_get $h2) + ;; + egress) local src_mac=$(mac_get $h2); local dst_mac=$(mac_get $h1) + ;; + esac RET=0 mirror_install $swp1 $direction $tundev "matchall $tcflags" - tc filter add dev $h3 ingress pref 77 prot $prot \ - flower ip_proto 0x2f src_mac $swp3mac dst_mac $h3mac \ - action pass + icmp_capture_install h3-${tundev} "src_mac $src_mac dst_mac $dst_mac" - mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10 + mirror_test v$h1 192.0.2.1 192.0.2.2 h3-${tundev} 100 10 - tc filter del dev $h3 ingress pref 77 + icmp_capture_uninstall h3-${tundev} mirror_uninstall $swp1 $direction log_test "$direction $what: envelope MAC ($tcflags)" @@ -120,14 +121,14 @@ test_ip6gretap() test_gretap_mac() { - test_span_gre_mac gt4 ingress ip "mirror to gretap" - test_span_gre_mac gt4 egress ip "mirror to gretap" + test_span_gre_mac gt4 ingress "mirror to gretap" + test_span_gre_mac gt4 egress "mirror to gretap" } test_ip6gretap_mac() { - test_span_gre_mac gt6 ingress ipv6 "mirror to ip6gretap" - test_span_gre_mac gt6 egress ipv6 "mirror to ip6gretap" + test_span_gre_mac gt6 ingress "mirror to ip6gretap" + test_span_gre_mac gt6 egress "mirror to ip6gretap" } test_all() diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index bb10e33690b2..ce6bea9675c0 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -516,9 +516,9 @@ test_tos() RET=0 tc filter add dev v1 egress pref 77 prot ip \ - flower ip_tos 0x40 action pass - vxlan_ping_test $h1 192.0.2.3 "-Q 0x40" v1 egress 77 10 - vxlan_ping_test $h1 192.0.2.3 "-Q 0x30" v1 egress 77 0 + flower ip_tos 0x14 action pass + vxlan_ping_test $h1 192.0.2.3 "-Q 0x14" v1 egress 77 10 + vxlan_ping_test $h1 192.0.2.3 "-Q 0x18" v1 egress 77 0 tc filter del dev v1 egress pref 77 prot ip log_test "VXLAN: envelope TOS inheritance" diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index f5ab1cda8bb5..138d46b3f330 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -24,6 +24,7 @@ set -e exec 3>&1 +export LANG=C export WG_HIDE_KEYS=never netns0="wg-test-$$-0" netns1="wg-test-$$-1" @@ -297,7 +298,17 @@ ip1 -4 rule add table main suppress_prefixlength 0 n1 ping -W 1 -c 100 -f 192.168.99.7 n1 ping -W 1 -c 100 -f abab::1111 +# Have ns2 NAT into wg0 packets from ns0, but return an icmp error along the right route. +n2 iptables -t nat -A POSTROUTING -s 10.0.0.0/24 -d 192.168.241.0/24 -j SNAT --to 192.168.241.2 +n0 iptables -t filter -A INPUT \! -s 10.0.0.0/24 -i vethrs -j DROP # Manual rpfilter just to be explicit. +n2 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward' +ip0 -4 route add 192.168.241.1 via 10.0.0.100 +n2 wg set wg0 peer "$pub1" remove +[[ $(! n0 ping -W 1 -c 1 192.168.241.1 || false) == *"From 10.0.0.100 icmp_seq=1 Destination Host Unreachable"* ]] + n0 iptables -t nat -F +n0 iptables -t filter -F +n2 iptables -t nat -F ip0 link del vethrc ip0 link del vethrs ip1 link del wg0 diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index f10aa3590adc..28d477683e8a 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -38,19 +38,17 @@ endef define file_download = $(DISTFILES_PATH)/$(1): mkdir -p $(DISTFILES_PATH) - flock -x [email protected] -c '[ -f $$@ ] && exit 0; wget -O [email protected] $(MIRROR)$(1) || wget -O [email protected] $(2)$(1) || rm -f [email protected]' - if echo "$(3) [email protected]" | sha256sum -c -; then mv [email protected] $$@; else rm -f [email protected]; exit 71; fi + flock -x [email protected] -c '[ -f $$@ ] && exit 0; wget -O [email protected] $(MIRROR)$(1) || wget -O [email protected] $(2)$(1) || rm -f [email protected]; [ -f [email protected] ] || exit 1; if echo "$(3) [email protected]" | sha256sum -c -; then mv [email protected] $$@; else rm -f [email protected]; exit 71; fi' endef $(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3)) -$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81)) $(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) $(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae)) $(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c)) $(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa)) $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a)) -$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,aa8af0fdc9872d369d8c890a84dbc2a2466b55795dccd5b47721b2d97644b04f)) +$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64)) KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug) rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) @@ -295,21 +293,13 @@ $(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS) $(MAKE) -C $(IPERF_PATH) $(STRIP) -s $@ -$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR) - flock -s $<.lock tar -C $(BUILD_PATH) -xf $< - touch $@ - -$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS) - cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared - $(MAKE) -C $(LIBMNL_PATH) - sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc - $(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR) + mkdir -p $(BUILD_PATH) flock -s $<.lock tar -C $(BUILD_PATH) -xf $< touch $@ -$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) - LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg +$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(USERSPACE_DEPS) + $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src wg $(STRIP) -s $@ $(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS) @@ -340,17 +330,17 @@ $(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS) $(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR) mkdir -p $(BUILD_PATH) flock -s $<.lock tar -C $(BUILD_PATH) -xf $< - printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk + printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile touch $@ -$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) - LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip - $(STRIP) -s $(IPROUTE2_PATH)/ip/ip +$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS) + $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip + $(STRIP) -s $@ -$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) - LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss - $(STRIP) -s $(IPROUTE2_PATH)/misc/ss +$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS) + $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss + $(STRIP) -s $@ $(IPTABLES_PATH)/.installed: $(IPTABLES_TAR) mkdir -p $(BUILD_PATH) @@ -358,8 +348,8 @@ $(IPTABLES_PATH)/.installed: $(IPTABLES_TAR) sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure touch $@ -$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) - cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include +$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(USERSPACE_DEPS) + cd $(IPTABLES_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --disable-connlabel --with-kernel=$(BUILD_PATH)/include $(MAKE) -C $(IPTABLES_PATH) $(STRIP) -s $@ diff --git a/usr/Makefile b/usr/Makefile index b6081bb2cc72..c12e6b15ce72 100644 --- a/usr/Makefile +++ b/usr/Makefile @@ -52,7 +52,7 @@ ifeq ($(cpio-data),) cpio-data := $(obj)/initramfs_data.cpio -hostprogs-y := gen_init_cpio +hostprogs := gen_init_cpio # .initramfs_data.cpio.d is used to identify all files included # in initramfs and to detect if any files are added/removed. |