From 572ccdab50bb3ae9096d6947c2e78a7107acf2dd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 14 Apr 2018 10:51:05 -0700 Subject: scsi: target: target_core_user.[ch]: convert comments into DOC: Make documentation on target-supported userspace-I/O design be usable by kernel-doc by using "DOC:". This is used in the driver-api Documentation chapter. Signed-off-by: Randy Dunlap To: "Nicholas A. Bellinger" Cc: linux-scsi@vger.kernel.org Cc: target-devel@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: Jonathan Corbet Signed-off-by: Martin K. Petersen --- include/uapi/linux/target_core_user.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index 0be80f72646b..6e299349b158 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -9,21 +9,22 @@ #define TCMU_VERSION "2.0" -/* +/** + * DOC: Ring Design * Ring Design * ----------- * * The mmaped area is divided into three parts: - * 1) The mailbox (struct tcmu_mailbox, below) - * 2) The command ring - * 3) Everything beyond the command ring (data) + * 1) The mailbox (struct tcmu_mailbox, below); + * 2) The command ring; + * 3) Everything beyond the command ring (data). * * The mailbox tells userspace the offset of the command ring from the * start of the shared memory region, and how big the command ring is. * * The kernel passes SCSI commands to userspace by putting a struct * tcmu_cmd_entry in the ring, updating mailbox->cmd_head, and poking - * userspace via uio's interrupt mechanism. + * userspace via UIO's interrupt mechanism. * * tcmu_cmd_entry contains a header. If the header type is PAD, * userspace should skip hdr->length bytes (mod cmdr_size) to find the -- cgit From c1aea9196ef4f6b64a8ef7e62a933f7e4164aed9 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 16 May 2018 17:21:31 +0200 Subject: KVM: x86: hyperv: declare KVM_CAP_HYPERV_TLBFLUSH capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need a new capability to indicate support for the newly added HvFlushVirtualAddress{List,Space}{,Ex} hypercalls. Upon seeing this capability, userspace is supposed to announce PV TLB flush features by setting the appropriate CPUID bits (if needed). Signed-off-by: Vitaly Kuznetsov Signed-off-by: Radim Krčmář --- Documentation/virtual/kvm/api.txt | 9 +++++++++ arch/x86/kvm/x86.c | 1 + include/uapi/linux/kvm.h | 1 + 3 files changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 758bf403a169..c563da4244da 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4603,3 +4603,12 @@ Architectures: s390 This capability indicates that kvm will implement the interfaces to handle reset, migration and nested KVM for branch prediction blocking. The stfle facility 82 should not be provided to the guest without this capability. + +8.14 KVM_CAP_HYPERV_TLBFLUSH + +Architectures: x86 + +This capability indicates that KVM supports paravirtualized Hyper-V TLB Flush +hypercalls: +HvFlushVirtualAddressSpace, HvFlushVirtualAddressSpaceEx, +HvFlushVirtualAddressList, HvFlushVirtualAddressListEx. diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b7bf9ac9b6d1..22bd20fedd6d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2871,6 +2871,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV_SYNIC2: case KVM_CAP_HYPERV_VP_INDEX: case KVM_CAP_HYPERV_EVENTFD: + case KVM_CAP_HYPERV_TLBFLUSH: case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: case KVM_CAP_X86_ROBUST_SINGLESTEP: diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b02c41e53d56..b252ceb3965c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -948,6 +948,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_BPB 152 #define KVM_CAP_GET_MSR_FEATURES 153 #define KVM_CAP_HYPERV_EVENTFD 154 +#define KVM_CAP_HYPERV_TLBFLUSH 155 #ifdef KVM_CAP_IRQ_ROUTING -- cgit From 136200f4fd365a0a9c549893c9641f6eeff53f22 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Thu, 31 May 2018 12:11:03 -0500 Subject: rpmsg: char: Switch to SPDX license identifier Use the appropriate SPDX license identifier in the rpmsg char driver source file and drop the previous boilerplate license text. The uapi header file already had the SPDX license identifier added as part of a mass update but the license text removal was deferred for later, and this patch drops the same. Signed-off-by: Suman Anna Signed-off-by: Bjorn Andersson --- drivers/rpmsg/rpmsg_char.c | 10 +--------- include/uapi/linux/rpmsg.h | 9 --------- 2 files changed, 1 insertion(+), 18 deletions(-) (limited to 'include/uapi') diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c index 1efdf9ff8679..76a4477c6364 100644 --- a/drivers/rpmsg/rpmsg_char.c +++ b/drivers/rpmsg/rpmsg_char.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2016, Linaro Ltd. * Copyright (c) 2012, Michal Simek @@ -7,15 +8,6 @@ * * Based on rpmsg performance statistics driver by Michal Simek, which in turn * was based on TI & Google OMX rpmsg driver. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include #include diff --git a/include/uapi/linux/rpmsg.h b/include/uapi/linux/rpmsg.h index 225eb38705dc..e14c6dab4223 100644 --- a/include/uapi/linux/rpmsg.h +++ b/include/uapi/linux/rpmsg.h @@ -1,15 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * Copyright (c) 2016, Linaro Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifndef _UAPI_RPMSG_H_ -- cgit From 05e98465e08a58d7f8f577281f0c98c71c624c4b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Jun 2018 20:36:30 +0200 Subject: ncpfs: remove uapi .h files Now that ncpfs is removed from the tree, there is no need to keep the uapi header files around as no one uses them, and it is not a feature that the kernel supports anymore. Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/ncp.h | 202 ----------------------------------------- include/uapi/linux/ncp_fs.h | 147 ------------------------------ include/uapi/linux/ncp_mount.h | 72 --------------- include/uapi/linux/ncp_no.h | 20 ---- 4 files changed, 441 deletions(-) delete mode 100644 include/uapi/linux/ncp.h delete mode 100644 include/uapi/linux/ncp_fs.h delete mode 100644 include/uapi/linux/ncp_mount.h delete mode 100644 include/uapi/linux/ncp_no.h (limited to 'include/uapi') diff --git a/include/uapi/linux/ncp.h b/include/uapi/linux/ncp.h deleted file mode 100644 index ca6f3d42c88f..000000000000 --- a/include/uapi/linux/ncp.h +++ /dev/null @@ -1,202 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * ncp.h - * - * Copyright (C) 1995 by Volker Lendecke - * Modified for sparc by J.F. Chadima - * Modified for __constant_ntoh by Frank A. Vorstenbosch - * - */ - -#ifndef _LINUX_NCP_H -#define _LINUX_NCP_H - -#include - -#define NCP_PTYPE (0x11) -#define NCP_PORT (0x0451) - -#define NCP_ALLOC_SLOT_REQUEST (0x1111) -#define NCP_REQUEST (0x2222) -#define NCP_DEALLOC_SLOT_REQUEST (0x5555) - -struct ncp_request_header { - __u16 type; - __u8 sequence; - __u8 conn_low; - __u8 task; - __u8 conn_high; - __u8 function; - __u8 data[0]; -} __attribute__((packed)); - -#define NCP_REPLY (0x3333) -#define NCP_WATCHDOG (0x3E3E) -#define NCP_POSITIVE_ACK (0x9999) - -struct ncp_reply_header { - __u16 type; - __u8 sequence; - __u8 conn_low; - __u8 task; - __u8 conn_high; - __u8 completion_code; - __u8 connection_state; - __u8 data[0]; -} __attribute__((packed)); - -#define NCP_VOLNAME_LEN (16) -#define NCP_NUMBER_OF_VOLUMES (256) -struct ncp_volume_info { - __u32 total_blocks; - __u32 free_blocks; - __u32 purgeable_blocks; - __u32 not_yet_purgeable_blocks; - __u32 total_dir_entries; - __u32 available_dir_entries; - __u8 sectors_per_block; - char volume_name[NCP_VOLNAME_LEN + 1]; -}; - -#define AR_READ (cpu_to_le16(1)) -#define AR_WRITE (cpu_to_le16(2)) -#define AR_EXCLUSIVE (cpu_to_le16(0x20)) - -#define NCP_FILE_ID_LEN 6 - -/* Defines for Name Spaces */ -#define NW_NS_DOS 0 -#define NW_NS_MAC 1 -#define NW_NS_NFS 2 -#define NW_NS_FTAM 3 -#define NW_NS_OS2 4 - -/* Defines for ReturnInformationMask */ -#define RIM_NAME (cpu_to_le32(1)) -#define RIM_SPACE_ALLOCATED (cpu_to_le32(2)) -#define RIM_ATTRIBUTES (cpu_to_le32(4)) -#define RIM_DATA_SIZE (cpu_to_le32(8)) -#define RIM_TOTAL_SIZE (cpu_to_le32(0x10)) -#define RIM_EXT_ATTR_INFO (cpu_to_le32(0x20)) -#define RIM_ARCHIVE (cpu_to_le32(0x40)) -#define RIM_MODIFY (cpu_to_le32(0x80)) -#define RIM_CREATION (cpu_to_le32(0x100)) -#define RIM_OWNING_NAMESPACE (cpu_to_le32(0x200)) -#define RIM_DIRECTORY (cpu_to_le32(0x400)) -#define RIM_RIGHTS (cpu_to_le32(0x800)) -#define RIM_ALL (cpu_to_le32(0xFFF)) -#define RIM_COMPRESSED_INFO (cpu_to_le32(0x80000000)) - -/* Defines for NSInfoBitMask */ -#define NSIBM_NFS_NAME 0x0001 -#define NSIBM_NFS_MODE 0x0002 -#define NSIBM_NFS_GID 0x0004 -#define NSIBM_NFS_NLINKS 0x0008 -#define NSIBM_NFS_RDEV 0x0010 -#define NSIBM_NFS_LINK 0x0020 -#define NSIBM_NFS_CREATED 0x0040 -#define NSIBM_NFS_UID 0x0080 -#define NSIBM_NFS_ACSFLAG 0x0100 -#define NSIBM_NFS_MYFLAG 0x0200 - -/* open/create modes */ -#define OC_MODE_OPEN 0x01 -#define OC_MODE_TRUNCATE 0x02 -#define OC_MODE_REPLACE 0x02 -#define OC_MODE_CREATE 0x08 - -/* open/create results */ -#define OC_ACTION_NONE 0x00 -#define OC_ACTION_OPEN 0x01 -#define OC_ACTION_CREATE 0x02 -#define OC_ACTION_TRUNCATE 0x04 -#define OC_ACTION_REPLACE 0x04 - -/* access rights attributes */ -#ifndef AR_READ_ONLY -#define AR_READ_ONLY 0x0001 -#define AR_WRITE_ONLY 0x0002 -#define AR_DENY_READ 0x0004 -#define AR_DENY_WRITE 0x0008 -#define AR_COMPATIBILITY 0x0010 -#define AR_WRITE_THROUGH 0x0040 -#define AR_OPEN_COMPRESSED 0x0100 -#endif - -struct nw_nfs_info { - __u32 mode; - __u32 rdev; -}; - -struct nw_info_struct { - __u32 spaceAlloc; - __le32 attributes; - __u16 flags; - __le32 dataStreamSize; - __le32 totalStreamSize; - __u16 numberOfStreams; - __le16 creationTime; - __le16 creationDate; - __u32 creatorID; - __le16 modifyTime; - __le16 modifyDate; - __u32 modifierID; - __le16 lastAccessDate; - __u16 archiveTime; - __u16 archiveDate; - __u32 archiverID; - __u16 inheritedRightsMask; - __le32 dirEntNum; - __le32 DosDirNum; - __u32 volNumber; - __u32 EADataSize; - __u32 EAKeyCount; - __u32 EAKeySize; - __u32 NSCreator; - __u8 nameLen; - __u8 entryName[256]; - /* libncp may depend on there being nothing after entryName */ -#ifdef __KERNEL__ - struct nw_nfs_info nfs; -#endif -} __attribute__((packed)); - -/* modify mask - use with MODIFY_DOS_INFO structure */ -#define DM_ATTRIBUTES (cpu_to_le32(0x02)) -#define DM_CREATE_DATE (cpu_to_le32(0x04)) -#define DM_CREATE_TIME (cpu_to_le32(0x08)) -#define DM_CREATOR_ID (cpu_to_le32(0x10)) -#define DM_ARCHIVE_DATE (cpu_to_le32(0x20)) -#define DM_ARCHIVE_TIME (cpu_to_le32(0x40)) -#define DM_ARCHIVER_ID (cpu_to_le32(0x80)) -#define DM_MODIFY_DATE (cpu_to_le32(0x0100)) -#define DM_MODIFY_TIME (cpu_to_le32(0x0200)) -#define DM_MODIFIER_ID (cpu_to_le32(0x0400)) -#define DM_LAST_ACCESS_DATE (cpu_to_le32(0x0800)) -#define DM_INHERITED_RIGHTS_MASK (cpu_to_le32(0x1000)) -#define DM_MAXIMUM_SPACE (cpu_to_le32(0x2000)) - -struct nw_modify_dos_info { - __le32 attributes; - __le16 creationDate; - __le16 creationTime; - __u32 creatorID; - __le16 modifyDate; - __le16 modifyTime; - __u32 modifierID; - __u16 archiveDate; - __u16 archiveTime; - __u32 archiverID; - __le16 lastAccessDate; - __u16 inheritanceGrantMask; - __u16 inheritanceRevokeMask; - __u32 maximumSpace; -} __attribute__((packed)); - -struct nw_search_sequence { - __u8 volNumber; - __u32 dirBase; - __u32 sequence; -} __attribute__((packed)); - -#endif /* _LINUX_NCP_H */ diff --git a/include/uapi/linux/ncp_fs.h b/include/uapi/linux/ncp_fs.h deleted file mode 100644 index e76a44229d2f..000000000000 --- a/include/uapi/linux/ncp_fs.h +++ /dev/null @@ -1,147 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * ncp_fs.h - * - * Copyright (C) 1995, 1996 by Volker Lendecke - * - */ - -#ifndef _LINUX_NCP_FS_H -#define _LINUX_NCP_FS_H - -#include -#include -#include -#include - -#include -#include - -/* - * ioctl commands - */ - -struct ncp_ioctl_request { - unsigned int function; - unsigned int size; - char __user *data; -}; - -struct ncp_fs_info { - int version; - struct sockaddr_ipx addr; - __kernel_uid_t mounted_uid; - int connection; /* Connection number the server assigned us */ - int buffer_size; /* The negotiated buffer size, to be - used for read/write requests! */ - - int volume_number; - __le32 directory_id; -}; - -struct ncp_fs_info_v2 { - int version; - unsigned long mounted_uid; - unsigned int connection; - unsigned int buffer_size; - - unsigned int volume_number; - __le32 directory_id; - - __u32 dummy1; - __u32 dummy2; - __u32 dummy3; -}; - -struct ncp_sign_init -{ - char sign_root[8]; - char sign_last[16]; -}; - -struct ncp_lock_ioctl -{ -#define NCP_LOCK_LOG 0 -#define NCP_LOCK_SH 1 -#define NCP_LOCK_EX 2 -#define NCP_LOCK_CLEAR 256 - int cmd; - int origin; - unsigned int offset; - unsigned int length; -#define NCP_LOCK_DEFAULT_TIMEOUT 18 -#define NCP_LOCK_MAX_TIMEOUT 180 - int timeout; -}; - -struct ncp_setroot_ioctl -{ - int volNumber; - int namespace; - __le32 dirEntNum; -}; - -struct ncp_objectname_ioctl -{ -#define NCP_AUTH_NONE 0x00 -#define NCP_AUTH_BIND 0x31 -#define NCP_AUTH_NDS 0x32 - int auth_type; - size_t object_name_len; - void __user * object_name; /* a userspace data, in most cases user name */ -}; - -struct ncp_privatedata_ioctl -{ - size_t len; - void __user * data; /* ~1000 for NDS */ -}; - -/* NLS charsets by ioctl */ -#define NCP_IOCSNAME_LEN 20 -struct ncp_nls_ioctl -{ - unsigned char codepage[NCP_IOCSNAME_LEN+1]; - unsigned char iocharset[NCP_IOCSNAME_LEN+1]; -}; - -#define NCP_IOC_NCPREQUEST _IOR('n', 1, struct ncp_ioctl_request) -#define NCP_IOC_GETMOUNTUID _IOW('n', 2, __kernel_old_uid_t) -#define NCP_IOC_GETMOUNTUID2 _IOW('n', 2, unsigned long) - -#define NCP_IOC_CONN_LOGGED_IN _IO('n', 3) - -#define NCP_GET_FS_INFO_VERSION (1) -#define NCP_IOC_GET_FS_INFO _IOWR('n', 4, struct ncp_fs_info) -#define NCP_GET_FS_INFO_VERSION_V2 (2) -#define NCP_IOC_GET_FS_INFO_V2 _IOWR('n', 4, struct ncp_fs_info_v2) - -#define NCP_IOC_SIGN_INIT _IOR('n', 5, struct ncp_sign_init) -#define NCP_IOC_SIGN_WANTED _IOR('n', 6, int) -#define NCP_IOC_SET_SIGN_WANTED _IOW('n', 6, int) - -#define NCP_IOC_LOCKUNLOCK _IOR('n', 7, struct ncp_lock_ioctl) - -#define NCP_IOC_GETROOT _IOW('n', 8, struct ncp_setroot_ioctl) -#define NCP_IOC_SETROOT _IOR('n', 8, struct ncp_setroot_ioctl) - -#define NCP_IOC_GETOBJECTNAME _IOWR('n', 9, struct ncp_objectname_ioctl) -#define NCP_IOC_SETOBJECTNAME _IOR('n', 9, struct ncp_objectname_ioctl) -#define NCP_IOC_GETPRIVATEDATA _IOWR('n', 10, struct ncp_privatedata_ioctl) -#define NCP_IOC_SETPRIVATEDATA _IOR('n', 10, struct ncp_privatedata_ioctl) - -#define NCP_IOC_GETCHARSETS _IOWR('n', 11, struct ncp_nls_ioctl) -#define NCP_IOC_SETCHARSETS _IOR('n', 11, struct ncp_nls_ioctl) - -#define NCP_IOC_GETDENTRYTTL _IOW('n', 12, __u32) -#define NCP_IOC_SETDENTRYTTL _IOR('n', 12, __u32) - -/* - * The packet size to allocate. One page should be enough. - */ -#define NCP_PACKET_SIZE 4070 - -#define NCP_MAXPATHLEN 255 -#define NCP_MAXNAMELEN 14 - -#endif /* _LINUX_NCP_FS_H */ diff --git a/include/uapi/linux/ncp_mount.h b/include/uapi/linux/ncp_mount.h deleted file mode 100644 index 9bdbcd68c329..000000000000 --- a/include/uapi/linux/ncp_mount.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * ncp_mount.h - * - * Copyright (C) 1995, 1996 by Volker Lendecke - * - */ - -#ifndef _LINUX_NCP_MOUNT_H -#define _LINUX_NCP_MOUNT_H - -#include -#include - -#define NCP_MOUNT_VERSION 3 /* Binary */ - -/* Values for flags */ -#define NCP_MOUNT_SOFT 0x0001 -#define NCP_MOUNT_INTR 0x0002 -#define NCP_MOUNT_STRONG 0x0004 /* enable delete/rename of r/o files */ -#define NCP_MOUNT_NO_OS2 0x0008 /* do not use OS/2 (LONG) namespace */ -#define NCP_MOUNT_NO_NFS 0x0010 /* do not use NFS namespace */ -#define NCP_MOUNT_EXTRAS 0x0020 -#define NCP_MOUNT_SYMLINKS 0x0040 /* enable symlinks */ -#define NCP_MOUNT_NFS_EXTRAS 0x0080 /* Enable use of NFS NS meta-info */ - -struct ncp_mount_data { - int version; - unsigned int ncp_fd; /* The socket to the ncp port */ - __kernel_uid_t mounted_uid; /* Who may umount() this filesystem? */ - __kernel_pid_t wdog_pid; /* Who cares for our watchdog packets? */ - - unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; - unsigned int time_out; /* How long should I wait after - sending a NCP request? */ - unsigned int retry_count; /* And how often should I retry? */ - unsigned int flags; - - __kernel_uid_t uid; - __kernel_gid_t gid; - __kernel_mode_t file_mode; - __kernel_mode_t dir_mode; -}; - -#define NCP_MOUNT_VERSION_V4 (4) /* Binary or text */ - -struct ncp_mount_data_v4 { - int version; - unsigned long flags; /* NCP_MOUNT_* flags */ - /* MIPS uses long __kernel_uid_t, but... */ - /* we neever pass -1, so it is safe */ - unsigned long mounted_uid; /* Who may umount() this filesystem? */ - /* MIPS uses long __kernel_pid_t */ - long wdog_pid; /* Who cares for our watchdog packets? */ - - unsigned int ncp_fd; /* The socket to the ncp port */ - unsigned int time_out; /* How long should I wait after - sending a NCP request? */ - unsigned int retry_count; /* And how often should I retry? */ - - /* MIPS uses long __kernel_uid_t... */ - /* we never pass -1, so it is safe */ - unsigned long uid; - unsigned long gid; - /* MIPS uses unsigned long __kernel_mode_t */ - unsigned long file_mode; - unsigned long dir_mode; -}; - -#define NCP_MOUNT_VERSION_V5 (5) /* Text only */ - -#endif diff --git a/include/uapi/linux/ncp_no.h b/include/uapi/linux/ncp_no.h deleted file mode 100644 index 654d7c7f5d92..000000000000 --- a/include/uapi/linux/ncp_no.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _NCP_NO -#define _NCP_NO - -/* these define the attribute byte as seen by NCP */ -#define aRONLY (__cpu_to_le32(1)) -#define aHIDDEN (__cpu_to_le32(2)) -#define aSYSTEM (__cpu_to_le32(4)) -#define aEXECUTE (__cpu_to_le32(8)) -#define aDIR (__cpu_to_le32(0x10)) -#define aARCH (__cpu_to_le32(0x20)) -#define aSHARED (__cpu_to_le32(0x80)) -#define aDONTSUBALLOCATE (__cpu_to_le32(1L<<11)) -#define aTRANSACTIONAL (__cpu_to_le32(1L<<12)) -#define aPURGE (__cpu_to_le32(1L<<16)) -#define aRENAMEINHIBIT (__cpu_to_le32(1L<<17)) -#define aDELETEINHIBIT (__cpu_to_le32(1L<<18)) -#define aDONTCOMPRESS (__cpu_to_le32(1L<<27)) - -#endif /* _NCP_NO */ -- cgit From b575e837215325544b0dbcef912f13f369de4f3f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 2 Jun 2018 08:43:53 -0400 Subject: uapi/headers: Provide types_32_64.h Provide helper macros for fields which represent pointers in kernel-userspace ABI. This facilitates handling of 32-bit user-space by 64-bit kernels by defining those fields as 32-bit 0-padding and 32-bit integer on 32-bit architectures, which allows the kernel to treat those as 64-bit integers. The order of padding and 32-bit integer depends on the endianness. Signed-off-by: Mathieu Desnoyers Signed-off-by: Thomas Gleixner Cc: Joel Fernandes Cc: Peter Zijlstra Cc: Catalin Marinas Cc: Dave Watson Cc: Will Deacon Cc: Andi Kleen Cc: "H . Peter Anvin" Cc: Chris Lameter Cc: Russell King Cc: Andrew Hunter Cc: Michael Kerrisk Cc: "Paul E . McKenney" Cc: Paul Turner Cc: Boqun Feng Cc: Josh Triplett Cc: Steven Rostedt Cc: Ben Maurer Cc: linux-api@vger.kernel.org Cc: Andy Lutomirski Cc: Andrew Morton Cc: Linus Torvalds Link: https://lkml.kernel.org/r/20180602124408.8430-2-mathieu.desnoyers@efficios.com --- include/uapi/linux/types_32_64.h | 50 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 include/uapi/linux/types_32_64.h (limited to 'include/uapi') diff --git a/include/uapi/linux/types_32_64.h b/include/uapi/linux/types_32_64.h new file mode 100644 index 000000000000..0a87ace34a57 --- /dev/null +++ b/include/uapi/linux/types_32_64.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_TYPES_32_64_H +#define _UAPI_LINUX_TYPES_32_64_H + +/* + * linux/types_32_64.h + * + * Integer type declaration for pointers across 32-bit and 64-bit systems. + * + * Copyright (c) 2015-2018 Mathieu Desnoyers + */ + +#ifdef __KERNEL__ +# include +#else +# include +#endif + +#include + +#ifdef __BYTE_ORDER +# if (__BYTE_ORDER == __BIG_ENDIAN) +# define LINUX_BYTE_ORDER_BIG_ENDIAN +# else +# define LINUX_BYTE_ORDER_LITTLE_ENDIAN +# endif +#else +# ifdef __BIG_ENDIAN +# define LINUX_BYTE_ORDER_BIG_ENDIAN +# else +# define LINUX_BYTE_ORDER_LITTLE_ENDIAN +# endif +#endif + +#ifdef __LP64__ +# define LINUX_FIELD_u32_u64(field) __u64 field +# define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v) field = (intptr_t)v +#else +# ifdef LINUX_BYTE_ORDER_BIG_ENDIAN +# define LINUX_FIELD_u32_u64(field) __u32 field ## _padding, field +# define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v) \ + field ## _padding = 0, field = (intptr_t)v +# else +# define LINUX_FIELD_u32_u64(field) __u32 field, field ## _padding +# define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v) \ + field = (intptr_t)v, field ## _padding = 0 +# endif +#endif + +#endif /* _UAPI_LINUX_TYPES_32_64_H */ -- cgit From d7822b1e24f2df5df98c76f0e94a5416349ff759 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 2 Jun 2018 08:43:54 -0400 Subject: rseq: Introduce restartable sequences system call Expose a new system call allowing each thread to register one userspace memory area to be used as an ABI between kernel and user-space for two purposes: user-space restartable sequences and quick access to read the current CPU number value from user-space. * Restartable sequences (per-cpu atomics) Restartables sequences allow user-space to perform update operations on per-cpu data without requiring heavy-weight atomic operations. The restartable critical sections (percpu atomics) work has been started by Paul Turner and Andrew Hunter. It lets the kernel handle restart of critical sections. [1] [2] The re-implementation proposed here brings a few simplifications to the ABI which facilitates porting to other architectures and speeds up the user-space fast path. Here are benchmarks of various rseq use-cases. Test hardware: arm32: ARMv7 Processor rev 4 (v7l) "Cubietruck", 2-core x86-64: Intel E5-2630 v3@2.40GHz, 16-core, hyperthreading The following benchmarks were all performed on a single thread. * Per-CPU statistic counter increment getcpu+atomic (ns/op) rseq (ns/op) speedup arm32: 344.0 31.4 11.0 x86-64: 15.3 2.0 7.7 * LTTng-UST: write event 32-bit header, 32-bit payload into tracer per-cpu buffer getcpu+atomic (ns/op) rseq (ns/op) speedup arm32: 2502.0 2250.0 1.1 x86-64: 117.4 98.0 1.2 * liburcu percpu: lock-unlock pair, dereference, read/compare word getcpu+atomic (ns/op) rseq (ns/op) speedup arm32: 751.0 128.5 5.8 x86-64: 53.4 28.6 1.9 * jemalloc memory allocator adapted to use rseq Using rseq with per-cpu memory pools in jemalloc at Facebook (based on rseq 2016 implementation): The production workload response-time has 1-2% gain avg. latency, and the P99 overall latency drops by 2-3%. * Reading the current CPU number Speeding up reading the current CPU number on which the caller thread is running is done by keeping the current CPU number up do date within the cpu_id field of the memory area registered by the thread. This is done by making scheduler preemption set the TIF_NOTIFY_RESUME flag on the current thread. Upon return to user-space, a notify-resume handler updates the current CPU value within the registered user-space memory area. User-space can then read the current CPU number directly from memory. Keeping the current cpu id in a memory area shared between kernel and user-space is an improvement over current mechanisms available to read the current CPU number, which has the following benefits over alternative approaches: - 35x speedup on ARM vs system call through glibc - 20x speedup on x86 compared to calling glibc, which calls vdso executing a "lsl" instruction, - 14x speedup on x86 compared to inlined "lsl" instruction, - Unlike vdso approaches, this cpu_id value can be read from an inline assembly, which makes it a useful building block for restartable sequences. - The approach of reading the cpu id through memory mapping shared between kernel and user-space is portable (e.g. ARM), which is not the case for the lsl-based x86 vdso. On x86, yet another possible approach would be to use the gs segment selector to point to user-space per-cpu data. This approach performs similarly to the cpu id cache, but it has two disadvantages: it is not portable, and it is incompatible with existing applications already using the gs segment selector for other purposes. Benchmarking various approaches for reading the current CPU number: ARMv7 Processor rev 4 (v7l) Machine model: Cubietruck - Baseline (empty loop): 8.4 ns - Read CPU from rseq cpu_id: 16.7 ns - Read CPU from rseq cpu_id (lazy register): 19.8 ns - glibc 2.19-0ubuntu6.6 getcpu: 301.8 ns - getcpu system call: 234.9 ns x86-64 Intel(R) Xeon(R) CPU E5-2630 v3 @ 2.40GHz: - Baseline (empty loop): 0.8 ns - Read CPU from rseq cpu_id: 0.8 ns - Read CPU from rseq cpu_id (lazy register): 0.8 ns - Read using gs segment selector: 0.8 ns - "lsl" inline assembly: 13.0 ns - glibc 2.19-0ubuntu6 getcpu: 16.6 ns - getcpu system call: 53.9 ns - Speed (benchmark taken on v8 of patchset) Running 10 runs of hackbench -l 100000 seems to indicate, contrary to expectations, that enabling CONFIG_RSEQ slightly accelerates the scheduler: Configuration: 2 sockets * 8-core Intel(R) Xeon(R) CPU E5-2630 v3 @ 2.40GHz (directly on hardware, hyperthreading disabled in BIOS, energy saving disabled in BIOS, turboboost disabled in BIOS, cpuidle.off=1 kernel parameter), with a Linux v4.6 defconfig+localyesconfig, restartable sequences series applied. * CONFIG_RSEQ=n avg.: 41.37 s std.dev.: 0.36 s * CONFIG_RSEQ=y avg.: 40.46 s std.dev.: 0.33 s - Size On x86-64, between CONFIG_RSEQ=n/y, the text size increase of vmlinux is 567 bytes, and the data size increase of vmlinux is 5696 bytes. [1] https://lwn.net/Articles/650333/ [2] http://www.linuxplumbersconf.org/2013/ocw/system/presentations/1695/original/LPC%20-%20PerCpu%20Atomics.pdf Signed-off-by: Mathieu Desnoyers Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Joel Fernandes Cc: Catalin Marinas Cc: Dave Watson Cc: Will Deacon Cc: Andi Kleen Cc: "H . Peter Anvin" Cc: Chris Lameter Cc: Russell King Cc: Andrew Hunter Cc: Michael Kerrisk Cc: "Paul E . McKenney" Cc: Paul Turner Cc: Boqun Feng Cc: Josh Triplett Cc: Steven Rostedt Cc: Ben Maurer Cc: Alexander Viro Cc: linux-api@vger.kernel.org Cc: Andy Lutomirski Cc: Andrew Morton Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20151027235635.16059.11630.stgit@pjt-glaptop.roam.corp.google.com Link: http://lkml.kernel.org/r/20150624222609.6116.86035.stgit@kitami.mtv.corp.google.com Link: https://lkml.kernel.org/r/20180602124408.8430-3-mathieu.desnoyers@efficios.com --- MAINTAINERS | 11 ++ arch/Kconfig | 7 + fs/exec.c | 1 + include/linux/sched.h | 134 +++++++++++++++++ include/linux/syscalls.h | 4 +- include/trace/events/rseq.h | 57 +++++++ include/uapi/linux/rseq.h | 133 +++++++++++++++++ init/Kconfig | 23 +++ kernel/Makefile | 1 + kernel/fork.c | 2 + kernel/rseq.c | 357 ++++++++++++++++++++++++++++++++++++++++++++ kernel/sched/core.c | 2 + kernel/sys_ni.c | 3 + 13 files changed, 734 insertions(+), 1 deletion(-) create mode 100644 include/trace/events/rseq.h create mode 100644 include/uapi/linux/rseq.h create mode 100644 kernel/rseq.c (limited to 'include/uapi') diff --git a/MAINTAINERS b/MAINTAINERS index aa635837a6af..a384243d911b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11976,6 +11976,17 @@ F: include/dt-bindings/reset/ F: include/linux/reset.h F: include/linux/reset-controller.h +RESTARTABLE SEQUENCES SUPPORT +M: Mathieu Desnoyers +M: Peter Zijlstra +M: "Paul E. McKenney" +M: Boqun Feng +L: linux-kernel@vger.kernel.org +S: Supported +F: kernel/rseq.c +F: include/uapi/linux/rseq.h +F: include/trace/events/rseq.h + RFKILL M: Johannes Berg L: linux-wireless@vger.kernel.org diff --git a/arch/Kconfig b/arch/Kconfig index b695a3e3e922..095ba99968c1 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -272,6 +272,13 @@ config HAVE_REGS_AND_STACK_ACCESS_API declared in asm/ptrace.h For example the kprobes-based event tracer needs this API. +config HAVE_RSEQ + bool + depends on HAVE_REGS_AND_STACK_ACCESS_API + help + This symbol should be selected by an architecture if it + supports an implementation of restartable sequences. + config HAVE_CLK bool help diff --git a/fs/exec.c b/fs/exec.c index 183059c427b9..2c3911612b22 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1822,6 +1822,7 @@ static int do_execveat_common(int fd, struct filename *filename, current->fs->in_exec = 0; current->in_execve = 0; membarrier_execve(current); + rseq_execve(current); acct_update_integrals(current); task_numa_free(current); free_bprm(bprm); diff --git a/include/linux/sched.h b/include/linux/sched.h index 14e4f9c12337..3aa4fcb74e76 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -27,6 +27,7 @@ #include #include #include +#include /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; @@ -1047,6 +1048,17 @@ struct task_struct { unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ +#ifdef CONFIG_RSEQ + struct rseq __user *rseq; + u32 rseq_len; + u32 rseq_sig; + /* + * RmW on rseq_event_mask must be performed atomically + * with respect to preemption. + */ + unsigned long rseq_event_mask; +#endif + struct tlbflush_unmap_batch tlb_ubc; struct rcu_head rcu; @@ -1757,4 +1769,126 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask); #define TASK_SIZE_OF(tsk) TASK_SIZE #endif +#ifdef CONFIG_RSEQ + +/* + * Map the event mask on the user-space ABI enum rseq_cs_flags + * for direct mask checks. + */ +enum rseq_event_mask_bits { + RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT, + RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT, + RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT, +}; + +enum rseq_event_mask { + RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT), + RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT), + RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT), +}; + +static inline void rseq_set_notify_resume(struct task_struct *t) +{ + if (t->rseq) + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); +} + +void __rseq_handle_notify_resume(struct pt_regs *regs); + +static inline void rseq_handle_notify_resume(struct pt_regs *regs) +{ + if (current->rseq) + __rseq_handle_notify_resume(regs); +} + +static inline void rseq_signal_deliver(struct pt_regs *regs) +{ + preempt_disable(); + __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask); + preempt_enable(); + rseq_handle_notify_resume(regs); +} + +/* rseq_preempt() requires preemption to be disabled. */ +static inline void rseq_preempt(struct task_struct *t) +{ + __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask); + rseq_set_notify_resume(t); +} + +/* rseq_migrate() requires preemption to be disabled. */ +static inline void rseq_migrate(struct task_struct *t) +{ + __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask); + rseq_set_notify_resume(t); +} + +/* + * If parent process has a registered restartable sequences area, the + * child inherits. Only applies when forking a process, not a thread. In + * case a parent fork() in the middle of a restartable sequence, set the + * resume notifier to force the child to retry. + */ +static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) +{ + if (clone_flags & CLONE_THREAD) { + t->rseq = NULL; + t->rseq_len = 0; + t->rseq_sig = 0; + t->rseq_event_mask = 0; + } else { + t->rseq = current->rseq; + t->rseq_len = current->rseq_len; + t->rseq_sig = current->rseq_sig; + t->rseq_event_mask = current->rseq_event_mask; + rseq_preempt(t); + } +} + +static inline void rseq_execve(struct task_struct *t) +{ + t->rseq = NULL; + t->rseq_len = 0; + t->rseq_sig = 0; + t->rseq_event_mask = 0; +} + +#else + +static inline void rseq_set_notify_resume(struct task_struct *t) +{ +} +static inline void rseq_handle_notify_resume(struct pt_regs *regs) +{ +} +static inline void rseq_signal_deliver(struct pt_regs *regs) +{ +} +static inline void rseq_preempt(struct task_struct *t) +{ +} +static inline void rseq_migrate(struct task_struct *t) +{ +} +static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) +{ +} +static inline void rseq_execve(struct task_struct *t) +{ +} + +#endif + +#ifdef CONFIG_DEBUG_RSEQ + +void rseq_syscall(struct pt_regs *regs); + +#else + +static inline void rseq_syscall(struct pt_regs *regs) +{ +} + +#endif + #endif diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 390e814fdc8d..73810808cdf2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -66,6 +66,7 @@ struct old_linux_dirent; struct perf_event_attr; struct file_handle; struct sigaltstack; +struct rseq; union bpf_attr; #include @@ -897,7 +898,8 @@ asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val); asmlinkage long sys_pkey_free(int pkey); asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, unsigned mask, struct statx __user *buffer); - +asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len, + int flags, uint32_t sig); /* * Architecture-specific system calls diff --git a/include/trace/events/rseq.h b/include/trace/events/rseq.h new file mode 100644 index 000000000000..a04a64bc1a00 --- /dev/null +++ b/include/trace/events/rseq.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rseq + +#if !defined(_TRACE_RSEQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RSEQ_H + +#include +#include + +TRACE_EVENT(rseq_update, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __field(s32, cpu_id) + ), + + TP_fast_assign( + __entry->cpu_id = raw_smp_processor_id(); + ), + + TP_printk("cpu_id=%d", __entry->cpu_id) +); + +TRACE_EVENT(rseq_ip_fixup, + + TP_PROTO(unsigned long regs_ip, unsigned long start_ip, + unsigned long post_commit_offset, unsigned long abort_ip), + + TP_ARGS(regs_ip, start_ip, post_commit_offset, abort_ip), + + TP_STRUCT__entry( + __field(unsigned long, regs_ip) + __field(unsigned long, start_ip) + __field(unsigned long, post_commit_offset) + __field(unsigned long, abort_ip) + ), + + TP_fast_assign( + __entry->regs_ip = regs_ip; + __entry->start_ip = start_ip; + __entry->post_commit_offset = post_commit_offset; + __entry->abort_ip = abort_ip; + ), + + TP_printk("regs_ip=0x%lx start_ip=0x%lx post_commit_offset=%lu abort_ip=0x%lx", + __entry->regs_ip, __entry->start_ip, + __entry->post_commit_offset, __entry->abort_ip) +); + +#endif /* _TRACE_SOCK_H */ + +/* This part must be outside protection */ +#include diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h new file mode 100644 index 000000000000..d620fa43756c --- /dev/null +++ b/include/uapi/linux/rseq.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_RSEQ_H +#define _UAPI_LINUX_RSEQ_H + +/* + * linux/rseq.h + * + * Restartable sequences system call API + * + * Copyright (c) 2015-2018 Mathieu Desnoyers + */ + +#ifdef __KERNEL__ +# include +#else +# include +#endif + +#include + +enum rseq_cpu_id_state { + RSEQ_CPU_ID_UNINITIALIZED = -1, + RSEQ_CPU_ID_REGISTRATION_FAILED = -2, +}; + +enum rseq_flags { + RSEQ_FLAG_UNREGISTER = (1 << 0), +}; + +enum rseq_cs_flags_bit { + RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0, + RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1, + RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2, +}; + +enum rseq_cs_flags { + RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT = + (1U << RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT), + RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = + (1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT), + RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = + (1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT), +}; + +/* + * struct rseq_cs is aligned on 4 * 8 bytes to ensure it is always + * contained within a single cache-line. It is usually declared as + * link-time constant data. + */ +struct rseq_cs { + /* Version of this structure. */ + __u32 version; + /* enum rseq_cs_flags */ + __u32 flags; + LINUX_FIELD_u32_u64(start_ip); + /* Offset from start_ip. */ + LINUX_FIELD_u32_u64(post_commit_offset); + LINUX_FIELD_u32_u64(abort_ip); +} __attribute__((aligned(4 * sizeof(__u64)))); + +/* + * struct rseq is aligned on 4 * 8 bytes to ensure it is always + * contained within a single cache-line. + * + * A single struct rseq per thread is allowed. + */ +struct rseq { + /* + * Restartable sequences cpu_id_start field. Updated by the + * kernel, and read by user-space with single-copy atomicity + * semantics. Aligned on 32-bit. Always contains a value in the + * range of possible CPUs, although the value may not be the + * actual current CPU (e.g. if rseq is not initialized). This + * CPU number value should always be compared against the value + * of the cpu_id field before performing a rseq commit or + * returning a value read from a data structure indexed using + * the cpu_id_start value. + */ + __u32 cpu_id_start; + /* + * Restartable sequences cpu_id field. Updated by the kernel, + * and read by user-space with single-copy atomicity semantics. + * Aligned on 32-bit. Values RSEQ_CPU_ID_UNINITIALIZED and + * RSEQ_CPU_ID_REGISTRATION_FAILED have a special semantic: the + * former means "rseq uninitialized", and latter means "rseq + * initialization failed". This value is meant to be read within + * rseq critical sections and compared with the cpu_id_start + * value previously read, before performing the commit instruction, + * or read and compared with the cpu_id_start value before returning + * a value loaded from a data structure indexed using the + * cpu_id_start value. + */ + __u32 cpu_id; + /* + * Restartable sequences rseq_cs field. + * + * Contains NULL when no critical section is active for the current + * thread, or holds a pointer to the currently active struct rseq_cs. + * + * Updated by user-space, which sets the address of the currently + * active rseq_cs at the beginning of assembly instruction sequence + * block, and set to NULL by the kernel when it restarts an assembly + * instruction sequence block, as well as when the kernel detects that + * it is preempting or delivering a signal outside of the range + * targeted by the rseq_cs. Also needs to be set to NULL by user-space + * before reclaiming memory that contains the targeted struct rseq_cs. + * + * Read and set by the kernel with single-copy atomicity semantics. + * Set by user-space with single-copy atomicity semantics. Aligned + * on 64-bit. + */ + LINUX_FIELD_u32_u64(rseq_cs); + /* + * - RSEQ_DISABLE flag: + * + * Fallback fast-track flag for single-stepping. + * Set by user-space if lack of progress is detected. + * Cleared by user-space after rseq finish. + * Read by the kernel. + * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT + * Inhibit instruction sequence block restart and event + * counter increment on preemption for this thread. + * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL + * Inhibit instruction sequence block restart and event + * counter increment on signal delivery for this thread. + * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE + * Inhibit instruction sequence block restart and event + * counter increment on migration for this thread. + */ + __u32 flags; +} __attribute__((aligned(4 * sizeof(__u64)))); + +#endif /* _UAPI_LINUX_RSEQ_H */ diff --git a/init/Kconfig b/init/Kconfig index 18b151f0ddc1..33ec06fddaaa 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1417,6 +1417,29 @@ config ARCH_HAS_MEMBARRIER_CALLBACKS config ARCH_HAS_MEMBARRIER_SYNC_CORE bool +config RSEQ + bool "Enable rseq() system call" if EXPERT + default y + depends on HAVE_RSEQ + select MEMBARRIER + help + Enable the restartable sequences system call. It provides a + user-space cache for the current CPU number value, which + speeds up getting the current CPU number from user-space, + as well as an ABI to speed up user-space operations on + per-CPU data. + + If unsure, say Y. + +config DEBUG_RSEQ + default n + bool "Enabled debugging of rseq() system call" if EXPERT + depends on RSEQ && DEBUG_KERNEL + help + Enable extra debugging checks for the rseq system call. + + If unsure, say N. + config EMBEDDED bool "Embedded system" option allnoconfig_y diff --git a/kernel/Makefile b/kernel/Makefile index f85ae5dfa474..7085c841c413 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -113,6 +113,7 @@ obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o obj-$(CONFIG_TORTURE_TEST) += torture.o obj-$(CONFIG_HAS_IOMEM) += memremap.o +obj-$(CONFIG_RSEQ) += rseq.o $(obj)/configs.o: $(obj)/config_data.h diff --git a/kernel/fork.c b/kernel/fork.c index a5d21c42acfc..70992bfeba81 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1899,6 +1899,8 @@ static __latent_entropy struct task_struct *copy_process( */ copy_seccomp(p); + rseq_fork(p, clone_flags); + /* * Process group and session signals need to be delivered to just the * parent before the fork or both the parent and the child after the diff --git a/kernel/rseq.c b/kernel/rseq.c new file mode 100644 index 000000000000..ae306f90c514 --- /dev/null +++ b/kernel/rseq.c @@ -0,0 +1,357 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Restartable sequences system call + * + * Copyright (C) 2015, Google, Inc., + * Paul Turner and Andrew Hunter + * Copyright (C) 2015-2018, EfficiOS Inc., + * Mathieu Desnoyers + */ + +#include +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +#define RSEQ_CS_PREEMPT_MIGRATE_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE | \ + RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT) + +/* + * + * Restartable sequences are a lightweight interface that allows + * user-level code to be executed atomically relative to scheduler + * preemption and signal delivery. Typically used for implementing + * per-cpu operations. + * + * It allows user-space to perform update operations on per-cpu data + * without requiring heavy-weight atomic operations. + * + * Detailed algorithm of rseq user-space assembly sequences: + * + * init(rseq_cs) + * cpu = TLS->rseq::cpu_id_start + * [1] TLS->rseq::rseq_cs = rseq_cs + * [start_ip] ---------------------------- + * [2] if (cpu != TLS->rseq::cpu_id) + * goto abort_ip; + * [3] + * [post_commit_ip] ---------------------------- + * + * The address of jump target abort_ip must be outside the critical + * region, i.e.: + * + * [abort_ip] < [start_ip] || [abort_ip] >= [post_commit_ip] + * + * Steps [2]-[3] (inclusive) need to be a sequence of instructions in + * userspace that can handle being interrupted between any of those + * instructions, and then resumed to the abort_ip. + * + * 1. Userspace stores the address of the struct rseq_cs assembly + * block descriptor into the rseq_cs field of the registered + * struct rseq TLS area. This update is performed through a single + * store within the inline assembly instruction sequence. + * [start_ip] + * + * 2. Userspace tests to check whether the current cpu_id field match + * the cpu number loaded before start_ip, branching to abort_ip + * in case of a mismatch. + * + * If the sequence is preempted or interrupted by a signal + * at or after start_ip and before post_commit_ip, then the kernel + * clears TLS->__rseq_abi::rseq_cs, and sets the user-space return + * ip to abort_ip before returning to user-space, so the preempted + * execution resumes at abort_ip. + * + * 3. Userspace critical section final instruction before + * post_commit_ip is the commit. The critical section is + * self-terminating. + * [post_commit_ip] + * + * 4. + * + * On failure at [2], or if interrupted by preempt or signal delivery + * between [1] and [3]: + * + * [abort_ip] + * F1. + */ + +static int rseq_update_cpu_id(struct task_struct *t) +{ + u32 cpu_id = raw_smp_processor_id(); + + if (__put_user(cpu_id, &t->rseq->cpu_id_start)) + return -EFAULT; + if (__put_user(cpu_id, &t->rseq->cpu_id)) + return -EFAULT; + trace_rseq_update(t); + return 0; +} + +static int rseq_reset_rseq_cpu_id(struct task_struct *t) +{ + u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED; + + /* + * Reset cpu_id_start to its initial state (0). + */ + if (__put_user(cpu_id_start, &t->rseq->cpu_id_start)) + return -EFAULT; + /* + * Reset cpu_id to RSEQ_CPU_ID_UNINITIALIZED, so any user coming + * in after unregistration can figure out that rseq needs to be + * registered again. + */ + if (__put_user(cpu_id, &t->rseq->cpu_id)) + return -EFAULT; + return 0; +} + +static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs) +{ + struct rseq_cs __user *urseq_cs; + unsigned long ptr; + u32 __user *usig; + u32 sig; + int ret; + + ret = __get_user(ptr, &t->rseq->rseq_cs); + if (ret) + return ret; + if (!ptr) { + memset(rseq_cs, 0, sizeof(*rseq_cs)); + return 0; + } + urseq_cs = (struct rseq_cs __user *)ptr; + if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs))) + return -EFAULT; + if (rseq_cs->version > 0) + return -EINVAL; + + /* Ensure that abort_ip is not in the critical section. */ + if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset) + return -EINVAL; + + usig = (u32 __user *)(rseq_cs->abort_ip - sizeof(u32)); + ret = get_user(sig, usig); + if (ret) + return ret; + + if (current->rseq_sig != sig) { + printk_ratelimited(KERN_WARNING + "Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n", + sig, current->rseq_sig, current->pid, usig); + return -EPERM; + } + return 0; +} + +static int rseq_need_restart(struct task_struct *t, u32 cs_flags) +{ + u32 flags, event_mask; + int ret; + + /* Get thread flags. */ + ret = __get_user(flags, &t->rseq->flags); + if (ret) + return ret; + + /* Take critical section flags into account. */ + flags |= cs_flags; + + /* + * Restart on signal can only be inhibited when restart on + * preempt and restart on migrate are inhibited too. Otherwise, + * a preempted signal handler could fail to restart the prior + * execution context on sigreturn. + */ + if (unlikely((flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL) && + (flags & RSEQ_CS_PREEMPT_MIGRATE_FLAGS) != + RSEQ_CS_PREEMPT_MIGRATE_FLAGS)) + return -EINVAL; + + /* + * Load and clear event mask atomically with respect to + * scheduler preemption. + */ + preempt_disable(); + event_mask = t->rseq_event_mask; + t->rseq_event_mask = 0; + preempt_enable(); + + return !!(event_mask & ~flags); +} + +static int clear_rseq_cs(struct task_struct *t) +{ + /* + * The rseq_cs field is set to NULL on preemption or signal + * delivery on top of rseq assembly block, as well as on top + * of code outside of the rseq assembly block. This performs + * a lazy clear of the rseq_cs field. + * + * Set rseq_cs to NULL with single-copy atomicity. + */ + return __put_user(0UL, &t->rseq->rseq_cs); +} + +/* + * Unsigned comparison will be true when ip >= start_ip, and when + * ip < start_ip + post_commit_offset. + */ +static bool in_rseq_cs(unsigned long ip, struct rseq_cs *rseq_cs) +{ + return ip - rseq_cs->start_ip < rseq_cs->post_commit_offset; +} + +static int rseq_ip_fixup(struct pt_regs *regs) +{ + unsigned long ip = instruction_pointer(regs); + struct task_struct *t = current; + struct rseq_cs rseq_cs; + int ret; + + ret = rseq_get_rseq_cs(t, &rseq_cs); + if (ret) + return ret; + + /* + * Handle potentially not being within a critical section. + * If not nested over a rseq critical section, restart is useless. + * Clear the rseq_cs pointer and return. + */ + if (!in_rseq_cs(ip, &rseq_cs)) + return clear_rseq_cs(t); + ret = rseq_need_restart(t, rseq_cs.flags); + if (ret <= 0) + return ret; + ret = clear_rseq_cs(t); + if (ret) + return ret; + trace_rseq_ip_fixup(ip, rseq_cs.start_ip, rseq_cs.post_commit_offset, + rseq_cs.abort_ip); + instruction_pointer_set(regs, (unsigned long)rseq_cs.abort_ip); + return 0; +} + +/* + * This resume handler must always be executed between any of: + * - preemption, + * - signal delivery, + * and return to user-space. + * + * This is how we can ensure that the entire rseq critical section, + * consisting of both the C part and the assembly instruction sequence, + * will issue the commit instruction only if executed atomically with + * respect to other threads scheduled on the same CPU, and with respect + * to signal handlers. + */ +void __rseq_handle_notify_resume(struct pt_regs *regs) +{ + struct task_struct *t = current; + int ret; + + if (unlikely(t->flags & PF_EXITING)) + return; + if (unlikely(!access_ok(VERIFY_WRITE, t->rseq, sizeof(*t->rseq)))) + goto error; + ret = rseq_ip_fixup(regs); + if (unlikely(ret < 0)) + goto error; + if (unlikely(rseq_update_cpu_id(t))) + goto error; + return; + +error: + force_sig(SIGSEGV, t); +} + +#ifdef CONFIG_DEBUG_RSEQ + +/* + * Terminate the process if a syscall is issued within a restartable + * sequence. + */ +void rseq_syscall(struct pt_regs *regs) +{ + unsigned long ip = instruction_pointer(regs); + struct task_struct *t = current; + struct rseq_cs rseq_cs; + + if (!t->rseq) + return; + if (!access_ok(VERIFY_READ, t->rseq, sizeof(*t->rseq)) || + rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs)) + force_sig(SIGSEGV, t); +} + +#endif + +/* + * sys_rseq - setup restartable sequences for caller thread. + */ +SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, + int, flags, u32, sig) +{ + int ret; + + if (flags & RSEQ_FLAG_UNREGISTER) { + /* Unregister rseq for current thread. */ + if (current->rseq != rseq || !current->rseq) + return -EINVAL; + if (current->rseq_len != rseq_len) + return -EINVAL; + if (current->rseq_sig != sig) + return -EPERM; + ret = rseq_reset_rseq_cpu_id(current); + if (ret) + return ret; + current->rseq = NULL; + current->rseq_len = 0; + current->rseq_sig = 0; + return 0; + } + + if (unlikely(flags)) + return -EINVAL; + + if (current->rseq) { + /* + * If rseq is already registered, check whether + * the provided address differs from the prior + * one. + */ + if (current->rseq != rseq || current->rseq_len != rseq_len) + return -EINVAL; + if (current->rseq_sig != sig) + return -EPERM; + /* Already registered. */ + return -EBUSY; + } + + /* + * If there was no rseq previously registered, + * ensure the provided rseq is properly aligned and valid. + */ + if (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) || + rseq_len != sizeof(*rseq)) + return -EINVAL; + if (!access_ok(VERIFY_WRITE, rseq, rseq_len)) + return -EFAULT; + current->rseq = rseq; + current->rseq_len = rseq_len; + current->rseq_sig = sig; + /* + * If rseq was previously inactive, and has just been + * registered, ensure the cpu_id_start and cpu_id fields + * are updated before returning to user-space. + */ + rseq_set_notify_resume(current); + + return 0; +} diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e9866f86f304..a98d54cd5535 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1191,6 +1191,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) if (p->sched_class->migrate_task_rq) p->sched_class->migrate_task_rq(p); p->se.nr_migrations++; + rseq_migrate(p); perf_event_task_migrate(p); } @@ -2634,6 +2635,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, { sched_info_switch(rq, prev, next); perf_event_task_sched_out(prev, next); + rseq_preempt(prev); fire_sched_out_preempt_notifiers(prev, next); prepare_task(next); prepare_arch_switch(next); diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 183169c2a75b..86f832d6ff6f 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -432,3 +432,6 @@ COND_SYSCALL(setresgid16); COND_SYSCALL(setresuid16); COND_SYSCALL(setreuid16); COND_SYSCALL(setuid16); + +/* restartable sequence */ +COND_SYSCALL(rseq); -- cgit From 64e6dd1fb2f1ed799f317dc34aa6e251c64f4981 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Thu, 7 Jun 2018 18:15:14 +0800 Subject: netfilter: nf_conntrack: Increase __IPS_MAX_BIT with new bit IPS_OFFLOAD_BIT The __IPS_MAX_BIT is used in __ctnetlink_change_status as the max bit value. When add new bit IPS_OFFLOAD_BIT whose value is 14, we should increase the __IPS_MAX_BIT too, from 14 to 15. There is no any bug in current codes, although it lost one loop in __ctnetlink_change_status. Because the new bit IPS_OFFLOAD_BIT belongs the IPS_UNCHANGEABLE_MASK. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index c712eb6879f1..336014bf8868 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -112,7 +112,7 @@ enum ip_conntrack_status { IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING | IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD), - __IPS_MAX_BIT = 14, + __IPS_MAX_BIT = 15, }; /* Connection tracking event types */ -- cgit From 7eced5ab5a7366ee7ca5360b3eca9d220c2b2887 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 3 Jun 2018 12:06:57 +0200 Subject: netfilter: nf_tables: add NFT_LOGLEVEL_* enumeration and use it This is internal, not exposed through uapi, and although it maps with userspace LOG_*, with the introduction of LOGLEVEL_AUDIT we are incurring in namespace pollution. This patch adds the NFT_LOGLEVEL_ enumeration and use it from nft_log. Fixes: 1a893b44de45 ("netfilter: nf_tables: Add audit support to log statement") Signed-off-by: Pablo Neira Ayuso Acked-by: Phil Sutter Signed-off-by: David S. Miller --- include/uapi/linux/netfilter/nf_tables.h | 28 +++++++++++++++++++++++++--- net/netfilter/nft_log.c | 10 +++++----- 2 files changed, 30 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index ae00a3c49b8a..c9bf74b94f37 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1099,9 +1099,31 @@ enum nft_log_attributes { #define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1) /** - * LOGLEVEL_AUDIT - a pseudo log level enabling audit logging - */ -#define LOGLEVEL_AUDIT 8 + * enum nft_log_level - nf_tables log levels + * + * @NFT_LOGLEVEL_EMERG: system is unusable + * @NFT_LOGLEVEL_ALERT: action must be taken immediately + * @NFT_LOGLEVEL_CRIT: critical conditions + * @NFT_LOGLEVEL_ERR: error conditions + * @NFT_LOGLEVEL_WARNING: warning conditions + * @NFT_LOGLEVEL_NOTICE: normal but significant condition + * @NFT_LOGLEVEL_INFO: informational + * @NFT_LOGLEVEL_DEBUG: debug-level messages + * @NFT_LOGLEVEL_AUDIT: enabling audit logging + */ +enum nft_log_level { + NFT_LOGLEVEL_EMERG, + NFT_LOGLEVEL_ALERT, + NFT_LOGLEVEL_CRIT, + NFT_LOGLEVEL_ERR, + NFT_LOGLEVEL_WARNING, + NFT_LOGLEVEL_NOTICE, + NFT_LOGLEVEL_INFO, + NFT_LOGLEVEL_DEBUG, + NFT_LOGLEVEL_AUDIT, + __NFT_LOGLEVEL_MAX +}; +#define NFT_LOGLEVEL_MAX (__NFT_LOGLEVEL_MAX + 1) /** * enum nft_queue_attributes - nf_tables queue expression netlink attributes diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 7eef1cffbf1b..655187bed5d8 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -111,7 +111,7 @@ static void nft_log_eval(const struct nft_expr *expr, const struct nft_log *priv = nft_expr_priv(expr); if (priv->loginfo.type == NF_LOG_TYPE_LOG && - priv->loginfo.u.log.level == LOGLEVEL_AUDIT) { + priv->loginfo.u.log.level == NFT_LOGLEVEL_AUDIT) { nft_log_eval_audit(pkt); return; } @@ -166,9 +166,9 @@ static int nft_log_init(const struct nft_ctx *ctx, li->u.log.level = ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL])); } else { - li->u.log.level = LOGLEVEL_WARNING; + li->u.log.level = NFT_LOGLEVEL_WARNING; } - if (li->u.log.level > LOGLEVEL_AUDIT) { + if (li->u.log.level > NFT_LOGLEVEL_AUDIT) { err = -EINVAL; goto err1; } @@ -196,7 +196,7 @@ static int nft_log_init(const struct nft_ctx *ctx, break; } - if (li->u.log.level == LOGLEVEL_AUDIT) + if (li->u.log.level == NFT_LOGLEVEL_AUDIT) return 0; err = nf_logger_find_get(ctx->family, li->type); @@ -220,7 +220,7 @@ static void nft_log_destroy(const struct nft_ctx *ctx, if (priv->prefix != nft_log_null_prefix) kfree(priv->prefix); - if (li->u.log.level == LOGLEVEL_AUDIT) + if (li->u.log.level == NFT_LOGLEVEL_AUDIT) return; nf_logger_put(ctx->family, li->type); -- cgit From a5a16e43529b5040760ebf9bd9b056dd34861f93 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 7 Jun 2018 15:37:34 +0200 Subject: xsk: Fix umem fill/completion queue mmap on 32-bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With gcc-4.1.2 on 32-bit: net/xdp/xsk.c:663: warning: integer constant is too large for ‘long’ type net/xdp/xsk.c:665: warning: integer constant is too large for ‘long’ type Add the missing "ULL" suffixes to the large XDP_UMEM_PGOFF_*_RING values to fix this. net/xdp/xsk.c:663: warning: comparison is always false due to limited range of data type net/xdp/xsk.c:665: warning: comparison is always false due to limited range of data type "unsigned long" is 32-bit on 32-bit systems, hence the offset is truncated, and can never be equal to any of the XDP_UMEM_PGOFF_*_RING values. Use loff_t (and the required cast) to fix this. Fixes: 423f38329d267969 ("xsk: add umem fill queue support and mmap") Fixes: fe2308328cd2f26e ("xsk: add umem completion queue support and mmap") Signed-off-by: Geert Uytterhoeven Acked-by: Björn Töpel Signed-off-by: Daniel Borkmann --- include/uapi/linux/if_xdp.h | 4 ++-- net/xdp/xsk.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 1fa0e977ea8d..caed8b1614ff 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -63,8 +63,8 @@ struct xdp_statistics { /* Pgoff for mmaping the rings */ #define XDP_PGOFF_RX_RING 0 #define XDP_PGOFF_TX_RING 0x80000000 -#define XDP_UMEM_PGOFF_FILL_RING 0x100000000 -#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000 +#define XDP_UMEM_PGOFF_FILL_RING 0x100000000ULL +#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL /* Rx/Tx descriptor */ struct xdp_desc { diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index c6ed2454f7ce..36919a254ba3 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -643,7 +643,7 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname, static int xsk_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) { - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + loff_t offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT; unsigned long size = vma->vm_end - vma->vm_start; struct xdp_sock *xs = xdp_sk(sock->sk); struct xsk_queue *q = NULL; -- cgit From cfecc2918d2b3c5e86ff1a6c95eabbbb17bb8fd3 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Fri, 1 Jun 2018 12:02:39 +0800 Subject: virtio_pci: support enabling VFs There is a new feature bit allocated in virtio spec to support SR-IOV (Single Root I/O Virtualization): https://github.com/oasis-tcs/virtio-spec/issues/11 This patch enables the support for this feature bit in virtio driver. Signed-off-by: Tiwei Bie Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 30 ++++++++++++++++++++++++++++++ drivers/virtio/virtio_pci_modern.c | 14 ++++++++++++++ include/uapi/linux/virtio_config.h | 7 ++++++- 3 files changed, 50 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 48d4d1cf1cb6..1d4467b2dc31 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -577,6 +577,8 @@ static void virtio_pci_remove(struct pci_dev *pci_dev) struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); struct device *dev = get_device(&vp_dev->vdev.dev); + pci_disable_sriov(pci_dev); + unregister_virtio_device(&vp_dev->vdev); if (vp_dev->ioaddr) @@ -588,6 +590,33 @@ static void virtio_pci_remove(struct pci_dev *pci_dev) put_device(dev); } +static int virtio_pci_sriov_configure(struct pci_dev *pci_dev, int num_vfs) +{ + struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); + struct virtio_device *vdev = &vp_dev->vdev; + int ret; + + if (!(vdev->config->get_status(vdev) & VIRTIO_CONFIG_S_DRIVER_OK)) + return -EBUSY; + + if (!__virtio_test_bit(vdev, VIRTIO_F_SR_IOV)) + return -EINVAL; + + if (pci_vfs_assigned(pci_dev)) + return -EPERM; + + if (num_vfs == 0) { + pci_disable_sriov(pci_dev); + return 0; + } + + ret = pci_enable_sriov(pci_dev, num_vfs); + if (ret < 0) + return ret; + + return num_vfs; +} + static struct pci_driver virtio_pci_driver = { .name = "virtio-pci", .id_table = virtio_pci_id_table, @@ -596,6 +625,7 @@ static struct pci_driver virtio_pci_driver = { #ifdef CONFIG_PM_SLEEP .driver.pm = &virtio_pci_pm_ops, #endif + .sriov_configure = virtio_pci_sriov_configure, }; module_pci_driver(virtio_pci_driver); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 2555d80f6eec..07571daccfec 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -153,14 +153,28 @@ static u64 vp_get_features(struct virtio_device *vdev) return features; } +static void vp_transport_features(struct virtio_device *vdev, u64 features) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct pci_dev *pci_dev = vp_dev->pci_dev; + + if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) && + pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV)) + __virtio_set_bit(vdev, VIRTIO_F_SR_IOV); +} + /* virtio config->finalize_features() implementation */ static int vp_finalize_features(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); + u64 features = vdev->features; /* Give virtio_ring a chance to accept features. */ vring_transport_features(vdev); + /* Give virtio_pci a chance to accept features. */ + vp_transport_features(vdev, features); + if (!__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) { dev_err(&vdev->dev, "virtio: device uses modern interface " "but does not have VIRTIO_F_VERSION_1\n"); diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index 308e2096291f..b7c1f4e7d59e 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -49,7 +49,7 @@ * transport being used (eg. virtio_ring), the rest are per-device feature * bits. */ #define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 34 +#define VIRTIO_TRANSPORT_F_END 38 #ifndef VIRTIO_CONFIG_NO_LEGACY /* Do we get callbacks when the ring is completely used, even if we've @@ -71,4 +71,9 @@ * this is for compatibility with legacy systems. */ #define VIRTIO_F_IOMMU_PLATFORM 33 + +/* + * Does the device support Single Root I/O Virtualization? + */ +#define VIRTIO_F_SR_IOV 37 #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */ -- cgit From 2eb98105f8c7f4b867f7f714a998f5b8c1bb009b Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Fri, 1 Jun 2018 19:26:32 +0800 Subject: virtio: update the comments for transport features The existing comments for transport features are outdated. So update them to address the latest changes in the spec. Suggested-by: Michael S. Tsirkin Signed-off-by: Tiwei Bie Reviewed-by: Cornelia Huck Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi --- include/uapi/linux/virtio_config.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index b7c1f4e7d59e..449132c76b1c 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -45,9 +45,12 @@ /* We've given up on this device. */ #define VIRTIO_CONFIG_S_FAILED 0x80 -/* Some virtio feature bits (currently bits 28 through 32) are reserved for the - * transport being used (eg. virtio_ring), the rest are per-device feature - * bits. */ +/* + * Virtio feature bits VIRTIO_TRANSPORT_F_START through + * VIRTIO_TRANSPORT_F_END are reserved for the transport + * being used (e.g. virtio_ring, virtio_pci etc.), the + * rest are per-device feature bits. + */ #define VIRTIO_TRANSPORT_F_START 28 #define VIRTIO_TRANSPORT_F_END 38 -- cgit From 766d3571d8e50d3a73b77043dc632226f9e6b389 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 8 Jun 2018 02:19:53 +0300 Subject: kvm: fix typo in flag name KVM_X86_DISABLE_EXITS_HTL really refers to exit on halt. Obviously a typo: should be named KVM_X86_DISABLE_EXITS_HLT. Fixes: caa057a2cad ("KVM: X86: Provide a capability to disable HLT intercepts") Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 ++-- include/uapi/linux/kvm.h | 4 ++-- tools/include/uapi/linux/kvm.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 439fb0c7dbc0..06dd4cdb2ca8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2899,7 +2899,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_CLOCK_TSC_STABLE; break; case KVM_CAP_X86_DISABLE_EXITS: - r |= KVM_X86_DISABLE_EXITS_HTL | KVM_X86_DISABLE_EXITS_PAUSE; + r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE; if(kvm_can_mwait_in_guest()) r |= KVM_X86_DISABLE_EXITS_MWAIT; break; @@ -4253,7 +4253,7 @@ split_irqchip_unlock: if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && kvm_can_mwait_in_guest()) kvm->arch.mwait_in_guest = true; - if (cap->args[0] & KVM_X86_DISABLE_EXITS_HTL) + if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT) kvm->arch.hlt_in_guest = true; if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) kvm->arch.pause_in_guest = true; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b252ceb3965c..b6270a3b38e9 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -677,10 +677,10 @@ struct kvm_ioeventfd { }; #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) -#define KVM_X86_DISABLE_EXITS_HTL (1 << 1) +#define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ - KVM_X86_DISABLE_EXITS_HTL | \ + KVM_X86_DISABLE_EXITS_HLT | \ KVM_X86_DISABLE_EXITS_PAUSE) /* for KVM_ENABLE_CAP */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index b02c41e53d56..39e364c70caf 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -677,10 +677,10 @@ struct kvm_ioeventfd { }; #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) -#define KVM_X86_DISABLE_EXITS_HTL (1 << 1) +#define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ - KVM_X86_DISABLE_EXITS_HTL | \ + KVM_X86_DISABLE_EXITS_HLT | \ KVM_X86_DISABLE_EXITS_PAUSE) /* for KVM_ENABLE_CAP */ -- cgit From 215a31f19dedd4e92a67cf5a9717ee898d012b3a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 11 Jun 2018 17:18:29 +0200 Subject: netfilter: nft_dynset: do not reject set updates with NFT_SET_EVAL NFT_SET_EVAL is signalling the kernel that this sets can be updated from the evaluation path, even if there are no expressions attached to the element. Otherwise, set updates with no expressions fail. Update description to describe the right semantics. Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 +- net/netfilter/nft_dynset.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index c9bf74b94f37..89438e68dc03 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -266,7 +266,7 @@ enum nft_rule_compat_attributes { * @NFT_SET_INTERVAL: set contains intervals * @NFT_SET_MAP: set is used as a dictionary * @NFT_SET_TIMEOUT: set uses timeouts - * @NFT_SET_EVAL: set contains expressions for evaluation + * @NFT_SET_EVAL: set can be updated from the evaluation path * @NFT_SET_OBJECT: set contains stateful objects */ enum nft_set_flags { diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 4d49529cff61..27d7e4598ab6 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -203,9 +203,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, goto err1; set->ops->gc_init(set); } - - } else if (set->flags & NFT_SET_EVAL) - return -EINVAL; + } nft_set_ext_prepare(&priv->tmpl); nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen); -- cgit From 2dd5aa15d9a2d4b87b32d93f3c5a66f175c0123d Mon Sep 17 00:00:00 2001 From: Jorge Sanjuan Date: Thu, 14 Jun 2018 15:05:55 +0100 Subject: ALSA: usb-audio: Add bi-directional terminal types Define the bi-directional USB terminal types for audio devices. Signed-off-by: Jorge Sanjuan Signed-off-by: Takashi Iwai --- include/uapi/linux/usb/audio.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/usb/audio.h b/include/uapi/linux/usb/audio.h index 13d98e6e0db1..74e520fb944f 100644 --- a/include/uapi/linux/usb/audio.h +++ b/include/uapi/linux/usb/audio.h @@ -230,6 +230,14 @@ struct uac1_output_terminal_descriptor { #define UAC_OUTPUT_TERMINAL_COMMUNICATION_SPEAKER 0x306 #define UAC_OUTPUT_TERMINAL_LOW_FREQ_EFFECTS_SPEAKER 0x307 +/* Terminals - 2.4 Bi-directional Terminal Types */ +#define UAC_BIDIR_TERMINAL_UNDEFINED 0x400 +#define UAC_BIDIR_TERMINAL_HANDSET 0x401 +#define UAC_BIDIR_TERMINAL_HEADSET 0x402 +#define UAC_BIDIR_TERMINAL_SPEAKER_PHONE 0x403 +#define UAC_BIDIR_TERMINAL_ECHO_SUPPRESSING 0x404 +#define UAC_BIDIR_TERMINAL_ECHO_CANCELING 0x405 + /* Set bControlSize = 2 as default setting */ #define UAC_DT_FEATURE_UNIT_SIZE(ch) (7 + ((ch) + 1) * 2) -- cgit From 94aefd32f431a2c15f9d2c5f8f19dece73a77b52 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 8 Jun 2018 17:55:05 +0300 Subject: aio: mark __aio_sigset::sigmask const io_pgetevents() will not change the signal mask. Mark it const to make it clear and to reduce the need for casts in user code. Reviewed-by: Christoph Hellwig Signed-off-by: Avi Kivity Signed-off-by: Al Viro --- include/uapi/linux/aio_abi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index 75846164290e..d00221345c19 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -109,7 +109,7 @@ struct iocb { #undef IFLITTLE struct __aio_sigset { - sigset_t __user *sigmask; + const sigset_t __user *sigmask; size_t sigsetsize; }; -- cgit From badbc27df3a934e0025be238754f9ca6a852c006 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 8 Jun 2018 10:04:47 +0300 Subject: nl80211: fix some kernel doc tag mistakes There is a bunch of tags marking constants with &, which means struct or enum name. Replace them with %, which is the correct tag for constants. Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 28b36545de24..27e4e441caac 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -981,18 +981,18 @@ * only the %NL80211_ATTR_IE data is used and updated with this command. * * @NL80211_CMD_SET_PMK: For offloaded 4-Way handshake, set the PMK or PMK-R0 - * for the given authenticator address (specified with &NL80211_ATTR_MAC). - * When &NL80211_ATTR_PMKR0_NAME is set, &NL80211_ATTR_PMK specifies the + * for the given authenticator address (specified with %NL80211_ATTR_MAC). + * When %NL80211_ATTR_PMKR0_NAME is set, %NL80211_ATTR_PMK specifies the * PMK-R0, otherwise it specifies the PMK. * @NL80211_CMD_DEL_PMK: For offloaded 4-Way handshake, delete the previously * configured PMK for the authenticator address identified by - * &NL80211_ATTR_MAC. + * %NL80211_ATTR_MAC. * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates that the 4 way * handshake was completed successfully by the driver. The BSSID is - * specified with &NL80211_ATTR_MAC. Drivers that support 4 way handshake + * specified with %NL80211_ATTR_MAC. Drivers that support 4 way handshake * offload should send this event after indicating 802.11 association with - * &NL80211_CMD_CONNECT or &NL80211_CMD_ROAM. If the 4 way handshake failed - * &NL80211_CMD_DISCONNECT should be indicated instead. + * %NL80211_CMD_CONNECT or %NL80211_CMD_ROAM. If the 4 way handshake failed + * %NL80211_CMD_DISCONNECT should be indicated instead. * * @NL80211_CMD_CONTROL_PORT_FRAME: Control Port (e.g. PAE) frame TX request * and RX notification. This command is used both as a request to transmit @@ -1029,9 +1029,9 @@ * initiated the connection through the connect request. * * @NL80211_CMD_STA_OPMODE_CHANGED: An event that notify station's - * ht opmode or vht opmode changes using any of &NL80211_ATTR_SMPS_MODE, - * &NL80211_ATTR_CHANNEL_WIDTH,&NL80211_ATTR_NSS attributes with its - * address(specified in &NL80211_ATTR_MAC). + * ht opmode or vht opmode changes using any of %NL80211_ATTR_SMPS_MODE, + * %NL80211_ATTR_CHANNEL_WIDTH,%NL80211_ATTR_NSS attributes with its + * address(specified in %NL80211_ATTR_MAC). * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use @@ -2218,7 +2218,7 @@ enum nl80211_commands { * @NL80211_ATTR_EXTERNAL_AUTH_ACTION: Identify the requested external * authentication operation (u32 attribute with an * &enum nl80211_external_auth_action value). This is used with the - * &NL80211_CMD_EXTERNAL_AUTH request event. + * %NL80211_CMD_EXTERNAL_AUTH request event. * @NL80211_ATTR_EXTERNAL_AUTH_SUPPORT: Flag attribute indicating that the user * space supports external authentication. This attribute shall be used * only with %NL80211_CMD_CONNECT request. The driver may offload @@ -3491,7 +3491,7 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_AUTO_BW: maximum available bandwidth should be calculated * base on contiguous rules and wider channels will be allowed to cross * multiple contiguous/overlapping frequency ranges. - * @NL80211_RRF_IR_CONCURRENT: See &NL80211_FREQUENCY_ATTR_IR_CONCURRENT + * @NL80211_RRF_IR_CONCURRENT: See %NL80211_FREQUENCY_ATTR_IR_CONCURRENT * @NL80211_RRF_NO_HT40MINUS: channels can't be used in HT40- operation * @NL80211_RRF_NO_HT40PLUS: channels can't be used in HT40+ operation * @NL80211_RRF_NO_80MHZ: 80MHz operation not allowed @@ -5643,11 +5643,11 @@ enum nl80211_nan_func_attributes { * @NL80211_NAN_SRF_INCLUDE: present if the include bit of the SRF set. * This is a flag. * @NL80211_NAN_SRF_BF: Bloom Filter. Present if and only if - * &NL80211_NAN_SRF_MAC_ADDRS isn't present. This attribute is binary. + * %NL80211_NAN_SRF_MAC_ADDRS isn't present. This attribute is binary. * @NL80211_NAN_SRF_BF_IDX: index of the Bloom Filter. Mandatory if - * &NL80211_NAN_SRF_BF is present. This is a u8. + * %NL80211_NAN_SRF_BF is present. This is a u8. * @NL80211_NAN_SRF_MAC_ADDRS: list of MAC addresses for the SRF. Present if - * and only if &NL80211_NAN_SRF_BF isn't present. This is a nested + * and only if %NL80211_NAN_SRF_BF isn't present. This is a nested * attribute. Each nested attribute is a MAC address. * @NUM_NL80211_NAN_SRF_ATTR: internal * @NL80211_NAN_SRF_ATTR_MAX: highest NAN SRF attribute -- cgit From 5fb94e9ca333f0fe1d96de06704a79942b3832c3 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 May 2018 15:14:57 -0300 Subject: docs: Fix some broken references As we move stuff around, some doc references are broken. Fix some of them via this script: ./scripts/documentation-file-ref-check --fix Manually checked if the produced result is valid, removing a few false-positives. Acked-by: Takashi Iwai Acked-by: Masami Hiramatsu Acked-by: Stephen Boyd Acked-by: Charles Keepax Acked-by: Mathieu Poirier Reviewed-by: Coly Li Signed-off-by: Mauro Carvalho Chehab Acked-by: Jonathan Corbet --- Documentation/admin-guide/kernel-parameters.txt | 4 ++-- .../devicetree/bindings/input/rotary-encoder.txt | 2 +- Documentation/driver-api/gpio/consumer.rst | 2 +- Documentation/kprobes.txt | 4 ++-- Documentation/trace/coresight.txt | 2 +- Documentation/trace/ftrace-uses.rst | 2 +- Documentation/trace/histogram.txt | 2 +- Documentation/trace/intel_th.rst | 2 +- Documentation/trace/tracepoint-analysis.rst | 6 +++--- Documentation/translations/ja_JP/howto.rst | 4 ++-- Documentation/translations/zh_CN/magic-number.txt | 4 ++-- .../translations/zh_CN/video4linux/omap3isp.txt | 4 ++-- MAINTAINERS | 20 ++++++++++---------- arch/Kconfig | 2 +- arch/arm/include/asm/cacheflush.h | 2 +- arch/arm64/include/asm/cacheflush.h | 2 +- arch/microblaze/include/asm/cacheflush.h | 2 +- arch/um/Kconfig.um | 2 +- arch/unicore32/include/asm/cacheflush.h | 2 +- arch/x86/entry/vsyscall/vsyscall_64.c | 2 +- arch/xtensa/include/asm/cacheflush.h | 4 ++-- block/Kconfig | 2 +- certs/Kconfig | 2 +- crypto/asymmetric_keys/asymmetric_type.c | 2 +- crypto/asymmetric_keys/signature.c | 2 +- drivers/char/Kconfig | 2 +- drivers/clk/clk.c | 4 ++-- drivers/clk/ingenic/cgu.h | 2 +- drivers/gpu/vga/Kconfig | 2 +- drivers/gpu/vga/vgaarb.c | 2 +- drivers/input/joystick/Kconfig | 10 +++++----- drivers/input/joystick/walkera0701.c | 2 +- drivers/input/misc/Kconfig | 4 ++-- drivers/input/misc/rotary_encoder.c | 2 +- drivers/input/mouse/Kconfig | 6 +++--- drivers/input/mouse/alps.c | 2 +- drivers/input/touchscreen/wm97xx-core.c | 2 +- drivers/lightnvm/pblk-rb.c | 2 +- drivers/md/bcache/Kconfig | 2 +- drivers/md/bcache/btree.c | 2 +- drivers/md/bcache/extents.c | 2 +- drivers/media/dvb-core/dvb_ringbuffer.c | 2 +- drivers/media/pci/meye/Kconfig | 2 +- drivers/media/platform/pxa_camera.c | 4 ++-- .../media/platform/soc_camera/sh_mobile_ceu_camera.c | 2 +- drivers/media/radio/Kconfig | 2 +- drivers/media/radio/si470x/Kconfig | 2 +- drivers/media/usb/dvb-usb-v2/lmedm04.c | 2 +- drivers/media/usb/zr364xx/Kconfig | 2 +- drivers/parport/Kconfig | 6 +++--- drivers/staging/media/bcm2048/TODO | 2 +- include/keys/asymmetric-subtype.h | 2 +- include/keys/asymmetric-type.h | 2 +- include/linux/assoc_array.h | 2 +- include/linux/assoc_array_priv.h | 2 +- include/linux/circ_buf.h | 2 +- include/linux/ftrace.h | 2 +- include/linux/rculist_nulls.h | 2 +- include/uapi/linux/prctl.h | 2 +- include/xen/interface/io/kbdif.h | 2 +- kernel/cgroup/cpuset.c | 2 +- kernel/trace/Kconfig | 16 ++++++++-------- lib/Kconfig | 2 +- security/selinux/hooks.c | 2 +- sound/core/Kconfig | 4 ++-- sound/drivers/Kconfig | 4 ++-- sound/pci/Kconfig | 10 +++++----- tools/include/uapi/linux/prctl.h | 2 +- tools/lib/api/fs/fs.c | 2 +- tools/perf/util/bpf-prologue.c | 2 +- .../pm-graph/config/custom-timeline-functions.cfg | 4 ++-- 71 files changed, 113 insertions(+), 113 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 638342d0a095..6fa3f31ed2a5 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4335,7 +4335,7 @@ [FTRACE] Set and start specified trace events in order to facilitate early boot debugging. The event-list is a comma separated list of trace events to enable. See - also Documentation/trace/events.txt + also Documentation/trace/events.rst trace_options=[option-list] [FTRACE] Enable or disable tracer options at boot. @@ -4350,7 +4350,7 @@ trace_options=stacktrace - See also Documentation/trace/ftrace.txt "trace options" + See also Documentation/trace/ftrace.rst "trace options" section. tp_printk[FTRACE] diff --git a/Documentation/devicetree/bindings/input/rotary-encoder.txt b/Documentation/devicetree/bindings/input/rotary-encoder.txt index f99fe5cdeaec..a644408b33b8 100644 --- a/Documentation/devicetree/bindings/input/rotary-encoder.txt +++ b/Documentation/devicetree/bindings/input/rotary-encoder.txt @@ -28,7 +28,7 @@ Deprecated properties: This property is deprecated. Instead, a 'steps-per-period ' value should be used, such as "rotary-encoder,steps-per-period = <2>". -See Documentation/input/rotary-encoder.txt for more information. +See Documentation/input/devices/rotary-encoder.rst for more information. Example: diff --git a/Documentation/driver-api/gpio/consumer.rst b/Documentation/driver-api/gpio/consumer.rst index c71a50d85b50..aa03f389d41d 100644 --- a/Documentation/driver-api/gpio/consumer.rst +++ b/Documentation/driver-api/gpio/consumer.rst @@ -57,7 +57,7 @@ device that displays digits), an additional index argument can be specified:: enum gpiod_flags flags) For a more detailed description of the con_id parameter in the DeviceTree case -see Documentation/gpio/board.txt +see Documentation/driver-api/gpio/board.rst The flags parameter is used to optionally specify a direction and initial value for the GPIO. Values can be: diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 22208bf2386d..cb3b0de83fc6 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -724,8 +724,8 @@ migrate your tool to one of the following options: See following documents: - - Documentation/trace/kprobetrace.txt - - Documentation/trace/events.txt + - Documentation/trace/kprobetrace.rst + - Documentation/trace/events.rst - tools/perf/Documentation/perf-probe.txt diff --git a/Documentation/trace/coresight.txt b/Documentation/trace/coresight.txt index 1d74ad0202b6..efbc832146e7 100644 --- a/Documentation/trace/coresight.txt +++ b/Documentation/trace/coresight.txt @@ -426,5 +426,5 @@ root@genericarmv8:~# Details on how to use the generic STM API can be found here [2]. [1]. Documentation/ABI/testing/sysfs-bus-coresight-devices-stm -[2]. Documentation/trace/stm.txt +[2]. Documentation/trace/stm.rst [3]. https://github.com/Linaro/perf-opencsd diff --git a/Documentation/trace/ftrace-uses.rst b/Documentation/trace/ftrace-uses.rst index 00283b6dd101..1fbc69894eed 100644 --- a/Documentation/trace/ftrace-uses.rst +++ b/Documentation/trace/ftrace-uses.rst @@ -199,7 +199,7 @@ If @buf is NULL and reset is set, all functions will be enabled for tracing. The @buf can also be a glob expression to enable all functions that match a specific pattern. -See Filter Commands in :file:`Documentation/trace/ftrace.txt`. +See Filter Commands in :file:`Documentation/trace/ftrace.rst`. To just trace the schedule function: diff --git a/Documentation/trace/histogram.txt b/Documentation/trace/histogram.txt index b13771cb12c1..e73bcf9cb5f3 100644 --- a/Documentation/trace/histogram.txt +++ b/Documentation/trace/histogram.txt @@ -7,7 +7,7 @@ Histogram triggers are special event triggers that can be used to aggregate trace event data into histograms. For information on - trace events and event triggers, see Documentation/trace/events.txt. + trace events and event triggers, see Documentation/trace/events.rst. 2. Histogram Trigger Command diff --git a/Documentation/trace/intel_th.rst b/Documentation/trace/intel_th.rst index 990f13265178..19e2d633f3c7 100644 --- a/Documentation/trace/intel_th.rst +++ b/Documentation/trace/intel_th.rst @@ -38,7 +38,7 @@ description is at Documentation/ABI/testing/sysfs-bus-intel_th-devices-gth. STH registers an stm class device, through which it provides interface to userspace and kernelspace software trace sources. See -Documentation/trace/stm.txt for more information on that. +Documentation/trace/stm.rst for more information on that. MSU can be configured to collect trace data into a system memory buffer, which can later on be read from its device nodes via read() or diff --git a/Documentation/trace/tracepoint-analysis.rst b/Documentation/trace/tracepoint-analysis.rst index bef37abf4ad3..716326b9f152 100644 --- a/Documentation/trace/tracepoint-analysis.rst +++ b/Documentation/trace/tracepoint-analysis.rst @@ -55,7 +55,7 @@ simple case of:: 3.1 System-Wide Event Enabling ------------------------------ -See Documentation/trace/events.txt for a proper description on how events +See Documentation/trace/events.rst for a proper description on how events can be enabled system-wide. A short example of enabling all events related to page allocation would look something like:: @@ -112,7 +112,7 @@ at that point. 3.4 Local Event Enabling ------------------------ -Documentation/trace/ftrace.txt describes how to enable events on a per-thread +Documentation/trace/ftrace.rst describes how to enable events on a per-thread basis using set_ftrace_pid. 3.5 Local Event Enablement with PCL @@ -137,7 +137,7 @@ basis using PCL such as follows. 4. Event Filtering ================== -Documentation/trace/ftrace.txt covers in-depth how to filter events in +Documentation/trace/ftrace.rst covers in-depth how to filter events in ftrace. Obviously using grep and awk of trace_pipe is an option as well as any script reading trace_pipe. diff --git a/Documentation/translations/ja_JP/howto.rst b/Documentation/translations/ja_JP/howto.rst index 8d7ed0cbbf5f..f3116381c26b 100644 --- a/Documentation/translations/ja_JP/howto.rst +++ b/Documentation/translations/ja_JP/howto.rst @@ -1,5 +1,5 @@ NOTE: -This is a version of Documentation/HOWTO translated into Japanese. +This is a version of Documentation/process/howto.rst translated into Japanese. This document is maintained by Tsugikazu Shibata If you find any difference between this document and the original file or a problem with the translation, please contact the maintainer of this file. @@ -109,7 +109,7 @@ linux-api@vger.kernel.org に送ることを勧めます。 ています。 カーネルに関して初めての人はここからスタートすると良い でしょう。 - :ref:`Documentation/Process/changes.rst ` + :ref:`Documentation/process/changes.rst ` このファイルはカーネルをうまく生成(訳注 build )し、走らせるのに最 小限のレベルで必要な数々のソフトウェアパッケージの一覧を示してい ます。 diff --git a/Documentation/translations/zh_CN/magic-number.txt b/Documentation/translations/zh_CN/magic-number.txt index e9db693c0a23..7159cec04090 100644 --- a/Documentation/translations/zh_CN/magic-number.txt +++ b/Documentation/translations/zh_CN/magic-number.txt @@ -1,4 +1,4 @@ -Chinese translated version of Documentation/magic-number.txt +Chinese translated version of Documentation/process/magic-number.rst If you have any comment or update to the content, please post to LKML directly. However, if you have problem communicating in English you can also ask the @@ -7,7 +7,7 @@ translation is outdated or there is problem with translation. Chinese maintainer: Jia Wei Wei --------------------------------------------------------------------- -Documentation/magic-number.txt的中文翻译 +Documentation/process/magic-number.rst的中文翻译 如果想评论或更新本文的内容,请直接发信到LKML。如果你使用英文交流有困难的话,也可 以向中文版维护者求助。如果本翻译更新不及时或者翻译存在问题,请联系中文版维护者。 diff --git a/Documentation/translations/zh_CN/video4linux/omap3isp.txt b/Documentation/translations/zh_CN/video4linux/omap3isp.txt index 67ffbf352ae0..e9f29375aa95 100644 --- a/Documentation/translations/zh_CN/video4linux/omap3isp.txt +++ b/Documentation/translations/zh_CN/video4linux/omap3isp.txt @@ -1,4 +1,4 @@ -Chinese translated version of Documentation/video4linux/omap3isp.txt +Chinese translated version of Documentation/media/v4l-drivers/omap3isp.rst If you have any comment or update to the content, please contact the original document maintainer directly. However, if you have a problem @@ -11,7 +11,7 @@ Maintainer: Laurent Pinchart David Cohen Chinese maintainer: Fu Wei --------------------------------------------------------------------- -Documentation/video4linux/omap3isp.txt 的中文翻译 +Documentation/media/v4l-drivers/omap3isp.rst 的中文翻译 如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 diff --git a/MAINTAINERS b/MAINTAINERS index 653a2c29ca43..09554034be46 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3079,7 +3079,7 @@ M: Clemens Ladisch L: alsa-devel@alsa-project.org (moderated for non-subscribers) T: git git://git.alsa-project.org/alsa-kernel.git S: Maintained -F: Documentation/sound/alsa/Bt87x.txt +F: Documentation/sound/cards/bt87x.rst F: sound/pci/bt87x.c BT8XXGPIO DRIVER @@ -3375,7 +3375,7 @@ M: David Howells M: David Woodhouse L: keyrings@vger.kernel.org S: Maintained -F: Documentation/module-signing.txt +F: Documentation/admin-guide/module-signing.rst F: certs/ F: scripts/sign-file.c F: scripts/extract-cert.c @@ -6501,7 +6501,7 @@ L: linux-mm@kvack.org S: Maintained F: mm/hmm* F: include/linux/hmm* -F: Documentation/vm/hmm.txt +F: Documentation/vm/hmm.rst HOST AP DRIVER M: Jouni Malinen @@ -7401,7 +7401,7 @@ F: drivers/platform/x86/intel-wmi-thunderbolt.c INTEL(R) TRACE HUB M: Alexander Shishkin S: Supported -F: Documentation/trace/intel_th.txt +F: Documentation/trace/intel_th.rst F: drivers/hwtracing/intel_th/ INTEL(R) TRUSTED EXECUTION TECHNOLOGY (TXT) @@ -9665,7 +9665,7 @@ F: include/uapi/linux/mmc/ MULTIPLEXER SUBSYSTEM M: Peter Rosin S: Maintained -F: Documentation/ABI/testing/mux/sysfs-class-mux* +F: Documentation/ABI/testing/sysfs-class-mux* F: Documentation/devicetree/bindings/mux/ F: include/linux/dt-bindings/mux/ F: include/linux/mux/ @@ -10244,7 +10244,7 @@ F: arch/powerpc/include/asm/pnv-ocxl.h F: drivers/misc/ocxl/ F: include/misc/ocxl* F: include/uapi/misc/ocxl.h -F: Documentation/accelerators/ocxl.txt +F: Documentation/accelerators/ocxl.rst OMAP AUDIO SUPPORT M: Peter Ujfalusi @@ -13794,7 +13794,7 @@ SYSTEM TRACE MODULE CLASS M: Alexander Shishkin S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/ash/stm.git -F: Documentation/trace/stm.txt +F: Documentation/trace/stm.rst F: drivers/hwtracing/stm/ F: include/linux/stm.h F: include/uapi/linux/stm.h @@ -14471,7 +14471,7 @@ M: Steven Rostedt M: Ingo Molnar T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core S: Maintained -F: Documentation/trace/ftrace.txt +F: Documentation/trace/ftrace.rst F: arch/*/*/*/ftrace.h F: arch/*/kernel/ftrace.c F: include/*/ftrace.h @@ -14940,7 +14940,7 @@ M: Heikki Krogerus L: linux-usb@vger.kernel.org S: Maintained F: Documentation/ABI/testing/sysfs-class-typec -F: Documentation/usb/typec.rst +F: Documentation/driver-api/usb/typec.rst F: drivers/usb/typec/ F: include/linux/usb/typec.h @@ -15770,7 +15770,7 @@ YEALINK PHONE DRIVER M: Henk Vergonet L: usbb2k-api-dev@nongnu.org S: Maintained -F: Documentation/input/yealink.rst +F: Documentation/input/devices/yealink.rst F: drivers/input/misc/yealink.* Z8530 DRIVER FOR AX.25 diff --git a/arch/Kconfig b/arch/Kconfig index 47b235d43909..1aa59063f1fd 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -403,7 +403,7 @@ config SECCOMP_FILTER in terms of Berkeley Packet Filter programs which implement task-defined system call filtering polices. - See Documentation/prctl/seccomp_filter.txt for details. + See Documentation/userspace-api/seccomp_filter.rst for details. preferred-plugin-hostcc := $(if-success,[ $(gcc-version) -ge 40800 ],$(HOSTCXX),$(HOSTCC)) diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 869080bedb89..ec1a5fd0d294 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -35,7 +35,7 @@ * Start addresses are inclusive and end addresses are exclusive; * start addresses should be rounded down, end addresses up. * - * See Documentation/cachetlb.txt for more information. + * See Documentation/core-api/cachetlb.rst for more information. * Please note that the implementation of these, and the required * effects are cache-type (VIVT/VIPT/PIPT) specific. * diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 0094c6653b06..d264a7274811 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -36,7 +36,7 @@ * Start addresses are inclusive and end addresses are exclusive; start * addresses should be rounded down, end addresses up. * - * See Documentation/cachetlb.txt for more information. Please note that + * See Documentation/core-api/cachetlb.rst for more information. Please note that * the implementation assumes non-aliasing VIPT D-cache and (aliasing) * VIPT I-cache. * diff --git a/arch/microblaze/include/asm/cacheflush.h b/arch/microblaze/include/asm/cacheflush.h index ffea82a16d2c..b091de77b15b 100644 --- a/arch/microblaze/include/asm/cacheflush.h +++ b/arch/microblaze/include/asm/cacheflush.h @@ -19,7 +19,7 @@ #include #include -/* Look at Documentation/cachetlb.txt */ +/* Look at Documentation/core-api/cachetlb.rst */ /* * Cache handling functions. diff --git a/arch/um/Kconfig.um b/arch/um/Kconfig.um index 3e7f228b22e1..20da5a8ca949 100644 --- a/arch/um/Kconfig.um +++ b/arch/um/Kconfig.um @@ -80,7 +80,7 @@ config MAGIC_SYSRQ On UML, this is accomplished by sending a "sysrq" command with mconsole, followed by the letter for the requested command. - The keys are documented in . Don't say Y + The keys are documented in . Don't say Y unless you really know what this hack does. config KERNEL_STACK_ORDER diff --git a/arch/unicore32/include/asm/cacheflush.h b/arch/unicore32/include/asm/cacheflush.h index 1d9132b66039..1c8b9f13a9e1 100644 --- a/arch/unicore32/include/asm/cacheflush.h +++ b/arch/unicore32/include/asm/cacheflush.h @@ -33,7 +33,7 @@ * Start addresses are inclusive and end addresses are exclusive; * start addresses should be rounded down, end addresses up. * - * See Documentation/cachetlb.txt for more information. + * See Documentation/core-api/cachetlb.rst for more information. * Please note that the implementation of these, and the required * effects are cache-type (VIVT/VIPT/PIPT) specific. * diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 7782cdbcd67d..82ed001e8909 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -201,7 +201,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) /* * Handle seccomp. regs->ip must be the original value. - * See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt. + * See seccomp_send_sigsys and Documentation/userspace-api/seccomp_filter.rst. * * We could optimize the seccomp disabled case, but performance * here doesn't matter. diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h index 397d6a1a4224..a0d50be5a8cb 100644 --- a/arch/xtensa/include/asm/cacheflush.h +++ b/arch/xtensa/include/asm/cacheflush.h @@ -88,7 +88,7 @@ static inline void __invalidate_icache_page_alias(unsigned long virt, * * Pages can get remapped. Because this might change the 'color' of that page, * we have to flush the cache before the PTE is changed. - * (see also Documentation/cachetlb.txt) + * (see also Documentation/core-api/cachetlb.rst) */ #if defined(CONFIG_MMU) && \ @@ -152,7 +152,7 @@ void local_flush_cache_page(struct vm_area_struct *vma, __invalidate_icache_range(start,(end) - (start)); \ } while (0) -/* This is not required, see Documentation/cachetlb.txt */ +/* This is not required, see Documentation/core-api/cachetlb.rst */ #define flush_icache_page(vma,page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) diff --git a/block/Kconfig b/block/Kconfig index 28ec55752b68..eb50fd4977c2 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -114,7 +114,7 @@ config BLK_DEV_THROTTLING one needs to mount and use blkio cgroup controller for creating cgroups and specifying per device IO rate policies. - See Documentation/cgroups/blkio-controller.txt for more information. + See Documentation/cgroup-v1/blkio-controller.txt for more information. config BLK_DEV_THROTTLING_LOW bool "Block throttling .low limit interface support (EXPERIMENTAL)" diff --git a/certs/Kconfig b/certs/Kconfig index 5f7663df6e8e..c94e93d8bccf 100644 --- a/certs/Kconfig +++ b/certs/Kconfig @@ -13,7 +13,7 @@ config MODULE_SIG_KEY If this option is unchanged from its default "certs/signing_key.pem", then the kernel will automatically generate the private key and - certificate as described in Documentation/module-signing.txt + certificate as described in Documentation/admin-guide/module-signing.rst config SYSTEM_TRUSTED_KEYRING bool "Provide system-wide ring of trusted keys" diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c index 39aecad286fe..26539e9a8bda 100644 --- a/crypto/asymmetric_keys/asymmetric_type.c +++ b/crypto/asymmetric_keys/asymmetric_type.c @@ -1,6 +1,6 @@ /* Asymmetric public-key cryptography key type * - * See Documentation/security/asymmetric-keys.txt + * See Documentation/crypto/asymmetric-keys.txt * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) diff --git a/crypto/asymmetric_keys/signature.c b/crypto/asymmetric_keys/signature.c index 11b7ba170904..28198314bc39 100644 --- a/crypto/asymmetric_keys/signature.c +++ b/crypto/asymmetric_keys/signature.c @@ -1,6 +1,6 @@ /* Signature verification with an asymmetric key * - * See Documentation/security/asymmetric-keys.txt + * See Documentation/crypto/asymmetric-keys.txt * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 410c30c42120..212f447938ae 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -81,7 +81,7 @@ config PRINTER corresponding drivers into the kernel. To compile this driver as a module, choose M here and read - . The module will be called lp. + . The module will be called lp. If you have several parallel ports, you can specify which ports to use with the "lp" kernel command line option. (Try "man bootparam" diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index a24a6afb50b6..9760b526ca31 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -6,7 +6,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * - * Standard functionality for the common clock API. See Documentation/clk.txt + * Standard functionality for the common clock API. See Documentation/driver-api/clk.rst */ #include @@ -2747,7 +2747,7 @@ static int __clk_core_init(struct clk_core *core) goto out; } - /* check that clk_ops are sane. See Documentation/clk.txt */ + /* check that clk_ops are sane. See Documentation/driver-api/clk.rst */ if (core->ops->set_rate && !((core->ops->round_rate || core->ops->determine_rate) && core->ops->recalc_rate)) { diff --git a/drivers/clk/ingenic/cgu.h b/drivers/clk/ingenic/cgu.h index 542192376ebf..502bcbb61b04 100644 --- a/drivers/clk/ingenic/cgu.h +++ b/drivers/clk/ingenic/cgu.h @@ -194,7 +194,7 @@ struct ingenic_cgu { /** * struct ingenic_clk - private data for a clock - * @hw: see Documentation/clk.txt + * @hw: see Documentation/driver-api/clk.rst * @cgu: a pointer to the CGU data * @idx: the index of this clock in cgu->clock_info */ diff --git a/drivers/gpu/vga/Kconfig b/drivers/gpu/vga/Kconfig index 29437eabe095..b677e5d524e6 100644 --- a/drivers/gpu/vga/Kconfig +++ b/drivers/gpu/vga/Kconfig @@ -6,7 +6,7 @@ config VGA_ARB Some "legacy" VGA devices implemented on PCI typically have the same hard-decoded addresses as they did on ISA. When multiple PCI devices are accessed at same time they need some kind of coordination. Please - see Documentation/vgaarbiter.txt for more details. Select this to + see Documentation/gpu/vgaarbiter.rst for more details. Select this to enable VGA arbiter. config VGA_ARB_MAX_GPUS diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c index 1c5e74cb9279..c61b04555779 100644 --- a/drivers/gpu/vga/vgaarb.c +++ b/drivers/gpu/vga/vgaarb.c @@ -1,6 +1,6 @@ /* * vgaarb.c: Implements the VGA arbitration. For details refer to - * Documentation/vgaarbiter.txt + * Documentation/gpu/vgaarbiter.rst * * * (C) Copyright 2005 Benjamin Herrenschmidt diff --git a/drivers/input/joystick/Kconfig b/drivers/input/joystick/Kconfig index 32ec4cee6716..d8f9c6e1fc08 100644 --- a/drivers/input/joystick/Kconfig +++ b/drivers/input/joystick/Kconfig @@ -214,7 +214,7 @@ config JOYSTICK_DB9 gamepad, Sega Saturn gamepad, or a Multisystem -- Atari, Amiga, Commodore, Amstrad CPC joystick connected to your parallel port. For more information on how to use the driver please read - . + . To compile this driver as a module, choose M here: the module will be called db9. @@ -229,7 +229,7 @@ config JOYSTICK_GAMECON Sony PlayStation gamepad or a Multisystem -- Atari, Amiga, Commodore, Amstrad CPC joystick connected to your parallel port. For more information on how to use the driver please read - . + . To compile this driver as a module, choose M here: the module will be called gamecon. @@ -241,7 +241,7 @@ config JOYSTICK_TURBOGRAFX Say Y here if you have the TurboGraFX interface by Steffen Schwenke, and want to use it with Multisystem -- Atari, Amiga, Commodore, Amstrad CPC joystick. For more information on how to use the driver - please read . + please read . To compile this driver as a module, choose M here: the module will be called turbografx. @@ -287,7 +287,7 @@ config JOYSTICK_XPAD and/or "Event interface support" (CONFIG_INPUT_EVDEV) as well. For information about how to connect the X-Box pad to USB, see - . + . To compile this driver as a module, choose M here: the module will be called xpad. @@ -313,7 +313,7 @@ config JOYSTICK_WALKERA0701 Say Y or M here if you have a Walkera WK-0701 transmitter which is supplied with a ready to fly Walkera helicopters such as HM36, HM37, HM60 and want to use it via parport as a joystick. More - information is available: + information is available: To compile this driver as a module, choose M here: the module will be called walkera0701. diff --git a/drivers/input/joystick/walkera0701.c b/drivers/input/joystick/walkera0701.c index 36a5b93156ed..dce313dc260a 100644 --- a/drivers/input/joystick/walkera0701.c +++ b/drivers/input/joystick/walkera0701.c @@ -3,7 +3,7 @@ * * Copyright (c) 2008 Peter Popovec * - * More about driver: + * More about driver: */ /* diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index 572b15fa18c2..c25606e00693 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -411,7 +411,7 @@ config INPUT_YEALINK usb sound driver, so you might want to enable that as well. For information about how to use these additional functions, see - . + . To compile this driver as a module, choose M here: the module will be called yealink. @@ -595,7 +595,7 @@ config INPUT_GPIO_ROTARY_ENCODER depends on GPIOLIB || COMPILE_TEST help Say Y here to add support for rotary encoders connected to GPIO lines. - Check file:Documentation/input/rotary-encoder.txt for more + Check file:Documentation/input/devices/rotary-encoder.rst for more information. To compile this driver as a module, choose M here: the diff --git a/drivers/input/misc/rotary_encoder.c b/drivers/input/misc/rotary_encoder.c index 6d304381fc30..30ec77ad32c6 100644 --- a/drivers/input/misc/rotary_encoder.c +++ b/drivers/input/misc/rotary_encoder.c @@ -7,7 +7,7 @@ * state machine code inspired by code from Tim Ruetz * * A generic driver for rotary encoders connected to GPIO lines. - * See file:Documentation/input/rotary-encoder.txt for more information + * See file:Documentation/input/devices/rotary-encoder.rst for more information * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig index f27f23f2d99a..566a1e3aa504 100644 --- a/drivers/input/mouse/Kconfig +++ b/drivers/input/mouse/Kconfig @@ -129,7 +129,7 @@ config MOUSE_PS2_ELANTECH This driver exposes some configuration registers via sysfs entries. For further information, - see . + see . If unsure, say N. @@ -228,7 +228,7 @@ config MOUSE_APPLETOUCH scrolling in X11. For further information, see - . + . To compile this driver as a module, choose M here: the module will be called appletouch. @@ -251,7 +251,7 @@ config MOUSE_BCM5974 The interface is currently identical to the appletouch interface, for further information, see - . + . To compile this driver as a module, choose M here: the module will be called bcm5974. diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index cb5579716dba..0a6f7ca883e7 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -212,7 +212,7 @@ static void alps_set_abs_params_v7(struct alps_data *priv, static void alps_set_abs_params_ss4_v2(struct alps_data *priv, struct input_dev *dev1); -/* Packet formats are described in Documentation/input/alps.txt */ +/* Packet formats are described in Documentation/input/devices/alps.rst */ static bool alps_is_valid_first_byte(struct alps_data *priv, unsigned char data) diff --git a/drivers/input/touchscreen/wm97xx-core.c b/drivers/input/touchscreen/wm97xx-core.c index fd714ee881f7..2566b4d8b342 100644 --- a/drivers/input/touchscreen/wm97xx-core.c +++ b/drivers/input/touchscreen/wm97xx-core.c @@ -68,7 +68,7 @@ * The default values correspond to Mainstone II in QVGA mode * * Please read - * Documentation/input/input-programming.txt for more details. + * Documentation/input/input-programming.rst for more details. */ static int abs_x[3] = {150, 4000, 5}; diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index 00cd1f20a196..55e9442a99e2 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -38,7 +38,7 @@ void pblk_rb_data_free(struct pblk_rb *rb) /* * Initialize ring buffer. The data and metadata buffers must be previously * allocated and their size must be a power of two - * (Documentation/circular-buffers.txt) + * (Documentation/core-api/circular-buffers.rst) */ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, unsigned int power_size, unsigned int power_seg_sz) diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig index 4d200883c505..17bf109c58e9 100644 --- a/drivers/md/bcache/Kconfig +++ b/drivers/md/bcache/Kconfig @@ -5,7 +5,7 @@ config BCACHE Allows a block device to be used as cache for other devices; uses a btree for indexing and the layout is optimized for SSDs. - See Documentation/bcache.txt for details. + See Documentation/admin-guide/bcache.rst for details. config BCACHE_DEBUG bool "Bcache debugging" diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 2a0968c04e21..547c9eedc2f4 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -18,7 +18,7 @@ * as keys are inserted we only sort the pages that have not yet been written. * When garbage collection is run, we resort the entire node. * - * All configuration is done via sysfs; see Documentation/bcache.txt. + * All configuration is done via sysfs; see Documentation/admin-guide/bcache.rst. */ #include "bcache.h" diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index c334e6666461..1d096742eb41 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -18,7 +18,7 @@ * as keys are inserted we only sort the pages that have not yet been written. * When garbage collection is run, we resort the entire node. * - * All configuration is done via sysfs; see Documentation/bcache.txt. + * All configuration is done via sysfs; see Documentation/admin-guide/bcache.rst. */ #include "bcache.h" diff --git a/drivers/media/dvb-core/dvb_ringbuffer.c b/drivers/media/dvb-core/dvb_ringbuffer.c index 4330b6fa4af2..d1d471af0636 100644 --- a/drivers/media/dvb-core/dvb_ringbuffer.c +++ b/drivers/media/dvb-core/dvb_ringbuffer.c @@ -55,7 +55,7 @@ int dvb_ringbuffer_empty(struct dvb_ringbuffer *rbuf) * this pairs with smp_store_release() in dvb_ringbuffer_write(), * dvb_ringbuffer_write_user(), or dvb_ringbuffer_reset() * - * for memory barriers also see Documentation/circular-buffers.txt + * for memory barriers also see Documentation/core-api/circular-buffers.rst */ return (rbuf->pread == smp_load_acquire(&rbuf->pwrite)); } diff --git a/drivers/media/pci/meye/Kconfig b/drivers/media/pci/meye/Kconfig index 2e60334ffef5..9a50f54231ad 100644 --- a/drivers/media/pci/meye/Kconfig +++ b/drivers/media/pci/meye/Kconfig @@ -5,7 +5,7 @@ config VIDEO_MEYE ---help--- This is the video4linux driver for the Motion Eye camera found in the Vaio Picturebook laptops. Please read the material in - for more information. + for more information. If you say Y or M here, you need to say Y or M to "Sony Laptop Extras" in the misc device section. diff --git a/drivers/media/platform/pxa_camera.c b/drivers/media/platform/pxa_camera.c index 4d5a26b4cdda..d85ffbfb7c1f 100644 --- a/drivers/media/platform/pxa_camera.c +++ b/drivers/media/platform/pxa_camera.c @@ -1021,7 +1021,7 @@ static void pxa_camera_wakeup(struct pxa_camera_dev *pcdev, * - a videobuffer is queued on the pcdev->capture list * * Please check the "DMA hot chaining timeslice issue" in - * Documentation/video4linux/pxa_camera.txt + * Documentation/media/v4l-drivers/pxa_camera.rst * * Context: should only be called within the dma irq handler */ @@ -1443,7 +1443,7 @@ static void pxac_vb2_queue(struct vb2_buffer *vb) /* * Please check the DMA prepared buffer structure in : - * Documentation/video4linux/pxa_camera.txt + * Documentation/media/v4l-drivers/pxa_camera.rst * Please check also in pxa_camera_check_link_miss() to understand why DMA chain * modification while DMA chain is running will work anyway. */ diff --git a/drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c b/drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c index 242342fd7ede..9897213f2618 100644 --- a/drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c +++ b/drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c @@ -1111,7 +1111,7 @@ static void sh_mobile_ceu_put_formats(struct soc_camera_device *icd) /* * CEU can scale and crop, but we don't want to waste bandwidth and kill the * framerate by always requesting the maximum image from the client. See - * Documentation/video4linux/sh_mobile_ceu_camera.txt for a description of + * Documentation/media/v4l-drivers/sh_mobile_ceu_camera.rst for a description of * scaling and cropping algorithms and for the meaning of referenced here steps. */ static int sh_mobile_ceu_set_selection(struct soc_camera_device *icd, diff --git a/drivers/media/radio/Kconfig b/drivers/media/radio/Kconfig index 39b04ad924c0..b426d6f9787d 100644 --- a/drivers/media/radio/Kconfig +++ b/drivers/media/radio/Kconfig @@ -272,7 +272,7 @@ config RADIO_RTRACK been reported to be used by these cards. More information is contained in the file - . + . To compile this driver as a module, choose M here: the module will be called radio-aimslab. diff --git a/drivers/media/radio/si470x/Kconfig b/drivers/media/radio/si470x/Kconfig index a21172e413a9..6dbb158cd2a0 100644 --- a/drivers/media/radio/si470x/Kconfig +++ b/drivers/media/radio/si470x/Kconfig @@ -29,7 +29,7 @@ config USB_SI470X Please have a look at the documentation, especially on how to redirect the audio stream from the radio to your sound device: - Documentation/video4linux/si470x.txt + Documentation/media/v4l-drivers/si470x.rst Say Y here if you want to connect this type of radio to your computer's USB port. diff --git a/drivers/media/usb/dvb-usb-v2/lmedm04.c b/drivers/media/usb/dvb-usb-v2/lmedm04.c index be26c029546b..39db6dc4b5cd 100644 --- a/drivers/media/usb/dvb-usb-v2/lmedm04.c +++ b/drivers/media/usb/dvb-usb-v2/lmedm04.c @@ -21,7 +21,7 @@ * * LME2510C + M88RS2000 * - * For firmware see Documentation/dvb/lmedm04.txt + * For firmware see Documentation/media/dvb-drivers/lmedm04.rst * * I2C addresses: * 0xd0 - STV0288 - Demodulator diff --git a/drivers/media/usb/zr364xx/Kconfig b/drivers/media/usb/zr364xx/Kconfig index 0f585662881d..ac429bca70e8 100644 --- a/drivers/media/usb/zr364xx/Kconfig +++ b/drivers/media/usb/zr364xx/Kconfig @@ -6,7 +6,7 @@ config USB_ZR364XX ---help--- Say Y here if you want to connect this type of camera to your computer's USB port. - See for more info + See for more info and list of supported cameras. To compile this driver as a module, choose M here: the diff --git a/drivers/parport/Kconfig b/drivers/parport/Kconfig index 44333bd8f908..a97f4eada60b 100644 --- a/drivers/parport/Kconfig +++ b/drivers/parport/Kconfig @@ -20,7 +20,7 @@ menuconfig PARPORT drive, PLIP link (Parallel Line Internet Protocol is mainly used to create a mini network by connecting the parallel ports of two local machines) etc., then you need to say Y here; please read - and + and . For extensive information about drivers for many devices attaching @@ -33,7 +33,7 @@ menuconfig PARPORT the module will be called parport. If you have more than one parallel port and want to specify which port and IRQ to be used by this driver at module load time, take a - look at . + look at . If unsure, say Y. @@ -71,7 +71,7 @@ config PARPORT_PC_FIFO As well as actually having a FIFO, or DMA capability, the kernel will need to know which IRQ the parallel port has. By default, parallel port interrupts will not be used, and so neither will the - FIFO. See to find out how to + FIFO. See to find out how to specify which IRQ/DMA to use. config PARPORT_PC_SUPERIO diff --git a/drivers/staging/media/bcm2048/TODO b/drivers/staging/media/bcm2048/TODO index 051f85dbe89e..6bee2a2dad68 100644 --- a/drivers/staging/media/bcm2048/TODO +++ b/drivers/staging/media/bcm2048/TODO @@ -3,7 +3,7 @@ TODO: From the initial code review: The main thing you need to do is to implement all the controls using the -control framework (see Documentation/video4linux/v4l2-controls.txt). +control framework (see Documentation/media/kapi/v4l2-controls.rst). Most drivers are by now converted to the control framework, so you will find many examples of how to do this in drivers/media/radio. diff --git a/include/keys/asymmetric-subtype.h b/include/keys/asymmetric-subtype.h index 2480469ce8fb..e0a9c2368872 100644 --- a/include/keys/asymmetric-subtype.h +++ b/include/keys/asymmetric-subtype.h @@ -1,6 +1,6 @@ /* Asymmetric public-key cryptography key subtype * - * See Documentation/security/asymmetric-keys.txt + * See Documentation/crypto/asymmetric-keys.txt * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) diff --git a/include/keys/asymmetric-type.h b/include/keys/asymmetric-type.h index b38240716d41..1cb77cd5135e 100644 --- a/include/keys/asymmetric-type.h +++ b/include/keys/asymmetric-type.h @@ -1,6 +1,6 @@ /* Asymmetric Public-key cryptography key type interface * - * See Documentation/security/asymmetric-keys.txt + * See Documentation/crypto/asymmetric-keys.txt * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) diff --git a/include/linux/assoc_array.h b/include/linux/assoc_array.h index a89df3be1686..65e3832f96b2 100644 --- a/include/linux/assoc_array.h +++ b/include/linux/assoc_array.h @@ -1,6 +1,6 @@ /* Generic associative array implementation. * - * See Documentation/assoc_array.txt for information. + * See Documentation/core-api/assoc_array.rst for information. * * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) diff --git a/include/linux/assoc_array_priv.h b/include/linux/assoc_array_priv.h index 711275e6681c..a00a06550c10 100644 --- a/include/linux/assoc_array_priv.h +++ b/include/linux/assoc_array_priv.h @@ -1,6 +1,6 @@ /* Private definitions for the generic associative array implementation. * - * See Documentation/assoc_array.txt for information. + * See Documentation/core-api/assoc_array.rst for information. * * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) diff --git a/include/linux/circ_buf.h b/include/linux/circ_buf.h index 7cf262a421c3..b3233e8202f9 100644 --- a/include/linux/circ_buf.h +++ b/include/linux/circ_buf.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * See Documentation/circular-buffers.txt for more information. + * See Documentation/core-api/circular-buffers.rst for more information. */ #ifndef _LINUX_CIRC_BUF_H diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9c3c9a319e48..8154f4920fcb 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Ftrace header. For implementation details beyond the random comments - * scattered below, see: Documentation/trace/ftrace-design.txt + * scattered below, see: Documentation/trace/ftrace-design.rst */ #ifndef _LINUX_FTRACE_H diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index e4b257ff881b..bc8206a8f30e 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -109,7 +109,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, * * The barrier() is needed to make sure compiler doesn't cache first element [1], * as this loop can be restarted [2] - * [1] Documentation/atomic_ops.txt around line 114 + * [1] Documentation/core-api/atomic_ops.rst around line 114 * [2] Documentation/RCU/rculist_nulls.txt around line 146 */ #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index db9f15f5db04..c0d7ea0bf5b6 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -170,7 +170,7 @@ struct prctl_mm_map { * asking selinux for a specific new context (e.g. with runcon) will result * in execve returning -EPERM. * - * See Documentation/prctl/no_new_privs.txt for more details. + * See Documentation/userspace-api/no_new_privs.rst for more details. */ #define PR_SET_NO_NEW_PRIVS 38 #define PR_GET_NO_NEW_PRIVS 39 diff --git a/include/xen/interface/io/kbdif.h b/include/xen/interface/io/kbdif.h index 2a9510ade701..e2340a4130cf 100644 --- a/include/xen/interface/io/kbdif.h +++ b/include/xen/interface/io/kbdif.h @@ -317,7 +317,7 @@ struct xenkbd_position { * Linux [2] and Windows [3] multi-touch support. * * [1] https://cgit.freedesktop.org/wayland/wayland/tree/protocol/wayland.xml - * [2] https://www.kernel.org/doc/Documentation/input/multi-touch-protocol.txt + * [2] https://www.kernel.org/doc/Documentation/input/multi-touch-protocol.rst * [3] https://msdn.microsoft.com/en-us/library/jj151564(v=vs.85).aspx * * diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index d8b12e0d39cd..266f10cb7222 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -605,7 +605,7 @@ static inline int nr_cpusets(void) * load balancing domains (sched domains) as specified by that partial * partition. * - * See "What is sched_load_balance" in Documentation/cgroups/cpusets.txt + * See "What is sched_load_balance" in Documentation/cgroup-v1/cpusets.txt * for a background explanation of this. * * Does not return errors, on the theory that the callers of this diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index dd6c0a2ad969..dcc0166d1997 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -12,22 +12,22 @@ config NOP_TRACER config HAVE_FTRACE_NMI_ENTER bool help - See Documentation/trace/ftrace-design.txt + See Documentation/trace/ftrace-design.rst config HAVE_FUNCTION_TRACER bool help - See Documentation/trace/ftrace-design.txt + See Documentation/trace/ftrace-design.rst config HAVE_FUNCTION_GRAPH_TRACER bool help - See Documentation/trace/ftrace-design.txt + See Documentation/trace/ftrace-design.rst config HAVE_DYNAMIC_FTRACE bool help - See Documentation/trace/ftrace-design.txt + See Documentation/trace/ftrace-design.rst config HAVE_DYNAMIC_FTRACE_WITH_REGS bool @@ -35,12 +35,12 @@ config HAVE_DYNAMIC_FTRACE_WITH_REGS config HAVE_FTRACE_MCOUNT_RECORD bool help - See Documentation/trace/ftrace-design.txt + See Documentation/trace/ftrace-design.rst config HAVE_SYSCALL_TRACEPOINTS bool help - See Documentation/trace/ftrace-design.txt + See Documentation/trace/ftrace-design.rst config HAVE_FENTRY bool @@ -448,7 +448,7 @@ config KPROBE_EVENTS help This allows the user to add tracing events (similar to tracepoints) on the fly via the ftrace interface. See - Documentation/trace/kprobetrace.txt for more details. + Documentation/trace/kprobetrace.rst for more details. Those events can be inserted wherever kprobes can probe, and record various register and memory values. @@ -575,7 +575,7 @@ config MMIOTRACE implementation and works via page faults. Tracing is disabled by default and can be enabled at run-time. - See Documentation/trace/mmiotrace.txt. + See Documentation/trace/mmiotrace.rst. If you are not helping to develop drivers, say N. config TRACING_MAP diff --git a/lib/Kconfig b/lib/Kconfig index 809fdd155739..e34b04b56057 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -405,7 +405,7 @@ config ASSOCIATIVE_ARRAY See: - Documentation/assoc_array.txt + Documentation/core-api/assoc_array.rst for more information. diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 9a46dc24ac10..2b5ee5fbd652 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4728,7 +4728,7 @@ err_af: } /* This supports connect(2) and SCTP connect services such as sctp_connectx(3) - * and sctp_sendmsg(3) as described in Documentation/security/LSM-sctp.txt + * and sctp_sendmsg(3) as described in Documentation/security/LSM-sctp.rst */ static int selinux_socket_connect_helper(struct socket *sock, struct sockaddr *address, int addrlen) diff --git a/sound/core/Kconfig b/sound/core/Kconfig index 6e937a8146a1..63b3ef9c83f5 100644 --- a/sound/core/Kconfig +++ b/sound/core/Kconfig @@ -48,7 +48,7 @@ config SND_MIXER_OSS depends on SND_OSSEMUL help To enable OSS mixer API emulation (/dev/mixer*), say Y here - and read . + and read . Many programs still use the OSS API, so say Y. @@ -61,7 +61,7 @@ config SND_PCM_OSS select SND_PCM help To enable OSS digital audio (PCM) emulation (/dev/dsp*), say Y - here and read . + here and read . Many programs still use the OSS API, so say Y. diff --git a/sound/drivers/Kconfig b/sound/drivers/Kconfig index 7144cc36e8ae..648a12da44f9 100644 --- a/sound/drivers/Kconfig +++ b/sound/drivers/Kconfig @@ -153,7 +153,7 @@ config SND_SERIAL_U16550 select SND_RAWMIDI help To include support for MIDI serial port interfaces, say Y here - and read . + and read . This driver works with serial UARTs 16550 and better. This driver accesses the serial port hardware directly, so @@ -223,7 +223,7 @@ config SND_AC97_POWER_SAVE the device frequently. A value of 10 seconds would be a good choice for normal operations. - See Documentation/sound/alsa/powersave.txt for more details. + See Documentation/sound/designs/powersave.rst for more details. config SND_AC97_POWER_SAVE_DEFAULT int "Default time-out for AC97 power-save mode" diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig index d9f3fdb777e4..4105d9f653d9 100644 --- a/sound/pci/Kconfig +++ b/sound/pci/Kconfig @@ -175,7 +175,7 @@ config SND_BT87X help If you want to record audio from TV cards based on Brooktree Bt878/Bt879 chips, say Y here and read - . + . To compile this driver as a module, choose M here: the module will be called snd-bt87x. @@ -210,7 +210,7 @@ config SND_CMIPCI help If you want to use soundcards based on C-Media CMI8338, CMI8738, CMI8768 or CMI8770 chips, say Y here and read - . + . To compile this driver as a module, choose M here: the module will be called snd-cmipci. @@ -472,8 +472,8 @@ config SND_EMU10K1 Audigy and E-mu APS (partially supported) soundcards. The confusing multitude of mixer controls is documented in - and - . + and + . To compile this driver as a module, choose M here: the module will be called snd-emu10k1. @@ -735,7 +735,7 @@ config SND_MIXART select SND_PCM help If you want to use Digigram miXart soundcards, say Y here and - read . + read . To compile this driver as a module, choose M here: the module will be called snd-mixart. diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index db9f15f5db04..c0d7ea0bf5b6 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -170,7 +170,7 @@ struct prctl_mm_map { * asking selinux for a specific new context (e.g. with runcon) will result * in execve returning -EPERM. * - * See Documentation/prctl/no_new_privs.txt for more details. + * See Documentation/userspace-api/no_new_privs.rst for more details. */ #define PR_SET_NO_NEW_PRIVS 38 #define PR_GET_NO_NEW_PRIVS 39 diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 6a12bbf39f7b..7aba8243a0e7 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -201,7 +201,7 @@ static void mem_toupper(char *f, size_t len) /* * Check for "NAME_PATH" environment variable to override fs location (for - * testing). This matches the recommendation in Documentation/sysfs-rules.txt + * testing). This matches the recommendation in Documentation/admin-guide/sysfs-rules.rst * for SYSFS_PATH. */ static bool fs__env_override(struct fs *fs) diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c index 29347756b0af..77e4891e17b0 100644 --- a/tools/perf/util/bpf-prologue.c +++ b/tools/perf/util/bpf-prologue.c @@ -61,7 +61,7 @@ check_pos(struct bpf_insn_pos *pos) /* * Convert type string (u8/u16/u32/u64/s8/s16/s32/s64 ..., see - * Documentation/trace/kprobetrace.txt) to size field of BPF_LDX_MEM + * Documentation/trace/kprobetrace.rst) to size field of BPF_LDX_MEM * instruction (BPF_{B,H,W,DW}). */ static int diff --git a/tools/power/pm-graph/config/custom-timeline-functions.cfg b/tools/power/pm-graph/config/custom-timeline-functions.cfg index 4f80ad7d7275..f8fcb06fd68b 100644 --- a/tools/power/pm-graph/config/custom-timeline-functions.cfg +++ b/tools/power/pm-graph/config/custom-timeline-functions.cfg @@ -105,7 +105,7 @@ override-dev-timeline-functions: true # example: [color=#CC00CC] # # arglist: A list of arguments from registers/stack addresses. See URL: -# https://www.kernel.org/doc/Documentation/trace/kprobetrace.txt +# https://www.kernel.org/doc/Documentation/trace/kprobetrace.rst # # example: cpu=%di:s32 # @@ -170,7 +170,7 @@ pm_restore_console: # example: [color=#CC00CC] # # arglist: A list of arguments from registers/stack addresses. See URL: -# https://www.kernel.org/doc/Documentation/trace/kprobetrace.txt +# https://www.kernel.org/doc/Documentation/trace/kprobetrace.rst # # example: port=+36(%di):s32 # -- cgit From 6c3796d130ed2860489885a934dcb7bb334d5eb0 Mon Sep 17 00:00:00 2001 From: "bstroesser@ts.fujitsu.com" Date: Thu, 24 May 2018 18:49:41 +0200 Subject: scsi: target: tcmu: add read length support Generally target core and TCMUser seem to work fine for tape devices and media changers. But there is at least one situation where TCMUser is not able to support sequential access device emulation correctly. The situation is when an initiator sends a SCSI READ CDB with a length that is greater than the length of the tape block to read. We can distinguish two subcases: A) The initiator sent the READ CDB with the SILI bit being set. In this case the sequential access device has to transfer the data from the tape block (only the length of the tape block) and transmit a good status. The current interface between TCMUser and the userspace does not support reduction of the read data size by the userspace program. The patch below fixes this subcase by allowing the userspace program to specify a reduced data size in read direction. B) The initiator sent the READ CDB with the SILI bit not being set. In this case the sequential access device has to transfer the data from the tape block as in A), but additionally has to transmit CHECK CONDITION with the ILI bit set and NO SENSE in the sensebytes. The information field in the sensebytes must contain the residual count. With the below patch a user space program can specify the real read data length and appropriate sensebytes. TCMUser then uses the se_cmd flag SCF_TREAT_READ_AS_NORMAL, to force target core to transmit the real data size and the sensebytes. Note: the flag SCF_TREAT_READ_AS_NORMAL is introduced by Lee Duncan's patch "[PATCH v4] target: transport should handle st FM/EOM/ILI reads" from Tue, 15 May 2018 18:25:24 -0700. Signed-off-by: Bodo Stroesser Acked-by: Mike Christie Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen --- drivers/target/target_core_user.c | 44 ++++++++++++++++++++++++++++------- include/uapi/linux/target_core_user.h | 4 +++- 2 files changed, 39 insertions(+), 9 deletions(-) (limited to 'include/uapi') diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 7f96dfa32b9c..d8dc3d22051f 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -656,7 +656,7 @@ static void scatter_data_area(struct tcmu_dev *udev, } static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, - bool bidi) + bool bidi, uint32_t read_len) { struct se_cmd *se_cmd = cmd->se_cmd; int i, dbi; @@ -689,7 +689,7 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, for_each_sg(data_sg, sg, data_nents, i) { int sg_remaining = sg->length; to = kmap_atomic(sg_page(sg)) + sg->offset; - while (sg_remaining > 0) { + while (sg_remaining > 0 && read_len > 0) { if (block_remaining == 0) { if (from) kunmap_atomic(from); @@ -701,6 +701,8 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, } copy_bytes = min_t(size_t, sg_remaining, block_remaining); + if (read_len < copy_bytes) + copy_bytes = read_len; offset = DATA_BLOCK_SIZE - block_remaining; tcmu_flush_dcache_range(from, copy_bytes); memcpy(to + sg->length - sg_remaining, from + offset, @@ -708,8 +710,11 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, sg_remaining -= copy_bytes; block_remaining -= copy_bytes; + read_len -= copy_bytes; } kunmap_atomic(to - sg->offset); + if (read_len == 0) + break; } if (from) kunmap_atomic(from); @@ -1042,6 +1047,8 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * { struct se_cmd *se_cmd = cmd->se_cmd; struct tcmu_dev *udev = cmd->tcmu_dev; + bool read_len_valid = false; + uint32_t read_len = se_cmd->data_length; /* * cmd has been completed already from timeout, just reclaim @@ -1056,13 +1063,28 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * pr_warn("TCMU: Userspace set UNKNOWN_OP flag on se_cmd %p\n", cmd->se_cmd); entry->rsp.scsi_status = SAM_STAT_CHECK_CONDITION; - } else if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) { + goto done; + } + + if (se_cmd->data_direction == DMA_FROM_DEVICE && + (entry->hdr.uflags & TCMU_UFLAG_READ_LEN) && entry->rsp.read_len) { + read_len_valid = true; + if (entry->rsp.read_len < read_len) + read_len = entry->rsp.read_len; + } + + if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) { transport_copy_sense_to_cmd(se_cmd, entry->rsp.sense_buffer); - } else if (se_cmd->se_cmd_flags & SCF_BIDI) { + if (!read_len_valid ) + goto done; + else + se_cmd->se_cmd_flags |= SCF_TREAT_READ_AS_NORMAL; + } + if (se_cmd->se_cmd_flags & SCF_BIDI) { /* Get Data-In buffer before clean up */ - gather_data_area(udev, cmd, true); + gather_data_area(udev, cmd, true, read_len); } else if (se_cmd->data_direction == DMA_FROM_DEVICE) { - gather_data_area(udev, cmd, false); + gather_data_area(udev, cmd, false, read_len); } else if (se_cmd->data_direction == DMA_TO_DEVICE) { /* TODO: */ } else if (se_cmd->data_direction != DMA_NONE) { @@ -1070,7 +1092,13 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * se_cmd->data_direction); } - target_complete_cmd(cmd->se_cmd, entry->rsp.scsi_status); +done: + if (read_len_valid) { + pr_debug("read_len = %d\n", read_len); + target_complete_cmd_with_length(cmd->se_cmd, + entry->rsp.scsi_status, read_len); + } else + target_complete_cmd(cmd->se_cmd, entry->rsp.scsi_status); out: cmd->se_cmd = NULL; @@ -1740,7 +1768,7 @@ static int tcmu_configure_device(struct se_device *dev) /* Initialise the mailbox of the ring buffer */ mb = udev->mb_addr; mb->version = TCMU_MAILBOX_VERSION; - mb->flags = TCMU_MAILBOX_FLAG_CAP_OOOC; + mb->flags = TCMU_MAILBOX_FLAG_CAP_OOOC | TCMU_MAILBOX_FLAG_CAP_READ_LEN; mb->cmdr_off = CMDR_OFF; mb->cmdr_size = udev->cmdr_size; diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index 6e299349b158..b7b57967d90f 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -44,6 +44,7 @@ #define TCMU_MAILBOX_VERSION 2 #define ALIGN_SIZE 64 /* Should be enough for most CPUs */ #define TCMU_MAILBOX_FLAG_CAP_OOOC (1 << 0) /* Out-of-order completions */ +#define TCMU_MAILBOX_FLAG_CAP_READ_LEN (1 << 1) /* Read data length */ struct tcmu_mailbox { __u16 version; @@ -71,6 +72,7 @@ struct tcmu_cmd_entry_hdr { __u16 cmd_id; __u8 kflags; #define TCMU_UFLAG_UNKNOWN_OP 0x1 +#define TCMU_UFLAG_READ_LEN 0x2 __u8 uflags; } __packed; @@ -119,7 +121,7 @@ struct tcmu_cmd_entry { __u8 scsi_status; __u8 __pad1; __u16 __pad2; - __u32 __pad3; + __u32 read_len; char sense_buffer[TCMU_SENSE_BUFFERSIZE]; } rsp; }; -- cgit From 08ba91ee6e2c1c08d3f0648f978cbb5dbf3491d8 Mon Sep 17 00:00:00 2001 From: Doron Roberts-Kedes Date: Fri, 15 Jun 2018 14:05:32 -0700 Subject: nbd: Add the nbd NBD_DISCONNECT_ON_CLOSE config flag. If NBD_DISCONNECT_ON_CLOSE is set on a device, then the driver will issue a disconnect from nbd_release if the device has no remaining bdev->bd_openers. Fix ret val so reconfigure with only setting the flag succeeds. Reviewed-by: Josef Bacik Signed-off-by: Doron Roberts-Kedes Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 42 ++++++++++++++++++++++++++++++++++-------- include/uapi/linux/nbd.h | 3 +++ 2 files changed, 37 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 3b7083b8ecbb..74a05561b620 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -76,6 +76,7 @@ struct link_dead_args { #define NBD_HAS_CONFIG_REF 4 #define NBD_BOUND 5 #define NBD_DESTROY_ON_DISCONNECT 6 +#define NBD_DISCONNECT_ON_CLOSE 7 struct nbd_config { u32 flags; @@ -138,6 +139,7 @@ static void nbd_config_put(struct nbd_device *nbd); static void nbd_connect_reply(struct genl_info *info, int index); static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info); static void nbd_dead_link_work(struct work_struct *work); +static void nbd_disconnect_and_put(struct nbd_device *nbd); static inline struct device *nbd_to_dev(struct nbd_device *nbd) { @@ -1305,6 +1307,12 @@ out: static void nbd_release(struct gendisk *disk, fmode_t mode) { struct nbd_device *nbd = disk->private_data; + struct block_device *bdev = bdget_disk(disk, 0); + + if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) && + bdev->bd_openers == 0) + nbd_disconnect_and_put(nbd); + nbd_config_put(nbd); nbd_put(nbd); } @@ -1705,6 +1713,10 @@ again: &config->runtime_flags); put_dev = true; } + if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) { + set_bit(NBD_DISCONNECT_ON_CLOSE, + &config->runtime_flags); + } } if (info->attrs[NBD_ATTR_SOCKETS]) { @@ -1749,6 +1761,17 @@ out: return ret; } +static void nbd_disconnect_and_put(struct nbd_device *nbd) +{ + mutex_lock(&nbd->config_lock); + nbd_disconnect(nbd); + nbd_clear_sock(nbd); + mutex_unlock(&nbd->config_lock); + if (test_and_clear_bit(NBD_HAS_CONFIG_REF, + &nbd->config->runtime_flags)) + nbd_config_put(nbd); +} + static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) { struct nbd_device *nbd; @@ -1781,13 +1804,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) nbd_put(nbd); return 0; } - mutex_lock(&nbd->config_lock); - nbd_disconnect(nbd); - nbd_clear_sock(nbd); - mutex_unlock(&nbd->config_lock); - if (test_and_clear_bit(NBD_HAS_CONFIG_REF, - &nbd->config->runtime_flags)) - nbd_config_put(nbd); + nbd_disconnect_and_put(nbd); nbd_config_put(nbd); nbd_put(nbd); return 0; @@ -1798,7 +1815,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) struct nbd_device *nbd = NULL; struct nbd_config *config; int index; - int ret = -EINVAL; + int ret = 0; bool put_dev = false; if (!netlink_capable(skb, CAP_SYS_ADMIN)) @@ -1838,6 +1855,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) !nbd->task_recv) { dev_err(nbd_to_dev(nbd), "not configured, cannot reconfigure\n"); + ret = -EINVAL; goto out; } @@ -1862,6 +1880,14 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) &config->runtime_flags)) refcount_inc(&nbd->refs); } + + if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) { + set_bit(NBD_DISCONNECT_ON_CLOSE, + &config->runtime_flags); + } else { + clear_bit(NBD_DISCONNECT_ON_CLOSE, + &config->runtime_flags); + } } if (info->attrs[NBD_ATTR_SOCKETS]) { diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index 85a3fb65e40a..20d6cc91435d 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -53,6 +53,9 @@ enum { /* These are client behavior specific flags. */ #define NBD_CFLAG_DESTROY_ON_DISCONNECT (1 << 0) /* delete the nbd device on disconnect. */ +#define NBD_CFLAG_DISCONNECT_ON_CLOSE (1 << 1) /* disconnect the nbd device on + * close by last opener. + */ /* userspace doesn't need the nbd_device structure */ -- cgit From a11e1d432b51f63ba698d044441284a661f01144 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 28 Jun 2018 09:43:44 -0700 Subject: Revert changes to convert to ->poll_mask() and aio IOCB_CMD_POLL The poll() changes were not well thought out, and completely unexplained. They also caused a huge performance regression, because "->poll()" was no longer a trivial file operation that just called down to the underlying file operations, but instead did at least two indirect calls. Indirect calls are sadly slow now with the Spectre mitigation, but the performance problem could at least be largely mitigated by changing the "->get_poll_head()" operation to just have a per-file-descriptor pointer to the poll head instead. That gets rid of one of the new indirections. But that doesn't fix the new complexity that is completely unwarranted for the regular case. The (undocumented) reason for the poll() changes was some alleged AIO poll race fixing, but we don't make the common case slower and more complex for some uncommon special case, so this all really needs way more explanations and most likely a fundamental redesign. [ This revert is a revert of about 30 different commits, not reverted individually because that would just be unnecessarily messy - Linus ] Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Linus Torvalds --- Documentation/filesystems/Locking | 7 +- Documentation/filesystems/vfs.txt | 13 ---- crypto/af_alg.c | 13 +++- crypto/algif_aead.c | 4 +- crypto/algif_skcipher.c | 4 +- drivers/char/random.c | 29 ++++---- drivers/isdn/mISDN/socket.c | 2 +- drivers/net/ppp/pppoe.c | 2 +- fs/aio.c | 148 +------------------------------------- fs/eventfd.c | 19 ++--- fs/eventpoll.c | 15 ++-- fs/pipe.c | 22 +++--- fs/select.c | 23 ------ fs/timerfd.c | 22 +++--- include/crypto/if_alg.h | 3 +- include/linux/fs.h | 2 - include/linux/net.h | 1 - include/linux/poll.h | 12 ++-- include/linux/skbuff.h | 3 +- include/net/bluetooth/bluetooth.h | 2 +- include/net/iucv/af_iucv.h | 2 + include/net/sctp/sctp.h | 3 +- include/net/tcp.h | 3 +- include/net/tls.h | 6 +- include/net/udp.h | 2 +- include/uapi/linux/aio_abi.h | 8 ++- net/appletalk/ddp.c | 2 +- net/atm/common.c | 11 ++- net/atm/common.h | 2 +- net/atm/pvc.c | 2 +- net/atm/svc.c | 2 +- net/ax25/af_ax25.c | 2 +- net/bluetooth/af_bluetooth.c | 7 +- net/bluetooth/hci_sock.c | 2 +- net/bluetooth/l2cap_sock.c | 2 +- net/bluetooth/rfcomm/sock.c | 2 +- net/bluetooth/sco.c | 2 +- net/caif/caif_socket.c | 12 ++-- net/can/bcm.c | 2 +- net/can/raw.c | 2 +- net/core/datagram.c | 13 ++-- net/dccp/dccp.h | 3 +- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 2 +- net/dccp/proto.c | 13 +++- net/decnet/af_decnet.c | 6 +- net/ieee802154/socket.c | 4 +- net/ipv4/af_inet.c | 8 +-- net/ipv4/tcp.c | 23 ++++-- net/ipv4/udp.c | 10 +-- net/ipv6/af_inet6.c | 4 +- net/ipv6/raw.c | 4 +- net/iucv/af_iucv.c | 7 +- net/kcm/kcmsock.c | 10 +-- net/key/af_key.c | 2 +- net/l2tp/l2tp_ip.c | 2 +- net/l2tp/l2tp_ip6.c | 2 +- net/l2tp/l2tp_ppp.c | 2 +- net/llc/af_llc.c | 2 +- net/netlink/af_netlink.c | 2 +- net/netrom/af_netrom.c | 2 +- net/nfc/llcp_sock.c | 9 ++- net/nfc/rawsock.c | 4 +- net/packet/af_packet.c | 9 +-- net/phonet/socket.c | 9 ++- net/qrtr/qrtr.c | 2 +- net/rose/af_rose.c | 2 +- net/rxrpc/af_rxrpc.c | 10 ++- net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 2 +- net/sctp/socket.c | 4 +- net/smc/af_smc.c | 12 +++- net/socket.c | 48 ++----------- net/tipc/socket.c | 14 ++-- net/tls/tls_main.c | 2 +- net/tls/tls_sw.c | 19 ++--- net/unix/af_unix.c | 30 +++++--- net/vmw_vsock/af_vsock.c | 19 +++-- net/x25/af_x25.c | 2 +- net/xdp/xsk.c | 7 +- 80 files changed, 301 insertions(+), 450 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 2c391338c675..37bf0a9de75c 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -441,8 +441,6 @@ prototypes: int (*iterate) (struct file *, struct dir_context *); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); - struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t); - __poll_t (*poll_mask) (struct file *, __poll_t); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); @@ -473,7 +471,7 @@ prototypes: }; locking rules: - All except for ->poll_mask may block. + All may block. ->llseek() locking has moved from llseek to the individual llseek implementations. If your fs is not using generic_file_llseek, you @@ -505,9 +503,6 @@ in sys_read() and friends. the lease within the individual filesystem to record the result of the operation -->poll_mask can be called with or without the waitqueue lock for the waitqueue -returned from ->get_poll_head. - --------------------------- dquot_operations ------------------------------- prototypes: int (*write_dquot) (struct dquot *); diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 829a7b7857a4..f608180ad59d 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -857,8 +857,6 @@ struct file_operations { ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iterate) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); - struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t); - __poll_t (*poll_mask) (struct file *, __poll_t); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); @@ -903,17 +901,6 @@ otherwise noted. activity on this file and (optionally) go to sleep until there is activity. Called by the select(2) and poll(2) system calls - get_poll_head: Returns the struct wait_queue_head that callers can - wait on. Callers need to check the returned events using ->poll_mask - once woken. Can return NULL to indicate polling is not supported, - or any error code using the ERR_PTR convention to indicate that a - grave error occured and ->poll_mask shall not be called. - - poll_mask: return the mask of EPOLL* values describing the file descriptor - state. Called either before going to sleep on the waitqueue returned by - get_poll_head, or after it has been woken. If ->get_poll_head and - ->poll_mask are implemented ->poll does not need to be implement. - unlocked_ioctl: called by the ioctl(2) system call. compat_ioctl: called by the ioctl(2) system call when 32 bit system calls diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 49fa8582138b..314c52c967e5 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -1060,12 +1060,19 @@ void af_alg_async_cb(struct crypto_async_request *_req, int err) } EXPORT_SYMBOL_GPL(af_alg_async_cb); -__poll_t af_alg_poll_mask(struct socket *sock, __poll_t events) +/** + * af_alg_poll - poll system call handler + */ +__poll_t af_alg_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); struct af_alg_ctx *ctx = ask->private; - __poll_t mask = 0; + __poll_t mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; if (!ctx->more || ctx->used) mask |= EPOLLIN | EPOLLRDNORM; @@ -1075,7 +1082,7 @@ __poll_t af_alg_poll_mask(struct socket *sock, __poll_t events) return mask; } -EXPORT_SYMBOL_GPL(af_alg_poll_mask); +EXPORT_SYMBOL_GPL(af_alg_poll); /** * af_alg_alloc_areq - allocate struct af_alg_async_req diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 825524f27438..c40a8c7ee8ae 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -375,7 +375,7 @@ static struct proto_ops algif_aead_ops = { .sendmsg = aead_sendmsg, .sendpage = af_alg_sendpage, .recvmsg = aead_recvmsg, - .poll_mask = af_alg_poll_mask, + .poll = af_alg_poll, }; static int aead_check_key(struct socket *sock) @@ -471,7 +471,7 @@ static struct proto_ops algif_aead_ops_nokey = { .sendmsg = aead_sendmsg_nokey, .sendpage = aead_sendpage_nokey, .recvmsg = aead_recvmsg_nokey, - .poll_mask = af_alg_poll_mask, + .poll = af_alg_poll, }; static void *aead_bind(const char *name, u32 type, u32 mask) diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 4c04eb9888ad..cfdaab2b7d76 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -206,7 +206,7 @@ static struct proto_ops algif_skcipher_ops = { .sendmsg = skcipher_sendmsg, .sendpage = af_alg_sendpage, .recvmsg = skcipher_recvmsg, - .poll_mask = af_alg_poll_mask, + .poll = af_alg_poll, }; static int skcipher_check_key(struct socket *sock) @@ -302,7 +302,7 @@ static struct proto_ops algif_skcipher_ops_nokey = { .sendmsg = skcipher_sendmsg_nokey, .sendpage = skcipher_sendpage_nokey, .recvmsg = skcipher_recvmsg_nokey, - .poll_mask = af_alg_poll_mask, + .poll = af_alg_poll, }; static void *skcipher_bind(const char *name, u32 type, u32 mask) diff --git a/drivers/char/random.c b/drivers/char/random.c index a8fb0020ba5c..cd888d4ee605 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -402,7 +402,8 @@ static struct poolinfo { /* * Static global variables */ -static DECLARE_WAIT_QUEUE_HEAD(random_wait); +static DECLARE_WAIT_QUEUE_HEAD(random_read_wait); +static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); static struct fasync_struct *fasync; static DEFINE_SPINLOCK(random_ready_list_lock); @@ -721,8 +722,8 @@ retry: /* should we wake readers? */ if (entropy_bits >= random_read_wakeup_bits && - wq_has_sleeper(&random_wait)) { - wake_up_interruptible_poll(&random_wait, POLLIN); + wq_has_sleeper(&random_read_wait)) { + wake_up_interruptible(&random_read_wait); kill_fasync(&fasync, SIGIO, POLL_IN); } /* If the input pool is getting full, send some @@ -1396,7 +1397,7 @@ retry: trace_debit_entropy(r->name, 8 * ibytes); if (ibytes && (r->entropy_count >> ENTROPY_SHIFT) < random_write_wakeup_bits) { - wake_up_interruptible_poll(&random_wait, POLLOUT); + wake_up_interruptible(&random_write_wait); kill_fasync(&fasync, SIGIO, POLL_OUT); } @@ -1838,7 +1839,7 @@ _random_read(int nonblock, char __user *buf, size_t nbytes) if (nonblock) return -EAGAIN; - wait_event_interruptible(random_wait, + wait_event_interruptible(random_read_wait, ENTROPY_BITS(&input_pool) >= random_read_wakeup_bits); if (signal_pending(current)) @@ -1875,17 +1876,14 @@ urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) return ret; } -static struct wait_queue_head * -random_get_poll_head(struct file *file, __poll_t events) -{ - return &random_wait; -} - static __poll_t -random_poll_mask(struct file *file, __poll_t events) +random_poll(struct file *file, poll_table * wait) { - __poll_t mask = 0; + __poll_t mask; + poll_wait(file, &random_read_wait, wait); + poll_wait(file, &random_write_wait, wait); + mask = 0; if (ENTROPY_BITS(&input_pool) >= random_read_wakeup_bits) mask |= EPOLLIN | EPOLLRDNORM; if (ENTROPY_BITS(&input_pool) < random_write_wakeup_bits) @@ -1992,8 +1990,7 @@ static int random_fasync(int fd, struct file *filp, int on) const struct file_operations random_fops = { .read = random_read, .write = random_write, - .get_poll_head = random_get_poll_head, - .poll_mask = random_poll_mask, + .poll = random_poll, .unlocked_ioctl = random_ioctl, .fasync = random_fasync, .llseek = noop_llseek, @@ -2326,7 +2323,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, * We'll be woken up again once below random_write_wakeup_thresh, * or when the calling thread is about to terminate. */ - wait_event_interruptible(random_wait, kthread_should_stop() || + wait_event_interruptible(random_write_wait, kthread_should_stop() || ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits); mix_pool_bytes(poolp, buffer, count); credit_entropy_bits(poolp, entropy); diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 98f90aadd141..18c0a1281914 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -588,7 +588,7 @@ static const struct proto_ops data_sock_ops = { .getname = data_sock_getname, .sendmsg = mISDN_sock_sendmsg, .recvmsg = mISDN_sock_recvmsg, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = data_sock_setsockopt, diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index de51e8f70f44..ce61231e96ea 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -1107,7 +1107,7 @@ static const struct proto_ops pppoe_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = pppoe_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = sock_no_setsockopt, diff --git a/fs/aio.c b/fs/aio.c index e1d20124ec0e..210df9da1283 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -5,7 +5,6 @@ * Implements an efficient asynchronous io interface. * * Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved. - * Copyright 2018 Christoph Hellwig. * * See ../COPYING for licensing terms. */ @@ -165,22 +164,10 @@ struct fsync_iocb { bool datasync; }; -struct poll_iocb { - struct file *file; - __poll_t events; - struct wait_queue_head *head; - - union { - struct wait_queue_entry wait; - struct work_struct work; - }; -}; - struct aio_kiocb { union { struct kiocb rw; struct fsync_iocb fsync; - struct poll_iocb poll; }; struct kioctx *ki_ctx; @@ -1590,6 +1577,7 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync) if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)) return -EINVAL; + req->file = fget(iocb->aio_fildes); if (unlikely(!req->file)) return -EBADF; @@ -1604,137 +1592,6 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync) return 0; } -/* need to use list_del_init so we can check if item was present */ -static inline bool __aio_poll_remove(struct poll_iocb *req) -{ - if (list_empty(&req->wait.entry)) - return false; - list_del_init(&req->wait.entry); - return true; -} - -static inline void __aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask) -{ - fput(iocb->poll.file); - aio_complete(iocb, mangle_poll(mask), 0); -} - -static void aio_poll_work(struct work_struct *work) -{ - struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, poll.work); - - if (!list_empty_careful(&iocb->ki_list)) - aio_remove_iocb(iocb); - __aio_poll_complete(iocb, iocb->poll.events); -} - -static int aio_poll_cancel(struct kiocb *iocb) -{ - struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw); - struct poll_iocb *req = &aiocb->poll; - struct wait_queue_head *head = req->head; - bool found = false; - - spin_lock(&head->lock); - found = __aio_poll_remove(req); - spin_unlock(&head->lock); - - if (found) { - req->events = 0; - INIT_WORK(&req->work, aio_poll_work); - schedule_work(&req->work); - } - return 0; -} - -static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, - void *key) -{ - struct poll_iocb *req = container_of(wait, struct poll_iocb, wait); - struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll); - struct file *file = req->file; - __poll_t mask = key_to_poll(key); - - assert_spin_locked(&req->head->lock); - - /* for instances that support it check for an event match first: */ - if (mask && !(mask & req->events)) - return 0; - - mask = file->f_op->poll_mask(file, req->events) & req->events; - if (!mask) - return 0; - - __aio_poll_remove(req); - - /* - * Try completing without a context switch if we can acquire ctx_lock - * without spinning. Otherwise we need to defer to a workqueue to - * avoid a deadlock due to the lock order. - */ - if (spin_trylock(&iocb->ki_ctx->ctx_lock)) { - list_del_init(&iocb->ki_list); - spin_unlock(&iocb->ki_ctx->ctx_lock); - - __aio_poll_complete(iocb, mask); - } else { - req->events = mask; - INIT_WORK(&req->work, aio_poll_work); - schedule_work(&req->work); - } - - return 1; -} - -static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb) -{ - struct kioctx *ctx = aiocb->ki_ctx; - struct poll_iocb *req = &aiocb->poll; - __poll_t mask; - - /* reject any unknown events outside the normal event mask. */ - if ((u16)iocb->aio_buf != iocb->aio_buf) - return -EINVAL; - /* reject fields that are not defined for poll */ - if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags) - return -EINVAL; - - req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; - req->file = fget(iocb->aio_fildes); - if (unlikely(!req->file)) - return -EBADF; - if (!file_has_poll_mask(req->file)) - goto out_fail; - - req->head = req->file->f_op->get_poll_head(req->file, req->events); - if (!req->head) - goto out_fail; - if (IS_ERR(req->head)) { - mask = EPOLLERR; - goto done; - } - - init_waitqueue_func_entry(&req->wait, aio_poll_wake); - aiocb->ki_cancel = aio_poll_cancel; - - spin_lock_irq(&ctx->ctx_lock); - spin_lock(&req->head->lock); - mask = req->file->f_op->poll_mask(req->file, req->events) & req->events; - if (!mask) { - __add_wait_queue(req->head, &req->wait); - list_add_tail(&aiocb->ki_list, &ctx->active_reqs); - } - spin_unlock(&req->head->lock); - spin_unlock_irq(&ctx->ctx_lock); -done: - if (mask) - __aio_poll_complete(aiocb, mask); - return 0; -out_fail: - fput(req->file); - return -EINVAL; /* same as no support for IOCB_CMD_POLL */ -} - static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, bool compat) { @@ -1808,9 +1665,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, case IOCB_CMD_FDSYNC: ret = aio_fsync(&req->fsync, &iocb, true); break; - case IOCB_CMD_POLL: - ret = aio_poll(req, &iocb); - break; default: pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode); ret = -EINVAL; diff --git a/fs/eventfd.c b/fs/eventfd.c index ceb1031f1cac..08d3bd602f73 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -101,20 +101,14 @@ static int eventfd_release(struct inode *inode, struct file *file) return 0; } -static struct wait_queue_head * -eventfd_get_poll_head(struct file *file, __poll_t events) -{ - struct eventfd_ctx *ctx = file->private_data; - - return &ctx->wqh; -} - -static __poll_t eventfd_poll_mask(struct file *file, __poll_t eventmask) +static __poll_t eventfd_poll(struct file *file, poll_table *wait) { struct eventfd_ctx *ctx = file->private_data; __poll_t events = 0; u64 count; + poll_wait(file, &ctx->wqh, wait); + /* * All writes to ctx->count occur within ctx->wqh.lock. This read * can be done outside ctx->wqh.lock because we know that poll_wait @@ -156,11 +150,11 @@ static __poll_t eventfd_poll_mask(struct file *file, __poll_t eventmask) count = READ_ONCE(ctx->count); if (count > 0) - events |= (EPOLLIN & eventmask); + events |= EPOLLIN; if (count == ULLONG_MAX) events |= EPOLLERR; if (ULLONG_MAX - 1 > count) - events |= (EPOLLOUT & eventmask); + events |= EPOLLOUT; return events; } @@ -311,8 +305,7 @@ static const struct file_operations eventfd_fops = { .show_fdinfo = eventfd_show_fdinfo, #endif .release = eventfd_release, - .get_poll_head = eventfd_get_poll_head, - .poll_mask = eventfd_poll_mask, + .poll = eventfd_poll, .read = eventfd_read, .write = eventfd_write, .llseek = noop_llseek, diff --git a/fs/eventpoll.c b/fs/eventpoll.c index ea4436f409fb..67db22fe99c5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -922,18 +922,14 @@ static __poll_t ep_read_events_proc(struct eventpoll *ep, struct list_head *head return 0; } -static struct wait_queue_head *ep_eventpoll_get_poll_head(struct file *file, - __poll_t eventmask) -{ - struct eventpoll *ep = file->private_data; - return &ep->poll_wait; -} - -static __poll_t ep_eventpoll_poll_mask(struct file *file, __poll_t eventmask) +static __poll_t ep_eventpoll_poll(struct file *file, poll_table *wait) { struct eventpoll *ep = file->private_data; int depth = 0; + /* Insert inside our poll wait queue */ + poll_wait(file, &ep->poll_wait, wait); + /* * Proceed to find out if wanted events are really available inside * the ready list. @@ -972,8 +968,7 @@ static const struct file_operations eventpoll_fops = { .show_fdinfo = ep_show_fdinfo, #endif .release = ep_eventpoll_release, - .get_poll_head = ep_eventpoll_get_poll_head, - .poll_mask = ep_eventpoll_poll_mask, + .poll = ep_eventpoll_poll, .llseek = noop_llseek, }; diff --git a/fs/pipe.c b/fs/pipe.c index bb0840e234f3..39d6f431da83 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -509,22 +509,19 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } } -static struct wait_queue_head * -pipe_get_poll_head(struct file *filp, __poll_t events) -{ - struct pipe_inode_info *pipe = filp->private_data; - - return &pipe->wait; -} - /* No kernel lock held - fine */ -static __poll_t pipe_poll_mask(struct file *filp, __poll_t events) +static __poll_t +pipe_poll(struct file *filp, poll_table *wait) { + __poll_t mask; struct pipe_inode_info *pipe = filp->private_data; - int nrbufs = pipe->nrbufs; - __poll_t mask = 0; + int nrbufs; + + poll_wait(filp, &pipe->wait, wait); /* Reading only -- no need for acquiring the semaphore. */ + nrbufs = pipe->nrbufs; + mask = 0; if (filp->f_mode & FMODE_READ) { mask = (nrbufs > 0) ? EPOLLIN | EPOLLRDNORM : 0; if (!pipe->writers && filp->f_version != pipe->w_counter) @@ -1023,8 +1020,7 @@ const struct file_operations pipefifo_fops = { .llseek = no_llseek, .read_iter = pipe_read, .write_iter = pipe_write, - .get_poll_head = pipe_get_poll_head, - .poll_mask = pipe_poll_mask, + .poll = pipe_poll, .unlocked_ioctl = pipe_ioctl, .release = pipe_release, .fasync = pipe_fasync, diff --git a/fs/select.c b/fs/select.c index 317891ff8165..4a6b6e4b21cb 100644 --- a/fs/select.c +++ b/fs/select.c @@ -34,29 +34,6 @@ #include -__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt) -{ - if (file->f_op->poll) { - return file->f_op->poll(file, pt); - } else if (file_has_poll_mask(file)) { - unsigned int events = poll_requested_events(pt); - struct wait_queue_head *head; - - if (pt && pt->_qproc) { - head = file->f_op->get_poll_head(file, events); - if (!head) - return DEFAULT_POLLMASK; - if (IS_ERR(head)) - return EPOLLERR; - pt->_qproc(file, head, pt); - } - - return file->f_op->poll_mask(file, events); - } else { - return DEFAULT_POLLMASK; - } -} -EXPORT_SYMBOL_GPL(vfs_poll); /* * Estimate expected accuracy in ns from a timeval. diff --git a/fs/timerfd.c b/fs/timerfd.c index d84a2bee4f82..cdad49da3ff7 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -226,20 +226,21 @@ static int timerfd_release(struct inode *inode, struct file *file) kfree_rcu(ctx, rcu); return 0; } - -static struct wait_queue_head *timerfd_get_poll_head(struct file *file, - __poll_t eventmask) + +static __poll_t timerfd_poll(struct file *file, poll_table *wait) { struct timerfd_ctx *ctx = file->private_data; + __poll_t events = 0; + unsigned long flags; - return &ctx->wqh; -} + poll_wait(file, &ctx->wqh, wait); -static __poll_t timerfd_poll_mask(struct file *file, __poll_t eventmask) -{ - struct timerfd_ctx *ctx = file->private_data; + spin_lock_irqsave(&ctx->wqh.lock, flags); + if (ctx->ticks) + events |= EPOLLIN; + spin_unlock_irqrestore(&ctx->wqh.lock, flags); - return ctx->ticks ? EPOLLIN : 0; + return events; } static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, @@ -363,8 +364,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg static const struct file_operations timerfd_fops = { .release = timerfd_release, - .get_poll_head = timerfd_get_poll_head, - .poll_mask = timerfd_poll_mask, + .poll = timerfd_poll, .read = timerfd_read, .llseek = noop_llseek, .show_fdinfo = timerfd_show, diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index cc414db9da0a..482461d8931d 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -245,7 +245,8 @@ ssize_t af_alg_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); void af_alg_free_resources(struct af_alg_async_req *areq); void af_alg_async_cb(struct crypto_async_request *_req, int err); -__poll_t af_alg_poll_mask(struct socket *sock, __poll_t events); +__poll_t af_alg_poll(struct file *file, struct socket *sock, + poll_table *wait); struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk, unsigned int areqlen); int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, diff --git a/include/linux/fs.h b/include/linux/fs.h index 5c91108846db..d78d146a98da 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1720,8 +1720,6 @@ struct file_operations { int (*iterate) (struct file *, struct dir_context *); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); - struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t); - __poll_t (*poll_mask) (struct file *, __poll_t); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); diff --git a/include/linux/net.h b/include/linux/net.h index 08b6eb964dd6..6554d3ba4396 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -147,7 +147,6 @@ struct proto_ops { int (*getname) (struct socket *sock, struct sockaddr *addr, int peer); - __poll_t (*poll_mask) (struct socket *sock, __poll_t events); __poll_t (*poll) (struct file *file, struct socket *sock, struct poll_table_struct *wait); int (*ioctl) (struct socket *sock, unsigned int cmd, diff --git a/include/linux/poll.h b/include/linux/poll.h index fdf86b4cbc71..7e0fdcf905d2 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -74,18 +74,18 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) pt->_key = ~(__poll_t)0; /* all events enabled */ } -static inline bool file_has_poll_mask(struct file *file) +static inline bool file_can_poll(struct file *file) { - return file->f_op->get_poll_head && file->f_op->poll_mask; + return file->f_op->poll; } -static inline bool file_can_poll(struct file *file) +static inline __poll_t vfs_poll(struct file *file, struct poll_table_struct *pt) { - return file->f_op->poll || file_has_poll_mask(file); + if (unlikely(!file->f_op->poll)) + return DEFAULT_POLLMASK; + return file->f_op->poll(file, pt); } -__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt); - struct poll_table_entry { struct file *filp; __poll_t key; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c86885954994..164cdedf6012 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3252,7 +3252,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, int *peeked, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); -__poll_t datagram_poll_mask(struct socket *sock, __poll_t events); +__poll_t datagram_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, struct iov_iter *to, int size); static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 53ce8176c313..ec9d6bc65855 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -271,7 +271,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); -__poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events); +__poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); int bt_sock_wait_ready(struct sock *sk, unsigned long flags); diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index b0eaeb02d46d..f4c21b5a1242 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -153,6 +153,8 @@ struct iucv_sock_list { atomic_t autobind_name; }; +__poll_t iucv_sock_poll(struct file *file, struct socket *sock, + poll_table *wait); void iucv_sock_link(struct iucv_sock_list *l, struct sock *s); void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s); void iucv_accept_enqueue(struct sock *parent, struct sock *sk); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 30b3e2fe240a..8c2caa370e0f 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -109,7 +109,8 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb); int sctp_inet_listen(struct socket *sock, int backlog); void sctp_write_space(struct sock *sk); void sctp_data_ready(struct sock *sk); -__poll_t sctp_poll_mask(struct socket *sock, __poll_t events); +__poll_t sctp_poll(struct file *file, struct socket *sock, + poll_table *wait); void sctp_sock_rfree(struct sk_buff *skb); void sctp_copy_sock(struct sock *newsk, struct sock *sk, struct sctp_association *asoc); diff --git a/include/net/tcp.h b/include/net/tcp.h index 0448e7c5d2b4..800582b5dd54 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -388,7 +388,8 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); void tcp_init_transfer(struct sock *sk, int bpf_op); -__poll_t tcp_poll_mask(struct socket *sock, __poll_t events); +__poll_t tcp_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int tcp_setsockopt(struct sock *sk, int level, int optname, diff --git a/include/net/tls.h b/include/net/tls.h index 7f84ea3e217c..70c273777fe9 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -109,7 +109,8 @@ struct tls_sw_context_rx { struct strparser strp; void (*saved_data_ready)(struct sock *sk); - __poll_t (*sk_poll_mask)(struct socket *sock, __poll_t events); + unsigned int (*sk_poll)(struct file *file, struct socket *sock, + struct poll_table_struct *wait); struct sk_buff *recv_pkt; u8 control; bool decrypted; @@ -224,7 +225,8 @@ void tls_sw_free_resources_tx(struct sock *sk); void tls_sw_free_resources_rx(struct sock *sk); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); -__poll_t tls_sw_poll_mask(struct socket *sock, __poll_t events); +unsigned int tls_sw_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait); ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); diff --git a/include/net/udp.h b/include/net/udp.h index b1ea8b0f5e6a..81afdacd4fff 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -285,7 +285,7 @@ int udp_init_sock(struct sock *sk); int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); -__poll_t udp_poll_mask(struct socket *sock, __poll_t events); +__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait); struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features, bool is_ipv6); diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index d00221345c19..d4e768d55d14 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -39,8 +39,10 @@ enum { IOCB_CMD_PWRITE = 1, IOCB_CMD_FSYNC = 2, IOCB_CMD_FDSYNC = 3, - /* 4 was the experimental IOCB_CMD_PREADX */ - IOCB_CMD_POLL = 5, + /* These two are experimental. + * IOCB_CMD_PREADX = 4, + * IOCB_CMD_POLL = 5, + */ IOCB_CMD_NOOP = 6, IOCB_CMD_PREADV = 7, IOCB_CMD_PWRITEV = 8, @@ -109,7 +111,7 @@ struct iocb { #undef IFLITTLE struct __aio_sigset { - const sigset_t __user *sigmask; + sigset_t __user *sigmask; size_t sigsetsize; }; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 55fdba05d7d9..9b6bc5abe946 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1869,7 +1869,7 @@ static const struct proto_ops atalk_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = atalk_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = atalk_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = atalk_compat_ioctl, diff --git a/net/atm/common.c b/net/atm/common.c index ff5748b2190f..a7a68e509628 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -647,11 +647,16 @@ out: return error; } -__poll_t vcc_poll_mask(struct socket *sock, __poll_t events) +__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; - struct atm_vcc *vcc = ATM_SD(sock); - __poll_t mask = 0; + struct atm_vcc *vcc; + __poll_t mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; + + vcc = ATM_SD(sock); /* exceptional events */ if (sk->sk_err) diff --git a/net/atm/common.h b/net/atm/common.h index 526796ad230f..5850649068bb 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -17,7 +17,7 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci); int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len); -__poll_t vcc_poll_mask(struct socket *sock, __poll_t events); +__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait); int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int vcc_setsockopt(struct socket *sock, int level, int optname, diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 9f75092fe778..2cb10af16afc 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -113,7 +113,7 @@ static const struct proto_ops pvc_proto_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = pvc_getname, - .poll_mask = vcc_poll_mask, + .poll = vcc_poll, .ioctl = vcc_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = vcc_compat_ioctl, diff --git a/net/atm/svc.c b/net/atm/svc.c index 53f4ad7087b1..2f91b766ac42 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -636,7 +636,7 @@ static const struct proto_ops svc_proto_ops = { .socketpair = sock_no_socketpair, .accept = svc_accept, .getname = svc_getname, - .poll_mask = vcc_poll_mask, + .poll = vcc_poll, .ioctl = svc_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = svc_compat_ioctl, diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index d1d2442ce573..c603d33d5410 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1941,7 +1941,7 @@ static const struct proto_ops ax25_proto_ops = { .socketpair = sock_no_socketpair, .accept = ax25_accept, .getname = ax25_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = ax25_ioctl, .listen = ax25_listen, .shutdown = ax25_shutdown, diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 510ab4f55df5..3264e1873219 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -437,13 +437,16 @@ static inline __poll_t bt_accept_poll(struct sock *parent) return 0; } -__poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events) +__poll_t bt_sock_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; BT_DBG("sock %p, sk %p", sock, sk); + poll_wait(file, sk_sleep(sk), wait); + if (sk->sk_state == BT_LISTEN) return bt_accept_poll(sk); @@ -475,7 +478,7 @@ __poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events) return mask; } -EXPORT_SYMBOL(bt_sock_poll_mask); +EXPORT_SYMBOL(bt_sock_poll); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index d6c099861538..1506e1632394 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1975,7 +1975,7 @@ static const struct proto_ops hci_sock_ops = { .sendmsg = hci_sock_sendmsg, .recvmsg = hci_sock_recvmsg, .ioctl = hci_sock_ioctl, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = hci_sock_setsockopt, diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 742a190034e6..686bdc6b35b0 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1653,7 +1653,7 @@ static const struct proto_ops l2cap_sock_ops = { .getname = l2cap_sock_getname, .sendmsg = l2cap_sock_sendmsg, .recvmsg = l2cap_sock_recvmsg, - .poll_mask = bt_sock_poll_mask, + .poll = bt_sock_poll, .ioctl = bt_sock_ioctl, .mmap = sock_no_mmap, .socketpair = sock_no_socketpair, diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 1cf57622473a..d606e9212291 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1049,7 +1049,7 @@ static const struct proto_ops rfcomm_sock_ops = { .setsockopt = rfcomm_sock_setsockopt, .getsockopt = rfcomm_sock_getsockopt, .ioctl = rfcomm_sock_ioctl, - .poll_mask = bt_sock_poll_mask, + .poll = bt_sock_poll, .socketpair = sock_no_socketpair, .mmap = sock_no_mmap }; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index d60dbc61d170..413b8ee49fec 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1197,7 +1197,7 @@ static const struct proto_ops sco_sock_ops = { .getname = sco_sock_getname, .sendmsg = sco_sock_sendmsg, .recvmsg = sco_sock_recvmsg, - .poll_mask = bt_sock_poll_mask, + .poll = bt_sock_poll, .ioctl = bt_sock_ioctl, .mmap = sock_no_mmap, .socketpair = sock_no_socketpair, diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index c7991867d622..a6fb1b3bcad9 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -934,11 +934,15 @@ static int caif_release(struct socket *sock) } /* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */ -static __poll_t caif_poll_mask(struct socket *sock, __poll_t events) +static __poll_t caif_poll(struct file *file, + struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; + __poll_t mask; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - __poll_t mask = 0; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; /* exceptional events? */ if (sk->sk_err) @@ -972,7 +976,7 @@ static const struct proto_ops caif_seqpacket_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = caif_poll_mask, + .poll = caif_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -993,7 +997,7 @@ static const struct proto_ops caif_stream_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = caif_poll_mask, + .poll = caif_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/can/bcm.c b/net/can/bcm.c index 9393f25df08d..0af8f0db892a 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1660,7 +1660,7 @@ static const struct proto_ops bcm_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/can/raw.c b/net/can/raw.c index fd7e2f49ea6a..1051eee82581 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -843,7 +843,7 @@ static const struct proto_ops raw_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = raw_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/core/datagram.c b/net/core/datagram.c index f19bf3dc2bd6..9938952c5c78 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -819,8 +819,9 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg); /** * datagram_poll - generic datagram poll + * @file: file struct * @sock: socket - * @events to wait for + * @wait: poll table * * Datagram poll: Again totally generic. This also handles * sequenced packet sockets providing the socket receive queue @@ -830,10 +831,14 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg); * and you use a different write policy from sock_writeable() * then please supply your own write_space callback. */ -__poll_t datagram_poll_mask(struct socket *sock, __poll_t events) +__poll_t datagram_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; - __poll_t mask = 0; + __poll_t mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) @@ -866,4 +871,4 @@ __poll_t datagram_poll_mask(struct socket *sock, __poll_t events) return mask; } -EXPORT_SYMBOL(datagram_poll_mask); +EXPORT_SYMBOL(datagram_poll); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 0ea2ee56ac1b..f91e3816806b 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -316,7 +316,8 @@ int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); void dccp_shutdown(struct sock *sk, int how); int inet_dccp_listen(struct socket *sock, int backlog); -__poll_t dccp_poll_mask(struct socket *sock, __poll_t events); +__poll_t dccp_poll(struct file *file, struct socket *sock, + poll_table *wait); int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); void dccp_req_err(struct sock *sk, u64 seq); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index a9e478cd3787..b08feb219b44 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -984,7 +984,7 @@ static const struct proto_ops inet_dccp_ops = { .accept = inet_accept, .getname = inet_getname, /* FIXME: work on tcp_poll to rename it to inet_csk_poll */ - .poll_mask = dccp_poll_mask, + .poll = dccp_poll, .ioctl = inet_ioctl, /* FIXME: work on inet_listen to rename it to sock_common_listen */ .listen = inet_dccp_listen, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 17fc4e0166ba..6344f1b18a6a 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1070,7 +1070,7 @@ static const struct proto_ops inet6_dccp_ops = { .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet6_getname, - .poll_mask = dccp_poll_mask, + .poll = dccp_poll, .ioctl = inet6_ioctl, .listen = inet_dccp_listen, .shutdown = inet_shutdown, diff --git a/net/dccp/proto.c b/net/dccp/proto.c index ca21c1c76da0..0d56e36a6db7 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -312,11 +312,20 @@ int dccp_disconnect(struct sock *sk, int flags) EXPORT_SYMBOL_GPL(dccp_disconnect); -__poll_t dccp_poll_mask(struct socket *sock, __poll_t events) +/* + * Wait for a DCCP event. + * + * Note that we don't need to lock the socket, as the upper poll layers + * take care of normal races (between the test and the event) and we don't + * go look at any of the socket buffers directly. + */ +__poll_t dccp_poll(struct file *file, struct socket *sock, + poll_table *wait) { __poll_t mask; struct sock *sk = sock->sk; + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == DCCP_LISTEN) return inet_csk_listen_poll(sk); @@ -358,7 +367,7 @@ __poll_t dccp_poll_mask(struct socket *sock, __poll_t events) return mask; } -EXPORT_SYMBOL_GPL(dccp_poll_mask); +EXPORT_SYMBOL_GPL(dccp_poll); int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) { diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 9a686d890bfa..7d6ff983ba2c 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1207,11 +1207,11 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer) } -static __poll_t dn_poll_mask(struct socket *sock, __poll_t events) +static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct dn_scp *scp = DN_SK(sk); - __poll_t mask = datagram_poll_mask(sock, events); + __poll_t mask = datagram_poll(file, sock, wait); if (!skb_queue_empty(&scp->other_receive_queue)) mask |= EPOLLRDBAND; @@ -2331,7 +2331,7 @@ static const struct proto_ops dn_proto_ops = { .socketpair = sock_no_socketpair, .accept = dn_accept, .getname = dn_getname, - .poll_mask = dn_poll_mask, + .poll = dn_poll, .ioctl = dn_ioctl, .listen = dn_listen, .shutdown = dn_shutdown, diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index a0768d2759b8..a60658c85a9a 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -423,7 +423,7 @@ static const struct proto_ops ieee802154_raw_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = ieee802154_sock_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -969,7 +969,7 @@ static const struct proto_ops ieee802154_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = ieee802154_sock_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 15e125558c76..b403499fdabe 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -986,7 +986,7 @@ const struct proto_ops inet_stream_ops = { .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet_getname, - .poll_mask = tcp_poll_mask, + .poll = tcp_poll, .ioctl = inet_ioctl, .listen = inet_listen, .shutdown = inet_shutdown, @@ -1021,7 +1021,7 @@ const struct proto_ops inet_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = inet_getname, - .poll_mask = udp_poll_mask, + .poll = udp_poll, .ioctl = inet_ioctl, .listen = sock_no_listen, .shutdown = inet_shutdown, @@ -1042,7 +1042,7 @@ EXPORT_SYMBOL(inet_dgram_ops); /* * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without - * udp_poll_mask + * udp_poll */ static const struct proto_ops inet_sockraw_ops = { .family = PF_INET, @@ -1053,7 +1053,7 @@ static const struct proto_ops inet_sockraw_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = inet_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = inet_ioctl, .listen = sock_no_listen, .shutdown = inet_shutdown, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 141acd92e58a..e7b53d2a971f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -494,21 +494,32 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp, } /* - * Socket is not locked. We are protected from async events by poll logic and - * correct handling of state changes made by other threads is impossible in - * any case. + * Wait for a TCP event. + * + * Note that we don't need to lock the socket, as the upper poll layers + * take care of normal races (between the test and the event) and we don't + * go look at any of the socket buffers directly. */ -__poll_t tcp_poll_mask(struct socket *sock, __poll_t events) +__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) { + __poll_t mask; struct sock *sk = sock->sk; const struct tcp_sock *tp = tcp_sk(sk); - __poll_t mask = 0; int state; + sock_poll_wait(file, sk_sleep(sk), wait); + state = inet_sk_state_load(sk); if (state == TCP_LISTEN) return inet_csk_listen_poll(sk); + /* Socket is not locked. We are protected from async events + * by poll logic and correct handling of state changes + * made by other threads is impossible in any case. + */ + + mask = 0; + /* * EPOLLHUP is certainly not done right. But poll() doesn't * have a notion of HUP in just one direction, and for a @@ -589,7 +600,7 @@ __poll_t tcp_poll_mask(struct socket *sock, __poll_t events) return mask; } -EXPORT_SYMBOL(tcp_poll_mask); +EXPORT_SYMBOL(tcp_poll); int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9bb27df4dac5..24e116ddae79 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2591,7 +2591,7 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname, * udp_poll - wait for a UDP event. * @file - file struct * @sock - socket - * @events - events to wait for + * @wait - poll table * * This is same as datagram poll, except for the special case of * blocking sockets. If application is using a blocking fd @@ -2600,23 +2600,23 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname, * but then block when reading it. Add special case code * to work around these arguably broken applications. */ -__poll_t udp_poll_mask(struct socket *sock, __poll_t events) +__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait) { - __poll_t mask = datagram_poll_mask(sock, events); + __poll_t mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) mask |= EPOLLIN | EPOLLRDNORM; /* Check for false positives due to checksum errors */ - if ((mask & EPOLLRDNORM) && !(sock->file->f_flags & O_NONBLOCK) && + if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) && !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1) mask &= ~(EPOLLIN | EPOLLRDNORM); return mask; } -EXPORT_SYMBOL(udp_poll_mask); +EXPORT_SYMBOL(udp_poll); int udp_abort(struct sock *sk, int err) { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 74f2a261e8df..9ed0eae91758 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -570,7 +570,7 @@ const struct proto_ops inet6_stream_ops = { .socketpair = sock_no_socketpair, /* a do nothing */ .accept = inet_accept, /* ok */ .getname = inet6_getname, - .poll_mask = tcp_poll_mask, /* ok */ + .poll = tcp_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ .listen = inet_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ @@ -603,7 +603,7 @@ const struct proto_ops inet6_dgram_ops = { .socketpair = sock_no_socketpair, /* a do nothing */ .accept = sock_no_accept, /* a do nothing */ .getname = inet6_getname, - .poll_mask = udp_poll_mask, /* ok */ + .poll = udp_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ .listen = sock_no_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ce6f0d15b5dd..afc307c89d1a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1334,7 +1334,7 @@ void raw6_proc_exit(void) } #endif /* CONFIG_PROC_FS */ -/* Same as inet6_dgram_ops, sans udp_poll_mask. */ +/* Same as inet6_dgram_ops, sans udp_poll. */ const struct proto_ops inet6_sockraw_ops = { .family = PF_INET6, .owner = THIS_MODULE, @@ -1344,7 +1344,7 @@ const struct proto_ops inet6_sockraw_ops = { .socketpair = sock_no_socketpair, /* a do nothing */ .accept = sock_no_accept, /* a do nothing */ .getname = inet6_getname, - .poll_mask = datagram_poll_mask, /* ok */ + .poll = datagram_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ .listen = sock_no_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 68e86257a549..893a022f9620 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1488,11 +1488,14 @@ static inline __poll_t iucv_accept_poll(struct sock *parent) return 0; } -static __poll_t iucv_sock_poll_mask(struct socket *sock, __poll_t events) +__poll_t iucv_sock_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; + sock_poll_wait(file, sk_sleep(sk), wait); + if (sk->sk_state == IUCV_LISTEN) return iucv_accept_poll(sk); @@ -2385,7 +2388,7 @@ static const struct proto_ops iucv_sock_ops = { .getname = iucv_sock_getname, .sendmsg = iucv_sock_sendmsg, .recvmsg = iucv_sock_recvmsg, - .poll_mask = iucv_sock_poll_mask, + .poll = iucv_sock_poll, .ioctl = sock_no_ioctl, .mmap = sock_no_mmap, .socketpair = sock_no_socketpair, diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 84b7d5c6fec8..d3601d421571 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1336,9 +1336,9 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) struct list_head *head; int index = 0; - /* For SOCK_SEQPACKET sock type, datagram_poll_mask checks the sk_state, - * so we set sk_state, otherwise epoll_wait always returns right away - * with EPOLLHUP + /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so + * we set sk_state, otherwise epoll_wait always returns right away with + * EPOLLHUP */ kcm->sk.sk_state = TCP_ESTABLISHED; @@ -1903,7 +1903,7 @@ static const struct proto_ops kcm_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = kcm_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -1924,7 +1924,7 @@ static const struct proto_ops kcm_seqpacket_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = kcm_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/key/af_key.c b/net/key/af_key.c index 8bdc1cbe490a..5e1d2946ffbf 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3751,7 +3751,7 @@ static const struct proto_ops pfkey_ops = { /* Now the operations that really occur. */ .release = pfkey_release, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .sendmsg = pfkey_sendmsg, .recvmsg = pfkey_recvmsg, }; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 181073bf6925..a9c05b2bc1b0 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -613,7 +613,7 @@ static const struct proto_ops l2tp_ip_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = l2tp_ip_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = inet_ioctl, .listen = sock_no_listen, .shutdown = inet_shutdown, diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 336e4c00abbc..957369192ca1 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -754,7 +754,7 @@ static const struct proto_ops l2tp_ip6_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = l2tp_ip6_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = inet6_ioctl, .listen = sock_no_listen, .shutdown = inet_shutdown, diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 55188382845c..e398797878a9 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1818,7 +1818,7 @@ static const struct proto_ops pppol2tp_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = pppol2tp_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = pppol2tp_setsockopt, diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 804de8490186..1beeea9549fa 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -1192,7 +1192,7 @@ static const struct proto_ops llc_ui_ops = { .socketpair = sock_no_socketpair, .accept = llc_ui_accept, .getname = llc_ui_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = llc_ui_ioctl, .listen = llc_ui_listen, .shutdown = llc_ui_shutdown, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 1189b84413d5..393573a99a5a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2658,7 +2658,7 @@ static const struct proto_ops netlink_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = netlink_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = netlink_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 93fbcafbf388..03f37c4e64fe 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1355,7 +1355,7 @@ static const struct proto_ops nr_proto_ops = { .socketpair = sock_no_socketpair, .accept = nr_accept, .getname = nr_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = nr_ioctl, .listen = nr_listen, .shutdown = sock_no_shutdown, diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index ab5bb14b49af..ea0c0c6f1874 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -548,13 +548,16 @@ static inline __poll_t llcp_accept_poll(struct sock *parent) return 0; } -static __poll_t llcp_sock_poll_mask(struct socket *sock, __poll_t events) +static __poll_t llcp_sock_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; pr_debug("%p\n", sk); + sock_poll_wait(file, sk_sleep(sk), wait); + if (sk->sk_state == LLCP_LISTEN) return llcp_accept_poll(sk); @@ -896,7 +899,7 @@ static const struct proto_ops llcp_sock_ops = { .socketpair = sock_no_socketpair, .accept = llcp_sock_accept, .getname = llcp_sock_getname, - .poll_mask = llcp_sock_poll_mask, + .poll = llcp_sock_poll, .ioctl = sock_no_ioctl, .listen = llcp_sock_listen, .shutdown = sock_no_shutdown, @@ -916,7 +919,7 @@ static const struct proto_ops llcp_rawsock_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = llcp_sock_getname, - .poll_mask = llcp_sock_poll_mask, + .poll = llcp_sock_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 60c322531c49..e2188deb08dc 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -284,7 +284,7 @@ static const struct proto_ops rawsock_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -304,7 +304,7 @@ static const struct proto_ops rawsock_raw_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ff8e7e245c37..57634bc3da74 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4076,11 +4076,12 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, return 0; } -static __poll_t packet_poll_mask(struct socket *sock, __poll_t events) +static __poll_t packet_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); - __poll_t mask = datagram_poll_mask(sock, events); + __poll_t mask = datagram_poll(file, sock, wait); spin_lock_bh(&sk->sk_receive_queue.lock); if (po->rx_ring.pg_vec) { @@ -4422,7 +4423,7 @@ static const struct proto_ops packet_ops_spkt = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = packet_getname_spkt, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = packet_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -4443,7 +4444,7 @@ static const struct proto_ops packet_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = packet_getname, - .poll_mask = packet_poll_mask, + .poll = packet_poll, .ioctl = packet_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/phonet/socket.c b/net/phonet/socket.c index c295c4e20f01..30187990257f 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -340,12 +340,15 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr, return sizeof(struct sockaddr_pn); } -static __poll_t pn_socket_poll_mask(struct socket *sock, __poll_t events) +static __poll_t pn_socket_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; struct pep_sock *pn = pep_sk(sk); __poll_t mask = 0; + poll_wait(file, sk_sleep(sk), wait); + if (sk->sk_state == TCP_CLOSE) return EPOLLERR; if (!skb_queue_empty(&sk->sk_receive_queue)) @@ -445,7 +448,7 @@ const struct proto_ops phonet_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = pn_socket_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = pn_socket_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -470,7 +473,7 @@ const struct proto_ops phonet_stream_ops = { .socketpair = sock_no_socketpair, .accept = pn_socket_accept, .getname = pn_socket_getname, - .poll_mask = pn_socket_poll_mask, + .poll = pn_socket_poll, .ioctl = pn_socket_ioctl, .listen = pn_socket_listen, .shutdown = sock_no_shutdown, diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 1b5025ea5b04..2aa07b547b16 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1023,7 +1023,7 @@ static const struct proto_ops qrtr_proto_ops = { .recvmsg = qrtr_recvmsg, .getname = qrtr_getname, .ioctl = qrtr_ioctl, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .shutdown = sock_no_shutdown, .setsockopt = sock_no_setsockopt, .getsockopt = sock_no_getsockopt, diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index ebe42e7eb456..d00a0ef39a56 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1470,7 +1470,7 @@ static const struct proto_ops rose_proto_ops = { .socketpair = sock_no_socketpair, .accept = rose_accept, .getname = rose_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = rose_ioctl, .listen = rose_listen, .shutdown = sock_no_shutdown, diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 3b1ac93efee2..2b463047dd7b 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -734,11 +734,15 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname, /* * permit an RxRPC socket to be polled */ -static __poll_t rxrpc_poll_mask(struct socket *sock, __poll_t events) +static __poll_t rxrpc_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; struct rxrpc_sock *rx = rxrpc_sk(sk); - __poll_t mask = 0; + __poll_t mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; /* the socket is readable if there are any messages waiting on the Rx * queue */ @@ -945,7 +949,7 @@ static const struct proto_ops rxrpc_rpc_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = rxrpc_poll_mask, + .poll = rxrpc_poll, .ioctl = sock_no_ioctl, .listen = rxrpc_listen, .shutdown = rxrpc_shutdown, diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 7339918a805d..0cd2e764f47f 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -1010,7 +1010,7 @@ static const struct proto_ops inet6_seqpacket_ops = { .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = sctp_getname, - .poll_mask = sctp_poll_mask, + .poll = sctp_poll, .ioctl = inet6_ioctl, .listen = sctp_inet_listen, .shutdown = inet_shutdown, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 5dffbc493008..67f73d3a1356 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1016,7 +1016,7 @@ static const struct proto_ops inet_seqpacket_ops = { .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet_getname, /* Semantics are different. */ - .poll_mask = sctp_poll_mask, + .poll = sctp_poll, .ioctl = inet_ioctl, .listen = sctp_inet_listen, .shutdown = inet_shutdown, /* Looks harmless. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d20f7addee19..ce620e878538 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7717,12 +7717,14 @@ out: * here, again, by modeling the current TCP/UDP code. We don't have * a good way to test with it yet. */ -__poll_t sctp_poll_mask(struct socket *sock, __poll_t events) +__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct sctp_sock *sp = sctp_sk(sk); __poll_t mask; + poll_wait(file, sk_sleep(sk), wait); + sock_rps_record_flow(sk); /* A TCP-style listening socket becomes readable when the accept queue diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index da7f02edcd37..973b4471b532 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1273,7 +1273,8 @@ static __poll_t smc_accept_poll(struct sock *parent) return mask; } -static __poll_t smc_poll_mask(struct socket *sock, __poll_t events) +static __poll_t smc_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; @@ -1289,7 +1290,7 @@ static __poll_t smc_poll_mask(struct socket *sock, __poll_t events) if ((sk->sk_state == SMC_INIT) || smc->use_fallback) { /* delegate to CLC child sock */ release_sock(sk); - mask = smc->clcsock->ops->poll_mask(smc->clcsock, events); + mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); lock_sock(sk); sk->sk_err = smc->clcsock->sk->sk_err; if (sk->sk_err) { @@ -1307,6 +1308,11 @@ static __poll_t smc_poll_mask(struct socket *sock, __poll_t events) } } } else { + if (sk->sk_state != SMC_CLOSED) { + release_sock(sk); + sock_poll_wait(file, sk_sleep(sk), wait); + lock_sock(sk); + } if (sk->sk_err) mask |= EPOLLERR; if ((sk->sk_shutdown == SHUTDOWN_MASK) || @@ -1619,7 +1625,7 @@ static const struct proto_ops smc_sock_ops = { .socketpair = sock_no_socketpair, .accept = smc_accept, .getname = smc_getname, - .poll_mask = smc_poll_mask, + .poll = smc_poll, .ioctl = smc_ioctl, .listen = smc_listen, .shutdown = smc_shutdown, diff --git a/net/socket.c b/net/socket.c index 8a109012608a..a564c6ed19d5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -117,10 +117,8 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from); static int sock_mmap(struct file *file, struct vm_area_struct *vma); static int sock_close(struct inode *inode, struct file *file); -static struct wait_queue_head *sock_get_poll_head(struct file *file, - __poll_t events); -static __poll_t sock_poll_mask(struct file *file, __poll_t); -static __poll_t sock_poll(struct file *file, struct poll_table_struct *wait); +static __poll_t sock_poll(struct file *file, + struct poll_table_struct *wait); static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT static long compat_sock_ioctl(struct file *file, @@ -143,8 +141,6 @@ static const struct file_operations socket_file_ops = { .llseek = no_llseek, .read_iter = sock_read_iter, .write_iter = sock_write_iter, - .get_poll_head = sock_get_poll_head, - .poll_mask = sock_poll_mask, .poll = sock_poll, .unlocked_ioctl = sock_ioctl, #ifdef CONFIG_COMPAT @@ -1130,48 +1126,14 @@ out_release: } EXPORT_SYMBOL(sock_create_lite); -static struct wait_queue_head *sock_get_poll_head(struct file *file, - __poll_t events) -{ - struct socket *sock = file->private_data; - - if (!sock->ops->poll_mask) - return NULL; - sock_poll_busy_loop(sock, events); - return sk_sleep(sock->sk); -} - -static __poll_t sock_poll_mask(struct file *file, __poll_t events) -{ - struct socket *sock = file->private_data; - - /* - * We need to be sure we are in sync with the socket flags modification. - * - * This memory barrier is paired in the wq_has_sleeper. - */ - smp_mb(); - - /* this socket can poll_ll so tell the system call */ - return sock->ops->poll_mask(sock, events) | - (sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0); -} - /* No kernel lock held - perfect */ static __poll_t sock_poll(struct file *file, poll_table *wait) { struct socket *sock = file->private_data; - __poll_t events = poll_requested_events(wait), mask = 0; - - if (sock->ops->poll) { - sock_poll_busy_loop(sock, events); - mask = sock->ops->poll(file, sock, wait); - } else if (sock->ops->poll_mask) { - sock_poll_wait(file, sock_get_poll_head(file, events), wait); - mask = sock->ops->poll_mask(sock, events); - } + __poll_t events = poll_requested_events(wait); - return mask | sock_poll_busy_flag(sock); + sock_poll_busy_loop(sock, events); + return sock->ops->poll(file, sock, wait) | sock_poll_busy_flag(sock); } static int sock_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 14a5d055717d..930852c54d7a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -692,9 +692,10 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, } /** - * tipc_poll - read pollmask + * tipc_poll - read and possibly block on pollmask * @file: file structure associated with the socket * @sock: socket for which to calculate the poll bits + * @wait: ??? * * Returns pollmask value * @@ -708,12 +709,15 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, * imply that the operation will succeed, merely that it should be performed * and will not block. */ -static __poll_t tipc_poll_mask(struct socket *sock, __poll_t events) +static __poll_t tipc_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); __poll_t revents = 0; + sock_poll_wait(file, sk_sleep(sk), wait); + if (sk->sk_shutdown & RCV_SHUTDOWN) revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) @@ -3033,7 +3037,7 @@ static const struct proto_ops msg_ops = { .socketpair = tipc_socketpair, .accept = sock_no_accept, .getname = tipc_getname, - .poll_mask = tipc_poll_mask, + .poll = tipc_poll, .ioctl = tipc_ioctl, .listen = sock_no_listen, .shutdown = tipc_shutdown, @@ -3054,7 +3058,7 @@ static const struct proto_ops packet_ops = { .socketpair = tipc_socketpair, .accept = tipc_accept, .getname = tipc_getname, - .poll_mask = tipc_poll_mask, + .poll = tipc_poll, .ioctl = tipc_ioctl, .listen = tipc_listen, .shutdown = tipc_shutdown, @@ -3075,7 +3079,7 @@ static const struct proto_ops stream_ops = { .socketpair = tipc_socketpair, .accept = tipc_accept, .getname = tipc_getname, - .poll_mask = tipc_poll_mask, + .poll = tipc_poll, .ioctl = tipc_ioctl, .listen = tipc_listen, .shutdown = tipc_shutdown, diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index a127d61e8af9..301f22430469 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -712,7 +712,7 @@ static int __init tls_register(void) build_protos(tls_prots[TLSV4], &tcp_prot); tls_sw_proto_ops = inet_stream_ops; - tls_sw_proto_ops.poll_mask = tls_sw_poll_mask; + tls_sw_proto_ops.poll = tls_sw_poll; tls_sw_proto_ops.splice_read = tls_sw_splice_read; #ifdef CONFIG_TLS_DEVICE diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index f127fac88acf..d2380548f8f6 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -919,22 +919,23 @@ splice_read_end: return copied ? : err; } -__poll_t tls_sw_poll_mask(struct socket *sock, __poll_t events) +unsigned int tls_sw_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait) { + unsigned int ret; struct sock *sk = sock->sk; struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - __poll_t mask; - /* Grab EPOLLOUT and EPOLLHUP from the underlying socket */ - mask = ctx->sk_poll_mask(sock, events); + /* Grab POLLOUT and POLLHUP from the underlying socket */ + ret = ctx->sk_poll(file, sock, wait); - /* Clear EPOLLIN bits, and set based on recv_pkt */ - mask &= ~(EPOLLIN | EPOLLRDNORM); + /* Clear POLLIN bits, and set based on recv_pkt */ + ret &= ~(POLLIN | POLLRDNORM); if (ctx->recv_pkt) - mask |= EPOLLIN | EPOLLRDNORM; + ret |= POLLIN | POLLRDNORM; - return mask; + return ret; } static int tls_read_size(struct strparser *strp, struct sk_buff *skb) @@ -1191,7 +1192,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) sk->sk_data_ready = tls_data_ready; write_unlock_bh(&sk->sk_callback_lock); - sw_ctx_rx->sk_poll_mask = sk->sk_socket->ops->poll_mask; + sw_ctx_rx->sk_poll = sk->sk_socket->ops->poll; strp_check_rcv(&sw_ctx_rx->strp); } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 95b02a71fd47..e5473c03d667 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -638,8 +638,9 @@ static int unix_stream_connect(struct socket *, struct sockaddr *, static int unix_socketpair(struct socket *, struct socket *); static int unix_accept(struct socket *, struct socket *, int, bool); static int unix_getname(struct socket *, struct sockaddr *, int); -static __poll_t unix_poll_mask(struct socket *, __poll_t); -static __poll_t unix_dgram_poll_mask(struct socket *, __poll_t); +static __poll_t unix_poll(struct file *, struct socket *, poll_table *); +static __poll_t unix_dgram_poll(struct file *, struct socket *, + poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); static int unix_shutdown(struct socket *, int); static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); @@ -680,7 +681,7 @@ static const struct proto_ops unix_stream_ops = { .socketpair = unix_socketpair, .accept = unix_accept, .getname = unix_getname, - .poll_mask = unix_poll_mask, + .poll = unix_poll, .ioctl = unix_ioctl, .listen = unix_listen, .shutdown = unix_shutdown, @@ -703,7 +704,7 @@ static const struct proto_ops unix_dgram_ops = { .socketpair = unix_socketpair, .accept = sock_no_accept, .getname = unix_getname, - .poll_mask = unix_dgram_poll_mask, + .poll = unix_dgram_poll, .ioctl = unix_ioctl, .listen = sock_no_listen, .shutdown = unix_shutdown, @@ -725,7 +726,7 @@ static const struct proto_ops unix_seqpacket_ops = { .socketpair = unix_socketpair, .accept = unix_accept, .getname = unix_getname, - .poll_mask = unix_dgram_poll_mask, + .poll = unix_dgram_poll, .ioctl = unix_ioctl, .listen = unix_listen, .shutdown = unix_shutdown, @@ -2629,10 +2630,13 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return err; } -static __poll_t unix_poll_mask(struct socket *sock, __poll_t events) +static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; - __poll_t mask = 0; + __poll_t mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; /* exceptional events? */ if (sk->sk_err) @@ -2661,11 +2665,15 @@ static __poll_t unix_poll_mask(struct socket *sock, __poll_t events) return mask; } -static __poll_t unix_dgram_poll_mask(struct socket *sock, __poll_t events) +static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk, *other; - int writable; - __poll_t mask = 0; + unsigned int writable; + __poll_t mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) @@ -2691,7 +2699,7 @@ static __poll_t unix_dgram_poll_mask(struct socket *sock, __poll_t events) } /* No write status requested, avoid expensive OUT tests. */ - if (!(events & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) + if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) return mask; writable = unix_writable(sk); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index bb5d5fa68c35..c1076c19b858 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -850,11 +850,18 @@ static int vsock_shutdown(struct socket *sock, int mode) return err; } -static __poll_t vsock_poll_mask(struct socket *sock, __poll_t events) +static __poll_t vsock_poll(struct file *file, struct socket *sock, + poll_table *wait) { - struct sock *sk = sock->sk; - struct vsock_sock *vsk = vsock_sk(sk); - __poll_t mask = 0; + struct sock *sk; + __poll_t mask; + struct vsock_sock *vsk; + + sk = sock->sk; + vsk = vsock_sk(sk); + + poll_wait(file, sk_sleep(sk), wait); + mask = 0; if (sk->sk_err) /* Signify that there has been an error on this socket. */ @@ -1084,7 +1091,7 @@ static const struct proto_ops vsock_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = vsock_getname, - .poll_mask = vsock_poll_mask, + .poll = vsock_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = vsock_shutdown, @@ -1842,7 +1849,7 @@ static const struct proto_ops vsock_stream_ops = { .socketpair = sock_no_socketpair, .accept = vsock_accept, .getname = vsock_getname, - .poll_mask = vsock_poll_mask, + .poll = vsock_poll, .ioctl = sock_no_ioctl, .listen = vsock_listen, .shutdown = vsock_shutdown, diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index f93365ae0fdd..d49aa79b7997 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1750,7 +1750,7 @@ static const struct proto_ops x25_proto_ops = { .socketpair = sock_no_socketpair, .accept = x25_accept, .getname = x25_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = x25_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = compat_x25_ioctl, diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 3b3410ada097..59fb7d3c36a3 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -303,9 +303,10 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len); } -static __poll_t xsk_poll_mask(struct socket *sock, __poll_t events) +static unsigned int xsk_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait) { - __poll_t mask = datagram_poll_mask(sock, events); + unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); @@ -696,7 +697,7 @@ static const struct proto_ops xsk_proto_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = xsk_poll_mask, + .poll = xsk_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, -- cgit From 4c79579b44b1876444f4d04de31c1a37098a0350 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 26 Jun 2018 16:21:18 -0700 Subject: bpf: Change bpf_fib_lookup to return lookup status For ACLs implemented using either FIB rules or FIB entries, the BPF program needs the FIB lookup status to be able to drop the packet. Since the bpf_fib_lookup API has not reached a released kernel yet, change the return code to contain an encoding of the FIB lookup result and return the nexthop device index in the params struct. In addition, inform the BPF program of any post FIB lookup reason as to why the packet needs to go up the stack. The fib result for unicast routes must have an egress device, so remove the check that it is non-NULL. Signed-off-by: David Ahern Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 28 ++++++++++++--- net/core/filter.c | 86 +++++++++++++++++++++++++++++----------------- samples/bpf/xdp_fwd_kern.c | 8 ++--- 3 files changed, 81 insertions(+), 41 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 59b19b6a40d7..b7db3261c62d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1857,7 +1857,8 @@ union bpf_attr { * is resolved), the nexthop address is returned in ipv4_dst * or ipv6_dst based on family, smac is set to mac address of * egress device, dmac is set to nexthop mac address, rt_metric - * is set to metric from route (IPv4/IPv6 only). + * is set to metric from route (IPv4/IPv6 only), and ifindex + * is set to the device index of the nexthop from the FIB lookup. * * *plen* argument is the size of the passed in struct. * *flags* argument can be a combination of one or more of the @@ -1873,9 +1874,10 @@ union bpf_attr { * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. * Return - * Egress device index on success, 0 if packet needs to continue - * up the stack for further processing or a negative error in case - * of failure. + * * < 0 if any input argument is invalid + * * 0 on success (packet is forwarded, nexthop neighbor exists) + * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the + * * packet is not forwarded or needs assist from full stack * * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) * Description @@ -2612,6 +2614,18 @@ struct bpf_raw_tracepoint_args { #define BPF_FIB_LOOKUP_DIRECT BIT(0) #define BPF_FIB_LOOKUP_OUTPUT BIT(1) +enum { + BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ + BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */ + BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */ + BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */ + BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */ + BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */ + BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ + BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ + BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ +}; + struct bpf_fib_lookup { /* input: network family for lookup (AF_INET, AF_INET6) * output: network family of egress nexthop @@ -2625,7 +2639,11 @@ struct bpf_fib_lookup { /* total length of packet from network header - used for MTU check */ __u16 tot_len; - __u32 ifindex; /* L3 device index for lookup */ + + /* input: L3 device index for lookup + * output: device index from FIB lookup + */ + __u32 ifindex; union { /* inputs to lookup */ diff --git a/net/core/filter.c b/net/core/filter.c index e7f12e9f598c..0ca6907d7efe 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4073,8 +4073,9 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, memcpy(params->smac, dev->dev_addr, ETH_ALEN); params->h_vlan_TCI = 0; params->h_vlan_proto = 0; + params->ifindex = dev->ifindex; - return dev->ifindex; + return 0; } #endif @@ -4098,7 +4099,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, /* verify forwarding is enabled on this interface */ in_dev = __in_dev_get_rcu(dev); if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev))) - return 0; + return BPF_FIB_LKUP_RET_FWD_DISABLED; if (flags & BPF_FIB_LOOKUP_OUTPUT) { fl4.flowi4_iif = 1; @@ -4123,7 +4124,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, tb = fib_get_table(net, tbid); if (unlikely(!tb)) - return 0; + return BPF_FIB_LKUP_RET_NOT_FWDED; err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); } else { @@ -4135,8 +4136,20 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF); } - if (err || res.type != RTN_UNICAST) - return 0; + if (err) { + /* map fib lookup errors to RTN_ type */ + if (err == -EINVAL) + return BPF_FIB_LKUP_RET_BLACKHOLE; + if (err == -EHOSTUNREACH) + return BPF_FIB_LKUP_RET_UNREACHABLE; + if (err == -EACCES) + return BPF_FIB_LKUP_RET_PROHIBIT; + + return BPF_FIB_LKUP_RET_NOT_FWDED; + } + + if (res.type != RTN_UNICAST) + return BPF_FIB_LKUP_RET_NOT_FWDED; if (res.fi->fib_nhs > 1) fib_select_path(net, &res, &fl4, NULL); @@ -4144,19 +4157,16 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (check_mtu) { mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst); if (params->tot_len > mtu) - return 0; + return BPF_FIB_LKUP_RET_FRAG_NEEDED; } nh = &res.fi->fib_nh[res.nh_sel]; /* do not handle lwt encaps right now */ if (nh->nh_lwtstate) - return 0; + return BPF_FIB_LKUP_RET_UNSUPP_LWT; dev = nh->nh_dev; - if (unlikely(!dev)) - return 0; - if (nh->nh_gw) params->ipv4_dst = nh->nh_gw; @@ -4166,10 +4176,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, * rcu_read_lock_bh is not needed here */ neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst); - if (neigh) - return bpf_fib_set_fwd_params(params, neigh, dev); + if (!neigh) + return BPF_FIB_LKUP_RET_NO_NEIGH; - return 0; + return bpf_fib_set_fwd_params(params, neigh, dev); } #endif @@ -4190,7 +4200,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, /* link local addresses are never forwarded */ if (rt6_need_strict(dst) || rt6_need_strict(src)) - return 0; + return BPF_FIB_LKUP_RET_NOT_FWDED; dev = dev_get_by_index_rcu(net, params->ifindex); if (unlikely(!dev)) @@ -4198,7 +4208,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, idev = __in6_dev_get_safely(dev); if (unlikely(!idev || !net->ipv6.devconf_all->forwarding)) - return 0; + return BPF_FIB_LKUP_RET_FWD_DISABLED; if (flags & BPF_FIB_LOOKUP_OUTPUT) { fl6.flowi6_iif = 1; @@ -4225,7 +4235,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, tb = ipv6_stub->fib6_get_table(net, tbid); if (unlikely(!tb)) - return 0; + return BPF_FIB_LKUP_RET_NOT_FWDED; f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict); } else { @@ -4238,11 +4248,23 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, } if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry)) - return 0; + return BPF_FIB_LKUP_RET_NOT_FWDED; + + if (unlikely(f6i->fib6_flags & RTF_REJECT)) { + switch (f6i->fib6_type) { + case RTN_BLACKHOLE: + return BPF_FIB_LKUP_RET_BLACKHOLE; + case RTN_UNREACHABLE: + return BPF_FIB_LKUP_RET_UNREACHABLE; + case RTN_PROHIBIT: + return BPF_FIB_LKUP_RET_PROHIBIT; + default: + return BPF_FIB_LKUP_RET_NOT_FWDED; + } + } - if (unlikely(f6i->fib6_flags & RTF_REJECT || - f6i->fib6_type != RTN_UNICAST)) - return 0; + if (f6i->fib6_type != RTN_UNICAST) + return BPF_FIB_LKUP_RET_NOT_FWDED; if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0) f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6, @@ -4252,11 +4274,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (check_mtu) { mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src); if (params->tot_len > mtu) - return 0; + return BPF_FIB_LKUP_RET_FRAG_NEEDED; } if (f6i->fib6_nh.nh_lwtstate) - return 0; + return BPF_FIB_LKUP_RET_UNSUPP_LWT; if (f6i->fib6_flags & RTF_GATEWAY) *dst = f6i->fib6_nh.nh_gw; @@ -4270,10 +4292,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, */ neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128, ndisc_hashfn, dst, dev); - if (neigh) - return bpf_fib_set_fwd_params(params, neigh, dev); + if (!neigh) + return BPF_FIB_LKUP_RET_NO_NEIGH; - return 0; + return bpf_fib_set_fwd_params(params, neigh, dev); } #endif @@ -4315,7 +4337,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, struct bpf_fib_lookup *, params, int, plen, u32, flags) { struct net *net = dev_net(skb->dev); - int index = -EAFNOSUPPORT; + int rc = -EAFNOSUPPORT; if (plen < sizeof(*params)) return -EINVAL; @@ -4326,25 +4348,25 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, switch (params->family) { #if IS_ENABLED(CONFIG_INET) case AF_INET: - index = bpf_ipv4_fib_lookup(net, params, flags, false); + rc = bpf_ipv4_fib_lookup(net, params, flags, false); break; #endif #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - index = bpf_ipv6_fib_lookup(net, params, flags, false); + rc = bpf_ipv6_fib_lookup(net, params, flags, false); break; #endif } - if (index > 0) { + if (!rc) { struct net_device *dev; - dev = dev_get_by_index_rcu(net, index); + dev = dev_get_by_index_rcu(net, params->ifindex); if (!is_skb_forwardable(dev, skb)) - index = 0; + rc = BPF_FIB_LKUP_RET_FRAG_NEEDED; } - return index; + return rc; } static const struct bpf_func_proto bpf_skb_fib_lookup_proto = { diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c index 6673cdb9f55c..a7e94e7ff87d 100644 --- a/samples/bpf/xdp_fwd_kern.c +++ b/samples/bpf/xdp_fwd_kern.c @@ -48,9 +48,9 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags) struct ethhdr *eth = data; struct ipv6hdr *ip6h; struct iphdr *iph; - int out_index; u16 h_proto; u64 nh_off; + int rc; nh_off = sizeof(*eth); if (data + nh_off > data_end) @@ -101,7 +101,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags) fib_params.ifindex = ctx->ingress_ifindex; - out_index = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags); + rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags); /* verify egress index has xdp support * TO-DO bpf_map_lookup_elem(&tx_port, &key) fails with @@ -109,7 +109,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags) * NOTE: without verification that egress index supports XDP * forwarding packets are dropped. */ - if (out_index > 0) { + if (rc == 0) { if (h_proto == htons(ETH_P_IP)) ip_decrease_ttl(iph); else if (h_proto == htons(ETH_P_IPV6)) @@ -117,7 +117,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags) memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN); memcpy(eth->h_source, fib_params.smac, ETH_ALEN); - return bpf_redirect_map(&tx_port, out_index, 0); + return bpf_redirect_map(&tx_port, fib_params.ifindex, 0); } return XDP_PASS; -- cgit From 2cd3ae2129736f9019130064d09713a375870941 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 29 Jun 2018 15:37:25 +0200 Subject: aio: mark __aio_sigset::sigmask const io_pgetevents() will not change the signal mask. Mark it const to make it clear and to reduce the need for casts in user code. Reviewed-by: Christoph Hellwig Signed-off-by: Avi Kivity Signed-off-by: Al Viro [hch: reapply the patch that got incorrectly reverted] Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- include/uapi/linux/aio_abi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index d4e768d55d14..3c5038b587ba 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -111,7 +111,7 @@ struct iocb { #undef IFLITTLE struct __aio_sigset { - sigset_t __user *sigmask; + const sigset_t __user *sigmask; size_t sigsetsize; }; -- cgit