Diffstat (limited to 'include/linux')
127 files changed, 2101 insertions, 911 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 3015235d65e3..5e6a876e17ba 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -586,6 +586,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); #define OSC_SB_CPC_FLEXIBLE_ADR_SPACE 0x00004000 #define OSC_SB_NATIVE_USB4_SUPPORT 0x00040000 #define OSC_SB_PRM_SUPPORT 0x00200000 +#define OSC_SB_FFH_OPR_SUPPORT 0x00400000 extern bool osc_sb_apei_support_acked; extern bool osc_pc_lpi_support_confirmed; @@ -1136,6 +1137,7 @@ int acpi_subsys_freeze(struct device *dev); int acpi_subsys_poweroff(struct device *dev); void acpi_ec_mark_gpe_for_wake(void); void acpi_ec_set_gpe_wake_mask(u8 action); +int acpi_subsys_restore_early(struct device *dev); #else static inline int acpi_subsys_prepare(struct device *dev) { return 0; } static inline void acpi_subsys_complete(struct device *dev) {} @@ -1144,6 +1146,7 @@ static inline int acpi_subsys_suspend_noirq(struct device *dev) { return 0; } static inline int acpi_subsys_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_freeze(struct device *dev) { return 0; } static inline int acpi_subsys_poweroff(struct device *dev) { return 0; } +static inline int acpi_subsys_restore_early(struct device *dev) { return 0; } static inline void acpi_ec_mark_gpe_for_wake(void) {} static inline void acpi_ec_set_gpe_wake_mask(u8 action) {} #endif @@ -1488,6 +1491,16 @@ void acpi_init_pcc(void); static inline void acpi_init_pcc(void) { } #endif +#ifdef CONFIG_ACPI_FFH +void acpi_init_ffh(void); +extern int acpi_ffh_address_space_arch_setup(void *handler_ctxt, + void **region_ctxt); +extern int acpi_ffh_address_space_arch_handler(acpi_integer *value, + void *region_context); +#else +static inline void acpi_init_ffh(void) { } +#endif + #ifdef CONFIG_ACPI extern void acpi_device_notify(struct device *dev); extern void acpi_device_notify_remove(struct device *dev); diff --git a/include/linux/acpi_apmt.h b/include/linux/acpi_apmt.h new file mode 100644 index 000000000000..40bd634d082f --- /dev/null +++ b/include/linux/acpi_apmt.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * ARM CoreSight PMU driver. + * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. 
+ * + */ + +#ifndef __ACPI_APMT_H__ +#define __ACPI_APMT_H__ + +#include <linux/acpi.h> + +#ifdef CONFIG_ACPI_APMT +void acpi_apmt_init(void); +#else +static inline void acpi_apmt_init(void) { } +#endif /* CONFIG_ACPI_APMT */ + +#endif /* __ACPI_APMT_H__ */ diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 5f02d2e6b9d9..c87aeecaa9b2 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -11,6 +11,89 @@ #include <linux/types.h> #include <linux/uuid.h> +#define FFA_SMC(calling_convention, func_num) \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, (calling_convention), \ + ARM_SMCCC_OWNER_STANDARD, (func_num)) + +#define FFA_SMC_32(func_num) FFA_SMC(ARM_SMCCC_SMC_32, (func_num)) +#define FFA_SMC_64(func_num) FFA_SMC(ARM_SMCCC_SMC_64, (func_num)) + +#define FFA_ERROR FFA_SMC_32(0x60) +#define FFA_SUCCESS FFA_SMC_32(0x61) +#define FFA_INTERRUPT FFA_SMC_32(0x62) +#define FFA_VERSION FFA_SMC_32(0x63) +#define FFA_FEATURES FFA_SMC_32(0x64) +#define FFA_RX_RELEASE FFA_SMC_32(0x65) +#define FFA_RXTX_MAP FFA_SMC_32(0x66) +#define FFA_FN64_RXTX_MAP FFA_SMC_64(0x66) +#define FFA_RXTX_UNMAP FFA_SMC_32(0x67) +#define FFA_PARTITION_INFO_GET FFA_SMC_32(0x68) +#define FFA_ID_GET FFA_SMC_32(0x69) +#define FFA_MSG_POLL FFA_SMC_32(0x6A) +#define FFA_MSG_WAIT FFA_SMC_32(0x6B) +#define FFA_YIELD FFA_SMC_32(0x6C) +#define FFA_RUN FFA_SMC_32(0x6D) +#define FFA_MSG_SEND FFA_SMC_32(0x6E) +#define FFA_MSG_SEND_DIRECT_REQ FFA_SMC_32(0x6F) +#define FFA_FN64_MSG_SEND_DIRECT_REQ FFA_SMC_64(0x6F) +#define FFA_MSG_SEND_DIRECT_RESP FFA_SMC_32(0x70) +#define FFA_FN64_MSG_SEND_DIRECT_RESP FFA_SMC_64(0x70) +#define FFA_MEM_DONATE FFA_SMC_32(0x71) +#define FFA_FN64_MEM_DONATE FFA_SMC_64(0x71) +#define FFA_MEM_LEND FFA_SMC_32(0x72) +#define FFA_FN64_MEM_LEND FFA_SMC_64(0x72) +#define FFA_MEM_SHARE FFA_SMC_32(0x73) +#define FFA_FN64_MEM_SHARE FFA_SMC_64(0x73) +#define FFA_MEM_RETRIEVE_REQ FFA_SMC_32(0x74) +#define FFA_FN64_MEM_RETRIEVE_REQ FFA_SMC_64(0x74) +#define FFA_MEM_RETRIEVE_RESP FFA_SMC_32(0x75) +#define FFA_MEM_RELINQUISH FFA_SMC_32(0x76) +#define FFA_MEM_RECLAIM FFA_SMC_32(0x77) +#define FFA_MEM_OP_PAUSE FFA_SMC_32(0x78) +#define FFA_MEM_OP_RESUME FFA_SMC_32(0x79) +#define FFA_MEM_FRAG_RX FFA_SMC_32(0x7A) +#define FFA_MEM_FRAG_TX FFA_SMC_32(0x7B) +#define FFA_NORMAL_WORLD_RESUME FFA_SMC_32(0x7C) + +/* + * For some calls it is necessary to use SMC64 to pass or return 64-bit values. + * For such calls FFA_FN_NATIVE(name) will choose the appropriate + * (native-width) function ID. + */ +#ifdef CONFIG_64BIT +#define FFA_FN_NATIVE(name) FFA_FN64_##name +#else +#define FFA_FN_NATIVE(name) FFA_##name +#endif + +/* FFA error codes. */ +#define FFA_RET_SUCCESS (0) +#define FFA_RET_NOT_SUPPORTED (-1) +#define FFA_RET_INVALID_PARAMETERS (-2) +#define FFA_RET_NO_MEMORY (-3) +#define FFA_RET_BUSY (-4) +#define FFA_RET_INTERRUPTED (-5) +#define FFA_RET_DENIED (-6) +#define FFA_RET_RETRY (-7) +#define FFA_RET_ABORTED (-8) + +/* FFA version encoding */ +#define FFA_MAJOR_VERSION_MASK GENMASK(30, 16) +#define FFA_MINOR_VERSION_MASK GENMASK(15, 0) +#define FFA_MAJOR_VERSION(x) ((u16)(FIELD_GET(FFA_MAJOR_VERSION_MASK, (x)))) +#define FFA_MINOR_VERSION(x) ((u16)(FIELD_GET(FFA_MINOR_VERSION_MASK, (x)))) +#define FFA_PACK_VERSION_INFO(major, minor) \ + (FIELD_PREP(FFA_MAJOR_VERSION_MASK, (major)) | \ + FIELD_PREP(FFA_MINOR_VERSION_MASK, (minor))) +#define FFA_VERSION_1_0 FFA_PACK_VERSION_INFO(1, 0) + +/** + * FF-A specification mentions explicitly about '4K pages'. 
This should + * not be confused with the kernel PAGE_SIZE, which is the translation + * granule kernel is configured and may be one among 4K, 16K and 64K. + */ +#define FFA_PAGE_SIZE SZ_4K + /* FFA Bus/Device/Driver related */ struct ffa_device { int vm_id; @@ -161,11 +244,11 @@ struct ffa_mem_region_attributes { */ #define FFA_MEM_RETRIEVE_SELF_BORROWER BIT(0) u8 flag; - u32 composite_off; /* * Offset in bytes from the start of the outer `ffa_memory_region` to * an `struct ffa_mem_region_addr_range`. */ + u32 composite_off; u64 reserved; }; diff --git a/include/linux/bcm47xx_nvram.h b/include/linux/bcm47xx_nvram.h index 53b31f69b74a..7615f8d7b1ed 100644 --- a/include/linux/bcm47xx_nvram.h +++ b/include/linux/bcm47xx_nvram.h @@ -11,6 +11,7 @@ #include <linux/vmalloc.h> #ifdef CONFIG_BCM47XX_NVRAM +int bcm47xx_nvram_init_from_iomem(void __iomem *nvram_start, size_t res_size); int bcm47xx_nvram_init_from_mem(u32 base, u32 lim); int bcm47xx_nvram_getenv(const char *name, char *val, size_t val_len); int bcm47xx_nvram_gpio_pin(const char *name); @@ -20,6 +21,11 @@ static inline void bcm47xx_nvram_release_contents(char *nvram) vfree(nvram); }; #else +static inline int bcm47xx_nvram_init_from_iomem(void __iomem *nvram_start, + size_t res_size) +{ + return -ENOTSUPP; +} static inline int bcm47xx_nvram_init_from_mem(u32 base, u32 lim) { return -ENOTSUPP; diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index 69b24fe92cbf..26b1b71c3091 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -13,6 +13,7 @@ enum blk_crypto_mode_num { BLK_ENCRYPTION_MODE_AES_256_XTS, BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV, BLK_ENCRYPTION_MODE_ADIANTUM, + BLK_ENCRYPTION_MODE_SM4_XTS, BLK_ENCRYPTION_MODE_MAX, }; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ba18e9bdb799..d6119c5d1069 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -853,7 +853,8 @@ static inline bool blk_mq_add_to_batch(struct request *req, struct io_comp_batch *iob, int ioerror, void (*complete)(struct io_comp_batch *)) { - if (!iob || (req->rq_flags & RQF_ELV) || ioerror) + if (!iob || (req->rq_flags & RQF_ELV) || ioerror || + (req->end_io && !blk_rq_is_passthrough(req))) return false; if (!iob->complete) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 50e358a19d98..891f8cbcd043 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -311,6 +311,13 @@ struct queue_limits { unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; enum blk_zoned_model zoned; + + /* + * Drivers that set dma_alignment to less than 511 must be prepared to + * handle individual bvec's that are not a multiple of a SECTOR_SIZE + * due to possible offsets. + */ + unsigned int dma_alignment; }; typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, @@ -456,12 +463,6 @@ struct request_queue { unsigned long nr_requests; /* Max # of requests */ unsigned int dma_pad_mask; - /* - * Drivers that set dma_alignment to less than 511 must be prepared to - * handle individual bvec's that are not a multiple of a SECTOR_SIZE - * due to possible offsets. 
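A hedged sketch of how the relocated limit is consumed (the helper name is hypothetical; the logic mirrors the existing blk_rq_aligned()): queue_dma_alignment(), updated further down in this file, now reads q->limits.dma_alignment, so the value participates in queue-limit stacking like any other struct queue_limits member.

#include <linux/blkdev.h>

static bool my_buffer_dma_aligned(struct request_queue *q,
				  unsigned long addr, unsigned int len)
{
	unsigned int mask = queue_dma_alignment(q);

	/* Both the start address and the length must honour the mask. */
	return ((addr | len) & mask) == 0;
}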
- */ - unsigned int dma_alignment; #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct blk_crypto_profile *crypto_profile; @@ -944,7 +945,6 @@ extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); -extern void blk_set_default_limits(struct queue_limits *lim); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); @@ -1324,7 +1324,7 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev) static inline int queue_dma_alignment(const struct request_queue *q) { - return q ? q->dma_alignment : 511; + return q ? q->limits.dma_alignment : 511; } static inline unsigned int bdev_dma_alignment(struct block_device *bdev) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9e7d46d16032..c1bd1bd10506 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -27,6 +27,7 @@ #include <linux/bpfptr.h> #include <linux/btf.h> #include <linux/rcupdate_trace.h> +#include <linux/static_call.h> struct bpf_verifier_env; struct bpf_verifier_log; @@ -314,7 +315,7 @@ static inline void __copy_map_value(struct bpf_map *map, void *dst, void *src, b u32 next_off = map->off_arr->field_off[i]; memcpy(dst + curr_off, src + curr_off, next_off - curr_off); - curr_off += map->off_arr->field_sz[i]; + curr_off = next_off + map->off_arr->field_sz[i]; } memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off); } @@ -343,7 +344,7 @@ static inline void zero_map_value(struct bpf_map *map, void *dst) u32 next_off = map->off_arr->field_off[i]; memset(dst + curr_off, 0, next_off - curr_off); - curr_off += map->off_arr->field_sz[i]; + curr_off = next_off + map->off_arr->field_sz[i]; } memset(dst + curr_off, 0, map->value_size - curr_off); } @@ -953,6 +954,10 @@ struct bpf_dispatcher { void *rw_image; u32 image_off; struct bpf_ksym ksym; +#ifdef CONFIG_HAVE_STATIC_CALL + struct static_call_key *sc_key; + void *sc_tramp; +#endif }; static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( @@ -970,6 +975,34 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs); + +/* + * When the architecture supports STATIC_CALL replace the bpf_dispatcher_fn + * indirection with a direct call to the bpf program. If the architecture does + * not have STATIC_CALL, avoid a double-indirection. 
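A hedged usage sketch for the dispatcher macros (the dispatcher name "example" and both helpers are hypothetical): with HAVE_STATIC_CALL, the static_call() inside bpf_dispatcher_example_func() is patched to reach the attached program directly; otherwise the plain bpf_func() call keeps it to a single indirection.

#include <linux/bpf.h>

DEFINE_BPF_DISPATCHER(example)

static unsigned int run_attached(const void *ctx,
				 const struct bpf_insn *insnsi,
				 bpf_func_t bpf_func)
{
	return bpf_dispatcher_example_func(ctx, insnsi, bpf_func);
}

static void swap_prog(struct bpf_prog *from, struct bpf_prog *to)
{
	/* Re-JITs the dispatch logic and updates the static call target. */
	bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(example), from, to);
}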
+ */ +#ifdef CONFIG_HAVE_STATIC_CALL + +#define __BPF_DISPATCHER_SC_INIT(_name) \ + .sc_key = &STATIC_CALL_KEY(_name), \ + .sc_tramp = STATIC_CALL_TRAMP_ADDR(_name), + +#define __BPF_DISPATCHER_SC(name) \ + DEFINE_STATIC_CALL(bpf_dispatcher_##name##_call, bpf_dispatcher_nop_func) + +#define __BPF_DISPATCHER_CALL(name) \ + static_call(bpf_dispatcher_##name##_call)(ctx, insnsi, bpf_func) + +#define __BPF_DISPATCHER_UPDATE(_d, _new) \ + __static_call_update((_d)->sc_key, (_d)->sc_tramp, (_new)) + +#else +#define __BPF_DISPATCHER_SC_INIT(name) +#define __BPF_DISPATCHER_SC(name) +#define __BPF_DISPATCHER_CALL(name) bpf_func(ctx, insnsi) +#define __BPF_DISPATCHER_UPDATE(_d, _new) +#endif + #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ @@ -981,32 +1014,29 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func .name = #_name, \ .lnode = LIST_HEAD_INIT(_name.ksym.lnode), \ }, \ + __BPF_DISPATCHER_SC_INIT(_name##_call) \ } -#ifdef CONFIG_X86_64 -#define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5))) -#else -#define BPF_DISPATCHER_ATTRIBUTES -#endif - #define DEFINE_BPF_DISPATCHER(name) \ - notrace BPF_DISPATCHER_ATTRIBUTES \ + __BPF_DISPATCHER_SC(name); \ noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ bpf_func_t bpf_func) \ { \ - return bpf_func(ctx, insnsi); \ + return __BPF_DISPATCHER_CALL(name); \ } \ EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ struct bpf_dispatcher bpf_dispatcher_##name = \ BPF_DISPATCHER_INIT(bpf_dispatcher_##name); + #define DECLARE_BPF_DISPATCHER(name) \ unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ bpf_func_t bpf_func); \ extern struct bpf_dispatcher bpf_dispatcher_##name; + #define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func #define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name) void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 58f5431a5559..982ba245eb41 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -152,6 +152,22 @@ static inline bool can_is_canxl_dev_mtu(unsigned int mtu) return (mtu >= CANXL_MIN_MTU && mtu <= CANXL_MAX_MTU); } +/* drop skb if it does not contain a valid CAN frame for sending */ +static inline bool can_dev_dropped_skb(struct net_device *dev, struct sk_buff *skb) +{ + struct can_priv *priv = netdev_priv(dev); + + if (priv->ctrlmode & CAN_CTRLMODE_LISTENONLY) { + netdev_info_once(dev, + "interface in listen only mode, dropping skb\n"); + kfree_skb(skb); + dev->stats.tx_dropped++; + return true; + } + + return can_dropped_invalid_skb(dev, skb); +} + void can_setup(struct net_device *dev); struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h index 5755ae5a4712..6a869682c120 100644 --- a/include/linux/can/platform/sja1000.h +++ b/include/linux/can/platform/sja1000.h @@ -14,7 +14,7 @@ #define OCR_MODE_TEST 0x01 #define OCR_MODE_NORMAL 0x02 #define OCR_MODE_CLOCK 0x03 -#define OCR_MODE_MASK 0x07 +#define OCR_MODE_MASK 0x03 #define OCR_TX0_INVERT 0x04 #define OCR_TX0_PULLDOWN 0x08 #define OCR_TX0_PULLUP 0x10 diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f2a9f2274c3b..3410aecffdb4 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -68,6 +68,7 @@ 
struct css_task_iter { struct list_head iters_node; /* css_set->task_iters */ }; +extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; @@ -106,6 +107,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup *cgroup_get_from_path(const char *path); struct cgroup *cgroup_get_from_fd(int fd); +struct cgroup *cgroup_v1v2_get_from_fd(int fd); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); @@ -308,72 +310,25 @@ void css_task_iter_end(struct css_task_iter *it); * Inline functions. */ +#ifdef CONFIG_DEBUG_CGROUP_REF +void css_get(struct cgroup_subsys_state *css); +void css_get_many(struct cgroup_subsys_state *css, unsigned int n); +bool css_tryget(struct cgroup_subsys_state *css); +bool css_tryget_online(struct cgroup_subsys_state *css); +void css_put(struct cgroup_subsys_state *css); +void css_put_many(struct cgroup_subsys_state *css, unsigned int n); +#else +#define CGROUP_REF_FN_ATTRS static inline +#define CGROUP_REF_EXPORT(fn) +#include <linux/cgroup_refcnt.h> +#endif + static inline u64 cgroup_id(const struct cgroup *cgrp) { return cgrp->kn->id; } /** - * css_get - obtain a reference on the specified css - * @css: target css - * - * The caller must already have a reference. - */ -static inline void css_get(struct cgroup_subsys_state *css) -{ - if (!(css->flags & CSS_NO_REF)) - percpu_ref_get(&css->refcnt); -} - -/** - * css_get_many - obtain references on the specified css - * @css: target css - * @n: number of references to get - * - * The caller must already have a reference. - */ -static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n) -{ - if (!(css->flags & CSS_NO_REF)) - percpu_ref_get_many(&css->refcnt, n); -} - -/** - * css_tryget - try to obtain a reference on the specified css - * @css: target css - * - * Obtain a reference on @css unless it already has reached zero and is - * being released. This function doesn't care whether @css is on or - * offline. The caller naturally needs to ensure that @css is accessible - * but doesn't have to be holding a reference on it - IOW, RCU protected - * access is good enough for this function. Returns %true if a reference - * count was successfully obtained; %false otherwise. - */ -static inline bool css_tryget(struct cgroup_subsys_state *css) -{ - if (!(css->flags & CSS_NO_REF)) - return percpu_ref_tryget(&css->refcnt); - return true; -} - -/** - * css_tryget_online - try to obtain a reference on the specified css if online - * @css: target css - * - * Obtain a reference on @css if it's online. The caller naturally needs - * to ensure that @css is accessible but doesn't have to be holding a - * reference on it - IOW, RCU protected access is good enough for this - * function. Returns %true if a reference count was successfully obtained; - * %false otherwise. - */ -static inline bool css_tryget_online(struct cgroup_subsys_state *css) -{ - if (!(css->flags & CSS_NO_REF)) - return percpu_ref_tryget_live(&css->refcnt); - return true; -} - -/** * css_is_dying - test whether the specified css is dying * @css: target css * @@ -393,31 +348,6 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css) return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt); } -/** - * css_put - put a css reference - * @css: target css - * - * Put a reference obtained via css_get() and css_tryget_online(). 
- */ -static inline void css_put(struct cgroup_subsys_state *css) -{ - if (!(css->flags & CSS_NO_REF)) - percpu_ref_put(&css->refcnt); -} - -/** - * css_put_many - put css references - * @css: target css - * @n: number of references to put - * - * Put references obtained via css_get() and css_tryget_online(). - */ -static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) -{ - if (!(css->flags & CSS_NO_REF)) - percpu_ref_put_many(&css->refcnt, n); -} - static inline void cgroup_get(struct cgroup *cgrp) { css_get(&cgrp->self); diff --git a/include/linux/cgroup_refcnt.h b/include/linux/cgroup_refcnt.h new file mode 100644 index 000000000000..2eea0a69ecfc --- /dev/null +++ b/include/linux/cgroup_refcnt.h @@ -0,0 +1,96 @@ +/** + * css_get - obtain a reference on the specified css + * @css: target css + * + * The caller must already have a reference. + */ +CGROUP_REF_FN_ATTRS +void css_get(struct cgroup_subsys_state *css) +{ + if (!(css->flags & CSS_NO_REF)) + percpu_ref_get(&css->refcnt); +} +CGROUP_REF_EXPORT(css_get) + +/** + * css_get_many - obtain references on the specified css + * @css: target css + * @n: number of references to get + * + * The caller must already have a reference. + */ +CGROUP_REF_FN_ATTRS +void css_get_many(struct cgroup_subsys_state *css, unsigned int n) +{ + if (!(css->flags & CSS_NO_REF)) + percpu_ref_get_many(&css->refcnt, n); +} +CGROUP_REF_EXPORT(css_get_many) + +/** + * css_tryget - try to obtain a reference on the specified css + * @css: target css + * + * Obtain a reference on @css unless it already has reached zero and is + * being released. This function doesn't care whether @css is on or + * offline. The caller naturally needs to ensure that @css is accessible + * but doesn't have to be holding a reference on it - IOW, RCU protected + * access is good enough for this function. Returns %true if a reference + * count was successfully obtained; %false otherwise. + */ +CGROUP_REF_FN_ATTRS +bool css_tryget(struct cgroup_subsys_state *css) +{ + if (!(css->flags & CSS_NO_REF)) + return percpu_ref_tryget(&css->refcnt); + return true; +} +CGROUP_REF_EXPORT(css_tryget) + +/** + * css_tryget_online - try to obtain a reference on the specified css if online + * @css: target css + * + * Obtain a reference on @css if it's online. The caller naturally needs + * to ensure that @css is accessible but doesn't have to be holding a + * reference on it - IOW, RCU protected access is good enough for this + * function. Returns %true if a reference count was successfully obtained; + * %false otherwise. + */ +CGROUP_REF_FN_ATTRS +bool css_tryget_online(struct cgroup_subsys_state *css) +{ + if (!(css->flags & CSS_NO_REF)) + return percpu_ref_tryget_live(&css->refcnt); + return true; +} +CGROUP_REF_EXPORT(css_tryget_online) + +/** + * css_put - put a css reference + * @css: target css + * + * Put a reference obtained via css_get() and css_tryget_online(). + */ +CGROUP_REF_FN_ATTRS +void css_put(struct cgroup_subsys_state *css) +{ + if (!(css->flags & CSS_NO_REF)) + percpu_ref_put(&css->refcnt); +} +CGROUP_REF_EXPORT(css_put) + +/** + * css_put_many - put css references + * @css: target css + * @n: number of references to put + * + * Put references obtained via css_get() and css_tryget_online(). 
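A minimal sketch of the lookup pattern these helpers exist for (the function name is hypothetical; task_css() and cpu_cgrp_id are existing symbols): RCU keeps the css accessible during the lookup, css_tryget_online() pins it, and the caller drops the reference with css_put() when done.

#include <linux/cgroup.h>

static struct cgroup_subsys_state *pin_task_cpu_css(struct task_struct *task)
{
	struct cgroup_subsys_state *css;

	rcu_read_lock();
	css = task_css(task, cpu_cgrp_id);
	if (!css_tryget_online(css))
		css = NULL;
	rcu_read_unlock();

	return css;	/* release with css_put(css) */
}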
+ */ +CGROUP_REF_FN_ATTRS +void css_put_many(struct cgroup_subsys_state *css, unsigned int n) +{ + if (!(css->flags & CSS_NO_REF)) + percpu_ref_put_many(&css->refcnt, n); +} +CGROUP_REF_EXPORT(css_put_many) diff --git a/include/linux/compat.h b/include/linux/compat.h index 594357881b0b..44b1736c95b5 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -126,11 +126,9 @@ struct compat_tms { #define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW) -#ifndef compat_sigset_t typedef struct { compat_sigset_word sig[_COMPAT_NSIG_WORDS]; } compat_sigset_t; -#endif int set_compat_user_sigmask(const compat_sigset_t __user *umask, size_t sigsetsize); diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 97cfd13bae51..2606711adb18 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -204,8 +204,6 @@ static struct configfs_bin_attribute _pfx##attr_##_name = { \ * group children. default_groups may coexist alongsize make_group() or * make_item(), but if the group wishes to have only default_groups * children (disallowing mkdir(2)), it need not provide either function. - * If the group has commit(), it supports pending and committed (active) - * items. */ struct configfs_item_operations { void (*release)(struct config_item *); @@ -216,7 +214,6 @@ struct configfs_item_operations { struct configfs_group_operations { struct config_item *(*make_item)(struct config_group *group, const char *name); struct config_group *(*make_group)(struct config_group *group, const char *name); - int (*commit_item)(struct config_item *item); void (*disconnect_notify)(struct config_group *group, struct config_item *item); void (*drop_item)(struct config_group *group, struct config_item *item); }; diff --git a/include/linux/console.h b/include/linux/console.h index 8c1686e2c233..9cea254b34b8 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -15,6 +15,7 @@ #define _LINUX_CONSOLE_H_ 1 #include <linux/atomic.h> +#include <linux/rculist.h> #include <linux/types.h> struct vc_data; @@ -154,14 +155,132 @@ struct console { u64 seq; unsigned long dropped; void *data; - struct console *next; + struct hlist_node node; }; +#ifdef CONFIG_LOCKDEP +extern void lockdep_assert_console_list_lock_held(void); +#else +static inline void lockdep_assert_console_list_lock_held(void) +{ +} +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern bool console_srcu_read_lock_is_held(void); +#else +static inline bool console_srcu_read_lock_is_held(void) +{ + return 1; +} +#endif + +extern int console_srcu_read_lock(void); +extern void console_srcu_read_unlock(int cookie); + +extern void console_list_lock(void) __acquires(console_mutex); +extern void console_list_unlock(void) __releases(console_mutex); + +extern struct hlist_head console_list; + +/** + * console_srcu_read_flags - Locklessly read the console flags + * @con: struct console pointer of console to read flags from + * + * This function provides the necessary READ_ONCE() and data_race() + * notation for locklessly reading the console flags. The READ_ONCE() + * in this function matches the WRITE_ONCE() when @flags are modified + * for registered consoles with console_srcu_write_flags(). + * + * Only use this function to read console flags when locklessly + * iterating the console list via srcu. + * + * Context: Any context. 
+ */ +static inline short console_srcu_read_flags(const struct console *con) +{ + WARN_ON_ONCE(!console_srcu_read_lock_is_held()); + + /* + * Locklessly reading console->flags provides a consistent + * read value because there is at most one CPU modifying + * console->flags and that CPU is using only read-modify-write + * operations to do so. + */ + return data_race(READ_ONCE(con->flags)); +} + +/** + * console_srcu_write_flags - Write flags for a registered console + * @con: struct console pointer of console to write flags to + * @flags: new flags value to write + * + * Only use this function to write flags for registered consoles. It + * requires holding the console_list_lock. + * + * Context: Any context. + */ +static inline void console_srcu_write_flags(struct console *con, short flags) +{ + lockdep_assert_console_list_lock_held(); + + /* This matches the READ_ONCE() in console_srcu_read_flags(). */ + WRITE_ONCE(con->flags, flags); +} + +/* Variant of console_is_registered() when the console_list_lock is held. */ +static inline bool console_is_registered_locked(const struct console *con) +{ + lockdep_assert_console_list_lock_held(); + return !hlist_unhashed(&con->node); +} + /* - * for_each_console() allows you to iterate on each console + * console_is_registered - Check if the console is registered + * @con: struct console pointer of console to check + * + * Context: Process context. May sleep while acquiring console list lock. + * Return: true if the console is in the console list, otherwise false. + * + * If false is returned for a console that was previously registered, it + * can be assumed that the console's unregistration is fully completed, + * including the exit() callback after console list removal. + */ +static inline bool console_is_registered(const struct console *con) +{ + bool ret; + + console_list_lock(); + ret = console_is_registered_locked(con); + console_list_unlock(); + return ret; +} + +/** + * for_each_console_srcu() - Iterator over registered consoles + * @con: struct console pointer used as loop cursor + * + * Although SRCU guarantees the console list will be consistent, the + * struct console fields may be updated by other CPUs while iterating. + * + * Requires console_srcu_read_lock to be held. Can be invoked from + * any context. + */ +#define for_each_console_srcu(con) \ + hlist_for_each_entry_srcu(con, &console_list, node, \ + console_srcu_read_lock_is_held()) + +/** + * for_each_console() - Iterator over registered consoles + * @con: struct console pointer used as loop cursor + * + * The console list and the console->flags are immutable while iterating. + * + * Requires console_list_lock to be held. 
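A short, hedged sketch of lockless iteration with the SRCU API declared above (CON_ENABLED is an existing console flag; the function name is hypothetical):

#include <linux/console.h>

static void walk_consoles(void)
{
	struct console *con;
	int cookie;

	cookie = console_srcu_read_lock();
	for_each_console_srcu(con) {
		short flags = console_srcu_read_flags(con);

		if (!(flags & CON_ENABLED))
			continue;
		/* Fields other than flags may change while iterating. */
	}
	console_srcu_read_unlock(cookie);
}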
*/ -#define for_each_console(con) \ - for (con = console_drivers; con != NULL; con = con->next) +#define for_each_console(con) \ + lockdep_assert_console_list_lock_held(); \ + hlist_for_each_entry(con, &console_list, node) extern int console_set_on_cmdline; extern struct console *early_console; @@ -172,9 +291,9 @@ enum con_flush_mode { }; extern int add_preferred_console(char *name, int idx, char *options); +extern void console_force_preferred_locked(struct console *con); extern void register_console(struct console *); extern int unregister_console(struct console *); -extern struct console *console_drivers; extern void console_lock(void); extern int console_trylock(void); extern void console_unlock(void); diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 08a1d3e7e46d..d3eba4360150 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -18,10 +18,10 @@ struct core_vma_metadata { struct coredump_params { const kernel_siginfo_t *siginfo; - struct pt_regs *regs; struct file *file; unsigned long limit; unsigned long mm_flags; + int cpu; loff_t written; loff_t pos; loff_t to_skip; diff --git a/include/linux/counter.h b/include/linux/counter.h index c41fa602ed28..b63746637de2 100644 --- a/include/linux/counter.h +++ b/include/linux/counter.h @@ -542,11 +542,10 @@ struct counter_array { #define DEFINE_COUNTER_ARRAY_CAPTURE(_name, _length) \ DEFINE_COUNTER_ARRAY_U64(_name, _length) -#define DEFINE_COUNTER_ARRAY_POLARITY(_name, _enums, _length) \ - DEFINE_COUNTER_AVAILABLE(_name##_available, _enums); \ +#define DEFINE_COUNTER_ARRAY_POLARITY(_name, _available, _length) \ struct counter_array _name = { \ .type = COUNTER_COMP_SIGNAL_POLARITY, \ - .avail = &(_name##_available), \ + .avail = &(_available), \ .length = (_length), \ } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d5595d57f4e5..6a94a6eaad27 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1110,10 +1110,10 @@ cpufreq_table_set_inefficient(struct cpufreq_policy *policy, } static inline int parse_perf_domain(int cpu, const char *list_name, - const char *cell_name) + const char *cell_name, + struct of_phandle_args *args) { struct device_node *cpu_np; - struct of_phandle_args args; int ret; cpu_np = of_cpu_device_node_get(cpu); @@ -1121,41 +1121,44 @@ static inline int parse_perf_domain(int cpu, const char *list_name, return -ENODEV; ret = of_parse_phandle_with_args(cpu_np, list_name, cell_name, 0, - &args); + args); if (ret < 0) return ret; of_node_put(cpu_np); - return args.args[0]; + return 0; } static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name, - const char *cell_name, struct cpumask *cpumask) + const char *cell_name, struct cpumask *cpumask, + struct of_phandle_args *pargs) { - int target_idx; int cpu, ret; + struct of_phandle_args args; - ret = parse_perf_domain(pcpu, list_name, cell_name); + ret = parse_perf_domain(pcpu, list_name, cell_name, pargs); if (ret < 0) return ret; - target_idx = ret; cpumask_set_cpu(pcpu, cpumask); for_each_possible_cpu(cpu) { if (cpu == pcpu) continue; - ret = parse_perf_domain(cpu, list_name, cell_name); + ret = parse_perf_domain(cpu, list_name, cell_name, &args); if (ret < 0) continue; - if (target_idx == ret) + if (pargs->np == args.np && pargs->args_count == args.args_count && + !memcmp(pargs->args, args.args, sizeof(args.args[0]) * args.args_count)) cpumask_set_cpu(cpu, cpumask); + + of_node_put(args.np); } - return target_idx; + return 0; } #else static inline int 
cpufreq_boost_trigger_state(int state) @@ -1185,7 +1188,8 @@ cpufreq_table_set_inefficient(struct cpufreq_policy *policy, } static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name, - const char *cell_name, struct cpumask *cpumask) + const char *cell_name, struct cpumask *cpumask, + struct of_phandle_args *pargs) { return -EOPNOTSUPP; } diff --git a/include/linux/damon.h b/include/linux/damon.h index 620ada094c3b..84525b9cdf6e 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -21,7 +21,7 @@ /* Get a random number in [l, r) */ static inline unsigned long damon_rand(unsigned long l, unsigned long r) { - return l + prandom_u32_max(r - l); + return l + get_random_u32_below(r - l); } /** diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index f60674692d36..ea2d919fd9c7 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -45,7 +45,7 @@ struct debugfs_u32_array { extern struct dentry *arch_debugfs_dir; -#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ +#define DEFINE_DEBUGFS_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed) \ static int __fops ## _open(struct inode *inode, struct file *file) \ { \ __simple_attr_check_format(__fmt, 0ull); \ @@ -56,10 +56,16 @@ static const struct file_operations __fops = { \ .open = __fops ## _open, \ .release = simple_attr_release, \ .read = debugfs_attr_read, \ - .write = debugfs_attr_write, \ + .write = (__is_signed) ? debugfs_attr_write_signed : debugfs_attr_write, \ .llseek = no_llseek, \ } +#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ + DEFINE_DEBUGFS_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, false) + +#define DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt) \ + DEFINE_DEBUGFS_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true) + typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); #if defined(CONFIG_DEBUG_FS) @@ -102,6 +108,8 @@ ssize_t debugfs_attr_read(struct file *file, char __user *buf, size_t len, loff_t *ppos); ssize_t debugfs_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); +ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf, + size_t len, loff_t *ppos); struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, const char *new_name); @@ -254,6 +262,13 @@ static inline ssize_t debugfs_attr_write(struct file *file, return -ENODEV; } +static inline ssize_t debugfs_attr_write_signed(struct file *file, + const char __user *buf, + size_t len, loff_t *ppos) +{ + return -ENODEV; +} + static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, char *new_name) { diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 34aab4dd336c..4dc7cda4fd46 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -152,8 +152,8 @@ struct devfreq_stats { * @max_state: count of entry present in the frequency table. * @previous_freq: previously configured frequency value. * @last_status: devfreq user device info, performance statistics - * @data: Private data of the governor. The devfreq framework does not - * touch this. + * @data: devfreq driver pass to governors, governor should not change it. + * @governor_data: private data for governors, devfreq core doesn't touch it. 
* @user_min_freq_req: PM QoS minimum frequency request from user (via sysfs) * @user_max_freq_req: PM QoS maximum frequency request from user (via sysfs) * @scaling_min_freq: Limit minimum frequency requested by OPP interface @@ -193,7 +193,8 @@ struct devfreq { unsigned long previous_freq; struct devfreq_dev_status last_status; - void *data; /* private data for governors */ + void *data; + void *governor_data; struct dev_pm_qos_request user_min_freq_req; struct dev_pm_qos_request user_max_freq_req; diff --git a/include/linux/device.h b/include/linux/device.h index 424b55df0272..c90a444be1c4 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -378,10 +378,8 @@ struct dev_links_info { * @data: Pointer to MSI device data */ struct dev_msi_info { -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN - struct irq_domain *domain; -#endif #ifdef CONFIG_GENERIC_MSI_IRQ + struct irq_domain *domain; struct msi_device_data *data; #endif }; @@ -742,7 +740,7 @@ static inline void set_dev_node(struct device *dev, int node) static inline struct irq_domain *dev_get_msi_domain(const struct device *dev) { -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +#ifdef CONFIG_GENERIC_MSI_IRQ return dev->msi.domain; #else return NULL; @@ -751,7 +749,7 @@ static inline struct irq_domain *dev_get_msi_domain(const struct device *dev) static inline void dev_set_msi_domain(struct device *dev, struct irq_domain *d) { -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +#ifdef CONFIG_GENERIC_MSI_IRQ dev->msi.domain = d; #endif } diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h index 50be7cbd93a5..b1b5720d89a5 100644 --- a/include/linux/dsa/tag_qca.h +++ b/include/linux/dsa/tag_qca.h @@ -61,9 +61,9 @@ struct sk_buff; /* Special struct emulating a Ethernet header */ struct qca_mgmt_ethhdr { - u32 command; /* command bit 31:0 */ - u32 seq; /* seq 63:32 */ - u32 mdio_data; /* first 4byte mdio */ + __le32 command; /* command bit 31:0 */ + __le32 seq; /* seq 63:32 */ + __le32 mdio_data; /* first 4byte mdio */ __be16 hdr; /* qca hdr */ } __packed; @@ -73,7 +73,7 @@ enum mdio_cmd { }; struct mib_ethhdr { - u32 data[3]; /* first 3 mib counter */ + __le32 data[3]; /* first 3 mib counter */ __be16 hdr; /* qca hdr */ } __packed; diff --git a/include/linux/efi.h b/include/linux/efi.h index da3974bf05d3..7603fc58c47c 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -389,6 +389,7 @@ void efi_native_runtime_setup(void); #define EFI_LOAD_FILE2_PROTOCOL_GUID EFI_GUID(0x4006c0c1, 0xfcb3, 0x403e, 0x99, 0x6d, 0x4a, 0x6c, 0x87, 0x24, 0xe0, 0x6d) #define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9) #define EFI_DXE_SERVICES_TABLE_GUID EFI_GUID(0x05ad34ba, 0x6f02, 0x4214, 0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9) +#define EFI_SMBIOS_PROTOCOL_GUID EFI_GUID(0x03583ff6, 0xcb36, 0x4940, 0x94, 0x7e, 0xb9, 0xb3, 0x9f, 0x4a, 0xfa, 0xf7) #define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f) #define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23) @@ -1085,9 +1086,6 @@ efi_status_t efivar_set_variable_locked(efi_char16_t *name, efi_guid_t *vendor, efi_status_t efivar_set_variable(efi_char16_t *name, efi_guid_t *vendor, u32 attr, unsigned long data_size, void *data); -efi_status_t check_var_size(u32 attributes, unsigned long size); -efi_status_t check_var_size_nonblocking(u32 attributes, unsigned long size); - #if 
IS_ENABLED(CONFIG_EFI_CAPSULE_LOADER) extern bool efi_capsule_pending(int *reset_type); @@ -1225,7 +1223,7 @@ efi_status_t efi_random_get_seed(void); arch_efi_call_virt_teardown(); \ }) -#define EFI_RANDOM_SEED_SIZE 64U +#define EFI_RANDOM_SEED_SIZE 32U // BLAKE2S_HASH_SIZE struct linux_efi_random_seed { u32 size; diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 346a8b56cdc8..9ec81290e3c8 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -88,22 +88,13 @@ static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* { #if defined (ELF_CORE_COPY_TASK_REGS) return ELF_CORE_COPY_TASK_REGS(t, elfregs); -#elif defined (task_pt_regs) +#else elf_core_copy_regs(elfregs, task_pt_regs(t)); #endif return 0; } -extern int dump_fpu (struct pt_regs *, elf_fpregset_t *); - -static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_regs *regs, elf_fpregset_t *fpu) -{ -#ifdef ELF_CORE_COPY_FPREGS - return ELF_CORE_COPY_FPREGS(t, fpu); -#else - return dump_fpu(regs, fpu); -#endif -} +int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu); #ifdef CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS /* diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index 30eb30d6909b..3cd202d3eefb 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -61,7 +61,7 @@ static inline struct eventfd_ctx *eventfd_ctx_fdget(int fd) return ERR_PTR(-ENOSYS); } -static inline int eventfd_signal(struct eventfd_ctx *ctx, int n) +static inline int eventfd_signal(struct eventfd_ctx *ctx, __u64 n) { return -ENOSYS; } diff --git a/include/linux/evm.h b/include/linux/evm.h index aa63e0b3c0a2..7a9ee2157f69 100644 --- a/include/linux/evm.h +++ b/include/linux/evm.h @@ -35,6 +35,27 @@ extern int evm_inode_removexattr(struct user_namespace *mnt_userns, struct dentry *dentry, const char *xattr_name); extern void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name); +static inline void evm_inode_post_remove_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, + const char *acl_name) +{ + evm_inode_post_removexattr(dentry, acl_name); +} +extern int evm_inode_set_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name, + struct posix_acl *kacl); +static inline int evm_inode_remove_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, + const char *acl_name) +{ + return evm_inode_set_acl(mnt_userns, dentry, acl_name, NULL); +} +static inline void evm_inode_post_set_acl(struct dentry *dentry, + const char *acl_name, + struct posix_acl *kacl) +{ + return evm_inode_post_setxattr(dentry, acl_name, NULL, 0); +} extern int evm_inode_init_security(struct inode *inode, const struct xattr *xattr_array, struct xattr *evm); @@ -108,6 +129,34 @@ static inline void evm_inode_post_removexattr(struct dentry *dentry, return; } +static inline void evm_inode_post_remove_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, + const char *acl_name) +{ + return; +} + +static inline int evm_inode_set_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name, + struct posix_acl *kacl) +{ + return 0; +} + +static inline int evm_inode_remove_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, + const char *acl_name) +{ + return 0; +} + +static inline void evm_inode_post_set_acl(struct dentry *dentry, + const char *acl_name, + struct posix_acl *kacl) +{ + return; +} + static inline int evm_inode_init_security(struct inode *inode, const struct xattr 
*xattr_array, struct xattr *evm) diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 9f6e25467844..444236dadcf0 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -20,7 +20,6 @@ struct fault_attr { atomic_t space; unsigned long verbose; bool task_filter; - bool no_warn; unsigned long stacktrace_depth; unsigned long require_start; unsigned long require_end; @@ -32,6 +31,10 @@ struct fault_attr { struct dentry *dname; }; +enum fault_flags { + FAULT_NOWARN = 1 << 0, +}; + #define FAULT_ATTR_INITIALIZER { \ .interval = 1, \ .times = ATOMIC_INIT(1), \ @@ -40,11 +43,11 @@ struct fault_attr { .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \ .verbose = 2, \ .dname = NULL, \ - .no_warn = false, \ } #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER int setup_fault_attr(struct fault_attr *attr, char *str); +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); bool should_fail(struct fault_attr *attr, ssize_t size); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/include/linux/fb.h b/include/linux/fb.h index 0aff76bcbb00..bcb8658f5b64 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -555,7 +555,7 @@ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { #elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || \ defined(__hppa__) || defined(__sh__) || defined(__powerpc__) || \ - defined(__arm__) || defined(__aarch64__) + defined(__arm__) || defined(__aarch64__) || defined(__mips__) #define fb_readb __raw_readb #define fb_readw __raw_readw diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 4029fe368a4f..1067a8450826 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -43,11 +43,24 @@ extern __kernel_size_t __underlying_strlen(const char *p) __RENAME(strlen); extern char *__underlying_strncat(char *p, const char *q, __kernel_size_t count) __RENAME(strncat); extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __RENAME(strncpy); #else -#define __underlying_memchr __builtin_memchr -#define __underlying_memcmp __builtin_memcmp + +#if defined(__SANITIZE_MEMORY__) +/* + * For KMSAN builds all memcpy/memset/memmove calls should be replaced by the + * corresponding __msan_XXX functions. 
+ */ +#include <linux/kmsan_string.h> +#define __underlying_memcpy __msan_memcpy +#define __underlying_memmove __msan_memmove +#define __underlying_memset __msan_memset +#else #define __underlying_memcpy __builtin_memcpy #define __underlying_memmove __builtin_memmove #define __underlying_memset __builtin_memset +#endif + +#define __underlying_memchr __builtin_memchr +#define __underlying_memcmp __builtin_memcmp #define __underlying_strcat __builtin_strcat #define __underlying_strcpy __builtin_strcpy #define __underlying_strlen __builtin_strlen @@ -441,13 +454,18 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, #define __fortify_memcpy_chk(p, q, size, p_size, q_size, \ p_size_field, q_size_field, op) ({ \ - size_t __fortify_size = (size_t)(size); \ - WARN_ONCE(fortify_memcpy_chk(__fortify_size, p_size, q_size, \ - p_size_field, q_size_field, #op), \ + const size_t __fortify_size = (size_t)(size); \ + const size_t __p_size = (p_size); \ + const size_t __q_size = (q_size); \ + const size_t __p_size_field = (p_size_field); \ + const size_t __q_size_field = (q_size_field); \ + WARN_ONCE(fortify_memcpy_chk(__fortify_size, __p_size, \ + __q_size, __p_size_field, \ + __q_size_field, #op), \ #op ": detected field-spanning write (size %zu) of single %s (size %zu)\n", \ __fortify_size, \ "field \"" #p "\" at " __FILE__ ":" __stringify(__LINE__), \ - p_size_field); \ + __p_size_field); \ __underlying_##op(p, q, __fortify_size); \ }) diff --git a/include/linux/fs.h b/include/linux/fs.h index e654435f1651..066555ad1bf8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -560,8 +560,8 @@ struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) /* * ACL_DONT_CACHE is for stacked filesystems, that rely on underlying fs to - * cache the ACL. This also means that ->get_acl() can be called in RCU mode - * with the LOOKUP_RCU flag. + * cache the ACL. This also means that ->get_inode_acl() can be called in RCU + * mode with the LOOKUP_RCU flag. 
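For reference, a hedged sketch of the RCU convention this comment describes (the myfs_* names are hypothetical): a ->get_inode_acl() implementation that may sleep must return -ECHILD in RCU-walk mode so the VFS can retry in ref-walk mode.

#include <linux/posix_acl.h>

static struct posix_acl *myfs_get_inode_acl(struct inode *inode, int type,
					    bool rcu)
{
	if (rcu)
		return ERR_PTR(-ECHILD);	/* cannot block under RCU walk */

	return myfs_read_acl_from_disk(inode, type);	/* hypothetical helper */
}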
*/ #define ACL_DONT_CACHE ((void *)(-3)) @@ -1131,9 +1131,8 @@ struct file_lock_context { /* The following constant reflects the upper bound of the file/locking space */ #ifndef OFFSET_MAX -#define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) -#define OFFSET_MAX INT_LIMIT(loff_t) -#define OFFT_OFFSET_MAX INT_LIMIT(off_t) +#define OFFSET_MAX type_max(loff_t) +#define OFFT_OFFSET_MAX type_max(off_t) #endif extern void send_sigio(struct fown_struct *fown, int fd, int band); @@ -1170,6 +1169,7 @@ extern int locks_delete_block(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); +bool vfs_inode_has_locks(struct inode *inode); extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec64 *time); @@ -1186,6 +1186,13 @@ extern void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files); extern bool locks_owner_has_blockers(struct file_lock_context *flctx, fl_owner_t owner); + +static inline struct file_lock_context * +locks_inode_context(const struct inode *inode) +{ + return smp_load_acquire(&inode->i_flctx); +} + #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, unsigned int cmd, struct flock __user *user) @@ -1284,6 +1291,11 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) return 0; } +static inline bool vfs_inode_has_locks(struct inode *inode) +{ + return false; +} + static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) { return -ENOLCK; @@ -1326,6 +1338,13 @@ static inline bool locks_owner_has_blockers(struct file_lock_context *flctx, { return false; } + +static inline struct file_lock_context * +locks_inode_context(const struct inode *inode) +{ + return NULL; +} + #endif /* !CONFIG_FILE_LOCKING */ static inline struct inode *file_inode(const struct file *f) @@ -1613,23 +1632,6 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) } /** - * i_uid_into_mnt - map an inode's i_uid down into a mnt_userns - * @mnt_userns: user namespace of the mount the inode was found from - * @inode: inode to map - * - * Note, this will eventually be removed completely in favor of the type-safe - * i_uid_into_vfsuid(). - * - * Return: the inode's i_uid mapped down according to @mnt_userns. - * If the inode's i_uid has no mapping INVALID_UID is returned. - */ -static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, - const struct inode *inode) -{ - return AS_KUIDT(make_vfsuid(mnt_userns, i_user_ns(inode), inode->i_uid)); -} - -/** * i_uid_into_vfsuid - map an inode's i_uid down into a mnt_userns * @mnt_userns: user namespace of the mount the inode was found from * @inode: inode to map @@ -1682,23 +1684,6 @@ static inline void i_uid_update(struct user_namespace *mnt_userns, } /** - * i_gid_into_mnt - map an inode's i_gid down into a mnt_userns - * @mnt_userns: user namespace of the mount the inode was found from - * @inode: inode to map - * - * Note, this will eventually be removed completely in favor of the type-safe - * i_gid_into_vfsgid(). - * - * Return: the inode's i_gid mapped down according to @mnt_userns. - * If the inode's i_gid has no mapping INVALID_GID is returned. 
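A hedged sketch of the type-safe pattern that replaces the removed helper (the surrounding permission check is illustrative; vfsgid_in_group_p() and vfsgid_eq_kgid() are existing mnt_idmapping helpers):

#include <linux/cred.h>
#include <linux/mnt_idmapping.h>

static bool group_matches(struct user_namespace *mnt_userns,
			  const struct inode *inode)
{
	vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode);

	return vfsgid_in_group_p(vfsgid) ||
	       vfsgid_eq_kgid(vfsgid, current_fsgid());
}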
- */ -static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, - const struct inode *inode) -{ - return AS_KGIDT(make_vfsgid(mnt_userns, i_user_ns(inode), inode->i_gid)); -} - -/** * i_gid_into_vfsgid - map an inode's i_gid down into a mnt_userns * @mnt_userns: user namespace of the mount the inode was found from * @inode: inode to map @@ -2089,6 +2074,14 @@ struct dir_context { */ #define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN) +/* + * These flags control the behavior of vfs_copy_file_range(). + * They are not available to the user via syscall. + * + * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops + */ +#define COPY_FILE_SPLICE (1 << 0) + struct iov_iter; struct io_uring_cmd; @@ -2142,7 +2135,7 @@ struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct user_namespace *, struct inode *, int); - struct posix_acl * (*get_acl)(struct inode *, int, bool); + struct posix_acl * (*get_inode_acl)(struct inode *, int, bool); int (*readlink) (struct dentry *, char __user *,int); @@ -2172,7 +2165,9 @@ struct inode_operations { umode_t create_mode); int (*tmpfile) (struct user_namespace *, struct inode *, struct file *, umode_t); - int (*set_acl)(struct user_namespace *, struct inode *, + struct posix_acl *(*get_acl)(struct user_namespace *, struct dentry *, + int); + int (*set_acl)(struct user_namespace *, struct dentry *, struct posix_acl *, int); int (*fileattr_set)(struct user_namespace *mnt_userns, struct dentry *dentry, struct fileattr *fa); @@ -2732,18 +2727,22 @@ static inline struct user_namespace *file_mnt_user_ns(struct file *file) return mnt_user_ns(file->f_path.mnt); } +static inline struct mnt_idmap *file_mnt_idmap(struct file *file) +{ + return mnt_idmap(file->f_path.mnt); +} + /** * is_idmapped_mnt - check whether a mount is mapped * @mnt: the mount to check * - * If @mnt has an idmapping attached different from the - * filesystem's idmapping then @mnt is mapped. + * If @mnt has an non @nop_mnt_idmap attached to it then @mnt is mapped. * * Return: true if mount is mapped, false if not. */ static inline bool is_idmapped_mnt(const struct vfsmount *mnt) { - return mnt_user_ns(mnt) != mnt->mnt_sb->s_user_ns; + return mnt_idmap(mnt) != &nop_mnt_idmap; } extern long vfs_truncate(const struct path *, loff_t); @@ -3104,7 +3103,7 @@ extern void __destroy_inode(struct inode *); extern struct inode *new_inode_pseudo(struct super_block *sb); extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); -extern int should_remove_suid(struct dentry *); +extern int setattr_should_drop_suidgid(struct user_namespace *, struct inode *); extern int file_remove_privs(struct file *); /* @@ -3485,7 +3484,7 @@ void simple_transaction_set(struct file *file, size_t n); * All attributes contain a text representation of a numeric value * that are accessed with the get() and set() functions. */ -#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ +#define DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed) \ static int __fops ## _open(struct inode *inode, struct file *file) \ { \ __simple_attr_check_format(__fmt, 0ull); \ @@ -3496,10 +3495,16 @@ static const struct file_operations __fops = { \ .open = __fops ## _open, \ .release = simple_attr_release, \ .read = simple_attr_read, \ - .write = simple_attr_write, \ + .write = (__is_signed) ? 
simple_attr_write_signed : simple_attr_write, \ .llseek = generic_file_llseek, \ } +#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ + DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, false) + +#define DEFINE_SIMPLE_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt) \ + DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true) + static inline __printf(1, 2) void __simple_attr_check_format(const char *fmt, ...) { @@ -3514,6 +3519,8 @@ ssize_t simple_attr_read(struct file *file, char __user *buf, size_t len, loff_t *ppos); ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); +ssize_t simple_attr_write_signed(struct file *file, const char __user *buf, + size_t len, loff_t *ppos); struct ctl_table; int __init list_bdev_fs_names(char *buf, size_t size); @@ -3527,7 +3534,7 @@ int __init list_bdev_fs_names(char *buf, size_t size); static inline bool is_sxid(umode_t mode) { - return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); + return mode & (S_ISUID | S_ISGID); } static inline int check_sticky(struct user_namespace *mnt_userns, diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 6fbf49cc10e4..5469ffee21c7 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -145,20 +145,6 @@ extern void fc_drop_locked(struct fs_context *fc); int reconfigure_single(struct super_block *s, int flags, void *data); -/* - * sget() wrappers to be called from the ->get_tree() op. - */ -enum vfs_get_super_keying { - vfs_get_single_super, /* Only one such superblock may exist */ - vfs_get_single_reconf_super, /* As above, but reconfigure if it exists */ - vfs_get_keyed_super, /* Superblocks with different s_fs_info keys may exist */ - vfs_get_independent_super, /* Multiple independent superblocks may exist */ -}; -extern int vfs_get_super(struct fs_context *fc, - enum vfs_get_super_keying keying, - int (*fill_super)(struct super_block *sb, - struct fs_context *fc)); - extern int get_tree_nodev(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc)); diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index f103c91139d4..01542c4b87a2 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -76,6 +76,7 @@ static inline int fs_parse(struct fs_context *fc, extern int fs_lookup_param(struct fs_context *fc, struct fs_parameter *param, bool want_bdev, + unsigned int flags, struct path *_path); extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found); diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 36e5dd84cf59..8e312c8323a8 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -75,7 +75,7 @@ struct fscache_volume { atomic_t n_accesses; /* Number of cache accesses in progress */ unsigned int debug_id; unsigned int key_hash; /* Hash of key string */ - char *key; /* Volume ID, eg. "afs@example.com@1234" */ + u8 *key; /* Volume ID, eg. 
"afs@example.com@1234" */ struct list_head proc_link; /* Link in /proc/fs/fscache/volumes */ struct hlist_bl_node hash_link; /* Link in hash table */ struct work_struct work; diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index cad78b569c7e..4f5f8a651213 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -307,7 +307,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) } /* keyring.c */ -void fscrypt_sb_delete(struct super_block *sb); +void fscrypt_destroy_keyring(struct super_block *sb); int fscrypt_ioctl_add_key(struct file *filp, void __user *arg); int fscrypt_add_test_dummy_key(struct super_block *sb, const struct fscrypt_dummy_policy *dummy_policy); @@ -521,7 +521,7 @@ fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) } /* keyring.c */ -static inline void fscrypt_sb_delete(struct super_block *sb) +static inline void fscrypt_destroy_keyring(struct super_block *sb) { } diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 62557d4bffc2..99f1146614c0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -37,9 +37,10 @@ extern void ftrace_boot_snapshot(void); static inline void ftrace_boot_snapshot(void) { } #endif -#ifdef CONFIG_FUNCTION_TRACER struct ftrace_ops; struct ftrace_regs; + +#ifdef CONFIG_FUNCTION_TRACER /* * If the arch's mcount caller does not support all of ftrace's * features, then it must call an indirect function that @@ -110,12 +111,11 @@ struct ftrace_regs { #define arch_ftrace_get_regs(fregs) (&(fregs)->regs) /* - * ftrace_instruction_pointer_set() is to be defined by the architecture - * if to allow setting of the instruction pointer from the ftrace_regs - * when HAVE_DYNAMIC_FTRACE_WITH_ARGS is set and it supports - * live kernel patching. + * ftrace_regs_set_instruction_pointer() is to be defined by the architecture + * if to allow setting of the instruction pointer from the ftrace_regs when + * HAVE_DYNAMIC_FTRACE_WITH_ARGS is set and it supports live kernel patching. */ -#define ftrace_instruction_pointer_set(fregs, ip) do { } while (0) +#define ftrace_regs_set_instruction_pointer(fregs, ip) do { } while (0) #endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs) @@ -126,6 +126,35 @@ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs return arch_ftrace_get_regs(fregs); } +/* + * When true, the ftrace_regs_{get,set}_*() functions may be used on fregs. + * Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs. 
+ */ +static __always_inline bool ftrace_regs_has_args(struct ftrace_regs *fregs) +{ + if (IS_ENABLED(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS)) + return true; + + return ftrace_get_regs(fregs) != NULL; +} + +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS +#define ftrace_regs_get_instruction_pointer(fregs) \ + instruction_pointer(ftrace_get_regs(fregs)) +#define ftrace_regs_get_argument(fregs, n) \ + regs_get_kernel_argument(ftrace_get_regs(fregs), n) +#define ftrace_regs_get_stack_pointer(fregs) \ + kernel_stack_pointer(ftrace_get_regs(fregs)) +#define ftrace_regs_return_value(fregs) \ + regs_return_value(ftrace_get_regs(fregs)) +#define ftrace_regs_set_return_value(fregs, ret) \ + regs_set_return_value(ftrace_get_regs(fregs), ret) +#define ftrace_override_function_with_return(fregs) \ + override_function_with_return(ftrace_get_regs(fregs)) +#define ftrace_regs_query_register_offset(name) \ + regs_query_register_offset(name) +#endif + typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); @@ -427,9 +456,7 @@ static inline int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsi { return -ENODEV; } -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ -#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS /* * This must be implemented by the architecture. * It is the way the ftrace direct_ops helper, when called @@ -443,9 +470,9 @@ static inline int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsi * the return from the trampoline jump to the direct caller * instead of going back to the function it just traced. */ -static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, +static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr) { } -#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #ifdef CONFIG_STACK_TRACER diff --git a/include/linux/gfp.h b/include/linux/gfp.h index ef4aea3b356e..65a78773dcca 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -210,6 +210,20 @@ alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct p return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array); } +static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask) +{ + gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN); + + if (warn_gfp != (__GFP_THISNODE|__GFP_NOWARN)) + return; + + if (node_online(this_node)) + return; + + pr_warn("%pGg allocation from offline node %d\n", &gfp_mask, this_node); + dump_stack(); +} + /* * Allocate pages, preferring the node given as nid. The node must be valid and * online. For more general interface, see alloc_pages_node(). 
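For illustration, here is a minimal sketch (a hypothetical caller, not part of this patch) of the allocation pattern the new warn_if_node_offline() helper covers: a node-pinned allocation passing __GFP_THISNODE together with __GFP_NOWARN now gets an explicit pr_warn()/dump_stack() report when the requested node is offline, rather than the old VM_WARN_ON() in __alloc_pages_node().

#include <linux/gfp.h>

/* Hypothetical helper: allocate one page strictly on @nid and leave
 * failure handling to the caller. If @nid is offline, __alloc_pages_node()
 * now reports it via warn_if_node_offline() before the allocation attempt. */
static struct page *alloc_page_strictly_on_node(int nid)
{
	return __alloc_pages_node(nid, GFP_KERNEL | __GFP_THISNODE | __GFP_NOWARN, 0);
}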
@@ -218,7 +232,7 @@ static inline struct page * __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid)); + warn_if_node_offline(nid, gfp_mask); return __alloc_pages(gfp_mask, order, nid, NULL); } @@ -227,7 +241,7 @@ static inline struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid)); + warn_if_node_offline(nid, gfp); return __folio_alloc(gfp, order, nid, NULL); } diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 6aeea1071b1b..88ae4513abb5 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -27,7 +27,7 @@ struct gpio_chip; union gpio_irq_fwspec { struct irq_fwspec fwspec; -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +#ifdef CONFIG_GENERIC_MSI_IRQ msi_alloc_info_t msiinfo; #endif }; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 3b42264333ef..85f7c5a63aa6 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -969,7 +969,7 @@ struct vmbus_channel { * mechanism improves throughput by: * * A) Making the host more efficient - each time it wakes up, - * potentially it will process morev number of packets. The + * potentially it will process more packets. The * monitor latency allows a batch to build up. * B) By deferring the hypercall to signal, we will also minimize * the interrupts. @@ -1341,6 +1341,8 @@ struct hv_ring_buffer_debug_info { int hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info, struct hv_ring_buffer_debug_info *debug_info); +bool hv_ringbuffer_spinlock_busy(struct vmbus_channel *channel); + /* Vmbus interface */ #define vmbus_driver_register(driver) \ __vmbus_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) diff --git a/include/linux/ima.h b/include/linux/ima.h index 81708ca0ebc7..5a0b2a285a18 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -187,6 +187,15 @@ extern void ima_inode_post_setattr(struct user_namespace *mnt_userns, struct dentry *dentry); extern int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len); +extern int ima_inode_set_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name, + struct posix_acl *kacl); +static inline int ima_inode_remove_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, + const char *acl_name) +{ + return ima_inode_set_acl(mnt_userns, dentry, acl_name, NULL); +} extern int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name); #else static inline bool is_ima_appraise_enabled(void) @@ -208,11 +217,26 @@ static inline int ima_inode_setxattr(struct dentry *dentry, return 0; } +static inline int ima_inode_set_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name, + struct posix_acl *kacl) +{ + + return 0; +} + static inline int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name) { return 0; } + +static inline int ima_inode_remove_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, + const char *acl_name) +{ + return 0; +} #endif /* CONFIG_IMA_APPRAISE */ #if defined(CONFIG_IMA_APPRAISE) && defined(CONFIG_INTEGRITY_TRUSTED_KEYRING) diff --git a/include/linux/init.h b/include/linux/init.h index 077d7f93b402..c5fe6d26f5b1 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -2,7 +2,9 @@ #ifndef _LINUX_INIT_H
#define _LINUX_INIT_H +#include <linux/build_bug.h> #include <linux/compiler.h> +#include <linux/stringify.h> #include <linux/types.h> /* Built-in __init functions needn't be compiled with retpoline */ @@ -143,6 +145,7 @@ struct file_system_type; extern int do_one_initcall(initcall_t fn); extern char __initdata boot_command_line[]; extern char *saved_command_line; +extern unsigned int saved_command_line_len; extern unsigned int reset_devices; /* used by init/main.c */ diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 66a774d2710e..09d4f17c8d3b 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -213,7 +213,7 @@ io_mapping_free(struct io_mapping *iomap) kfree(iomap); } -#endif /* _LINUX_IO_MAPPING_H */ - int io_mapping_map_user(struct io_mapping *iomap, struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size); + +#endif /* _LINUX_IO_MAPPING_H */ diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 43bc8a2edccf..0ded9e271523 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -16,6 +16,9 @@ enum io_uring_cmd_flags { IO_URING_F_SQE128 = 4, IO_URING_F_CQE32 = 8, IO_URING_F_IOPOLL = 16, + + /* the request is executed from poll, it should not be freed */ + IO_URING_F_MULTISHOT = 32, }; struct io_uring_cmd { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a325532aeab5..3c9da1f8979e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -455,7 +455,7 @@ extern void iommu_set_default_translated(bool cmd_line); extern bool iommu_default_passthrough(void); extern struct iommu_resv_region * iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, - enum iommu_resv_type type); + enum iommu_resv_type type, gfp_t gfp); extern int iommu_get_group_resv_regions(struct iommu_group *group, struct list_head *head); diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 27642ca15d93..4ae3c541ea6f 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -318,6 +318,8 @@ extern void __devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n); extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size); extern bool iomem_is_exclusive(u64 addr); +extern bool resource_is_exclusive(struct resource *resource, u64 addr, + resource_size_t size); extern int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 00d577f90883..a372086750ca 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -31,6 +31,7 @@ #define _LINUX_IRQDOMAIN_H #include <linux/types.h> +#include <linux/irqdomain_defs.h> #include <linux/irqhandler.h> #include <linux/of.h> #include <linux/mutex.h> @@ -45,6 +46,7 @@ struct irq_desc; struct cpumask; struct seq_file; struct irq_affinity_desc; +struct msi_parent_ops; #define IRQ_DOMAIN_IRQ_SPEC_PARAMS 16 @@ -68,27 +70,6 @@ struct irq_fwspec { void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, unsigned int count, struct irq_fwspec *fwspec); -/* - * Should several domains have the same device node, but serve - * different purposes (for example one domain is for PCI/MSI, and the - * other for wired IRQs), they can be distinguished using a - * bus-specific token. Most domains are expected to only carry - * DOMAIN_BUS_ANY. 
- */ -enum irq_domain_bus_token { - DOMAIN_BUS_ANY = 0, - DOMAIN_BUS_WIRED, - DOMAIN_BUS_GENERIC_MSI, - DOMAIN_BUS_PCI_MSI, - DOMAIN_BUS_PLATFORM_MSI, - DOMAIN_BUS_NEXUS, - DOMAIN_BUS_IPI, - DOMAIN_BUS_FSL_MC_MSI, - DOMAIN_BUS_TI_SCI_INTA_MSI, - DOMAIN_BUS_WAKEUP, - DOMAIN_BUS_VMD_MSI, -}; - /** * struct irq_domain_ops - Methods for irq_domain objects * @match: Match an interrupt controller device node to a host, returns @@ -137,53 +118,61 @@ struct irq_domain_chip_generic; /** * struct irq_domain - Hardware interrupt number translation object - * @link: Element in global irq_domain list. - * @name: Name of interrupt domain - * @ops: pointer to irq_domain methods - * @host_data: private data pointer for use by owner. Not touched by irq_domain - * core code. - * @flags: host per irq_domain flags - * @mapcount: The number of mapped interrupts + * @link: Element in global irq_domain list. + * @name: Name of interrupt domain + * @ops: Pointer to irq_domain methods + * @host_data: Private data pointer for use by owner. Not touched by irq_domain + * core code. + * @flags: Per irq_domain flags + * @mapcount: The number of mapped interrupts * - * Optional elements - * @fwnode: Pointer to firmware node associated with the irq_domain. Pretty easy - * to swap it for the of_node via the irq_domain_get_of_node accessor - * @gc: Pointer to a list of generic chips. There is a helper function for - * setting up one or more generic chips for interrupt controller - * drivers using the generic chip library which uses this pointer. - * @dev: Pointer to a device that the domain represents, and that will be - * used for power management purposes. - * @parent: Pointer to parent irq_domain to support hierarchy irq_domains + * Optional elements: + * @fwnode: Pointer to firmware node associated with the irq_domain. Pretty easy + * to swap it for the of_node via the irq_domain_get_of_node accessor + * @gc: Pointer to a list of generic chips. There is a helper function for + * setting up one or more generic chips for interrupt controller + * drivers using the generic chip library which uses this pointer. + * @dev: Pointer to the device which instantiated the irqdomain. + * With per device irq domains this is not necessarily the same + * as @pm_dev. + * @pm_dev: Pointer to a device that can be utilized for power management + * purposes related to the irq domain. 
+ * @parent: Pointer to parent irq_domain to support hierarchy irq_domains + * @msi_parent_ops: Pointer to MSI parent domain methods for per device domain init * - * Revmap data, used internally by irq_domain - * @revmap_size: Size of the linear map table @revmap[] - * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map - * @revmap_mutex: Lock for the revmap - * @revmap: Linear table of irq_data pointers + * Revmap data, used internally by the irq domain code: + * @revmap_size: Size of the linear map table @revmap[] + * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map + * @revmap_mutex: Lock for the revmap + * @revmap: Linear table of irq_data pointers */ struct irq_domain { - struct list_head link; - const char *name; - const struct irq_domain_ops *ops; - void *host_data; - unsigned int flags; - unsigned int mapcount; + struct list_head link; + const char *name; + const struct irq_domain_ops *ops; + void *host_data; + unsigned int flags; + unsigned int mapcount; /* Optional data */ - struct fwnode_handle *fwnode; - enum irq_domain_bus_token bus_token; - struct irq_domain_chip_generic *gc; - struct device *dev; + struct fwnode_handle *fwnode; + enum irq_domain_bus_token bus_token; + struct irq_domain_chip_generic *gc; + struct device *dev; + struct device *pm_dev; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY - struct irq_domain *parent; + struct irq_domain *parent; +#endif +#ifdef CONFIG_GENERIC_MSI_IRQ + const struct msi_parent_ops *msi_parent_ops; #endif /* reverse map data. The linear map gets appended to the irq_domain */ - irq_hw_number_t hwirq_max; - unsigned int revmap_size; - struct radix_tree_root revmap_tree; - struct mutex revmap_mutex; - struct irq_data __rcu *revmap[]; + irq_hw_number_t hwirq_max; + unsigned int revmap_size; + struct radix_tree_root revmap_tree; + struct mutex revmap_mutex; + struct irq_data __rcu *revmap[]; }; /* Irq domain flags */ @@ -206,15 +195,14 @@ enum { /* Irq domain implements MSI remapping */ IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5), - /* - * Quirk to handle MSI implementations which do not provide - * masking. Currently known to affect x86, but partially - * handled in core code. 
- */ - IRQ_DOMAIN_MSI_NOMASK_QUIRK = (1 << 6), - /* Irq domain doesn't translate anything */ - IRQ_DOMAIN_FLAG_NO_MAP = (1 << 7), + IRQ_DOMAIN_FLAG_NO_MAP = (1 << 6), + + /* Irq domain is a MSI parent domain */ + IRQ_DOMAIN_FLAG_MSI_PARENT = (1 << 8), + + /* Irq domain is a MSI device domain */ + IRQ_DOMAIN_FLAG_MSI_DEVICE = (1 << 9), /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved @@ -233,7 +221,7 @@ static inline void irq_domain_set_pm_device(struct irq_domain *d, struct device *dev) { if (d) - d->dev = dev; + d->pm_dev = dev; } #ifdef CONFIG_IRQ_DOMAIN @@ -578,6 +566,16 @@ static inline bool irq_domain_is_msi_remap(struct irq_domain *domain) extern bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain); +static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) +{ + return domain->flags & IRQ_DOMAIN_FLAG_MSI_PARENT; +} + +static inline bool irq_domain_is_msi_device(struct irq_domain *domain) +{ + return domain->flags & IRQ_DOMAIN_FLAG_MSI_DEVICE; +} + #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, int node, void *arg) @@ -623,6 +621,17 @@ irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain) { return false; } + +static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) +{ + return false; +} + +static inline bool irq_domain_is_msi_device(struct irq_domain *domain) +{ + return false; +} + #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #else /* CONFIG_IRQ_DOMAIN */ diff --git a/include/linux/irqdomain_defs.h b/include/linux/irqdomain_defs.h new file mode 100644 index 000000000000..c29921fd8cd1 --- /dev/null +++ b/include/linux/irqdomain_defs.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_IRQDOMAIN_DEFS_H +#define _LINUX_IRQDOMAIN_DEFS_H + +/* + * Should several domains have the same device node, but serve + * different purposes (for example one domain is for PCI/MSI, and the + * other for wired IRQs), they can be distinguished using a + * bus-specific token. Most domains are expected to only carry + * DOMAIN_BUS_ANY. 
+ */ +enum irq_domain_bus_token { + DOMAIN_BUS_ANY = 0, + DOMAIN_BUS_WIRED, + DOMAIN_BUS_GENERIC_MSI, + DOMAIN_BUS_PCI_MSI, + DOMAIN_BUS_PLATFORM_MSI, + DOMAIN_BUS_NEXUS, + DOMAIN_BUS_IPI, + DOMAIN_BUS_FSL_MC_MSI, + DOMAIN_BUS_TI_SCI_INTA_MSI, + DOMAIN_BUS_WAKEUP, + DOMAIN_BUS_VMD_MSI, + DOMAIN_BUS_PCI_DEVICE_MSI, + DOMAIN_BUS_PCI_DEVICE_MSIX, + DOMAIN_BUS_DMAR, + DOMAIN_BUS_AMDVI, + DOMAIN_BUS_PCI_DEVICE_IMS, +}; + +#endif /* _LINUX_IRQDOMAIN_DEFS_H */ diff --git a/include/linux/irqreturn.h b/include/linux/irqreturn.h index bd4c066ad39b..d426c7ad92bf 100644 --- a/include/linux/irqreturn.h +++ b/include/linux/irqreturn.h @@ -3,10 +3,10 @@ #define _LINUX_IRQRETURN_H /** - * enum irqreturn - * @IRQ_NONE interrupt was not from this device or was not handled - * @IRQ_HANDLED interrupt was handled by this device - * @IRQ_WAKE_THREAD handler requests to wake the handler thread + * enum irqreturn - irqreturn type values + * @IRQ_NONE: interrupt was not from this device or was not handled + * @IRQ_HANDLED: interrupt was handled by this device + * @IRQ_WAKE_THREAD: handler requests to wake the handler thread */ enum irqreturn { IRQ_NONE = (0 << 0), diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 0b7242370b56..2170e0cc279d 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1662,7 +1662,7 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid); int jbd2_fc_end_commit(journal_t *journal); int jbd2_fc_end_commit_fallback(journal_t *journal); int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out); -int jbd2_submit_inode_data(struct jbd2_inode *jinode); +int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_fc_wait_bufs(journal_t *journal, int num_blks); int jbd2_fc_release_bufs(journal_t *journal); diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d811b3d7d2a1..96c9d56e5510 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -302,7 +302,7 @@ static inline void kasan_unpoison_task_stack(struct task_struct *task) {} #ifdef CONFIG_KASAN_GENERIC -size_t kasan_metadata_size(struct kmem_cache *cache); +size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object); slab_flags_t kasan_never_merge(void); void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags); @@ -315,7 +315,8 @@ void kasan_record_aux_stack_noalloc(void *ptr); #else /* CONFIG_KASAN_GENERIC */ /* Tag-based KASAN modes do not use per-object metadata. */ -static inline size_t kasan_metadata_size(struct kmem_cache *cache) +static inline size_t kasan_metadata_size(struct kmem_cache *cache, + bool in_object) { return 0; } diff --git a/include/linux/kcov.h b/include/linux/kcov.h index 55dc338f6bcd..ee04256f28af 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -56,7 +56,7 @@ static inline void kcov_remote_start_usb(u64 id) /* * The softirq flavor of kcov_remote_*() functions is introduced as a temporary * work around for kcov's lack of nested remote coverage sections support in - * task context. Adding suport for nested sections is tracked in: + * task context. 
Adding support for nested sections is tracked in: * https://bugzilla.kernel.org/show_bug.cgi?id=210337 */ diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 41a686996aaa..5dd4343c1bbe 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -17,6 +17,7 @@ #include <linux/crash_core.h> #include <asm/io.h> +#include <linux/range.h> #include <uapi/linux/kexec.h> #include <linux/verification.h> @@ -240,14 +241,10 @@ static inline int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) /* Alignment required for elf header segment */ #define ELF_CORE_HEADER_ALIGN 4096 -struct crash_mem_range { - u64 start, end; -}; - struct crash_mem { unsigned int max_nr_ranges; unsigned int nr_ranges; - struct crash_mem_range ranges[]; + struct range ranges[]; }; extern int crash_exclude_mem_range(struct crash_mem *mem, diff --git a/include/linux/kmsan_string.h b/include/linux/kmsan_string.h new file mode 100644 index 000000000000..7287da6f52ef --- /dev/null +++ b/include/linux/kmsan_string.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * KMSAN string functions API used in other headers. + * + * Copyright (C) 2022 Google LLC + * Author: Alexander Potapenko <glider@google.com> + * + */ +#ifndef _LINUX_KMSAN_STRING_H +#define _LINUX_KMSAN_STRING_H + +/* + * KMSAN overrides the default memcpy/memset/memmove implementations in the + * kernel, which requires having __msan_XXX function prototypes in several other + * headers. Keep them in one place instead of open-coding. + */ +void *__msan_memcpy(void *dst, const void *src, size_t size); +void *__msan_memset(void *s, int c, size_t n); +void *__msan_memmove(void *dest, const void *src, size_t len); + +#endif /* _LINUX_KMSAN_STRING_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 32f259fa5801..915142abdf76 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -416,7 +416,7 @@ static __always_inline void guest_context_enter_irqoff(void) */ if (!context_tracking_guest_enter()) { instrumentation_begin(); - rcu_virt_note_context_switch(smp_processor_id()); + rcu_virt_note_context_switch(); instrumentation_end(); } } @@ -776,6 +776,7 @@ struct kvm { struct srcu_struct srcu; struct srcu_struct irq_srcu; pid_t userspace_pid; + bool override_halt_poll_ns; unsigned int max_halt_poll_ns; u32 dirty_ring_size; bool vm_bugged; @@ -1240,8 +1241,18 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); /** - * kvm_gfn_to_pfn_cache_init - prepare a cached kernel mapping and HPA for a - * given guest physical address. + * kvm_gpc_init - initialize gfn_to_pfn_cache. + * + * @gpc: struct gfn_to_pfn_cache object. + * + * This sets up a gfn_to_pfn_cache by initializing locks. Note, the cache must + * be zero-allocated (or zeroed by the caller before init). + */ +void kvm_gpc_init(struct gfn_to_pfn_cache *gpc); + +/** + * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest + * physical address. * * @kvm: pointer to kvm instance. * @gpc: struct gfn_to_pfn_cache object. @@ -1265,9 +1276,9 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); * kvm_gfn_to_pfn_cache_check() to ensure that the cache is valid before * accessing the target page. 
*/ -int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, - struct kvm_vcpu *vcpu, enum pfn_cache_usage usage, - gpa_t gpa, unsigned long len); +int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, + struct kvm_vcpu *vcpu, enum pfn_cache_usage usage, + gpa_t gpa, unsigned long len); /** * kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache. @@ -1324,7 +1335,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); /** - * kvm_gfn_to_pfn_cache_destroy - destroy and unlink a gfn_to_pfn_cache. + * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache. * * @kvm: pointer to kvm instance. * @gpc: struct gfn_to_pfn_cache object. @@ -1332,7 +1343,7 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); * This removes a cache from the @kvm's list to be processed on MMU notifier * invocation. */ -void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); +void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc); void kvm_sigset_activate(struct kvm_vcpu *vcpu); void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); @@ -1390,6 +1401,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); +long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg); int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index c74acfa1a3fe..af38252ad704 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -35,6 +35,11 @@ enum { NDD_WORK_PENDING = 4, /* dimm supports namespace labels */ NDD_LABELING = 6, + /* + * dimm contents have changed requiring invalidation of CPU caches prior + * to activation of a region that includes this device + */ + NDD_INCOHERENT = 7, /* need to set a limit somewhere, but yes, this is likely overkill */ ND_IOCTL_MAX_BUFLEN = SZ_4M, @@ -183,6 +188,8 @@ struct nvdimm_security_ops { int (*overwrite)(struct nvdimm *nvdimm, const struct nvdimm_key_data *key_data); int (*query_overwrite)(struct nvdimm *nvdimm); + int (*disable_master)(struct nvdimm *nvdimm, + const struct nvdimm_key_data *key_data); }; enum nvdimm_fwa_state { diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 6abde829b6e5..ed6cb2ac55fa 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -145,6 +145,12 @@ LSM_HOOK(int, 0, inode_getxattr, struct dentry *dentry, const char *name) LSM_HOOK(int, 0, inode_listxattr, struct dentry *dentry) LSM_HOOK(int, 0, inode_removexattr, struct user_namespace *mnt_userns, struct dentry *dentry, const char *name) +LSM_HOOK(int, 0, inode_set_acl, struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) +LSM_HOOK(int, 0, inode_get_acl, struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name) +LSM_HOOK(int, 0, inode_remove_acl, struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name) LSM_HOOK(int, 0, inode_need_killpriv, struct dentry *dentry) LSM_HOOK(int, 0, inode_killpriv, struct user_namespace *mnt_userns, struct dentry *dentry) @@ -177,6 +183,7 @@ LSM_HOOK(int, 0, file_send_sigiotask, struct task_struct 
*tsk, struct fown_struct *fown, int sig) LSM_HOOK(int, 0, file_receive, struct file *file) LSM_HOOK(int, 0, file_open, struct file *file) +LSM_HOOK(int, 0, file_truncate, struct file *file) LSM_HOOK(int, 0, task_alloc, struct task_struct *task, unsigned long clone_flags) LSM_HOOK(void, LSM_RET_VOID, task_free, struct task_struct *task) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 20e70132584c..0a5ba81f7367 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -406,7 +406,9 @@ * @attr is the iattr structure containing the new file attributes. * Return 0 if permission is granted. * @path_truncate: - * Check permission before truncating a file. + * Check permission before truncating the file indicated by path. + * Note that truncation permissions may also be checked based on + * already opened files, using the @file_truncate hook. * @path contains the path structure for the file. * Return 0 if permission is granted. * @inode_getattr: @@ -432,6 +434,18 @@ * Check permission before removing the extended attribute * identified by @name for @dentry. * Return 0 if permission is granted. + * @inode_set_acl: + * Check permission before setting posix acls. + * The posix acls in @kacl are identified by @acl_name. + * Return 0 if permission is granted. + * @inode_get_acl: + * Check permission before getting posix acls. + * The posix acls are identified by @acl_name. + * Return 0 if permission is granted. + * @inode_remove_acl: + * Check permission before removing posix acls. + * The posix acls are identified by @acl_name. + * Return 0 if permission is granted. * @inode_getsecurity: * Retrieve a copy of the extended attribute representation of the * security label associated with @name for @inode via @buffer. Note that @@ -597,6 +611,12 @@ * to receive an open file descriptor via socket IPC. * @file contains the file structure being received. * Return 0 if permission is granted. + * @file_truncate: + * Check permission before truncating a file, i.e. using ftruncate. + * Note that truncation permission may also be checked based on the path, + * using the @path_truncate hook. + * @file contains the file structure for the file. + * Return 0 if permission is granted. 
* @file_open: * Save open-time permission checking state for later use upon * file_permission, and recheck access if anything has changed diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 2effab72add1..e594db58a0f1 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -638,6 +638,12 @@ static inline void mt_set_in_rcu(struct maple_tree *mt) } } +static inline unsigned int mt_height(const struct maple_tree *mt) + +{ + return (mt->ma_flags & MT_FLAGS_HEIGHT_MASK) >> MT_FLAGS_HEIGHT_OFFSET; +} + void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max); void *mt_find_after(struct maple_tree *mt, unsigned long *index, unsigned long max); @@ -664,6 +670,7 @@ extern atomic_t maple_tree_tests_passed; void mt_dump(const struct maple_tree *mt); void mt_validate(struct maple_tree *mt); +void mt_cache_shrink(void); #define MT_BUG_ON(__tree, __x) do { \ atomic_inc(&maple_tree_tests_run); \ if (__x) { \ diff --git a/include/linux/math64.h b/include/linux/math64.h index a14f40de1dca..8958f4c005c1 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -29,7 +29,7 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) return dividend / divisor; } -/* +/** * div_s64_rem - signed 64bit divide with 32bit divisor with remainder * @dividend: signed 64bit dividend * @divisor: signed 32bit divisor @@ -43,7 +43,7 @@ static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) return dividend / divisor; } -/* +/** * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder * @dividend: unsigned 64bit dividend * @divisor: unsigned 64bit divisor @@ -57,7 +57,7 @@ static inline u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) return dividend / divisor; } -/* +/** * div64_u64 - unsigned 64bit divide with 64bit divisor * @dividend: unsigned 64bit dividend * @divisor: unsigned 64bit divisor @@ -69,7 +69,7 @@ static inline u64 div64_u64(u64 dividend, u64 divisor) return dividend / divisor; } -/* +/** * div64_s64 - signed 64bit divide with 64bit divisor * @dividend: signed 64bit dividend * @divisor: signed 64bit divisor @@ -120,6 +120,8 @@ extern s64 div64_s64(s64 dividend, s64 divisor); * This is the most common 64bit divide and should be used if possible, * as many 32bit archs can optimize this variant better than a full 64bit * divide. + * + * Return: dividend / divisor */ #ifndef div_u64 static inline u64 div_u64(u64 dividend, u32 divisor) @@ -133,6 +135,8 @@ static inline u64 div_u64(u64 dividend, u32 divisor) * div_s64 - signed 64bit divide with 32bit divisor * @dividend: signed 64bit dividend * @divisor: signed 32bit divisor + * + * Return: dividend / divisor */ #ifndef div_s64 static inline s64 div_s64(s64 dividend, s32 divisor) @@ -284,6 +288,16 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor) u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div); +/** + * DIV64_U64_ROUND_UP - unsigned 64bit divide with 64bit divisor rounded up + * @ll: unsigned 64bit dividend + * @d: unsigned 64bit divisor + * + * Divide unsigned 64bit dividend by unsigned 64bit divisor + * and round up. 
+ * + * Return: dividend / divisor rounded up + */ #define DIV64_U64_ROUND_UP(ll, d) \ ({ u64 _tmp = (d); div64_u64((ll) + _tmp - 1, _tmp); }) @@ -300,7 +314,7 @@ u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div); #define DIV64_U64_ROUND_CLOSEST(dividend, divisor) \ ({ u64 _tmp = (divisor); div64_u64((dividend) + _tmp / 2, _tmp); }) -/* +/** * DIV_U64_ROUND_CLOSEST - unsigned 64bit divide with 32bit divisor rounded to nearest integer * @dividend: unsigned 64bit dividend * @divisor: unsigned 32bit divisor @@ -313,7 +327,7 @@ u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div); #define DIV_U64_ROUND_CLOSEST(dividend, divisor) \ ({ u32 _tmp = (divisor); div_u64((u64)(dividend) + _tmp / 2, _tmp); }) -/* +/** * DIV_S64_ROUND_CLOSEST - signed 64bit divide with 32bit divisor rounded to nearest integer * @dividend: signed 64bit dividend * @divisor: signed 32bit divisor diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h index 2da63fd7b98f..97e64184767d 100644 --- a/include/linux/mbcache.h +++ b/include/linux/mbcache.h @@ -10,6 +10,12 @@ struct mb_cache; +/* Cache entry flags */ +enum { + MBE_REFERENCED_B = 0, + MBE_REUSABLE_B +}; + struct mb_cache_entry { /* List of entries in cache - protected by cache->c_list_lock */ struct list_head e_list; @@ -26,8 +32,7 @@ struct mb_cache_entry { atomic_t e_refcnt; /* Key in hash - stable during lifetime of the entry */ u32 e_key; - u32 e_referenced:1; - u32 e_reusable:1; + unsigned long e_flags; /* User provided value - stable during lifetime of the entry */ u64 e_value; }; diff --git a/include/linux/memregion.h b/include/linux/memregion.h index c04c4fd2e209..bf83363807ac 100644 --- a/include/linux/memregion.h +++ b/include/linux/memregion.h @@ -3,6 +3,7 @@ #define _MEMREGION_H_ #include <linux/types.h> #include <linux/errno.h> +#include <linux/bug.h> struct memregion_info { int target_node; @@ -20,4 +21,41 @@ static inline void memregion_free(int id) { } #endif + +/** + * cpu_cache_invalidate_memregion - drop any CPU cached data for + * memregions described by @res_desc + * @res_desc: one of the IORES_DESC_* types + * + * Perform cache maintenance after a memory event / operation that + * changes the contents of physical memory in a cache-incoherent manner. + * For example, device memory technologies like NVDIMM and CXL have + * device secure erase, and dynamic region provision that can replace + * the memory mapped to a given physical address. + * + * Limit the functionality to architectures that have an efficient way + * to writeback and invalidate potentially terabytes of address space at + * once. Note that this routine may or may not write back any dirty + * contents while performing the invalidation. It is only exported for + * the explicit usage of the NVDIMM and CXL modules in the 'DEVMEM' + * symbol namespace on bare platforms. + * + * Returns 0 on success or negative error code on a failure to perform + * the cache maintenance. 
+ */ +#ifdef CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION +int cpu_cache_invalidate_memregion(int res_desc); +bool cpu_cache_has_invalidate_memregion(void); +#else +static inline bool cpu_cache_has_invalidate_memregion(void) +{ + return false; +} + +static inline int cpu_cache_invalidate_memregion(int res_desc) +{ + WARN_ON_ONCE("CPU cache invalidation required"); + return -ENXIO; +} +#endif #endif /* _MEMREGION_H_ */ diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 5433c08fcc68..396df1121bff 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -37,6 +37,28 @@ __cmp(x, y, op), \ __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op)) +#define __clamp(val, lo, hi) \ + ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val))) + +#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \ + typeof(val) unique_val = (val); \ + typeof(lo) unique_lo = (lo); \ + typeof(hi) unique_hi = (hi); \ + __clamp(unique_val, unique_lo, unique_hi); }) + +#define __clamp_input_check(lo, hi) \ + (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ + __is_constexpr((lo) > (hi)), (lo) > (hi), false))) + +#define __careful_clamp(val, lo, hi) ({ \ + __clamp_input_check(lo, hi) + \ + __builtin_choose_expr(__typecheck(val, lo) && __typecheck(val, hi) && \ + __typecheck(hi, lo) && __is_constexpr(val) && \ + __is_constexpr(lo) && __is_constexpr(hi), \ + __clamp(val, lo, hi), \ + __clamp_once(val, lo, hi, __UNIQUE_ID(__val), \ + __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); }) + /** * min - return minimum of two values of the same or compatible types * @x: first value @@ -86,7 +108,7 @@ * This macro does strict typechecking of @lo/@hi to make sure they are of the * same type as @val. See the unnecessary pointer comparisons. */ -#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) +#define clamp(val, lo, hi) __careful_clamp(val, lo, hi) /* * ..and if you can't take the strict @@ -121,7 +143,7 @@ * This macro does no typechecking and uses temporary variables of type * @type to make all the comparisons. */ -#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi) +#define clamp_t(type, val, lo, hi) __careful_clamp((type)(val), (type)(lo), (type)(hi)) /** * clamp_val - return a value clamped to a given range using val's type diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a12929bc31b2..06cbad166225 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -970,7 +970,7 @@ void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode); struct mlx5_async_ctx { struct mlx5_core_dev *dev; atomic_t num_inflight; - struct wait_queue_head wait; + struct completion inflight_done; }; struct mlx5_async_work; @@ -981,6 +981,7 @@ struct mlx5_async_work { struct mlx5_async_ctx *ctx; mlx5_async_cbk_t user_callback; u16 opcode; /* cmd opcode */ + u16 op_mod; /* cmd op_mod */ void *out; /* pointer to the cmd output buffer */ }; diff --git a/include/linux/mm.h b/include/linux/mm.h index 8bbcccbc5565..974ccca609d2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1852,6 +1852,25 @@ static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodem __show_free_areas(flags, nodemask, MAX_NR_ZONES - 1); } +/* + * Parameter block passed down to zap_pte_range in exceptional cases. + */ +struct zap_details { + struct folio *single_folio; /* Locked folio to be unmapped */ + bool even_cows; /* Zap COWed private pages too? 
*/ + zap_flags_t zap_flags; /* Extra flags for zapping */ +}; + +/* + * Whether to drop the pte markers, for example, the uffd-wp information for + * file-backed memory. This should only be specified when we will completely + * drop the page in the mm, either by truncation or unmapping of the vma. By + * default, the flag is not set. + */ +#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) +/* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */ +#define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1)) + #ifdef CONFIG_MMU extern bool can_do_mlock(void); #else @@ -1869,6 +1888,8 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size); +void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, + unsigned long size, struct zap_details *details); void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, struct vm_area_struct *start_vma, unsigned long start, unsigned long end); @@ -3467,12 +3488,4 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start, } #endif -/* - * Whether to drop the pte markers, for example, the uffd-wp information for - * file-backed memory. This should only be specified when we will completely - * drop the page in the mm, either by truncation or unmapping of the vma. By - * default, the flag is not set. - */ -#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) - #endif /* _LINUX_MM_H */ diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 9c50bc40f8ff..6f7993803ee7 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -451,7 +451,7 @@ static inline bool mmc_ready_for_data(u32 status) #define MMC_SECURE_TRIM1_ARG 0x80000001 #define MMC_SECURE_TRIM2_ARG 0x80008000 #define MMC_SECURE_ARGS 0x80000000 -#define MMC_TRIM_ARGS 0x00008001 +#define MMC_TRIM_OR_DISCARD_ARGS 0x00008003 #define mmc_driver_type_mask(n) (1 << (n)) diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index f6e5369d2928..092c52aa6c2c 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -5,12 +5,10 @@ #include <linux/types.h> #include <linux/uidgid.h> +struct mnt_idmap; struct user_namespace; -/* - * Carries the initial idmapping of 0:0:4294967295 which is an identity - * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is - * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...]. - */ + +extern struct mnt_idmap nop_mnt_idmap; extern struct user_namespace init_user_ns; typedef struct { @@ -98,6 +96,26 @@ static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid) return vfsgid_valid(vfsgid) && __vfsgid_val(vfsgid) == __kgid_val(kgid); } +static inline bool vfsuid_gt_kuid(vfsuid_t vfsuid, kuid_t kuid) +{ + return __vfsuid_val(vfsuid) > __kuid_val(kuid); +} + +static inline bool vfsgid_gt_kgid(vfsgid_t vfsgid, kgid_t kgid) +{ + return __vfsgid_val(vfsgid) > __kgid_val(kgid); +} + +static inline bool vfsuid_lt_kuid(vfsuid_t vfsuid, kuid_t kuid) +{ + return __vfsuid_val(vfsuid) < __kuid_val(kuid); +} + +static inline bool vfsgid_lt_kgid(vfsgid_t vfsgid, kgid_t kgid) +{ + return __vfsgid_val(vfsgid) < __kgid_val(kgid); +} + /* * vfs{g,u}ids are created from k{g,u}ids. * We don't allow them to be created from regular {u,g}id. 
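As an aside, a minimal sketch (hypothetical code, not part of this patch) of how the new vfsuid/vfsgid ordering helpers added above compose with the existing equality helpers, e.g. for a range check against kuid bounds:

#include <linux/mnt_idmapping.h>

/* Hypothetical policy helper: accept only vfsuids strictly between a
 * floor and a ceiling kuid, using the new comparison helpers. */
static bool vfsuid_in_range(vfsuid_t vfsuid, kuid_t floor, kuid_t ceiling)
{
	return vfsuid_gt_kuid(vfsuid, floor) && vfsuid_lt_kuid(vfsuid, ceiling);
}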
@@ -208,13 +226,6 @@ static inline vfsuid_t make_vfsuid(struct user_namespace *mnt_userns, return VFSUIDT_INIT(make_kuid(mnt_userns, uid)); } -static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - kuid_t kuid) -{ - return AS_KUIDT(make_vfsuid(mnt_userns, fs_userns, kuid)); -} - /** * make_vfsgid - map a filesystem kgid into a mnt_userns * @mnt_userns: the mount's idmapping @@ -253,13 +264,6 @@ static inline vfsgid_t make_vfsgid(struct user_namespace *mnt_userns, return VFSGIDT_INIT(make_kgid(mnt_userns, gid)); } -static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - kgid_t kgid) -{ - return AS_KGIDT(make_vfsgid(mnt_userns, fs_userns, kgid)); -} - /** * from_vfsuid - map a vfsuid into the filesystem idmapping * @mnt_userns: the mount's idmapping @@ -288,33 +292,6 @@ static inline kuid_t from_vfsuid(struct user_namespace *mnt_userns, } /** - * mapped_kuid_user - map a user kuid into a mnt_userns - * @mnt_userns: the mount's idmapping - * @fs_userns: the filesystem's idmapping - * @kuid : kuid to be mapped - * - * Use the idmapping of @mnt_userns to remap a @kuid into @fs_userns. Use this - * function when preparing a @kuid to be written to disk or inode. - * - * If no_idmapping() determines that this is not an idmapped mount we can - * simply return @kuid unchanged. - * If initial_idmapping() tells us that the filesystem is not mounted with an - * idmapping we know the value of @kuid won't change when calling - * make_kuid() so we can simply retrieve the value via KUIDT_INIT() - * directly. - * - * Return: @kuid mapped according to @mnt_userns. - * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is - * returned. - */ -static inline kuid_t mapped_kuid_user(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - kuid_t kuid) -{ - return from_vfsuid(mnt_userns, fs_userns, VFSUIDT_INIT(kuid)); -} - -/** * vfsuid_has_fsmapping - check whether a vfsuid maps into the filesystem * @mnt_userns: the mount's idmapping * @fs_userns: the filesystem's idmapping @@ -333,6 +310,12 @@ static inline bool vfsuid_has_fsmapping(struct user_namespace *mnt_userns, return uid_valid(from_vfsuid(mnt_userns, fs_userns, vfsuid)); } +static inline bool vfsuid_has_mapping(struct user_namespace *userns, + vfsuid_t vfsuid) +{ + return from_kuid(userns, AS_KUIDT(vfsuid)) != (uid_t)-1; +} + /** * vfsuid_into_kuid - convert vfsuid into kuid * @vfsuid: the vfsuid to convert @@ -374,33 +357,6 @@ static inline kgid_t from_vfsgid(struct user_namespace *mnt_userns, } /** - * mapped_kgid_user - map a user kgid into a mnt_userns - * @mnt_userns: the mount's idmapping - * @fs_userns: the filesystem's idmapping - * @kgid : kgid to be mapped - * - * Use the idmapping of @mnt_userns to remap a @kgid into @fs_userns. Use this - * function when preparing a @kgid to be written to disk or inode. - * - * If no_idmapping() determines that this is not an idmapped mount we can - * simply return @kgid unchanged. - * If initial_idmapping() tells us that the filesystem is not mounted with an - * idmapping we know the value of @kgid won't change when calling - * make_kgid() so we can simply retrieve the value via KGIDT_INIT() - * directly. - * - * Return: @kgid mapped according to @mnt_userns. - * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is - * returned. 
- */ -static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - kgid_t kgid) -{ - return from_vfsgid(mnt_userns, fs_userns, VFSGIDT_INIT(kgid)); -} - -/** * vfsgid_has_fsmapping - check whether a vfsgid maps into the filesystem * @mnt_userns: the mount's idmapping * @fs_userns: the filesystem's idmapping @@ -419,6 +375,12 @@ static inline bool vfsgid_has_fsmapping(struct user_namespace *mnt_userns, return gid_valid(from_vfsgid(mnt_userns, fs_userns, vfsgid)); } +static inline bool vfsgid_has_mapping(struct user_namespace *userns, + vfsgid_t vfsgid) +{ + return from_kgid(userns, AS_KGIDT(vfsgid)) != (gid_t)-1; +} + /** * vfsgid_into_kgid - convert vfsgid into kgid * @vfsgid: the vfsgid to convert diff --git a/include/linux/mount.h b/include/linux/mount.h index 55a4abaf6715..62475996fac6 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -16,6 +16,7 @@ struct super_block; struct dentry; struct user_namespace; +struct mnt_idmap; struct file_system_type; struct fs_context; struct file; @@ -70,13 +71,15 @@ struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ int mnt_flags; - struct user_namespace *mnt_userns; + struct mnt_idmap *mnt_idmap; } __randomize_layout; -static inline struct user_namespace *mnt_user_ns(const struct vfsmount *mnt) +struct user_namespace *mnt_user_ns(const struct vfsmount *mnt); +struct user_namespace *mnt_idmap_owner(const struct mnt_idmap *idmap); +static inline struct mnt_idmap *mnt_idmap(const struct vfsmount *mnt) { /* Pairs with smp_store_release() in do_idmap_mount(). */ - return smp_load_acquire(&mnt->mnt_userns); + return smp_load_acquire(&mnt->mnt_idmap); } extern int mnt_want_write(struct vfsmount *mnt); diff --git a/include/linux/msi.h b/include/linux/msi.h index fc918a658d48..a112b913fff9 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -13,13 +13,20 @@ * * Regular device drivers have no business with any of these functions and * especially storing MSI descriptor pointers in random code is considered - * abuse. The only function which is relevant for drivers is msi_get_virq(). + * abuse. 
+ * + * Device driver relevant functions are available in <linux/msi_api.h> */ +#include <linux/irqdomain_defs.h> #include <linux/cpumask.h> +#include <linux/msi_api.h> #include <linux/xarray.h> #include <linux/mutex.h> #include <linux/list.h> +#include <linux/irq.h> +#include <linux/bits.h> + #include <asm/msi.h> /* Dummy shadow structures if an architecture does not define them */ @@ -68,19 +75,18 @@ struct msi_msg { extern int pci_msi_ignore_mask; /* Helper functions */ -struct irq_data; struct msi_desc; struct pci_dev; struct platform_msi_priv_data; struct device_attribute; +struct irq_domain; +struct irq_affinity_desc; void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg); #ifdef CONFIG_GENERIC_MSI_IRQ void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg); #else -static inline void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) -{ -} +static inline void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) { } #endif typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc, @@ -120,6 +126,38 @@ struct pci_msi_desc { }; }; +/** + * union msi_domain_cookie - Opaque MSI domain specific data + * @value: u64 value store + * @ptr: Pointer to domain specific data + * @iobase: Domain specific IOmem pointer + * + * The content of this data is implementation defined and used by the MSI + * domain to store domain specific information which is required for + * interrupt chip callbacks. + */ +union msi_domain_cookie { + u64 value; + void *ptr; + void __iomem *iobase; +}; + +/** + * struct msi_desc_data - Generic MSI descriptor data + * @dcookie: Cookie for MSI domain specific data which is required + * for irq_chip callbacks + * @icookie: Cookie for the MSI interrupt instance provided by + * the usage site to the allocation function + * + * The content of this data is implementation defined, e.g. PCI/IMS + * implementations define the meaning of the data. The MSI core ignores + * this data completely. 
+ */ +struct msi_desc_data { + union msi_domain_cookie dcookie; + union msi_instance_cookie icookie; +}; + #define MSI_MAX_INDEX ((unsigned int)USHRT_MAX) /** @@ -137,6 +175,7 @@ struct pci_msi_desc { * * @msi_index: Index of the msi descriptor * @pci: PCI specific msi descriptor data + * @data: Generic MSI descriptor data */ struct msi_desc { /* Shared device/bus type independent data */ @@ -156,7 +195,10 @@ struct msi_desc { void *write_msi_msg_data; u16 msi_index; - struct pci_msi_desc pci; + union { + struct pci_msi_desc pci; + struct msi_desc_data data; + }; }; /* @@ -171,33 +213,80 @@ enum msi_desc_filter { MSI_DESC_ASSOCIATED, }; + +/** + * struct msi_dev_domain - The internals of MSI domain info per device + * @store: Xarray for storing MSI descriptor pointers + * @domain: Pointer to a per device interrupt domain + */ +struct msi_dev_domain { + struct xarray store; + struct irq_domain *domain; +}; + /** * msi_device_data - MSI per device data * @properties: MSI properties which are interesting to drivers * @platform_data: Platform-MSI specific data * @mutex: Mutex protecting the MSI descriptor store - * @__store: Xarray for storing MSI descriptor pointers + * @__domains: Internal data for per device MSI domains * @__iter_idx: Index to search the next entry for iterators */ struct msi_device_data { unsigned long properties; struct platform_msi_priv_data *platform_data; struct mutex mutex; - struct xarray __store; + struct msi_dev_domain __domains[MSI_MAX_DEVICE_IRQDOMAINS]; unsigned long __iter_idx; }; int msi_setup_device_data(struct device *dev); -unsigned int msi_get_virq(struct device *dev, unsigned int index); void msi_lock_descs(struct device *dev); void msi_unlock_descs(struct device *dev); -struct msi_desc *msi_first_desc(struct device *dev, enum msi_desc_filter filter); -struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter); +struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid, + enum msi_desc_filter filter); /** - * msi_for_each_desc - Iterate the MSI descriptors + * msi_first_desc - Get the first MSI descriptor of the default irqdomain + * @dev: Device to operate on + * @filter: Descriptor state filter + * + * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs() + * must be invoked before the call. + * + * Return: Pointer to the first MSI descriptor matching the search + * criteria, NULL if none found. + */ +static inline struct msi_desc *msi_first_desc(struct device *dev, + enum msi_desc_filter filter) +{ + return msi_domain_first_desc(dev, MSI_DEFAULT_DOMAIN, filter); +} + +struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid, + enum msi_desc_filter filter); + +/** + * msi_domain_for_each_desc - Iterate the MSI descriptors in a specific domain + * + * @desc: struct msi_desc pointer used as iterator + * @dev: struct device pointer - device to iterate + * @domid: The id of the interrupt domain which should be walked. + * @filter: Filter for descriptor selection + * + * Notes: + * - The loop must be protected with a msi_lock_descs()/msi_unlock_descs() + * pair. + * - It is safe to remove a retrieved MSI descriptor in the loop. 
+ */ +#define msi_domain_for_each_desc(desc, dev, domid, filter) \ + for ((desc) = msi_domain_first_desc((dev), (domid), (filter)); (desc); \ + (desc) = msi_next_desc((dev), (domid), (filter))) + +/** + * msi_for_each_desc - Iterate the MSI descriptors in the default irqdomain * * @desc: struct msi_desc pointer used as iterator * @dev: struct device pointer - device to iterate @@ -208,9 +297,8 @@ struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter); * pair. * - It is safe to remove a retrieved MSI descriptor in the loop. */ -#define msi_for_each_desc(desc, dev, filter) \ - for ((desc) = msi_first_desc((dev), (filter)); (desc); \ - (desc) = msi_next_desc((dev), (filter))) +#define msi_for_each_desc(desc, dev, filter) \ + msi_domain_for_each_desc((desc), (dev), MSI_DEFAULT_DOMAIN, (filter)) #define msi_desc_to_dev(desc) ((desc)->dev) @@ -237,34 +325,47 @@ static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, } #endif -#ifdef CONFIG_PCI_MSI -struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc); -void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg); -#else /* CONFIG_PCI_MSI */ -static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) +int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid, + struct msi_desc *init_desc); +/** + * msi_insert_msi_desc - Allocate and initialize a MSI descriptor in the + * default irqdomain and insert it at @init_desc->msi_index + * @dev: Pointer to the device for which the descriptor is allocated + * @init_desc: Pointer to an MSI descriptor to initialize the new descriptor + * + * Return: 0 on success or an appropriate failure code. + */ +static inline int msi_insert_msi_desc(struct device *dev, struct msi_desc *init_desc) { + return msi_domain_insert_msi_desc(dev, MSI_DEFAULT_DOMAIN, init_desc); } -#endif /* CONFIG_PCI_MSI */ -int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc); -void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter, - unsigned int first_index, unsigned int last_index); +void msi_domain_free_msi_descs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last); + +/** + * msi_free_msi_descs_range - Free a range of MSI descriptors of a device + * in the default irqdomain + * + * @dev: Device for which to free the descriptors + * @first: Index to start freeing from (inclusive) + * @last: Last index to be freed (inclusive) + */ +static inline void msi_free_msi_descs_range(struct device *dev, unsigned int first, + unsigned int last) +{ + msi_domain_free_msi_descs_range(dev, MSI_DEFAULT_DOMAIN, first, last); +} /** - * msi_free_msi_descs - Free MSI descriptors of a device + * msi_free_msi_descs - Free all MSI descriptors of a device in the default irqdomain * @dev: Device to free the descriptors */ static inline void msi_free_msi_descs(struct device *dev) { - msi_free_msi_descs_range(dev, MSI_DESC_ALL, 0, MSI_MAX_INDEX); + msi_free_msi_descs_range(dev, 0, MSI_MAX_INDEX); } -void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); -void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); - -void pci_msi_mask_irq(struct irq_data *data); -void pci_msi_unmask_irq(struct irq_data *data); - /* * The arch hooks to set up msi irqs. 
Default functions are implemented * as weak symbols so that they /can/ be overridden by architecture specific @@ -293,7 +394,7 @@ static inline void msi_device_destroy_sysfs(struct device *dev) { } */ bool arch_restore_msi_irqs(struct pci_dev *dev); -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +#ifdef CONFIG_GENERIC_MSI_IRQ #include <linux/irqhandler.h> @@ -309,19 +410,22 @@ struct msi_domain_info; * @get_hwirq: Retrieve the resulting hw irq number * @msi_init: Domain specific init function for MSI interrupts * @msi_free: Domain specific function to free a MSI interrupts - * @msi_check: Callback for verification of the domain/info/dev data * @msi_prepare: Prepare the allocation of the interrupts in the domain + * @prepare_desc: Optional function to prepare the allocated MSI descriptor + * in the domain * @set_desc: Set the msi descriptor for an interrupt * @domain_alloc_irqs: Optional function to override the default allocation * function. * @domain_free_irqs: Optional function to override the default free * function. + * @msi_post_free: Optional function which is invoked after freeing + * all interrupts. * * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying * irqdomain. * - * @msi_check, @msi_prepare and @set_desc are callbacks used by - * msi_domain_alloc/free_irqs(). + * @msi_prepare, @prepare_desc and @set_desc are callbacks used by the + * msi_domain_alloc/free_irqs*() variants. * * @domain_alloc_irqs, @domain_free_irqs can be used to override the * default allocation/free functions (__msi_domain_alloc/free_irqs). This @@ -329,15 +433,6 @@ struct msi_domain_info; * be wrapped into the regular irq domains concepts by mere mortals. This * allows to universally use msi_domain_alloc/free_irqs without having to * special case XEN all over the place. - * - * Contrary to other operations @domain_alloc_irqs and @domain_free_irqs - * are set to the default implementation if NULL and even when - * MSI_FLAG_USE_DEF_DOM_OPS is not set to avoid breaking existing users and - * because these callbacks are obviously mandatory. - * - * This is NOT meant to be abused, but it can be useful to build wrappers - * for specialized MSI irq domains which need extra work before and after - * calling __msi_domain_alloc_irqs()/__msi_domain_free_irqs(). */ struct msi_domain_ops { irq_hw_number_t (*get_hwirq)(struct msi_domain_info *info, @@ -349,23 +444,29 @@ struct msi_domain_ops { void (*msi_free)(struct irq_domain *domain, struct msi_domain_info *info, unsigned int virq); - int (*msi_check)(struct irq_domain *domain, - struct msi_domain_info *info, - struct device *dev); int (*msi_prepare)(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg); + void (*prepare_desc)(struct irq_domain *domain, msi_alloc_info_t *arg, + struct msi_desc *desc); void (*set_desc)(msi_alloc_info_t *arg, struct msi_desc *desc); int (*domain_alloc_irqs)(struct irq_domain *domain, struct device *dev, int nvec); void (*domain_free_irqs)(struct irq_domain *domain, struct device *dev); + void (*msi_post_free)(struct irq_domain *domain, + struct device *dev); }; /** * struct msi_domain_info - MSI interrupt domain data * @flags: Flags to describe features and capabilities + * @bus_token: The domain bus token + * @hwsize: The hardware table size or the software index limit. + * If 0 then the size is considered unlimited and + * gets initialized to the maximum software index limit + * by the domain creation code. 
* @ops: The callback data structure * @chip: Optional: associated interrupt chip * @chip_data: Optional: associated interrupt chip data @@ -375,17 +476,42 @@ struct msi_domain_ops { * @data: Optional: domain specific data */ struct msi_domain_info { - u32 flags; - struct msi_domain_ops *ops; - struct irq_chip *chip; - void *chip_data; - irq_flow_handler_t handler; - void *handler_data; - const char *handler_name; - void *data; + u32 flags; + enum irq_domain_bus_token bus_token; + unsigned int hwsize; + struct msi_domain_ops *ops; + struct irq_chip *chip; + void *chip_data; + irq_flow_handler_t handler; + void *handler_data; + const char *handler_name; + void *data; }; -/* Flags for msi_domain_info */ +/** + * struct msi_domain_template - Template for MSI device domains + * @name: Storage for the resulting name. Filled in by the core. + * @chip: Interrupt chip for this domain + * @ops: MSI domain ops + * @info: MSI domain info data + */ +struct msi_domain_template { + char name[48]; + struct irq_chip chip; + struct msi_domain_ops ops; + struct msi_domain_info info; +}; + +/* + * Flags for msi_domain_info + * + * Bit 0-15: Generic MSI functionality which is not subject to restriction + * by parent domains + * + * Bit 16-31: Functionality which depends on the underlying parent domain and + * can be masked out by msi_parent_ops::init_dev_msi_info() when + * a device MSI domain is initialized. + */ enum { /* * Init non implemented ops callbacks with default MSI domain @@ -397,44 +523,100 @@ enum { * callbacks. */ MSI_FLAG_USE_DEF_CHIP_OPS = (1 << 1), - /* Support multiple PCI MSI interrupts */ - MSI_FLAG_MULTI_PCI_MSI = (1 << 2), - /* Support PCI MSIX interrupts */ - MSI_FLAG_PCI_MSIX = (1 << 3), /* Needs early activate, required for PCI */ - MSI_FLAG_ACTIVATE_EARLY = (1 << 4), + MSI_FLAG_ACTIVATE_EARLY = (1 << 2), /* * Must reactivate when irq is started even when * MSI_FLAG_ACTIVATE_EARLY has been set. */ - MSI_FLAG_MUST_REACTIVATE = (1 << 5), - /* Is level-triggered capable, using two messages */ - MSI_FLAG_LEVEL_CAPABLE = (1 << 6), + MSI_FLAG_MUST_REACTIVATE = (1 << 3), /* Populate sysfs on alloc() and destroy it on free() */ - MSI_FLAG_DEV_SYSFS = (1 << 7), - /* MSI-X entries must be contiguous */ - MSI_FLAG_MSIX_CONTIGUOUS = (1 << 8), + MSI_FLAG_DEV_SYSFS = (1 << 4), /* Allocate simple MSI descriptors */ - MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 9), + MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 5), /* Free MSI descriptors */ - MSI_FLAG_FREE_MSI_DESCS = (1 << 10), + MSI_FLAG_FREE_MSI_DESCS = (1 << 6), + /* + * Quirk to handle MSI implementations which do not provide + * masking. Currently known to affect x86, but has to be partially + * handled in the core MSI code. 
+ */ + MSI_FLAG_NOMASK_QUIRK = (1 << 7), + + /* Mask for the generic functionality */ + MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0), + + /* Mask for the domain specific functionality */ + MSI_DOMAIN_FLAGS_MASK = GENMASK(31, 16), + + /* Support multiple PCI MSI interrupts */ + MSI_FLAG_MULTI_PCI_MSI = (1 << 16), + /* Support PCI MSIX interrupts */ + MSI_FLAG_PCI_MSIX = (1 << 17), + /* Is level-triggered capable, using two messages */ + MSI_FLAG_LEVEL_CAPABLE = (1 << 18), + /* MSI-X entries must be contiguous */ + MSI_FLAG_MSIX_CONTIGUOUS = (1 << 19), + /* PCI/MSI-X vectors can be dynamically allocated/freed post MSI-X enable */ + MSI_FLAG_PCI_MSIX_ALLOC_DYN = (1 << 20), + /* Support for PCI/IMS */ + MSI_FLAG_PCI_IMS = (1 << 21), }; +/** + * struct msi_parent_ops - MSI parent domain callbacks and configuration info + * + * @supported_flags: Required: The supported MSI flags of the parent domain + * @prefix: Optional: Prefix for the domain and chip name + * @init_dev_msi_info: Required: Callback for MSI parent domains to setup parent + * domain specific domain flags, domain ops and interrupt chip + * callbacks when a per device domain is created. + */ +struct msi_parent_ops { + u32 supported_flags; + const char *prefix; + bool (*init_dev_msi_info)(struct device *dev, struct irq_domain *domain, + struct irq_domain *msi_parent_domain, + struct msi_domain_info *msi_child_info); +}; + +bool msi_parent_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *msi_parent_domain, + struct msi_domain_info *msi_child_info); + int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force); struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); -int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, - int nvec); -int msi_domain_alloc_irqs_descs_locked(struct irq_domain *domain, struct device *dev, - int nvec); -int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, - int nvec); -void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); -void msi_domain_free_irqs_descs_locked(struct irq_domain *domain, struct device *dev); -void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); + +bool msi_create_device_irq_domain(struct device *dev, unsigned int domid, + const struct msi_domain_template *template, + unsigned int hwsize, void *domain_data, + void *chip_data); +void msi_remove_device_irq_domain(struct device *dev, unsigned int domid); + +bool msi_match_device_irq_domain(struct device *dev, unsigned int domid, + enum irq_domain_bus_token bus_token); + +int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last); +int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last); +int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs); + +struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index, + const struct irq_affinity_desc *affdesc, + union msi_instance_cookie *cookie); + +void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last); +void msi_domain_free_irqs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last); +void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid); +void msi_domain_free_irqs_all(struct device 
*dev, unsigned int domid); + struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain); struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode, @@ -467,20 +649,27 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq, unsigned int nvec); void *platform_msi_get_host_data(struct irq_domain *domain); -#endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */ +#endif /* CONFIG_GENERIC_MSI_IRQ */ -#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN +/* PCI specific interfaces */ +#ifdef CONFIG_PCI_MSI +struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc); +void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg); +void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); +void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); +void pci_msi_mask_irq(struct irq_data *data); +void pci_msi_unmask_irq(struct irq_data *data); struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev); struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev); -bool pci_dev_has_special_msi_domain(struct pci_dev *pdev); -#else +#else /* CONFIG_PCI_MSI */ static inline struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev) { return NULL; } -#endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */ +static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) { } +#endif /* !CONFIG_PCI_MSI */ #endif /* LINUX_MSI_H */ diff --git a/include/linux/msi_api.h b/include/linux/msi_api.h new file mode 100644 index 000000000000..391087ad99b1 --- /dev/null +++ b/include/linux/msi_api.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_MSI_API_H +#define LINUX_MSI_API_H + +/* + * APIs which are relevant for device driver code for allocating and + * freeing MSI interrupts and querying the associations between + * hardware/software MSI indices and the Linux interrupt number. + */ + +struct device; + +/* + * Per device interrupt domain related constants. + */ +enum msi_domain_ids { + MSI_DEFAULT_DOMAIN, + MSI_SECONDARY_DOMAIN, + MSI_MAX_DEVICE_IRQDOMAINS, +}; + +/** + * union msi_instance_cookie - MSI instance cookie + * @value: u64 value store + * @ptr: Pointer to usage site specific data + * + * This cookie is handed to the IMS allocation function and stored in the + * MSI descriptor for the interrupt chip callbacks. + * + * The content of this cookie is MSI domain implementation defined. For + * PCI/IMS implementations this could be a PASID or a pointer to queue + * memory. + */ +union msi_instance_cookie { + u64 value; + void *ptr; +}; + +/** + * msi_map - Mapping between MSI index and Linux interrupt number + * @index: The MSI index, e.g. slot in the MSI-X table or + * a software managed index if >= 0. If negative + * the allocation function failed and it contains + * the error code. + * @virq: The associated Linux interrupt number + */ +struct msi_map { + int index; + int virq; +}; + +/* + * Constant to be used for dynamic allocations when the allocation is any + * free MSI index, which is either an entry in a hardware table or a + * software managed index. 
+ */ +#define MSI_ANY_INDEX UINT_MAX + +unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index); + +/** + * msi_get_virq - Lookup the Linux interrupt number for a MSI index on the default interrupt domain + * @dev: Device for which the lookup happens + * @index: The MSI index to lookup + * + * Return: The Linux interrupt number on success (> 0), 0 if not found + */ +static inline unsigned int msi_get_virq(struct device *dev, unsigned int index) +{ + return msi_domain_get_virq(dev, MSI_DEFAULT_DOMAIN, index); +} + +#endif diff --git a/include/linux/net.h b/include/linux/net.h index 711c3593c3b8..18d942bbdf6e 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -41,6 +41,7 @@ struct net; #define SOCK_NOSPACE 2 #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 +#define SOCK_SUPPORT_ZC 5 #ifndef ARCH_HAS_SOCKET_TYPES /** diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a36edb0ec199..eddf8ee270e7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3663,8 +3663,9 @@ static inline bool netif_attr_test_online(unsigned long j, static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp, unsigned int nr_bits) { - /* n is a prior cpu */ - cpu_max_bits_warn(n + 1, nr_bits); + /* -1 is a legal arg here. */ + if (n != -1) + cpu_max_bits_warn(n, nr_bits); if (srcp) return find_next_bit(srcp, nr_bits, n + 1); @@ -3685,8 +3686,9 @@ static inline int netif_attrmask_next_and(int n, const unsigned long *src1p, const unsigned long *src2p, unsigned int nr_bits) { - /* n is a prior cpu */ - cpu_max_bits_warn(n + 1, nr_bits); + /* -1 is a legal arg here. */ + if (n != -1) + cpu_max_bits_warn(n, nr_bits); if (src1p && src2p) return find_next_and_bit(src1p, src2p, nr_bits, n + 1); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index f2402ddeafbf..4c76ddfb6a67 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -267,6 +267,14 @@ struct netfs_cache_ops { loff_t *_start, size_t *_len, loff_t i_size, bool no_space_allocated_yet); + /* Prepare an on-demand read operation, shortening it to a cached/uncached + * boundary as appropriate. + */ + enum netfs_io_source (*prepare_ondemand_read)(struct netfs_cache_resources *cres, + loff_t start, size_t *_len, + loff_t i_size, + unsigned long *_flags, ino_t ino); + /* Query the occupancy of the cache in a region, returning where the * next chunk of data starts and how long it is. 
*/ diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 8d04b6a5964c..730003c4f4af 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -732,4 +732,17 @@ enum nfs4_setxattr_options { SETXATTR4_CREATE = 1, SETXATTR4_REPLACE = 2, }; + +enum { + RCA4_TYPE_MASK_RDATA_DLG = 0, + RCA4_TYPE_MASK_WDATA_DLG = 1, + RCA4_TYPE_MASK_DIR_DLG = 2, + RCA4_TYPE_MASK_FILE_LAYOUT = 3, + RCA4_TYPE_MASK_BLK_LAYOUT = 4, + RCA4_TYPE_MASK_OBJ_LAYOUT_MIN = 8, + RCA4_TYPE_MASK_OBJ_LAYOUT_MAX = 9, + RCA4_TYPE_MASK_OTHER_LAYOUT_MIN = 12, + RCA4_TYPE_MASK_OTHER_LAYOUT_MAX = 15, +}; + #endif diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 7931fa472561..d92fdfd2444c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -59,6 +59,7 @@ struct nfs_access_entry { kuid_t fsuid; kgid_t fsgid; struct group_info *group_info; + u64 timestamp; __u32 mask; struct rcu_head rcu_head; }; diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index efef68c9352a..bb0ee80526b2 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -516,7 +516,7 @@ static inline int node_random(const nodemask_t *maskp) bit = first_node(*maskp); break; default: - bit = find_nth_bit(maskp->bits, MAX_NUMNODES, prandom_u32_max(w)); + bit = find_nth_bit(maskp->bits, MAX_NUMNODES, get_random_u32_below(w)); break; } return bit; diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index cdb171efc7cb..fee881cded01 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -94,6 +94,7 @@ static inline struct cred *nsset_cred(struct nsset *set) int copy_namespaces(unsigned long flags, struct task_struct *tsk); void exit_task_namespaces(struct task_struct *tsk); void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); +int exec_task_namespaces(void); void free_nsproxy(struct nsproxy *ns); int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, struct cred *, struct fs_struct *); diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 19dfdd74835e..1d3be1a2204c 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -51,8 +51,8 @@ static inline bool __must_check __must_check_overflow(bool overflow) return unlikely(overflow); } -/** check_add_overflow() - Calculate addition with overflow checking - * +/** + * check_add_overflow() - Calculate addition with overflow checking * @a: first addend * @b: second addend * @d: pointer to store sum @@ -66,8 +66,8 @@ static inline bool __must_check __must_check_overflow(bool overflow) #define check_add_overflow(a, b, d) \ __must_check_overflow(__builtin_add_overflow(a, b, d)) -/** check_sub_overflow() - Calculate subtraction with overflow checking - * +/** + * check_sub_overflow() - Calculate subtraction with overflow checking * @a: minuend; value to subtract from * @b: subtrahend; value to subtract from @a * @d: pointer to store difference @@ -81,8 +81,8 @@ static inline bool __must_check __must_check_overflow(bool overflow) #define check_sub_overflow(a, b, d) \ __must_check_overflow(__builtin_sub_overflow(a, b, d)) -/** check_mul_overflow() - Calculate multiplication with overflow checking - * +/** + * check_mul_overflow() - Calculate multiplication with overflow checking * @a: first factor * @b: second factor * @d: pointer to store product @@ -96,23 +96,24 @@ static inline bool __must_check __must_check_overflow(bool overflow) #define check_mul_overflow(a, b, d) \ __must_check_overflow(__builtin_mul_overflow(a, b, d)) -/** check_shl_overflow() - Calculate a left-shifted 
value and check overflow - * +/** + * check_shl_overflow() - Calculate a left-shifted value and check overflow * @a: Value to be shifted * @s: How many bits left to shift * @d: Pointer to where to store the result * * Computes *@d = (@a << @s) * - * Returns true if '*d' cannot hold the result or when 'a << s' doesn't + * Returns true if '*@d' cannot hold the result or when '@a << @s' doesn't * make sense. Example conditions: - * - 'a << s' causes bits to be lost when stored in *d. - * - 's' is garbage (e.g. negative) or so large that the result of - * 'a << s' is guaranteed to be 0. - * - 'a' is negative. - * - 'a << s' sets the sign bit, if any, in '*d'. * - * '*d' will hold the results of the attempted shift, but is not + * - '@a << @s' causes bits to be lost when stored in *@d. + * - '@s' is garbage (e.g. negative) or so large that the result of + * '@a << @s' is guaranteed to be 0. + * - '@a' is negative. + * - '@a << @s' sets the sign bit, if any, in '*@d'. + * + * '*@d' will hold the results of the attempted shift, but is not * considered "safe for use" if true is returned. */ #define check_shl_overflow(a, s, d) __must_check_overflow(({ \ @@ -129,7 +130,6 @@ static inline bool __must_check __must_check_overflow(bool overflow) /** * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX - * * @factor1: first factor * @factor2: second factor * @@ -149,7 +149,6 @@ static inline size_t __must_check size_mul(size_t factor1, size_t factor2) /** * size_add() - Calculate size_t addition with saturation at SIZE_MAX - * * @addend1: first addend * @addend2: second addend * @@ -169,7 +168,6 @@ static inline size_t __must_check size_add(size_t addend1, size_t addend2) /** * size_sub() - Calculate size_t subtraction with saturation at SIZE_MAX - * * @minuend: value to subtract from * @subtrahend: value to subtract from @minuend * @@ -192,7 +190,6 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) /** * array_size() - Calculate size of 2-dimensional array. - * * @a: dimension one * @b: dimension two * @@ -205,7 +202,6 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) /** * array3_size() - Calculate size of 3-dimensional array. - * * @a: dimension one * @b: dimension two * @c: dimension three @@ -220,7 +216,6 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) /** * flex_array_size() - Calculate size of a flexible array member * within an enclosing structure. - * * @p: Pointer to the structure. * @member: Name of the flexible array member. * @count: Number of elements in the array. @@ -237,7 +232,6 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) /** * struct_size() - Calculate size of structure with trailing flexible array. - * * @p: Pointer to the structure. * @member: Name of the array member. * @count: Number of elements in the array. 
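A minimal usage sketch for the checked and saturating helpers documented above (hypothetical driver code, not part of this patch; struct sample, sample_alloc() and sample_grow() are invented for illustration):

    #include <linux/errno.h>
    #include <linux/overflow.h>
    #include <linux/slab.h>
    #include <linux/types.h>

    struct sample {
            size_t count;
            u32 data[];     /* flexible array member */
    };

    /* Allocate a struct sample with @n trailing elements; struct_size()
     * saturates at SIZE_MAX on overflow, so kmalloc() fails cleanly
     * instead of allocating a truncated buffer. */
    static struct sample *sample_alloc(size_t n)
    {
            struct sample *s = kmalloc(struct_size(s, data, n), GFP_KERNEL);

            if (s)
                    s->count = n;
            return s;
    }

    /* Grow by @extra elements; check_add_overflow() returns true if the
     * sum wraps, in which case we bail out instead of under-allocating. */
    static int sample_grow(struct sample **sp, size_t extra)
    {
            struct sample *s;
            size_t new_count;

            if (check_add_overflow((*sp)->count, extra, &new_count))
                    return -EOVERFLOW;

            s = krealloc(*sp, struct_size(s, data, new_count), GFP_KERNEL);
            if (!s)
                    return -ENOMEM;
            s->count = new_count;
            *sp = s;
            return 0;
    }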
diff --git a/include/linux/pci.h b/include/linux/pci.h index 2bda4a4e47e8..c0d939f3169c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -38,6 +38,7 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/resource_ext.h> +#include <linux/msi_api.h> #include <uapi/linux/pci.h> #include <linux/pci_ids.h> @@ -409,6 +410,7 @@ struct pci_dev { */ unsigned int irq; struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ + struct resource driver_exclusive_resource; /* driver exclusive resource ranges */ bool match_driver; /* Skip attaching driver */ @@ -843,6 +845,9 @@ struct pci_error_handlers { /* Device driver may resume normal operations */ void (*resume)(struct pci_dev *dev); + + /* Allow device driver to record more details of a correctable error */ + void (*cor_error_detected)(struct pci_dev *dev); }; @@ -1407,6 +1412,21 @@ int pci_request_selected_regions(struct pci_dev *, int, const char *); int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *); void pci_release_selected_regions(struct pci_dev *, int); +static inline __must_check struct resource * +pci_request_config_region_exclusive(struct pci_dev *pdev, unsigned int offset, + unsigned int len, const char *name) +{ + return __request_region(&pdev->driver_exclusive_resource, offset, len, + name, IORESOURCE_EXCLUSIVE); +} + +static inline void pci_release_config_region(struct pci_dev *pdev, + unsigned int offset, + unsigned int len) +{ + __release_region(&pdev->driver_exclusive_resource, offset, len); +} + /* drivers/pci/bus.c */ void pci_add_resource(struct list_head *resources, struct resource *res); void pci_add_resource_offset(struct list_head *resources, struct resource *res, @@ -1553,10 +1573,17 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev, return rc; return 0; } +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, + unsigned int max_vecs, unsigned int flags); int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags, struct irq_affinity *affd); +bool pci_msix_can_alloc_dyn(struct pci_dev *dev); +struct msi_map pci_msix_alloc_irq_at(struct pci_dev *dev, unsigned int index, + const struct irq_affinity_desc *affdesc); +void pci_msix_free_irq(struct pci_dev *pdev, struct msi_map map); + void pci_free_irq_vectors(struct pci_dev *dev); int pci_irq_vector(struct pci_dev *dev, unsigned int nr); const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec); @@ -1586,6 +1613,13 @@ pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, return 1; return -ENOSPC; } +static inline int +pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, + unsigned int max_vecs, unsigned int flags) +{ + return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, + flags, NULL); +} static inline void pci_free_irq_vectors(struct pci_dev *dev) { @@ -1898,15 +1932,13 @@ pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, { return -ENOSPC; } -#endif /* CONFIG_PCI */ - static inline int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags) { - return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, flags, - NULL); + return -ENOSPC; } +#endif /* CONFIG_PCI */ /* Include architecture-dependent settings and functions */ @@ -2474,6 +2506,14 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) void pci_uevent_ers(struct pci_dev *pdev, 
enum pci_ers_result err_type); #endif +struct msi_domain_template; + +bool pci_create_ims_domain(struct pci_dev *pdev, const struct msi_domain_template *template, + unsigned int hwsize, void *data); +struct msi_map pci_ims_alloc_irq(struct pci_dev *pdev, union msi_instance_cookie *icookie, + const struct irq_affinity_desc *affdesc); +void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map); + #include <linux/dma-mapping.h> #define pci_printk(level, pdev, fmt, arg...) \ @@ -2484,6 +2524,7 @@ void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type); #define pci_crit(pdev, fmt, arg...) dev_crit(&(pdev)->dev, fmt, ##arg) #define pci_err(pdev, fmt, arg...) dev_err(&(pdev)->dev, fmt, ##arg) #define pci_warn(pdev, fmt, arg...) dev_warn(&(pdev)->dev, fmt, ##arg) +#define pci_warn_once(pdev, fmt, arg...) dev_warn_once(&(pdev)->dev, fmt, ##arg) #define pci_notice(pdev, fmt, arg...) dev_notice(&(pdev)->dev, fmt, ##arg) #define pci_info(pdev, fmt, arg...) dev_info(&(pdev)->dev, fmt, ##arg) #define pci_dbg(pdev, fmt, arg...) dev_dbg(&(pdev)->dev, fmt, ##arg) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index f1ec5ad1351c..1338ea2aa720 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -37,12 +37,11 @@ /* * Percpu allocator can serve percpu allocations before slab is * initialized which allows slab to depend on the percpu allocator. - * The following two parameters decide how much resource to - * preallocate for this. Keep PERCPU_DYNAMIC_RESERVE equal to or - * larger than PERCPU_DYNAMIC_EARLY_SIZE. + * The following parameter decides how much resource to preallocate + * for this. Keep PERCPU_DYNAMIC_RESERVE equal to or larger than + * PERCPU_DYNAMIC_EARLY_SIZE. */ -#define PERCPU_DYNAMIC_EARLY_SLOTS 128 -#define PERCPU_DYNAMIC_EARLY_SIZE (12 << 10) +#define PERCPU_DYNAMIC_EARLY_SIZE (20 << 10) /* * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 0356cb6a215d..ef914a600087 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -100,7 +100,7 @@ struct arm_pmu { void (*stop)(struct arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); - int (*filter_match)(struct perf_event *event); + bool (*filter)(struct pmu *pmu, int cpu); int num_events; bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 @@ -174,7 +174,6 @@ void kvm_host_pmu_init(struct arm_pmu *pmu); /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); -struct arm_pmu *armpmu_alloc_atomic(void); void armpmu_free(struct arm_pmu *pmu); int armpmu_register(struct arm_pmu *pmu); int armpmu_request_irq(int irq, int cpu); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 853f64b6c8c2..c6a3bac76966 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -266,6 +266,7 @@ struct hw_perf_event { }; struct perf_event; +struct perf_event_pmu_context; /* * Common implementation detail of pmu::{start,commit,cancel}_txn @@ -308,7 +309,7 @@ struct pmu { int capabilities; int __percpu *pmu_disable_count; - struct perf_cpu_context __percpu *pmu_cpu_context; + struct perf_cpu_pmu_context __percpu *cpu_pmu_context; atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */ int task_ctx_nr; int hrtimer_interval_ms; @@ -443,7 +444,7 @@ struct pmu { /* * context-switches callback */ - void (*sched_task) (struct perf_event_context *ctx, + void (*sched_task) (struct
perf_event_pmu_context *pmu_ctx, bool sched_in); /* @@ -457,8 +458,8 @@ struct pmu { * implementation and Perf core context switch handling callbacks for usage * examples. */ - void (*swap_task_ctx) (struct perf_event_context *prev, - struct perf_event_context *next); + void (*swap_task_ctx) (struct perf_event_pmu_context *prev_epc, + struct perf_event_pmu_context *next_epc); /* optional */ /* @@ -522,9 +523,10 @@ struct pmu { /* optional */ /* - * Filter events for PMU-specific reasons. + * Skip programming this PMU on the given CPU. Typically needed for + * big.LITTLE things. */ - int (*filter_match) (struct perf_event *event); /* optional */ + bool (*filter) (struct pmu *pmu, int cpu); /* optional */ /* * Check period value for PERF_EVENT_IOC_PERIOD ioctl. @@ -695,6 +697,11 @@ struct perf_event { int group_caps; struct perf_event *group_leader; + /* + * event->pmu will always point to the pmu this event belongs to, + * whereas event->pmu_ctx->pmu may point to a different pmu when a + * group of events from different pmus is created. + */ struct pmu *pmu; void *pmu_private; @@ -720,6 +727,12 @@ struct perf_event { struct hw_perf_event hw; struct perf_event_context *ctx; + /* + * event->pmu_ctx points to the perf_event_pmu_context to which the + * event is added. For a software event, this pmu_ctx can belong to a + * different pmu when the event is part of a group that also contains + * non-software events. + */ + struct perf_event_pmu_context *pmu_ctx; atomic_long_t refcount; /* @@ -756,11 +769,14 @@ struct perf_event { struct fasync_struct *fasync; /* delayed work for NMIs and such */ - int pending_wakeup; - int pending_kill; - int pending_disable; + unsigned int pending_wakeup; + unsigned int pending_kill; + unsigned int pending_disable; + unsigned int pending_sigtrap; unsigned long pending_addr; /* SIGTRAP */ - struct irq_work pending; + struct irq_work pending_irq; + struct callback_head pending_task; + unsigned int pending_work; atomic_t event_limit; @@ -809,19 +825,69 @@ struct perf_event { #endif /* CONFIG_PERF_EVENTS */ }; +/* + * ,-----------------------[1:n]----------------------. + * V V + * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event + * ^ ^ | | + * `--------[1:n]---------' `-[n:1]-> pmu <-[1:n]-' + * + * + * struct perf_event_pmu_context lifetime is refcount based and RCU freed + * (similar to perf_event_context). Locking is as if it were a member of + * perf_event_context; specifically: + * + * modification, both: ctx->mutex && ctx->lock + * reading, either: ctx->mutex || ctx->lock + * + * There is one exception to this; namely put_pmu_ctx() isn't always called + * with ctx->mutex held; this means that as long as we can guarantee the epc + * has events the above rules hold. + * + * Specifically, sys_perf_event_open()'s group_leader case depends on + * ctx->mutex pinning the configuration. Since we hold a reference on + * group_leader (through the filedesc) it can't go away, therefore its + * associated pmu_ctx must exist and cannot change due to ctx->mutex.
+ */ +struct perf_event_pmu_context { + struct pmu *pmu; + struct perf_event_context *ctx; + + struct list_head pmu_ctx_entry; + + struct list_head pinned_active; + struct list_head flexible_active; + + /* Used to avoid freeing per-cpu perf_event_pmu_context */ + unsigned int embedded : 1; + + unsigned int nr_events; + + atomic_t refcount; /* event <-> epc */ + struct rcu_head rcu_head; + + void *task_ctx_data; /* pmu specific data */ + /* + * Set when one or more (plausibly active) events can't be scheduled + * due to pmu overcommit or pmu constraints, except tolerant to + * events not necessary to be active due to scheduling constraints, + * such as cgroups. + */ + int rotate_necessary; +}; struct perf_event_groups { struct rb_root tree; u64 index; }; + /** * struct perf_event_context - event context structure * * Used as a container for task events and CPU events as well: */ struct perf_event_context { - struct pmu *pmu; /* * Protect the states of the events in the list, * nr_active, and the list: @@ -834,27 +900,21 @@ struct perf_event_context { */ struct mutex mutex; - struct list_head active_ctx_list; + struct list_head pmu_ctx_list; struct perf_event_groups pinned_groups; struct perf_event_groups flexible_groups; struct list_head event_list; - struct list_head pinned_active; - struct list_head flexible_active; - int nr_events; - int nr_active; int nr_user; int is_active; + + int nr_task_data; int nr_stat; int nr_freq; int rotate_disable; - /* - * Set when nr_events != nr_active, except tolerant to events not - * necessary to be active due to scheduling constraints, such as cgroups. - */ - int rotate_necessary; - refcount_t refcount; + + refcount_t refcount; /* event <-> ctx */ struct task_struct *task; /* @@ -875,8 +935,15 @@ struct perf_event_context { #ifdef CONFIG_CGROUP_PERF int nr_cgroups; /* cgroup evts */ #endif - void *task_ctx_data; /* pmu specific data */ struct rcu_head rcu_head; + + /* + * Sum (event->pending_sigtrap + event->pending_work) + * + * The SIGTRAP is targeted at ctx->task; as such, ctx->task must not + * change until the signal is delivered. + */ + local_t nr_pending; }; /* @@ -885,12 +952,13 @@ struct perf_event_context { */ #define PERF_NR_CONTEXTS 4 -/** - * struct perf_cpu_context - per cpu event context structure - */ -struct perf_cpu_context { - struct perf_event_context ctx; - struct perf_event_context *task_ctx; +struct perf_cpu_pmu_context { + struct perf_event_pmu_context epc; + struct perf_event_pmu_context *task_epc; + + struct list_head sched_cb_entry; + int sched_cb_usage; + int active_oncpu; int exclusive; @@ -898,16 +966,20 @@ struct perf_event_context { struct hrtimer hrtimer; ktime_t hrtimer_interval; unsigned int hrtimer_active; +}; + +/** + * struct perf_cpu_context - per cpu event context structure + */ +struct perf_cpu_context { + struct perf_event_context ctx; + struct perf_event_context *task_ctx; + int online; #ifdef CONFIG_CGROUP_PERF struct perf_cgroup *cgrp; - struct list_head cgrp_cpuctx_entry; #endif - struct list_head sched_cb_entry; - int sched_cb_usage; - - int online; /* * Per-CPU storage for iterators used in visit_groups_merge. The default * storage is of size 2 to hold the CPU and any CPU event iterators.
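The ->filter() callback introduced above replaces ->filter_match(); the following is a sketch of how a heterogeneous (big.LITTLE style) PMU driver might implement it, where struct example_pmu and its supported_cpus mask are hypothetical:

    #include <linux/cpumask.h>
    #include <linux/perf_event.h>

    struct example_pmu {                    /* hypothetical driver state */
            struct pmu      pmu;
            cpumask_t       supported_cpus; /* CPUs this PMU instance covers */
    };

    /* Returning true tells the core to skip programming this PMU on @cpu. */
    static bool example_pmu_filter(struct pmu *pmu, int cpu)
    {
            struct example_pmu *epmu = container_of(pmu, struct example_pmu, pmu);

            return !cpumask_test_cpu(cpu, &epmu->supported_cpus);
    }

Such a driver would set .filter = example_pmu_filter in its struct pmu; a PMU that covers every CPU can simply leave the callback NULL.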
@@ -971,6 +1043,8 @@ perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx) #ifdef CONFIG_PERF_EVENTS +extern struct perf_event_context *perf_cpu_task_ctx(void); + extern void *perf_aux_output_begin(struct perf_output_handle *handle, struct perf_event *event); extern void perf_aux_output_end(struct perf_output_handle *handle, @@ -1176,7 +1250,7 @@ static inline int is_software_event(struct perf_event *event) */ static inline int in_software_context(struct perf_event *event) { - return event->ctx->pmu->task_ctx_nr == perf_sw_context; + return event->pmu_ctx->pmu->task_ctx_nr == perf_sw_context; } static inline int is_exclusive_pmu(struct pmu *pmu) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index a108b60a6962..5f0d7d0b9471 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -165,6 +165,13 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr) return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr); } +#ifndef pmd_young +static inline int pmd_young(pmd_t pmd) +{ + return 0; +} +#endif + #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, @@ -260,6 +267,17 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef arch_has_hw_nonleaf_pmd_young +/* + * Return whether the accessed bit in non-leaf PMD entries is supported on the + * local CPU. + */ +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG); +} +#endif + #ifndef arch_has_hw_pte_young /* * Return whether the accessed bit is supported on the local CPU. diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 664dd409feb9..3f01ac8017e0 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -122,6 +122,7 @@ enum phylink_op_type { * (See commit 7cceb599d15d ("net: phylink: avoid mac_config calls")) * @poll_fixed_state: if true, starts link_poll, * if MAC link is at %MLO_AN_FIXED mode. + * @mac_managed_pm: if true, indicates that the MAC driver is responsible for PHY PM. * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND * @get_fixed_state: callback to execute to determine the fixed link state, * if MAC link is at %MLO_AN_FIXED mode.
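A sketch of a MAC driver opting in to the @mac_managed_pm flag documented above (the field itself is added in the next hunk); struct example_priv and example_mac_phylink_ops are hypothetical, with the phylink_mac_ops callbacks elided:

    #include <linux/err.h>
    #include <linux/netdevice.h>
    #include <linux/phylink.h>

    struct example_priv {                   /* hypothetical driver state */
            struct net_device       *netdev;
            struct fwnode_handle    *fwnode;
            phy_interface_t         phy_mode;
            struct phylink          *phylink;
            struct phylink_config   phylink_config;
    };

    static const struct phylink_mac_ops example_mac_phylink_ops = {
            /* mac_config()/mac_link_up()/mac_link_down() elided in this sketch */
    };

    static int example_mac_phylink_setup(struct example_priv *priv)
    {
            priv->phylink_config.type = PHYLINK_NETDEV;
            priv->phylink_config.dev = &priv->netdev->dev;
            /* The MAC driver's own suspend/resume paths manage the PHY,
             * so tell phylib not to touch PHY power management itself. */
            priv->phylink_config.mac_managed_pm = true;

            priv->phylink = phylink_create(&priv->phylink_config, priv->fwnode,
                                           priv->phy_mode, &example_mac_phylink_ops);
            return PTR_ERR_OR_ZERO(priv->phylink);
    }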
@@ -134,6 +135,7 @@ struct phylink_config { enum phylink_op_type type; bool legacy_pre_march2020; bool poll_fixed_state; + bool mac_managed_pm; bool ovr_an_inband; void (*get_fixed_state)(struct phylink_config *config, struct phylink_link_state *state); diff --git a/include/linux/platform_data/gpmc-omap.h b/include/linux/platform_data/gpmc-omap.h index c9cc4e32435d..dcca6c5e23bb 100644 --- a/include/linux/platform_data/gpmc-omap.h +++ b/include/linux/platform_data/gpmc-omap.h @@ -136,6 +136,13 @@ struct gpmc_device_timings { #define GPMC_MUX_AAD 1 /* Addr-Addr-Data multiplex */ #define GPMC_MUX_AD 2 /* Addr-Data multiplex */ +/* Wait pin polarity values */ +#define GPMC_WAITPINPOLARITY_INVALID UINT_MAX +#define GPMC_WAITPINPOLARITY_ACTIVE_LOW 0 +#define GPMC_WAITPINPOLARITY_ACTIVE_HIGH 1 + +#define GPMC_WAITPIN_INVALID UINT_MAX + struct gpmc_settings { bool burst_wrap; /* enables wrap bursting */ bool burst_read; /* enables read page/burst mode */ @@ -149,6 +156,7 @@ struct gpmc_settings { u32 device_width; /* device bus width (8 or 16 bit) */ u32 mux_add_data; /* multiplex address & data */ u32 wait_pin; /* wait-pin to be used */ + u32 wait_pin_polarity; }; /* Data for each chip select */ diff --git a/include/linux/platform_data/st33zp24.h b/include/linux/platform_data/st33zp24.h deleted file mode 100644 index 61db674f36cc..000000000000 --- a/include/linux/platform_data/st33zp24.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * STMicroelectronics TPM Linux driver for TPM 1.2 ST33ZP24 - * Copyright (C) 2009 - 2016 STMicroelectronics - */ -#ifndef __ST33ZP24_H__ -#define __ST33ZP24_H__ - -#define TPM_ST33_I2C "st33zp24-i2c" -#define TPM_ST33_SPI "st33zp24-spi" - -struct st33zp24_platform_data { - int io_lpcpd; -}; - -#endif /* __ST33ZP24_H__ */ diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index ebc351698090..1cd41bdf73cf 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -17,6 +17,7 @@ #include <linux/notifier.h> #include <linux/spinlock.h> #include <linux/cpumask.h> +#include <linux/time64.h> /* * Flags to control the behaviour of a genpd. 
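A usage sketch for the wait-pin polarity values added to gpmc-omap.h above; the board code is hypothetical, and wait_on_read is assumed to be one of the existing gpmc_settings fields outside this hunk:

    #include <linux/platform_data/gpmc-omap.h>

    /* Hypothetical board code: chip-select with an active-high WAIT pin.
     * GPMC_WAITPINPOLARITY_INVALID would leave the currently programmed
     * polarity untouched. */
    static void example_gpmc_fill_settings(struct gpmc_settings *s)
    {
            s->wait_on_read = true;         /* assumed existing field */
            s->wait_pin = 0;
            s->wait_pin_polarity = GPMC_WAITPINPOLARITY_ACTIVE_HIGH;
    }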
@@ -95,6 +96,7 @@ struct genpd_governor_data { s64 max_off_time_ns; bool max_off_time_changed; ktime_t next_wakeup; + ktime_t next_hrtimer; bool cached_power_down_ok; bool cached_power_down_state_idx; }; @@ -232,6 +234,7 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state); int dev_pm_genpd_add_notifier(struct device *dev, struct notifier_block *nb); int dev_pm_genpd_remove_notifier(struct device *dev); void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next); +ktime_t dev_pm_genpd_get_next_hrtimer(struct device *dev); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -293,6 +296,10 @@ static inline int dev_pm_genpd_remove_notifier(struct device *dev) static inline void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next) { } +static inline ktime_t dev_pm_genpd_get_next_hrtimer(struct device *dev) +{ + return KTIME_MAX; +} #define simple_qos_governor (*(struct dev_power_governor *)(NULL)) #define pm_domain_always_on_gov (*(struct dev_power_governor *)(NULL)) #endif diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 7d1e604c1325..ee608d22ecb9 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -69,21 +69,21 @@ extern int __posix_acl_create(struct posix_acl **, gfp_t, umode_t *); extern int __posix_acl_chmod(struct posix_acl **, gfp_t, umode_t); extern struct posix_acl *get_posix_acl(struct inode *, int); -extern int set_posix_acl(struct user_namespace *, struct inode *, int, - struct posix_acl *); +int set_posix_acl(struct user_namespace *, struct dentry *, int, + struct posix_acl *); struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type); struct posix_acl *posix_acl_clone(const struct posix_acl *acl, gfp_t flags); #ifdef CONFIG_FS_POSIX_ACL -int posix_acl_chmod(struct user_namespace *, struct inode *, umode_t); +int posix_acl_chmod(struct user_namespace *, struct dentry *, umode_t); extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **, struct posix_acl **); int posix_acl_update_mode(struct user_namespace *, struct inode *, umode_t *, struct posix_acl **); -extern int simple_set_acl(struct user_namespace *, struct inode *, - struct posix_acl *, int); +int simple_set_acl(struct user_namespace *, struct dentry *, + struct posix_acl *, int); extern int simple_acl_create(struct inode *, struct inode *); struct posix_acl *get_cached_acl(struct inode *inode, int type); @@ -99,9 +99,16 @@ static inline void cache_no_acl(struct inode *inode) inode->i_acl = NULL; inode->i_default_acl = NULL; } + +int vfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, + const char *acl_name, struct posix_acl *kacl); +struct posix_acl *vfs_get_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name); +int vfs_remove_acl(struct user_namespace *mnt_userns, struct dentry *dentry, + const char *acl_name); #else static inline int posix_acl_chmod(struct user_namespace *mnt_userns, - struct inode *inode, umode_t mode) + struct dentry *dentry, umode_t mode) { return 0; } @@ -126,8 +133,28 @@ static inline int posix_acl_create(struct inode *inode, umode_t *mode, static inline void forget_all_cached_acls(struct inode *inode) { } + +static inline int vfs_set_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *name, + struct posix_acl *acl) +{ + return -EOPNOTSUPP; +} + +static inline struct posix_acl *vfs_get_acl(struct user_namespace *mnt_userns, + struct dentry 
*dentry, + const char *acl_name) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline int vfs_remove_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_FS_POSIX_ACL */ -struct posix_acl *get_acl(struct inode *inode, int type); +struct posix_acl *get_inode_acl(struct inode *inode, int type); #endif /* __LINUX_POSIX_ACL_H */ diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h index 8163dd48c430..54cd7a14330d 100644 --- a/include/linux/posix_acl_xattr.h +++ b/include/linux/posix_acl_xattr.h @@ -33,33 +33,40 @@ posix_acl_xattr_count(size_t size) } #ifdef CONFIG_FS_POSIX_ACL -void posix_acl_fix_xattr_from_user(void *value, size_t size); -void posix_acl_fix_xattr_to_user(void *value, size_t size); -void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, - const struct inode *inode, - void *value, size_t size); +struct posix_acl *posix_acl_from_xattr(struct user_namespace *user_ns, + const void *value, size_t size); #else -static inline void posix_acl_fix_xattr_from_user(void *value, size_t size) -{ -} -static inline void posix_acl_fix_xattr_to_user(void *value, size_t size) -{ -} -static inline void -posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, - const struct inode *inode, void *value, - size_t size) +static inline struct posix_acl * +posix_acl_from_xattr(struct user_namespace *user_ns, const void *value, + size_t size) { + return ERR_PTR(-EOPNOTSUPP); } #endif -struct posix_acl *posix_acl_from_xattr(struct user_namespace *user_ns, - const void *value, size_t size); int posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, void *buffer, size_t size); -struct posix_acl *vfs_set_acl_prepare(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - const void *value, size_t size); +static inline const char *posix_acl_xattr_name(int type) +{ + switch (type) { + case ACL_TYPE_ACCESS: + return XATTR_NAME_POSIX_ACL_ACCESS; + case ACL_TYPE_DEFAULT: + return XATTR_NAME_POSIX_ACL_DEFAULT; + } + + return ""; +} + +static inline int posix_acl_type(const char *name) +{ + if (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) + return ACL_TYPE_ACCESS; + else if (strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0) + return ACL_TYPE_DEFAULT; + + return -1; +} extern const struct xattr_handler posix_acl_access_xattr_handler; extern const struct xattr_handler posix_acl_default_xattr_handler; diff --git a/include/linux/prandom.h b/include/linux/prandom.h index e0a0759dd09c..c94c02ba065c 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -9,6 +9,7 @@ #define _LINUX_PRANDOM_H #include <linux/types.h> +#include <linux/once.h> #include <linux/percpu.h> #include <linux/random.h> @@ -23,24 +24,10 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); #define prandom_init_once(pcpu_state) \ DO_ONCE(prandom_seed_full_state, (pcpu_state)) -/** - * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro) - * @ep_ro: right open interval endpoint - * - * Returns a pseudo-random number that is in interval [0, ep_ro). This is - * useful when requesting a random index of an array containing ep_ro elements, - * for example. The result is somewhat biased when ep_ro is not a power of 2, - * so do not use this for cryptographic purposes. - * - * Returns: pseudo-random number in interval [0, ep_ro) - */ +/* Deprecated: use get_random_u32_below() instead. 
*/ static inline u32 prandom_u32_max(u32 ep_ro) { - if (__builtin_constant_p(ep_ro <= 1U << 8) && ep_ro <= 1U << 8) - return (get_random_u8() * ep_ro) >> 8; - if (__builtin_constant_p(ep_ro <= 1U << 16) && ep_ro <= 1U << 16) - return (get_random_u16() * ep_ro) >> 16; - return ((u64)get_random_u32() * ep_ro) >> 32; + return get_random_u32_below(ep_ro); } /* diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 6e4372735068..1e0a0d7ace3a 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -72,6 +72,9 @@ enum psi_states { /* Use one bit in the state mask to track TSK_ONCPU */ #define PSI_ONCPU (1 << NR_PSI_STATES) +/* Flag whether to re-arm avgs_work, see details in get_recent_times() */ +#define PSI_STATE_RESCHEDULE (1 << (NR_PSI_STATES + 1)) + enum psi_aggregators { PSI_AVGS = 0, PSI_POLL, @@ -177,6 +180,7 @@ struct psi_group { struct timer_list poll_timer; wait_queue_head_t poll_wait; atomic_t poll_wakeup; + atomic_t poll_scheduled; /* Protects data used by the monitor */ struct mutex trigger_lock; diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 9f16afec7290..9d65ff94e216 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -8,28 +8,7 @@ #ifndef __LINUX_PSTORE_RAM_H__ #define __LINUX_PSTORE_RAM_H__ -#include <linux/compiler.h> -#include <linux/device.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/list.h> #include <linux/pstore.h> -#include <linux/types.h> - -/* - * Choose whether access to the RAM zone requires locking or not. If a zone - * can be written to from different CPUs like with ftrace for example, then - * PRZ_FLAG_NO_LOCK is used. For all other cases, locking is required. - */ -#define PRZ_FLAG_NO_LOCK BIT(0) -/* - * If a PRZ should only have a single-boot lifetime, this marks it as - * getting wiped after its contents get copied out after boot. - */ -#define PRZ_FLAG_ZAP_OLD BIT(1) - -struct persistent_ram_buffer; -struct rs_control; struct persistent_ram_ecc_info { int block_size; @@ -39,84 +18,6 @@ struct persistent_ram_ecc_info { uint16_t *par; }; -/** - * struct persistent_ram_zone - Details of a persistent RAM zone (PRZ) - * used as a pstore backend - * - * @paddr: physical address of the mapped RAM area - * @size: size of mapping - * @label: unique name of this PRZ - * @type: frontend type for this PRZ - * @flags: holds PRZ_FLAGS_* bits - * - * @buffer_lock: - * locks access to @buffer "size" bytes and "start" offset - * @buffer: - * pointer to actual RAM area managed by this PRZ - * @buffer_size: - * bytes in @buffer->data (not including any trailing ECC bytes) - * - * @par_buffer: - * pointer into @buffer->data containing ECC bytes for @buffer->data - * @par_header: - * pointer into @buffer->data containing ECC bytes for @buffer header - * (i.e. 
all fields up to @data) - * @rs_decoder: - * RSLIB instance for doing ECC calculations - * @corrected_bytes: - * ECC corrected bytes accounting since boot - * @bad_blocks: - * ECC uncorrectable bytes accounting since boot - * @ecc_info: - * ECC configuration details - * - * @old_log: - * saved copy of @buffer->data prior to most recent wipe - * @old_log_size: - * bytes contained in @old_log - * - */ -struct persistent_ram_zone { - phys_addr_t paddr; - size_t size; - void *vaddr; - char *label; - enum pstore_type_id type; - u32 flags; - - raw_spinlock_t buffer_lock; - struct persistent_ram_buffer *buffer; - size_t buffer_size; - - char *par_buffer; - char *par_header; - struct rs_control *rs_decoder; - int corrected_bytes; - int bad_blocks; - struct persistent_ram_ecc_info ecc_info; - - char *old_log; - size_t old_log_size; -}; - -struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, - u32 sig, struct persistent_ram_ecc_info *ecc_info, - unsigned int memtype, u32 flags, char *label); -void persistent_ram_free(struct persistent_ram_zone *prz); -void persistent_ram_zap(struct persistent_ram_zone *prz); - -int persistent_ram_write(struct persistent_ram_zone *prz, const void *s, - unsigned int count); -int persistent_ram_write_user(struct persistent_ram_zone *prz, - const void __user *s, unsigned int count); - -void persistent_ram_save_old(struct persistent_ram_zone *prz); -size_t persistent_ram_old_size(struct persistent_ram_zone *prz); -void *persistent_ram_old(struct persistent_ram_zone *prz); -void persistent_ram_free_old(struct persistent_ram_zone *prz); -ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz, - char *str, size_t len); - /* * Ramoops platform data * @mem_size memory size for ramoops diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index c952c5ba8fab..eaaef3ffec22 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -389,15 +389,6 @@ static inline void user_single_step_report(struct pt_regs *regs) #define current_pt_regs() task_pt_regs(current) #endif -/* - * unlike current_pt_regs(), this one is equal to task_pt_regs(current) - * on *all* architectures; the only reason to have a per-arch definition - * is optimisation. 
- */ -#ifndef signal_pt_regs -#define signal_pt_regs() task_pt_regs(current) -#endif - #ifndef current_user_stack_pointer #define current_user_stack_pointer() user_stack_pointer(current_pt_regs()) #endif diff --git a/include/linux/random.h b/include/linux/random.h index 147a5e0d0b8e..4a2a1de423cd 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -6,7 +6,6 @@ #include <linux/bug.h> #include <linux/kernel.h> #include <linux/list.h> -#include <linux/once.h> #include <uapi/linux/random.h> @@ -17,16 +16,16 @@ void __init add_bootloader_randomness(const void *buf, size_t len); void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; void add_interrupt_randomness(int irq) __latent_entropy; -void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy); +void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy, bool sleep_after); -#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) static inline void add_latent_entropy(void) { +#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy)); -} #else -static inline void add_latent_entropy(void) { } + add_device_randomness(NULL, 0); #endif +} #if IS_ENABLED(CONFIG_VMGENID) void add_vmfork_randomness(const void *unique_vm_id, size_t len); @@ -51,29 +50,76 @@ static inline unsigned long get_random_long(void) #endif } +u32 __get_random_u32_below(u32 ceil); + /* - * On 64-bit architectures, protect against non-terminated C string overflows - * by zeroing out the first byte of the canary; this leaves 56 bits of entropy. + * Returns a random integer in the interval [0, ceil), with uniform + * distribution, suitable for all uses. Fastest when ceil is a constant, but + * still fast for variable ceil as well. */ -#ifdef CONFIG_64BIT -# ifdef __LITTLE_ENDIAN -# define CANARY_MASK 0xffffffffffffff00UL -# else /* big endian, 64 bits: */ -# define CANARY_MASK 0x00ffffffffffffffUL -# endif -#else /* 32 bits: */ -# define CANARY_MASK 0xffffffffUL -#endif +static inline u32 get_random_u32_below(u32 ceil) +{ + if (!__builtin_constant_p(ceil)) + return __get_random_u32_below(ceil); + + /* + * For the fast path, below, all operations on ceil are precomputed by + * the compiler, so this incurs no overhead for checking pow2, doing + * divisions, or branching based on integer size. The resultant + * algorithm does traditional reciprocal multiplication (typically + * optimized by the compiler into shifts and adds), rejecting samples + * whose lower half would indicate a range indivisible by ceil. + */ + BUILD_BUG_ON_MSG(!ceil, "get_random_u32_below() must take ceil > 0"); + if (ceil <= 1) + return 0; + for (;;) { + if (ceil <= 1U << 8) { + u32 mult = ceil * get_random_u8(); + if (likely(is_power_of_2(ceil) || (u8)mult >= (1U << 8) % ceil)) + return mult >> 8; + } else if (ceil <= 1U << 16) { + u32 mult = ceil * get_random_u16(); + if (likely(is_power_of_2(ceil) || (u16)mult >= (1U << 16) % ceil)) + return mult >> 16; + } else { + u64 mult = (u64)ceil * get_random_u32(); + if (likely(is_power_of_2(ceil) || (u32)mult >= -ceil % ceil)) + return mult >> 32; + } + } +} + +/* + * Returns a random integer in the interval (floor, U32_MAX], with uniform + * distribution, suitable for all uses. Fastest when floor is a constant, but + * still fast for variable floor as well. 
+ */ +static inline u32 get_random_u32_above(u32 floor) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(floor) && floor == U32_MAX, + "get_random_u32_above() must take floor < U32_MAX"); + return floor + 1 + get_random_u32_below(U32_MAX - floor); +} -static inline unsigned long get_random_canary(void) +/* + * Returns a random integer in the interval [floor, ceil], with uniform + * distribution, suitable for all uses. Fastest when floor and ceil are + * constant, but still fast for variable floor and ceil as well. + */ +static inline u32 get_random_u32_inclusive(u32 floor, u32 ceil) { - return get_random_long() & CANARY_MASK; + BUILD_BUG_ON_MSG(__builtin_constant_p(floor) && __builtin_constant_p(ceil) && + (floor > ceil || ceil - floor == U32_MAX), + "get_random_u32_inclusive() must take floor <= ceil"); + return floor + get_random_u32_below(ceil - floor + 1); } void __init random_init_early(const char *command_line); void __init random_init(void); bool rng_is_initialized(void); int wait_for_random_bytes(void); +int execute_with_initialized_rng(struct notifier_block *nb); /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes). * Returns the result of the call to wait_for_random_bytes. */ @@ -108,26 +154,6 @@ declare_get_random_var_wait(long, unsigned long) #include <asm/archrandom.h> -/* - * Called from the boot CPU during startup; not valid to call once - * secondary CPUs are up and preemption is possible. - */ -#ifndef arch_get_random_seed_longs_early -static inline size_t __init arch_get_random_seed_longs_early(unsigned long *v, size_t max_longs) -{ - WARN_ON(system_state != SYSTEM_BOOTING); - return arch_get_random_seed_longs(v, max_longs); -} -#endif - -#ifndef arch_get_random_longs_early -static inline bool __init arch_get_random_longs_early(unsigned long *v, size_t max_longs) -{ - WARN_ON(system_state != SYSTEM_BOOTING); - return arch_get_random_longs(v, max_longs); -} -#endif - #ifdef CONFIG_SMP int random_prepare_cpu(unsigned int cpu); int random_online_cpu(unsigned int cpu); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 08605ce7379d..4da98ca6273e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -108,6 +108,15 @@ static inline int rcu_preempt_depth(void) #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_RCU_LAZY +void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func); +#else +static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) +{ + call_rcu(head, func); +} +#endif + /* Internal to kernel */ void rcu_init(void); extern int rcu_scheduler_active; @@ -340,6 +349,11 @@ static inline int rcu_read_lock_any_held(void) return !preemptible(); } +static inline int debug_lockdep_rcu_enabled(void) +{ + return 0; +} + #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ #ifdef CONFIG_PROVE_RCU diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 768196a5f39d..68f9070aa111 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -142,23 +142,17 @@ static inline int rcu_needs_cpu(void) * Take advantage of the fact that there is only one CPU, which * allows us to ignore virtualization-based context switches. 
*/ -static inline void rcu_virt_note_context_switch(int cpu) { } +static inline void rcu_virt_note_context_switch(void) { } static inline void rcu_cpu_stall_reset(void) { } static inline int rcu_jiffies_till_stall_check(void) { return 21 * HZ; } static inline void rcu_irq_exit_check_preempt(void) { } -#define rcu_is_idle_cpu(cpu) \ - (is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq()) static inline void exit_rcu(void) { } static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t) { return false; } static inline void rcu_preempt_deferred_qs(struct task_struct *t) { } -#ifdef CONFIG_SRCU void rcu_scheduler_starting(void); -#else /* #ifndef CONFIG_SRCU */ -static inline void rcu_scheduler_starting(void) { } -#endif /* #else #ifndef CONFIG_SRCU */ static inline void rcu_end_inkernel_boot(void) { } static inline bool rcu_inkernel_boot_has_ended(void) { return true; } static inline bool rcu_is_watching(void) { return true; } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 5efb51486e8a..4003bf6cfa1c 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -27,7 +27,7 @@ void rcu_cpu_stall_reset(void); * wrapper around rcu_note_context_switch(), which allows TINY_RCU * to save a few bytes. The caller must have disabled interrupts. */ -static inline void rcu_virt_note_context_switch(int cpu) +static inline void rcu_virt_note_context_switch(void) { rcu_note_context_switch(false); } @@ -87,8 +87,6 @@ bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); void cond_synchronize_rcu(unsigned long oldstate); void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); -bool rcu_is_idle_cpu(int cpu); - #ifdef CONFIG_PROVE_RCU void rcu_irq_exit_check_preempt(void); #else diff --git a/include/linux/regset.h b/include/linux/regset.h index a00765f0e8cf..9061266dd8de 100644 --- a/include/linux/regset.h +++ b/include/linux/regset.h @@ -275,15 +275,15 @@ static inline int user_regset_copyin(unsigned int *pos, unsigned int *count, return 0; } -static inline int user_regset_copyin_ignore(unsigned int *pos, - unsigned int *count, - const void **kbuf, - const void __user **ubuf, - const int start_pos, - const int end_pos) +static inline void user_regset_copyin_ignore(unsigned int *pos, + unsigned int *count, + const void **kbuf, + const void __user **ubuf, + const int start_pos, + const int end_pos) { if (*count == 0) - return 0; + return; BUG_ON(*pos < start_pos); if (end_pos < 0 || *pos < end_pos) { unsigned int copy = (end_pos < 0 ? *count @@ -295,7 +295,6 @@ static inline int user_regset_copyin_ignore(unsigned int *pos, *pos += copy; *count -= copy; } - return 0; } extern int regset_get(struct task_struct *target, diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 0cf5b20c6ddf..0cee154abc9f 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -89,11 +89,12 @@ struct rdt_domain { /** * struct resctrl_cache - Cache allocation related data * @cbm_len: Length of the cache bit mask - * @min_cbm_bits: Minimum number of consecutive bits to be set + * @min_cbm_bits: Minimum number of consecutive bits to be set. + * The value 0 means the architecture can support + * zero CBM. * @shareable_bits: Bitmask of shareable resource with other * executing entities * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid. - * @arch_has_empty_bitmaps: True if the '0' bitmap is valid. * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache * level has CPU scope. 
*/ @@ -102,7 +103,6 @@ struct resctrl_cache { unsigned int min_cbm_bits; unsigned int shareable_bits; bool arch_has_sparse_bitmaps; - bool arch_has_empty_bitmaps; bool arch_has_per_cpu_cfg; }; diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 2504df9a0453..3c7d295746f6 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -100,7 +100,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, - struct file *filp, poll_table *poll_table); + struct file *filp, poll_table *poll_table, int full); void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu); #define RING_BUFFER_ALL_CPUS -1 diff --git a/include/linux/sched.h b/include/linux/sched.h index ffb6eb55cd13..5affff14993d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -888,9 +888,6 @@ struct task_struct { unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; unsigned sched_migrated:1; -#ifdef CONFIG_PSI - unsigned sched_psi_wake_requeue:1; -#endif /* Force alignment to the next boundary: */ unsigned :0; @@ -1243,7 +1240,7 @@ struct task_struct { unsigned int futex_state; #endif #ifdef CONFIG_PERF_EVENTS - struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; + struct perf_event_context *perf_event_ctxp; struct mutex perf_event_mutex; struct list_head perf_event_list; #endif diff --git a/include/linux/scs.h b/include/linux/scs.h index 18122d9e17ff..4ab5bdc898cf 100644 --- a/include/linux/scs.h +++ b/include/linux/scs.h @@ -53,6 +53,22 @@ static inline bool task_scs_end_corrupted(struct task_struct *tsk) return sz >= SCS_SIZE - 1 || READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC; } +DECLARE_STATIC_KEY_FALSE(dynamic_scs_enabled); + +static inline bool scs_is_dynamic(void) +{ + if (!IS_ENABLED(CONFIG_DYNAMIC_SCS)) + return false; + return static_branch_likely(&dynamic_scs_enabled); +} + +static inline bool scs_is_enabled(void) +{ + if (!IS_ENABLED(CONFIG_DYNAMIC_SCS)) + return true; + return scs_is_dynamic(); +} + #else /* CONFIG_SHADOW_CALL_STACK */ static inline void *scs_alloc(int node) { return NULL; } @@ -62,6 +78,8 @@ static inline void scs_task_reset(struct task_struct *tsk) {} static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; } static inline void scs_release(struct task_struct *tsk) {} static inline bool task_scs_end_corrupted(struct task_struct *tsk) { return false; } +static inline bool scs_is_enabled(void) { return false; } +static inline bool scs_is_dynamic(void) { return false; } #endif /* CONFIG_SHADOW_CALL_STACK */ diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index d31d76be4982..175079552f68 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -27,6 +27,7 @@ struct seccomp_filter; * * @mode: indicates one of the valid values above for controlled * system calls available to a process. + * @filter_count: number of seccomp filters * @filter: must always point to a valid seccomp-filter or NULL as it is * accessed without locking during system call entry. 
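The new predicates make the shadow-call-stack policy queryable from generic code. A minimal sketch, with the hook name invented, of gating SCS setup on scs_is_enabled():

    #include <linux/scs.h>

    /* Allocate a shadow call stack only when SCS is in use: always in
     * static SCS builds, runtime-detected when CONFIG_DYNAMIC_SCS=y. */
    static int foo_setup_task_scs(struct task_struct *tsk, int node)
    {
            if (!scs_is_enabled())
                    return 0;
            return scs_prepare(tsk, node);
    }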
* diff --git a/include/linux/security.h b/include/linux/security.h index 0e419c595cee..5b67f208f7de 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -362,6 +362,13 @@ int security_inode_getattr(const struct path *path); int security_inode_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, const char *name, const void *value, size_t size, int flags); +int security_inode_set_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name, + struct posix_acl *kacl); +int security_inode_get_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name); +int security_inode_remove_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name); void security_inode_post_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int security_inode_getxattr(struct dentry *dentry, const char *name); @@ -397,6 +404,7 @@ int security_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, int sig); int security_file_receive(struct file *file); int security_file_open(struct file *file); +int security_file_truncate(struct file *file); int security_task_alloc(struct task_struct *task, unsigned long clone_flags); void security_task_free(struct task_struct *task); int security_cred_alloc_blank(struct cred *cred, gfp_t gfp); @@ -873,6 +881,28 @@ static inline int security_inode_setxattr(struct user_namespace *mnt_userns, return cap_inode_setxattr(dentry, name, value, size, flags); } +static inline int security_inode_set_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, + const char *acl_name, + struct posix_acl *kacl) +{ + return 0; +} + +static inline int security_inode_get_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, + const char *acl_name) +{ + return 0; +} + +static inline int security_inode_remove_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, + const char *acl_name) +{ + return 0; +} + static inline void security_inode_post_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { } @@ -1015,6 +1045,11 @@ static inline int security_file_open(struct file *file) return 0; } +static inline int security_file_truncate(struct file *file) +{ + return 0; +} + static inline int security_task_alloc(struct task_struct *task, unsigned long clone_flags) { diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index d657f2a42a7b..91871464b99d 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -743,9 +743,15 @@ static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED; static inline int setup_earlycon(char *buf) { return 0; } #endif -static inline bool uart_console_enabled(struct uart_port *port) +/* Variant of uart_console_registered() when the console_list_lock is held. 
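The three LSM hooks added in the security.h hunk above give security modules a vetting point for POSIX ACL updates. A sketch of how a caller might consult one before handing off to the filesystem; this is illustrative wiring, not the actual VFS code path:

    #include <linux/security.h>

    static int set_acl_checked(struct user_namespace *mnt_userns,
                               struct dentry *dentry, const char *acl_name,
                               struct posix_acl *acl)
    {
            int error;

            /* Give LSMs a chance to veto the update first. */
            error = security_inode_set_acl(mnt_userns, dentry, acl_name, acl);
            if (error)
                    return error;
            /* ...then call into the filesystem's ->set_acl()... */
            return 0;
    }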
*/ +static inline bool uart_console_registered_locked(struct uart_port *port) { - return uart_console(port) && (port->cons->flags & CON_ENABLED); + return uart_console(port) && console_is_registered_locked(port->cons); +} + +static inline bool uart_console_registered(struct uart_port *port) +{ + return uart_console(port) && console_is_registered(port->cons); } struct uart_port *uart_get_console(struct uart_port *ports, int nr, diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 48f4b645193b..70d6cb94e580 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -376,7 +376,7 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err) } struct sk_psock *sk_psock_init(struct sock *sk, int node); -void sk_psock_stop(struct sk_psock *psock, bool wait); +void sk_psock_stop(struct sk_psock *psock); #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock); diff --git a/include/linux/slab.h b/include/linux/slab.h index 90877fcde70b..45af70315a94 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -76,6 +76,17 @@ * rcu_read_lock before reading the address, then rcu_read_unlock after * taking the spinlock within the structure expected at that address. * + * Note that it is not possible to acquire a lock within a structure + * allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference + * as described above. The reason is that SLAB_TYPESAFE_BY_RCU pages + * are not zeroed before being given to the slab, which means that any + * locks must be initialized after each and every kmem_cache_alloc(). + * Alternatively, make the ctor passed to kmem_cache_create() initialize + * the locks at page-allocation time, as is done in __i915_request_ctor(), + * sighand_ctor(), and anon_vma_ctor(). Such a ctor permits readers + * to safely acquire those ctor-initialized locks under rcu_read_lock() + * protection. + * * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. */ /* Defer freeing slabs to RCU */ @@ -129,7 +140,11 @@ /* The following flags affect the page allocator grouping pages by mobility */ /* Objects are reclaimable */ +#ifndef CONFIG_SLUB_TINY #define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U) +#else +#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0) +#endif #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ /* @@ -336,13 +351,18 @@ enum kmalloc_cache_type { #endif #ifndef CONFIG_MEMCG_KMEM KMALLOC_CGROUP = KMALLOC_NORMAL, -#else - KMALLOC_CGROUP, #endif +#ifdef CONFIG_SLUB_TINY + KMALLOC_RECLAIM = KMALLOC_NORMAL, +#else KMALLOC_RECLAIM, +#endif #ifdef CONFIG_ZONE_DMA KMALLOC_DMA, #endif +#ifdef CONFIG_MEMCG_KMEM + KMALLOC_CGROUP, +#endif NR_KMALLOC_TYPES }; @@ -441,7 +461,18 @@ static_assert(PAGE_SHIFT <= 20); #endif /* !CONFIG_SLOB */ void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); -void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags) __assume_slab_alignment __malloc; + +/** + * kmem_cache_alloc - Allocate an object + * @cachep: The cache to allocate from. + * @flags: See kmalloc(). + * + * Allocate an object from this cache. + * See kmem_cache_zalloc() for a shortcut of adding __GFP_ZERO to flags. 
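A minimal sketch of the ctor pattern recommended in the SLAB_TYPESAFE_BY_RCU comment above, with the cache and struct names invented: the lock is initialized once when the slab page is created, so rcu_read_lock() readers can safely acquire it across object reuse:

    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct foo {
            spinlock_t lock;
            /* ...payload that may be reused under RCU... */
    };

    static void foo_ctor(void *obj)
    {
            struct foo *f = obj;

            /* Runs once per object when its slab page is allocated,
             * not on every kmem_cache_alloc(). */
            spin_lock_init(&f->lock);
    }

    static struct kmem_cache *foo_cache;

    static int __init foo_cache_init(void)
    {
            foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0,
                                          SLAB_TYPESAFE_BY_RCU, foo_ctor);
            return foo_cache ? 0 : -ENOMEM;
    }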
+ * + * Return: pointer to the new object or %NULL in case of error + */ +void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) __assume_slab_alignment __malloc; void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, gfp_t gfpflags) __assume_slab_alignment __malloc; void kmem_cache_free(struct kmem_cache *s, void *objp); @@ -470,35 +501,12 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignm void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment __malloc; -#ifdef CONFIG_TRACING void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) __assume_kmalloc_alignment __alloc_size(3); void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, int node, size_t size) __assume_kmalloc_alignment __alloc_size(4); -#else /* CONFIG_TRACING */ -/* Save a function call when CONFIG_TRACING=n */ -static __always_inline __alloc_size(3) -void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) -{ - void *ret = kmem_cache_alloc(s, flags); - - ret = kasan_kmalloc(s, ret, size, flags); - return ret; -} - -static __always_inline __alloc_size(4) -void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size) -{ - void *ret = kmem_cache_alloc_node(s, gfpflags, node); - - ret = kasan_kmalloc(s, ret, size, gfpflags); - return ret; -} -#endif /* CONFIG_TRACING */ - void *kmalloc_large(size_t size, gfp_t flags) __assume_page_alignment __alloc_size(1); @@ -506,9 +514,9 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align __alloc_size(1); /** - * kmalloc - allocate memory + * kmalloc - allocate kernel memory * @size: how many bytes of memory are required. - * @flags: the type of memory to allocate. + * @flags: describe the allocation context * * kmalloc is the normal method of allocating memory * for objects smaller than page size in the kernel. @@ -535,12 +543,12 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align * %GFP_ATOMIC * Allocation will not sleep. May use emergency pools. * - * %GFP_HIGHUSER - * Allocate memory from high memory on behalf of user. - * * Also it is possible to set different flags by OR'ing * in one or more of the following additional @flags: * + * %__GFP_ZERO + * Zero the allocated memory before returning. Also see kzalloc(). + * * %__GFP_HIGH * This allocation has high priority and may use emergency pools. * @@ -559,42 +567,42 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align * Try really hard to succeed the allocation but fail * eventually. 
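Putting the flag descriptions above together, a trivial self-contained sketch; the struct name is invented:

    #include <linux/slab.h>

    struct foo { int a; };

    /* GFP_KERNEL may sleep; callers in atomic context pass GFP_ATOMIC.
     * kzalloc() is kmalloc() with __GFP_ZERO folded in. */
    static struct foo *foo_alloc(gfp_t gfp)
    {
            return kzalloc(sizeof(struct foo), gfp);
    }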
*/ +#ifndef CONFIG_SLOB static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) { - if (__builtin_constant_p(size)) { -#ifndef CONFIG_SLOB + if (__builtin_constant_p(size) && size) { unsigned int index; -#endif + if (size > KMALLOC_MAX_CACHE_SIZE) return kmalloc_large(size, flags); -#ifndef CONFIG_SLOB - index = kmalloc_index(size); - - if (!index) - return ZERO_SIZE_PTR; + index = kmalloc_index(size); return kmalloc_trace( kmalloc_caches[kmalloc_type(flags)][index], flags, size); -#endif } return __kmalloc(size, flags); } +#else +static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) +{ + if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE) + return kmalloc_large(size, flags); + + return __kmalloc(size, flags); +} +#endif #ifndef CONFIG_SLOB static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node) { - if (__builtin_constant_p(size)) { + if (__builtin_constant_p(size) && size) { unsigned int index; if (size > KMALLOC_MAX_CACHE_SIZE) return kmalloc_large_node(size, flags, node); index = kmalloc_index(size); - - if (!index) - return ZERO_SIZE_PTR; - return kmalloc_node_trace( kmalloc_caches[kmalloc_type(flags)][index], flags, node, size); diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index f0ffad6a3365..5834bad8ad78 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -80,8 +80,10 @@ struct kmem_cache { unsigned int *random_seq; #endif +#ifdef CONFIG_HARDENED_USERCOPY unsigned int useroffset; /* Usercopy region offset */ unsigned int usersize; /* Usercopy region size */ +#endif struct kmem_cache_node *node[MAX_NUMNODES]; }; diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index f9c68a9dac04..aa0ee1678d29 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -41,6 +41,7 @@ enum stat_item { CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */ NR_SLUB_STAT_ITEMS }; +#ifndef CONFIG_SLUB_TINY /* * When changing the layout, make sure freelist and tid are still compatible * with this_cpu_cmpxchg_double() alignment requirements. @@ -57,6 +58,7 @@ struct kmem_cache_cpu { unsigned stat[NR_SLUB_STAT_ITEMS]; #endif }; +#endif /* CONFIG_SLUB_TINY */ #ifdef CONFIG_SLUB_CPU_PARTIAL #define slub_percpu_partial(c) ((c)->partial) @@ -88,7 +90,9 @@ struct kmem_cache_order_objects { * Slab cache management. */ struct kmem_cache { +#ifndef CONFIG_SLUB_TINY struct kmem_cache_cpu __percpu *cpu_slab; +#endif /* Used for retrieving partial slabs, etc. 
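Since the usercopy window fields now exist only with CONFIG_HARDENED_USERCOPY=y, it is worth recalling where they come from: caches declare the window via kmem_cache_create_usercopy(). A sketch, with struct and cache names invented:

    #include <linux/slab.h>
    #include <linux/stddef.h>

    struct sess {
            u32 key;                /* never exposed to userspace */
            char name[64];          /* the only usercopy-able region */
    };

    static struct kmem_cache *sess_cache;

    static int __init sess_cache_init(void)
    {
            /* useroffset/usersize describe the whitelisted window; with
             * CONFIG_HARDENED_USERCOPY=n they are simply ignored. */
            sess_cache = kmem_cache_create_usercopy("sess", sizeof(struct sess),
                                                    0, SLAB_HWCACHE_ALIGN,
                                                    offsetof(struct sess, name),
                                                    sizeof_field(struct sess, name),
                                                    NULL);
            return sess_cache ? 0 : -ENOMEM;
    }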
*/ slab_flags_t flags; unsigned long min_partial; @@ -136,13 +140,15 @@ struct kmem_cache { struct kasan_cache kasan_info; #endif +#ifdef CONFIG_HARDENED_USERCOPY unsigned int useroffset; /* Usercopy region offset */ unsigned int usersize; /* Usercopy region size */ +#endif struct kmem_cache_node *node[MAX_NUMNODES]; }; -#ifdef CONFIG_SYSFS +#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY) #define SLAB_SUPPORTS_SYSFS void sysfs_slab_unlink(struct kmem_cache *); void sysfs_slab_release(struct kmem_cache *); diff --git a/include/linux/soc/mediatek/mtk-mmsys.h b/include/linux/soc/mediatek/mtk-mmsys.h index d2b02bb43768..b85f66db33e1 100644 --- a/include/linux/soc/mediatek/mtk-mmsys.h +++ b/include/linux/soc/mediatek/mtk-mmsys.h @@ -9,6 +9,13 @@ enum mtk_ddp_comp_id; struct device; +enum mtk_dpi_out_format_con { + MTK_DPI_RGB888_SDR_CON, + MTK_DPI_RGB888_DDR_CON, + MTK_DPI_RGB565_SDR_CON, + MTK_DPI_RGB565_DDR_CON +}; + enum mtk_ddp_comp_id { DDP_COMPONENT_AAL0, DDP_COMPONENT_AAL1, diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index bc2fb8343a94..ad1fd718169d 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -42,7 +42,19 @@ #define LLCC_CPUHWT 36 #define LLCC_MDMCLAD2 37 #define LLCC_CAMEXP1 38 +#define LLCC_CMPTHCP 39 +#define LLCC_LCPDARE 40 #define LLCC_AENPU 45 +#define LLCC_ISLAND1 46 +#define LLCC_ISLAND2 47 +#define LLCC_ISLAND3 48 +#define LLCC_ISLAND4 49 +#define LLCC_CAMEXP2 50 +#define LLCC_CAMEXP3 51 +#define LLCC_CAMEXP4 52 +#define LLCC_DISP_WB 53 +#define LLCC_DISP_1 54 +#define LLCC_VIDVSP 64 /** * struct llcc_slice_desc - Cache slice descriptor diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 2ba044d0d5e5..8e984d75f5b6 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -225,7 +225,7 @@ static inline void *spi_mem_get_drvdata(struct spi_mem *mem) /** * struct spi_controller_mem_ops - SPI memory operations * @adjust_op_size: shrink the data xfer of an operation to match controller's - * limitations (can be alignment of max RX/TX size + * limitations (can be alignment or max RX/TX size * limitations) * @supports_op: check if an operation is supported by the controller * @exec_op: execute a SPI memory operation diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 01226e4d960a..9b9d0bbf1d3c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -47,11 +47,8 @@ int init_srcu_struct(struct srcu_struct *ssp); #include <linux/srcutiny.h> #elif defined(CONFIG_TREE_SRCU) #include <linux/srcutree.h> -#elif defined(CONFIG_SRCU) -#error "Unknown SRCU implementation specified to kernel configuration" #else -/* Dummy definition for things like notifiers. Actual use gets link error. 
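Client drivers use the new LLCC slice IDs above with the existing slice API. A sketch of claiming the display-writeback slice, with error handling trimmed and the function name invented:

    #include <linux/err.h>
    #include <linux/soc/qcom/llcc-qcom.h>

    static int wb_cache_on(struct llcc_slice_desc **out)
    {
            struct llcc_slice_desc *desc = llcc_slice_getd(LLCC_DISP_WB);

            if (IS_ERR(desc))
                    return PTR_ERR(desc);
            *out = desc;
            return llcc_slice_activate(desc);
    }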
*/ -struct srcu_struct { }; +#error "Unknown SRCU implementation specified to kernel configuration" #endif void call_srcu(struct srcu_struct *ssp, struct rcu_head *head, @@ -64,11 +61,21 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp); unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp); bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie); -#ifdef CONFIG_SRCU +#ifdef CONFIG_NEED_SRCU_NMI_SAFE +int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp); +void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp); +#else +static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) +{ + return __srcu_read_lock(ssp); +} +static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) +{ + __srcu_read_unlock(ssp, idx); +} +#endif /* CONFIG_NEED_SRCU_NMI_SAFE */ + void srcu_init(void); -#else /* #ifdef CONFIG_SRCU */ -static inline void srcu_init(void) { } -#endif /* #else #ifdef CONFIG_SRCU */ #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -104,6 +111,18 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#define SRCU_NMI_UNKNOWN 0x0 +#define SRCU_NMI_UNSAFE 0x1 +#define SRCU_NMI_SAFE 0x2 + +#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU) +void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe); +#else +static inline void srcu_check_nmi_safety(struct srcu_struct *ssp, + bool nmi_safe) { } +#endif + + /** * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing * @p: the pointer to fetch and protect for later dereferencing @@ -161,17 +180,36 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) { int retval; + srcu_check_nmi_safety(ssp, false); retval = __srcu_read_lock(ssp); rcu_lock_acquire(&(ssp)->dep_map); return retval; } +/** + * srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure. + * @ssp: srcu_struct in which to register the new reader. + * + * Enter an SRCU read-side critical section, but in an NMI-safe manner. + * See srcu_read_lock() for more information. + */ +static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp) +{ + int retval; + + srcu_check_nmi_safety(ssp, true); + retval = __srcu_read_lock_nmisafe(ssp); + rcu_lock_acquire(&(ssp)->dep_map); + return retval; +} + /* Used by tracing, cannot be traced and cannot invoke lockdep. */ static inline notrace int srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) { int retval; + srcu_check_nmi_safety(ssp, false); retval = __srcu_read_lock(ssp); return retval; } @@ -187,14 +225,32 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp) { WARN_ON_ONCE(idx & ~0x1); + srcu_check_nmi_safety(ssp, false); rcu_lock_release(&(ssp)->dep_map); __srcu_read_unlock(ssp, idx); } +/** + * srcu_read_unlock_nmisafe - unregister an old reader from an SRCU-protected structure. + * @ssp: srcu_struct in which to unregister the old reader. + * @idx: return value from corresponding srcu_read_lock_nmisafe(). + * + * Exit an SRCU read-side critical section, but in an NMI-safe manner. + */ +static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) + __releases(ssp) +{ + WARN_ON_ONCE(idx & ~0x1); + srcu_check_nmi_safety(ssp, true); + rcu_lock_release(&(ssp)->dep_map); + __srcu_read_unlock_nmisafe(ssp, idx); +} + /* Used by tracing, cannot be traced and cannot call lockdep. 
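Usage of the NMI-safe flavor added above mirrors the ordinary one; note that a given srcu_struct must stick to one flavor, which srcu_check_nmi_safety() enforces under CONFIG_PROVE_RCU. A sketch, with the srcu_struct and handler names invented:

    #include <linux/srcu.h>

    DEFINE_SRCU(foo_srcu);

    static void foo_nmi_handler(void)
    {
            int idx;

            idx = srcu_read_lock_nmisafe(&foo_srcu);
            /* ...read foo_srcu-protected state; safe even in NMI... */
            srcu_read_unlock_nmisafe(&foo_srcu, idx);
    }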
*/ static inline notrace void srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp) { + srcu_check_nmi_safety(ssp, false); __srcu_read_unlock(ssp, idx); } diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index e3014319d1ad..c689a81752c9 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -23,8 +23,9 @@ struct srcu_struct; */ struct srcu_data { /* Read-side state. */ - unsigned long srcu_lock_count[2]; /* Locks per CPU. */ - unsigned long srcu_unlock_count[2]; /* Unlocks per CPU. */ + atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */ + atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */ + int srcu_nmi_safety; /* NMI-safe srcu_struct structure? */ /* Update-side state. */ spinlock_t __private lock ____cacheline_internodealigned_in_smp; diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h index 4c678c4fec58..9c88707d9a0f 100644 --- a/include/linux/stackprotector.h +++ b/include/linux/stackprotector.h @@ -6,6 +6,25 @@ #include <linux/sched.h> #include <linux/random.h> +/* + * On 64-bit architectures, protect against non-terminated C string overflows + * by zeroing out the first byte of the canary; this leaves 56 bits of entropy. + */ +#ifdef CONFIG_64BIT +# ifdef __LITTLE_ENDIAN +# define CANARY_MASK 0xffffffffffffff00UL +# else /* big endian, 64 bits: */ +# define CANARY_MASK 0x00ffffffffffffffUL +# endif +#else /* 32 bits: */ +# define CANARY_MASK 0xffffffffUL +#endif + +static inline unsigned long get_random_canary(void) +{ + return get_random_long() & CANARY_MASK; +} + #if defined(CONFIG_STACKPROTECTOR) || defined(CONFIG_ARM64_PTR_AUTH) # include <asm/stackprotector.h> #else diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 88de45491376..ed722dd33b46 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -220,13 +220,6 @@ static inline __be32 svc_getu32(struct kvec *iov) return val; } -static inline void svc_ungetu32(struct kvec *iov) -{ - __be32 *vp = (__be32 *)iov->iov_base; - iov->iov_base = (void *)(vp - 1); - iov->iov_len += sizeof(*vp); -} - static inline void svc_putu32(struct kvec *iov, __be32 val) { __be32 *vp = iov->iov_base + iov->iov_len; @@ -311,7 +304,6 @@ struct svc_rqst { struct auth_domain * rq_gssclient; /* "gss/"-style peer info */ struct svc_cacherep * rq_cacherep; /* cache info */ struct task_struct *rq_task; /* service thread */ - spinlock_t rq_lock; /* per-request lock */ struct net *rq_bc_net; /* pointer to backchannel's * net namespace */ diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 86b95ccb81bb..b07b277d6a16 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -33,11 +33,13 @@ * can use the extra bits to store other information besides PFN. */ #ifdef MAX_PHYSMEM_BITS -#define SWP_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) +#define SWP_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) #else /* MAX_PHYSMEM_BITS */ -#define SWP_PFN_BITS (BITS_PER_LONG - PAGE_SHIFT) +#define SWP_PFN_BITS min_t(int, \ + sizeof(phys_addr_t) * 8 - PAGE_SHIFT, \ + SWP_TYPE_SHIFT) #endif /* MAX_PHYSMEM_BITS */ -#define SWP_PFN_MASK (BIT(SWP_PFN_BITS) - 1) +#define SWP_PFN_MASK (BIT(SWP_PFN_BITS) - 1) /** * Migration swap entry specific bitfield definitions. 
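The canary masking above guarantees a NUL byte inside the canary; for example, on 64-bit little-endian, 0x8f3b2a17c4d652a7 & CANARY_MASK == 0x8f3b2a17c4d65200. A simplified sketch of how an architecture might seed a task's canary with the relocated helper; the real per-arch boot_init_stack_canary() implementations also update per-CPU or TLS state:

    #include <linux/stackprotector.h>

    static __always_inline void foo_init_stack_canary(void)
    {
            /* Low byte is zero (LE), stopping C-string overflows early. */
            current->stack_canary = get_random_canary();
    }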
Layout: diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a34b0f9a9972..33a0ee3bcb2e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -264,6 +264,7 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) #define SC_VAL64(type, name) ((type) name##_hi << 32 | name##_lo) #ifdef CONFIG_COMPAT +#define SYSCALL32_DEFINE0 COMPAT_SYSCALL_DEFINE0 #define SYSCALL32_DEFINE1 COMPAT_SYSCALL_DEFINE1 #define SYSCALL32_DEFINE2 COMPAT_SYSCALL_DEFINE2 #define SYSCALL32_DEFINE3 COMPAT_SYSCALL_DEFINE3 @@ -271,6 +272,7 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) #define SYSCALL32_DEFINE5 COMPAT_SYSCALL_DEFINE5 #define SYSCALL32_DEFINE6 COMPAT_SYSCALL_DEFINE6 #else +#define SYSCALL32_DEFINE0 SYSCALL_DEFINE0 #define SYSCALL32_DEFINE1 SYSCALL_DEFINE1 #define SYSCALL32_DEFINE2 SYSCALL_DEFINE2 #define SYSCALL32_DEFINE3 SYSCALL_DEFINE3 diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 9ecc128944a1..5e093602e8fc 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -100,6 +100,7 @@ struct thermal_cooling_device_ops { struct thermal_cooling_device { int id; char *type; + unsigned long max_state; struct device device; struct device_node *np; void *devdata; diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 3146f1c056c9..bb9d3f5542f8 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -45,6 +45,7 @@ struct time_namespace *copy_time_ns(unsigned long flags, void free_time_ns(struct time_namespace *ns); void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk); struct vdso_data *arch_get_vdso_data(void *vvar_page); +struct page *find_timens_vvar_page(struct vm_area_struct *vma); static inline void put_time_ns(struct time_namespace *ns) { @@ -141,6 +142,11 @@ static inline void timens_on_fork(struct nsproxy *nsproxy, return; } +static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} + static inline void timens_add_monotonic(struct timespec64 *ts) { } static inline void timens_add_boottime(struct timespec64 *ts) { } diff --git a/include/linux/timer.h b/include/linux/timer.h index 648f00105f58..9162f275819a 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -169,7 +169,6 @@ static inline int timer_pending(const struct timer_list * timer) } extern void add_timer_on(struct timer_list *timer, int cpu); -extern int del_timer(struct timer_list * timer); extern int mod_timer(struct timer_list *timer, unsigned long expires); extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); extern int timer_reduce(struct timer_list *timer, unsigned long expires); @@ -183,14 +182,36 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires); extern void add_timer(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); +extern int timer_delete_sync(struct timer_list *timer); +extern int timer_delete(struct timer_list *timer); +extern int timer_shutdown_sync(struct timer_list *timer); +extern int timer_shutdown(struct timer_list *timer); -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) - extern int del_timer_sync(struct timer_list *timer); -#else -# define del_timer_sync(t) del_timer(t) -#endif +/** + * del_timer_sync - Delete a pending timer and wait for a running callback + * @timer: The timer to be deleted + * + * See timer_delete_sync() for detailed explanation. + * + * Do not use in new code. 
Use timer_delete_sync() instead. + */ +static inline int del_timer_sync(struct timer_list *timer) +{ + return timer_delete_sync(timer); +} -#define del_singleshot_timer_sync(t) del_timer_sync(t) +/** + * del_timer - Delete a pending timer + * @timer: The timer to be deleted + * + * See timer_delete() for detailed explanation. + * + * Do not use in new code. Use timer_delete() instead. + */ +static inline int del_timer(struct timer_list *timer) +{ + return timer_delete(timer); +} extern void init_timers(void); struct hrtimer; diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h index 93884086f392..adc80e29168e 100644 --- a/include/linux/timerqueue.h +++ b/include/linux/timerqueue.h @@ -35,7 +35,7 @@ struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head) { struct rb_node *leftmost = rb_first_cached(&head->rb_root); - return rb_entry(leftmost, struct timerqueue_node, node); + return rb_entry_safe(leftmost, struct timerqueue_node, node); } static inline void timerqueue_init(struct timerqueue_node *node) diff --git a/include/linux/trace.h b/include/linux/trace.h index b5e16e438448..80ffda871749 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -26,13 +26,13 @@ struct trace_export { int flags; }; +struct trace_array; + #ifdef CONFIG_TRACING int register_ftrace_export(struct trace_export *export); int unregister_ftrace_export(struct trace_export *export); -struct trace_array; - void trace_printk_init_buffers(void); __printf(3, 4) int trace_array_printk(struct trace_array *tr, unsigned long ip, diff --git a/include/linux/uio.h b/include/linux/uio.h index 2e3134b14ffd..87fc3d0dda98 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -29,6 +29,9 @@ enum iter_type { ITER_UBUF, }; +#define ITER_SOURCE 1 // == WRITE +#define ITER_DEST 0 // == READ + struct iov_iter_state { size_t iov_offset; size_t count; diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index f07e6998bb68..9df0b9a762cc 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -146,9 +146,9 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma) static inline bool vma_can_userfault(struct vm_area_struct *vma, unsigned long vm_flags) { - if (vm_flags & VM_UFFD_MINOR) - return is_vm_hugetlb_page(vma) || vma_is_shmem(vma); - + if ((vm_flags & VM_UFFD_MINOR) && + (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma))) + return false; #ifndef CONFIG_PTE_MARKER_UFFD_WP /* * If user requested uffd-wp but not enabled pte markers for diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 2b1737c9b244..bf7613ba412b 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -10,6 +10,7 @@ #include <uapi/linux/utsname.h> enum uts_proc { + UTS_PROC_ARCH, UTS_PROC_OSTYPE, UTS_PROC_OSRELEASE, UTS_PROC_VERSION, diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e7cebeb875dd..fdd393f70b19 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -189,6 +189,7 @@ int vfio_register_emulated_iommu_dev(struct vfio_device *device); void vfio_unregister_group_dev(struct vfio_device *device); int vfio_assign_device_set(struct vfio_device *device, void *set_id); +unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set); int vfio_mig_get_next_state(struct vfio_device *device, enum vfio_device_mig_state cur_fsm, diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 2218a9645b89..707374bab4c4 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -22,6 +22,12 @@ struct 
inode; struct dentry; +static inline bool is_posix_acl_xattr(const char *name) +{ + return (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || + (strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0); +} + /* * struct xattr_handler: When @name is set, match attributes with exactly that * name. When @prefix is set instead, match attributes with that prefix and