aboutsummaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/alloc_tag.h16
-rw-r--r--include/linux/arch_topology.h4
-rw-r--r--include/linux/arm-smccc.h32
-rw-r--r--include/linux/bio-integrity.h4
-rw-r--r--include/linux/bio.h19
-rw-r--r--include/linux/blk-integrity.h5
-rw-r--r--include/linux/blk-mq.h115
-rw-r--r--include/linux/blkdev.h111
-rw-r--r--include/linux/bpf_mem_alloc.h3
-rw-r--r--include/linux/cleanup.h2
-rw-r--r--include/linux/compiler-gcc.h4
-rw-r--r--include/linux/device.h3
-rw-r--r--include/linux/eventpoll.h2
-rw-r--r--include/linux/exportfs.h13
-rw-r--r--include/linux/fdtable.h5
-rw-r--r--include/linux/file.h8
-rw-r--r--include/linux/file_ref.h177
-rw-r--r--include/linux/filelock.h5
-rw-r--r--include/linux/fs.h110
-rw-r--r--include/linux/fs_context.h6
-rw-r--r--include/linux/fs_parser.h5
-rw-r--r--include/linux/input.h10
-rw-r--r--include/linux/io_uring/cmd.h2
-rw-r--r--include/linux/io_uring_types.h89
-rw-r--r--include/linux/iomap.h20
-rw-r--r--include/linux/jbd2.h15
-rw-r--r--include/linux/ksm.h10
-rw-r--r--include/linux/libata.h4
-rw-r--r--include/linux/lsm/apparmor.h17
-rw-r--r--include/linux/lsm/bpf.h16
-rw-r--r--include/linux/lsm/selinux.h16
-rw-r--r--include/linux/lsm/smack.h17
-rw-r--r--include/linux/lsm_hook_defs.h20
-rw-r--r--include/linux/memcontrol.h12
-rw-r--r--include/linux/mman.h28
-rw-r--r--include/linux/mmzone.h8
-rw-r--r--include/linux/netlink.h2
-rw-r--r--include/linux/nfslocalio.h3
-rw-r--r--include/linux/nvme.h135
-rw-r--r--include/linux/page-flags.h12
-rw-r--r--include/linux/pm_domain.h6
-rw-r--r--include/linux/posix_acl.h6
-rw-r--r--include/linux/sched/task_stack.h2
-rw-r--r--include/linux/security.h98
-rw-r--r--include/linux/sed-opal.h1
-rw-r--r--include/linux/shmem_fs.h6
-rw-r--r--include/linux/soc/qcom/llcc-qcom.h2
-rw-r--r--include/linux/sockptr.h4
-rw-r--r--include/linux/swap.h1
-rw-r--r--include/linux/syscalls.h13
-rw-r--r--include/linux/tick.h8
-rw-r--r--include/linux/timekeeping.h5
-rw-r--r--include/linux/tpm.h3
-rw-r--r--include/linux/uaccess.h118
-rw-r--r--include/linux/unicode.h4
-rw-r--r--include/linux/user_namespace.h3
-rw-r--r--include/linux/userfaultfd_k.h5
-rw-r--r--include/linux/vm_event_item.h2
-rw-r--r--include/linux/wait.h1
-rw-r--r--include/linux/writeback.h32
-rw-r--r--include/linux/xattr.h4
61 files changed, 1096 insertions, 313 deletions
diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
index 1f0a9ff23a2c..941deffc590d 100644
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -135,18 +135,21 @@ static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
#endif
/* Caller should verify both ref and tag to be valid */
-static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
+static inline bool __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
{
alloc_tag_add_check(ref, tag);
if (!ref || !tag)
- return;
+ return false;
ref->ct = &tag->ct;
+ return true;
}
-static inline void alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
+static inline bool alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
{
- __alloc_tag_ref_set(ref, tag);
+ if (unlikely(!__alloc_tag_ref_set(ref, tag)))
+ return false;
+
/*
* We need in increment the call counter every time we have a new
* allocation or when we split a large allocation into smaller ones.
@@ -154,12 +157,13 @@ static inline void alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *t
* counter because when we free each part the counter will be decremented.
*/
this_cpu_inc(tag->counters->calls);
+ return true;
}
static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes)
{
- alloc_tag_ref_set(ref, tag);
- this_cpu_add(tag->counters->bytes, bytes);
+ if (likely(alloc_tag_ref_set(ref, tag)))
+ this_cpu_add(tag->counters->bytes, bytes);
}
static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes)
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index b721f360d759..4a952c4885ed 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -11,10 +11,6 @@
void topology_normalize_cpu_scale(void);
int topology_update_cpu_topology(void);
-#ifdef CONFIG_ACPI_CPPC_LIB
-void topology_init_cpu_capacity_cppc(void);
-#endif
-
struct device_node;
bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu);
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index f59099a213d0..67f6fdf2e7cd 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -315,8 +315,6 @@ u32 arm_smccc_get_version(void);
void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit);
-extern u64 smccc_has_sve_hint;
-
/**
* arm_smccc_get_soc_id_version()
*
@@ -415,15 +413,6 @@ struct arm_smccc_quirk {
};
/**
- * __arm_smccc_sve_check() - Set the SVE hint bit when doing SMC calls
- *
- * Sets the SMCCC hint bit to indicate if there is live state in the SVE
- * registers, this modifies x0 in place and should never be called from C
- * code.
- */
-asmlinkage unsigned long __arm_smccc_sve_check(unsigned long x0);
-
-/**
* __arm_smccc_smc() - make SMC calls
* @a0-a7: arguments passed in registers 0 to 7
* @res: result values from registers 0 to 3
@@ -490,20 +479,6 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
#endif
-/* nVHE hypervisor doesn't have a current thread so needs separate checks */
-#if defined(CONFIG_ARM64_SVE) && !defined(__KVM_NVHE_HYPERVISOR__)
-
-#define SMCCC_SVE_CHECK ALTERNATIVE("nop \n", "bl __arm_smccc_sve_check \n", \
- ARM64_SVE)
-#define smccc_sve_clobbers "x16", "x30", "cc",
-
-#else
-
-#define SMCCC_SVE_CHECK
-#define smccc_sve_clobbers
-
-#endif
-
#define __constraint_read_2 "r" (arg0)
#define __constraint_read_3 __constraint_read_2, "r" (arg1)
#define __constraint_read_4 __constraint_read_3, "r" (arg2)
@@ -574,12 +549,11 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
register unsigned long r3 asm("r3"); \
CONCATENATE(__declare_arg_, \
COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__); \
- asm volatile(SMCCC_SVE_CHECK \
- inst "\n" : \
+ asm volatile(inst "\n" : \
"=r" (r0), "=r" (r1), "=r" (r2), "=r" (r3) \
: CONCATENATE(__constraint_read_, \
COUNT_ARGS(__VA_ARGS__)) \
- : smccc_sve_clobbers "memory"); \
+ : "memory"); \
if (___res) \
*___res = (typeof(*___res)){r0, r1, r2, r3}; \
} while (0)
@@ -628,7 +602,7 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
asm ("" : \
: CONCATENATE(__constraint_read_, \
COUNT_ARGS(__VA_ARGS__)) \
- : smccc_sve_clobbers "memory"); \
+ : "memory"); \
if (___res) \
___res->a0 = SMCCC_RET_NOT_SUPPORTED; \
} while (0)
diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
index dd831c269e99..dbf0f74c1529 100644
--- a/include/linux/bio-integrity.h
+++ b/include/linux/bio-integrity.h
@@ -72,7 +72,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp,
unsigned int nr);
int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len,
unsigned int offset);
-int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed);
+int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len);
void bio_integrity_unmap_user(struct bio *bio);
bool bio_integrity_prep(struct bio *bio);
void bio_integrity_advance(struct bio *bio, unsigned int bytes_done);
@@ -99,7 +99,7 @@ static inline void bioset_integrity_free(struct bio_set *bs)
}
static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
- ssize_t len, u32 seed)
+ ssize_t len)
{
return -EINVAL;
}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index faceadb040f9..60830a6a5939 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -418,8 +418,6 @@ bool __must_check bio_add_folio(struct bio *bio, struct folio *folio,
size_t len, size_t off);
extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
unsigned int, unsigned int);
-int bio_add_zone_append_page(struct bio *bio, struct page *page,
- unsigned int len, unsigned int offset);
void __bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off);
void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
@@ -677,6 +675,23 @@ static inline void bio_clear_polled(struct bio *bio)
bio->bi_opf &= ~REQ_POLLED;
}
+/**
+ * bio_is_zone_append - is this a zone append bio?
+ * @bio: bio to check
+ *
+ * Check if @bio is a zone append operation. Core block layer code and end_io
+ * handlers must use this instead of an open coded REQ_OP_ZONE_APPEND check
+ * because the block layer can rewrite REQ_OP_ZONE_APPEND to REQ_OP_WRITE if
+ * it is not natively supported.
+ */
+static inline bool bio_is_zone_append(struct bio *bio)
+{
+ if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
+ return false;
+ return bio_op(bio) == REQ_OP_ZONE_APPEND ||
+ bio_flagged(bio, BIO_EMULATES_ZONE_APPEND);
+}
+
struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
unsigned int nr_pages, blk_opf_t opf, gfp_t gfp);
struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new);
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index 676f8f860c47..c7eae0bfb013 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -28,7 +28,7 @@ static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t,
int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
- ssize_t bytes, u32 seed);
+ ssize_t bytes);
static inline bool
blk_integrity_queue_supports_integrity(struct request_queue *q)
@@ -104,8 +104,7 @@ static inline int blk_rq_map_integrity_sg(struct request *q,
}
static inline int blk_rq_integrity_map_user(struct request *rq,
void __user *ubuf,
- ssize_t bytes,
- u32 seed)
+ ssize_t bytes)
{
return -EINVAL;
}
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 4fecf46ef681..c596e0e4cb75 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -156,9 +156,6 @@ struct request {
struct blk_crypto_keyslot *crypt_keyslot;
#endif
- enum rw_hint write_hint;
- unsigned short ioprio;
-
enum mq_rq_state state;
atomic_t ref;
@@ -222,7 +219,9 @@ static inline bool blk_rq_is_passthrough(struct request *rq)
static inline unsigned short req_get_ioprio(struct request *req)
{
- return req->ioprio;
+ if (req->bio)
+ return req->bio->bi_ioprio;
+ return 0;
}
#define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ)
@@ -230,62 +229,61 @@ static inline unsigned short req_get_ioprio(struct request *req)
#define rq_dma_dir(rq) \
(op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE)
-#define rq_list_add(listptr, rq) do { \
- (rq)->rq_next = *(listptr); \
- *(listptr) = rq; \
-} while (0)
-
-#define rq_list_add_tail(lastpptr, rq) do { \
- (rq)->rq_next = NULL; \
- **(lastpptr) = rq; \
- *(lastpptr) = &rq->rq_next; \
-} while (0)
-
-#define rq_list_pop(listptr) \
-({ \
- struct request *__req = NULL; \
- if ((listptr) && *(listptr)) { \
- __req = *(listptr); \
- *(listptr) = __req->rq_next; \
- } \
- __req; \
-})
+static inline int rq_list_empty(const struct rq_list *rl)
+{
+ return rl->head == NULL;
+}
-#define rq_list_peek(listptr) \
-({ \
- struct request *__req = NULL; \
- if ((listptr) && *(listptr)) \
- __req = *(listptr); \
- __req; \
-})
+static inline void rq_list_init(struct rq_list *rl)
+{
+ rl->head = NULL;
+ rl->tail = NULL;
+}
-#define rq_list_for_each(listptr, pos) \
- for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos))
+static inline void rq_list_add_tail(struct rq_list *rl, struct request *rq)
+{
+ rq->rq_next = NULL;
+ if (rl->tail)
+ rl->tail->rq_next = rq;
+ else
+ rl->head = rq;
+ rl->tail = rq;
+}
-#define rq_list_for_each_safe(listptr, pos, nxt) \
- for (pos = rq_list_peek((listptr)), nxt = rq_list_next(pos); \
- pos; pos = nxt, nxt = pos ? rq_list_next(pos) : NULL)
+static inline void rq_list_add_head(struct rq_list *rl, struct request *rq)
+{
+ rq->rq_next = rl->head;
+ rl->head = rq;
+ if (!rl->tail)
+ rl->tail = rq;
+}
-#define rq_list_next(rq) (rq)->rq_next
-#define rq_list_empty(list) ((list) == (struct request *) NULL)
+static inline struct request *rq_list_pop(struct rq_list *rl)
+{
+ struct request *rq = rl->head;
-/**
- * rq_list_move() - move a struct request from one list to another
- * @src: The source list @rq is currently in
- * @dst: The destination list that @rq will be appended to
- * @rq: The request to move
- * @prev: The request preceding @rq in @src (NULL if @rq is the head)
- */
-static inline void rq_list_move(struct request **src, struct request **dst,
- struct request *rq, struct request *prev)
+ if (rq) {
+ rl->head = rl->head->rq_next;
+ if (!rl->head)
+ rl->tail = NULL;
+ rq->rq_next = NULL;
+ }
+
+ return rq;
+}
+
+static inline struct request *rq_list_peek(struct rq_list *rl)
{
- if (prev)
- prev->rq_next = rq->rq_next;
- else
- *src = rq->rq_next;
- rq_list_add(dst, rq);
+ return rl->head;
}
+#define rq_list_for_each(rl, pos) \
+ for (pos = rq_list_peek((rl)); (pos); pos = pos->rq_next)
+
+#define rq_list_for_each_safe(rl, pos, nxt) \
+ for (pos = rq_list_peek((rl)), nxt = pos->rq_next; \
+ pos; pos = nxt, nxt = pos ? pos->rq_next : NULL)
+
/**
* enum blk_eh_timer_return - How the timeout handler should proceed
* @BLK_EH_DONE: The block driver completed the command or will complete it at
@@ -577,7 +575,7 @@ struct blk_mq_ops {
* empty the @rqlist completely, then the rest will be queued
* individually by the block layer upon return.
*/
- void (*queue_rqs)(struct request **rqlist);
+ void (*queue_rqs)(struct rq_list *rqlist);
/**
* @get_budget: Reserve budget before queue request, once .queue_rq is
@@ -857,12 +855,6 @@ void blk_mq_end_request_batch(struct io_comp_batch *ib);
*/
static inline bool blk_mq_need_time_stamp(struct request *rq)
{
- /*
- * passthrough io doesn't use iostat accounting, cgroup stats
- * and io scheduler functionalities.
- */
- if (blk_rq_is_passthrough(rq))
- return false;
return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED));
}
@@ -892,7 +884,7 @@ static inline bool blk_mq_add_to_batch(struct request *req,
else if (iob->complete != complete)
return false;
iob->need_ts |= blk_mq_need_time_stamp(req);
- rq_list_add(&iob->req_list, req);
+ rq_list_add_tail(&iob->req_list, req);
return true;
}
@@ -925,6 +917,8 @@ void blk_freeze_queue_start(struct request_queue *q);
void blk_mq_freeze_queue_wait(struct request_queue *q);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
unsigned long timeout);
+void blk_mq_unfreeze_queue_non_owner(struct request_queue *q);
+void blk_freeze_queue_start_non_owner(struct request_queue *q);
void blk_mq_map_queues(struct blk_mq_queue_map *qmap);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
@@ -989,7 +983,6 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio,
rq->nr_phys_segments = nr_segs;
rq->__data_len = bio->bi_iter.bi_size;
rq->bio = rq->biotail = bio;
- rq->ioprio = bio_prio(bio);
}
void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 50c3b959da28..a1fd0ddce5cf 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -25,6 +25,7 @@
#include <linux/uuid.h>
#include <linux/xarray.h>
#include <linux/file.h>
+#include <linux/lockdep.h>
struct module;
struct request_queue;
@@ -194,7 +195,7 @@ struct gendisk {
unsigned int nr_zones;
unsigned int zone_capacity;
unsigned int last_zone_capacity;
- unsigned long *conv_zones_bitmap;
+ unsigned long __rcu *conv_zones_bitmap;
unsigned int zone_wplugs_hash_bits;
spinlock_t zone_wplugs_lock;
struct mempool_s *zone_wplugs_pool;
@@ -349,6 +350,9 @@ typedef unsigned int __bitwise blk_flags_t;
/* I/O topology is misaligned */
#define BLK_FLAG_MISALIGNED ((__force blk_flags_t)(1u << 1))
+/* passthrough command IO accounting */
+#define BLK_FLAG_IOSTATS_PASSTHROUGH ((__force blk_flags_t)(1u << 2))
+
struct queue_limits {
blk_features_t features;
blk_flags_t flags;
@@ -371,6 +375,7 @@ struct queue_limits {
unsigned int max_user_discard_sectors;
unsigned int max_secure_erase_sectors;
unsigned int max_write_zeroes_sectors;
+ unsigned int max_hw_zone_append_sectors;
unsigned int max_zone_append_sectors;
unsigned int discard_granularity;
unsigned int discard_alignment;
@@ -471,6 +476,11 @@ struct request_queue {
struct xarray hctx_table;
struct percpu_ref q_usage_counter;
+ struct lock_class_key io_lock_cls_key;
+ struct lockdep_map io_lockdep_map;
+
+ struct lock_class_key q_lock_cls_key;
+ struct lockdep_map q_lockdep_map;
struct request *last_merge;
@@ -566,6 +576,10 @@ struct request_queue {
struct throtl_data *td;
#endif
struct rcu_head rcu_head;
+#ifdef CONFIG_LOCKDEP
+ struct task_struct *mq_freeze_owner;
+ int mq_freeze_owner_depth;
+#endif
wait_queue_head_t mq_freeze_wq;
/*
* Protect concurrent access to q_usage_counter by
@@ -617,6 +631,8 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
#define blk_queue_nonrot(q) (!((q)->limits.features & BLK_FEAT_ROTATIONAL))
#define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT)
+#define blk_queue_passthrough_stat(q) \
+ ((q)->limits.flags & BLK_FLAG_IOSTATS_PASSTHROUGH)
#define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX)
#define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA)
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
@@ -725,6 +741,9 @@ static inline unsigned int blk_queue_depth(struct request_queue *q)
#define for_each_bio(_bio) \
for (; _bio; _bio = _bio->bi_next)
+int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk,
+ const struct attribute_group **groups,
+ struct fwnode_handle *fwnode);
int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
const struct attribute_group **groups);
static inline int __must_check add_disk(struct gendisk *disk)
@@ -929,6 +948,7 @@ queue_limits_start_update(struct request_queue *q)
int queue_limits_commit_update(struct request_queue *q,
struct queue_limits *lim);
int queue_limits_set(struct request_queue *q, struct queue_limits *lim);
+int blk_validate_limits(struct queue_limits *lim);
/**
* queue_limits_cancel_update - cancel an atomic update of queue limits
@@ -986,6 +1006,11 @@ extern void blk_put_queue(struct request_queue *);
void blk_mark_disk_dead(struct gendisk *disk);
+struct rq_list {
+ struct request *head;
+ struct request *tail;
+};
+
#ifdef CONFIG_BLOCK
/*
* blk_plug permits building a queue of related requests by holding the I/O
@@ -999,10 +1024,10 @@ void blk_mark_disk_dead(struct gendisk *disk);
* blk_flush_plug() is called.
*/
struct blk_plug {
- struct request *mq_list; /* blk-mq requests */
+ struct rq_list mq_list; /* blk-mq requests */
/* if ios_left is > 1, we can batch tag/rq allocations */
- struct request *cached_rq;
+ struct rq_list cached_rqs;
u64 cur_ktime;
unsigned short nr_ios;
@@ -1145,6 +1170,11 @@ enum blk_default_limits {
*/
#define BLK_DEF_MAX_SECTORS_CAP 2560u
+static inline struct queue_limits *bdev_limits(struct block_device *bdev)
+{
+ return &bdev_get_queue(bdev)->limits;
+}
+
static inline unsigned long queue_segment_boundary(const struct request_queue *q)
{
return q->limits.seg_boundary_mask;
@@ -1185,25 +1215,9 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q)
return q->limits.max_segment_size;
}
-static inline unsigned int
-queue_limits_max_zone_append_sectors(const struct queue_limits *l)
-{
- unsigned int max_sectors = min(l->chunk_sectors, l->max_hw_sectors);
-
- return min_not_zero(l->max_zone_append_sectors, max_sectors);
-}
-
-static inline unsigned int queue_max_zone_append_sectors(struct request_queue *q)
-{
- if (!blk_queue_is_zoned(q))
- return 0;
-
- return queue_limits_max_zone_append_sectors(&q->limits);
-}
-
static inline bool queue_emulates_zone_append(struct request_queue *q)
{
- return blk_queue_is_zoned(q) && !q->limits.max_zone_append_sectors;
+ return blk_queue_is_zoned(q) && !q->limits.max_hw_zone_append_sectors;
}
static inline bool bdev_emulates_zone_append(struct block_device *bdev)
@@ -1214,7 +1228,7 @@ static inline bool bdev_emulates_zone_append(struct block_device *bdev)
static inline unsigned int
bdev_max_zone_append_sectors(struct block_device *bdev)
{
- return queue_max_zone_append_sectors(bdev_get_queue(bdev));
+ return bdev_limits(bdev)->max_zone_append_sectors;
}
static inline unsigned int bdev_max_segments(struct block_device *bdev)
@@ -1279,23 +1293,23 @@ unsigned int bdev_discard_alignment(struct block_device *bdev);
static inline unsigned int bdev_max_discard_sectors(struct block_device *bdev)
{
- return bdev_get_queue(bdev)->limits.max_discard_sectors;
+ return bdev_limits(bdev)->max_discard_sectors;
}
static inline unsigned int bdev_discard_granularity(struct block_device *bdev)
{
- return bdev_get_queue(bdev)->limits.discard_granularity;
+ return bdev_limits(bdev)->discard_granularity;
}
static inline unsigned int
bdev_max_secure_erase_sectors(struct block_device *bdev)
{
- return bdev_get_queue(bdev)->limits.max_secure_erase_sectors;
+ return bdev_limits(bdev)->max_secure_erase_sectors;
}
static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
{
- return bdev_get_queue(bdev)->limits.max_write_zeroes_sectors;
+ return bdev_limits(bdev)->max_write_zeroes_sectors;
}
static inline bool bdev_nonrot(struct block_device *bdev)
@@ -1331,7 +1345,7 @@ static inline bool bdev_write_cache(struct block_device *bdev)
static inline bool bdev_fua(struct block_device *bdev)
{
- return bdev_get_queue(bdev)->limits.features & BLK_FEAT_FUA;
+ return bdev_limits(bdev)->features & BLK_FEAT_FUA;
}
static inline bool bdev_nowait(struct block_device *bdev)
@@ -1376,6 +1390,33 @@ static inline bool bdev_is_zone_start(struct block_device *bdev,
return bdev_offset_from_zone_start(bdev, sector) == 0;
}
+/**
+ * bdev_zone_is_seq - check if a sector belongs to a sequential write zone
+ * @bdev: block device to check
+ * @sector: sector number
+ *
+ * Check if @sector on @bdev is contained in a sequential write required zone.
+ */
+static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
+{
+ bool is_seq = false;
+
+#if IS_ENABLED(CONFIG_BLK_DEV_ZONED)
+ if (bdev_is_zoned(bdev)) {
+ struct gendisk *disk = bdev->bd_disk;
+ unsigned long *bitmap;
+
+ rcu_read_lock();
+ bitmap = rcu_dereference(disk->conv_zones_bitmap);
+ is_seq = !bitmap ||
+ !test_bit(disk_zone_no(disk, sector), bitmap);
+ rcu_read_unlock();
+ }
+#endif
+
+ return is_seq;
+}
+
static inline int queue_dma_alignment(const struct request_queue *q)
{
return q->limits.dma_alignment;
@@ -1648,7 +1689,7 @@ int bdev_thaw(struct block_device *bdev);
void bdev_fput(struct file *bdev_file);
struct io_comp_batch {
- struct request *req_list;
+ struct rq_list req_list;
bool need_ts;
void (*complete)(struct io_comp_batch *);
};
@@ -1674,6 +1715,22 @@ static inline bool bdev_can_atomic_write(struct block_device *bdev)
return true;
}
+static inline unsigned int
+bdev_atomic_write_unit_min_bytes(struct block_device *bdev)
+{
+ if (!bdev_can_atomic_write(bdev))
+ return 0;
+ return queue_atomic_write_unit_min_bytes(bdev_get_queue(bdev));
+}
+
+static inline unsigned int
+bdev_atomic_write_unit_max_bytes(struct block_device *bdev)
+{
+ if (!bdev_can_atomic_write(bdev))
+ return 0;
+ return queue_atomic_write_unit_max_bytes(bdev_get_queue(bdev));
+}
+
#define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { }
#endif /* _LINUX_BLKDEV_H */
diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h
index aaf004d94322..e45162ef59bb 100644
--- a/include/linux/bpf_mem_alloc.h
+++ b/include/linux/bpf_mem_alloc.h
@@ -33,6 +33,9 @@ int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg
int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size);
void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma);
+/* Check the allocation size for kmalloc equivalent allocator */
+int bpf_mem_alloc_check_size(bool percpu, size_t size);
+
/* kmalloc/kfree equivalent: */
void *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size);
void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr);
diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index 038b2d523bf8..875c998275c0 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -234,7 +234,7 @@ const volatile void * __must_check_fn(const volatile void *val)
* DEFINE_CLASS(fdget, struct fd, fdput(_T), fdget(fd), int fd)
*
* CLASS(fdget, f)(fd);
- * if (!fd_file(f))
+ * if (fd_empty(f))
* return -EBADF;
*
* // use 'f' without concern
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index f805adaa316e..cd6f9aae311f 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -80,7 +80,11 @@
#define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
#endif
+#ifdef __SANITIZE_HWADDRESS__
+#define __no_sanitize_address __attribute__((__no_sanitize__("hwaddress")))
+#else
#define __no_sanitize_address __attribute__((__no_sanitize_address__))
+#endif
#if defined(__SANITIZE_THREAD__)
#define __no_sanitize_thread __attribute__((__no_sanitize_thread__))
diff --git a/include/linux/device.h b/include/linux/device.h
index b4bde8d22697..667cb6db9019 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1078,6 +1078,9 @@ int device_for_each_child(struct device *dev, void *data,
int (*fn)(struct device *dev, void *data));
int device_for_each_child_reverse(struct device *dev, void *data,
int (*fn)(struct device *dev, void *data));
+int device_for_each_child_reverse_from(struct device *parent,
+ struct device *from, const void *data,
+ int (*fn)(struct device *, const void *));
struct device *device_find_child(struct device *dev, void *data,
int (*match)(struct device *dev, void *data));
struct device *device_find_child_by_name(struct device *parent,
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 3337745d81bd..0c0d00fcd131 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -42,7 +42,7 @@ static inline void eventpoll_release(struct file *file)
* because the file in on the way to be removed and nobody ( but
* eventpoll ) has still a reference to this file.
*/
- if (likely(!file->f_ep))
+ if (likely(!READ_ONCE(file->f_ep)))
return;
/*
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 893a1d21dc1c..1ab165c2939f 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -250,19 +250,6 @@ struct export_operations {
unsigned long flags;
};
-/**
- * exportfs_lock_op_is_async() - export op supports async lock operation
- * @export_ops: the nfs export operations to check
- *
- * Returns true if the nfs export_operations structure has
- * EXPORT_OP_ASYNC_LOCK in their flags set
- */
-static inline bool
-exportfs_lock_op_is_async(const struct export_operations *export_ops)
-{
- return export_ops->flags & EXPORT_OP_ASYNC_LOCK;
-}
-
extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
int *max_len, struct inode *parent,
int flags);
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index b1c5722f2b3c..c45306a9f007 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -92,10 +92,6 @@ static inline struct file *files_lookup_fd_locked(struct files_struct *files, un
return files_lookup_fd_raw(files, fd);
}
-struct file *lookup_fdget_rcu(unsigned int fd);
-struct file *task_lookup_fdget_rcu(struct task_struct *task, unsigned int fd);
-struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *fd);
-
static inline bool close_on_exec(unsigned int fd, const struct files_struct *files)
{
return test_bit(fd, files_fdtable(files)->close_on_exec);
@@ -115,7 +111,6 @@ int iterate_fd(struct files_struct *, unsigned,
const void *);
extern int close_fd(unsigned int fd);
-extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
extern struct file *file_close_fd(unsigned int fd);
extern struct kmem_cache *files_cachep;
diff --git a/include/linux/file.h b/include/linux/file.h
index f98de143245a..302f11355b10 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -30,12 +30,6 @@ extern struct file *alloc_file_pseudo_noaccount(struct inode *, struct vfsmount
extern struct file *alloc_file_clone(struct file *, int flags,
const struct file_operations *);
-static inline void fput_light(struct file *file, int fput_needed)
-{
- if (fput_needed)
- fput(file);
-}
-
/* either a reference to struct file + flags
* (cloned vs. borrowed, pos locked), with
* flags stored in lower bits of value,
@@ -72,6 +66,7 @@ static inline void fdput(struct fd fd)
extern struct file *fget(unsigned int fd);
extern struct file *fget_raw(unsigned int fd);
extern struct file *fget_task(struct task_struct *task, unsigned int fd);
+extern struct file *fget_task_next(struct task_struct *task, unsigned int *fd);
extern void __f_unlock_pos(struct file *);
struct fd fdget(unsigned int fd);
@@ -87,6 +82,7 @@ static inline void fdput_pos(struct fd f)
DEFINE_CLASS(fd, struct fd, fdput(_T), fdget(fd), int fd)
DEFINE_CLASS(fd_raw, struct fd, fdput(_T), fdget_raw(fd), int fd)
+DEFINE_CLASS(fd_pos, struct fd, fdput_pos(_T), fdget_pos(fd), int fd)
extern int f_dupfd(unsigned int from, struct file *file, unsigned flags);
extern int replace_fd(unsigned fd, struct file *file, unsigned flags);
diff --git a/include/linux/file_ref.h b/include/linux/file_ref.h
new file mode 100644
index 000000000000..9b3a8d9b17ab
--- /dev/null
+++ b/include/linux/file_ref.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _LINUX_FILE_REF_H
+#define _LINUX_FILE_REF_H
+
+#include <linux/atomic.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+
+/*
+ * file_ref is a reference count implementation specifically for use by
+ * files. It takes inspiration from rcuref but differs in key aspects
+ * such as support for SLAB_TYPESAFE_BY_RCU type caches.
+ *
+ * FILE_REF_ONEREF FILE_REF_MAXREF
+ * 0x0000000000000000UL 0x7FFFFFFFFFFFFFFFUL
+ * <-------------------valid ------------------->
+ *
+ * FILE_REF_SATURATED
+ * 0x8000000000000000UL 0xA000000000000000UL 0xBFFFFFFFFFFFFFFFUL
+ * <-----------------------saturation zone---------------------->
+ *
+ * FILE_REF_RELEASED FILE_REF_DEAD
+ * 0xC000000000000000UL 0xE000000000000000UL
+ * <-------------------dead zone------------------->
+ *
+ * FILE_REF_NOREF
+ * 0xFFFFFFFFFFFFFFFFUL
+ */
+
+#ifdef CONFIG_64BIT
+#define FILE_REF_ONEREF 0x0000000000000000UL
+#define FILE_REF_MAXREF 0x7FFFFFFFFFFFFFFFUL
+#define FILE_REF_SATURATED 0xA000000000000000UL
+#define FILE_REF_RELEASED 0xC000000000000000UL
+#define FILE_REF_DEAD 0xE000000000000000UL
+#define FILE_REF_NOREF 0xFFFFFFFFFFFFFFFFUL
+#else
+#define FILE_REF_ONEREF 0x00000000U
+#define FILE_REF_MAXREF 0x7FFFFFFFU
+#define FILE_REF_SATURATED 0xA0000000U
+#define FILE_REF_RELEASED 0xC0000000U
+#define FILE_REF_DEAD 0xE0000000U
+#define FILE_REF_NOREF 0xFFFFFFFFU
+#endif
+
+typedef struct {
+#ifdef CONFIG_64BIT
+ atomic64_t refcnt;
+#else
+ atomic_t refcnt;
+#endif
+} file_ref_t;
+
+/**
+ * file_ref_init - Initialize a file reference count
+ * @ref: Pointer to the reference count
+ * @cnt: The initial reference count typically '1'
+ */
+static inline void file_ref_init(file_ref_t *ref, unsigned long cnt)
+{
+ atomic_long_set(&ref->refcnt, cnt - 1);
+}
+
+bool __file_ref_put(file_ref_t *ref, unsigned long cnt);
+
+/**
+ * file_ref_get - Acquire one reference on a file
+ * @ref: Pointer to the reference count
+ *
+ * Similar to atomic_inc_not_zero() but saturates at FILE_REF_MAXREF.
+ *
+ * Provides full memory ordering.
+ *
+ * Return: False if the attempt to acquire a reference failed. This happens
+ * when the last reference has been put already. True if a reference
+ * was successfully acquired
+ */
+static __always_inline __must_check bool file_ref_get(file_ref_t *ref)
+{
+ /*
+ * Unconditionally increase the reference count with full
+ * ordering. The saturation and dead zones provide enough
+ * tolerance for this.
+ *
+ * If this indicates negative the file in question the fail can
+ * be freed and immediately reused due to SLAB_TYPSAFE_BY_RCU.
+ * Hence, unconditionally altering the file reference count to
+ * e.g., reset the file reference count back to the middle of
+ * the deadzone risk end up marking someone else's file as dead
+ * behind their back.
+ *
+ * It would be possible to do a careful:
+ *
+ * cnt = atomic_long_inc_return();
+ * if (likely(cnt >= 0))
+ * return true;
+ *
+ * and then something like:
+ *
+ * if (cnt >= FILE_REF_RELEASE)
+ * atomic_long_try_cmpxchg(&ref->refcnt, &cnt, FILE_REF_DEAD),
+ *
+ * to set the value back to the middle of the deadzone. But it's
+ * practically impossible to go from FILE_REF_DEAD to
+ * FILE_REF_ONEREF. It would need 2305843009213693952/2^61
+ * file_ref_get()s to resurrect such a dead file.
+ */
+ return !atomic_long_add_negative(1, &ref->refcnt);
+}
+
+/**
+ * file_ref_inc - Acquire one reference on a file
+ * @ref: Pointer to the reference count
+ *
+ * Acquire an additional reference on a file. Warns if the caller didn't
+ * already hold a reference.
+ */
+static __always_inline void file_ref_inc(file_ref_t *ref)
+{
+ long prior = atomic_long_fetch_inc_relaxed(&ref->refcnt);
+ WARN_ONCE(prior < 0, "file_ref_inc() on a released file reference");
+}
+
+/**
+ * file_ref_put -- Release a file reference
+ * @ref: Pointer to the reference count
+ *
+ * Provides release memory ordering, such that prior loads and stores
+ * are done before, and provides an acquire ordering on success such
+ * that free() must come after.
+ *
+ * Return: True if this was the last reference with no future references
+ * possible. This signals the caller that it can safely release
+ * the object which is protected by the reference counter.
+ * False if there are still active references or the put() raced
+ * with a concurrent get()/put() pair. Caller is not allowed to
+ * release the protected object.
+ */
+static __always_inline __must_check bool file_ref_put(file_ref_t *ref)
+{
+ long cnt;
+
+ /*
+ * While files are SLAB_TYPESAFE_BY_RCU and thus file_ref_put()
+ * calls don't risk UAFs when a file is recyclyed, it is still
+ * vulnerable to UAFs caused by freeing the whole slab page once
+ * it becomes unused. Prevent file_ref_put() from being
+ * preempted protects against this.
+ */
+ guard(preempt)();
+ /*
+ * Unconditionally decrease the reference count. The saturation
+ * and dead zones provide enough tolerance for this. If this
+ * fails then we need to handle the last reference drop and
+ * cases inside the saturation and dead zones.
+ */
+ cnt = atomic_long_dec_return(&ref->refcnt);
+ if (cnt >= 0)
+ return false;
+ return __file_ref_put(ref, cnt);
+}
+
+/**
+ * file_ref_read - Read the number of file references
+ * @ref: Pointer to the reference count
+ *
+ * Return: The number of held references (0 ... N)
+ */
+static inline unsigned long file_ref_read(file_ref_t *ref)
+{
+ unsigned long c = atomic_long_read(&ref->refcnt);
+
+ /* Return 0 if within the DEAD zone. */
+ return c >= FILE_REF_RELEASED ? 0 : c + 1;
+}
+
+#endif
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index bb44224c6676..c412ded9171e 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -180,6 +180,11 @@ static inline void locks_wake_up(struct file_lock *fl)
wake_up(&fl->c.flc_wait);
}
+static inline bool locks_can_async_lock(const struct file_operations *fops)
+{
+ return !fops->lock || fops->fop_flags & FOP_ASYNC_LOCK;
+}
+
/* fs/locks.c */
void locks_free_lock_context(struct inode *inode);
void locks_free_lock(struct file_lock *fl);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3559446279c1..7e29433c5ecc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -45,6 +45,8 @@
#include <linux/slab.h>
#include <linux/maple_tree.h>
#include <linux/rw_hint.h>
+#include <linux/file_ref.h>
+#include <linux/unicode.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -623,6 +625,7 @@ is_uncached_acl(struct posix_acl *acl)
#define IOP_NOFOLLOW 0x0004
#define IOP_XATTR 0x0008
#define IOP_DEFAULT_READLINK 0x0010
+#define IOP_MGTIME 0x0020
/*
* Keep mostly read-only and often accessed (especially for
@@ -1005,7 +1008,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
/**
* struct file - Represents a file
- * @f_count: reference count
+ * @f_ref: reference count
* @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context.
* @f_mode: FMODE_* flags often used in hotpaths
* @f_op: file operations
@@ -1030,7 +1033,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
* @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.)
*/
struct file {
- atomic_long_t f_count;
+ file_ref_t f_ref;
spinlock_t f_lock;
fmode_t f_mode;
const struct file_operations *f_op;
@@ -1078,15 +1081,14 @@ struct file_handle {
static inline struct file *get_file(struct file *f)
{
- long prior = atomic_long_fetch_inc_relaxed(&f->f_count);
- WARN_ONCE(!prior, "struct file::f_count incremented from zero; use-after-free condition present!\n");
+ file_ref_inc(&f->f_ref);
return f;
}
struct file *get_file_rcu(struct file __rcu **f);
struct file *get_file_active(struct file **f);
-#define file_count(x) atomic_long_read(&(x)->f_count)
+#define file_count(f) file_ref_read(&(f)->f_ref)
#define MAX_NON_LFS ((1UL<<31) - 1)
@@ -1584,6 +1586,8 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
struct timespec64 current_time(struct inode *inode);
struct timespec64 inode_set_ctime_current(struct inode *inode);
+struct timespec64 inode_set_ctime_deleg(struct inode *inode,
+ struct timespec64 update);
static inline time64_t inode_get_atime_sec(const struct inode *inode)
{
@@ -1653,6 +1657,17 @@ static inline struct timespec64 inode_set_mtime(struct inode *inode,
return inode_set_mtime_to_ts(inode, ts);
}
+/*
+ * Multigrain timestamps
+ *
+ * Conditionally use fine-grained ctime and mtime timestamps when there
+ * are users actively observing them via getattr. The primary use-case
+ * for this is NFS clients that use the ctime to distinguish between
+ * different states of the file, and that are often fooled by multiple
+ * operations that occur in the same coarse-grained timer tick.
+ */
+#define I_CTIME_QUERIED ((u32)BIT(31))
+
static inline time64_t inode_get_ctime_sec(const struct inode *inode)
{
return inode->i_ctime_sec;
@@ -1660,7 +1675,7 @@ static inline time64_t inode_get_ctime_sec(const struct inode *inode)
static inline long inode_get_ctime_nsec(const struct inode *inode)
{
- return inode->i_ctime_nsec;
+ return inode->i_ctime_nsec & ~I_CTIME_QUERIED;
}
static inline struct timespec64 inode_get_ctime(const struct inode *inode)
@@ -1671,13 +1686,7 @@ static inline struct timespec64 inode_get_ctime(const struct inode *inode)
return ts;
}
-static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
- struct timespec64 ts)
-{
- inode->i_ctime_sec = ts.tv_sec;
- inode->i_ctime_nsec = ts.tv_nsec;
- return ts;
-}
+struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts);
/**
* inode_set_ctime - set the ctime in the inode
@@ -2116,6 +2125,8 @@ struct file_operations {
#define FOP_HUGE_PAGES ((__force fop_flags_t)(1 << 4))
/* Treat loff_t as unsigned (e.g., /dev/mem) */
#define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5))
+/* Supports asynchronous lock callbacks */
+#define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6))
/* Wrap a directory iterator that needs exclusive inode access */
int wrap_directory_iterator(struct file *, struct dir_context *,
@@ -2542,6 +2553,7 @@ struct file_system_type {
#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
#define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */
#define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */
+#define FS_MGTIME 64 /* FS uses multigrain timestamps */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_spec *parameters;
@@ -2565,6 +2577,17 @@ struct file_system_type {
#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
+/**
+ * is_mgtime: is this inode using multigrain timestamps
+ * @inode: inode to test for multigrain timestamps
+ *
+ * Return true if the inode uses multigrain timestamps, false otherwise.
+ */
+static inline bool is_mgtime(const struct inode *inode)
+{
+ return inode->i_opflags & IOP_MGTIME;
+}
+
extern struct dentry *mount_bdev(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
int (*fill_super)(struct super_block *, void *, int));
@@ -2766,6 +2789,16 @@ extern struct filename *getname_flags(const char __user *, int);
extern struct filename *getname_uflags(const char __user *, int);
extern struct filename *getname(const char __user *);
extern struct filename *getname_kernel(const char *);
+extern struct filename *__getname_maybe_null(const char __user *);
+static inline struct filename *getname_maybe_null(const char __user *name, int flags)
+{
+ if (!(flags & AT_EMPTY_PATH))
+ return getname(name);
+
+ if (!name)
+ return NULL;
+ return __getname_maybe_null(name);
+}
extern void putname(struct filename *name);
extern int finish_open(struct file *file, struct dentry *dentry,
@@ -3326,6 +3359,7 @@ extern void page_put_link(void *);
extern int page_symlink(struct inode *inode, const char *symname, int len);
extern const struct inode_operations page_symlink_inode_operations;
extern void kfree_link(void *);
+void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode);
void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
void generic_fill_statx_atomic_writes(struct kstat *stat,
@@ -3456,6 +3490,54 @@ extern int generic_ci_match(const struct inode *parent,
const struct qstr *folded_name,
const u8 *de_name, u32 de_name_len);
+#if IS_ENABLED(CONFIG_UNICODE)
+int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str);
+int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
+ const char *str, const struct qstr *name);
+
+/**
+ * generic_ci_validate_strict_name - Check if a given name is suitable
+ * for a directory
+ *
+ * This functions checks if the proposed filename is valid for the
+ * parent directory. That means that only valid UTF-8 filenames will be
+ * accepted for casefold directories from filesystems created with the
+ * strict encoding flag. That also means that any name will be
+ * accepted for directories that doesn't have casefold enabled, or
+ * aren't being strict with the encoding.
+ *
+ * @dir: inode of the directory where the new file will be created
+ * @name: name of the new file
+ *
+ * Return:
+ * * True: if the filename is suitable for this directory. It can be
+ * true if a given name is not suitable for a strict encoding
+ * directory, but the directory being used isn't strict
+ * * False if the filename isn't suitable for this directory. This only
+ * happens when a directory is casefolded and the filesystem is strict
+ * about its encoding.
+ */
+static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name)
+{
+ if (!IS_CASEFOLDED(dir) || !sb_has_strict_encoding(dir->i_sb))
+ return true;
+
+ /*
+ * A casefold dir must have a encoding set, unless the filesystem
+ * is corrupted
+ */
+ if (WARN_ON_ONCE(!dir->i_sb->s_encoding))
+ return true;
+
+ return !utf8_validate(dir->i_sb->s_encoding, name);
+}
+#else
+static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name)
+{
+ return true;
+}
+#endif
+
static inline bool sb_has_encoding(const struct super_block *sb)
{
#if IS_ENABLED(CONFIG_UNICODE)
@@ -3726,6 +3808,6 @@ static inline bool vfs_empty_path(int dfd, const char __user *path)
return !c;
}
-bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos);
+int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter);
#endif /* _LINUX_FS_H */
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index c13e99cbbf81..4b4bfef6f053 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -160,6 +160,12 @@ extern int get_tree_keyed(struct fs_context *fc,
int setup_bdev_super(struct super_block *sb, int sb_flags,
struct fs_context *fc);
+
+#define GET_TREE_BDEV_QUIET_LOOKUP 0x0001
+int get_tree_bdev_flags(struct fs_context *fc,
+ int (*fill_super)(struct super_block *sb,
+ struct fs_context *fc), unsigned int flags);
+
extern int get_tree_bdev(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc));
diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
index 6cf713a7e6c6..3cef566088fc 100644
--- a/include/linux/fs_parser.h
+++ b/include/linux/fs_parser.h
@@ -28,7 +28,8 @@ typedef int fs_param_type(struct p_log *,
*/
fs_param_type fs_param_is_bool, fs_param_is_u32, fs_param_is_s32, fs_param_is_u64,
fs_param_is_enum, fs_param_is_string, fs_param_is_blob, fs_param_is_blockdev,
- fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid;
+ fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid,
+ fs_param_is_file_or_string;
/*
* Specification of the type of value a parameter wants.
@@ -133,6 +134,8 @@ static inline bool fs_validate_description(const char *name,
#define fsparam_bdev(NAME, OPT) __fsparam(fs_param_is_blockdev, NAME, OPT, 0, NULL)
#define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0, NULL)
#define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL)
+#define fsparam_file_or_string(NAME, OPT) \
+ __fsparam(fs_param_is_file_or_string, NAME, OPT, 0, NULL)
#define fsparam_uid(NAME, OPT) __fsparam(fs_param_is_uid, NAME, OPT, 0, NULL)
#define fsparam_gid(NAME, OPT) __fsparam(fs_param_is_gid, NAME, OPT, 0, NULL)
diff --git a/include/linux/input.h b/include/linux/input.h
index 89a0be6ee0e2..cd866b020a01 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -339,12 +339,16 @@ struct input_handler {
* @name: name given to the handle by handler that created it
* @dev: input device the handle is attached to
* @handler: handler that works with the device through this handle
+ * @handle_events: event sequence handler. It is set up by the input core
+ * according to event handling method specified in the @handler. See
+ * input_handle_setup_event_handler().
+ * This method is being called by the input core with interrupts disabled
+ * and dev->event_lock spinlock held and so it may not sleep.
* @d_node: used to put the handle on device's list of attached handles
* @h_node: used to put the handle on handler's list of handles from which
* it gets events
*/
struct input_handle {
-
void *private;
int open;
@@ -353,6 +357,10 @@ struct input_handle {
struct input_dev *dev;
struct input_handler *handler;
+ unsigned int (*handle_events)(struct input_handle *handle,
+ struct input_value *vals,
+ unsigned int count);
+
struct list_head d_node;
struct list_head h_node;
};
diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index c189d36ad55e..578a3fdf5c71 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -110,7 +110,7 @@ static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
{
- return cmd_to_io_kiocb(cmd)->task;
+ return cmd_to_io_kiocb(cmd)->tctx->task;
}
#endif /* _LINUX_IO_URING_CMD_H */
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 4b9ba523978d..593c10a02144 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -37,6 +37,7 @@ enum io_uring_cmd_flags {
/* set when uring wants to cancel a previously issued command */
IO_URING_F_CANCEL = (1 << 11),
IO_URING_F_COMPAT = (1 << 12),
+ IO_URING_F_TASK_DEAD = (1 << 13),
};
struct io_wq_work_node {
@@ -55,19 +56,18 @@ struct io_wq_work {
int cancel_seq;
};
-struct io_fixed_file {
- /* file * with additional FFS_* flags */
- unsigned long file_ptr;
+struct io_rsrc_data {
+ unsigned int nr;
+ struct io_rsrc_node **nodes;
};
struct io_file_table {
- struct io_fixed_file *files;
+ struct io_rsrc_data data;
unsigned long *bitmap;
unsigned int alloc_hint;
};
struct io_hash_bucket {
- spinlock_t lock;
struct hlist_head list;
} ____cacheline_aligned_in_smp;
@@ -76,6 +76,12 @@ struct io_hash_table {
unsigned hash_bits;
};
+struct io_mapped_region {
+ struct page **pages;
+ void *vmap_ptr;
+ size_t nr_pages;
+};
+
/*
* Arbitrary limit, can be raised if need be
*/
@@ -85,6 +91,7 @@ struct io_uring_task {
/* submission side */
int cached_refs;
const struct io_ring_ctx *last;
+ struct task_struct *task;
struct io_wq *io_wq;
struct file *registered_rings[IO_RINGFD_REG_MAX];
@@ -270,7 +277,6 @@ struct io_ring_ctx {
* Fixed resources fast path, should be accessed only under
* uring_lock, and updated through io_uring_register(2)
*/
- struct io_rsrc_node *rsrc_node;
atomic_t cancel_seq;
/*
@@ -283,15 +289,13 @@ struct io_ring_ctx {
struct io_wq_work_list iopoll_list;
struct io_file_table file_table;
- struct io_mapped_ubuf **user_bufs;
- unsigned nr_user_files;
- unsigned nr_user_bufs;
+ struct io_rsrc_data buf_table;
struct io_submit_state submit_state;
struct xarray io_bl_xa;
- struct io_hash_table cancel_table_locked;
+ struct io_hash_table cancel_table;
struct io_alloc_cache apoll_cache;
struct io_alloc_cache netmsg_cache;
struct io_alloc_cache rw_cache;
@@ -302,6 +306,11 @@ struct io_ring_ctx {
* ->uring_cmd() by io_uring_cmd_insert_cancelable()
*/
struct hlist_head cancelable_uring_cmd;
+ /*
+ * For Hybrid IOPOLL, runtime in hybrid polling, without
+ * scheduling time
+ */
+ u64 hybrid_poll_time;
} ____cacheline_aligned_in_smp;
struct {
@@ -316,6 +325,9 @@ struct io_ring_ctx {
unsigned cq_entries;
struct io_ev_fd __rcu *io_ev_fd;
unsigned cq_extra;
+
+ void *cq_wait_arg;
+ size_t cq_wait_size;
} ____cacheline_aligned_in_smp;
/*
@@ -342,7 +354,6 @@ struct io_ring_ctx {
struct list_head io_buffers_comp;
struct list_head cq_overflow_list;
- struct io_hash_table cancel_table;
struct hlist_head waitid_list;
@@ -366,16 +377,6 @@ struct io_ring_ctx {
struct wait_queue_head poll_wq;
struct io_restriction restrictions;
- /* slow path rsrc auxilary data, used by update/register */
- struct io_rsrc_data *file_data;
- struct io_rsrc_data *buf_data;
-
- /* protected by ->uring_lock */
- struct list_head rsrc_ref_list;
- struct io_alloc_cache rsrc_node_cache;
- struct wait_queue_head rsrc_quiesce_wq;
- unsigned rsrc_quiesce;
-
u32 pers_next;
struct xarray personalities;
@@ -409,7 +410,7 @@ struct io_ring_ctx {
/* napi busy poll default timeout */
ktime_t napi_busy_poll_dt;
bool napi_prefer_busy_poll;
- bool napi_enabled;
+ u8 napi_track_mode;
DECLARE_HASHTABLE(napi_ht, 4);
#endif
@@ -418,6 +419,13 @@ struct io_ring_ctx {
unsigned evfd_last_cq_tail;
/*
+ * Protection for resize vs mmap races - both the mmap and resize
+ * side will need to grab this lock, to prevent either side from
+ * being run concurrently with the other.
+ */
+ struct mutex resize_lock;
+
+ /*
* If IORING_SETUP_NO_MMAP is used, then the below holds
* the gup'ed pages for the two rings, and the sqes.
*/
@@ -425,6 +433,9 @@ struct io_ring_ctx {
unsigned short n_sqe_pages;
struct page **ring_pages;
struct page **sqe_pages;
+
+ /* used for optimised request parameter and wait argument passing */
+ struct io_mapped_region param_region;
};
struct io_tw_state {
@@ -447,6 +458,7 @@ enum {
REQ_F_LINK_TIMEOUT_BIT,
REQ_F_NEED_CLEANUP_BIT,
REQ_F_POLLED_BIT,
+ REQ_F_HYBRID_IOPOLL_STATE_BIT,
REQ_F_BUFFER_SELECTED_BIT,
REQ_F_BUFFER_RING_BIT,
REQ_F_REISSUE_BIT,
@@ -459,7 +471,6 @@ enum {
REQ_F_DOUBLE_POLL_BIT,
REQ_F_APOLL_MULTISHOT_BIT,
REQ_F_CLEAR_POLLIN_BIT,
- REQ_F_HASH_LOCKED_BIT,
/* keep async read/write and isreg together and in order */
REQ_F_SUPPORT_NOWAIT_BIT,
REQ_F_ISREG_BIT,
@@ -468,6 +479,7 @@ enum {
REQ_F_BL_EMPTY_BIT,
REQ_F_BL_NO_RECYCLE_BIT,
REQ_F_BUFFERS_COMMIT_BIT,
+ REQ_F_BUF_NODE_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -506,6 +518,8 @@ enum {
REQ_F_NEED_CLEANUP = IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT),
/* already went through poll handler */
REQ_F_POLLED = IO_REQ_FLAG(REQ_F_POLLED_BIT),
+ /* every req only blocks once in hybrid poll */
+ REQ_F_IOPOLL_STATE = IO_REQ_FLAG(REQ_F_HYBRID_IOPOLL_STATE_BIT),
/* buffer already selected */
REQ_F_BUFFER_SELECTED = IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT),
/* buffer selected from ring, needs commit */
@@ -534,8 +548,6 @@ enum {
REQ_F_APOLL_MULTISHOT = IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT),
/* recvmsg special flag, clear EPOLLIN */
REQ_F_CLEAR_POLLIN = IO_REQ_FLAG(REQ_F_CLEAR_POLLIN_BIT),
- /* hashed into ->cancel_hash_locked, protected by ->uring_lock */
- REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT),
/* don't use lazy poll wake for this request */
REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT),
/* file is pollable */
@@ -546,6 +558,8 @@ enum {
REQ_F_BL_NO_RECYCLE = IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT),
/* buffer ring head needs incrementing on put */
REQ_F_BUFFERS_COMMIT = IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT),
+ /* buf node is valid */
+ REQ_F_BUF_NODE = IO_REQ_FLAG(REQ_F_BUF_NODE_BIT),
};
typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
@@ -615,12 +629,9 @@ struct io_kiocb {
struct io_cqe cqe;
struct io_ring_ctx *ctx;
- struct task_struct *task;
+ struct io_uring_task *tctx;
union {
- /* store used ubuf, so we can prevent reloading */
- struct io_mapped_ubuf *imu;
-
/* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
struct io_buffer *kbuf;
@@ -629,6 +640,8 @@ struct io_kiocb {
* REQ_F_BUFFER_RING is set.
*/
struct io_buffer_list *buf_list;
+
+ struct io_rsrc_node *buf_node;
};
union {
@@ -638,13 +651,20 @@ struct io_kiocb {
__poll_t apoll_events;
};
- struct io_rsrc_node *rsrc_node;
+ struct io_rsrc_node *file_node;
atomic_t refs;
bool cancel_seq_set;
struct io_task_work io_task_work;
- /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
- struct hlist_node hash_node;
+ union {
+ /*
+ * for polled requests, i.e. IORING_OP_POLL_ADD and async armed
+ * poll
+ */
+ struct hlist_node hash_node;
+ /* For IOPOLL setup queues, with hybrid polling */
+ u64 iopoll_start;
+ };
/* internal polling, see IORING_FEAT_FAST_POLL */
struct async_poll *apoll;
/* opcode allocated if it needs to store data for async defer */
@@ -667,4 +687,9 @@ struct io_overflow_cqe {
struct io_uring_cqe cqe;
};
+static inline bool io_ctx_cqe32(struct io_ring_ctx *ctx)
+{
+ return ctx->flags & IORING_SETUP_CQE32;
+}
+
#endif
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index d0420e962ffd..27048ec10e1c 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -178,6 +178,7 @@ struct iomap_folio_ops {
#else
#define IOMAP_DAX 0
#endif /* CONFIG_FS_DAX */
+#define IOMAP_ATOMIC (1 << 9)
struct iomap_ops {
/*
@@ -270,6 +271,25 @@ static inline loff_t iomap_last_written_block(struct inode *inode, loff_t pos,
return round_up(pos + written, i_blocksize(inode));
}
+/*
+ * Check if the range needs to be unshared for a FALLOC_FL_UNSHARE_RANGE
+ * operation.
+ *
+ * Don't bother with blocks that are not shared to start with; or mappings that
+ * cannot be shared, such as inline data, delalloc reservations, holes or
+ * unwritten extents.
+ *
+ * Note that we use srcmap directly instead of iomap_iter_srcmap as unsharing
+ * requires providing a separate source map, and the presence of one is a good
+ * indicator that unsharing is needed, unlike IOMAP_F_SHARED which can be set
+ * for any data that goes into the COW fork for XFS.
+ */
+static inline bool iomap_want_unshare_iter(const struct iomap_iter *iter)
+{
+ return (iter->iomap.flags & IOMAP_F_SHARED) &&
+ iter->srcmap.type == IOMAP_MAPPED;
+}
+
ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
const struct iomap_ops *ops, void *private);
int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 8aef9bb6ad57..50f7ea8714bf 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1796,22 +1796,21 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal)
static inline u32 jbd2_chksum(journal_t *journal, u32 crc,
const void *address, unsigned int length)
{
- struct {
- struct shash_desc shash;
- char ctx[JBD_MAX_CHECKSUM_SIZE];
- } desc;
+ DEFINE_RAW_FLEX(struct shash_desc, desc, __ctx,
+ DIV_ROUND_UP(JBD_MAX_CHECKSUM_SIZE,
+ sizeof(*((struct shash_desc *)0)->__ctx)));
int err;
BUG_ON(crypto_shash_descsize(journal->j_chksum_driver) >
JBD_MAX_CHECKSUM_SIZE);
- desc.shash.tfm = journal->j_chksum_driver;
- *(u32 *)desc.ctx = crc;
+ desc->tfm = journal->j_chksum_driver;
+ *(u32 *)desc->__ctx = crc;
- err = crypto_shash_update(&desc.shash, address, length);
+ err = crypto_shash_update(desc, address, length);
BUG_ON(err);
- return *(u32 *)desc.ctx;
+ return *(u32 *)desc->__ctx;
}
/* Return most recent uncommitted transaction */
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 11690dacd986..ec9c05044d4f 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -54,12 +54,11 @@ static inline long mm_ksm_zero_pages(struct mm_struct *mm)
return atomic_long_read(&mm->ksm_zero_pages);
}
-static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
+ /* Adding mm to ksm is best effort on fork. */
if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
- return __ksm_enter(mm);
-
- return 0;
+ __ksm_enter(mm);
}
static inline int ksm_execve(struct mm_struct *mm)
@@ -107,9 +106,8 @@ static inline int ksm_disable(struct mm_struct *mm)
return 0;
}
-static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
- return 0;
}
static inline int ksm_execve(struct mm_struct *mm)
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 9b4a6ff03235..c1a85d46eba6 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -270,9 +270,7 @@ enum {
/* bits 24:31 of host->flags are reserved for LLD specific flags */
- /* various lengths of time */
- ATA_TMOUT_BOOT = 30000, /* heuristic */
- ATA_TMOUT_BOOT_QUICK = 7000, /* heuristic */
+ /* Various lengths of time */
ATA_TMOUT_INTERNAL_QUICK = 5000,
ATA_TMOUT_MAX_PARK = 30000,
diff --git a/include/linux/lsm/apparmor.h b/include/linux/lsm/apparmor.h
new file mode 100644
index 000000000000..612cbfacb072
--- /dev/null
+++ b/include/linux/lsm/apparmor.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Linux Security Module interface to other subsystems.
+ * AppArmor presents single pointer to an aa_label structure.
+ */
+#ifndef __LINUX_LSM_APPARMOR_H
+#define __LINUX_LSM_APPARMOR_H
+
+struct aa_label;
+
+struct lsm_prop_apparmor {
+#ifdef CONFIG_SECURITY_APPARMOR
+ struct aa_label *label;
+#endif
+};
+
+#endif /* ! __LINUX_LSM_APPARMOR_H */
diff --git a/include/linux/lsm/bpf.h b/include/linux/lsm/bpf.h
new file mode 100644
index 000000000000..8106e206fcef
--- /dev/null
+++ b/include/linux/lsm/bpf.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Linux Security Module interface to other subsystems.
+ * BPF may present a single u32 value.
+ */
+#ifndef __LINUX_LSM_BPF_H
+#define __LINUX_LSM_BPF_H
+#include <linux/types.h>
+
+struct lsm_prop_bpf {
+#ifdef CONFIG_BPF_LSM
+ u32 secid;
+#endif
+};
+
+#endif /* ! __LINUX_LSM_BPF_H */
diff --git a/include/linux/lsm/selinux.h b/include/linux/lsm/selinux.h
new file mode 100644
index 000000000000..9455a6b5b910
--- /dev/null
+++ b/include/linux/lsm/selinux.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Linux Security Module interface to other subsystems.
+ * SELinux presents a single u32 value which is known as a secid.
+ */
+#ifndef __LINUX_LSM_SELINUX_H
+#define __LINUX_LSM_SELINUX_H
+#include <linux/types.h>
+
+struct lsm_prop_selinux {
+#ifdef CONFIG_SECURITY_SELINUX
+ u32 secid;
+#endif
+};
+
+#endif /* ! __LINUX_LSM_SELINUX_H */
diff --git a/include/linux/lsm/smack.h b/include/linux/lsm/smack.h
new file mode 100644
index 000000000000..ff730dd7a734
--- /dev/null
+++ b/include/linux/lsm/smack.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Linux Security Module interface to other subsystems.
+ * Smack presents a pointer into the global Smack label list.
+ */
+#ifndef __LINUX_LSM_SMACK_H
+#define __LINUX_LSM_SMACK_H
+
+struct smack_known;
+
+struct lsm_prop_smack {
+#ifdef CONFIG_SECURITY_SMACK
+ struct smack_known *skp;
+#endif
+};
+
+#endif /* ! __LINUX_LSM_SMACK_H */
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 9eca013aa5e1..eb2937599cb0 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -176,7 +176,8 @@ LSM_HOOK(int, -EOPNOTSUPP, inode_setsecurity, struct inode *inode,
const char *name, const void *value, size_t size, int flags)
LSM_HOOK(int, 0, inode_listsecurity, struct inode *inode, char *buffer,
size_t buffer_size)
-LSM_HOOK(void, LSM_RET_VOID, inode_getsecid, struct inode *inode, u32 *secid)
+LSM_HOOK(void, LSM_RET_VOID, inode_getlsmprop, struct inode *inode,
+ struct lsm_prop *prop)
LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new)
LSM_HOOK(int, -EOPNOTSUPP, inode_copy_up_xattr, struct dentry *src,
const char *name)
@@ -217,6 +218,8 @@ LSM_HOOK(int, 0, cred_prepare, struct cred *new, const struct cred *old,
LSM_HOOK(void, LSM_RET_VOID, cred_transfer, struct cred *new,
const struct cred *old)
LSM_HOOK(void, LSM_RET_VOID, cred_getsecid, const struct cred *c, u32 *secid)
+LSM_HOOK(void, LSM_RET_VOID, cred_getlsmprop, const struct cred *c,
+ struct lsm_prop *prop)
LSM_HOOK(int, 0, kernel_act_as, struct cred *new, u32 secid)
LSM_HOOK(int, 0, kernel_create_files_as, struct cred *new, struct inode *inode)
LSM_HOOK(int, 0, kernel_module_request, char *kmod_name)
@@ -235,9 +238,9 @@ LSM_HOOK(int, 0, task_fix_setgroups, struct cred *new, const struct cred * old)
LSM_HOOK(int, 0, task_setpgid, struct task_struct *p, pid_t pgid)
LSM_HOOK(int, 0, task_getpgid, struct task_struct *p)
LSM_HOOK(int, 0, task_getsid, struct task_struct *p)
-LSM_HOOK(void, LSM_RET_VOID, current_getsecid_subj, u32 *secid)
-LSM_HOOK(void, LSM_RET_VOID, task_getsecid_obj,
- struct task_struct *p, u32 *secid)
+LSM_HOOK(void, LSM_RET_VOID, current_getlsmprop_subj, struct lsm_prop *prop)
+LSM_HOOK(void, LSM_RET_VOID, task_getlsmprop_obj,
+ struct task_struct *p, struct lsm_prop *prop)
LSM_HOOK(int, 0, task_setnice, struct task_struct *p, int nice)
LSM_HOOK(int, 0, task_setioprio, struct task_struct *p, int ioprio)
LSM_HOOK(int, 0, task_getioprio, struct task_struct *p)
@@ -256,8 +259,8 @@ LSM_HOOK(void, LSM_RET_VOID, task_to_inode, struct task_struct *p,
struct inode *inode)
LSM_HOOK(int, 0, userns_create, const struct cred *cred)
LSM_HOOK(int, 0, ipc_permission, struct kern_ipc_perm *ipcp, short flag)
-LSM_HOOK(void, LSM_RET_VOID, ipc_getsecid, struct kern_ipc_perm *ipcp,
- u32 *secid)
+LSM_HOOK(void, LSM_RET_VOID, ipc_getlsmprop, struct kern_ipc_perm *ipcp,
+ struct lsm_prop *prop)
LSM_HOOK(int, 0, msg_msg_alloc_security, struct msg_msg *msg)
LSM_HOOK(void, LSM_RET_VOID, msg_msg_free_security, struct msg_msg *msg)
LSM_HOOK(int, 0, msg_queue_alloc_security, struct kern_ipc_perm *perm)
@@ -294,6 +297,8 @@ LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size)
LSM_HOOK(int, 0, ismaclabel, const char *name)
LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, char **secdata,
u32 *seclen)
+LSM_HOOK(int, -EOPNOTSUPP, lsmprop_to_secctx, struct lsm_prop *prop,
+ char **secdata, u32 *seclen)
LSM_HOOK(int, 0, secctx_to_secid, const char *secdata, u32 seclen, u32 *secid)
LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen)
LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode)
@@ -416,7 +421,8 @@ LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring,
LSM_HOOK(int, 0, audit_rule_init, u32 field, u32 op, char *rulestr,
void **lsmrule, gfp_t gfp)
LSM_HOOK(int, 0, audit_rule_known, struct audit_krule *krule)
-LSM_HOOK(int, 0, audit_rule_match, u32 secid, u32 field, u32 op, void *lsmrule)
+LSM_HOOK(int, 0, audit_rule_match, struct lsm_prop *prop, u32 field, u32 op,
+ void *lsmrule)
LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule)
#endif /* CONFIG_AUDIT */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 34d2da05f2f1..e1b41554a5fb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1760,8 +1760,9 @@ static inline int memcg_kmem_id(struct mem_cgroup *memcg)
struct mem_cgroup *mem_cgroup_from_slab_obj(void *p);
-static inline void count_objcg_event(struct obj_cgroup *objcg,
- enum vm_event_item idx)
+static inline void count_objcg_events(struct obj_cgroup *objcg,
+ enum vm_event_item idx,
+ unsigned long count)
{
struct mem_cgroup *memcg;
@@ -1770,7 +1771,7 @@ static inline void count_objcg_event(struct obj_cgroup *objcg,
rcu_read_lock();
memcg = obj_cgroup_memcg(objcg);
- count_memcg_events(memcg, idx, 1);
+ count_memcg_events(memcg, idx, count);
rcu_read_unlock();
}
@@ -1825,8 +1826,9 @@ static inline struct mem_cgroup *mem_cgroup_from_slab_obj(void *p)
return NULL;
}
-static inline void count_objcg_event(struct obj_cgroup *objcg,
- enum vm_event_item idx)
+static inline void count_objcg_events(struct obj_cgroup *objcg,
+ enum vm_event_item idx,
+ unsigned long count)
{
}
diff --git a/include/linux/mman.h b/include/linux/mman.h
index bcb201ab7a41..a842783ffa62 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_MMAN_H
#define _LINUX_MMAN_H
+#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/percpu_counter.h>
@@ -94,7 +95,7 @@ static inline void vm_unacct_memory(long pages)
#endif
#ifndef arch_calc_vm_flag_bits
-#define arch_calc_vm_flag_bits(flags) 0
+#define arch_calc_vm_flag_bits(file, flags) 0
#endif
#ifndef arch_validate_prot
@@ -151,13 +152,13 @@ calc_vm_prot_bits(unsigned long prot, unsigned long pkey)
* Combine the mmap "flags" argument into "vm_flags" used internally.
*/
static inline unsigned long
-calc_vm_flag_bits(unsigned long flags)
+calc_vm_flag_bits(struct file *file, unsigned long flags)
{
return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
_calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) |
_calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) |
_calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) |
- arch_calc_vm_flag_bits(flags);
+ arch_calc_vm_flag_bits(file, flags);
}
unsigned long vm_commit_limit(void);
@@ -188,16 +189,31 @@ static inline bool arch_memory_deny_write_exec_supported(void)
*
* d) mmap(PROT_READ | PROT_EXEC)
* mmap(PROT_READ | PROT_EXEC | PROT_BTI)
+ *
+ * This is only applicable if the user has set the Memory-Deny-Write-Execute
+ * (MDWE) protection mask for the current process.
+ *
+ * @old specifies the VMA flags the VMA originally possessed, and @new the ones
+ * we propose to set.
+ *
+ * Return: false if proposed change is OK, true if not ok and should be denied.
*/
-static inline bool map_deny_write_exec(struct vm_area_struct *vma, unsigned long vm_flags)
+static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
{
+ /* If MDWE is disabled, we have nothing to deny. */
if (!test_bit(MMF_HAS_MDWE, &current->mm->flags))
return false;
- if ((vm_flags & VM_EXEC) && (vm_flags & VM_WRITE))
+ /* If the new VMA is not executable, we have nothing to deny. */
+ if (!(new & VM_EXEC))
+ return false;
+
+ /* Under MDWE we do not accept newly writably executable VMAs... */
+ if (new & VM_WRITE)
return true;
- if (!(vma->vm_flags & VM_EXEC) && (vm_flags & VM_EXEC))
+ /* ...nor previously non-executable VMAs becoming executable. */
+ if (!(old & VM_EXEC))
return true;
return false;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 17506e4a2835..80bc5640bb60 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -458,9 +458,7 @@ struct lru_gen_folio {
enum {
MM_LEAF_TOTAL, /* total leaf entries */
- MM_LEAF_OLD, /* old leaf entries */
MM_LEAF_YOUNG, /* young leaf entries */
- MM_NONLEAF_TOTAL, /* total non-leaf entries */
MM_NONLEAF_FOUND, /* non-leaf entries found in Bloom filters */
MM_NONLEAF_ADDED, /* non-leaf entries added to Bloom filters */
NR_MM_STATS
@@ -557,7 +555,7 @@ struct lru_gen_memcg {
void lru_gen_init_pgdat(struct pglist_data *pgdat);
void lru_gen_init_lruvec(struct lruvec *lruvec);
-void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
+bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
void lru_gen_init_memcg(struct mem_cgroup *memcg);
void lru_gen_exit_memcg(struct mem_cgroup *memcg);
@@ -576,8 +574,9 @@ static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
{
}
-static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+static inline bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
{
+ return false;
}
static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
@@ -824,6 +823,7 @@ struct zone {
unsigned long watermark_boost;
unsigned long nr_reserved_highatomic;
+ unsigned long nr_free_highatomic;
/*
* We don't know if the memory that we're going to allocate will be
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index b332c2048c75..a48a30842d84 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -239,7 +239,7 @@ int netlink_register_notifier(struct notifier_block *nb);
int netlink_unregister_notifier(struct notifier_block *nb);
/* finegrained unicast helpers: */
-struct sock *netlink_getsockbyfilp(struct file *filp);
+struct sock *netlink_getsockbyfd(int fd);
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
long *timeo, struct sock *ssk);
void netlink_detachskb(struct sock *sk, struct sk_buff *skb);
diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h
index b0dd9b1eef4f..3982fea79919 100644
--- a/include/linux/nfslocalio.h
+++ b/include/linux/nfslocalio.h
@@ -32,7 +32,8 @@ typedef struct {
struct auth_domain *dom; /* auth_domain for localio */
} nfs_uuid_t;
-void nfs_uuid_begin(nfs_uuid_t *);
+void nfs_uuid_init(nfs_uuid_t *);
+bool nfs_uuid_begin(nfs_uuid_t *);
void nfs_uuid_end(nfs_uuid_t *);
void nfs_uuid_is_local(const uuid_t *, struct list_head *,
struct net *, struct auth_domain *, struct module *);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index b58d9405d65e..0a6e22038ce3 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -327,7 +327,8 @@ struct nvme_id_ctrl {
__le32 sanicap;
__le32 hmminds;
__le16 hmmaxd;
- __u8 rsvd338[4];
+ __le16 nvmsetidmax;
+ __le16 endgidmax;
__u8 anatt;
__u8 anacap;
__le32 anagrpmax;
@@ -522,6 +523,7 @@ enum {
NVME_ID_CNS_NS_DESC_LIST = 0x03,
NVME_ID_CNS_CS_NS = 0x05,
NVME_ID_CNS_CS_CTRL = 0x06,
+ NVME_ID_CNS_NS_ACTIVE_LIST_CS = 0x07,
NVME_ID_CNS_NS_CS_INDEP = 0x08,
NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
NVME_ID_CNS_NS_PRESENT = 0x11,
@@ -530,6 +532,7 @@ enum {
NVME_ID_CNS_SCNDRY_CTRL_LIST = 0x15,
NVME_ID_CNS_NS_GRANULARITY = 0x16,
NVME_ID_CNS_UUID_LIST = 0x17,
+ NVME_ID_CNS_ENDGRP_LIST = 0x19,
};
enum {
@@ -560,6 +563,8 @@ enum {
NVME_NS_FLBAS_LBA_SHIFT = 1,
NVME_NS_FLBAS_META_EXT = 0x10,
NVME_NS_NMIC_SHARED = 1 << 0,
+ NVME_NS_ROTATIONAL = 1 << 4,
+ NVME_NS_VWC_NOT_PRESENT = 1 << 5,
NVME_LBAF_RP_BEST = 0,
NVME_LBAF_RP_BETTER = 1,
NVME_LBAF_RP_GOOD = 2,
@@ -617,6 +622,40 @@ enum {
NVME_NIDT_CSI = 0x04,
};
+struct nvme_endurance_group_log {
+ __u8 egcw;
+ __u8 egfeat;
+ __u8 rsvd2;
+ __u8 avsp;
+ __u8 avspt;
+ __u8 pused;
+ __le16 did;
+ __u8 rsvd8[24];
+ __u8 ee[16];
+ __u8 dur[16];
+ __u8 duw[16];
+ __u8 muw[16];
+ __u8 hrc[16];
+ __u8 hwc[16];
+ __u8 mdie[16];
+ __u8 neile[16];
+ __u8 tegcap[16];
+ __u8 uegcap[16];
+ __u8 rsvd192[320];
+};
+
+struct nvme_rotational_media_log {
+ __le16 endgid;
+ __le16 numa;
+ __le16 nrs;
+ __u8 rsvd6[2];
+ __le32 spinc;
+ __le32 fspinc;
+ __le32 ldc;
+ __le32 fldc;
+ __u8 rsvd24[488];
+};
+
struct nvme_smart_log {
__u8 critical_warning;
__u8 temperature[2];
@@ -1244,6 +1283,7 @@ enum {
NVME_FEAT_WRITE_PROTECT = 0x84,
NVME_FEAT_VENDOR_START = 0xC0,
NVME_FEAT_VENDOR_END = 0xFF,
+ NVME_LOG_SUPPORTED = 0x00,
NVME_LOG_ERROR = 0x01,
NVME_LOG_SMART = 0x02,
NVME_LOG_FW_SLOT = 0x03,
@@ -1254,6 +1294,8 @@ enum {
NVME_LOG_TELEMETRY_CTRL = 0x08,
NVME_LOG_ENDURANCE_GROUP = 0x09,
NVME_LOG_ANA = 0x0c,
+ NVME_LOG_FEATURES = 0x12,
+ NVME_LOG_RMI = 0x16,
NVME_LOG_DISC = 0x70,
NVME_LOG_RESERVATION = 0x80,
NVME_FWACT_REPL = (0 << 3),
@@ -1261,6 +1303,24 @@ enum {
NVME_FWACT_ACTV = (2 << 3),
};
+struct nvme_supported_log {
+ __le32 lids[256];
+};
+
+enum {
+ NVME_LIDS_LSUPP = 1 << 0,
+};
+
+struct nvme_supported_features_log {
+ __le32 fis[256];
+};
+
+enum {
+ NVME_FIS_FSUPP = 1 << 0,
+ NVME_FIS_NSCPE = 1 << 20,
+ NVME_FIS_CSCPE = 1 << 21,
+};
+
/* NVMe Namespace Write Protect State */
enum {
NVME_NS_NO_WRITE_PROTECT = 0,
@@ -1281,7 +1341,8 @@ struct nvme_identify {
__u8 cns;
__u8 rsvd3;
__le16 ctrlid;
- __u8 rsvd11[3];
+ __le16 cnssid;
+ __u8 rsvd11;
__u8 csi;
__u32 rsvd12[4];
};
@@ -1389,7 +1450,7 @@ struct nvme_get_log_page_command {
__u8 lsp; /* upper 4 bits reserved */
__le16 numdl;
__le16 numdu;
- __u16 rsvd11;
+ __le16 lsi;
union {
struct {
__le32 lpol;
@@ -2037,4 +2098,72 @@ struct nvme_completion {
#define NVME_MINOR(ver) (((ver) >> 8) & 0xff)
#define NVME_TERTIARY(ver) ((ver) & 0xff)
+enum {
+ NVME_AEN_RESV_LOG_PAGE_AVALIABLE = 0x00,
+};
+
+enum {
+ NVME_PR_LOG_EMPTY_LOG_PAGE = 0x00,
+ NVME_PR_LOG_REGISTRATION_PREEMPTED = 0x01,
+ NVME_PR_LOG_RESERVATION_RELEASED = 0x02,
+ NVME_PR_LOG_RESERVATOIN_PREEMPTED = 0x03,
+};
+
+enum {
+ NVME_PR_NOTIFY_BIT_REG_PREEMPTED = 1,
+ NVME_PR_NOTIFY_BIT_RESV_RELEASED = 2,
+ NVME_PR_NOTIFY_BIT_RESV_PREEMPTED = 3,
+};
+
+struct nvme_pr_log {
+ __le64 count;
+ __u8 type;
+ __u8 nr_pages;
+ __u8 rsvd1[2];
+ __le32 nsid;
+ __u8 rsvd2[48];
+};
+
+struct nvmet_pr_register_data {
+ __le64 crkey;
+ __le64 nrkey;
+};
+
+struct nvmet_pr_acquire_data {
+ __le64 crkey;
+ __le64 prkey;
+};
+
+struct nvmet_pr_release_data {
+ __le64 crkey;
+};
+
+enum nvme_pr_capabilities {
+ NVME_PR_SUPPORT_PTPL = 1,
+ NVME_PR_SUPPORT_WRITE_EXCLUSIVE = 1 << 1,
+ NVME_PR_SUPPORT_EXCLUSIVE_ACCESS = 1 << 2,
+ NVME_PR_SUPPORT_WRITE_EXCLUSIVE_REG_ONLY = 1 << 3,
+ NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_REG_ONLY = 1 << 4,
+ NVME_PR_SUPPORT_WRITE_EXCLUSIVE_ALL_REGS = 1 << 5,
+ NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_ALL_REGS = 1 << 6,
+ NVME_PR_SUPPORT_IEKEY_VER_1_3_DEF = 1 << 7,
+};
+
+enum nvme_pr_register_action {
+ NVME_PR_REGISTER_ACT_REG = 0,
+ NVME_PR_REGISTER_ACT_UNREG = 1,
+ NVME_PR_REGISTER_ACT_REPLACE = 1 << 1,
+};
+
+enum nvme_pr_acquire_action {
+ NVME_PR_ACQUIRE_ACT_ACQUIRE = 0,
+ NVME_PR_ACQUIRE_ACT_PREEMPT = 1,
+ NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT = 1 << 1,
+};
+
+enum nvme_pr_release_action {
+ NVME_PR_RELEASE_ACT_RELEASE = 0,
+ NVME_PR_RELEASE_ACT_CLEAR = 1,
+};
+
#endif /* _LINUX_NVME_H */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 1b3a76710487..908ee0aad554 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -543,7 +543,7 @@ FOLIO_FLAG(swapbacked, FOLIO_HEAD_PAGE)
* - PG_private and PG_private_2 cause release_folio() and co to be invoked
*/
PAGEFLAG(Private, private, PF_ANY)
-PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY)
+FOLIO_FLAG(private_2, FOLIO_HEAD_PAGE)
/* owner_2 can be set on tail pages for anon memory */
FOLIO_FLAG(owner_2, FOLIO_HEAD_PAGE)
@@ -554,7 +554,7 @@ FOLIO_FLAG(owner_2, FOLIO_HEAD_PAGE)
*/
TESTPAGEFLAG(Writeback, writeback, PF_NO_TAIL)
TESTSCFLAG(Writeback, writeback, PF_NO_TAIL)
-PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL)
+FOLIO_FLAG(mappedtodisk, FOLIO_HEAD_PAGE)
/* PG_readahead is only used for reads; PG_reclaim is only for writes */
PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL)
@@ -975,12 +975,16 @@ static __always_inline bool folio_test_##fname(const struct folio *folio) \
} \
static __always_inline void __folio_set_##fname(struct folio *folio) \
{ \
+ if (folio_test_##fname(folio)) \
+ return; \
VM_BUG_ON_FOLIO(data_race(folio->page.page_type) != UINT_MAX, \
folio); \
folio->page.page_type = (unsigned int)PGTY_##lname << 24; \
} \
static __always_inline void __folio_clear_##fname(struct folio *folio) \
{ \
+ if (folio->page.page_type == UINT_MAX) \
+ return; \
VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \
folio->page.page_type = UINT_MAX; \
}
@@ -993,11 +997,15 @@ static __always_inline int Page##uname(const struct page *page) \
} \
static __always_inline void __SetPage##uname(struct page *page) \
{ \
+ if (Page##uname(page)) \
+ return; \
VM_BUG_ON_PAGE(data_race(page->page_type) != UINT_MAX, page); \
page->page_type = (unsigned int)PGTY_##lname << 24; \
} \
static __always_inline void __ClearPage##uname(struct page *page) \
{ \
+ if (page->page_type == UINT_MAX) \
+ return; \
VM_BUG_ON_PAGE(!Page##uname(page), page); \
page->page_type = UINT_MAX; \
}
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index b637ec14025f..cf4b11be3709 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -92,6 +92,10 @@ struct dev_pm_domain_list {
* GENPD_FLAG_OPP_TABLE_FW: The genpd provider supports performance states,
* but its corresponding OPP tables are not
* described in DT, but are given directly by FW.
+ *
+ * GENPD_FLAG_DEV_NAME_FW: Instructs genpd to generate an unique device name
+ * using ida. It is used by genpd providers which
+ * get their genpd-names directly from FW.
*/
#define GENPD_FLAG_PM_CLK (1U << 0)
#define GENPD_FLAG_IRQ_SAFE (1U << 1)
@@ -101,6 +105,7 @@ struct dev_pm_domain_list {
#define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5)
#define GENPD_FLAG_MIN_RESIDENCY (1U << 6)
#define GENPD_FLAG_OPP_TABLE_FW (1U << 7)
+#define GENPD_FLAG_DEV_NAME_FW (1U << 8)
enum gpd_status {
GENPD_STATE_ON = 0, /* PM domain is on */
@@ -163,6 +168,7 @@ struct generic_pm_domain {
atomic_t sd_count; /* Number of subdomains with power "on" */
enum gpd_status status; /* Current state of the domain */
unsigned int device_count; /* Number of devices */
+ unsigned int device_id; /* unique device id */
unsigned int suspended_count; /* System suspend device counter */
unsigned int prepared_count; /* Suspend counter of prepared devices */
unsigned int performance_state; /* Aggregated max performance state */
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 0e65b3d634d9..e2d47eb1a7f3 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -28,9 +28,9 @@ struct posix_acl_entry {
struct posix_acl {
refcount_t a_refcount;
- struct rcu_head a_rcu;
unsigned int a_count;
- struct posix_acl_entry a_entries[];
+ struct rcu_head a_rcu;
+ struct posix_acl_entry a_entries[] __counted_by(a_count);
};
#define FOREACH_ACL_ENTRY(pa, acl, pe) \
@@ -62,7 +62,7 @@ posix_acl_release(struct posix_acl *acl)
/* posix_acl.c */
extern void posix_acl_init(struct posix_acl *, int);
-extern struct posix_acl *posix_acl_alloc(int, gfp_t);
+extern struct posix_acl *posix_acl_alloc(unsigned int count, gfp_t flags);
extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t);
extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *);
extern int __posix_acl_create(struct posix_acl **, gfp_t, umode_t *);
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index bf10bdb487dd..6c2fef89a4fd 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -9,6 +9,7 @@
#include <linux/sched.h>
#include <linux/magic.h>
#include <linux/refcount.h>
+#include <linux/kasan.h>
#ifdef CONFIG_THREAD_INFO_IN_TASK
@@ -89,6 +90,7 @@ static inline int object_is_on_stack(const void *obj)
{
void *stack = task_stack_page(current);
+ obj = kasan_reset_tag(obj);
return (obj >= stack) && (obj < (stack + THREAD_SIZE));
}
diff --git a/include/linux/security.h b/include/linux/security.h
index 2ec8f3014757..cbdba435b798 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -34,6 +34,10 @@
#include <linux/sockptr.h>
#include <linux/bpf.h>
#include <uapi/linux/lsm.h>
+#include <linux/lsm/selinux.h>
+#include <linux/lsm/smack.h>
+#include <linux/lsm/apparmor.h>
+#include <linux/lsm/bpf.h>
struct linux_binprm;
struct cred;
@@ -152,6 +156,16 @@ enum lockdown_reason {
LOCKDOWN_CONFIDENTIALITY_MAX,
};
+/*
+ * Data exported by the security modules
+ */
+struct lsm_prop {
+ struct lsm_prop_selinux selinux;
+ struct lsm_prop_smack smack;
+ struct lsm_prop_apparmor apparmor;
+ struct lsm_prop_bpf bpf;
+};
+
extern const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1];
extern u32 lsm_active_cnt;
extern const struct lsm_id *lsm_idlist[];
@@ -269,8 +283,32 @@ static inline const char *kernel_load_data_id_str(enum kernel_load_data_id id)
return kernel_load_data_str[id];
}
+/**
+ * lsmprop_init - initialize a lsm_prop structure
+ * @prop: Pointer to the data to initialize
+ *
+ * Set all secid for all modules to the specified value.
+ */
+static inline void lsmprop_init(struct lsm_prop *prop)
+{
+ memset(prop, 0, sizeof(*prop));
+}
+
#ifdef CONFIG_SECURITY
+/**
+ * lsmprop_is_set - report if there is a value in the lsm_prop
+ * @prop: Pointer to the exported LSM data
+ *
+ * Returns true if there is a value set, false otherwise
+ */
+static inline bool lsmprop_is_set(struct lsm_prop *prop)
+{
+ const struct lsm_prop empty = {};
+
+ return !!memcmp(prop, &empty, sizeof(*prop));
+}
+
int call_blocking_lsm_notifier(enum lsm_event event, void *data);
int register_blocking_lsm_notifier(struct notifier_block *nb);
int unregister_blocking_lsm_notifier(struct notifier_block *nb);
@@ -408,7 +446,7 @@ int security_inode_getsecurity(struct mnt_idmap *idmap,
void **buffer, bool alloc);
int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags);
int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size);
-void security_inode_getsecid(struct inode *inode, u32 *secid);
+void security_inode_getlsmprop(struct inode *inode, struct lsm_prop *prop);
int security_inode_copy_up(struct dentry *src, struct cred **new);
int security_inode_copy_up_xattr(struct dentry *src, const char *name);
int security_inode_setintegrity(const struct inode *inode,
@@ -444,6 +482,7 @@ void security_cred_free(struct cred *cred);
int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp);
void security_transfer_creds(struct cred *new, const struct cred *old);
void security_cred_getsecid(const struct cred *c, u32 *secid);
+void security_cred_getlsmprop(const struct cred *c, struct lsm_prop *prop);
int security_kernel_act_as(struct cred *new, u32 secid);
int security_kernel_create_files_as(struct cred *new, struct inode *inode);
int security_kernel_module_request(char *kmod_name);
@@ -463,8 +502,8 @@ int security_task_fix_setgroups(struct cred *new, const struct cred *old);
int security_task_setpgid(struct task_struct *p, pid_t pgid);
int security_task_getpgid(struct task_struct *p);
int security_task_getsid(struct task_struct *p);
-void security_current_getsecid_subj(u32 *secid);
-void security_task_getsecid_obj(struct task_struct *p, u32 *secid);
+void security_current_getlsmprop_subj(struct lsm_prop *prop);
+void security_task_getlsmprop_obj(struct task_struct *p, struct lsm_prop *prop);
int security_task_setnice(struct task_struct *p, int nice);
int security_task_setioprio(struct task_struct *p, int ioprio);
int security_task_getioprio(struct task_struct *p);
@@ -482,7 +521,7 @@ int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
void security_task_to_inode(struct task_struct *p, struct inode *inode);
int security_create_user_ns(const struct cred *cred);
int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag);
-void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid);
+void security_ipc_getlsmprop(struct kern_ipc_perm *ipcp, struct lsm_prop *prop);
int security_msg_msg_alloc(struct msg_msg *msg);
void security_msg_msg_free(struct msg_msg *msg);
int security_msg_queue_alloc(struct kern_ipc_perm *msq);
@@ -515,6 +554,7 @@ int security_setprocattr(int lsmid, const char *name, void *value, size_t size);
int security_netlink_send(struct sock *sk, struct sk_buff *skb);
int security_ismaclabel(const char *name);
int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
+int security_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, u32 *seclen);
int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
void security_release_secctx(char *secdata, u32 seclen);
void security_inode_invalidate_secctx(struct inode *inode);
@@ -531,6 +571,17 @@ int security_bdev_setintegrity(struct block_device *bdev,
size_t size);
#else /* CONFIG_SECURITY */
+/**
+ * lsmprop_is_set - report if there is a value in the lsm_prop
+ * @prop: Pointer to the exported LSM data
+ *
+ * Returns true if there is a value set, false otherwise
+ */
+static inline bool lsmprop_is_set(struct lsm_prop *prop)
+{
+ return false;
+}
+
static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data)
{
return 0;
@@ -1020,9 +1071,10 @@ static inline int security_inode_listsecurity(struct inode *inode, char *buffer,
return 0;
}
-static inline void security_inode_getsecid(struct inode *inode, u32 *secid)
+static inline void security_inode_getlsmprop(struct inode *inode,
+ struct lsm_prop *prop)
{
- *secid = 0;
+ lsmprop_init(prop);
}
static inline int security_inode_copy_up(struct dentry *src, struct cred **new)
@@ -1172,6 +1224,10 @@ static inline void security_cred_getsecid(const struct cred *c, u32 *secid)
*secid = 0;
}
+static inline void security_cred_getlsmprop(const struct cred *c,
+ struct lsm_prop *prop)
+{ }
+
static inline int security_kernel_act_as(struct cred *cred, u32 secid)
{
return 0;
@@ -1249,14 +1305,15 @@ static inline int security_task_getsid(struct task_struct *p)
return 0;
}
-static inline void security_current_getsecid_subj(u32 *secid)
+static inline void security_current_getlsmprop_subj(struct lsm_prop *prop)
{
- *secid = 0;
+ lsmprop_init(prop);
}
-static inline void security_task_getsecid_obj(struct task_struct *p, u32 *secid)
+static inline void security_task_getlsmprop_obj(struct task_struct *p,
+ struct lsm_prop *prop)
{
- *secid = 0;
+ lsmprop_init(prop);
}
static inline int security_task_setnice(struct task_struct *p, int nice)
@@ -1332,9 +1389,10 @@ static inline int security_ipc_permission(struct kern_ipc_perm *ipcp,
return 0;
}
-static inline void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid)
+static inline void security_ipc_getlsmprop(struct kern_ipc_perm *ipcp,
+ struct lsm_prop *prop)
{
- *secid = 0;
+ lsmprop_init(prop);
}
static inline int security_msg_msg_alloc(struct msg_msg *msg)
@@ -1468,7 +1526,14 @@ static inline int security_ismaclabel(const char *name)
return 0;
}
-static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
+static inline int security_secid_to_secctx(u32 secid, char **secdata,
+ u32 *seclen)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int security_lsmprop_to_secctx(struct lsm_prop *prop,
+ char **secdata, u32 *seclen)
{
return -EOPNOTSUPP;
}
@@ -2095,7 +2160,8 @@ static inline void security_key_post_create_or_update(struct key *keyring,
int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule,
gfp_t gfp);
int security_audit_rule_known(struct audit_krule *krule);
-int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule);
+int security_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op,
+ void *lsmrule);
void security_audit_rule_free(void *lsmrule);
#else
@@ -2111,8 +2177,8 @@ static inline int security_audit_rule_known(struct audit_krule *krule)
return 0;
}
-static inline int security_audit_rule_match(u32 secid, u32 field, u32 op,
- void *lsmrule)
+static inline int security_audit_rule_match(struct lsm_prop *prop, u32 field,
+ u32 op, void *lsmrule)
{
return 0;
}
diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h
index 2ac50822554e..80f33a93f944 100644
--- a/include/linux/sed-opal.h
+++ b/include/linux/sed-opal.h
@@ -52,6 +52,7 @@ static inline bool is_sed_ioctl(unsigned int cmd)
case IOC_OPAL_GET_GEOMETRY:
case IOC_OPAL_DISCOVERY:
case IOC_OPAL_REVERT_LSP:
+ case IOC_OPAL_SET_SID_PW:
return true;
}
return false;
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 515a9a6a3c6f..018da28c01e7 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -42,10 +42,10 @@ struct shmem_inode_info {
struct inode vfs_inode;
};
-#define SHMEM_FL_USER_VISIBLE FS_FL_USER_VISIBLE
+#define SHMEM_FL_USER_VISIBLE (FS_FL_USER_VISIBLE | FS_CASEFOLD_FL)
#define SHMEM_FL_USER_MODIFIABLE \
- (FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL)
-#define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL)
+ (FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL | FS_CASEFOLD_FL)
+#define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL | FS_CASEFOLD_FL)
struct shmem_quota_limits {
qsize_t usrquota_bhardlimit; /* Default user quota block hard limit */
diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h
index 9e9f528b1370..2f20281d4ad4 100644
--- a/include/linux/soc/qcom/llcc-qcom.h
+++ b/include/linux/soc/qcom/llcc-qcom.h
@@ -125,6 +125,7 @@ struct llcc_edac_reg_offset {
* @num_banks: Number of llcc banks
* @bitmap: Bit map to track the active slice ids
* @ecc_irq: interrupt for llcc cache error detection and reporting
+ * @ecc_irq_configured: 'True' if firmware has already configured the irq propagation
* @version: Indicates the LLCC version
*/
struct llcc_drv_data {
@@ -139,6 +140,7 @@ struct llcc_drv_data {
u32 num_banks;
unsigned long *bitmap;
int ecc_irq;
+ bool ecc_irq_configured;
u32 version;
};
diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
index fc5a206c4043..195debe2b1db 100644
--- a/include/linux/sockptr.h
+++ b/include/linux/sockptr.h
@@ -77,7 +77,9 @@ static inline int copy_safe_from_sockptr(void *dst, size_t ksize,
{
if (optlen < ksize)
return -EINVAL;
- return copy_from_sockptr(dst, optval, ksize);
+ if (copy_from_sockptr(dst, optval, ksize))
+ return -EFAULT;
+ return 0;
}
static inline int copy_struct_from_sockptr(void *dst, size_t ksize,
diff --git a/include/linux/swap.h b/include/linux/swap.h
index ca533b478c21..f3e0ac20c2e8 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -335,6 +335,7 @@ struct swap_info_struct {
* list.
*/
struct work_struct discard_work; /* discard worker */
+ struct work_struct reclaim_work; /* reclaim worker */
struct list_head discard_clusters; /* discard clusters list */
struct plist_node avail_lists[]; /*
* entries in swap_avail_heads, one
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 5758104921e6..c6333204d451 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -77,6 +77,7 @@ struct cachestat_range;
struct cachestat;
struct statmount;
struct mnt_id_req;
+struct xattr_args;
#include <linux/types.h>
#include <linux/aio_abi.h>
@@ -338,23 +339,35 @@ asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op,
void __user *arg, unsigned int nr_args);
asmlinkage long sys_setxattr(const char __user *path, const char __user *name,
const void __user *value, size_t size, int flags);
+asmlinkage long sys_setxattrat(int dfd, const char __user *path, unsigned int at_flags,
+ const char __user *name,
+ const struct xattr_args __user *args, size_t size);
asmlinkage long sys_lsetxattr(const char __user *path, const char __user *name,
const void __user *value, size_t size, int flags);
asmlinkage long sys_fsetxattr(int fd, const char __user *name,
const void __user *value, size_t size, int flags);
asmlinkage long sys_getxattr(const char __user *path, const char __user *name,
void __user *value, size_t size);
+asmlinkage long sys_getxattrat(int dfd, const char __user *path, unsigned int at_flags,
+ const char __user *name,
+ struct xattr_args __user *args, size_t size);
asmlinkage long sys_lgetxattr(const char __user *path, const char __user *name,
void __user *value, size_t size);
asmlinkage long sys_fgetxattr(int fd, const char __user *name,
void __user *value, size_t size);
asmlinkage long sys_listxattr(const char __user *path, char __user *list,
size_t size);
+asmlinkage long sys_listxattrat(int dfd, const char __user *path,
+ unsigned int at_flags,
+ char __user *list, size_t size);
asmlinkage long sys_llistxattr(const char __user *path, char __user *list,
size_t size);
asmlinkage long sys_flistxattr(int fd, char __user *list, size_t size);
asmlinkage long sys_removexattr(const char __user *path,
const char __user *name);
+asmlinkage long sys_removexattrat(int dfd, const char __user *path,
+ unsigned int at_flags,
+ const char __user *name);
asmlinkage long sys_lremovexattr(const char __user *path,
const char __user *name);
asmlinkage long sys_fremovexattr(int fd, const char __user *name);
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 72744638c5b0..99c9c5a7252a 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -251,12 +251,19 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
if (tick_nohz_full_enabled())
tick_nohz_dep_set_task(tsk, bit);
}
+
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_clear_task(tsk, bit);
}
+
+static inline void tick_dep_init_task(struct task_struct *tsk)
+{
+ atomic_set(&tsk->tick_dep_mask, 0);
+}
+
static inline void tick_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit)
{
@@ -290,6 +297,7 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
+static inline void tick_dep_init_task(struct task_struct *tsk) { }
static inline void tick_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_signal(struct signal_struct *signal,
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index fc12a9ba2c88..84a035e86ac8 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -45,6 +45,11 @@ extern void ktime_get_real_ts64(struct timespec64 *tv);
extern void ktime_get_coarse_ts64(struct timespec64 *ts);
extern void ktime_get_coarse_real_ts64(struct timespec64 *ts);
+/* Multigrain timestamp interfaces */
+extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts);
+extern void ktime_get_real_ts64_mg(struct timespec64 *ts);
+extern unsigned long timekeeping_get_mg_floor_swaps(void);
+
void getboottime64(struct timespec64 *ts);
/*
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 587b96b4418e..20a40ade8030 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -421,6 +421,7 @@ void tpm_buf_append_u32(struct tpm_buf *buf, const u32 value);
u8 tpm_buf_read_u8(struct tpm_buf *buf, off_t *offset);
u16 tpm_buf_read_u16(struct tpm_buf *buf, off_t *offset);
u32 tpm_buf_read_u32(struct tpm_buf *buf, off_t *offset);
+void tpm_buf_append_handle(struct tpm_chip *chip, struct tpm_buf *buf, u32 handle);
/*
* Check if TPM device is in the firmware upgrade mode.
@@ -505,6 +506,8 @@ void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
u8 attributes, u8 *passphrase,
int passphraselen);
+void tpm_buf_append_auth(struct tpm_chip *chip, struct tpm_buf *buf,
+ u8 attributes, u8 *passphrase, int passphraselen);
static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip,
struct tpm_buf *buf,
u8 attributes,
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 39c7cf82b0c2..e9c702c1908d 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -38,6 +38,7 @@
#else
#define can_do_masked_user_access() 0
#define masked_user_access_begin(src) NULL
+ #define mask_user_address(src) (src)
#endif
/*
@@ -159,19 +160,27 @@ _inline_copy_from_user(void *to, const void __user *from, unsigned long n)
{
unsigned long res = n;
might_fault();
- if (!should_fail_usercopy() && likely(access_ok(from, n))) {
+ if (should_fail_usercopy())
+ goto fail;
+ if (can_do_masked_user_access())
+ from = mask_user_address(from);
+ else {
+ if (!access_ok(from, n))
+ goto fail;
/*
* Ensure that bad access_ok() speculation will not
* lead to nasty side effects *after* the copy is
* finished:
*/
barrier_nospec();
- instrument_copy_from_user_before(to, from, n);
- res = raw_copy_from_user(to, from, n);
- instrument_copy_from_user_after(to, from, n, res);
}
- if (unlikely(res))
- memset(to + (n - res), 0, res);
+ instrument_copy_from_user_before(to, from, n);
+ res = raw_copy_from_user(to, from, n);
+ instrument_copy_from_user_after(to, from, n, res);
+ if (likely(!res))
+ return 0;
+fail:
+ memset(to + (n - res), 0, res);
return res;
}
extern __must_check unsigned long
@@ -394,6 +403,103 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src,
return 0;
}
+/**
+ * copy_struct_to_user: copy a struct to userspace
+ * @dst: Destination address, in userspace. This buffer must be @ksize
+ * bytes long.
+ * @usize: (Alleged) size of @dst struct.
+ * @src: Source address, in kernel space.
+ * @ksize: Size of @src struct.
+ * @ignored_trailing: Set to %true if there was a non-zero byte in @src that
+ * userspace cannot see because they are using an smaller struct.
+ *
+ * Copies a struct from kernel space to userspace, in a way that guarantees
+ * backwards-compatibility for struct syscall arguments (as long as future
+ * struct extensions are made such that all new fields are *appended* to the
+ * old struct, and zeroed-out new fields have the same meaning as the old
+ * struct).
+ *
+ * Some syscalls may wish to make sure that userspace knows about everything in
+ * the struct, and if there is a non-zero value that userspce doesn't know
+ * about, they want to return an error (such as -EMSGSIZE) or have some other
+ * fallback (such as adding a "you're missing some information" flag). If
+ * @ignored_trailing is non-%NULL, it will be set to %true if there was a
+ * non-zero byte that could not be copied to userspace (ie. was past @usize).
+ *
+ * While unconditionally returning an error in this case is the simplest
+ * solution, for maximum backward compatibility you should try to only return
+ * -EMSGSIZE if the user explicitly requested the data that couldn't be copied.
+ * Note that structure sizes can change due to header changes and simple
+ * recompilations without code changes(!), so if you care about
+ * @ignored_trailing you probably want to make sure that any new field data is
+ * associated with a flag. Otherwise you might assume that a program knows
+ * about data it does not.
+ *
+ * @ksize is just sizeof(*src), and @usize should've been passed by userspace.
+ * The recommended usage is something like the following:
+ *
+ * SYSCALL_DEFINE2(foobar, struct foo __user *, uarg, size_t, usize)
+ * {
+ * int err;
+ * bool ignored_trailing;
+ * struct foo karg = {};
+ *
+ * if (usize > PAGE_SIZE)
+ * return -E2BIG;
+ * if (usize < FOO_SIZE_VER0)
+ * return -EINVAL;
+ *
+ * // ... modify karg somehow ...
+ *
+ * err = copy_struct_to_user(uarg, usize, &karg, sizeof(karg),
+ * &ignored_trailing);
+ * if (err)
+ * return err;
+ * if (ignored_trailing)
+ * return -EMSGSIZE:
+ *
+ * // ...
+ * }
+ *
+ * There are three cases to consider:
+ * * If @usize == @ksize, then it's copied verbatim.
+ * * If @usize < @ksize, then the kernel is trying to pass userspace a newer
+ * struct than it supports. Thus we only copy the interoperable portions
+ * (@usize) and ignore the rest (but @ignored_trailing is set to %true if
+ * any of the trailing (@ksize - @usize) bytes are non-zero).
+ * * If @usize > @ksize, then the kernel is trying to pass userspace an older
+ * struct than userspace supports. In order to make sure the
+ * unknown-to-the-kernel fields don't contain garbage values, we zero the
+ * trailing (@usize - @ksize) bytes.
+ *
+ * Returns (in all cases, some data may have been copied):
+ * * -EFAULT: access to userspace failed.
+ */
+static __always_inline __must_check int
+copy_struct_to_user(void __user *dst, size_t usize, const void *src,
+ size_t ksize, bool *ignored_trailing)
+{
+ size_t size = min(ksize, usize);
+ size_t rest = max(ksize, usize) - size;
+
+ /* Double check if ksize is larger than a known object size. */
+ if (WARN_ON_ONCE(ksize > __builtin_object_size(src, 1)))
+ return -E2BIG;
+
+ /* Deal with trailing bytes. */
+ if (usize > ksize) {
+ if (clear_user(dst + size, rest))
+ return -EFAULT;
+ }
+ if (ignored_trailing)
+ *ignored_trailing = ksize < usize &&
+ memchr_inv(src + size, 0, rest) != NULL;
+ /* Copy the interoperable parts of the struct. */
+ if (copy_to_user(dst, src, size))
+ return -EFAULT;
+ return 0;
+}
+
bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size);
long copy_from_kernel_nofault(void *dst, const void *src, size_t size);
diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index 4d39e6e11a95..5e6b212a2aed 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -16,6 +16,8 @@ struct utf8data_table;
((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \
((unsigned int)(REV)))
+#define UTF8_LATEST UNICODE_AGE(12, 1, 0)
+
static inline u8 unicode_major(unsigned int age)
{
return (age >> UNICODE_MAJ_SHIFT) & 0xff;
@@ -76,4 +78,6 @@ int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
struct unicode_map *utf8_load(unsigned int version);
void utf8_unload(struct unicode_map *um);
+int utf8_parse_version(char *version);
+
#endif /* _LINUX_UNICODE_H */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 3625096d5f85..7183e5aca282 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -141,7 +141,8 @@ static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type ty
long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v);
bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v);
-long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type);
+long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type,
+ bool override_rlimit);
void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type);
bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long max);
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 9fc6ce15c499..cb40f1a1d081 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -249,6 +249,7 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
extern void dup_userfaultfd_complete(struct list_head *);
+void dup_userfaultfd_fail(struct list_head *);
extern void mremap_userfaultfd_prep(struct vm_area_struct *,
struct vm_userfaultfd_ctx *);
@@ -351,6 +352,10 @@ static inline void dup_userfaultfd_complete(struct list_head *l)
{
}
+static inline void dup_userfaultfd_fail(struct list_head *l)
+{
+}
+
static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma,
struct vm_userfaultfd_ctx *ctx)
{
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index aed952d04132..f70d0958095c 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -134,6 +134,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
#ifdef CONFIG_SWAP
SWAP_RA,
SWAP_RA_HIT,
+ SWPIN_ZERO,
+ SWPOUT_ZERO,
#ifdef CONFIG_KSM
KSM_SWPIN_COPY,
#endif
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 8aa3372f21a0..2b322a9b88a2 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -221,6 +221,7 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head);
#define wake_up_all(x) __wake_up(x, TASK_NORMAL, 0, NULL)
#define wake_up_locked(x) __wake_up_locked((x), TASK_NORMAL, 1)
#define wake_up_all_locked(x) __wake_up_locked((x), TASK_NORMAL, 0)
+#define wake_up_sync(x) __wake_up_sync(x, TASK_NORMAL)
#define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
#define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index d6db822e4bb3..d11b903c2edb 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -213,11 +213,8 @@ static inline void wait_on_inode(struct inode *inode)
#include <linux/bio.h>
void __inode_attach_wb(struct inode *inode, struct folio *folio);
-void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
- struct inode *inode)
- __releases(&inode->i_lock);
void wbc_detach_inode(struct writeback_control *wbc);
-void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
+void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio,
size_t bytes);
int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
enum wb_reason reason, struct wb_completion *done);
@@ -254,22 +251,8 @@ static inline void inode_detach_wb(struct inode *inode)
}
}
-/**
- * wbc_attach_fdatawrite_inode - associate wbc and inode for fdatawrite
- * @wbc: writeback_control of interest
- * @inode: target inode
- *
- * This function is to be used by __filemap_fdatawrite_range(), which is an
- * alternative entry point into writeback code, and first ensures @inode is
- * associated with a bdi_writeback and attaches it to @wbc.
- */
-static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
- struct inode *inode)
-{
- spin_lock(&inode->i_lock);
- inode_attach_wb(inode, NULL);
- wbc_attach_and_unlock_inode(wbc, inode);
-}
+void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
+ struct inode *inode);
/**
* wbc_init_bio - writeback specific initializtion of bio
@@ -303,13 +286,6 @@ static inline void inode_detach_wb(struct inode *inode)
{
}
-static inline void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
- struct inode *inode)
- __releases(&inode->i_lock)
-{
- spin_unlock(&inode->i_lock);
-}
-
static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
struct inode *inode)
{
@@ -324,7 +300,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
}
static inline void wbc_account_cgroup_owner(struct writeback_control *wbc,
- struct page *page, size_t bytes)
+ struct folio *folio, size_t bytes)
{
}
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index d20051865800..86b0d47984a1 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -19,6 +19,10 @@
#include <linux/user_namespace.h>
#include <uapi/linux/xattr.h>
+/* List of all open_how "versions". */
+#define XATTR_ARGS_SIZE_VER0 16 /* sizeof first published struct */
+#define XATTR_ARGS_SIZE_LATEST XATTR_ARGS_SIZE_VER0
+
struct inode;
struct dentry;