diff options
Diffstat (limited to 'include/linux/fs.h')
| -rw-r--r-- | include/linux/fs.h | 213 |
1 files changed, 141 insertions, 72 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h index 897eae8faee1..0a257d89208e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -37,6 +37,9 @@ #include <linux/uuid.h> #include <linux/errseq.h> #include <linux/ioprio.h> +#include <linux/fs_types.h> +#include <linux/build_bug.h> +#include <linux/stddef.h> #include <asm/byteorder.h> #include <uapi/linux/fs.h> @@ -304,13 +307,20 @@ enum rw_hint { struct kiocb { struct file *ki_filp; + + /* The 'ki_filp' pointer is shared in a union for aio */ + randomized_struct_fields_start + loff_t ki_pos; void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); void *private; int ki_flags; u16 ki_hint; u16 ki_ioprio; /* See linux/ioprio.h */ -} __randomize_layout; + unsigned int ki_cookie; /* for ->iopoll */ + + randomized_struct_fields_end +}; static inline bool is_sync_kiocb(struct kiocb *kiocb) { @@ -403,24 +413,40 @@ int pagecache_write_end(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata); +/** + * struct address_space - Contents of a cacheable, mappable object. + * @host: Owner, either the inode or the block_device. + * @i_pages: Cached pages. + * @gfp_mask: Memory allocation flags to use for allocating pages. + * @i_mmap_writable: Number of VM_SHARED mappings. + * @i_mmap: Tree of private and shared mappings. + * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable. + * @nrpages: Number of page entries, protected by the i_pages lock. + * @nrexceptional: Shadow or DAX entries, protected by the i_pages lock. + * @writeback_index: Writeback starts here. + * @a_ops: Methods. + * @flags: Error bits and flags (AS_*). + * @wb_err: The most recent error which has occurred. + * @private_lock: For use by the owner of the address_space. + * @private_list: For use by the owner of the address_space. + * @private_data: For use by the owner of the address_space. + */ struct address_space { - struct inode *host; /* owner: inode, block_device */ - struct radix_tree_root i_pages; /* cached pages */ - atomic_t i_mmap_writable;/* count VM_SHARED mappings */ - struct rb_root_cached i_mmap; /* tree of private and shared mappings */ - struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */ - /* Protected by the i_pages lock */ - unsigned long nrpages; /* number of total pages */ - /* number of shadow or DAX exceptional entries */ + struct inode *host; + struct xarray i_pages; + gfp_t gfp_mask; + atomic_t i_mmap_writable; + struct rb_root_cached i_mmap; + struct rw_semaphore i_mmap_rwsem; + unsigned long nrpages; unsigned long nrexceptional; - pgoff_t writeback_index;/* writeback starts here */ - const struct address_space_operations *a_ops; /* methods */ - unsigned long flags; /* error bits */ - spinlock_t private_lock; /* for use by the address_space */ - gfp_t gfp_mask; /* implicit gfp mask for allocations */ - struct list_head private_list; /* for use by the address_space */ - void *private_data; /* ditto */ + pgoff_t writeback_index; + const struct address_space_operations *a_ops; + unsigned long flags; errseq_t wb_err; + spinlock_t private_lock; + struct list_head private_list; + void *private_data; } __attribute__((aligned(sizeof(long)))) __randomize_layout; /* * On most architectures that alignment is already the case; but @@ -467,15 +493,18 @@ struct block_device { struct mutex bd_fsfreeze_mutex; } __randomize_layout; +/* XArray tags, for tagging dirty and writeback pages in the pagecache. */ +#define PAGECACHE_TAG_DIRTY XA_MARK_0 +#define PAGECACHE_TAG_WRITEBACK XA_MARK_1 +#define PAGECACHE_TAG_TOWRITE XA_MARK_2 + /* - * Radix-tree tags, for tagging dirty and writeback pages within the pagecache - * radix trees + * Returns true if any of the pages in the mapping are marked with the tag. */ -#define PAGECACHE_TAG_DIRTY 0 -#define PAGECACHE_TAG_WRITEBACK 1 -#define PAGECACHE_TAG_TOWRITE 2 - -int mapping_tagged(struct address_space *mapping, int tag); +static inline bool mapping_tagged(struct address_space *mapping, xa_mark_t tag) +{ + return xa_marked(&mapping->i_pages, tag); +} static inline void i_mmap_lock_write(struct address_space *mapping) { @@ -932,7 +961,9 @@ static inline struct file *get_file(struct file *f) atomic_long_inc(&f->f_count); return f; } -#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count) +#define get_file_rcu_many(x, cnt) \ + atomic_long_add_unless(&(x)->f_count, (cnt), 0) +#define get_file_rcu(x) get_file_rcu_many((x), 1) #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) #define file_count(x) atomic_long_read(&(x)->f_count) @@ -1025,10 +1056,15 @@ bool opens_in_grace(struct net *); * Obviously, the last two criteria only matter for POSIX locks. */ struct file_lock { - struct file_lock *fl_next; /* singly linked list for this inode */ + struct file_lock *fl_blocker; /* The lock, that is blocking us */ struct list_head fl_list; /* link into file_lock_context */ struct hlist_node fl_link; /* node in global lists */ - struct list_head fl_block; /* circular list of blocked processes */ + struct list_head fl_blocked_requests; /* list of requests with + * ->fl_blocker pointing here + */ + struct list_head fl_blocked_member; /* node in + * ->fl_blocker->fl_blocked_requests + */ fl_owner_t fl_owner; unsigned int fl_flags; unsigned char fl_type; @@ -1100,7 +1136,7 @@ extern void locks_remove_file(struct file *); extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); -extern int posix_unblock_lock(struct file_lock *); +extern int locks_delete_block(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); @@ -1190,7 +1226,7 @@ static inline int posix_lock_file(struct file *filp, struct file_lock *fl, return -ENOLCK; } -static inline int posix_unblock_lock(struct file_lock *waiter) +static inline int locks_delete_block(struct file_lock *waiter) { return -ENOENT; } @@ -1393,17 +1429,26 @@ struct super_block { struct sb_writers s_writers; + /* + * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and + * s_fsnotify_marks together for cache efficiency. They are frequently + * accessed and rarely modified. + */ + void *s_fs_info; /* Filesystem private info */ + + /* Granularity of c/m/atime in ns (cannot be worse than a second) */ + u32 s_time_gran; +#ifdef CONFIG_FSNOTIFY + __u32 s_fsnotify_mask; + struct fsnotify_mark_connector __rcu *s_fsnotify_marks; +#endif + char s_id[32]; /* Informational name */ uuid_t s_uuid; /* UUID */ - void *s_fs_info; /* Filesystem private info */ unsigned int s_max_links; fmode_t s_mode; - /* Granularity of c/m/atime in ns. - Cannot be worse than a second */ - u32 s_time_gran; - /* * The next field is for VFS *only*. No filesystems have any business * even looking at it. You had been warned. @@ -1428,6 +1473,9 @@ struct super_block { /* Number of inodes with nlink == 0 but still referenced */ atomic_long_t s_remove_count; + /* Pending fsnotify inode refs */ + atomic_long_t s_fsnotify_inode_refs; + /* Being remounted read-only */ int s_readonly_remount; @@ -1443,11 +1491,12 @@ struct super_block { struct user_namespace *s_user_ns; /* - * Keep the lru lists last in the structure so they always sit on their - * own individual cachelines. + * The list_lru structure is essentially just a pointer to a table + * of per-node lru lists, each of which has its own spinlock. + * There is no need to put them into separate cachelines. */ - struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; - struct list_lru s_inode_lru ____cacheline_aligned_in_smp; + struct list_lru s_dentry_lru; + struct list_lru s_inode_lru; struct rcu_head rcu; struct work_struct destroy_work; @@ -1664,22 +1713,6 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); /* - * File types - * - * NOTE! These match bits 12..15 of stat.st_mode - * (ie "(i_mode >> 12) & 15"). - */ -#define DT_UNKNOWN 0 -#define DT_FIFO 1 -#define DT_CHR 2 -#define DT_DIR 4 -#define DT_BLK 6 -#define DT_REG 8 -#define DT_LNK 10 -#define DT_SOCK 12 -#define DT_WHT 14 - -/* * This is the "filldir" function type, used by readdir() to let * the kernel specify what kind of dirent layout it wants to have. * This allows the kernel to read directories into kernel space or @@ -1721,6 +1754,25 @@ struct block_device_operations; #define NOMMU_VMFLAGS \ (NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC) +/* + * These flags control the behavior of the remap_file_range function pointer. + * If it is called with len == 0 that means "remap to end of source file". + * See Documentation/filesystems/vfs.txt for more details about this call. + * + * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate) + * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request + */ +#define REMAP_FILE_DEDUP (1 << 0) +#define REMAP_FILE_CAN_SHORTEN (1 << 1) + +/* + * These flags signal that the caller is ok with altering various aspects of + * the behavior of the remap operation. The changes must be made by the + * implementation; the vfs remap helper functions can take advantage of them. + * Flags in this category exist to preserve the quirky behavior of the hoisted + * btrfs clone/dedupe ioctls. + */ +#define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN) struct iov_iter; @@ -1731,6 +1783,7 @@ struct file_operations { ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); + int (*iopoll)(struct kiocb *kiocb, bool spin); int (*iterate) (struct file *, struct dir_context *); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); @@ -1759,10 +1812,9 @@ struct file_operations { #endif ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); - int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, - u64); - int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, - u64); + loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); } __randomize_layout; @@ -1825,21 +1877,21 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *, unsigned long, loff_t *, rwf_t); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); -extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, - struct inode *inode_out, loff_t pos_out, - u64 *len, bool is_dedupe); -extern int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len); -extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len); -extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, - struct inode *dest, loff_t destoff, - loff_t len, bool *is_same); +extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *count, + unsigned int remap_flags); +extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags); +extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags); extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); -extern int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, - struct file *dst_file, loff_t dst_pos, - u64 len); +extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, + struct file *dst_file, loff_t dst_pos, + loff_t len, unsigned int remap_flags); struct super_operations { @@ -1972,7 +2024,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) .ki_filp = filp, .ki_flags = iocb_flags(filp), .ki_hint = ki_hint_validate(file_write_hint(filp)), - .ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0), + .ki_ioprio = get_current_ioprio(), }; } @@ -2030,7 +2082,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to * synchronize competing switching instances and to tell * wb stat updates to grab the i_pages lock. See - * inode_switch_wb_work_fn() for details. + * inode_switch_wbs_work_fn() for details. * * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper * and work dirs among overlayfs mounts. @@ -2432,6 +2484,7 @@ struct filename { struct audit_names *aname; const char iname[]; }; +static_assert(offsetof(struct filename, iname) % sizeof(long) == 0); extern long vfs_truncate(const struct path *, loff_t); extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, @@ -2967,6 +3020,9 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); +extern int generic_remap_checks(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t *count, unsigned int remap_flags); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); @@ -3212,8 +3268,12 @@ extern int generic_check_addressable(unsigned, u64); extern int buffer_migrate_page(struct address_space *, struct page *, struct page *, enum migrate_mode); +extern int buffer_migrate_page_norefs(struct address_space *, + struct page *, struct page *, + enum migrate_mode); #else #define buffer_migrate_page NULL +#define buffer_migrate_page_norefs NULL #endif extern int setattr_prepare(struct dentry *, struct iattr *); @@ -3453,4 +3513,13 @@ extern void inode_nohighmem(struct inode *inode); extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice); +#if defined(CONFIG_IO_URING) +extern struct sock *io_uring_get_socket(struct file *file); +#else +static inline struct sock *io_uring_get_socket(struct file *file) +{ + return NULL; +} +#endif + #endif /* _LINUX_FS_H */ |