aboutsummaryrefslogtreecommitdiff
path: root/include/linux/fs.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/fs.h')
-rw-r--r--include/linux/fs.h213
1 files changed, 141 insertions, 72 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 897eae8faee1..0a257d89208e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -37,6 +37,9 @@
#include <linux/uuid.h>
#include <linux/errseq.h>
#include <linux/ioprio.h>
+#include <linux/fs_types.h>
+#include <linux/build_bug.h>
+#include <linux/stddef.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -304,13 +307,20 @@ enum rw_hint {
struct kiocb {
struct file *ki_filp;
+
+ /* The 'ki_filp' pointer is shared in a union for aio */
+ randomized_struct_fields_start
+
loff_t ki_pos;
void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
void *private;
int ki_flags;
u16 ki_hint;
u16 ki_ioprio; /* See linux/ioprio.h */
-} __randomize_layout;
+ unsigned int ki_cookie; /* for ->iopoll */
+
+ randomized_struct_fields_end
+};
static inline bool is_sync_kiocb(struct kiocb *kiocb)
{
@@ -403,24 +413,40 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata);
+/**
+ * struct address_space - Contents of a cacheable, mappable object.
+ * @host: Owner, either the inode or the block_device.
+ * @i_pages: Cached pages.
+ * @gfp_mask: Memory allocation flags to use for allocating pages.
+ * @i_mmap_writable: Number of VM_SHARED mappings.
+ * @i_mmap: Tree of private and shared mappings.
+ * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
+ * @nrpages: Number of page entries, protected by the i_pages lock.
+ * @nrexceptional: Shadow or DAX entries, protected by the i_pages lock.
+ * @writeback_index: Writeback starts here.
+ * @a_ops: Methods.
+ * @flags: Error bits and flags (AS_*).
+ * @wb_err: The most recent error which has occurred.
+ * @private_lock: For use by the owner of the address_space.
+ * @private_list: For use by the owner of the address_space.
+ * @private_data: For use by the owner of the address_space.
+ */
struct address_space {
- struct inode *host; /* owner: inode, block_device */
- struct radix_tree_root i_pages; /* cached pages */
- atomic_t i_mmap_writable;/* count VM_SHARED mappings */
- struct rb_root_cached i_mmap; /* tree of private and shared mappings */
- struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */
- /* Protected by the i_pages lock */
- unsigned long nrpages; /* number of total pages */
- /* number of shadow or DAX exceptional entries */
+ struct inode *host;
+ struct xarray i_pages;
+ gfp_t gfp_mask;
+ atomic_t i_mmap_writable;
+ struct rb_root_cached i_mmap;
+ struct rw_semaphore i_mmap_rwsem;
+ unsigned long nrpages;
unsigned long nrexceptional;
- pgoff_t writeback_index;/* writeback starts here */
- const struct address_space_operations *a_ops; /* methods */
- unsigned long flags; /* error bits */
- spinlock_t private_lock; /* for use by the address_space */
- gfp_t gfp_mask; /* implicit gfp mask for allocations */
- struct list_head private_list; /* for use by the address_space */
- void *private_data; /* ditto */
+ pgoff_t writeback_index;
+ const struct address_space_operations *a_ops;
+ unsigned long flags;
errseq_t wb_err;
+ spinlock_t private_lock;
+ struct list_head private_list;
+ void *private_data;
} __attribute__((aligned(sizeof(long)))) __randomize_layout;
/*
* On most architectures that alignment is already the case; but
@@ -467,15 +493,18 @@ struct block_device {
struct mutex bd_fsfreeze_mutex;
} __randomize_layout;
+/* XArray tags, for tagging dirty and writeback pages in the pagecache. */
+#define PAGECACHE_TAG_DIRTY XA_MARK_0
+#define PAGECACHE_TAG_WRITEBACK XA_MARK_1
+#define PAGECACHE_TAG_TOWRITE XA_MARK_2
+
/*
- * Radix-tree tags, for tagging dirty and writeback pages within the pagecache
- * radix trees
+ * Returns true if any of the pages in the mapping are marked with the tag.
*/
-#define PAGECACHE_TAG_DIRTY 0
-#define PAGECACHE_TAG_WRITEBACK 1
-#define PAGECACHE_TAG_TOWRITE 2
-
-int mapping_tagged(struct address_space *mapping, int tag);
+static inline bool mapping_tagged(struct address_space *mapping, xa_mark_t tag)
+{
+ return xa_marked(&mapping->i_pages, tag);
+}
static inline void i_mmap_lock_write(struct address_space *mapping)
{
@@ -932,7 +961,9 @@ static inline struct file *get_file(struct file *f)
atomic_long_inc(&f->f_count);
return f;
}
-#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count)
+#define get_file_rcu_many(x, cnt) \
+ atomic_long_add_unless(&(x)->f_count, (cnt), 0)
+#define get_file_rcu(x) get_file_rcu_many((x), 1)
#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1)
#define file_count(x) atomic_long_read(&(x)->f_count)
@@ -1025,10 +1056,15 @@ bool opens_in_grace(struct net *);
* Obviously, the last two criteria only matter for POSIX locks.
*/
struct file_lock {
- struct file_lock *fl_next; /* singly linked list for this inode */
+ struct file_lock *fl_blocker; /* The lock, that is blocking us */
struct list_head fl_list; /* link into file_lock_context */
struct hlist_node fl_link; /* node in global lists */
- struct list_head fl_block; /* circular list of blocked processes */
+ struct list_head fl_blocked_requests; /* list of requests with
+ * ->fl_blocker pointing here
+ */
+ struct list_head fl_blocked_member; /* node in
+ * ->fl_blocker->fl_blocked_requests
+ */
fl_owner_t fl_owner;
unsigned int fl_flags;
unsigned char fl_type;
@@ -1100,7 +1136,7 @@ extern void locks_remove_file(struct file *);
extern void locks_release_private(struct file_lock *);
extern void posix_test_lock(struct file *, struct file_lock *);
extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
-extern int posix_unblock_lock(struct file_lock *);
+extern int locks_delete_block(struct file_lock *);
extern int vfs_test_lock(struct file *, struct file_lock *);
extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
@@ -1190,7 +1226,7 @@ static inline int posix_lock_file(struct file *filp, struct file_lock *fl,
return -ENOLCK;
}
-static inline int posix_unblock_lock(struct file_lock *waiter)
+static inline int locks_delete_block(struct file_lock *waiter)
{
return -ENOENT;
}
@@ -1393,17 +1429,26 @@ struct super_block {
struct sb_writers s_writers;
+ /*
+ * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and
+ * s_fsnotify_marks together for cache efficiency. They are frequently
+ * accessed and rarely modified.
+ */
+ void *s_fs_info; /* Filesystem private info */
+
+ /* Granularity of c/m/atime in ns (cannot be worse than a second) */
+ u32 s_time_gran;
+#ifdef CONFIG_FSNOTIFY
+ __u32 s_fsnotify_mask;
+ struct fsnotify_mark_connector __rcu *s_fsnotify_marks;
+#endif
+
char s_id[32]; /* Informational name */
uuid_t s_uuid; /* UUID */
- void *s_fs_info; /* Filesystem private info */
unsigned int s_max_links;
fmode_t s_mode;
- /* Granularity of c/m/atime in ns.
- Cannot be worse than a second */
- u32 s_time_gran;
-
/*
* The next field is for VFS *only*. No filesystems have any business
* even looking at it. You had been warned.
@@ -1428,6 +1473,9 @@ struct super_block {
/* Number of inodes with nlink == 0 but still referenced */
atomic_long_t s_remove_count;
+ /* Pending fsnotify inode refs */
+ atomic_long_t s_fsnotify_inode_refs;
+
/* Being remounted read-only */
int s_readonly_remount;
@@ -1443,11 +1491,12 @@ struct super_block {
struct user_namespace *s_user_ns;
/*
- * Keep the lru lists last in the structure so they always sit on their
- * own individual cachelines.
+ * The list_lru structure is essentially just a pointer to a table
+ * of per-node lru lists, each of which has its own spinlock.
+ * There is no need to put them into separate cachelines.
*/
- struct list_lru s_dentry_lru ____cacheline_aligned_in_smp;
- struct list_lru s_inode_lru ____cacheline_aligned_in_smp;
+ struct list_lru s_dentry_lru;
+ struct list_lru s_inode_lru;
struct rcu_head rcu;
struct work_struct destroy_work;
@@ -1664,22 +1713,6 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
/*
- * File types
- *
- * NOTE! These match bits 12..15 of stat.st_mode
- * (ie "(i_mode >> 12) & 15").
- */
-#define DT_UNKNOWN 0
-#define DT_FIFO 1
-#define DT_CHR 2
-#define DT_DIR 4
-#define DT_BLK 6
-#define DT_REG 8
-#define DT_LNK 10
-#define DT_SOCK 12
-#define DT_WHT 14
-
-/*
* This is the "filldir" function type, used by readdir() to let
* the kernel specify what kind of dirent layout it wants to have.
* This allows the kernel to read directories into kernel space or
@@ -1721,6 +1754,25 @@ struct block_device_operations;
#define NOMMU_VMFLAGS \
(NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC)
+/*
+ * These flags control the behavior of the remap_file_range function pointer.
+ * If it is called with len == 0 that means "remap to end of source file".
+ * See Documentation/filesystems/vfs.txt for more details about this call.
+ *
+ * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate)
+ * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request
+ */
+#define REMAP_FILE_DEDUP (1 << 0)
+#define REMAP_FILE_CAN_SHORTEN (1 << 1)
+
+/*
+ * These flags signal that the caller is ok with altering various aspects of
+ * the behavior of the remap operation. The changes must be made by the
+ * implementation; the vfs remap helper functions can take advantage of them.
+ * Flags in this category exist to preserve the quirky behavior of the hoisted
+ * btrfs clone/dedupe ioctls.
+ */
+#define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN)
struct iov_iter;
@@ -1731,6 +1783,7 @@ struct file_operations {
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
+ int (*iopoll)(struct kiocb *kiocb, bool spin);
int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
@@ -1759,10 +1812,9 @@ struct file_operations {
#endif
ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
loff_t, size_t, unsigned int);
- int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
- u64);
- int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
- u64);
+ loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t len, unsigned int remap_flags);
int (*fadvise)(struct file *, loff_t, loff_t, int);
} __randomize_layout;
@@ -1825,21 +1877,21 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
unsigned long, loff_t *, rwf_t);
extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
loff_t, size_t, unsigned int);
-extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
- struct inode *inode_out, loff_t pos_out,
- u64 *len, bool is_dedupe);
-extern int do_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out, u64 len);
-extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out, u64 len);
-extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
- struct inode *dest, loff_t destoff,
- loff_t len, bool *is_same);
+extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t *count,
+ unsigned int remap_flags);
+extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t len, unsigned int remap_flags);
+extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t len, unsigned int remap_flags);
extern int vfs_dedupe_file_range(struct file *file,
struct file_dedupe_range *same);
-extern int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
- struct file *dst_file, loff_t dst_pos,
- u64 len);
+extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
+ struct file *dst_file, loff_t dst_pos,
+ loff_t len, unsigned int remap_flags);
struct super_operations {
@@ -1972,7 +2024,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
.ki_filp = filp,
.ki_flags = iocb_flags(filp),
.ki_hint = ki_hint_validate(file_write_hint(filp)),
- .ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0),
+ .ki_ioprio = get_current_ioprio(),
};
}
@@ -2030,7 +2082,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
* I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to
* synchronize competing switching instances and to tell
* wb stat updates to grab the i_pages lock. See
- * inode_switch_wb_work_fn() for details.
+ * inode_switch_wbs_work_fn() for details.
*
* I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper
* and work dirs among overlayfs mounts.
@@ -2432,6 +2484,7 @@ struct filename {
struct audit_names *aname;
const char iname[];
};
+static_assert(offsetof(struct filename, iname) % sizeof(long) == 0);
extern long vfs_truncate(const struct path *, loff_t);
extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
@@ -2967,6 +3020,9 @@ extern int sb_min_blocksize(struct super_block *, int);
extern int generic_file_mmap(struct file *, struct vm_area_struct *);
extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
+extern int generic_remap_checks(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t *count, unsigned int remap_flags);
extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
@@ -3212,8 +3268,12 @@ extern int generic_check_addressable(unsigned, u64);
extern int buffer_migrate_page(struct address_space *,
struct page *, struct page *,
enum migrate_mode);
+extern int buffer_migrate_page_norefs(struct address_space *,
+ struct page *, struct page *,
+ enum migrate_mode);
#else
#define buffer_migrate_page NULL
+#define buffer_migrate_page_norefs NULL
#endif
extern int setattr_prepare(struct dentry *, struct iattr *);
@@ -3453,4 +3513,13 @@ extern void inode_nohighmem(struct inode *inode);
extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
int advice);
+#if defined(CONFIG_IO_URING)
+extern struct sock *io_uring_get_socket(struct file *file);
+#else
+static inline struct sock *io_uring_get_socket(struct file *file)
+{
+ return NULL;
+}
+#endif
+
#endif /* _LINUX_FS_H */