diff options
Diffstat (limited to 'include/linux/fs.h')
-rw-r--r-- | include/linux/fs.h | 278 |
1 files changed, 183 insertions, 95 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h index 70e61b58baaf..577365a77b47 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -74,7 +74,6 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private); -typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); #define MAY_EXEC 0x00000001 #define MAY_WRITE 0x00000002 @@ -153,9 +152,10 @@ typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); #define CHECK_IOVEC_ONLY -1 /* - * The below are the various read and write types that we support. Some of + * The below are the various read and write flags that we support. Some of * them include behavioral modifiers that send information down to the - * block layer and IO scheduler. Terminology: + * block layer and IO scheduler. They should be used along with a req_op. + * Terminology: * * The block layer uses device plugging to defer IO a little bit, in * the hope that we will see more IO very shortly. This increases @@ -178,9 +178,6 @@ typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); * READ_SYNC A synchronous read. Device is not plugged, caller can * immediately wait on this read without caring about * unplugging. - * READA Used for read-ahead operations. Lower priority, and the - * block layer could (in theory) choose to ignore this - * request if it runs into resource problems. * WRITE A normal async write. Device will be plugged. * WRITE_SYNC Synchronous write. Identical to WRITE, but passes down * the hint that someone will be waiting on this IO @@ -194,19 +191,17 @@ typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); * non-volatile media on completion. * */ -#define RW_MASK REQ_WRITE -#define RWA_MASK REQ_RAHEAD +#define RW_MASK REQ_OP_WRITE -#define READ 0 -#define WRITE RW_MASK -#define READA RWA_MASK +#define READ REQ_OP_READ +#define WRITE REQ_OP_WRITE -#define READ_SYNC (READ | REQ_SYNC) -#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE) -#define WRITE_ODIRECT (WRITE | REQ_SYNC) -#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH) -#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA) -#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) +#define READ_SYNC REQ_SYNC +#define WRITE_SYNC (REQ_SYNC | REQ_NOIDLE) +#define WRITE_ODIRECT REQ_SYNC +#define WRITE_FLUSH (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH) +#define WRITE_FUA (REQ_SYNC | REQ_NOIDLE | REQ_FUA) +#define WRITE_FLUSH_FUA (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA) /* * Attribute flags. These should be or-ed together to figure out what @@ -323,6 +318,8 @@ struct writeback_control; #define IOCB_APPEND (1 << 1) #define IOCB_DIRECT (1 << 2) #define IOCB_HIPRI (1 << 3) +#define IOCB_DSYNC (1 << 4) +#define IOCB_SYNC (1 << 5) struct kiocb { struct file *ki_filp; @@ -394,13 +391,15 @@ struct address_space_operations { void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, gfp_t); void (*freepage)(struct page *); - ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset); + ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); /* * migrate the contents of a page to the specified target. If * migrate_mode is MIGRATE_ASYNC, it must not block. */ int (*migratepage) (struct address_space *, struct page *, struct page *, enum migrate_mode); + bool (*isolate_page)(struct page *, isolate_mode_t); + void (*putback_page)(struct page *); int (*launder_page) (struct page *); int (*is_partially_uptodate) (struct page *, unsigned long, unsigned long); @@ -458,7 +457,6 @@ struct block_device { struct inode * bd_inode; /* will die */ struct super_block * bd_super; struct mutex bd_mutex; /* open/close mutex */ - struct list_head bd_inodes; void * bd_claiming; void * bd_holder; int bd_holders; @@ -577,6 +575,18 @@ static inline void mapping_allow_writable(struct address_space *mapping) struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) +static inline struct posix_acl * +uncached_acl_sentinel(struct task_struct *task) +{ + return (void *)task + 1; +} + +static inline bool +is_uncached_acl(struct posix_acl *acl) +{ + return (long)acl & 1; +} + #define IOP_FASTPERM 0x0001 #define IOP_LOOKUP 0x0002 #define IOP_NOFOLLOW 0x0004 @@ -635,7 +645,7 @@ struct inode { /* Misc */ unsigned long i_state; - struct mutex i_mutex; + struct rw_semaphore i_rwsem; unsigned long dirtied_when; /* jiffies of first dirtying */ unsigned long dirtied_time_when; @@ -652,6 +662,7 @@ struct inode { #endif struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; + struct list_head i_wb_list; /* backing dev writeback list */ union { struct hlist_head i_dentry; struct rcu_head i_rcu; @@ -672,6 +683,7 @@ struct inode { struct block_device *i_bdev; struct cdev *i_cdev; char *i_link; + unsigned i_dir_seq; }; __u32 i_generation; @@ -721,27 +733,42 @@ enum inode_i_mutex_lock_class static inline void inode_lock(struct inode *inode) { - mutex_lock(&inode->i_mutex); + down_write(&inode->i_rwsem); } static inline void inode_unlock(struct inode *inode) { - mutex_unlock(&inode->i_mutex); + up_write(&inode->i_rwsem); +} + +static inline void inode_lock_shared(struct inode *inode) +{ + down_read(&inode->i_rwsem); +} + +static inline void inode_unlock_shared(struct inode *inode) +{ + up_read(&inode->i_rwsem); } static inline int inode_trylock(struct inode *inode) { - return mutex_trylock(&inode->i_mutex); + return down_write_trylock(&inode->i_rwsem); +} + +static inline int inode_trylock_shared(struct inode *inode) +{ + return down_read_trylock(&inode->i_rwsem); } static inline int inode_is_locked(struct inode *inode) { - return mutex_is_locked(&inode->i_mutex); + return rwsem_is_locked(&inode->i_rwsem); } static inline void inode_lock_nested(struct inode *inode, unsigned subclass) { - mutex_lock_nested(&inode->i_mutex, subclass); + down_write_nested(&inode->i_rwsem, subclass); } void lock_two_nondirectories(struct inode *, struct inode*); @@ -802,31 +829,6 @@ static inline void i_size_write(struct inode *inode, loff_t i_size) #endif } -/* Helper functions so that in most cases filesystems will - * not need to deal directly with kuid_t and kgid_t and can - * instead deal with the raw numeric values that are stored - * in the filesystem. - */ -static inline uid_t i_uid_read(const struct inode *inode) -{ - return from_kuid(&init_user_ns, inode->i_uid); -} - -static inline gid_t i_gid_read(const struct inode *inode) -{ - return from_kgid(&init_user_ns, inode->i_gid); -} - -static inline void i_uid_write(struct inode *inode, uid_t uid) -{ - inode->i_uid = make_kuid(&init_user_ns, uid); -} - -static inline void i_gid_write(struct inode *inode, gid_t gid) -{ - inode->i_gid = make_kgid(&init_user_ns, gid); -} - static inline unsigned iminor(const struct inode *inode) { return MINOR(inode->i_rdev); @@ -1243,12 +1245,7 @@ static inline struct inode *file_inode(const struct file *f) static inline struct dentry *file_dentry(const struct file *file) { - struct dentry *dentry = file->f_path.dentry; - - if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) - return dentry->d_op->d_real(dentry, file_inode(file)); - else - return dentry; + return d_real(file->f_path.dentry, file_inode(file), 0); } static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) @@ -1298,6 +1295,10 @@ struct mm_struct; /* sb->s_iflags */ #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ +#define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */ + +/* sb->s_iflags to limit user namespace mounts */ +#define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ /* Possible states of 'frozen' field */ enum { @@ -1401,6 +1402,13 @@ struct super_block { struct hlist_head s_pins; /* + * Owning user namespace and default context in which to + * interpret filesystem uids, gids, quotas, device nodes, + * xattrs and security labels. + */ + struct user_namespace *s_user_ns; + + /* * Keep the lru lists last in the structure so they always sit on their * own individual cachelines. */ @@ -1419,8 +1427,36 @@ struct super_block { /* s_inode_list_lock protects s_inodes */ spinlock_t s_inode_list_lock ____cacheline_aligned_in_smp; struct list_head s_inodes; /* all inodes */ + + spinlock_t s_inode_wblist_lock; + struct list_head s_inodes_wb; /* writeback inodes */ }; +/* Helper functions so that in most cases filesystems will + * not need to deal directly with kuid_t and kgid_t and can + * instead deal with the raw numeric values that are stored + * in the filesystem. + */ +static inline uid_t i_uid_read(const struct inode *inode) +{ + return from_kuid(inode->i_sb->s_user_ns, inode->i_uid); +} + +static inline gid_t i_gid_read(const struct inode *inode) +{ + return from_kgid(inode->i_sb->s_user_ns, inode->i_gid); +} + +static inline void i_uid_write(struct inode *inode, uid_t uid) +{ + inode->i_uid = make_kuid(inode->i_sb->s_user_ns, uid); +} + +static inline void i_gid_write(struct inode *inode, gid_t gid) +{ + inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid); +} + extern struct timespec current_fs_time(struct super_block *sb); /* @@ -1563,6 +1599,7 @@ extern int vfs_whiteout(struct inode *, struct dentry *); */ extern void inode_init_owner(struct inode *inode, const struct inode *dir, umode_t mode); +extern bool may_open_dev(const struct path *path); /* * VFS FS_IOC_FIEMAP helper definitions. */ @@ -1646,6 +1683,7 @@ struct file_operations { ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iterate) (struct file *, struct dir_context *); + int (*iterate_shared) (struct file *, struct dir_context *); unsigned int (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); @@ -1699,8 +1737,10 @@ struct inode_operations { struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); - int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); + int (*setxattr) (struct dentry *, struct inode *, + const char *, const void *, size_t, int); + ssize_t (*getxattr) (struct dentry *, struct inode *, + const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, @@ -1830,6 +1870,11 @@ struct super_operations { #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) +static inline bool HAS_UNMAPPED_ID(struct inode *inode) +{ + return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid); +} + /* * Inode state bits. Protected by inode->i_lock * @@ -1978,8 +2023,6 @@ struct file_system_type { #define FS_BINARY_MOUNTDATA 2 #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ -#define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ -#define FS_USERNS_VISIBLE 32 /* FS must already be visible */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); @@ -2000,8 +2043,9 @@ struct file_system_type { #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) -extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags, - void *data, int (*fill_super)(struct super_block *, void *, int)); +extern struct dentry *mount_ns(struct file_system_type *fs_type, + int flags, void *data, void *ns, struct user_namespace *user_ns, + int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); @@ -2021,6 +2065,11 @@ void deactivate_locked_super(struct super_block *sb); int set_anon_super(struct super_block *s, void *data); int get_anon_bdev(dev_t *); void free_anon_bdev(dev_t); +struct super_block *sget_userns(struct file_system_type *type, + int (*test)(struct super_block *,void *), + int (*set)(struct super_block *,void *), + int flags, struct user_namespace *user_ns, + void *data); struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), @@ -2263,7 +2312,7 @@ struct filename { const char iname[]; }; -extern long vfs_truncate(struct path *, loff_t); +extern long vfs_truncate(const struct path *, loff_t); extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); extern int vfs_fallocate(struct file *file, int mode, loff_t offset, @@ -2320,14 +2369,6 @@ extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); -#ifdef CONFIG_FS_DAX -extern bool blkdev_dax_capable(struct block_device *bdev); -#else -static inline bool blkdev_dax_capable(struct block_device *bdev) -{ - return false; -} -#endif extern struct super_block *blockdev_superblock; @@ -2395,6 +2436,8 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, /* fs/char_dev.c */ #define CHRDEV_MAJOR_HASH_SIZE 255 +/* Marks the bottom of the first segment of free char majors */ +#define CHRDEV_MAJOR_DYN_END 234 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); extern int register_chrdev_region(dev_t, unsigned, const char *); extern int __register_chrdev(unsigned int major, unsigned int baseminor, @@ -2438,15 +2481,18 @@ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); #ifdef CONFIG_BLOCK -/* - * return READ, READA, or WRITE - */ -#define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK)) +static inline bool op_is_write(unsigned int op) +{ + return op == REQ_OP_READ ? false : true; +} /* * return data direction, READ or WRITE */ -#define bio_data_dir(bio) ((bio)->bi_rw & 1) +static inline int bio_data_dir(struct bio *bio) +{ + return op_is_write(bio_op(bio)) ? WRITE : READ; +} extern void check_disk_size_change(struct gendisk *disk, struct block_device *bdev); @@ -2481,17 +2527,30 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end, int sync_mode); extern int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end); +extern int filemap_check_errors(struct address_space *mapping); extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); -static inline int generic_write_sync(struct file *file, loff_t pos, loff_t count) -{ - if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) - return 0; - return vfs_fsync_range(file, pos, pos + count - 1, - (file->f_flags & __O_SYNC) ? 0 : 1); + +/* + * Sync the bytes written if this was a synchronous write. Expect ki_pos + * to already be updated for the write, and will return either the amount + * of bytes passed in, or an error if syncing the file failed. + */ +static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) +{ + if (iocb->ki_flags & IOCB_DSYNC) { + int ret = vfs_fsync_range(iocb->ki_filp, + iocb->ki_pos - count, iocb->ki_pos - 1, + (iocb->ki_flags & IOCB_SYNC) ? 0 : 1); + if (ret) + return ret; + } + + return count; } + extern void emergency_sync(void); extern void emergency_remount(void); #ifdef CONFIG_BLOCK @@ -2590,15 +2649,34 @@ static inline void i_readcount_inc(struct inode *inode) #endif extern int do_pipe_flags(int *, int); +#define __kernel_read_file_id(id) \ + id(UNKNOWN, unknown) \ + id(FIRMWARE, firmware) \ + id(MODULE, kernel-module) \ + id(KEXEC_IMAGE, kexec-image) \ + id(KEXEC_INITRAMFS, kexec-initramfs) \ + id(POLICY, security-policy) \ + id(MAX_ID, ) + +#define __fid_enumify(ENUM, dummy) READING_ ## ENUM, +#define __fid_stringify(dummy, str) #str, + enum kernel_read_file_id { - READING_FIRMWARE = 1, - READING_MODULE, - READING_KEXEC_IMAGE, - READING_KEXEC_INITRAMFS, - READING_POLICY, - READING_MAX_ID + __kernel_read_file_id(__fid_enumify) +}; + +static const char * const kernel_read_file_str[] = { + __kernel_read_file_id(__fid_stringify) }; +static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) +{ + if (id < 0 || id >= READING_MAX_ID) + return kernel_read_file_str[READING_UNKNOWN]; + + return kernel_read_file_str[id]; +} + extern int kernel_read(struct file *, loff_t, char *, unsigned long); extern int kernel_read_file(struct file *, void **, loff_t *, loff_t, enum kernel_read_file_id); @@ -2690,7 +2768,7 @@ static inline void remove_inode_hash(struct inode *inode) extern void inode_sb_list_add(struct inode *inode); #ifdef CONFIG_BLOCK -extern blk_qc_t submit_bio(int, struct bio *); +extern blk_qc_t submit_bio(struct bio *); extern int bdev_read_only(struct block_device *); #endif extern int set_blocksize(struct block_device *, int); @@ -2703,7 +2781,7 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); -extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t); +extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *); extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos); @@ -2745,7 +2823,7 @@ extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); #ifdef CONFIG_BLOCK -typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, +typedef void (dio_submit_t)(struct bio *bio, struct inode *inode, loff_t file_offset); enum { @@ -2766,18 +2844,17 @@ void dio_end_io(struct bio *bio, int error); ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, - loff_t offset, get_block_t get_block, + get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, int flags); static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, - struct iov_iter *iter, loff_t offset, + struct iov_iter *iter, get_block_t get_block) { return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, - offset, get_block, NULL, NULL, - DIO_LOCKING | DIO_SKIP_HOLES); + get_block, NULL, NULL, DIO_LOCKING | DIO_SKIP_HOLES); } #endif @@ -2943,6 +3020,10 @@ static inline int iocb_flags(struct file *file) res |= IOCB_APPEND; if (io_is_direct(file)) res |= IOCB_DIRECT; + if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host)) + res |= IOCB_DSYNC; + if (file->f_flags & __O_SYNC) + res |= IOCB_SYNC; return res; } @@ -3104,6 +3185,13 @@ static inline bool dir_relax(struct inode *inode) return !IS_DEADDIR(inode); } +static inline bool dir_relax_shared(struct inode *inode) +{ + inode_unlock_shared(inode); + inode_lock_shared(inode); + return !IS_DEADDIR(inode); +} + extern bool path_noexec(const struct path *path); extern void inode_nohighmem(struct inode *inode); |