diff options
Diffstat (limited to 'include/linux/fs.h')
-rw-r--r-- | include/linux/fs.h | 287 |
1 files changed, 191 insertions, 96 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h index 14a97194b34b..33f0e96db06f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -74,7 +74,6 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private); -typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); #define MAY_EXEC 0x00000001 #define MAY_WRITE 0x00000002 @@ -153,9 +152,10 @@ typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); #define CHECK_IOVEC_ONLY -1 /* - * The below are the various read and write types that we support. Some of + * The below are the various read and write flags that we support. Some of * them include behavioral modifiers that send information down to the - * block layer and IO scheduler. Terminology: + * block layer and IO scheduler. They should be used along with a req_op. + * Terminology: * * The block layer uses device plugging to defer IO a little bit, in * the hope that we will see more IO very shortly. This increases @@ -178,9 +178,6 @@ typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); * READ_SYNC A synchronous read. Device is not plugged, caller can * immediately wait on this read without caring about * unplugging. - * READA Used for read-ahead operations. Lower priority, and the - * block layer could (in theory) choose to ignore this - * request if it runs into resource problems. * WRITE A normal async write. Device will be plugged. * WRITE_SYNC Synchronous write. Identical to WRITE, but passes down * the hint that someone will be waiting on this IO @@ -194,19 +191,17 @@ typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); * non-volatile media on completion. * */ -#define RW_MASK REQ_WRITE -#define RWA_MASK REQ_RAHEAD +#define RW_MASK REQ_OP_WRITE -#define READ 0 -#define WRITE RW_MASK -#define READA RWA_MASK +#define READ REQ_OP_READ +#define WRITE REQ_OP_WRITE -#define READ_SYNC (READ | REQ_SYNC) -#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE) -#define WRITE_ODIRECT (WRITE | REQ_SYNC) -#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH) -#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA) -#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) +#define READ_SYNC REQ_SYNC +#define WRITE_SYNC (REQ_SYNC | REQ_NOIDLE) +#define WRITE_ODIRECT REQ_SYNC +#define WRITE_FLUSH (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH) +#define WRITE_FUA (REQ_SYNC | REQ_NOIDLE | REQ_FUA) +#define WRITE_FLUSH_FUA (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA) /* * Attribute flags. These should be or-ed together to figure out what @@ -323,6 +318,8 @@ struct writeback_control; #define IOCB_APPEND (1 << 1) #define IOCB_DIRECT (1 << 2) #define IOCB_HIPRI (1 << 3) +#define IOCB_DSYNC (1 << 4) +#define IOCB_SYNC (1 << 5) struct kiocb { struct file *ki_filp; @@ -394,13 +391,15 @@ struct address_space_operations { void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, gfp_t); void (*freepage)(struct page *); - ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset); + ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); /* * migrate the contents of a page to the specified target. If * migrate_mode is MIGRATE_ASYNC, it must not block. */ int (*migratepage) (struct address_space *, struct page *, struct page *, enum migrate_mode); + bool (*isolate_page)(struct page *, isolate_mode_t); + void (*putback_page)(struct page *); int (*launder_page) (struct page *); int (*is_partially_uptodate) (struct page *, unsigned long, unsigned long); @@ -458,7 +457,6 @@ struct block_device { struct inode * bd_inode; /* will die */ struct super_block * bd_super; struct mutex bd_mutex; /* open/close mutex */ - struct list_head bd_inodes; void * bd_claiming; void * bd_holder; int bd_holders; @@ -577,6 +575,18 @@ static inline void mapping_allow_writable(struct address_space *mapping) struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) +static inline struct posix_acl * +uncached_acl_sentinel(struct task_struct *task) +{ + return (void *)task + 1; +} + +static inline bool +is_uncached_acl(struct posix_acl *acl) +{ + return (long)acl & 1; +} + #define IOP_FASTPERM 0x0001 #define IOP_LOOKUP 0x0002 #define IOP_NOFOLLOW 0x0004 @@ -635,7 +645,7 @@ struct inode { /* Misc */ unsigned long i_state; - struct mutex i_mutex; + struct rw_semaphore i_rwsem; unsigned long dirtied_when; /* jiffies of first dirtying */ unsigned long dirtied_time_when; @@ -652,6 +662,7 @@ struct inode { #endif struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; + struct list_head i_wb_list; /* backing dev writeback list */ union { struct hlist_head i_dentry; struct rcu_head i_rcu; @@ -672,6 +683,7 @@ struct inode { struct block_device *i_bdev; struct cdev *i_cdev; char *i_link; + unsigned i_dir_seq; }; __u32 i_generation; @@ -721,27 +733,42 @@ enum inode_i_mutex_lock_class static inline void inode_lock(struct inode *inode) { - mutex_lock(&inode->i_mutex); + down_write(&inode->i_rwsem); } static inline void inode_unlock(struct inode *inode) { - mutex_unlock(&inode->i_mutex); + up_write(&inode->i_rwsem); +} + +static inline void inode_lock_shared(struct inode *inode) +{ + down_read(&inode->i_rwsem); +} + +static inline void inode_unlock_shared(struct inode *inode) +{ + up_read(&inode->i_rwsem); } static inline int inode_trylock(struct inode *inode) { - return mutex_trylock(&inode->i_mutex); + return down_write_trylock(&inode->i_rwsem); +} + +static inline int inode_trylock_shared(struct inode *inode) +{ + return down_read_trylock(&inode->i_rwsem); } static inline int inode_is_locked(struct inode *inode) { - return mutex_is_locked(&inode->i_mutex); + return rwsem_is_locked(&inode->i_rwsem); } static inline void inode_lock_nested(struct inode *inode, unsigned subclass) { - mutex_lock_nested(&inode->i_mutex, subclass); + down_write_nested(&inode->i_rwsem, subclass); } void lock_two_nondirectories(struct inode *, struct inode*); @@ -802,31 +829,6 @@ static inline void i_size_write(struct inode *inode, loff_t i_size) #endif } -/* Helper functions so that in most cases filesystems will - * not need to deal directly with kuid_t and kgid_t and can - * instead deal with the raw numeric values that are stored - * in the filesystem. - */ -static inline uid_t i_uid_read(const struct inode *inode) -{ - return from_kuid(&init_user_ns, inode->i_uid); -} - -static inline gid_t i_gid_read(const struct inode *inode) -{ - return from_kgid(&init_user_ns, inode->i_gid); -} - -static inline void i_uid_write(struct inode *inode, uid_t uid) -{ - inode->i_uid = make_kuid(&init_user_ns, uid); -} - -static inline void i_gid_write(struct inode *inode, gid_t gid) -{ - inode->i_gid = make_kgid(&init_user_ns, gid); -} - static inline unsigned iminor(const struct inode *inode) { return MINOR(inode->i_rdev); @@ -929,7 +931,7 @@ static inline struct file *get_file(struct file *f) /* Page cache limit. The filesystems should put that into their s_maxbytes limits, otherwise bad things can happen in VM. */ #if BITS_PER_LONG==32 -#define MAX_LFS_FILESIZE (((loff_t)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) +#define MAX_LFS_FILESIZE (((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1) #elif BITS_PER_LONG==64 #define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffffLL) #endif @@ -1241,6 +1243,11 @@ static inline struct inode *file_inode(const struct file *f) return f->f_inode; } +static inline struct dentry *file_dentry(const struct file *file) +{ + return d_real(file->f_path.dentry, file_inode(file), 0); +} + static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) { return locks_lock_inode_wait(file_inode(filp), fl); @@ -1288,6 +1295,10 @@ struct mm_struct; /* sb->s_iflags */ #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ +#define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */ + +/* sb->s_iflags to limit user namespace mounts */ +#define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ /* Possible states of 'frozen' field */ enum { @@ -1391,6 +1402,13 @@ struct super_block { struct hlist_head s_pins; /* + * Owning user namespace and default context in which to + * interpret filesystem uids, gids, quotas, device nodes, + * xattrs and security labels. + */ + struct user_namespace *s_user_ns; + + /* * Keep the lru lists last in the structure so they always sit on their * own individual cachelines. */ @@ -1409,8 +1427,36 @@ struct super_block { /* s_inode_list_lock protects s_inodes */ spinlock_t s_inode_list_lock ____cacheline_aligned_in_smp; struct list_head s_inodes; /* all inodes */ + + spinlock_t s_inode_wblist_lock; + struct list_head s_inodes_wb; /* writeback inodes */ }; +/* Helper functions so that in most cases filesystems will + * not need to deal directly with kuid_t and kgid_t and can + * instead deal with the raw numeric values that are stored + * in the filesystem. + */ +static inline uid_t i_uid_read(const struct inode *inode) +{ + return from_kuid(inode->i_sb->s_user_ns, inode->i_uid); +} + +static inline gid_t i_gid_read(const struct inode *inode) +{ + return from_kgid(inode->i_sb->s_user_ns, inode->i_gid); +} + +static inline void i_uid_write(struct inode *inode, uid_t uid) +{ + inode->i_uid = make_kuid(inode->i_sb->s_user_ns, uid); +} + +static inline void i_gid_write(struct inode *inode, gid_t gid) +{ + inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid); +} + extern struct timespec current_fs_time(struct super_block *sb); /* @@ -1553,6 +1599,7 @@ extern int vfs_whiteout(struct inode *, struct dentry *); */ extern void inode_init_owner(struct inode *inode, const struct inode *dir, umode_t mode); +extern bool may_open_dev(const struct path *path); /* * VFS FS_IOC_FIEMAP helper definitions. */ @@ -1636,6 +1683,7 @@ struct file_operations { ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iterate) (struct file *, struct dir_context *); + int (*iterate_shared) (struct file *, struct dir_context *); unsigned int (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); @@ -1689,8 +1737,10 @@ struct inode_operations { struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); - int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); + int (*setxattr) (struct dentry *, struct inode *, + const char *, const void *, size_t, int); + ssize_t (*getxattr) (struct dentry *, struct inode *, + const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, @@ -1820,6 +1870,11 @@ struct super_operations { #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) +static inline bool HAS_UNMAPPED_ID(struct inode *inode) +{ + return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid); +} + /* * Inode state bits. Protected by inode->i_lock * @@ -1968,8 +2023,6 @@ struct file_system_type { #define FS_BINARY_MOUNTDATA 2 #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ -#define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ -#define FS_USERNS_VISIBLE 32 /* FS must already be visible */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); @@ -1990,8 +2043,9 @@ struct file_system_type { #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) -extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags, - void *data, int (*fill_super)(struct super_block *, void *, int)); +extern struct dentry *mount_ns(struct file_system_type *fs_type, + int flags, void *data, void *ns, struct user_namespace *user_ns, + int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); @@ -2011,6 +2065,11 @@ void deactivate_locked_super(struct super_block *sb); int set_anon_super(struct super_block *s, void *data); int get_anon_bdev(dev_t *); void free_anon_bdev(dev_t); +struct super_block *sget_userns(struct file_system_type *type, + int (*test)(struct super_block *,void *), + int (*set)(struct super_block *,void *), + int flags, struct user_namespace *user_ns, + void *data); struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), @@ -2067,7 +2126,7 @@ extern int generic_update_time(struct inode *, struct timespec *, int); /* /sys/fs */ extern struct kobject *fs_kobj; -#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) +#define MAX_RW_COUNT (INT_MAX & PAGE_MASK) #ifdef CONFIG_MANDATORY_FILE_LOCKING extern int locks_mandatory_locked(struct file *); @@ -2253,7 +2312,7 @@ struct filename { const char iname[]; }; -extern long vfs_truncate(struct path *, loff_t); +extern long vfs_truncate(const struct path *, loff_t); extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); extern int vfs_fallocate(struct file *file, int mode, loff_t offset, @@ -2310,14 +2369,6 @@ extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); -#ifdef CONFIG_FS_DAX -extern bool blkdev_dax_capable(struct block_device *bdev); -#else -static inline bool blkdev_dax_capable(struct block_device *bdev) -{ - return false; -} -#endif extern struct super_block *blockdev_superblock; @@ -2385,6 +2436,8 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, /* fs/char_dev.c */ #define CHRDEV_MAJOR_HASH_SIZE 255 +/* Marks the bottom of the first segment of free char majors */ +#define CHRDEV_MAJOR_DYN_END 234 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); extern int register_chrdev_region(dev_t, unsigned, const char *); extern int __register_chrdev(unsigned int major, unsigned int baseminor, @@ -2427,16 +2480,20 @@ extern void init_special_inode(struct inode *, umode_t, dev_t); extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); +static inline bool op_is_write(unsigned int op) +{ + return op == REQ_OP_READ ? false : true; +} + #ifdef CONFIG_BLOCK -/* - * return READ, READA, or WRITE - */ -#define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK)) /* * return data direction, READ or WRITE */ -#define bio_data_dir(bio) ((bio)->bi_rw & 1) +static inline int bio_data_dir(struct bio *bio) +{ + return op_is_write(bio_op(bio)) ? WRITE : READ; +} extern void check_disk_size_change(struct gendisk *disk, struct block_device *bdev); @@ -2471,17 +2528,30 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end, int sync_mode); extern int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end); +extern int filemap_check_errors(struct address_space *mapping); extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); -static inline int generic_write_sync(struct file *file, loff_t pos, loff_t count) -{ - if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) - return 0; - return vfs_fsync_range(file, pos, pos + count - 1, - (file->f_flags & __O_SYNC) ? 0 : 1); + +/* + * Sync the bytes written if this was a synchronous write. Expect ki_pos + * to already be updated for the write, and will return either the amount + * of bytes passed in, or an error if syncing the file failed. + */ +static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) +{ + if (iocb->ki_flags & IOCB_DSYNC) { + int ret = vfs_fsync_range(iocb->ki_filp, + iocb->ki_pos - count, iocb->ki_pos - 1, + (iocb->ki_flags & IOCB_SYNC) ? 0 : 1); + if (ret) + return ret; + } + + return count; } + extern void emergency_sync(void); extern void emergency_remount(void); #ifdef CONFIG_BLOCK @@ -2580,15 +2650,35 @@ static inline void i_readcount_inc(struct inode *inode) #endif extern int do_pipe_flags(int *, int); +#define __kernel_read_file_id(id) \ + id(UNKNOWN, unknown) \ + id(FIRMWARE, firmware) \ + id(FIRMWARE_PREALLOC_BUFFER, firmware) \ + id(MODULE, kernel-module) \ + id(KEXEC_IMAGE, kexec-image) \ + id(KEXEC_INITRAMFS, kexec-initramfs) \ + id(POLICY, security-policy) \ + id(MAX_ID, ) + +#define __fid_enumify(ENUM, dummy) READING_ ## ENUM, +#define __fid_stringify(dummy, str) #str, + enum kernel_read_file_id { - READING_FIRMWARE = 1, - READING_MODULE, - READING_KEXEC_IMAGE, - READING_KEXEC_INITRAMFS, - READING_POLICY, - READING_MAX_ID + __kernel_read_file_id(__fid_enumify) +}; + +static const char * const kernel_read_file_str[] = { + __kernel_read_file_id(__fid_stringify) }; +static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) +{ + if (id < 0 || id >= READING_MAX_ID) + return kernel_read_file_str[READING_UNKNOWN]; + + return kernel_read_file_str[id]; +} + extern int kernel_read(struct file *, loff_t, char *, unsigned long); extern int kernel_read_file(struct file *, void **, loff_t *, loff_t, enum kernel_read_file_id); @@ -2658,11 +2748,6 @@ extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int should_remove_suid(struct dentry *); extern int file_remove_privs(struct file *); -extern int dentry_needs_remove_privs(struct dentry *dentry); -static inline int file_needs_remove_privs(struct file *file) -{ - return dentry_needs_remove_privs(file->f_path.dentry); -} extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) @@ -2680,7 +2765,7 @@ static inline void remove_inode_hash(struct inode *inode) extern void inode_sb_list_add(struct inode *inode); #ifdef CONFIG_BLOCK -extern blk_qc_t submit_bio(int, struct bio *); +extern blk_qc_t submit_bio(struct bio *); extern int bdev_read_only(struct block_device *); #endif extern int set_blocksize(struct block_device *, int); @@ -2693,7 +2778,7 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); -extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t); +extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *); extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos); @@ -2735,7 +2820,7 @@ extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); #ifdef CONFIG_BLOCK -typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, +typedef void (dio_submit_t)(struct bio *bio, struct inode *inode, loff_t file_offset); enum { @@ -2756,18 +2841,17 @@ void dio_end_io(struct bio *bio, int error); ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, - loff_t offset, get_block_t get_block, + get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, int flags); static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, - struct iov_iter *iter, loff_t offset, + struct iov_iter *iter, get_block_t get_block) { return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, - offset, get_block, NULL, NULL, - DIO_LOCKING | DIO_SKIP_HOLES); + get_block, NULL, NULL, DIO_LOCKING | DIO_SKIP_HOLES); } #endif @@ -2933,6 +3017,10 @@ static inline int iocb_flags(struct file *file) res |= IOCB_APPEND; if (io_is_direct(file)) res |= IOCB_DIRECT; + if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host)) + res |= IOCB_DSYNC; + if (file->f_flags & __O_SYNC) + res |= IOCB_SYNC; return res; } @@ -3094,6 +3182,13 @@ static inline bool dir_relax(struct inode *inode) return !IS_DEADDIR(inode); } +static inline bool dir_relax_shared(struct inode *inode) +{ + inode_unlock_shared(inode); + inode_lock_shared(inode); + return !IS_DEADDIR(inode); +} + extern bool path_noexec(const struct path *path); extern void inode_nohighmem(struct inode *inode); |