diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/affs/dir.c | 44 | ||||
-rw-r--r-- | fs/ceph/dir.c | 1 | ||||
-rw-r--r-- | fs/ext2/dir.c | 28 | ||||
-rw-r--r-- | fs/ext4/dir.c | 50 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 2 | ||||
-rw-r--r-- | fs/ext4/inline.c | 7 | ||||
-rw-r--r-- | fs/fcntl.c | 166 | ||||
-rw-r--r-- | fs/file_table.c | 16 | ||||
-rw-r--r-- | fs/internal.h | 1 | ||||
-rw-r--r-- | fs/locks.c | 6 | ||||
-rw-r--r-- | fs/notify/dnotify/dnotify.c | 6 | ||||
-rw-r--r-- | fs/ocfs2/dir.c | 3 | ||||
-rw-r--r-- | fs/ocfs2/file.c | 11 | ||||
-rw-r--r-- | fs/ocfs2/file.h | 1 | ||||
-rw-r--r-- | fs/pipe.c | 8 | ||||
-rw-r--r-- | fs/proc/base.c | 30 | ||||
-rw-r--r-- | fs/read_write.c | 171 | ||||
-rw-r--r-- | fs/ubifs/dir.c | 64 | ||||
-rw-r--r-- | fs/udf/dir.c | 28 | ||||
-rw-r--r-- | fs/ufs/dir.c | 28 |
20 files changed, 520 insertions, 151 deletions
diff --git a/fs/affs/dir.c b/fs/affs/dir.c index b2bf7016e1b3..bd40d5f08810 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -17,13 +17,44 @@ #include <linux/iversion.h> #include "affs.h" +struct affs_dir_data { + unsigned long ino; + u64 cookie; +}; + static int affs_readdir(struct file *, struct dir_context *); +static loff_t affs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct affs_dir_data *data = file->private_data; + + return generic_llseek_cookie(file, offset, whence, &data->cookie); +} + +static int affs_dir_open(struct inode *inode, struct file *file) +{ + struct affs_dir_data *data; + + data = kzalloc(sizeof(struct affs_dir_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + file->private_data = data; + return 0; +} + +static int affs_dir_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + const struct file_operations affs_dir_operations = { + .open = affs_dir_open, .read = generic_read_dir, - .llseek = generic_file_llseek, + .llseek = affs_dir_llseek, .iterate_shared = affs_readdir, .fsync = affs_file_fsync, + .release = affs_dir_release, }; /* @@ -45,6 +76,7 @@ static int affs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); + struct affs_dir_data *data = file->private_data; struct super_block *sb = inode->i_sb; struct buffer_head *dir_bh = NULL; struct buffer_head *fh_bh = NULL; @@ -59,7 +91,7 @@ affs_readdir(struct file *file, struct dir_context *ctx) pr_debug("%s(ino=%lu,f_pos=%llx)\n", __func__, inode->i_ino, ctx->pos); if (ctx->pos < 2) { - file->private_data = (void *)0; + data->ino = 0; if (!dir_emit_dots(file, ctx)) return 0; } @@ -80,8 +112,8 @@ affs_readdir(struct file *file, struct dir_context *ctx) /* If the directory hasn't changed since the last call to readdir(), * we can jump directly to where we left off. */ - ino = (u32)(long)file->private_data; - if (ino && inode_eq_iversion(inode, file->f_version)) { + ino = data->ino; + if (ino && inode_eq_iversion(inode, data->cookie)) { pr_debug("readdir() left off=%d\n", ino); goto inside; } @@ -131,8 +163,8 @@ inside: } while (ino); } done: - file->f_version = inode_query_iversion(inode); - file->private_data = (void *)(long)ino; + data->cookie = inode_query_iversion(inode); + data->ino = ino; affs_brelse(fh_bh); out_brelse_dir: diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 18c72b305858..ddec8c9244ee 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -707,7 +707,6 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) if (offset != file->f_pos) { file->f_pos = offset; - file->f_version = 0; dfi->file_info.flags &= ~CEPH_F_ATEND; } retval = offset; diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 2f3042d0174c..402fecf90a44 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -263,7 +263,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx) unsigned long n = pos >> PAGE_SHIFT; unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); - bool need_revalidate = !inode_eq_iversion(inode, file->f_version); + bool need_revalidate = !inode_eq_iversion(inode, *(u64 *)file->private_data); bool has_filetype; if (pos > inode->i_size - EXT2_DIR_REC_LEN(1)) @@ -290,7 +290,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx) offset = ext2_validate_entry(kaddr, offset, chunk_mask); ctx->pos = (n<<PAGE_SHIFT) + offset; } - file->f_version = inode_query_iversion(inode); + *(u64 *)file->private_data = inode_query_iversion(inode); need_revalidate = false; } de = (ext2_dirent *)(kaddr+offset); @@ -703,8 +703,30 @@ not_empty: return 0; } +static int ext2_dir_open(struct inode *inode, struct file *file) +{ + file->private_data = kzalloc(sizeof(u64), GFP_KERNEL); + if (!file->private_data) + return -ENOMEM; + return 0; +} + +static int ext2_dir_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static loff_t ext2_dir_llseek(struct file *file, loff_t offset, int whence) +{ + return generic_llseek_cookie(file, offset, whence, + (u64 *)file->private_data); +} + const struct file_operations ext2_dir_operations = { - .llseek = generic_file_llseek, + .open = ext2_dir_open, + .release = ext2_dir_release, + .llseek = ext2_dir_llseek, .read = generic_read_dir, .iterate_shared = ext2_readdir, .unlocked_ioctl = ext2_ioctl, diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index ff4514e4626b..13196afe55ce 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -133,6 +133,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) struct super_block *sb = inode->i_sb; struct buffer_head *bh = NULL; struct fscrypt_str fstr = FSTR_INIT(NULL, 0); + struct dir_private_info *info = file->private_data; err = fscrypt_prepare_readdir(inode); if (err) @@ -229,7 +230,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the block * to make sure. */ - if (!inode_eq_iversion(inode, file->f_version)) { + if (!inode_eq_iversion(inode, info->cookie)) { for (i = 0; i < sb->s_blocksize && i < offset; ) { de = (struct ext4_dir_entry_2 *) (bh->b_data + i); @@ -249,7 +250,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) offset = i; ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1)) | offset; - file->f_version = inode_query_iversion(inode); + info->cookie = inode_query_iversion(inode); } while (ctx->pos < inode->i_size @@ -384,6 +385,7 @@ static inline loff_t ext4_get_htree_eof(struct file *filp) static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; + struct dir_private_info *info = file->private_data; int dx_dir = is_dx_dir(inode); loff_t ret, htree_max = ext4_get_htree_eof(file); @@ -392,7 +394,7 @@ static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence) htree_max, htree_max); else ret = ext4_llseek(file, offset, whence); - file->f_version = inode_peek_iversion(inode) - 1; + info->cookie = inode_peek_iversion(inode) - 1; return ret; } @@ -429,18 +431,15 @@ static void free_rb_tree_fname(struct rb_root *root) *root = RB_ROOT; } - -static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp, - loff_t pos) +static void ext4_htree_init_dir_info(struct file *filp, loff_t pos) { - struct dir_private_info *p; - - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (!p) - return NULL; - p->curr_hash = pos2maj_hash(filp, pos); - p->curr_minor_hash = pos2min_hash(filp, pos); - return p; + struct dir_private_info *p = filp->private_data; + + if (is_dx_dir(file_inode(filp)) && !p->initialized) { + p->curr_hash = pos2maj_hash(filp, pos); + p->curr_minor_hash = pos2min_hash(filp, pos); + p->initialized = true; + } } void ext4_htree_free_dir_info(struct dir_private_info *p) @@ -552,12 +551,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) struct fname *fname; int ret = 0; - if (!info) { - info = ext4_htree_create_dir_info(file, ctx->pos); - if (!info) - return -ENOMEM; - file->private_data = info; - } + ext4_htree_init_dir_info(file, ctx->pos); if (ctx->pos == ext4_get_htree_eof(file)) return 0; /* EOF */ @@ -590,10 +584,10 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) * cached entries. */ if ((!info->curr_node) || - !inode_eq_iversion(inode, file->f_version)) { + !inode_eq_iversion(inode, info->cookie)) { info->curr_node = NULL; free_rb_tree_fname(&info->root); - file->f_version = inode_query_iversion(inode); + info->cookie = inode_query_iversion(inode); ret = ext4_htree_fill_tree(file, info->curr_hash, info->curr_minor_hash, &info->next_hash); @@ -664,7 +658,19 @@ int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf, return 0; } +static int ext4_dir_open(struct inode *inode, struct file *file) +{ + struct dir_private_info *info; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + file->private_data = info; + return 0; +} + const struct file_operations ext4_dir_operations = { + .open = ext4_dir_open, .llseek = ext4_dir_llseek, .read = generic_read_dir, .iterate_shared = ext4_readdir, diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c3429b664b8d..ecc15e5f1eba 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2553,6 +2553,8 @@ struct dir_private_info { __u32 curr_hash; __u32 curr_minor_hash; __u32 next_hash; + u64 cookie; + bool initialized; }; /* calculate the first block number of the group */ diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index d5c2f3b75084..edf4aa99a974 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1460,6 +1460,7 @@ int ext4_read_inline_dir(struct file *file, struct ext4_iloc iloc; void *dir_buf = NULL; int dotdot_offset, dotdot_size, extra_offset, extra_size; + struct dir_private_info *info = file->private_data; ret = ext4_get_inode_loc(inode, &iloc); if (ret) @@ -1503,12 +1504,12 @@ int ext4_read_inline_dir(struct file *file, extra_size = extra_offset + inline_size; /* - * If the version has changed since the last call to + * If the cookie has changed since the last call to * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the inline * dir to make sure. */ - if (!inode_eq_iversion(inode, file->f_version)) { + if (!inode_eq_iversion(inode, info->cookie)) { for (i = 0; i < extra_size && i < offset;) { /* * "." is with offset 0 and @@ -1540,7 +1541,7 @@ int ext4_read_inline_dir(struct file *file, } offset = i; ctx->pos = offset; - file->f_version = inode_query_iversion(inode); + info->cookie = inode_query_iversion(inode); } while (ctx->pos < extra_size) { diff --git a/fs/fcntl.c b/fs/fcntl.c index 22ec683ad8f8..f6fde75a3bd5 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -33,6 +33,8 @@ #include <asm/siginfo.h> #include <linux/uaccess.h> +#include "internal.h" + #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) static int setfl(int fd, struct file * filp, unsigned int arg) @@ -87,22 +89,64 @@ static int setfl(int fd, struct file * filp, unsigned int arg) return error; } +/* + * Allocate an file->f_owner struct if it doesn't exist, handling racing + * allocations correctly. + */ +int file_f_owner_allocate(struct file *file) +{ + struct fown_struct *f_owner; + + f_owner = file_f_owner(file); + if (f_owner) + return 0; + + f_owner = kzalloc(sizeof(struct fown_struct), GFP_KERNEL); + if (!f_owner) + return -ENOMEM; + + rwlock_init(&f_owner->lock); + f_owner->file = file; + /* If someone else raced us, drop our allocation. */ + if (unlikely(cmpxchg(&file->f_owner, NULL, f_owner))) + kfree(f_owner); + return 0; +} +EXPORT_SYMBOL(file_f_owner_allocate); + +void file_f_owner_release(struct file *file) +{ + struct fown_struct *f_owner; + + f_owner = file_f_owner(file); + if (f_owner) { + put_pid(f_owner->pid); + kfree(f_owner); + } +} + static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, int force) { - write_lock_irq(&filp->f_owner.lock); - if (force || !filp->f_owner.pid) { - put_pid(filp->f_owner.pid); - filp->f_owner.pid = get_pid(pid); - filp->f_owner.pid_type = type; + struct fown_struct *f_owner; + + f_owner = file_f_owner(filp); + if (WARN_ON_ONCE(!f_owner)) + return; + + write_lock_irq(&f_owner->lock); + if (force || !f_owner->pid) { + put_pid(f_owner->pid); + f_owner->pid = get_pid(pid); + f_owner->pid_type = type; if (pid) { const struct cred *cred = current_cred(); - filp->f_owner.uid = cred->uid; - filp->f_owner.euid = cred->euid; + f_owner->uid = cred->uid; + f_owner->euid = cred->euid; } } - write_unlock_irq(&filp->f_owner.lock); + write_unlock_irq(&f_owner->lock); } void __f_setown(struct file *filp, struct pid *pid, enum pid_type type, @@ -119,6 +163,8 @@ int f_setown(struct file *filp, int who, int force) struct pid *pid = NULL; int ret = 0; + might_sleep(); + type = PIDTYPE_TGID; if (who < 0) { /* avoid overflow below */ @@ -129,6 +175,10 @@ int f_setown(struct file *filp, int who, int force) who = -who; } + ret = file_f_owner_allocate(filp); + if (ret) + return ret; + rcu_read_lock(); if (who) { pid = find_vpid(who); @@ -152,16 +202,21 @@ void f_delown(struct file *filp) pid_t f_getown(struct file *filp) { pid_t pid = 0; + struct fown_struct *f_owner; + + f_owner = file_f_owner(filp); + if (!f_owner) + return pid; - read_lock_irq(&filp->f_owner.lock); + read_lock_irq(&f_owner->lock); rcu_read_lock(); - if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) { - pid = pid_vnr(filp->f_owner.pid); - if (filp->f_owner.pid_type == PIDTYPE_PGID) + if (pid_task(f_owner->pid, f_owner->pid_type)) { + pid = pid_vnr(f_owner->pid); + if (f_owner->pid_type == PIDTYPE_PGID) pid = -pid; } rcu_read_unlock(); - read_unlock_irq(&filp->f_owner.lock); + read_unlock_irq(&f_owner->lock); return pid; } @@ -194,6 +249,10 @@ static int f_setown_ex(struct file *filp, unsigned long arg) return -EINVAL; } + ret = file_f_owner_allocate(filp); + if (ret) + return ret; + rcu_read_lock(); pid = find_vpid(owner.pid); if (owner.pid && !pid) @@ -210,13 +269,20 @@ static int f_getown_ex(struct file *filp, unsigned long arg) struct f_owner_ex __user *owner_p = (void __user *)arg; struct f_owner_ex owner = {}; int ret = 0; + struct fown_struct *f_owner; + enum pid_type pid_type = PIDTYPE_PID; - read_lock_irq(&filp->f_owner.lock); - rcu_read_lock(); - if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) - owner.pid = pid_vnr(filp->f_owner.pid); - rcu_read_unlock(); - switch (filp->f_owner.pid_type) { + f_owner = file_f_owner(filp); + if (f_owner) { + read_lock_irq(&f_owner->lock); + rcu_read_lock(); + if (pid_task(f_owner->pid, f_owner->pid_type)) + owner.pid = pid_vnr(f_owner->pid); + rcu_read_unlock(); + pid_type = f_owner->pid_type; + } + + switch (pid_type) { case PIDTYPE_PID: owner.type = F_OWNER_TID; break; @@ -234,7 +300,8 @@ static int f_getown_ex(struct file *filp, unsigned long arg) ret = -EINVAL; break; } - read_unlock_irq(&filp->f_owner.lock); + if (f_owner) + read_unlock_irq(&f_owner->lock); if (!ret) { ret = copy_to_user(owner_p, &owner, sizeof(owner)); @@ -248,14 +315,18 @@ static int f_getown_ex(struct file *filp, unsigned long arg) static int f_getowner_uids(struct file *filp, unsigned long arg) { struct user_namespace *user_ns = current_user_ns(); + struct fown_struct *f_owner; uid_t __user *dst = (void __user *)arg; - uid_t src[2]; + uid_t src[2] = {0, 0}; int err; - read_lock_irq(&filp->f_owner.lock); - src[0] = from_kuid(user_ns, filp->f_owner.uid); - src[1] = from_kuid(user_ns, filp->f_owner.euid); - read_unlock_irq(&filp->f_owner.lock); + f_owner = file_f_owner(filp); + if (f_owner) { + read_lock_irq(&f_owner->lock); + src[0] = from_kuid(user_ns, f_owner->uid); + src[1] = from_kuid(user_ns, f_owner->euid); + read_unlock_irq(&f_owner->lock); + } err = put_user(src[0], &dst[0]); err |= put_user(src[1], &dst[1]); @@ -349,6 +420,30 @@ static long f_created_query(const struct file *filp) return !!(filp->f_mode & FMODE_CREATED); } +static int f_owner_sig(struct file *filp, int signum, bool setsig) +{ + int ret = 0; + struct fown_struct *f_owner; + + might_sleep(); + + if (setsig) { + if (!valid_signal(signum)) + return -EINVAL; + + ret = file_f_owner_allocate(filp); + if (ret) + return ret; + } + + f_owner = file_f_owner(filp); + if (setsig) + f_owner->signum = signum; + else if (f_owner) + ret = f_owner->signum; + return ret; +} + static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, struct file *filp) { @@ -430,15 +525,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, err = f_getowner_uids(filp, arg); break; case F_GETSIG: - err = filp->f_owner.signum; + err = f_owner_sig(filp, 0, false); break; case F_SETSIG: - /* arg == 0 restores default behaviour. */ - if (!valid_signal(argi)) { - break; - } - err = 0; - filp->f_owner.signum = argi; + err = f_owner_sig(filp, argi, true); break; case F_GETLEASE: err = fcntl_getlease(filp); @@ -854,14 +944,19 @@ static void send_sigurg_to_task(struct task_struct *p, do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type); } -int send_sigurg(struct fown_struct *fown) +int send_sigurg(struct file *file) { + struct fown_struct *fown; struct task_struct *p; enum pid_type type; struct pid *pid; unsigned long flags; int ret = 0; + fown = file_f_owner(file); + if (!fown) + return 0; + read_lock_irqsave(&fown->lock, flags); type = fown->pid_type; @@ -1037,13 +1132,16 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) } read_lock_irqsave(&fa->fa_lock, flags); if (fa->fa_file) { - fown = &fa->fa_file->f_owner; + fown = file_f_owner(fa->fa_file); + if (!fown) + goto next; /* Don't send SIGURG to processes which have not set a queued signum: SIGURG has its own default signalling mechanism. */ if (!(sig == SIGURG && fown->signum == 0)) send_sigio(fown, fa->fa_fd, band); } +next: read_unlock_irqrestore(&fa->fa_lock, flags); fa = rcu_dereference(fa->fa_next); } diff --git a/fs/file_table.c b/fs/file_table.c index 35f2d5d9ca76..3cac9c34264f 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -156,8 +156,14 @@ static int init_file(struct file *f, int flags, const struct cred *cred) return error; } - rwlock_init(&f->f_owner.lock); spin_lock_init(&f->f_lock); + /* + * Note that f_pos_lock is only used for files raising + * FMODE_ATOMIC_POS and directories. Other files such as pipes + * don't need it and since f_pos_lock is in a union may reuse + * the space for other purposes. They are expected to initialize + * the respective member when opening the file. + */ mutex_init(&f->f_pos_lock); f->f_flags = flags; f->f_mode = OPEN_FMODE(flags); @@ -428,7 +434,7 @@ static void __fput(struct file *file) cdev_put(inode->i_cdev); } fops_put(file->f_op); - put_pid(file->f_owner.pid); + file_f_owner_release(file); put_file_access(file); dput(dentry); if (unlikely(mode & FMODE_NEED_UNMOUNT)) @@ -515,9 +521,9 @@ EXPORT_SYMBOL(__fput_sync); void __init files_init(void) { - filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, - SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN | - SLAB_PANIC | SLAB_ACCOUNT, NULL); + filp_cachep = kmem_cache_create_rcu("filp", sizeof(struct file), + offsetof(struct file, f_freeptr), + SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT); percpu_counter_init(&nr_files, 0, GFP_KERNEL); } diff --git a/fs/internal.h b/fs/internal.h index cdd73209eecb..8c1b7acbbe8f 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -337,3 +337,4 @@ static inline bool path_mounted(const struct path *path) { return path->mnt->mnt_root == path->dentry; } +void file_f_owner_release(struct file *file); diff --git a/fs/locks.c b/fs/locks.c index e45cad40f8b6..b51b1c395ce6 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1451,7 +1451,7 @@ int lease_modify(struct file_lease *fl, int arg, struct list_head *dispose) struct file *filp = fl->c.flc_file; f_delown(filp); - filp->f_owner.signum = 0; + file_f_owner(filp)->signum = 0; fasync_helper(0, fl->c.flc_file, 0, &fl->fl_fasync); if (fl->fl_fasync != NULL) { printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); @@ -1783,6 +1783,10 @@ generic_add_lease(struct file *filp, int arg, struct file_lease **flp, void **pr lease = *flp; trace_generic_add_lease(inode, lease); + error = file_f_owner_allocate(filp); + if (error) + return error; + /* Note that arg is never F_UNLCK here */ ctx = locks_get_lock_context(inode, arg); if (!ctx) diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index f3669403fabf..46440fbb8662 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -110,7 +110,7 @@ static int dnotify_handle_event(struct fsnotify_mark *inode_mark, u32 mask, prev = &dn->dn_next; continue; } - fown = &dn->dn_filp->f_owner; + fown = file_f_owner(dn->dn_filp); send_sigio(fown, dn->dn_fd, POLL_MSG); if (dn->dn_mask & FS_DN_MULTISHOT) prev = &dn->dn_next; @@ -316,6 +316,10 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) goto out_err; } + error = file_f_owner_allocate(filp); + if (error) + goto out_err; + /* set up the new_fsn_mark and new_dn_mark */ new_fsn_mark = &new_dn_mark->fsn_mark; fsnotify_init_mark(new_fsn_mark, dnotify_group); diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index f0beb173dbba..ccef3f42b333 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -1932,6 +1932,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx) { int error = 0; struct inode *inode = file_inode(file); + struct ocfs2_file_private *fp = file->private_data; int lock_level = 0; trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno); @@ -1952,7 +1953,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx) goto bail_nolock; } - error = ocfs2_dir_foreach_blk(inode, &file->f_version, ctx, false); + error = ocfs2_dir_foreach_blk(inode, &fp->cookie, ctx, false); ocfs2_inode_unlock(inode, lock_level); if (error) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index d058eb30cfc2..ad131a2fc58e 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2751,6 +2751,13 @@ out_unlock: return remapped > 0 ? remapped : ret; } +static loff_t ocfs2_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct ocfs2_file_private *fp = file->private_data; + + return generic_llseek_cookie(file, offset, whence, &fp->cookie); +} + const struct inode_operations ocfs2_file_iops = { .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, @@ -2798,7 +2805,7 @@ const struct file_operations ocfs2_fops = { WRAP_DIR_ITER(ocfs2_readdir) // FIXME! const struct file_operations ocfs2_dops = { - .llseek = generic_file_llseek, + .llseek = ocfs2_dir_llseek, .read = generic_read_dir, .iterate_shared = shared_ocfs2_readdir, .fsync = ocfs2_sync_file, @@ -2844,7 +2851,7 @@ const struct file_operations ocfs2_fops_no_plocks = { }; const struct file_operations ocfs2_dops_no_plocks = { - .llseek = generic_file_llseek, + .llseek = ocfs2_dir_llseek, .read = generic_read_dir, .iterate_shared = shared_ocfs2_readdir, .fsync = ocfs2_sync_file, diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 8e53e4ac1120..41e65e45a9f3 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -20,6 +20,7 @@ struct ocfs2_alloc_context; enum ocfs2_alloc_restarted; struct ocfs2_file_private { + u64 cookie; struct file *fp_file; struct mutex fp_mutex; struct ocfs2_lock_res fp_flock; diff --git a/fs/pipe.c b/fs/pipe.c index 9a6dfe39f012..4083ba492cb6 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -686,7 +686,7 @@ pipe_poll(struct file *filp, poll_table *wait) if (filp->f_mode & FMODE_READ) { if (!pipe_empty(head, tail)) mask |= EPOLLIN | EPOLLRDNORM; - if (!pipe->writers && filp->f_version != pipe->w_counter) + if (!pipe->writers && filp->f_pipe != pipe->w_counter) mask |= EPOLLHUP; } @@ -945,6 +945,7 @@ int create_pipe_files(struct file **res, int flags) } f->private_data = inode->i_pipe; + f->f_pipe = 0; res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK), &pipefifo_fops); @@ -954,6 +955,7 @@ int create_pipe_files(struct file **res, int flags) return PTR_ERR(res[0]); } res[0]->private_data = inode->i_pipe; + res[0]->f_pipe = 0; res[1] = f; stream_open(inode, res[0]); stream_open(inode, res[1]); @@ -1108,7 +1110,7 @@ static int fifo_open(struct inode *inode, struct file *filp) bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC; int ret; - filp->f_version = 0; + filp->f_pipe = 0; spin_lock(&inode->i_lock); if (inode->i_pipe) { @@ -1155,7 +1157,7 @@ static int fifo_open(struct inode *inode, struct file *filp) if ((filp->f_flags & O_NONBLOCK)) { /* suppress EPOLLHUP until we have * seen a writer */ - filp->f_version = pipe->w_counter; + filp->f_pipe = pipe->w_counter; } else { if (wait_for_partner(pipe, &pipe->w_counter)) goto err_rd; diff --git a/fs/proc/base.c b/fs/proc/base.c index 1409d1003101..7f3abc3de49f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3868,12 +3868,12 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) if (!dir_emit_dots(file, ctx)) return 0; - /* f_version caches the tgid value that the last readdir call couldn't - * return. lseek aka telldir automagically resets f_version to 0. + /* We cache the tgid value that the last readdir call couldn't + * return and lseek resets it to 0. */ ns = proc_pid_ns(inode->i_sb); - tid = (int)file->f_version; - file->f_version = 0; + tid = (int)(intptr_t)file->private_data; + file->private_data = NULL; for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); task; task = next_tid(task), ctx->pos++) { @@ -3888,7 +3888,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) proc_task_instantiate, task, NULL)) { /* returning this tgid failed, save it as the first * pid for the next readir call */ - file->f_version = (u64)tid; + file->private_data = (void *)(intptr_t)tid; put_task_struct(task); break; } @@ -3913,6 +3913,24 @@ static int proc_task_getattr(struct mnt_idmap *idmap, return 0; } +/* + * proc_task_readdir() set @file->private_data to a positive integer + * value, so casting that to u64 is safe. generic_llseek_cookie() will + * set @cookie to 0, so casting to an int is safe. The WARN_ON_ONCE() is + * here to catch any unexpected change in behavior either in + * proc_task_readdir() or generic_llseek_cookie(). + */ +static loff_t proc_dir_llseek(struct file *file, loff_t offset, int whence) +{ + u64 cookie = (u64)(intptr_t)file->private_data; + loff_t off; + + off = generic_llseek_cookie(file, offset, whence, &cookie); + WARN_ON_ONCE(cookie > INT_MAX); + file->private_data = (void *)(intptr_t)cookie; /* serialized by f_pos_lock */ + return off; +} + static const struct inode_operations proc_task_inode_operations = { .lookup = proc_task_lookup, .getattr = proc_task_getattr, @@ -3923,7 +3941,7 @@ static const struct inode_operations proc_task_inode_operations = { static const struct file_operations proc_task_operations = { .read = generic_read_dir, .iterate_shared = proc_task_readdir, - .llseek = generic_file_llseek, + .llseek = proc_dir_llseek, }; void __init set_proc_pid_nlink(void) diff --git a/fs/read_write.c b/fs/read_write.c index 89d4af0e3b93..070a7c33b9dd 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -40,18 +40,20 @@ static inline bool unsigned_offsets(struct file *file) } /** - * vfs_setpos - update the file offset for lseek + * vfs_setpos_cookie - update the file offset for lseek and reset cookie * @file: file structure in question * @offset: file offset to seek to * @maxsize: maximum file size + * @cookie: cookie to reset * - * This is a low-level filesystem helper for updating the file offset to - * the value specified by @offset if the given offset is valid and it is - * not equal to the current file offset. + * Update the file offset to the value specified by @offset if the given + * offset is valid and it is not equal to the current file offset and + * reset the specified cookie to indicate that a seek happened. * * Return the specified offset on success and -EINVAL on invalid offset. */ -loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) +static loff_t vfs_setpos_cookie(struct file *file, loff_t offset, + loff_t maxsize, u64 *cookie) { if (offset < 0 && !unsigned_offsets(file)) return -EINVAL; @@ -60,35 +62,48 @@ loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) if (offset != file->f_pos) { file->f_pos = offset; - file->f_version = 0; + if (cookie) + *cookie = 0; } return offset; } -EXPORT_SYMBOL(vfs_setpos); /** - * generic_file_llseek_size - generic llseek implementation for regular files - * @file: file structure to seek on + * vfs_setpos - update the file offset for lseek + * @file: file structure in question * @offset: file offset to seek to - * @whence: type of seek - * @maxsize: max size of this file in file system - * @eof: offset used for SEEK_END position + * @maxsize: maximum file size * - * This is a variant of generic_file_llseek that allows passing in a custom - * maximum file size and a custom EOF position, for e.g. hashed directories + * This is a low-level filesystem helper for updating the file offset to + * the value specified by @offset if the given offset is valid and it is + * not equal to the current file offset. * - * Synchronization: - * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) - * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes. - * read/writes behave like SEEK_SET against seeks. + * Return the specified offset on success and -EINVAL on invalid offset. */ -loff_t -generic_file_llseek_size(struct file *file, loff_t offset, int whence, - loff_t maxsize, loff_t eof) +loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) +{ + return vfs_setpos_cookie(file, offset, maxsize, NULL); +} +EXPORT_SYMBOL(vfs_setpos); + +/** + * must_set_pos - check whether f_pos has to be updated + * @file: file to seek on + * @offset: offset to use + * @whence: type of seek operation + * @eof: end of file + * + * Check whether f_pos needs to be updated and update @offset according + * to @whence. + * + * Return: 0 if f_pos doesn't need to be updated, 1 if f_pos has to be + * updated, and negative error code on failure. + */ +static int must_set_pos(struct file *file, loff_t *offset, int whence, loff_t eof) { switch (whence) { case SEEK_END: - offset += eof; + *offset += eof; break; case SEEK_CUR: /* @@ -97,23 +112,17 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence, * f_pos value back to the file because a concurrent read(), * write() or lseek() might have altered it */ - if (offset == 0) - return file->f_pos; - /* - * f_lock protects against read/modify/write race with other - * SEEK_CURs. Note that parallel writes and reads behave - * like SEEK_SET. - */ - spin_lock(&file->f_lock); - offset = vfs_setpos(file, file->f_pos + offset, maxsize); - spin_unlock(&file->f_lock); - return offset; + if (*offset == 0) { + *offset = file->f_pos; + return 0; + } + break; case SEEK_DATA: /* * In the generic case the entire file is data, so as long as * offset isn't at the end of the file then the offset is data. */ - if ((unsigned long long)offset >= eof) + if ((unsigned long long)*offset >= eof) return -ENXIO; break; case SEEK_HOLE: @@ -121,17 +130,103 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence, * There is a virtual hole at the end of the file, so as long as * offset isn't i_size or larger, return i_size. */ - if ((unsigned long long)offset >= eof) + if ((unsigned long long)*offset >= eof) return -ENXIO; - offset = eof; + *offset = eof; break; } + return 1; +} + +/** + * generic_file_llseek_size - generic llseek implementation for regular files + * @file: file structure to seek on + * @offset: file offset to seek to + * @whence: type of seek + * @maxsize: max size of this file in file system + * @eof: offset used for SEEK_END position + * + * This is a variant of generic_file_llseek that allows passing in a custom + * maximum file size and a custom EOF position, for e.g. hashed directories + * + * Synchronization: + * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) + * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes. + * read/writes behave like SEEK_SET against seeks. + */ +loff_t +generic_file_llseek_size(struct file *file, loff_t offset, int whence, + loff_t maxsize, loff_t eof) +{ + int ret; + + ret = must_set_pos(file, &offset, whence, eof); + if (ret < 0) + return ret; + if (ret == 0) + return offset; + + if (whence == SEEK_CUR) { + /* + * f_lock protects against read/modify/write race with + * other SEEK_CURs. Note that parallel writes and reads + * behave like SEEK_SET. + */ + guard(spinlock)(&file->f_lock); + return vfs_setpos(file, file->f_pos + offset, maxsize); + } + return vfs_setpos(file, offset, maxsize); } EXPORT_SYMBOL(generic_file_llseek_size); /** + * generic_llseek_cookie - versioned llseek implementation + * @file: file structure to seek on + * @offset: file offset to seek to + * @whence: type of seek + * @cookie: cookie to update + * + * See generic_file_llseek for a general description and locking assumptions. + * + * In contrast to generic_file_llseek, this function also resets a + * specified cookie to indicate a seek took place. + */ +loff_t generic_llseek_cookie(struct file *file, loff_t offset, int whence, + u64 *cookie) +{ + struct inode *inode = file->f_mapping->host; + loff_t maxsize = inode->i_sb->s_maxbytes; + loff_t eof = i_size_read(inode); + int ret; + + if (WARN_ON_ONCE(!cookie)) + return -EINVAL; + + /* + * Require that this is only used for directories that guarantee + * synchronization between readdir and seek so that an update to + * @cookie is correctly synchronized with concurrent readdir. + */ + if (WARN_ON_ONCE(!(file->f_mode & FMODE_ATOMIC_POS))) + return -EINVAL; + + ret = must_set_pos(file, &offset, whence, eof); + if (ret < 0) + return ret; + if (ret == 0) + return offset; + + /* No need to hold f_lock because we know that f_pos_lock is held. */ + if (whence == SEEK_CUR) + return vfs_setpos_cookie(file, file->f_pos + offset, maxsize, cookie); + + return vfs_setpos_cookie(file, offset, maxsize, cookie); +} +EXPORT_SYMBOL(generic_llseek_cookie); + +/** * generic_file_llseek - generic llseek implementation for regular files * @file: file structure to seek on * @offset: file offset to seek to @@ -270,10 +365,8 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence) } retval = -EINVAL; if (offset >= 0 || unsigned_offsets(file)) { - if (offset != file->f_pos) { + if (offset != file->f_pos) file->f_pos = offset; - file->f_version = 0; - } retval = offset; } out: diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index c77ea57fe696..fda82f3e16e8 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -555,6 +555,11 @@ static unsigned int vfs_dent_type(uint8_t type) return 0; } +struct ubifs_dir_data { + struct ubifs_dent_node *dent; + u64 cookie; +}; + /* * The classical Unix view for directory is that it is a linear array of * (name, inode number) entries. Linux/VFS assumes this model as well. @@ -582,6 +587,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) struct inode *dir = file_inode(file); struct ubifs_info *c = dir->i_sb->s_fs_info; bool encrypted = IS_ENCRYPTED(dir); + struct ubifs_dir_data *data = file->private_data; dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, ctx->pos); @@ -604,27 +610,27 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) fstr_real_len = fstr.len; } - if (file->f_version == 0) { + if (data->cookie == 0) { /* - * The file was seek'ed, which means that @file->private_data + * The file was seek'ed, which means that @data->dent * is now invalid. This may also be just the first * 'ubifs_readdir()' invocation, in which case - * @file->private_data is NULL, and the below code is + * @data->dent is NULL, and the below code is * basically a no-op. */ - kfree(file->private_data); - file->private_data = NULL; + kfree(data->dent); + data->dent = NULL; } /* - * 'generic_file_llseek()' unconditionally sets @file->f_version to - * zero, and we use this for detecting whether the file was seek'ed. + * 'ubifs_dir_llseek()' sets @data->cookie to zero, and we use this + * for detecting whether the file was seek'ed. */ - file->f_version = 1; + data->cookie = 1; /* File positions 0 and 1 correspond to "." and ".." */ if (ctx->pos < 2) { - ubifs_assert(c, !file->private_data); + ubifs_assert(c, !data->dent); if (!dir_emit_dots(file, ctx)) { if (encrypted) fscrypt_fname_free_buffer(&fstr); @@ -641,10 +647,10 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) } ctx->pos = key_hash_flash(c, &dent->key); - file->private_data = dent; + data->dent = dent; } - dent = file->private_data; + dent = data->dent; if (!dent) { /* * The directory was seek'ed to and is now readdir'ed. @@ -658,7 +664,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) goto out; } ctx->pos = key_hash_flash(c, &dent->key); - file->private_data = dent; + data->dent = dent; } while (1) { @@ -701,15 +707,15 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) goto out; } - kfree(file->private_data); + kfree(data->dent); ctx->pos = key_hash_flash(c, &dent->key); - file->private_data = dent; + data->dent = dent; cond_resched(); } out: - kfree(file->private_data); - file->private_data = NULL; + kfree(data->dent); + data->dent = NULL; if (encrypted) fscrypt_fname_free_buffer(&fstr); @@ -733,7 +739,10 @@ out: /* Free saved readdir() state when the directory is closed */ static int ubifs_dir_release(struct inode *dir, struct file *file) { - kfree(file->private_data); + struct ubifs_dir_data *data = file->private_data; + + kfree(data->dent); + kfree(data); file->private_data = NULL; return 0; } @@ -1712,6 +1721,24 @@ int ubifs_getattr(struct mnt_idmap *idmap, const struct path *path, return 0; } +static int ubifs_dir_open(struct inode *inode, struct file *file) +{ + struct ubifs_dir_data *data; + + data = kzalloc(sizeof(struct ubifs_dir_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + file->private_data = data; + return 0; +} + +static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct ubifs_dir_data *data = file->private_data; + + return generic_llseek_cookie(file, offset, whence, &data->cookie); +} + const struct inode_operations ubifs_dir_inode_operations = { .lookup = ubifs_lookup, .create = ubifs_create, @@ -1732,7 +1759,8 @@ const struct inode_operations ubifs_dir_inode_operations = { }; const struct file_operations ubifs_dir_operations = { - .llseek = generic_file_llseek, + .open = ubifs_dir_open, + .llseek = ubifs_dir_llseek, .release = ubifs_dir_release, .read = generic_read_dir, .iterate_shared = ubifs_readdir, diff --git a/fs/udf/dir.c b/fs/udf/dir.c index f94f45fe2c91..5023dfe191e8 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -60,7 +60,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) * identifying beginning of dir entry (names are under user control), * we need to scan the directory from the beginning. */ - if (!inode_eq_iversion(dir, file->f_version)) { + if (!inode_eq_iversion(dir, *(u64 *)file->private_data)) { emit_pos = nf_pos; nf_pos = 0; } else { @@ -122,15 +122,37 @@ out_iter: udf_fiiter_release(&iter); out: if (pos_valid) - file->f_version = inode_query_iversion(dir); + *(u64 *)file->private_data = inode_query_iversion(dir); kfree(fname); return ret; } +static int udf_dir_open(struct inode *inode, struct file *file) +{ + file->private_data = kzalloc(sizeof(u64), GFP_KERNEL); + if (!file->private_data) + return -ENOMEM; + return 0; +} + +static int udf_dir_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static loff_t udf_dir_llseek(struct file *file, loff_t offset, int whence) +{ + return generic_llseek_cookie(file, offset, whence, + (u64 *)file->private_data); +} + /* readdir and lookup functions */ const struct file_operations udf_dir_operations = { - .llseek = generic_file_llseek, + .open = udf_dir_open, + .release = udf_dir_release, + .llseek = udf_dir_llseek, .read = generic_read_dir, .iterate_shared = udf_readdir, .unlocked_ioctl = udf_ioctl, diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 1579561118f5..d6e6a2198971 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -416,7 +416,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx) unsigned long n = pos >> PAGE_SHIFT; unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); - bool need_revalidate = !inode_eq_iversion(inode, file->f_version); + bool need_revalidate = !inode_eq_iversion(inode, *(u64 *)file->private_data); unsigned flags = UFS_SB(sb)->s_flags; UFSD("BEGIN\n"); @@ -442,7 +442,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx) offset = ufs_validate_entry(sb, kaddr, offset, chunk_mask); ctx->pos = (n<<PAGE_SHIFT) + offset; } - file->f_version = inode_query_iversion(inode); + *(u64 *)file->private_data = inode_query_iversion(inode); need_revalidate = false; } de = (struct ufs_dir_entry *)(kaddr+offset); @@ -627,9 +627,31 @@ not_empty: return 0; } +static int ufs_dir_open(struct inode *inode, struct file *file) +{ + file->private_data = kzalloc(sizeof(u64), GFP_KERNEL); + if (!file->private_data) + return -ENOMEM; + return 0; +} + +static int ufs_dir_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static loff_t ufs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + return generic_llseek_cookie(file, offset, whence, + (u64 *)file->private_data); +} + const struct file_operations ufs_dir_operations = { + .open = ufs_dir_open, + .release = ufs_dir_release, .read = generic_read_dir, .iterate_shared = ufs_readdir, .fsync = generic_file_fsync, - .llseek = generic_file_llseek, + .llseek = ufs_dir_llseek, }; |