From f3a608827d1f8de0dd12813e8d9c6803fe64e119 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 8 Feb 2024 18:47:35 +0100 Subject: bdev: open block device as files Add two new helpers to allow opening block devices as files. This is not the final infrastructure. This still opens the block device before opening a struct a file. Until we have removed all references to struct bdev_handle we can't switch the order: * Introduce blk_to_file_flags() to translate from block specific to flags usable to pen a new file. * Introduce bdev_file_open_by_{dev,path}(). * Introduce temporary sb_bdev_handle() helper to retrieve a struct bdev_handle from a block device file and update places that directly reference struct bdev_handle to rely on it. * Don't count block device openes against the number of open files. A bdev_file_open_by_{dev,path}() file is never installed into any file descriptor table. One idea that came to mind was to use kernel_tmpfile_open() which would require us to pass a path and it would then call do_dentry_open() going through the regular fops->open::blkdev_open() path. But then we're back to the problem of routing block specific flags such as BLK_OPEN_RESTRICT_WRITES through the open path and would have to waste FMODE_* flags every time we add a new one. With this we can avoid using a flag bit and we have more leeway in how we open block devices from bdev_open_by_{dev,path}(). Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-1-adbd023e19cc@kernel.org Signed-off-by: Christian Brauner --- include/linux/blkdev.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 99e4f5e72213..76706aa47316 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -24,6 +24,7 @@ #include #include #include +#include struct module; struct request_queue; @@ -1474,6 +1475,7 @@ extern const struct blk_holder_ops fs_holder_ops; (BLK_OPEN_READ | BLK_OPEN_RESTRICT_WRITES | \ (((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE)) +/* @bdev_handle will be removed soon. */ struct bdev_handle { struct block_device *bdev; void *holder; @@ -1484,6 +1486,10 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); +struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, + const struct blk_holder_ops *hops); +struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, + void *holder, const struct blk_holder_ops *hops); int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); @@ -1494,6 +1500,7 @@ struct block_device *blkdev_get_no_open(dev_t dev); void blkdev_put_no_open(struct block_device *bdev); struct block_device *I_BDEV(struct inode *inode); +struct block_device *file_bdev(struct file *bdev_file); #ifdef CONFIG_BLOCK void invalidate_bdev(struct block_device *bdev); -- cgit From e97d06a46526d9392cbdbd7eda193091e1af2723 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:44 +0100 Subject: bdev: remove bdev_open_by_path() Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-27-adbd023e19cc@kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- block/bdev.c | 40 ---------------------------------------- include/linux/blkdev.h | 2 -- 2 files changed, 42 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/bdev.c b/block/bdev.c index e1149652c532..4003f8e1782a 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -1004,46 +1004,6 @@ struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, } EXPORT_SYMBOL(bdev_file_open_by_path); -/** - * bdev_open_by_path - open a block device by name - * @path: path to the block device to open - * @mode: open mode (BLK_OPEN_*) - * @holder: exclusive holder identifier - * @hops: holder operations - * - * Open the block device described by the device file at @path. If @holder is - * not %NULL, the block device is opened with exclusive access. Exclusive opens - * may nest for the same @holder. - * - * CONTEXT: - * Might sleep. - * - * RETURNS: - * Handle with a reference to the block_device on success, ERR_PTR(-errno) on - * failure. - */ -struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, - void *holder, const struct blk_holder_ops *hops) -{ - struct bdev_handle *handle; - dev_t dev; - int error; - - error = lookup_bdev(path, &dev); - if (error) - return ERR_PTR(error); - - handle = bdev_open_by_dev(dev, mode, holder, hops); - if (!IS_ERR(handle) && (mode & BLK_OPEN_WRITE) && - bdev_read_only(handle->bdev)) { - bdev_release(handle); - return ERR_PTR(-EACCES); - } - - return handle; -} -EXPORT_SYMBOL(bdev_open_by_path); - void bdev_release(struct bdev_handle *handle) { struct block_device *bdev = handle->bdev; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 76706aa47316..5880d5abfebe 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1484,8 +1484,6 @@ struct bdev_handle { struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); -struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, - void *holder, const struct blk_holder_ops *hops); struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, -- cgit From b1211a25c4fe3443cfef4ed7c39251502a663776 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:45 +0100 Subject: bdev: make bdev_{release, open_by_dev}() private to block layer Move both of them to the private block header. There's no caller in the tree anymore that uses them directly. Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-28-adbd023e19cc@kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- block/bdev.c | 2 -- block/blk.h | 4 ++++ include/linux/blkdev.h | 3 --- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/bdev.c b/block/bdev.c index 4003f8e1782a..e6e46f24a89a 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -916,7 +916,6 @@ free_handle: kfree(handle); return ERR_PTR(ret); } -EXPORT_SYMBOL(bdev_open_by_dev); /* * If BLK_OPEN_WRITE_IOCTL is set then this is a historical quirk @@ -1042,7 +1041,6 @@ void bdev_release(struct bdev_handle *handle) blkdev_put_no_open(bdev); kfree(handle); } -EXPORT_SYMBOL(bdev_release); /** * lookup_bdev() - Look up a struct block_device by name. diff --git a/block/blk.h b/block/blk.h index 1ef920f72e0f..c9630774767d 100644 --- a/block/blk.h +++ b/block/blk.h @@ -516,4 +516,8 @@ static inline int req_ref_read(struct request *req) return atomic_read(&req->ref); } +void bdev_release(struct bdev_handle *handle); +struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, + const struct blk_holder_ops *hops); + #endif /* BLK_INTERNAL_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5880d5abfebe..495f55587207 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1482,8 +1482,6 @@ struct bdev_handle { blk_mode_t mode; }; -struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, - const struct blk_holder_ops *hops); struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, @@ -1491,7 +1489,6 @@ struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); -void bdev_release(struct bdev_handle *handle); /* just for blk-cgroup, don't use elsewhere */ struct block_device *blkdev_get_no_open(dev_t dev); -- cgit From a56aefca8d386181415a1fb7cfec2f72b0404797 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 23 Jan 2024 14:26:46 +0100 Subject: bdev: make struct bdev_handle private to the block layer Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-29-adbd023e19cc@kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- block/bdev.c | 119 +++++++++++++++++++++++++------------------------ block/blk.h | 12 +++-- block/fops.c | 37 +++++++-------- include/linux/blkdev.h | 7 --- include/linux/fs.h | 6 --- 5 files changed, 86 insertions(+), 95 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/bdev.c b/block/bdev.c index e6e46f24a89a..8f33f160e923 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -703,6 +703,24 @@ out_blkdev_put: return ret; } +int bdev_permission(dev_t dev, blk_mode_t mode, void *holder) +{ + int ret; + + ret = devcgroup_check_permission(DEVCG_DEV_BLOCK, + MAJOR(dev), MINOR(dev), + ((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) | + ((mode & BLK_OPEN_WRITE) ? DEVCG_ACC_WRITE : 0)); + if (ret) + return ret; + + /* Blocking writes requires exclusive opener */ + if (mode & BLK_OPEN_RESTRICT_WRITES && !holder) + return -EINVAL; + + return 0; +} + static void blkdev_put_part(struct block_device *part) { struct block_device *whole = bdev_whole(part); @@ -795,69 +813,43 @@ static void bdev_yield_write_access(struct block_device *bdev, blk_mode_t mode) } /** - * bdev_open_by_dev - open a block device by device number - * @dev: device number of block device to open + * bdev_open - open a block device + * @bdev: block device to open * @mode: open mode (BLK_OPEN_*) * @holder: exclusive holder identifier * @hops: holder operations + * @bdev_file: file for the block device * - * Open the block device described by device number @dev. If @holder is not - * %NULL, the block device is opened with exclusive access. Exclusive opens may - * nest for the same @holder. - * - * Use this interface ONLY if you really do not have anything better - i.e. when - * you are behind a truly sucky interface and all you are given is a device - * number. Everything else should use bdev_open_by_path(). + * Open the block device. If @holder is not %NULL, the block device is opened + * with exclusive access. Exclusive opens may nest for the same @holder. * * CONTEXT: * Might sleep. * * RETURNS: - * Handle with a reference to the block_device on success, ERR_PTR(-errno) on - * failure. + * zero on success, -errno on failure. */ -struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, - const struct blk_holder_ops *hops) +int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, + const struct blk_holder_ops *hops, struct file *bdev_file) { - struct bdev_handle *handle = kmalloc(sizeof(struct bdev_handle), - GFP_KERNEL); - struct block_device *bdev; + struct bdev_handle *handle; bool unblock_events = true; - struct gendisk *disk; + struct gendisk *disk = bdev->bd_disk; int ret; + handle = kmalloc(sizeof(struct bdev_handle), GFP_KERNEL); if (!handle) - return ERR_PTR(-ENOMEM); - - ret = devcgroup_check_permission(DEVCG_DEV_BLOCK, - MAJOR(dev), MINOR(dev), - ((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) | - ((mode & BLK_OPEN_WRITE) ? DEVCG_ACC_WRITE : 0)); - if (ret) - goto free_handle; - - /* Blocking writes requires exclusive opener */ - if (mode & BLK_OPEN_RESTRICT_WRITES && !holder) { - ret = -EINVAL; - goto free_handle; - } - - bdev = blkdev_get_no_open(dev); - if (!bdev) { - ret = -ENXIO; - goto free_handle; - } - disk = bdev->bd_disk; + return -ENOMEM; if (holder) { mode |= BLK_OPEN_EXCL; ret = bd_prepare_to_claim(bdev, holder, hops); if (ret) - goto put_blkdev; + goto free_handle; } else { if (WARN_ON_ONCE(mode & BLK_OPEN_EXCL)) { ret = -EIO; - goto put_blkdev; + goto free_handle; } } @@ -902,7 +894,16 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, handle->bdev = bdev; handle->holder = holder; handle->mode = mode; - return handle; + + bdev_file->f_flags |= O_LARGEFILE; + bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; + if (bdev_nowait(bdev)) + bdev_file->f_mode |= FMODE_NOWAIT; + bdev_file->f_mapping = handle->bdev->bd_inode->i_mapping; + bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping); + bdev_file->private_data = handle; + + return 0; put_module: module_put(disk->fops->owner); abort_claiming: @@ -910,11 +911,9 @@ abort_claiming: bd_abort_claiming(bdev, holder); mutex_unlock(&disk->open_mutex); disk_unblock_events(disk); -put_blkdev: - blkdev_put_no_open(bdev); free_handle: kfree(handle); - return ERR_PTR(ret); + return ret; } /* @@ -951,29 +950,33 @@ struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops) { struct file *bdev_file; - struct bdev_handle *handle; + struct block_device *bdev; unsigned int flags; + int ret; - handle = bdev_open_by_dev(dev, mode, holder, hops); - if (IS_ERR(handle)) - return ERR_CAST(handle); + ret = bdev_permission(dev, mode, holder); + if (ret) + return ERR_PTR(ret); + + bdev = blkdev_get_no_open(dev); + if (!bdev) + return ERR_PTR(-ENXIO); flags = blk_to_file_flags(mode); - bdev_file = alloc_file_pseudo_noaccount(handle->bdev->bd_inode, + bdev_file = alloc_file_pseudo_noaccount(bdev->bd_inode, blockdev_mnt, "", flags | O_LARGEFILE, &def_blk_fops); if (IS_ERR(bdev_file)) { - bdev_release(handle); + blkdev_put_no_open(bdev); return bdev_file; } - ihold(handle->bdev->bd_inode); + ihold(bdev->bd_inode); - bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; - if (bdev_nowait(handle->bdev)) - bdev_file->f_mode |= FMODE_NOWAIT; - - bdev_file->f_mapping = handle->bdev->bd_inode->i_mapping; - bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping); - bdev_file->private_data = handle; + ret = bdev_open(bdev, mode, holder, hops, bdev_file); + if (ret) { + blkdev_put_no_open(bdev); + fput(bdev_file); + return ERR_PTR(ret); + } return bdev_file; } EXPORT_SYMBOL(bdev_file_open_by_dev); diff --git a/block/blk.h b/block/blk.h index c9630774767d..19b15870284f 100644 --- a/block/blk.h +++ b/block/blk.h @@ -25,6 +25,12 @@ struct blk_flush_queue { struct request *flush_rq; }; +struct bdev_handle { + struct block_device *bdev; + void *holder; + blk_mode_t mode; +}; + bool is_flush_rq(struct request *req); struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, @@ -517,7 +523,7 @@ static inline int req_ref_read(struct request *req) } void bdev_release(struct bdev_handle *handle); -struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, - const struct blk_holder_ops *hops); - +int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, + const struct blk_holder_ops *hops, struct file *bdev_file); +int bdev_permission(dev_t dev, blk_mode_t mode, void *holder); #endif /* BLK_INTERNAL_H */ diff --git a/block/fops.c b/block/fops.c index 0cf8cf72cdfa..a1ba1a50ae77 100644 --- a/block/fops.c +++ b/block/fops.c @@ -599,36 +599,31 @@ blk_mode_t file_to_blk_mode(struct file *file) static int blkdev_open(struct inode *inode, struct file *filp) { - struct bdev_handle *handle; + struct block_device *bdev; blk_mode_t mode; - - /* - * Preserve backwards compatibility and allow large file access - * even if userspace doesn't ask for it explicitly. Some mkfs - * binary needs it. We might want to drop this workaround - * during an unstable branch. - */ - filp->f_flags |= O_LARGEFILE; - filp->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; + void *holder; + int ret; mode = file_to_blk_mode(filp); - handle = bdev_open_by_dev(inode->i_rdev, mode, - mode & BLK_OPEN_EXCL ? filp : NULL, NULL); - if (IS_ERR(handle)) - return PTR_ERR(handle); + holder = mode & BLK_OPEN_EXCL ? filp : NULL; + ret = bdev_permission(inode->i_rdev, mode, holder); + if (ret) + return ret; - if (bdev_nowait(handle->bdev)) - filp->f_mode |= FMODE_NOWAIT; + bdev = blkdev_get_no_open(inode->i_rdev); + if (!bdev) + return -ENXIO; - filp->f_mapping = handle->bdev->bd_inode->i_mapping; - filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); - filp->private_data = handle; - return 0; + ret = bdev_open(bdev, mode, holder, NULL, filp); + if (ret) + blkdev_put_no_open(bdev); + return ret; } static int blkdev_release(struct inode *inode, struct file *filp) { - bdev_release(filp->private_data); + if (filp->private_data) + bdev_release(filp->private_data); return 0; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 495f55587207..2f5dbde23094 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1475,13 +1475,6 @@ extern const struct blk_holder_ops fs_holder_ops; (BLK_OPEN_READ | BLK_OPEN_RESTRICT_WRITES | \ (((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE)) -/* @bdev_handle will be removed soon. */ -struct bdev_handle { - struct block_device *bdev; - void *holder; - blk_mode_t mode; -}; - struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, diff --git a/include/linux/fs.h b/include/linux/fs.h index e9291e27cc47..6e0714d35d9b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1327,12 +1327,6 @@ struct super_block { struct list_head s_inodes_wb; /* writeback inodes */ } __randomize_layout; -/* Temporary helper that will go away. */ -static inline struct bdev_handle *sb_bdev_handle(struct super_block *sb) -{ - return sb->s_bdev_file->private_data; -} - static inline struct user_namespace *i_user_ns(const struct inode *inode) { return inode->i_sb->s_user_ns; -- cgit