From cda7163d4e3d99db93aa38f0e825b8433c7a8452 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 30 Oct 2024 11:25:47 +1030 Subject: btrfs: fix per-subvolume RO/RW flags with new mount API [BUG] With util-linux 2.40.2, the 'mount' utility is already utilizing the new mount API. e.g: # strace mount -o subvol=subv1,ro /dev/test/scratch1 /mnt/test/ ... fsconfig(3, FSCONFIG_SET_STRING, "source", "/dev/mapper/test-scratch1", 0) = 0 fsconfig(3, FSCONFIG_SET_STRING, "subvol", "subv1", 0) = 0 fsconfig(3, FSCONFIG_SET_FLAG, "ro", NULL, 0) = 0 fsconfig(3, FSCONFIG_CMD_CREATE, NULL, NULL, 0) = 0 fsmount(3, FSMOUNT_CLOEXEC, 0) = 4 mount_setattr(4, "", AT_EMPTY_PATH, {attr_set=MOUNT_ATTR_RDONLY, attr_clr=0, propagation=0 /* MS_??? */, userns_fd=0}, 32) = 0 move_mount(4, "", AT_FDCWD, "/mnt/test", MOVE_MOUNT_F_EMPTY_PATH) = 0 But this leads to a new problem, that per-subvolume RO/RW mount no longer works, if the initial mount is RO: # mount -o subvol=subv1,ro /dev/test/scratch1 /mnt/test # mount -o rw,subvol=subv2 /dev/test/scratch1 /mnt/scratch # mount | grep mnt /dev/mapper/test-scratch1 on /mnt/test type btrfs (ro,relatime,discard=async,space_cache=v2,subvolid=256,subvol=/subv1) /dev/mapper/test-scratch1 on /mnt/scratch type btrfs (ro,relatime,discard=async,space_cache=v2,subvolid=257,subvol=/subv2) # touch /mnt/scratch/foobar touch: cannot touch '/mnt/scratch/foobar': Read-only file system This is a common use cases on distros. [CAUSE] We have a workaround for remount to handle the RO->RW change, but if the mount is using the new mount API, we do not do that, and rely on the mount tool NOT to set the ro flag. But that's not how the mount tool is doing for the new API: fsconfig(3, FSCONFIG_SET_STRING, "source", "/dev/mapper/test-scratch1", 0) = 0 fsconfig(3, FSCONFIG_SET_STRING, "subvol", "subv1", 0) = 0 fsconfig(3, FSCONFIG_SET_FLAG, "ro", NULL, 0) = 0 <<<< Setting RO flag for super block fsconfig(3, FSCONFIG_CMD_CREATE, NULL, NULL, 0) = 0 fsmount(3, FSMOUNT_CLOEXEC, 0) = 4 mount_setattr(4, "", AT_EMPTY_PATH, {attr_set=MOUNT_ATTR_RDONLY, attr_clr=0, propagation=0 /* MS_??? */, userns_fd=0}, 32) = 0 move_mount(4, "", AT_FDCWD, "/mnt/test", MOVE_MOUNT_F_EMPTY_PATH) = 0 This means we will set the super block RO at the first mount. Later RW mount will not try to reconfigure the fs to RW because the mount tool is already using the new API. This totally breaks the per-subvolume RO/RW mount behavior. [FIX] Do not skip the reconfiguration even if using the new API. The old comments are just expecting any mount tool to properly skip the RO flag set even if we specify "ro", which is not the reality. Update the comments regarding the backward compatibility on the kernel level so it works with old and new mount utilities. CC: stable@vger.kernel.org # 6.8+ Fixes: f044b318675f ("btrfs: handle the ro->rw transition for mounting different subvolumes") Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 926d7a9ed99d..c64d07134122 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1979,25 +1979,10 @@ error: * fsconfig(FSCONFIG_SET_FLAG, "ro"). This option is seen by the filesystem * in fc->sb_flags. * - * This disambiguation has rather positive consequences. Mounting a subvolume - * ro will not also turn the superblock ro. Only the mount for the subvolume - * will become ro. - * - * So, if the superblock creation request comes from the new mount API the - * caller must have explicitly done: - * - * fsconfig(FSCONFIG_SET_FLAG, "ro") - * fsmount/mount_setattr(MOUNT_ATTR_RDONLY) - * - * IOW, at some point the caller must have explicitly turned the whole - * superblock ro and we shouldn't just undo it like we did for the old mount - * API. In any case, it lets us avoid the hack in the new mount API. - * - * Consequently, the remounting hack must only be used for requests originating - * from the old mount API and should be marked for full deprecation so it can be - * turned off in a couple of years. - * - * The new mount API has no reason to support this hack. + * But, currently the util-linux mount command already utilizes the new mount + * API and is still setting fsconfig(FSCONFIG_SET_FLAG, "ro") no matter if it's + * btrfs or not, setting the whole super block RO. To make per-subvolume mounting + * work with different options work we need to keep backward compatibility. */ static struct vfsmount *btrfs_reconfigure_for_mount(struct fs_context *fc) { @@ -2019,7 +2004,7 @@ static struct vfsmount *btrfs_reconfigure_for_mount(struct fs_context *fc) if (IS_ERR(mnt)) return mnt; - if (!fc->oldapi || !ro2rw) + if (!ro2rw) return mnt; /* We need to convert to rw, call reconfigure. */ -- cgit v1.2.3-73-gaa49b From c9a75ec45f1111ef530ab186c2a7684d0a0c9245 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 4 Nov 2024 12:11:15 +0000 Subject: btrfs: reinitialize delayed ref list after deleting it from the list At insert_delayed_ref() if we need to update the action of an existing ref to BTRFS_DROP_DELAYED_REF, we delete the ref from its ref head's ref_add_list using list_del(), which leaves the ref's add_list member not reinitialized, as list_del() sets the next and prev members of the list to LIST_POISON1 and LIST_POISON2, respectively. If later we end up calling drop_delayed_ref() against the ref, which can happen during merging or when destroying delayed refs due to a transaction abort, we can trigger a crash since at drop_delayed_ref() we call list_empty() against the ref's add_list, which returns false since the list was not reinitialized after the list_del() and as a consequence we call list_del() again at drop_delayed_ref(). This results in an invalid list access since the next and prev members are set to poison pointers, resulting in a splat if CONFIG_LIST_HARDENED and CONFIG_DEBUG_LIST are set or invalid poison pointer dereferences otherwise. So fix this by deleting from the list with list_del_init() instead. Fixes: 1d57ee941692 ("btrfs: improve delayed refs iterations") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 115b90d29b1d..65d841d7142c 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -649,7 +649,7 @@ static bool insert_delayed_ref(struct btrfs_trans_handle *trans, &href->ref_add_list); else if (ref->action == BTRFS_DROP_DELAYED_REF) { ASSERT(!list_empty(&exist->add_list)); - list_del(&exist->add_list); + list_del_init(&exist->add_list); } else { ASSERT(0); } -- cgit v1.2.3-73-gaa49b From 2b084d8205949dd804e279df8e68531da78be1e8 Mon Sep 17 00:00:00 2001 From: Haisu Wang Date: Fri, 25 Oct 2024 14:54:40 +0800 Subject: btrfs: fix the length of reserved qgroup to free The dealloc flag may be cleared and the extent won't reach the disk in cow_file_range when errors path. The reserved qgroup space is freed in commit 30479f31d44d ("btrfs: fix qgroup reserve leaks in cow_file_range"). However, the length of untouched region to free needs to be adjusted with the correct remaining region size. Fixes: 30479f31d44d ("btrfs: fix qgroup reserve leaks in cow_file_range") CC: stable@vger.kernel.org # 6.11+ Reviewed-by: Qu Wenruo Reviewed-by: Boris Burkov Signed-off-by: Haisu Wang Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c6d257fb40af..a97dfc72a80a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1618,7 +1618,7 @@ out_unlock: clear_bits |= EXTENT_CLEAR_DATA_RESV; extent_clear_unlock_delalloc(inode, start, end, locked_folio, &cached, clear_bits, page_ops); - btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL); + btrfs_qgroup_free_data(inode, NULL, start, end - start + 1, NULL); } return ret; } -- cgit v1.2.3-73-gaa49b