From 25d202ed820ee347edec0bf3bf553544556bf64b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 22 Oct 2018 10:21:38 -0500 Subject: mount: Retest MNT_LOCKED in do_umount It was recently pointed out that the one instance of testing MNT_LOCKED outside of the namespace_sem is in ksys_umount. Fix that by adding a test inside of do_umount with namespace_sem and the mount_lock held. As it helps to fail fails the existing test is maintained with an additional comment pointing out that it may be racy because the locks are not held. Cc: stable@vger.kernel.org Reported-by: Al Viro Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users") Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index 98d27da43304..72f10c40fe3f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1540,8 +1540,13 @@ static int do_umount(struct mount *mnt, int flags) namespace_lock(); lock_mount_hash(); - event++; + /* Recheck MNT_LOCKED with the locks held */ + retval = -EINVAL; + if (mnt->mnt.mnt_flags & MNT_LOCKED) + goto out; + + event++; if (flags & MNT_DETACH) { if (!list_empty(&mnt->mnt_list)) umount_tree(mnt, UMOUNT_PROPAGATE); @@ -1555,6 +1560,7 @@ static int do_umount(struct mount *mnt, int flags) retval = 0; } } +out: unlock_mount_hash(); namespace_unlock(); return retval; @@ -1645,7 +1651,7 @@ int ksys_umount(char __user *name, int flags) goto dput_and_out; if (!check_mnt(mnt)) goto dput_and_out; - if (mnt->mnt.mnt_flags & MNT_LOCKED) + if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */ goto dput_and_out; retval = -EPERM; if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) -- cgit From df7342b240185d58d3d9665c0bbf0a0f5570ec29 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Oct 2018 09:04:18 -0500 Subject: mount: Don't allow copying MNT_UNBINDABLE|MNT_LOCKED mounts Jonathan Calmels from NVIDIA reported that he's able to bypass the mount visibility security check in place in the Linux kernel by using a combination of the unbindable property along with the private mount propagation option to allow a unprivileged user to see a path which was purposefully hidden by the root user. Reproducer: # Hide a path to all users using a tmpfs root@castiana:~# mount -t tmpfs tmpfs /sys/devices/ root@castiana:~# # As an unprivileged user, unshare user namespace and mount namespace stgraber@castiana:~$ unshare -U -m -r # Confirm the path is still not accessible root@castiana:~# ls /sys/devices/ # Make /sys recursively unbindable and private root@castiana:~# mount --make-runbindable /sys root@castiana:~# mount --make-private /sys # Recursively bind-mount the rest of /sys over to /mnnt root@castiana:~# mount --rbind /sys/ /mnt # Access our hidden /sys/device as an unprivileged user root@castiana:~# ls /mnt/devices/ breakpoint cpu cstate_core cstate_pkg i915 intel_pt isa kprobe LNXSYSTM:00 msr pci0000:00 platform pnp0 power software system tracepoint uncore_arb uncore_cbox_0 uncore_cbox_1 uprobe virtual Solve this by teaching copy_tree to fail if a mount turns out to be both unbindable and locked. Cc: stable@vger.kernel.org Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users") Reported-by: Jonathan Calmels Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index 72f10c40fe3f..e0e0f9cf6c30 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1734,8 +1734,14 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, for (s = r; s; s = next_mnt(s, r)) { if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(s)) { - s = skip_mnt_tree(s); - continue; + if (s->mnt.mnt_flags & MNT_LOCKED) { + /* Both unbindable and locked. */ + q = ERR_PTR(-EPERM); + goto out; + } else { + s = skip_mnt_tree(s); + continue; + } } if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(s->mnt.mnt_root)) { -- cgit From 9c8e0a1b683525464a2abe9fb4b54404a50ed2b4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Oct 2018 12:05:11 -0500 Subject: mount: Prevent MNT_DETACH from disconnecting locked mounts Timothy Baldwin wrote: > As per mount_namespaces(7) unprivileged users should not be able to look under mount points: > > Mounts that come as a single unit from more privileged mount are locked > together and may not be separated in a less privileged mount namespace. > > However they can: > > 1. Create a mount namespace. > 2. In the mount namespace open a file descriptor to the parent of a mount point. > 3. Destroy the mount namespace. > 4. Use the file descriptor to look under the mount point. > > I have reproduced this with Linux 4.16.18 and Linux 4.18-rc8. > > The setup: > > $ sudo sysctl kernel.unprivileged_userns_clone=1 > kernel.unprivileged_userns_clone = 1 > $ mkdir -p A/B/Secret > $ sudo mount -t tmpfs hide A/B > > > "Secret" is indeed hidden as expected: > > $ ls -lR A > A: > total 0 > drwxrwxrwt 2 root root 40 Feb 12 21:08 B > > A/B: > total 0 > > > The attack revealing "Secret": > > $ unshare -Umr sh -c "exec unshare -m ls -lR /proc/self/fd/4/ 4 /proc/self/fd/4/: > total 0 > drwxr-xr-x 3 root root 60 Feb 12 21:08 B > > /proc/self/fd/4/B: > total 0 > drwxr-xr-x 2 root root 40 Feb 12 21:08 Secret > > /proc/self/fd/4/B/Secret: > total 0 I tracked this down to put_mnt_ns running passing UMOUNT_SYNC and disconnecting all of the mounts in a mount namespace. Fix this by factoring drop_mounts out of drop_collected_mounts and passing 0 instead of UMOUNT_SYNC. There are two possible behavior differences that result from this. - No longer setting UMOUNT_SYNC will no longer set MNT_SYNC_UMOUNT on the vfsmounts being unmounted. This effects the lazy rcu walk by kicking the walk out of rcu mode and forcing it to be a non-lazy walk. - No longer disconnecting locked mounts will keep some mounts around longer as they stay because the are locked to other mounts. There are only two users of drop_collected mounts: audit_tree.c and put_mnt_ns. In audit_tree.c the mounts are private and there are no rcu lazy walks only calls to iterate_mounts. So the changes should have no effect except for a small timing effect as the connected mounts are disconnected. In put_mnt_ns there may be references from process outside the mount namespace to the mounts. So the mounts remaining connected will be the bug fix that is needed. That rcu walks are allowed to continue appears not to be a problem especially as the rcu walk change was about an implementation detail not about semantics. Cc: stable@vger.kernel.org Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users") Reported-by: Timothy Baldwin Tested-by: Timothy Baldwin Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index e0e0f9cf6c30..74f64294a410 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1794,7 +1794,7 @@ void drop_collected_mounts(struct vfsmount *mnt) { namespace_lock(); lock_mount_hash(); - umount_tree(real_mount(mnt), UMOUNT_SYNC); + umount_tree(real_mount(mnt), 0); unlock_mount_hash(); namespace_unlock(); } -- cgit From 1e9c75fb9c47a75a9aec0cd17db5f6dc36b58e00 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 3 Oct 2018 10:18:33 -0400 Subject: mnt: fix __detach_mounts infinite loop Since commit ff17fa561a04 ("d_invalidate(): unhash immediately") immediately unhashes the dentry, we'll never return the mountpoint in lookup_mountpoint(), which can lead to an unbreakable loop in d_invalidate(). I have reports of NFS clients getting into this condition after the server removes an export of an existing mount created through follow_automount(), but I suspect there are various other ways to produce this problem if we hunt down users of d_invalidate(). For example, it is possible to get into this state by using XFS' d_invalidate() call in xfs_vn_unlink(): truncate -s 100m img{1,2} mkfs.xfs -q -n version=ci img1 mkfs.xfs -q -n version=ci img2 mkdir -p /mnt/xfs mount img1 /mnt/xfs mkdir /mnt/xfs/sub1 mount img2 /mnt/xfs/sub1 cat > /mnt/xfs/sub1/foo & umount -l /mnt/xfs/sub1 mount img2 /mnt/xfs/sub1 mount --make-private /mnt/xfs mkdir /mnt/xfs/sub2 mount --move /mnt/xfs/sub1 /mnt/xfs/sub2 rmdir /mnt/xfs/sub1 Fix this by moving the check for an unlinked dentry out of the detach_mounts() path. Fixes: ff17fa561a04 ("d_invalidate(): unhash immediately") Cc: stable@vger.kernel.org Reviewed-by: "Eric W. Biederman" Signed-off-by: Benjamin Coddington Signed-off-by: Eric W. Biederman --- fs/namespace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index 74f64294a410..a7f91265ea67 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -695,9 +695,6 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry) hlist_for_each_entry(mp, chain, m_hash) { if (mp->m_dentry == dentry) { - /* might be worth a WARN_ON() */ - if (d_unlinked(dentry)) - return ERR_PTR(-ENOENT); mp->m_count++; return mp; } @@ -711,6 +708,9 @@ static struct mountpoint *get_mountpoint(struct dentry *dentry) int ret; if (d_mountpoint(dentry)) { + /* might be worth a WARN_ON() */ + if (d_unlinked(dentry)) + return ERR_PTR(-ENOENT); mountpoint: read_seqlock_excl(&mount_lock); mp = lookup_mountpoint(dentry); -- cgit