aboutsummaryrefslogtreecommitdiff
path: root/fs/overlayfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/overlayfs')
-rw-r--r--fs/overlayfs/Kconfig21
-rw-r--r--fs/overlayfs/copy_up.c458
-rw-r--r--fs/overlayfs/dir.c164
-rw-r--r--fs/overlayfs/inode.c362
-rw-r--r--fs/overlayfs/namei.c459
-rw-r--r--fs/overlayfs/overlayfs.h122
-rw-r--r--fs/overlayfs/ovl_entry.h37
-rw-r--r--fs/overlayfs/readdir.c53
-rw-r--r--fs/overlayfs/super.c344
-rw-r--r--fs/overlayfs/util.c414
10 files changed, 2074 insertions, 360 deletions
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 0daac5112f7a..cbfc196e5dc5 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -1,5 +1,6 @@
config OVERLAY_FS
tristate "Overlay filesystem support"
+ select EXPORTFS
help
An overlay filesystem combines two filesystems - an 'upper' filesystem
and a 'lower' filesystem. When a name exists in both filesystems, the
@@ -22,3 +23,23 @@ config OVERLAY_FS_REDIRECT_DIR
Note, that redirects are not backward compatible. That is, mounting
an overlay which has redirects on a kernel that doesn't support this
feature will have unexpected results.
+
+config OVERLAY_FS_INDEX
+ bool "Overlayfs: turn on inodes index feature by default"
+ depends on OVERLAY_FS
+ help
+ If this config option is enabled then overlay filesystems will use
+ the inodes index dir to map lower inodes to upper inodes by default.
+ In this case it is still possible to turn off index globally with the
+ "index=off" module option or on a filesystem instance basis with the
+ "index=off" mount option.
+
+ The inodes index feature prevents breaking of lower hardlinks on copy
+ up.
+
+ Note, that the inodes index feature is read-only backward compatible.
+ That is, mounting an overlay which has an index dir on a kernel that
+ doesn't support this feature read-only, will not have any negative
+ outcomes. However, mounting the same overlay with an old kernel
+ read-write and then mounting it again with a new kernel, will have
+ unexpected results.
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index f57043dace62..acb6f97deb97 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -15,11 +15,14 @@
#include <linux/xattr.h>
#include <linux/security.h>
#include <linux/uaccess.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/cred.h>
#include <linux/namei.h>
#include <linux/fdtable.h>
#include <linux/ratelimit.h>
+#include <linux/exportfs.h>
#include "overlayfs.h"
+#include "ovl_entry.h"
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
@@ -230,91 +233,290 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
return err;
}
-static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
- struct dentry *dentry, struct path *lowerpath,
- struct kstat *stat, const char *link)
+struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper)
{
- struct inode *wdir = workdir->d_inode;
- struct inode *udir = upperdir->d_inode;
- struct dentry *newdentry = NULL;
- struct dentry *upper = NULL;
+ struct ovl_fh *fh;
+ int fh_type, fh_len, dwords;
+ void *buf;
+ int buflen = MAX_HANDLE_SZ;
+ uuid_t *uuid = &lower->d_sb->s_uuid;
+
+ buf = kmalloc(buflen, GFP_TEMPORARY);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * We encode a non-connectable file handle for non-dir, because we
+ * only need to find the lower inode number and we don't want to pay
+ * the price or reconnecting the dentry.
+ */
+ dwords = buflen >> 2;
+ fh_type = exportfs_encode_fh(lower, buf, &dwords, 0);
+ buflen = (dwords << 2);
+
+ fh = ERR_PTR(-EIO);
+ if (WARN_ON(fh_type < 0) ||
+ WARN_ON(buflen > MAX_HANDLE_SZ) ||
+ WARN_ON(fh_type == FILEID_INVALID))
+ goto out;
+
+ BUILD_BUG_ON(MAX_HANDLE_SZ + offsetof(struct ovl_fh, fid) > 255);
+ fh_len = offsetof(struct ovl_fh, fid) + buflen;
+ fh = kmalloc(fh_len, GFP_KERNEL);
+ if (!fh) {
+ fh = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ fh->version = OVL_FH_VERSION;
+ fh->magic = OVL_FH_MAGIC;
+ fh->type = fh_type;
+ fh->flags = OVL_FH_FLAG_CPU_ENDIAN;
+ /*
+ * When we will want to decode an overlay dentry from this handle
+ * and all layers are on the same fs, if we get a disconncted real
+ * dentry when we decode fid, the only way to tell if we should assign
+ * it to upperdentry or to lowerstack is by checking this flag.
+ */
+ if (is_upper)
+ fh->flags |= OVL_FH_FLAG_PATH_UPPER;
+ fh->len = fh_len;
+ fh->uuid = *uuid;
+ memcpy(fh->fid, buf, buflen);
+
+out:
+ kfree(buf);
+ return fh;
+}
+
+static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
+ struct dentry *upper)
+{
+ const struct ovl_fh *fh = NULL;
int err;
+
+ /*
+ * When lower layer doesn't support export operations store a 'null' fh,
+ * so we can use the overlay.origin xattr to distignuish between a copy
+ * up and a pure upper inode.
+ */
+ if (ovl_can_decode_fh(lower->d_sb)) {
+ fh = ovl_encode_fh(lower, false);
+ if (IS_ERR(fh))
+ return PTR_ERR(fh);
+ }
+
+ /*
+ * Do not fail when upper doesn't support xattrs.
+ */
+ err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
+ fh ? fh->len : 0, 0);
+ kfree(fh);
+
+ return err;
+}
+
+struct ovl_copy_up_ctx {
+ struct dentry *parent;
+ struct dentry *dentry;
+ struct path lowerpath;
+ struct kstat stat;
+ struct kstat pstat;
+ const char *link;
+ struct dentry *destdir;
+ struct qstr destname;
+ struct dentry *workdir;
+ bool tmpfile;
+ bool origin;
+};
+
+static int ovl_link_up(struct ovl_copy_up_ctx *c)
+{
+ int err;
+ struct dentry *upper;
+ struct dentry *upperdir = ovl_dentry_upper(c->parent);
+ struct inode *udir = d_inode(upperdir);
+
+ /* Mark parent "impure" because it may now contain non-pure upper */
+ err = ovl_set_impure(c->parent, upperdir);
+ if (err)
+ return err;
+
+ err = ovl_set_nlink_lower(c->dentry);
+ if (err)
+ return err;
+
+ inode_lock_nested(udir, I_MUTEX_PARENT);
+ upper = lookup_one_len(c->dentry->d_name.name, upperdir,
+ c->dentry->d_name.len);
+ err = PTR_ERR(upper);
+ if (!IS_ERR(upper)) {
+ err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper,
+ true);
+ dput(upper);
+
+ if (!err) {
+ /* Restore timestamps on parent (best effort) */
+ ovl_set_timestamps(upperdir, &c->pstat);
+ ovl_dentry_set_upper_alias(c->dentry);
+ }
+ }
+ inode_unlock(udir);
+ ovl_set_nlink_upper(c->dentry);
+
+ return err;
+}
+
+static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
+ struct dentry **newdentry)
+{
+ int err;
+ struct dentry *upper;
+ struct inode *udir = d_inode(c->destdir);
+
+ upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
+ if (IS_ERR(upper))
+ return PTR_ERR(upper);
+
+ if (c->tmpfile)
+ err = ovl_do_link(temp, udir, upper, true);
+ else
+ err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0);
+
+ if (!err)
+ *newdentry = dget(c->tmpfile ? upper : temp);
+ dput(upper);
+
+ return err;
+}
+
+static int ovl_get_tmpfile(struct ovl_copy_up_ctx *c, struct dentry **tempp)
+{
+ int err;
+ struct dentry *temp;
const struct cred *old_creds = NULL;
struct cred *new_creds = NULL;
struct cattr cattr = {
/* Can't properly set mode on creation because of the umask */
- .mode = stat->mode & S_IFMT,
- .rdev = stat->rdev,
- .link = link
+ .mode = c->stat.mode & S_IFMT,
+ .rdev = c->stat.rdev,
+ .link = c->link
};
- newdentry = ovl_lookup_temp(workdir, dentry);
- err = PTR_ERR(newdentry);
- if (IS_ERR(newdentry))
- goto out;
-
- upper = lookup_one_len(dentry->d_name.name, upperdir,
- dentry->d_name.len);
- err = PTR_ERR(upper);
- if (IS_ERR(upper))
- goto out1;
-
- err = security_inode_copy_up(dentry, &new_creds);
+ err = security_inode_copy_up(c->dentry, &new_creds);
if (err < 0)
- goto out2;
+ goto out;
if (new_creds)
old_creds = override_creds(new_creds);
- err = ovl_create_real(wdir, newdentry, &cattr, NULL, true);
-
+ if (c->tmpfile) {
+ temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
+ if (IS_ERR(temp))
+ goto temp_err;
+ } else {
+ temp = ovl_lookup_temp(c->workdir);
+ if (IS_ERR(temp))
+ goto temp_err;
+
+ err = ovl_create_real(d_inode(c->workdir), temp, &cattr,
+ NULL, true);
+ if (err) {
+ dput(temp);
+ goto out;
+ }
+ }
+ err = 0;
+ *tempp = temp;
+out:
if (new_creds) {
revert_creds(old_creds);
put_cred(new_creds);
}
- if (err)
- goto out2;
+ return err;
+
+temp_err:
+ err = PTR_ERR(temp);
+ goto out;
+}
+
+static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
+{
+ int err;
- if (S_ISREG(stat->mode)) {
+ if (S_ISREG(c->stat.mode)) {
struct path upperpath;
- ovl_path_upper(dentry, &upperpath);
+ ovl_path_upper(c->dentry, &upperpath);
BUG_ON(upperpath.dentry != NULL);
- upperpath.dentry = newdentry;
+ upperpath.dentry = temp;
- err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
+ err = ovl_copy_up_data(&c->lowerpath, &upperpath, c->stat.size);
if (err)
- goto out_cleanup;
+ return err;
}
- err = ovl_copy_xattr(lowerpath->dentry, newdentry);
+ err = ovl_copy_xattr(c->lowerpath.dentry, temp);
if (err)
- goto out_cleanup;
+ return err;
- inode_lock(newdentry->d_inode);
- err = ovl_set_attr(newdentry, stat);
- inode_unlock(newdentry->d_inode);
+ inode_lock(temp->d_inode);
+ err = ovl_set_attr(temp, &c->stat);
+ inode_unlock(temp->d_inode);
+ if (err)
+ return err;
+
+ /*
+ * Store identifier of lower inode in upper inode xattr to
+ * allow lookup of the copy up origin inode.
+ *
+ * Don't set origin when we are breaking the association with a lower
+ * hard link.
+ */
+ if (c->origin) {
+ err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
+{
+ struct inode *udir = c->destdir->d_inode;
+ struct dentry *newdentry = NULL;
+ struct dentry *temp = NULL;
+ int err;
+
+ err = ovl_get_tmpfile(c, &temp);
+ if (err)
+ goto out;
+
+ err = ovl_copy_up_inode(c, temp);
if (err)
goto out_cleanup;
- err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
+ if (c->tmpfile) {
+ inode_lock_nested(udir, I_MUTEX_PARENT);
+ err = ovl_install_temp(c, temp, &newdentry);
+ inode_unlock(udir);
+ } else {
+ err = ovl_install_temp(c, temp, &newdentry);
+ }
if (err)
goto out_cleanup;
- ovl_dentry_update(dentry, newdentry);
- ovl_inode_update(d_inode(dentry), d_inode(newdentry));
- newdentry = NULL;
-out2:
- dput(upper);
-out1:
- dput(newdentry);
+ ovl_inode_update(d_inode(c->dentry), newdentry);
out:
+ dput(temp);
return err;
out_cleanup:
- ovl_cleanup(wdir, newdentry);
- goto out2;
+ if (!c->tmpfile)
+ ovl_cleanup(d_inode(c->workdir), temp);
+ goto out;
}
/*
@@ -326,55 +528,119 @@ out_cleanup:
* is possible that the copy up will lock the old parent. At that point
* the file will have already been copied up anyway.
*/
+static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
+{
+ int err;
+ struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
+ bool indexed = false;
+
+ if (ovl_indexdir(c->dentry->d_sb) && !S_ISDIR(c->stat.mode) &&
+ c->stat.nlink > 1)
+ indexed = true;
+
+ if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed)
+ c->origin = true;
+
+ if (indexed) {
+ c->destdir = ovl_indexdir(c->dentry->d_sb);
+ err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
+ if (err)
+ return err;
+ } else {
+ /*
+ * Mark parent "impure" because it may now contain non-pure
+ * upper
+ */
+ err = ovl_set_impure(c->parent, c->destdir);
+ if (err)
+ return err;
+ }
+
+ /* Should we copyup with O_TMPFILE or with workdir? */
+ if (S_ISREG(c->stat.mode) && ofs->tmpfile) {
+ c->tmpfile = true;
+ err = ovl_copy_up_locked(c);
+ } else {
+ err = -EIO;
+ if (lock_rename(c->workdir, c->destdir) != NULL) {
+ pr_err("overlayfs: failed to lock workdir+upperdir\n");
+ } else {
+ err = ovl_copy_up_locked(c);
+ unlock_rename(c->workdir, c->destdir);
+ }
+ }
+
+ if (indexed) {
+ if (!err)
+ ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
+ kfree(c->destname.name);
+ } else if (!err) {
+ struct inode *udir = d_inode(c->destdir);
+
+ /* Restore timestamps on parent (best effort) */
+ inode_lock(udir);
+ ovl_set_timestamps(c->destdir, &c->pstat);
+ inode_unlock(udir);
+
+ ovl_dentry_set_upper_alias(c->dentry);
+ }
+
+ return err;
+}
+
static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
- struct path *lowerpath, struct kstat *stat)
+ int flags)
{
- DEFINE_DELAYED_CALL(done);
- struct dentry *workdir = ovl_workdir(dentry);
int err;
- struct kstat pstat;
+ DEFINE_DELAYED_CALL(done);
struct path parentpath;
- struct dentry *lowerdentry = lowerpath->dentry;
- struct dentry *upperdir;
- const char *link = NULL;
+ struct ovl_copy_up_ctx ctx = {
+ .parent = parent,
+ .dentry = dentry,
+ .workdir = ovl_workdir(dentry),
+ };
- if (WARN_ON(!workdir))
+ if (WARN_ON(!ctx.workdir))
return -EROFS;
- ovl_do_check_copy_up(lowerdentry);
+ ovl_path_lower(dentry, &ctx.lowerpath);
+ err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
+ STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
+ if (err)
+ return err;
ovl_path_upper(parent, &parentpath);
- upperdir = parentpath.dentry;
+ ctx.destdir = parentpath.dentry;
+ ctx.destname = dentry->d_name;
- err = vfs_getattr(&parentpath, &pstat);
+ err = vfs_getattr(&parentpath, &ctx.pstat,
+ STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
if (err)
return err;
- if (S_ISLNK(stat->mode)) {
- link = vfs_get_link(lowerdentry, &done);
- if (IS_ERR(link))
- return PTR_ERR(link);
- }
+ /* maybe truncate regular file. this has no effect on dirs */
+ if (flags & O_TRUNC)
+ ctx.stat.size = 0;
- err = -EIO;
- if (lock_rename(workdir, upperdir) != NULL) {
- pr_err("overlayfs: failed to lock workdir+upperdir\n");
- goto out_unlock;
+ if (S_ISLNK(ctx.stat.mode)) {
+ ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done);
+ if (IS_ERR(ctx.link))
+ return PTR_ERR(ctx.link);
}
- if (ovl_dentry_upper(dentry)) {
- /* Raced with another copy-up? Nothing to do, then... */
- err = 0;
- goto out_unlock;
+ ovl_do_check_copy_up(ctx.lowerpath.dentry);
+
+ err = ovl_copy_up_start(dentry);
+ /* err < 0: interrupted, err > 0: raced with another copy-up */
+ if (unlikely(err)) {
+ if (err > 0)
+ err = 0;
+ } else {
+ if (!ovl_dentry_upper(dentry))
+ err = ovl_do_copy_up(&ctx);
+ if (!err && !ovl_dentry_has_upper_alias(dentry))
+ err = ovl_link_up(&ctx);
+ ovl_copy_up_end(dentry);
}
-
- err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
- stat, link);
- if (!err) {
- /* Restore timestamps on parent (best effort) */
- ovl_set_timestamps(upperdir, &pstat);
- }
-out_unlock:
- unlock_rename(workdir, upperdir);
do_delayed_call(&done);
return err;
@@ -388,11 +654,22 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
while (!err) {
struct dentry *next;
struct dentry *parent;
- struct path lowerpath;
- struct kstat stat;
- enum ovl_path_type type = ovl_path_type(dentry);
- if (OVL_TYPE_UPPER(type))
+ /*
+ * Check if copy-up has happened as well as for upper alias (in
+ * case of hard links) is there.
+ *
+ * Both checks are lockless:
+ * - false negatives: will recheck under oi->lock
+ * - false positives:
+ * + ovl_dentry_upper() uses memory barriers to ensure the
+ * upper dentry is up-to-date
+ * + ovl_dentry_has_upper_alias() relies on locking of
+ * upper parent i_rwsem to prevent reordering copy-up
+ * with rename.
+ */
+ if (ovl_dentry_upper(dentry) &&
+ ovl_dentry_has_upper_alias(dentry))
break;
next = dget(dentry);
@@ -400,21 +677,14 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
for (;;) {
parent = dget_parent(next);
- type = ovl_path_type(parent);
- if (OVL_TYPE_UPPER(type))
+ if (ovl_dentry_upper(parent))
break;
dput(next);
next = parent;
}
- ovl_path_lower(next, &lowerpath);
- err = vfs_getattr(&lowerpath, &stat);
- /* maybe truncate regular file. this has no effect on dirs */
- if (flags & O_TRUNC)
- stat.size = 0;
- if (!err)
- err = ovl_copy_up_one(parent, next, &lowerpath, &stat);
+ err = ovl_copy_up_one(parent, next, flags);
dput(parent);
dput(next);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 16e06dd89457..48b70e6490f3 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -24,7 +24,7 @@ module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
MODULE_PARM_DESC(ovl_redirect_max,
"Maximum length of absolute redirect xattr value");
-void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
+int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
{
int err;
@@ -39,9 +39,11 @@ void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
wdentry, err);
}
+
+ return err;
}
-struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
+struct dentry *ovl_lookup_temp(struct dentry *workdir)
{
struct dentry *temp;
char name[20];
@@ -68,7 +70,7 @@ static struct dentry *ovl_whiteout(struct dentry *workdir,
struct dentry *whiteout;
struct inode *wdir = workdir->d_inode;
- whiteout = ovl_lookup_temp(workdir, dentry);
+ whiteout = ovl_lookup_temp(workdir);
if (IS_ERR(whiteout))
return whiteout;
@@ -127,44 +129,26 @@ int ovl_create_real(struct inode *dir, struct dentry *newdentry,
return err;
}
-static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
+static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
+ int xerr)
{
int err;
- err = ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0);
+ err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
if (!err)
ovl_dentry_set_opaque(dentry);
return err;
}
-static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
{
- int err;
- enum ovl_path_type type;
- struct path realpath;
- const struct cred *old_cred;
-
- type = ovl_path_real(dentry, &realpath);
- old_cred = ovl_override_creds(dentry->d_sb);
- err = vfs_getattr(&realpath, stat);
- revert_creds(old_cred);
- if (err)
- return err;
-
- stat->dev = dentry->d_sb->s_dev;
- stat->ino = dentry->d_inode->i_ino;
-
/*
- * It's probably not worth it to count subdirs to get the
- * correct link count. nlink=1 seems to pacify 'find' and
- * other utilities.
+ * Fail with -EIO when trying to create opaque dir and upper doesn't
+ * support xattrs. ovl_rename() calls ovl_set_opaque_xerr(-EXDEV) to
+ * return a specific error for noxattr case.
*/
- if (OVL_TYPE_MERGE(type))
- stat->nlink = 1;
-
- return 0;
+ return ovl_set_opaque_xerr(dentry, upperdentry, -EIO);
}
/* Common operations required to be done after creation of file on upper */
@@ -172,15 +156,19 @@ static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
struct dentry *newdentry, bool hardlink)
{
ovl_dentry_version_inc(dentry->d_parent);
- ovl_dentry_update(dentry, newdentry);
+ ovl_dentry_set_upper_alias(dentry);
if (!hardlink) {
- ovl_inode_update(inode, d_inode(newdentry));
+ ovl_inode_update(inode, newdentry);
ovl_copyattr(newdentry->d_inode, inode);
} else {
- WARN_ON(ovl_inode_real(inode, NULL) != d_inode(newdentry));
+ WARN_ON(ovl_inode_real(inode) != d_inode(newdentry));
+ dput(newdentry);
inc_nlink(inode);
}
d_instantiate(dentry, inode);
+ /* Force lookup of new upper hardlink to find its lower */
+ if (hardlink)
+ d_drop(dentry);
}
static bool ovl_type_merge(struct dentry *dentry)
@@ -188,6 +176,11 @@ static bool ovl_type_merge(struct dentry *dentry)
return OVL_TYPE_MERGE(ovl_path_type(dentry));
}
+static bool ovl_type_origin(struct dentry *dentry)
+{
+ return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
+}
+
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
struct cattr *attr, struct dentry *hardlink)
{
@@ -209,7 +202,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
if (err)
goto out_dput;
- if (ovl_type_merge(dentry->d_parent)) {
+ if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) {
/* Setting opaque here is just an optimization, allow to fail */
ovl_set_opaque(dentry, newdentry);
}
@@ -264,7 +257,8 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
goto out;
ovl_path_upper(dentry, &upperpath);
- err = vfs_getattr(&upperpath, &stat);
+ err = vfs_getattr(&upperpath, &stat,
+ STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
if (err)
goto out_unlock;
@@ -275,7 +269,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
if (upper->d_parent->d_inode != udir)
goto out_unlock;
- opaquedir = ovl_lookup_temp(workdir, dentry);
+ opaquedir = ovl_lookup_temp(workdir);
err = PTR_ERR(opaquedir);
if (IS_ERR(opaquedir))
goto out_unlock;
@@ -407,7 +401,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
if (err)
goto out;
- newdentry = ovl_lookup_temp(workdir, dentry);
+ newdentry = ovl_lookup_temp(workdir);
err = PTR_ERR(newdentry);
if (IS_ERR(newdentry))
goto out_unlock;
@@ -487,17 +481,30 @@ out_cleanup:
}
static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
- struct cattr *attr, struct dentry *hardlink)
+ struct cattr *attr, struct dentry *hardlink,
+ bool origin)
{
int err;
const struct cred *old_cred;
struct cred *override_cred;
+ struct dentry *parent = dentry->d_parent;
- err = ovl_copy_up(dentry->d_parent);
+ err = ovl_copy_up(parent);
if (err)
return err;
old_cred = ovl_override_creds(dentry->d_sb);
+
+ /*
+ * When linking a file with copy up origin into a new parent, mark the
+ * new parent dir "impure".
+ */
+ if (origin) {
+ err = ovl_set_impure(parent, ovl_dentry_upper(parent));
+ if (err)
+ goto out_revert_creds;
+ }
+
err = -ENOMEM;
override_cred = prepare_creds();
if (override_cred) {
@@ -556,7 +563,7 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
inode_init_owner(inode, dentry->d_parent->d_inode, mode);
attr.mode = inode->i_mode;
- err = ovl_create_or_link(dentry, inode, &attr, NULL);
+ err = ovl_create_or_link(dentry, inode, &attr, NULL, false);
if (err)
iput(inode);
@@ -597,6 +604,7 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
struct dentry *new)
{
int err;
+ bool locked = false;
struct inode *inode;
err = ovl_want_write(old);
@@ -607,19 +615,30 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
if (err)
goto out_drop_write;
+ err = ovl_nlink_start(old, &locked);
+ if (err)
+ goto out_drop_write;
+
inode = d_inode(old);
ihold(inode);
- err = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old));
+ err = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old),
+ ovl_type_origin(old));
if (err)
iput(inode);
+ ovl_nlink_end(old, locked);
out_drop_write:
ovl_drop_write(old);
out:
return err;
}
+static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
+{
+ return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper);
+}
+
static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
{
struct dentry *workdir = ovl_workdir(dentry);
@@ -655,7 +674,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
err = -ESTALE;
if ((opaquedir && upper != opaquedir) ||
(!opaquedir && ovl_dentry_upper(dentry) &&
- upper != ovl_dentry_upper(dentry))) {
+ !ovl_matches_upper(dentry, upper))) {
goto out_dput_upper;
}
@@ -716,7 +735,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
err = -ESTALE;
if ((opaquedir && upper != opaquedir) ||
- (!opaquedir && upper != ovl_dentry_upper(dentry)))
+ (!opaquedir && !ovl_matches_upper(dentry, upper)))
goto out_dput_upper;
if (is_dir)
@@ -744,8 +763,8 @@ out:
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
{
- enum ovl_path_type type;
int err;
+ bool locked = false;
const struct cred *old_cred;
err = ovl_want_write(dentry);
@@ -756,7 +775,9 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
if (err)
goto out_drop_write;
- type = ovl_path_type(dentry);
+ err = ovl_nlink_start(dentry, &locked);
+ if (err)
+ goto out_drop_write;
old_cred = ovl_override_creds(dentry->d_sb);
if (!ovl_lower_positive(dentry))
@@ -770,6 +791,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
else
drop_nlink(dentry->d_inode);
}
+ ovl_nlink_end(dentry, locked);
out_drop_write:
ovl_drop_write(dentry);
out:
@@ -871,18 +893,16 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
if (IS_ERR(redirect))
return PTR_ERR(redirect);
- err = ovl_do_setxattr(ovl_dentry_upper(dentry), OVL_XATTR_REDIRECT,
- redirect, strlen(redirect), 0);
+ err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry),
+ OVL_XATTR_REDIRECT,
+ redirect, strlen(redirect), -EXDEV);
if (!err) {
spin_lock(&dentry->d_lock);
ovl_dentry_set_redirect(dentry, redirect);
spin_unlock(&dentry->d_lock);
} else {
kfree(redirect);
- if (err == -EOPNOTSUPP)
- ovl_clear_redirect_dir(dentry->d_sb);
- else
- pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err);
+ pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err);
/* Fall back to userspace copy-up */
err = -EXDEV;
}
@@ -894,6 +914,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
unsigned int flags)
{
int err;
+ bool locked = false;
struct dentry *old_upperdir;
struct dentry *new_upperdir;
struct dentry *olddentry;
@@ -937,6 +958,10 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
err = ovl_copy_up(new);
if (err)
goto out_drop_write;
+ } else {
+ err = ovl_nlink_start(new, &locked);
+ if (err)
+ goto out_drop_write;
}
old_cred = ovl_override_creds(old->d_sb);
@@ -968,6 +993,25 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
old_upperdir = ovl_dentry_upper(old->d_parent);
new_upperdir = ovl_dentry_upper(new->d_parent);
+ if (!samedir) {
+ /*
+ * When moving a merge dir or non-dir with copy up origin into
+ * a new parent, we are marking the new parent dir "impure".
+ * When ovl_iterate() iterates an "impure" upper dir, it will
+ * lookup the origin inodes of the entries to fill d_ino.
+ */
+ if (ovl_type_origin(old)) {
+ err = ovl_set_impure(new->d_parent, new_upperdir);
+ if (err)
+ goto out_revert_creds;
+ }
+ if (!overwrite && ovl_type_origin(new)) {
+ err = ovl_set_impure(old->d_parent, old_upperdir);
+ if (err)
+ goto out_revert_creds;
+ }
+ }
+
trap = lock_rename(new_upperdir, old_upperdir);
olddentry = lookup_one_len(old->d_name.name, old_upperdir,
@@ -977,7 +1021,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
goto out_unlock;
err = -ESTALE;
- if (olddentry != ovl_dentry_upper(old))
+ if (!ovl_matches_upper(old, olddentry))
goto out_dput_old;
newdentry = lookup_one_len(new->d_name.name, new_upperdir,
@@ -990,12 +1034,12 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
new_opaque = ovl_dentry_is_opaque(new);
err = -ESTALE;
- if (ovl_dentry_upper(new)) {
+ if (d_inode(new) && ovl_dentry_upper(new)) {
if (opaquedir) {
if (newdentry != opaquedir)
goto out_dput;
} else {
- if (newdentry != ovl_dentry_upper(new))
+ if (!ovl_matches_upper(new, newdentry))
goto out_dput;
}
} else {
@@ -1017,7 +1061,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
if (ovl_type_merge_or_lower(old))
err = ovl_set_redirect(old, samedir);
else if (!old_opaque && ovl_type_merge(new->d_parent))
- err = ovl_set_opaque(old, olddentry);
+ err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
if (err)
goto out_dput;
}
@@ -1025,7 +1069,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
if (ovl_type_merge_or_lower(new))
err = ovl_set_redirect(new, samedir);
else if (!new_opaque && ovl_type_merge(old->d_parent))
- err = ovl_set_opaque(new, newdentry);
+ err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
if (err)
goto out_dput;
}
@@ -1038,6 +1082,13 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
if (cleanup_whiteout)
ovl_cleanup(old_upperdir->d_inode, newdentry);
+ if (overwrite && d_inode(new)) {
+ if (new_is_dir)
+ clear_nlink(d_inode(new));
+ else
+ drop_nlink(d_inode(new));
+ }
+
ovl_dentry_version_inc(old->d_parent);
ovl_dentry_version_inc(new->d_parent);
@@ -1049,6 +1100,7 @@ out_unlock:
unlock_rename(new_upperdir, old_upperdir);
out_revert_creds:
revert_creds(old_cred);
+ ovl_nlink_end(new, locked);
out_drop_write:
ovl_drop_write(old);
out:
@@ -1068,7 +1120,7 @@ const struct inode_operations ovl_dir_inode_operations = {
.create = ovl_create,
.mknod = ovl_mknod,
.permission = ovl_permission,
- .getattr = ovl_dir_getattr,
+ .getattr = ovl_getattr,
.listxattr = ovl_listxattr,
.get_acl = ovl_get_acl,
.update_time = ovl_update_time,
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 08643ac44a02..5bc71642b226 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -9,8 +9,10 @@
#include <linux/fs.h>
#include <linux/slab.h>
+#include <linux/cred.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
+#include <linux/ratelimit.h>
#include "overlayfs.h"
int ovl_setattr(struct dentry *dentry, struct iattr *attr)
@@ -56,24 +58,98 @@ out:
return err;
}
-static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+int ovl_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags)
{
+ struct dentry *dentry = path->dentry;
+ enum ovl_path_type type;
struct path realpath;
const struct cred *old_cred;
+ bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
int err;
- ovl_path_real(dentry, &realpath);
+ type = ovl_path_real(dentry, &realpath);
old_cred = ovl_override_creds(dentry->d_sb);
- err = vfs_getattr(&realpath, stat);
+ err = vfs_getattr(&realpath, stat, request_mask, flags);
+ if (err)
+ goto out;
+
+ /*
+ * When all layers are on the same fs, all real inode number are
+ * unique, so we use the overlay st_dev, which is friendly to du -x.
+ *
+ * We also use st_ino of the copy up origin, if we know it.
+ * This guaranties constant st_dev/st_ino across copy up.
+ *
+ * If filesystem supports NFS export ops, this also guaranties
+ * persistent st_ino across mount cycle.
+ */
+ if (ovl_same_sb(dentry->d_sb)) {
+ if (OVL_TYPE_ORIGIN(type)) {
+ struct kstat lowerstat;
+ u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0);
+
+ ovl_path_lower(dentry, &realpath);
+ err = vfs_getattr(&realpath, &lowerstat,
+ lowermask, flags);
+ if (err)
+ goto out;
+
+ WARN_ON_ONCE(stat->dev != lowerstat.dev);
+ /*
+ * Lower hardlinks may be broken on copy up to different
+ * upper files, so we cannot use the lower origin st_ino
+ * for those different files, even for the same fs case.
+ * With inodes index enabled, it is safe to use st_ino
+ * of an indexed hardlinked origin. The index validates
+ * that the upper hardlink is not broken.
+ */
+ if (is_dir || lowerstat.nlink == 1 ||
+ ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+ stat->ino = lowerstat.ino;
+ }
+ stat->dev = dentry->d_sb->s_dev;
+ } else if (is_dir) {
+ /*
+ * If not all layers are on the same fs the pair {real st_ino;
+ * overlay st_dev} is not unique, so use the non persistent
+ * overlay st_ino.
+ *
+ * Always use the overlay st_dev for directories, so 'find
+ * -xdev' will scan the entire overlay mount and won't cross the
+ * overlay mount boundaries.
+ */
+ stat->dev = dentry->d_sb->s_dev;
+ stat->ino = dentry->d_inode->i_ino;
+ }
+
+ /*
+ * It's probably not worth it to count subdirs to get the
+ * correct link count. nlink=1 seems to pacify 'find' and
+ * other utilities.
+ */
+ if (is_dir && OVL_TYPE_MERGE(type))
+ stat->nlink = 1;
+
+ /*
+ * Return the overlay inode nlinks for indexed upper inodes.
+ * Overlay inode nlink counts the union of the upper hardlinks
+ * and non-covered lower hardlinks. It does not include the upper
+ * index hardlink.
+ */
+ if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+ stat->nlink = dentry->d_inode->i_nlink;
+
+out:
revert_creds(old_cred);
+
return err;
}
int ovl_permission(struct inode *inode, int mask)
{
- bool is_upper;
- struct inode *realinode = ovl_inode_real(inode, &is_upper);
+ struct inode *upperinode = ovl_inode_upper(inode);
+ struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
const struct cred *old_cred;
int err;
@@ -92,7 +168,8 @@ int ovl_permission(struct inode *inode, int mask)
return err;
old_cred = ovl_override_creds(inode->i_sb);
- if (!is_upper && !special_file(realinode->i_mode) && mask & MAY_WRITE) {
+ if (!upperinode &&
+ !special_file(realinode->i_mode) && mask & MAY_WRITE) {
mask &= ~(MAY_WRITE | MAY_APPEND);
/* Make sure mounter can read file for copy up later */
mask |= MAY_READ;
@@ -125,37 +202,38 @@ bool ovl_is_private_xattr(const char *name)
sizeof(OVL_XATTR_PREFIX) - 1) == 0;
}
-int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value,
- size_t size, int flags)
+int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
{
int err;
- struct path realpath;
- enum ovl_path_type type = ovl_path_real(dentry, &realpath);
+ struct dentry *upperdentry = ovl_i_dentry_upper(inode);
+ struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
const struct cred *old_cred;
err = ovl_want_write(dentry);
if (err)
goto out;
- if (!value && !OVL_TYPE_UPPER(type)) {
- err = vfs_getxattr(realpath.dentry, name, NULL, 0);
+ if (!value && !upperdentry) {
+ err = vfs_getxattr(realdentry, name, NULL, 0);
if (err < 0)
goto out_drop_write;
}
- err = ovl_copy_up(dentry);
- if (err)
- goto out_drop_write;
+ if (!upperdentry) {
+ err = ovl_copy_up(dentry);
+ if (err)
+ goto out_drop_write;
- if (!OVL_TYPE_UPPER(type))
- ovl_path_upper(dentry, &realpath);
+ realdentry = ovl_dentry_upper(dentry);
+ }
old_cred = ovl_override_creds(dentry->d_sb);
if (value)
- err = vfs_setxattr(realpath.dentry, name, value, size, flags);
+ err = vfs_setxattr(realdentry, name, value, size, flags);
else {
WARN_ON(flags != XATTR_REPLACE);
- err = vfs_removexattr(realpath.dentry, name);
+ err = vfs_removexattr(realdentry, name);
}
revert_creds(old_cred);
@@ -165,12 +243,13 @@ out:
return err;
}
-int ovl_xattr_get(struct dentry *dentry, const char *name,
+int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
void *value, size_t size)
{
- struct dentry *realdentry = ovl_dentry_real(dentry);
ssize_t res;
const struct cred *old_cred;
+ struct dentry *realdentry =
+ ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry);
old_cred = ovl_override_creds(dentry->d_sb);
res = vfs_getxattr(realdentry, name, value, size);
@@ -178,6 +257,16 @@ int ovl_xattr_get(struct dentry *dentry, const char *name,
return res;
}
+static bool ovl_can_list(const char *s)
+{
+ /* List all non-trusted xatts */
+ if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
+ return true;
+
+ /* Never list trusted.overlay, list other trusted for superuser only */
+ return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN);
+}
+
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
struct dentry *realdentry = ovl_dentry_real(dentry);
@@ -201,7 +290,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
return -EIO;
len -= slen;
- if (ovl_is_private_xattr(s)) {
+ if (!ovl_can_list(s)) {
res -= slen;
memmove(s, s + slen, len);
} else {
@@ -214,7 +303,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
struct posix_acl *ovl_get_acl(struct inode *inode, int type)
{
- struct inode *realinode = ovl_inode_real(inode, NULL);
+ struct inode *realinode = ovl_inode_real(inode);
const struct cred *old_cred;
struct posix_acl *acl;
@@ -228,13 +317,13 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type)
return acl;
}
-static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
- struct dentry *realdentry)
+static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
{
- if (OVL_TYPE_UPPER(type))
+ if (ovl_dentry_upper(dentry) &&
+ ovl_dentry_has_upper_alias(dentry))
return false;
- if (special_file(realdentry->d_inode->i_mode))
+ if (special_file(d_inode(dentry)->i_mode))
return false;
if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
@@ -246,11 +335,8 @@ static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
{
int err = 0;
- struct path realpath;
- enum ovl_path_type type;
- type = ovl_path_real(dentry, &realpath);
- if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) {
+ if (ovl_open_need_copy_up(dentry, file_flags)) {
err = ovl_want_write(dentry);
if (!err) {
err = ovl_copy_up_flags(dentry, file_flags);
@@ -301,6 +387,41 @@ static const struct inode_operations ovl_symlink_inode_operations = {
.update_time = ovl_update_time,
};
+/*
+ * It is possible to stack overlayfs instance on top of another
+ * overlayfs instance as lower layer. We need to annonate the
+ * stackable i_mutex locks according to stack level of the super
+ * block instance. An overlayfs instance can never be in stack
+ * depth 0 (there is always a real fs below it). An overlayfs
+ * inode lock will use the lockdep annotaion ovl_i_mutex_key[depth].
+ *
+ * For example, here is a snip from /proc/lockdep_chains after
+ * dir_iterate of nested overlayfs:
+ *
+ * [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2)
+ * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
+ * [...] &type->i_mutex_dir_key (stack_depth=0)
+ */
+#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH
+
+static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
+{
+#ifdef CONFIG_LOCKDEP
+ static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING];
+ static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING];
+
+ int depth = inode->i_sb->s_stack_depth - 1;
+
+ if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING))
+ depth = 0;
+
+ if (S_ISDIR(inode->i_mode))
+ lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]);
+ else
+ lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]);
+#endif
+}
+
static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
inode->i_ino = get_next_ino();
@@ -310,6 +431,8 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
#endif
+ ovl_lockdep_annotate_inode_mutex_key(inode);
+
switch (mode & S_IFMT) {
case S_IFREG:
inode->i_op = &ovl_file_inode_operations;
@@ -331,6 +454,103 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
}
}
+/*
+ * With inodes index enabled, an overlay inode nlink counts the union of upper
+ * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure
+ * upper inode, the following nlink modifying operations can happen:
+ *
+ * 1. Lower hardlink copy up
+ * 2. Upper hardlink created, unlinked or renamed over
+ * 3. Lower hardlink whiteout or renamed over
+ *
+ * For the first, copy up case, the union nlink does not change, whether the
+ * operation succeeds or fails, but the upper inode nlink may change.
+ * Therefore, before copy up, we store the union nlink value relative to the
+ * lower inode nlink in the index inode xattr trusted.overlay.nlink.
+ *
+ * For the second, upper hardlink case, the union nlink should be incremented
+ * or decremented IFF the operation succeeds, aligned with nlink change of the
+ * upper inode. Therefore, before link/unlink/rename, we store the union nlink
+ * value relative to the upper inode nlink in the index inode.
+ *
+ * For the last, lower cover up case, we simplify things by preceding the
+ * whiteout or cover up with copy up. This makes sure that there is an index
+ * upper inode where the nlink xattr can be stored before the copied up upper
+ * entry is unlink.
+ */
+#define OVL_NLINK_ADD_UPPER (1 << 0)
+
+/*
+ * On-disk format for indexed nlink:
+ *
+ * nlink relative to the upper inode - "U[+-]NUM"
+ * nlink relative to the lower inode - "L[+-]NUM"
+ */
+
+static int ovl_set_nlink_common(struct dentry *dentry,
+ struct dentry *realdentry, const char *format)
+{
+ struct inode *inode = d_inode(dentry);
+ struct inode *realinode = d_inode(realdentry);
+ char buf[13];
+ int len;
+
+ len = snprintf(buf, sizeof(buf), format,
+ (int) (inode->i_nlink - realinode->i_nlink));
+
+ return ovl_do_setxattr(ovl_dentry_upper(dentry),
+ OVL_XATTR_NLINK, buf, len, 0);
+}
+
+int ovl_set_nlink_upper(struct dentry *dentry)
+{
+ return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i");
+}
+
+int ovl_set_nlink_lower(struct dentry *dentry)
+{
+ return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i");
+}
+
+unsigned int ovl_get_nlink(struct dentry *lowerdentry,
+ struct dentry *upperdentry,
+ unsigned int fallback)
+{
+ int nlink_diff;
+ int nlink;
+ char buf[13];
+ int err;
+
+ if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1)
+ return fallback;
+
+ err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1);
+ if (err < 0)
+ goto fail;
+
+ buf[err] = '\0';
+ if ((buf[0] != 'L' && buf[0] != 'U') ||
+ (buf[1] != '+' && buf[1] != '-'))
+ goto fail;
+
+ err = kstrtoint(buf + 1, 10, &nlink_diff);
+ if (err < 0)
+ goto fail;
+
+ nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink;
+ nlink += nlink_diff;
+
+ if (nlink <= 0)
+ goto fail;
+
+ return nlink;
+
+fail:
+ pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n",
+ upperdentry, err);
+ return fallback;
+}
+
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
{
struct inode *inode;
@@ -344,27 +564,87 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
static int ovl_inode_test(struct inode *inode, void *data)
{
- return ovl_inode_real(inode, NULL) == data;
+ return inode->i_private == data;
}
static int ovl_inode_set(struct inode *inode, void *data)
{
- inode->i_private = (void *) (((unsigned long) data) | OVL_ISUPPER_MASK);
+ inode->i_private = data;
return 0;
}
-struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode)
+static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
+ struct dentry *upperdentry)
+{
+ struct inode *lowerinode = lowerdentry ? d_inode(lowerdentry) : NULL;
+
+ /* Lower (origin) inode must match, even if NULL */
+ if (ovl_inode_lower(inode) != lowerinode)
+ return false;
+
+ /*
+ * Allow non-NULL __upperdentry in inode even if upperdentry is NULL.
+ * This happens when finding a lower alias for a copied up hard link.
+ */
+ if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry))
+ return false;
+
+ return true;
+}
+struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry)
{
+ struct dentry *lowerdentry = ovl_dentry_lower(dentry);
+ struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
struct inode *inode;
- inode = iget5_locked(sb, (unsigned long) realinode,
- ovl_inode_test, ovl_inode_set, realinode);
- if (inode && inode->i_state & I_NEW) {
- ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
- set_nlink(inode, realinode->i_nlink);
- unlock_new_inode(inode);
+ if (!realinode)
+ realinode = d_inode(lowerdentry);
+
+ if (!S_ISDIR(realinode->i_mode) &&
+ (upperdentry || (lowerdentry && ovl_indexdir(dentry->d_sb)))) {
+ struct inode *key = d_inode(lowerdentry ?: upperdentry);
+ unsigned int nlink;
+
+ inode = iget5_locked(dentry->d_sb, (unsigned long) key,
+ ovl_inode_test, ovl_inode_set, key);
+ if (!inode)
+ goto out_nomem;
+ if (!(inode->i_state & I_NEW)) {
+ /*
+ * Verify that the underlying files stored in the inode
+ * match those in the dentry.
+ */
+ if (!ovl_verify_inode(inode, lowerdentry, upperdentry)) {
+ iput(inode);
+ inode = ERR_PTR(-ESTALE);
+ goto out;
+ }
+
+ dput(upperdentry);
+ goto out;
+ }
+
+ nlink = ovl_get_nlink(lowerdentry, upperdentry,
+ realinode->i_nlink);
+ set_nlink(inode, nlink);
+ } else {
+ inode = new_inode(dentry->d_sb);
+ if (!inode)
+ goto out_nomem;
}
+ ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
+ ovl_inode_init(inode, upperdentry, lowerdentry);
+
+ if (upperdentry && ovl_is_impuredir(upperdentry))
+ ovl_set_flag(OVL_IMPURE, inode);
+ if (inode->i_state & I_NEW)
+ unlock_new_inode(inode);
+out:
return inode;
+
+out_nomem:
+ inode = ERR_PTR(-ENOMEM);
+ goto out;
}
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 023bb0b03352..8aef2b304b2d 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -8,9 +8,12 @@
*/
#include <linux/fs.h>
+#include <linux/cred.h>
#include <linux/namei.h>
#include <linux/xattr.h>
#include <linux/ratelimit.h>
+#include <linux/mount.h>
+#include <linux/exportfs.h>
#include "overlayfs.h"
#include "ovl_entry.h"
@@ -80,19 +83,109 @@ invalid:
goto err_free;
}
-static bool ovl_is_opaquedir(struct dentry *dentry)
+static int ovl_acceptable(void *ctx, struct dentry *dentry)
+{
+ return 1;
+}
+
+static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry)
{
int res;
- char val;
+ struct ovl_fh *fh = NULL;
+
+ res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
+ if (res < 0) {
+ if (res == -ENODATA || res == -EOPNOTSUPP)
+ return NULL;
+ goto fail;
+ }
+ /* Zero size value means "copied up but origin unknown" */
+ if (res == 0)
+ return NULL;
- if (!d_is_dir(dentry))
- return false;
+ fh = kzalloc(res, GFP_TEMPORARY);
+ if (!fh)
+ return ERR_PTR(-ENOMEM);
- res = vfs_getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1);
- if (res == 1 && val == 'y')
- return true;
+ res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res);
+ if (res < 0)
+ goto fail;
+
+ if (res < sizeof(struct ovl_fh) || res < fh->len)
+ goto invalid;
+
+ if (fh->magic != OVL_FH_MAGIC)
+ goto invalid;
+
+ /* Treat larger version and unknown flags as "origin unknown" */
+ if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
+ goto out;
+
+ /* Treat endianness mismatch as "origin unknown" */
+ if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
+ (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
+ goto out;
+
+ return fh;
+
+out:
+ kfree(fh);
+ return NULL;
- return false;
+fail:
+ pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
+ goto out;
+invalid:
+ pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
+ goto out;
+}
+
+static struct dentry *ovl_get_origin(struct dentry *dentry,
+ struct vfsmount *mnt)
+{
+ struct dentry *origin = NULL;
+ struct ovl_fh *fh = ovl_get_origin_fh(dentry);
+ int bytes;
+
+ if (IS_ERR_OR_NULL(fh))
+ return (struct dentry *)fh;
+
+ /*
+ * Make sure that the stored uuid matches the uuid of the lower
+ * layer where file handle will be decoded.
+ */
+ if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
+ goto out;
+
+ bytes = (fh->len - offsetof(struct ovl_fh, fid));
+ origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
+ bytes >> 2, (int)fh->type,
+ ovl_acceptable, NULL);
+ if (IS_ERR(origin)) {
+ /* Treat stale file handle as "origin unknown" */
+ if (origin == ERR_PTR(-ESTALE))
+ origin = NULL;
+ goto out;
+ }
+
+ if (ovl_dentry_weird(origin) ||
+ ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT))
+ goto invalid;
+
+out:
+ kfree(fh);
+ return origin;
+
+invalid:
+ pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin);
+ dput(origin);
+ origin = NULL;
+ goto out;
+}
+
+static bool ovl_is_opaquedir(struct dentry *dentry)
+{
+ return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE);
}
static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
@@ -191,6 +284,274 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
return 0;
}
+
+static int ovl_check_origin(struct dentry *upperdentry,
+ struct path *lowerstack, unsigned int numlower,
+ struct path **stackp, unsigned int *ctrp)
+{
+ struct vfsmount *mnt;
+ struct dentry *origin = NULL;
+ int i;
+
+
+ for (i = 0; i < numlower; i++) {
+ mnt = lowerstack[i].mnt;
+ origin = ovl_get_origin(upperdentry, mnt);
+ if (IS_ERR(origin))
+ return PTR_ERR(origin);
+
+ if (origin)
+ break;
+ }
+
+ if (!origin)
+ return 0;
+
+ BUG_ON(*ctrp);
+ if (!*stackp)
+ *stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY);
+ if (!*stackp) {
+ dput(origin);
+ return -ENOMEM;
+ }
+ **stackp = (struct path) { .dentry = origin, .mnt = mnt };
+ *ctrp = 1;
+
+ return 0;
+}
+
+/*
+ * Verify that @fh matches the origin file handle stored in OVL_XATTR_ORIGIN.
+ * Return 0 on match, -ESTALE on mismatch, < 0 on error.
+ */
+static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh)
+{
+ struct ovl_fh *ofh = ovl_get_origin_fh(dentry);
+ int err = 0;
+
+ if (!ofh)
+ return -ENODATA;
+
+ if (IS_ERR(ofh))
+ return PTR_ERR(ofh);
+
+ if (fh->len != ofh->len || memcmp(fh, ofh, fh->len))
+ err = -ESTALE;
+
+ kfree(ofh);
+ return err;
+}
+
+/*
+ * Verify that an inode matches the origin file handle stored in upper inode.
+ *
+ * If @set is true and there is no stored file handle, encode and store origin
+ * file handle in OVL_XATTR_ORIGIN.
+ *
+ * Return 0 on match, -ESTALE on mismatch, < 0 on error.
+ */
+int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
+ struct dentry *origin, bool is_upper, bool set)
+{
+ struct inode *inode;
+ struct ovl_fh *fh;
+ int err;
+
+ fh = ovl_encode_fh(origin, is_upper);
+ err = PTR_ERR(fh);
+ if (IS_ERR(fh))
+ goto fail;
+
+ err = ovl_verify_origin_fh(dentry, fh);
+ if (set && err == -ENODATA)
+ err = ovl_do_setxattr(dentry, OVL_XATTR_ORIGIN, fh, fh->len, 0);
+ if (err)
+ goto fail;
+
+out:
+ kfree(fh);
+ return err;
+
+fail:
+ inode = d_inode(origin);
+ pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n",
+ origin, inode ? inode->i_ino : 0, err);
+ goto out;
+}
+
+/*
+ * Verify that an index entry name matches the origin file handle stored in
+ * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
+ * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
+ */
+int ovl_verify_index(struct dentry *index, struct path *lowerstack,
+ unsigned int numlower)
+{
+ struct ovl_fh *fh = NULL;
+ size_t len;
+ struct path origin = { };
+ struct path *stack = &origin;
+ unsigned int ctr = 0;
+ int err;
+
+ if (!d_inode(index))
+ return 0;
+
+ /*
+ * Directory index entries are going to be used for looking up
+ * redirected upper dirs by lower dir fh when decoding an overlay
+ * file handle of a merge dir. Whiteout index entries are going to be
+ * used as an indication that an exported overlay file handle should
+ * be treated as stale (i.e. after unlink of the overlay inode).
+ * We don't know the verification rules for directory and whiteout
+ * index entries, because they have not been implemented yet, so return
+ * EROFS if those entries are found to avoid corrupting an index that
+ * was created by a newer kernel.
+ */
+ err = -EROFS;
+ if (d_is_dir(index) || ovl_is_whiteout(index))
+ goto fail;
+
+ err = -EINVAL;
+ if (index->d_name.len < sizeof(struct ovl_fh)*2)
+ goto fail;
+
+ err = -ENOMEM;
+ len = index->d_name.len / 2;
+ fh = kzalloc(len, GFP_TEMPORARY);
+ if (!fh)
+ goto fail;
+
+ err = -EINVAL;
+ if (hex2bin((u8 *)fh, index->d_name.name, len) || len != fh->len)
+ goto fail;
+
+ err = ovl_verify_origin_fh(index, fh);
+ if (err)
+ goto fail;
+
+ err = ovl_check_origin(index, lowerstack, numlower, &stack, &ctr);
+ if (!err && !ctr)
+ err = -ESTALE;
+ if (err)
+ goto fail;
+
+ /* Check if index is orphan and don't warn before cleaning it */
+ if (d_inode(index)->i_nlink == 1 &&
+ ovl_get_nlink(index, origin.dentry, 0) == 0)
+ err = -ENOENT;
+
+ dput(origin.dentry);
+out:
+ kfree(fh);
+ return err;
+
+fail:
+ pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n",
+ index, d_inode(index)->i_mode & S_IFMT, err);
+ goto out;
+}
+
+/*
+ * Lookup in indexdir for the index entry of a lower real inode or a copy up
+ * origin inode. The index entry name is the hex representation of the lower
+ * inode file handle.
+ *
+ * If the index dentry in negative, then either no lower aliases have been
+ * copied up yet, or aliases have been copied up in older kernels and are
+ * not indexed.
+ *
+ * If the index dentry for a copy up origin inode is positive, but points
+ * to an inode different than the upper inode, then either the upper inode
+ * has been copied up and not indexed or it was indexed, but since then
+ * index dir was cleared. Either way, that index cannot be used to indentify
+ * the overlay inode.
+ */
+int ovl_get_index_name(struct dentry *origin, struct qstr *name)
+{
+ int err;
+ struct ovl_fh *fh;
+ char *n, *s;
+
+ fh = ovl_encode_fh(origin, false);
+ if (IS_ERR(fh))
+ return PTR_ERR(fh);
+
+ err = -ENOMEM;
+ n = kzalloc(fh->len * 2, GFP_TEMPORARY);
+ if (n) {
+ s = bin2hex(n, fh, fh->len);
+ *name = (struct qstr) QSTR_INIT(n, s - n);
+ err = 0;
+ }
+ kfree(fh);
+
+ return err;
+
+}
+
+static struct dentry *ovl_lookup_index(struct dentry *dentry,
+ struct dentry *upper,
+ struct dentry *origin)
+{
+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+ struct dentry *index;
+ struct inode *inode;
+ struct qstr name;
+ int err;
+
+ err = ovl_get_index_name(origin, &name);
+ if (err)
+ return ERR_PTR(err);
+
+ index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
+ if (IS_ERR(index)) {
+ pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n"
+ "overlayfs: mount with '-o index=off' to disable inodes index.\n",
+ d_inode(origin)->i_ino, name.len, name.name,
+ err);
+ goto out;
+ }
+
+ inode = d_inode(index);
+ if (d_is_negative(index)) {
+ if (upper && d_inode(origin)->i_nlink > 1) {
+ pr_warn_ratelimited("overlayfs: hard link with origin but no index (ino=%lu).\n",
+ d_inode(origin)->i_ino);
+ goto fail;
+ }
+
+ dput(index);
+ index = NULL;
+ } else if (upper && d_inode(upper) != inode) {
+ pr_warn_ratelimited("overlayfs: wrong index found (index=%pd2, ino=%lu, upper ino=%lu).\n",
+ index, inode->i_ino, d_inode(upper)->i_ino);
+ goto fail;
+ } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
+ ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
+ /*
+ * Index should always be of the same file type as origin
+ * except for the case of a whiteout index. A whiteout
+ * index should only exist if all lower aliases have been
+ * unlinked, which means that finding a lower origin on lookup
+ * whose index is a whiteout should be treated as an error.
+ */
+ pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
+ index, d_inode(index)->i_mode & S_IFMT,
+ d_inode(origin)->i_mode & S_IFMT);
+ goto fail;
+ }
+
+out:
+ kfree(name.name);
+ return index;
+
+fail:
+ dput(index);
+ index = ERR_PTR(-EIO);
+ goto out;
+}
+
/*
* Returns next layer in stack starting from top.
* Returns -1 if this is the last layer.
@@ -219,8 +580,10 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
const struct cred *old_cred;
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
struct ovl_entry *poe = dentry->d_parent->d_fsdata;
+ struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
struct path *stack = NULL;
struct dentry *upperdir, *upperdentry = NULL;
+ struct dentry *index = NULL;
unsigned int ctr = 0;
struct inode *inode = NULL;
bool upperopaque = false;
@@ -241,7 +604,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(-ENAMETOOLONG);
old_cred = ovl_override_creds(dentry->d_sb);
- upperdir = ovl_upperdentry_dereference(poe);
+ upperdir = ovl_dentry_upper(dentry->d_parent);
if (upperdir) {
err = ovl_lookup_layer(upperdir, &d, &upperdentry);
if (err)
@@ -252,13 +615,30 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
err = -EREMOTE;
goto out;
}
+ if (upperdentry && !d.is_dir) {
+ BUG_ON(!d.stop || d.redirect);
+ /*
+ * Lookup copy up origin by decoding origin file handle.
+ * We may get a disconnected dentry, which is fine,
+ * because we only need to hold the origin inode in
+ * cache and use its inode number. We may even get a
+ * connected dentry, that is not under any of the lower
+ * layers root. That is also fine for using it's inode
+ * number - it's the same as if we held a reference
+ * to a dentry in lower layer that was moved under us.
+ */
+ err = ovl_check_origin(upperdentry, roe->lowerstack,
+ roe->numlower, &stack, &ctr);
+ if (err)
+ goto out;
+ }
if (d.redirect) {
upperredirect = kstrdup(d.redirect, GFP_KERNEL);
if (!upperredirect)
goto out_put_upper;
if (d.redirect[0] == '/')
- poe = dentry->d_sb->s_root->d_fsdata;
+ poe = roe;
}
upperopaque = d.opaque;
}
@@ -289,10 +669,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
if (d.stop)
break;
- if (d.redirect &&
- d.redirect[0] == '/' &&
- poe != dentry->d_sb->s_root->d_fsdata) {
- poe = dentry->d_sb->s_root->d_fsdata;
+ if (d.redirect && d.redirect[0] == '/' && poe != roe) {
+ poe = roe;
/* Find the current layer on the root dentry */
for (i = 0; i < poe->numlower; i++)
@@ -303,47 +681,56 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
}
}
+ /* Lookup index by lower inode and verify it matches upper inode */
+ if (ctr && !d.is_dir && ovl_indexdir(dentry->d_sb)) {
+ struct dentry *origin = stack[0].dentry;
+
+ index = ovl_lookup_index(dentry, upperdentry, origin);
+ if (IS_ERR(index)) {
+ err = PTR_ERR(index);
+ index = NULL;
+ goto out_put;
+ }
+ }
+
oe = ovl_alloc_entry(ctr);
err = -ENOMEM;
if (!oe)
goto out_put;
- if (upperdentry || ctr) {
- struct dentry *realdentry;
- struct inode *realinode;
+ oe->opaque = upperopaque;
+ memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
+ dentry->d_fsdata = oe;
- realdentry = upperdentry ? upperdentry : stack[0].dentry;
- realinode = d_inode(realdentry);
+ if (upperdentry)
+ ovl_dentry_set_upper_alias(dentry);
+ else if (index)
+ upperdentry = dget(index);
- err = -ENOMEM;
- if (upperdentry && !d_is_dir(upperdentry)) {
- inode = ovl_get_inode(dentry->d_sb, realinode);
- } else {
- inode = ovl_new_inode(dentry->d_sb, realinode->i_mode,
- realinode->i_rdev);
- if (inode)
- ovl_inode_init(inode, realinode, !!upperdentry);
- }
- if (!inode)
+ if (upperdentry || ctr) {
+ inode = ovl_get_inode(dentry, upperdentry);
+ err = PTR_ERR(inode);
+ if (IS_ERR(inode))
goto out_free_oe;
- ovl_copyattr(realdentry->d_inode, inode);
+
+ OVL_I(inode)->redirect = upperredirect;
+ if (index)
+ ovl_set_flag(OVL_INDEX, inode);
}
revert_creds(old_cred);
- oe->opaque = upperopaque;
- oe->redirect = upperredirect;
- oe->__upperdentry = upperdentry;
- memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
+ dput(index);
kfree(stack);
kfree(d.redirect);
- dentry->d_fsdata = oe;
d_add(dentry, inode);
return NULL;
out_free_oe:
+ dentry->d_fsdata = NULL;
kfree(oe);
out_put:
+ dput(index);
for (i = 0; i < ctr; i++)
dput(stack[i].dentry);
kfree(stack);
@@ -373,7 +760,7 @@ bool ovl_lower_positive(struct dentry *dentry)
return oe->opaque;
/* Negative upper -> positive lower */
- if (!oe->__upperdentry)
+ if (!ovl_dentry_upper(dentry))
return true;
/* Positive upper -> have to look up lower to see whether it exists */
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 8af450b0e57a..e927a62c97ae 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -8,20 +8,66 @@
*/
#include <linux/kernel.h>
+#include <linux/uuid.h>
enum ovl_path_type {
__OVL_PATH_UPPER = (1 << 0),
__OVL_PATH_MERGE = (1 << 1),
+ __OVL_PATH_ORIGIN = (1 << 2),
};
#define OVL_TYPE_UPPER(type) ((type) & __OVL_PATH_UPPER)
#define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE)
+#define OVL_TYPE_ORIGIN(type) ((type) & __OVL_PATH_ORIGIN)
#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
#define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
+#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
+#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
+#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
-#define OVL_ISUPPER_MASK 1UL
+enum ovl_flag {
+ OVL_IMPURE,
+ OVL_INDEX,
+};
+
+/*
+ * The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
+ * where:
+ * origin.fh - exported file handle of the lower file
+ * origin.uuid - uuid of the lower filesystem
+ */
+#define OVL_FH_VERSION 0
+#define OVL_FH_MAGIC 0xfb
+
+/* CPU byte order required for fid decoding: */
+#define OVL_FH_FLAG_BIG_ENDIAN (1 << 0)
+#define OVL_FH_FLAG_ANY_ENDIAN (1 << 1)
+/* Is the real inode encoded in fid an upper inode? */
+#define OVL_FH_FLAG_PATH_UPPER (1 << 2)
+
+#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN | \
+ OVL_FH_FLAG_PATH_UPPER)
+
+#if defined(__LITTLE_ENDIAN)
+#define OVL_FH_FLAG_CPU_ENDIAN 0
+#elif defined(__BIG_ENDIAN)
+#define OVL_FH_FLAG_CPU_ENDIAN OVL_FH_FLAG_BIG_ENDIAN
+#else
+#error Endianness not defined
+#endif
+
+/* On-disk and in-memeory format for redirect by file handle */
+struct ovl_fh {
+ u8 version; /* 0 */
+ u8 magic; /* 0xfb */
+ u8 len; /* size of this header + size of fid */
+ u8 flags; /* OVL_FH_FLAG_* */
+ u8 type; /* fid_type of fid */
+ uuid_t uuid; /* uuid of filesystem */
+ u8 fid[0]; /* file identifier */
+} __packed;
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
{
@@ -127,14 +173,13 @@ static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
return err;
}
-static inline struct inode *ovl_inode_real(struct inode *inode, bool *is_upper)
+static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
{
- unsigned long x = (unsigned long) READ_ONCE(inode->i_private);
-
- if (is_upper)
- *is_upper = x & OVL_ISUPPER_MASK;
+ struct dentry *ret = vfs_tmpfile(dentry, mode, 0);
+ int err = IS_ERR(ret) ? PTR_ERR(ret) : 0;
- return (struct inode *) (x & ~OVL_ISUPPER_MASK);
+ pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
+ return ret;
}
/* util.c */
@@ -142,6 +187,9 @@ int ovl_want_write(struct dentry *dentry);
void ovl_drop_write(struct dentry *dentry);
struct dentry *ovl_workdir(struct dentry *dentry);
const struct cred *ovl_override_creds(struct super_block *sb);
+struct super_block *ovl_same_sb(struct super_block *sb);
+bool ovl_can_decode_fh(struct super_block *sb);
+struct dentry *ovl_indexdir(struct super_block *sb);
struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
bool ovl_dentry_remote(struct dentry *dentry);
bool ovl_dentry_weird(struct dentry *dentry);
@@ -152,25 +200,53 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
struct dentry *ovl_dentry_upper(struct dentry *dentry);
struct dentry *ovl_dentry_lower(struct dentry *dentry);
struct dentry *ovl_dentry_real(struct dentry *dentry);
+struct dentry *ovl_i_dentry_upper(struct inode *inode);
+struct inode *ovl_inode_upper(struct inode *inode);
+struct inode *ovl_inode_lower(struct inode *inode);
+struct inode *ovl_inode_real(struct inode *inode);
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
bool ovl_dentry_is_opaque(struct dentry *dentry);
bool ovl_dentry_is_whiteout(struct dentry *dentry);
void ovl_dentry_set_opaque(struct dentry *dentry);
+bool ovl_dentry_has_upper_alias(struct dentry *dentry);
+void ovl_dentry_set_upper_alias(struct dentry *dentry);
bool ovl_redirect_dir(struct super_block *sb);
-void ovl_clear_redirect_dir(struct super_block *sb);
const char *ovl_dentry_get_redirect(struct dentry *dentry);
void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
-void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
-void ovl_inode_init(struct inode *inode, struct inode *realinode,
- bool is_upper);
-void ovl_inode_update(struct inode *inode, struct inode *upperinode);
+void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
+ struct dentry *lowerdentry);
+void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
void ovl_dentry_version_inc(struct dentry *dentry);
u64 ovl_dentry_version_get(struct dentry *dentry);
bool ovl_is_whiteout(struct dentry *dentry);
struct file *ovl_path_open(struct path *path, int flags);
+int ovl_copy_up_start(struct dentry *dentry);
+void ovl_copy_up_end(struct dentry *dentry);
+bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
+int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
+ const char *name, const void *value, size_t size,
+ int xerr);
+int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
+void ovl_set_flag(unsigned long flag, struct inode *inode);
+bool ovl_test_flag(unsigned long flag, struct inode *inode);
+bool ovl_inuse_trylock(struct dentry *dentry);
+void ovl_inuse_unlock(struct dentry *dentry);
+int ovl_nlink_start(struct dentry *dentry, bool *locked);
+void ovl_nlink_end(struct dentry *dentry, bool locked);
+
+static inline bool ovl_is_impuredir(struct dentry *dentry)
+{
+ return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE);
+}
+
/* namei.c */
+int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
+ struct dentry *origin, bool is_upper, bool set);
+int ovl_verify_index(struct dentry *index, struct path *lowerstack,
+ unsigned int numlower);
+int ovl_get_index_name(struct dentry *origin, struct qstr *name);
int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags);
bool ovl_lower_positive(struct dentry *dentry);
@@ -183,13 +259,22 @@ void ovl_cache_free(struct list_head *list);
int ovl_check_d_type_supported(struct path *realpath);
void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
struct dentry *dentry, int level);
+int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
+ struct path *lowerstack, unsigned int numlower);
/* inode.c */
+int ovl_set_nlink_upper(struct dentry *dentry);
+int ovl_set_nlink_lower(struct dentry *dentry);
+unsigned int ovl_get_nlink(struct dentry *lowerdentry,
+ struct dentry *upperdentry,
+ unsigned int fallback);
int ovl_setattr(struct dentry *dentry, struct iattr *attr);
+int ovl_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int flags);
int ovl_permission(struct inode *inode, int mask);
-int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value,
- size_t size, int flags);
-int ovl_xattr_get(struct dentry *dentry, const char *name,
+int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
+ const void *value, size_t size, int flags);
+int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
void *value, size_t size);
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
struct posix_acl *ovl_get_acl(struct inode *inode, int type);
@@ -198,7 +283,7 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags);
bool ovl_is_private_xattr(const char *name);
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
-struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode);
+struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry);
static inline void ovl_copyattr(struct inode *from, struct inode *to)
{
to->i_uid = from->i_uid;
@@ -211,7 +296,7 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to)
/* dir.c */
extern const struct inode_operations ovl_dir_inode_operations;
-struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry);
+struct dentry *ovl_lookup_temp(struct dentry *workdir);
struct cattr {
dev_t rdev;
umode_t mode;
@@ -220,10 +305,11 @@ struct cattr {
int ovl_create_real(struct inode *dir, struct dentry *newdentry,
struct cattr *attr,
struct dentry *hardlink, bool debug);
-void ovl_cleanup(struct inode *dir, struct dentry *dentry);
+int ovl_cleanup(struct inode *dir, struct dentry *dentry);
/* copy_up.c */
int ovl_copy_up(struct dentry *dentry);
int ovl_copy_up_flags(struct dentry *dentry, int flags);
int ovl_copy_xattr(struct dentry *old, struct dentry *new);
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
+struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index d14bca1850d9..878a750986dd 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -14,6 +14,7 @@ struct ovl_config {
char *workdir;
bool default_permissions;
bool redirect_dir;
+ bool index;
};
/* private information held for overlayfs's superblock */
@@ -21,22 +22,28 @@ struct ovl_fs {
struct vfsmount *upper_mnt;
unsigned numlower;
struct vfsmount **lower_mnt;
+ /* workbasedir is the path at workdir= mount option */
+ struct dentry *workbasedir;
+ /* workdir is the 'work' directory under workbasedir */
struct dentry *workdir;
+ /* index directory listing overlay inodes by origin file handle */
+ struct dentry *indexdir;
long namelen;
/* pathnames of lower and upper dirs, for show_options */
struct ovl_config config;
/* creds of process who forced instantiation of super block */
const struct cred *creator_cred;
+ bool tmpfile;
+ bool noxattr;
+ /* sb common to all layers */
+ struct super_block *same_sb;
};
/* private information held for every overlayfs dentry */
struct ovl_entry {
- struct dentry *__upperdentry;
- struct ovl_dir_cache *cache;
union {
struct {
- u64 version;
- const char *redirect;
+ unsigned long has_upper;
bool opaque;
};
struct rcu_head rcu;
@@ -47,7 +54,25 @@ struct ovl_entry {
struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
-static inline struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
+struct ovl_inode {
+ struct ovl_dir_cache *cache;
+ const char *redirect;
+ u64 version;
+ unsigned long flags;
+ struct inode vfs_inode;
+ struct dentry *__upperdentry;
+ struct inode *lower;
+
+ /* synchronize copy up and more */
+ struct mutex lock;
+};
+
+static inline struct ovl_inode *OVL_I(struct inode *inode)
+{
+ return container_of(inode, struct ovl_inode, vfs_inode);
+}
+
+static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi)
{
- return lockless_dereference(oe->__upperdentry);
+ return lockless_dereference(oi->__upperdentry);
}
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index f241b4ee3d8a..3d424a51cabb 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -667,3 +667,56 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
ovl_cleanup(dir, dentry);
}
}
+
+int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
+ struct path *lowerstack, unsigned int numlower)
+{
+ int err;
+ struct inode *dir = dentry->d_inode;
+ struct path path = { .mnt = mnt, .dentry = dentry };
+ LIST_HEAD(list);
+ struct ovl_cache_entry *p;
+ struct ovl_readdir_data rdd = {
+ .ctx.actor = ovl_fill_merge,
+ .dentry = NULL,
+ .list = &list,
+ .root = RB_ROOT,
+ .is_lowest = false,
+ };
+
+ err = ovl_dir_read(&path, &rdd);
+ if (err)
+ goto out;
+
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+ list_for_each_entry(p, &list, l_node) {
+ struct dentry *index;
+
+ if (p->name[0] == '.') {
+ if (p->len == 1)
+ continue;
+ if (p->len == 2 && p->name[1] == '.')
+ continue;
+ }
+ index = lookup_one_len(p->name, dentry, p->len);
+ if (IS_ERR(index)) {
+ err = PTR_ERR(index);
+ break;
+ }
+ err = ovl_verify_index(index, lowerstack, numlower);
+ if (err) {
+ if (err == -EROFS)
+ break;
+ err = ovl_cleanup(dir, index);
+ if (err)
+ break;
+ }
+ dput(index);
+ }
+ inode_unlock(dir);
+out:
+ ovl_cache_free(&list);
+ if (err)
+ pr_err("overlayfs: failed index dir cleanup (%i)\n", err);
+ return err;
+}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 20f48abbb82f..d86e89f97201 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -7,6 +7,7 @@
* the Free Software Foundation.
*/
+#include <uapi/linux/magic.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/xattr.h>
@@ -33,6 +34,11 @@ module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
MODULE_PARM_DESC(ovl_redirect_dir_def,
"Default to on or off for the redirect_dir feature");
+static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
+module_param_named(index, ovl_index_def, bool, 0644);
+MODULE_PARM_DESC(ovl_index_def,
+ "Default to on or off for the inodes index feature");
+
static void ovl_dentry_release(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
@@ -40,19 +46,34 @@ static void ovl_dentry_release(struct dentry *dentry)
if (oe) {
unsigned int i;
- dput(oe->__upperdentry);
- kfree(oe->redirect);
for (i = 0; i < oe->numlower; i++)
dput(oe->lowerstack[i].dentry);
kfree_rcu(oe, rcu);
}
}
+static int ovl_check_append_only(struct inode *inode, int flag)
+{
+ /*
+ * This test was moot in vfs may_open() because overlay inode does
+ * not have the S_APPEND flag, so re-check on real upper inode
+ */
+ if (IS_APPEND(inode)) {
+ if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
+ return -EPERM;
+ if (flag & O_TRUNC)
+ return -EPERM;
+ }
+
+ return 0;
+}
+
static struct dentry *ovl_d_real(struct dentry *dentry,
const struct inode *inode,
unsigned int open_flags)
{
struct dentry *real;
+ int err;
if (!d_is_reg(dentry)) {
if (!inode || inode == d_inode(dentry))
@@ -64,15 +85,20 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
return dentry;
if (open_flags) {
- int err = ovl_open_maybe_copy_up(dentry, open_flags);
-
+ err = ovl_open_maybe_copy_up(dentry, open_flags);
if (err)
return ERR_PTR(err);
}
real = ovl_dentry_upper(dentry);
- if (real && (!inode || inode == d_inode(real)))
+ if (real && (!inode || inode == d_inode(real))) {
+ if (!inode) {
+ err = ovl_check_append_only(d_inode(real), open_flags);
+ if (err)
+ return ERR_PTR(err);
+ }
return real;
+ }
real = ovl_dentry_lower(dentry);
if (!real)
@@ -142,12 +168,52 @@ static const struct dentry_operations ovl_reval_dentry_operations = {
.d_weak_revalidate = ovl_dentry_weak_revalidate,
};
+static struct kmem_cache *ovl_inode_cachep;
+
+static struct inode *ovl_alloc_inode(struct super_block *sb)
+{
+ struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
+
+ oi->cache = NULL;
+ oi->redirect = NULL;
+ oi->version = 0;
+ oi->flags = 0;
+ oi->__upperdentry = NULL;
+ oi->lower = NULL;
+ mutex_init(&oi->lock);
+
+ return &oi->vfs_inode;
+}
+
+static void ovl_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+
+ kmem_cache_free(ovl_inode_cachep, OVL_I(inode));
+}
+
+static void ovl_destroy_inode(struct inode *inode)
+{
+ struct ovl_inode *oi = OVL_I(inode);
+
+ dput(oi->__upperdentry);
+ kfree(oi->redirect);
+ mutex_destroy(&oi->lock);
+
+ call_rcu(&inode->i_rcu, ovl_i_callback);
+}
+
static void ovl_put_super(struct super_block *sb)
{
struct ovl_fs *ufs = sb->s_fs_info;
unsigned i;
+ dput(ufs->indexdir);
dput(ufs->workdir);
+ ovl_inuse_unlock(ufs->workbasedir);
+ dput(ufs->workbasedir);
+ if (ufs->upper_mnt)
+ ovl_inuse_unlock(ufs->upper_mnt->mnt_root);
mntput(ufs->upper_mnt);
for (i = 0; i < ufs->numlower; i++)
mntput(ufs->lower_mnt[i]);
@@ -160,6 +226,25 @@ static void ovl_put_super(struct super_block *sb)
kfree(ufs);
}
+static int ovl_sync_fs(struct super_block *sb, int wait)
+{
+ struct ovl_fs *ufs = sb->s_fs_info;
+ struct super_block *upper_sb;
+ int ret;
+
+ if (!ufs->upper_mnt)
+ return 0;
+ upper_sb = ufs->upper_mnt->mnt_sb;
+ if (!upper_sb->s_op->sync_fs)
+ return 0;
+
+ /* real inodes have already been synced by sync_filesystem(ovl_sb) */
+ down_read(&upper_sb->s_umount);
+ ret = upper_sb->s_op->sync_fs(upper_sb, wait);
+ up_read(&upper_sb->s_umount);
+ return ret;
+}
+
/**
* ovl_statfs
* @sb: The overlayfs super block
@@ -186,6 +271,12 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
return err;
}
+/* Will this overlay be forced to mount/remount ro? */
+static bool ovl_force_readonly(struct ovl_fs *ufs)
+{
+ return (!ufs->upper_mnt || !ufs->workdir);
+}
+
/**
* ovl_show_options
*
@@ -207,6 +298,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
if (ufs->config.redirect_dir != ovl_redirect_dir_def)
seq_printf(m, ",redirect_dir=%s",
ufs->config.redirect_dir ? "on" : "off");
+ if (ufs->config.index != ovl_index_def)
+ seq_printf(m, ",index=%s",
+ ufs->config.index ? "on" : "off");
return 0;
}
@@ -214,18 +308,21 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
{
struct ovl_fs *ufs = sb->s_fs_info;
- if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir))
+ if (!(*flags & MS_RDONLY) && ovl_force_readonly(ufs))
return -EROFS;
return 0;
}
static const struct super_operations ovl_super_operations = {
+ .alloc_inode = ovl_alloc_inode,
+ .destroy_inode = ovl_destroy_inode,
+ .drop_inode = generic_delete_inode,
.put_super = ovl_put_super,
+ .sync_fs = ovl_sync_fs,
.statfs = ovl_statfs,
.show_options = ovl_show_options,
.remount_fs = ovl_remount,
- .drop_inode = generic_delete_inode,
};
enum {
@@ -235,6 +332,8 @@ enum {
OPT_DEFAULT_PERMISSIONS,
OPT_REDIRECT_DIR_ON,
OPT_REDIRECT_DIR_OFF,
+ OPT_INDEX_ON,
+ OPT_INDEX_OFF,
OPT_ERR,
};
@@ -245,6 +344,8 @@ static const match_table_t ovl_tokens = {
{OPT_DEFAULT_PERMISSIONS, "default_permissions"},
{OPT_REDIRECT_DIR_ON, "redirect_dir=on"},
{OPT_REDIRECT_DIR_OFF, "redirect_dir=off"},
+ {OPT_INDEX_ON, "index=on"},
+ {OPT_INDEX_OFF, "index=off"},
{OPT_ERR, NULL}
};
@@ -317,6 +418,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
config->redirect_dir = false;
break;
+ case OPT_INDEX_ON:
+ config->index = true;
+ break;
+
+ case OPT_INDEX_OFF:
+ config->index = false;
+ break;
+
default:
pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
return -EINVAL;
@@ -335,23 +444,29 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
}
#define OVL_WORKDIR_NAME "work"
+#define OVL_INDEXDIR_NAME "index"
-static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
- struct dentry *dentry)
+static struct dentry *ovl_workdir_create(struct super_block *sb,
+ struct ovl_fs *ufs,
+ struct dentry *dentry,
+ const char *name, bool persist)
{
struct inode *dir = dentry->d_inode;
+ struct vfsmount *mnt = ufs->upper_mnt;
struct dentry *work;
int err;
bool retried = false;
+ bool locked = false;
err = mnt_want_write(mnt);
if (err)
- return ERR_PTR(err);
+ goto out_err;
inode_lock_nested(dir, I_MUTEX_PARENT);
+ locked = true;
+
retry:
- work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
- strlen(OVL_WORKDIR_NAME));
+ work = lookup_one_len(name, dentry, strlen(name));
if (!IS_ERR(work)) {
struct iattr attr = {
@@ -364,6 +479,9 @@ retry:
if (retried)
goto out_dput;
+ if (persist)
+ goto out_unlock;
+
retried = true;
ovl_workdir_cleanup(dir, mnt, work, 0);
dput(work);
@@ -403,16 +521,24 @@ retry:
inode_unlock(work->d_inode);
if (err)
goto out_dput;
+ } else {
+ err = PTR_ERR(work);
+ goto out_err;
}
out_unlock:
- inode_unlock(dir);
mnt_drop_write(mnt);
+ if (locked)
+ inode_unlock(dir);
return work;
out_dput:
dput(work);
- work = ERR_PTR(err);
+out_err:
+ pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
+ ufs->config.workdir, name, -err);
+ sb->s_flags |= MS_RDONLY;
+ work = NULL;
goto out_unlock;
}
@@ -512,6 +638,15 @@ static int ovl_lower_dir(const char *name, struct path *path,
if (ovl_dentry_remote(path->dentry))
*remote = true;
+ /*
+ * The inodes index feature needs to encode and decode file
+ * handles, so it requires that all layers support them.
+ */
+ if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) {
+ ofs->config.index = false;
+ pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name);
+ }
+
return 0;
out_put:
@@ -557,7 +692,7 @@ ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
struct dentry *dentry, struct inode *inode,
const char *name, void *buffer, size_t size)
{
- return ovl_xattr_get(dentry, handler->name, buffer, size);
+ return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
}
static int __maybe_unused
@@ -567,7 +702,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
size_t size, int flags)
{
struct dentry *workdir = ovl_workdir(dentry);
- struct inode *realinode = ovl_inode_real(inode, NULL);
+ struct inode *realinode = ovl_inode_real(inode);
struct posix_acl *acl = NULL;
int err;
@@ -607,9 +742,9 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
return err;
}
- err = ovl_xattr_set(dentry, handler->name, value, size, flags);
+ err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
if (!err)
- ovl_copyattr(ovl_inode_real(inode, NULL), inode);
+ ovl_copyattr(ovl_inode_real(inode), inode);
return err;
@@ -637,7 +772,7 @@ static int ovl_other_xattr_get(const struct xattr_handler *handler,
struct dentry *dentry, struct inode *inode,
const char *name, void *buffer, size_t size)
{
- return ovl_xattr_get(dentry, name, buffer, size);
+ return ovl_xattr_get(dentry, inode, name, buffer, size);
}
static int ovl_other_xattr_set(const struct xattr_handler *handler,
@@ -645,7 +780,7 @@ static int ovl_other_xattr_set(const struct xattr_handler *handler,
const char *name, const void *value,
size_t size, int flags)
{
- return ovl_xattr_set(dentry, name, value, size, flags);
+ return ovl_xattr_set(dentry, inode, name, value, size, flags);
}
static const struct xattr_handler __maybe_unused
@@ -688,10 +823,9 @@ static const struct xattr_handler *ovl_xattr_handlers[] = {
static int ovl_fill_super(struct super_block *sb, void *data, int silent)
{
- struct path upperpath = { NULL, NULL };
- struct path workpath = { NULL, NULL };
+ struct path upperpath = { };
+ struct path workpath = { };
struct dentry *root_dentry;
- struct inode *realinode;
struct ovl_entry *oe;
struct ovl_fs *ufs;
struct path *stack = NULL;
@@ -701,6 +835,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
unsigned int stacklen = 0;
unsigned int i;
bool remote = false;
+ struct cred *cred;
int err;
err = -ENOMEM;
@@ -709,6 +844,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
goto out;
ufs->config.redirect_dir = ovl_redirect_dir_def;
+ ufs->config.index = ovl_index_def;
err = ovl_parse_opt((char *) data, &ufs->config);
if (err)
goto out_free_config;
@@ -743,9 +879,15 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (err)
goto out_put_upperpath;
+ err = -EBUSY;
+ if (!ovl_inuse_trylock(upperpath.dentry)) {
+ pr_err("overlayfs: upperdir is in-use by another mount\n");
+ goto out_put_upperpath;
+ }
+
err = ovl_mount_dir(ufs->config.workdir, &workpath);
if (err)
- goto out_put_upperpath;
+ goto out_unlock_upperdentry;
err = -EINVAL;
if (upperpath.mnt != workpath.mnt) {
@@ -756,12 +898,20 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
goto out_put_workpath;
}
+
+ err = -EBUSY;
+ if (!ovl_inuse_trylock(workpath.dentry)) {
+ pr_err("overlayfs: workdir is in-use by another mount\n");
+ goto out_put_workpath;
+ }
+
+ ufs->workbasedir = workpath.dentry;
sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth;
}
err = -ENOMEM;
lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL);
if (!lowertmp)
- goto out_put_workpath;
+ goto out_unlock_workdentry;
err = -EINVAL;
stacklen = ovl_split_lowerdirs(lowertmp);
@@ -804,20 +954,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
pr_err("overlayfs: failed to clone upperpath\n");
goto out_put_lowerpath;
}
+
/* Don't inherit atime flags */
ufs->upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
sb->s_time_gran = ufs->upper_mnt->mnt_sb->s_time_gran;
- ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry);
- err = PTR_ERR(ufs->workdir);
- if (IS_ERR(ufs->workdir)) {
- pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
- ufs->config.workdir, OVL_WORKDIR_NAME, -err);
- sb->s_flags |= MS_RDONLY;
- ufs->workdir = NULL;
- }
-
+ ufs->workdir = ovl_workdir_create(sb, ufs, workpath.dentry,
+ OVL_WORKDIR_NAME, false);
/*
* Upper should support d_type, else whiteouts are visible.
* Given workdir and upper are on same fs, we can do
@@ -825,6 +969,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
* creation of workdir in previous step.
*/
if (ufs->workdir) {
+ struct dentry *temp;
+
err = ovl_check_d_type_supported(&workpath);
if (err < 0)
goto out_put_workdir;
@@ -836,6 +982,34 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
*/
if (!err)
pr_warn("overlayfs: upper fs needs to support d_type.\n");
+
+ /* Check if upper/work fs supports O_TMPFILE */
+ temp = ovl_do_tmpfile(ufs->workdir, S_IFREG | 0);
+ ufs->tmpfile = !IS_ERR(temp);
+ if (ufs->tmpfile)
+ dput(temp);
+ else
+ pr_warn("overlayfs: upper fs does not support tmpfile.\n");
+
+ /*
+ * Check if upper/work fs supports trusted.overlay.*
+ * xattr
+ */
+ err = ovl_do_setxattr(ufs->workdir, OVL_XATTR_OPAQUE,
+ "0", 1, 0);
+ if (err) {
+ ufs->noxattr = true;
+ pr_warn("overlayfs: upper fs does not support xattr.\n");
+ } else {
+ vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE);
+ }
+
+ /* Check if upper/work fs supports file handles */
+ if (ufs->config.index &&
+ !ovl_can_decode_fh(ufs->workdir->d_sb)) {
+ ufs->config.index = false;
+ pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
+ }
}
}
@@ -859,20 +1033,66 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
ufs->lower_mnt[ufs->numlower] = mnt;
ufs->numlower++;
+
+ /* Check if all lower layers are on same sb */
+ if (i == 0)
+ ufs->same_sb = mnt->mnt_sb;
+ else if (ufs->same_sb != mnt->mnt_sb)
+ ufs->same_sb = NULL;
}
/* If the upper fs is nonexistent, we mark overlayfs r/o too */
if (!ufs->upper_mnt)
sb->s_flags |= MS_RDONLY;
+ else if (ufs->upper_mnt->mnt_sb != ufs->same_sb)
+ ufs->same_sb = NULL;
+
+ if (!(ovl_force_readonly(ufs)) && ufs->config.index) {
+ /* Verify lower root is upper root origin */
+ err = ovl_verify_origin(upperpath.dentry, ufs->lower_mnt[0],
+ stack[0].dentry, false, true);
+ if (err) {
+ pr_err("overlayfs: failed to verify upper root origin\n");
+ goto out_put_lower_mnt;
+ }
+
+ ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry,
+ OVL_INDEXDIR_NAME, true);
+ if (ufs->indexdir) {
+ /* Verify upper root is index dir origin */
+ err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt,
+ upperpath.dentry, true, true);
+ if (err)
+ pr_err("overlayfs: failed to verify index dir origin\n");
+
+ /* Cleanup bad/stale/orphan index entries */
+ if (!err)
+ err = ovl_indexdir_cleanup(ufs->indexdir,
+ ufs->upper_mnt,
+ stack, numlower);
+ }
+ if (err || !ufs->indexdir)
+ pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
+ if (err)
+ goto out_put_indexdir;
+ }
+
+ /* Show index=off/on in /proc/mounts for any of the reasons above */
+ if (!ufs->indexdir)
+ ufs->config.index = false;
if (remote)
sb->s_d_op = &ovl_reval_dentry_operations;
else
sb->s_d_op = &ovl_dentry_operations;
- ufs->creator_cred = prepare_creds();
- if (!ufs->creator_cred)
- goto out_put_lower_mnt;
+ err = -ENOMEM;
+ ufs->creator_cred = cred = prepare_creds();
+ if (!cred)
+ goto out_put_indexdir;
+
+ /* Never override disk quota limits or use reserved space */
+ cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
err = -ENOMEM;
oe = ovl_alloc_entry(numlower);
@@ -892,10 +1112,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
mntput(upperpath.mnt);
for (i = 0; i < numlower; i++)
mntput(stack[i].mnt);
- path_put(&workpath);
+ mntput(workpath.mnt);
kfree(lowertmp);
- oe->__upperdentry = upperpath.dentry;
+ if (upperpath.dentry) {
+ oe->has_upper = true;
+ if (ovl_is_impuredir(upperpath.dentry))
+ ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
+ }
for (i = 0; i < numlower; i++) {
oe->lowerstack[i].dentry = stack[i].dentry;
oe->lowerstack[i].mnt = ufs->lower_mnt[i];
@@ -904,9 +1128,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
root_dentry->d_fsdata = oe;
- realinode = d_inode(ovl_dentry_real(root_dentry));
- ovl_inode_init(d_inode(root_dentry), realinode, !!upperpath.dentry);
- ovl_copyattr(realinode, d_inode(root_dentry));
+ ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
+ ovl_dentry_lower(root_dentry));
sb->s_root = root_dentry;
@@ -916,6 +1139,8 @@ out_free_oe:
kfree(oe);
out_put_cred:
put_cred(ufs->creator_cred);
+out_put_indexdir:
+ dput(ufs->indexdir);
out_put_lower_mnt:
for (i = 0; i < ufs->numlower; i++)
mntput(ufs->lower_mnt[i]);
@@ -929,8 +1154,12 @@ out_put_lowerpath:
kfree(stack);
out_free_lowertmp:
kfree(lowertmp);
+out_unlock_workdentry:
+ ovl_inuse_unlock(workpath.dentry);
out_put_workpath:
path_put(&workpath);
+out_unlock_upperdentry:
+ ovl_inuse_unlock(upperpath.dentry);
out_put_upperpath:
path_put(&upperpath);
out_free_config:
@@ -956,14 +1185,43 @@ static struct file_system_type ovl_fs_type = {
};
MODULE_ALIAS_FS("overlay");
+static void ovl_inode_init_once(void *foo)
+{
+ struct ovl_inode *oi = foo;
+
+ inode_init_once(&oi->vfs_inode);
+}
+
static int __init ovl_init(void)
{
- return register_filesystem(&ovl_fs_type);
+ int err;
+
+ ovl_inode_cachep = kmem_cache_create("ovl_inode",
+ sizeof(struct ovl_inode), 0,
+ (SLAB_RECLAIM_ACCOUNT|
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ ovl_inode_init_once);
+ if (ovl_inode_cachep == NULL)
+ return -ENOMEM;
+
+ err = register_filesystem(&ovl_fs_type);
+ if (err)
+ kmem_cache_destroy(ovl_inode_cachep);
+
+ return err;
}
static void __exit ovl_exit(void)
{
unregister_filesystem(&ovl_fs_type);
+
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
+ kmem_cache_destroy(ovl_inode_cachep);
+
}
module_init(ovl_init);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 952286f4826c..f46ad75dc96a 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -10,7 +10,12 @@
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/slab.h>
+#include <linux/cred.h>
#include <linux/xattr.h>
+#include <linux/exportfs.h>
+#include <linux/uuid.h>
+#include <linux/namei.h>
+#include <linux/ratelimit.h>
#include "overlayfs.h"
#include "ovl_entry.h"
@@ -39,6 +44,26 @@ const struct cred *ovl_override_creds(struct super_block *sb)
return override_creds(ofs->creator_cred);
}
+struct super_block *ovl_same_sb(struct super_block *sb)
+{
+ struct ovl_fs *ofs = sb->s_fs_info;
+
+ return ofs->same_sb;
+}
+
+bool ovl_can_decode_fh(struct super_block *sb)
+{
+ return (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
+ !uuid_is_null(&sb->s_uuid));
+}
+
+struct dentry *ovl_indexdir(struct super_block *sb)
+{
+ struct ovl_fs *ofs = sb->s_fs_info;
+
+ return ofs->indexdir;
+}
+
struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
{
size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
@@ -70,15 +95,17 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
struct ovl_entry *oe = dentry->d_fsdata;
enum ovl_path_type type = 0;
- if (oe->__upperdentry) {
+ if (ovl_dentry_upper(dentry)) {
type = __OVL_PATH_UPPER;
/*
- * Non-dir dentry can hold lower dentry from previous
- * location.
+ * Non-dir dentry can hold lower dentry of its copy up origin.
*/
- if (oe->numlower && d_is_dir(dentry))
- type |= __OVL_PATH_MERGE;
+ if (oe->numlower) {
+ type |= __OVL_PATH_ORIGIN;
+ if (d_is_dir(dentry))
+ type |= __OVL_PATH_MERGE;
+ }
} else {
if (oe->numlower > 1)
type |= __OVL_PATH_MERGE;
@@ -89,17 +116,16 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
void ovl_path_upper(struct dentry *dentry, struct path *path)
{
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
- struct ovl_entry *oe = dentry->d_fsdata;
path->mnt = ofs->upper_mnt;
- path->dentry = ovl_upperdentry_dereference(oe);
+ path->dentry = ovl_dentry_upper(dentry);
}
void ovl_path_lower(struct dentry *dentry, struct path *path)
{
struct ovl_entry *oe = dentry->d_fsdata;
- *path = oe->numlower ? oe->lowerstack[0] : (struct path) { NULL, NULL };
+ *path = oe->numlower ? oe->lowerstack[0] : (struct path) { };
}
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
@@ -116,47 +142,52 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
struct dentry *ovl_dentry_upper(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
-
- return ovl_upperdentry_dereference(oe);
+ return ovl_upperdentry_dereference(OVL_I(d_inode(dentry)));
}
-static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe)
+struct dentry *ovl_dentry_lower(struct dentry *dentry)
{
+ struct ovl_entry *oe = dentry->d_fsdata;
+
return oe->numlower ? oe->lowerstack[0].dentry : NULL;
}
-struct dentry *ovl_dentry_lower(struct dentry *dentry)
+struct dentry *ovl_dentry_real(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry);
+}
- return __ovl_dentry_lower(oe);
+struct dentry *ovl_i_dentry_upper(struct inode *inode)
+{
+ return ovl_upperdentry_dereference(OVL_I(inode));
}
-struct dentry *ovl_dentry_real(struct dentry *dentry)
+struct inode *ovl_inode_upper(struct inode *inode)
{
- struct ovl_entry *oe = dentry->d_fsdata;
- struct dentry *realdentry;
+ struct dentry *upperdentry = ovl_i_dentry_upper(inode);
+
+ return upperdentry ? d_inode(upperdentry) : NULL;
+}
- realdentry = ovl_upperdentry_dereference(oe);
- if (!realdentry)
- realdentry = __ovl_dentry_lower(oe);
+struct inode *ovl_inode_lower(struct inode *inode)
+{
+ return OVL_I(inode)->lower;
+}
- return realdentry;
+struct inode *ovl_inode_real(struct inode *inode)
+{
+ return ovl_inode_upper(inode) ?: ovl_inode_lower(inode);
}
+
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
-
- return oe->cache;
+ return OVL_I(d_inode(dentry))->cache;
}
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache)
{
- struct ovl_entry *oe = dentry->d_fsdata;
-
- oe->cache = cache;
+ OVL_I(d_inode(dentry))->cache = cache;
}
bool ovl_dentry_is_opaque(struct dentry *dentry)
@@ -177,79 +208,87 @@ void ovl_dentry_set_opaque(struct dentry *dentry)
oe->opaque = true;
}
-bool ovl_redirect_dir(struct super_block *sb)
+/*
+ * For hard links it's possible for ovl_dentry_upper() to return positive, while
+ * there's no actual upper alias for the inode. Copy up code needs to know
+ * about the existence of the upper alias, so it can't use ovl_dentry_upper().
+ */
+bool ovl_dentry_has_upper_alias(struct dentry *dentry)
{
- struct ovl_fs *ofs = sb->s_fs_info;
+ struct ovl_entry *oe = dentry->d_fsdata;
+
+ return oe->has_upper;
+}
+
+void ovl_dentry_set_upper_alias(struct dentry *dentry)
+{
+ struct ovl_entry *oe = dentry->d_fsdata;
- return ofs->config.redirect_dir;
+ oe->has_upper = true;
}
-void ovl_clear_redirect_dir(struct super_block *sb)
+bool ovl_redirect_dir(struct super_block *sb)
{
struct ovl_fs *ofs = sb->s_fs_info;
- ofs->config.redirect_dir = false;
+ return ofs->config.redirect_dir && !ofs->noxattr;
}
const char *ovl_dentry_get_redirect(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
-
- return oe->redirect;
+ return OVL_I(d_inode(dentry))->redirect;
}
void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct ovl_inode *oi = OVL_I(d_inode(dentry));
- kfree(oe->redirect);
- oe->redirect = redirect;
+ kfree(oi->redirect);
+ oi->redirect = redirect;
}
-void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
+void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
+ struct dentry *lowerdentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ if (upperdentry)
+ OVL_I(inode)->__upperdentry = upperdentry;
+ if (lowerdentry)
+ OVL_I(inode)->lower = d_inode(lowerdentry);
- WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode));
- WARN_ON(oe->__upperdentry);
- /*
- * Make sure upperdentry is consistent before making it visible to
- * ovl_upperdentry_dereference().
- */
- smp_wmb();
- oe->__upperdentry = upperdentry;
+ ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode);
}
-void ovl_inode_init(struct inode *inode, struct inode *realinode, bool is_upper)
+void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
{
- WRITE_ONCE(inode->i_private, (unsigned long) realinode |
- (is_upper ? OVL_ISUPPER_MASK : 0));
-}
+ struct inode *upperinode = d_inode(upperdentry);
-void ovl_inode_update(struct inode *inode, struct inode *upperinode)
-{
- WARN_ON(!upperinode);
- WARN_ON(!inode_unhashed(inode));
- WRITE_ONCE(inode->i_private,
- (unsigned long) upperinode | OVL_ISUPPER_MASK);
- if (!S_ISDIR(upperinode->i_mode))
+ WARN_ON(OVL_I(inode)->__upperdentry);
+
+ /*
+ * Make sure upperdentry is consistent before making it visible
+ */
+ smp_wmb();
+ OVL_I(inode)->__upperdentry = upperdentry;
+ if (!S_ISDIR(upperinode->i_mode) && inode_unhashed(inode)) {
+ inode->i_private = upperinode;
__insert_inode_hash(inode, (unsigned long) upperinode);
+ }
}
void ovl_dentry_version_inc(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct inode *inode = d_inode(dentry);
- WARN_ON(!inode_is_locked(dentry->d_inode));
- oe->version++;
+ WARN_ON(!inode_is_locked(inode));
+ OVL_I(inode)->version++;
}
u64 ovl_dentry_version_get(struct dentry *dentry)
{
- struct ovl_entry *oe = dentry->d_fsdata;
+ struct inode *inode = d_inode(dentry);
- WARN_ON(!inode_is_locked(dentry->d_inode));
- return oe->version;
+ WARN_ON(!inode_is_locked(inode));
+ return OVL_I(inode)->version;
}
bool ovl_is_whiteout(struct dentry *dentry)
@@ -263,3 +302,246 @@ struct file *ovl_path_open(struct path *path, int flags)
{
return dentry_open(path, flags | O_NOATIME, current_cred());
}
+
+int ovl_copy_up_start(struct dentry *dentry)
+{
+ struct ovl_inode *oi = OVL_I(d_inode(dentry));
+ int err;
+
+ err = mutex_lock_interruptible(&oi->lock);
+ if (!err && ovl_dentry_has_upper_alias(dentry)) {
+ err = 1; /* Already copied up */
+ mutex_unlock(&oi->lock);
+ }
+
+ return err;
+}
+
+void ovl_copy_up_end(struct dentry *dentry)
+{
+ mutex_unlock(&OVL_I(d_inode(dentry))->lock);
+}
+
+bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
+{
+ int res;
+ char val;
+
+ if (!d_is_dir(dentry))
+ return false;
+
+ res = vfs_getxattr(dentry, name, &val, 1);
+ if (res == 1 && val == 'y')
+ return true;
+
+ return false;
+}
+
+int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
+ const char *name, const void *value, size_t size,
+ int xerr)
+{
+ int err;
+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+
+ if (ofs->noxattr)
+ return xerr;
+
+ err = ovl_do_setxattr(upperdentry, name, value, size, 0);
+
+ if (err == -EOPNOTSUPP) {
+ pr_warn("overlayfs: cannot set %s xattr on upper\n", name);
+ ofs->noxattr = true;
+ return xerr;
+ }
+
+ return err;
+}
+
+int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
+{
+ int err;
+
+ if (ovl_test_flag(OVL_IMPURE, d_inode(dentry)))
+ return 0;
+
+ /*
+ * Do not fail when upper doesn't support xattrs.
+ * Upper inodes won't have origin nor redirect xattr anyway.
+ */
+ err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE,
+ "y", 1, 0);
+ if (!err)
+ ovl_set_flag(OVL_IMPURE, d_inode(dentry));
+
+ return err;
+}
+
+void ovl_set_flag(unsigned long flag, struct inode *inode)
+{
+ set_bit(flag, &OVL_I(inode)->flags);
+}
+
+bool ovl_test_flag(unsigned long flag, struct inode *inode)
+{
+ return test_bit(flag, &OVL_I(inode)->flags);
+}
+
+/**
+ * Caller must hold a reference to inode to prevent it from being freed while
+ * it is marked inuse.
+ */
+bool ovl_inuse_trylock(struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+ bool locked = false;
+
+ spin_lock(&inode->i_lock);
+ if (!(inode->i_state & I_OVL_INUSE)) {
+ inode->i_state |= I_OVL_INUSE;
+ locked = true;
+ }
+ spin_unlock(&inode->i_lock);
+
+ return locked;
+}
+
+void ovl_inuse_unlock(struct dentry *dentry)
+{
+ if (dentry) {
+ struct inode *inode = d_inode(dentry);
+
+ spin_lock(&inode->i_lock);
+ WARN_ON(!(inode->i_state & I_OVL_INUSE));
+ inode->i_state &= ~I_OVL_INUSE;
+ spin_unlock(&inode->i_lock);
+ }
+}
+
+/* Called must hold OVL_I(inode)->oi_lock */
+static void ovl_cleanup_index(struct dentry *dentry)
+{
+ struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode;
+ struct dentry *lowerdentry = ovl_dentry_lower(dentry);
+ struct dentry *upperdentry = ovl_dentry_upper(dentry);
+ struct dentry *index = NULL;
+ struct inode *inode;
+ struct qstr name;
+ int err;
+
+ err = ovl_get_index_name(lowerdentry, &name);
+ if (err)
+ goto fail;
+
+ inode = d_inode(upperdentry);
+ if (inode->i_nlink != 1) {
+ pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
+ upperdentry, inode->i_ino, inode->i_nlink);
+ /*
+ * We either have a bug with persistent union nlink or a lower
+ * hardlink was added while overlay is mounted. Adding a lower
+ * hardlink and then unlinking all overlay hardlinks would drop
+ * overlay nlink to zero before all upper inodes are unlinked.
+ * As a safety measure, when that situation is detected, set
+ * the overlay nlink to the index inode nlink minus one for the
+ * index entry itself.
+ */
+ set_nlink(d_inode(dentry), inode->i_nlink - 1);
+ ovl_set_nlink_upper(dentry);
+ goto out;
+ }
+
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+ /* TODO: whiteout instead of cleanup to block future open by handle */
+ index = lookup_one_len(name.name, ovl_indexdir(dentry->d_sb), name.len);
+ err = PTR_ERR(index);
+ if (!IS_ERR(index))
+ err = ovl_cleanup(dir, index);
+ inode_unlock(dir);
+ if (err)
+ goto fail;
+
+out:
+ dput(index);
+ return;
+
+fail:
+ pr_err("overlayfs: cleanup index of '%pd2' failed (%i)\n", dentry, err);
+ goto out;
+}
+
+/*
+ * Operations that change overlay inode and upper inode nlink need to be
+ * synchronized with copy up for persistent nlink accounting.
+ */
+int ovl_nlink_start(struct dentry *dentry, bool *locked)
+{
+ struct ovl_inode *oi = OVL_I(d_inode(dentry));
+ const struct cred *old_cred;
+ int err;
+
+ if (!d_inode(dentry) || d_is_dir(dentry))
+ return 0;
+
+ /*
+ * With inodes index is enabled, we store the union overlay nlink
+ * in an xattr on the index inode. When whiting out lower hardlinks
+ * we need to decrement the overlay persistent nlink, but before the
+ * first copy up, we have no upper index inode to store the xattr.
+ *
+ * As a workaround, before whiteout/rename over of a lower hardlink,
+ * copy up to create the upper index. Creating the upper index will
+ * initialize the overlay nlink, so it could be dropped if unlink
+ * or rename succeeds.
+ *
+ * TODO: implement metadata only index copy up when called with
+ * ovl_copy_up_flags(dentry, O_PATH).
+ */
+ if (ovl_indexdir(dentry->d_sb) && !ovl_dentry_has_upper_alias(dentry) &&
+ d_inode(ovl_dentry_lower(dentry))->i_nlink > 1) {
+ err = ovl_copy_up(dentry);
+ if (err)
+ return err;
+ }
+
+ err = mutex_lock_interruptible(&oi->lock);
+ if (err)
+ return err;
+
+ if (!ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+ goto out;
+
+ old_cred = ovl_override_creds(dentry->d_sb);
+ /*
+ * The overlay inode nlink should be incremented/decremented IFF the
+ * upper operation succeeds, along with nlink change of upper inode.
+ * Therefore, before link/unlink/rename, we store the union nlink
+ * value relative to the upper inode nlink in an upper inode xattr.
+ */
+ err = ovl_set_nlink_upper(dentry);
+ revert_creds(old_cred);
+
+out:
+ if (err)
+ mutex_unlock(&oi->lock);
+ else
+ *locked = true;
+
+ return err;
+}
+
+void ovl_nlink_end(struct dentry *dentry, bool locked)
+{
+ if (locked) {
+ if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) &&
+ d_inode(dentry)->i_nlink == 0) {
+ const struct cred *old_cred;
+
+ old_cred = ovl_override_creds(dentry->d_sb);
+ ovl_cleanup_index(dentry);
+ revert_creds(old_cred);
+ }
+
+ mutex_unlock(&OVL_I(d_inode(dentry))->lock);
+ }
+}