diff options
Diffstat (limited to 'fs/overlayfs')
| -rw-r--r-- | fs/overlayfs/Kconfig | 21 | ||||
| -rw-r--r-- | fs/overlayfs/copy_up.c | 458 | ||||
| -rw-r--r-- | fs/overlayfs/dir.c | 164 | ||||
| -rw-r--r-- | fs/overlayfs/inode.c | 362 | ||||
| -rw-r--r-- | fs/overlayfs/namei.c | 459 | ||||
| -rw-r--r-- | fs/overlayfs/overlayfs.h | 122 | ||||
| -rw-r--r-- | fs/overlayfs/ovl_entry.h | 37 | ||||
| -rw-r--r-- | fs/overlayfs/readdir.c | 53 | ||||
| -rw-r--r-- | fs/overlayfs/super.c | 344 | ||||
| -rw-r--r-- | fs/overlayfs/util.c | 414 |
10 files changed, 2074 insertions, 360 deletions
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 0daac5112f7a..cbfc196e5dc5 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -1,5 +1,6 @@ config OVERLAY_FS tristate "Overlay filesystem support" + select EXPORTFS help An overlay filesystem combines two filesystems - an 'upper' filesystem and a 'lower' filesystem. When a name exists in both filesystems, the @@ -22,3 +23,23 @@ config OVERLAY_FS_REDIRECT_DIR Note, that redirects are not backward compatible. That is, mounting an overlay which has redirects on a kernel that doesn't support this feature will have unexpected results. + +config OVERLAY_FS_INDEX + bool "Overlayfs: turn on inodes index feature by default" + depends on OVERLAY_FS + help + If this config option is enabled then overlay filesystems will use + the inodes index dir to map lower inodes to upper inodes by default. + In this case it is still possible to turn off index globally with the + "index=off" module option or on a filesystem instance basis with the + "index=off" mount option. + + The inodes index feature prevents breaking of lower hardlinks on copy + up. + + Note, that the inodes index feature is read-only backward compatible. + That is, mounting an overlay which has an index dir on a kernel that + doesn't support this feature read-only, will not have any negative + outcomes. However, mounting the same overlay with an old kernel + read-write and then mounting it again with a new kernel, will have + unexpected results. diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index f57043dace62..acb6f97deb97 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -15,11 +15,14 @@ #include <linux/xattr.h> #include <linux/security.h> #include <linux/uaccess.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/cred.h> #include <linux/namei.h> #include <linux/fdtable.h> #include <linux/ratelimit.h> +#include <linux/exportfs.h> #include "overlayfs.h" +#include "ovl_entry.h" #define OVL_COPY_UP_CHUNK_SIZE (1 << 20) @@ -230,91 +233,290 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) return err; } -static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, - struct dentry *dentry, struct path *lowerpath, - struct kstat *stat, const char *link) +struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper) { - struct inode *wdir = workdir->d_inode; - struct inode *udir = upperdir->d_inode; - struct dentry *newdentry = NULL; - struct dentry *upper = NULL; + struct ovl_fh *fh; + int fh_type, fh_len, dwords; + void *buf; + int buflen = MAX_HANDLE_SZ; + uuid_t *uuid = &lower->d_sb->s_uuid; + + buf = kmalloc(buflen, GFP_TEMPORARY); + if (!buf) + return ERR_PTR(-ENOMEM); + + /* + * We encode a non-connectable file handle for non-dir, because we + * only need to find the lower inode number and we don't want to pay + * the price or reconnecting the dentry. + */ + dwords = buflen >> 2; + fh_type = exportfs_encode_fh(lower, buf, &dwords, 0); + buflen = (dwords << 2); + + fh = ERR_PTR(-EIO); + if (WARN_ON(fh_type < 0) || + WARN_ON(buflen > MAX_HANDLE_SZ) || + WARN_ON(fh_type == FILEID_INVALID)) + goto out; + + BUILD_BUG_ON(MAX_HANDLE_SZ + offsetof(struct ovl_fh, fid) > 255); + fh_len = offsetof(struct ovl_fh, fid) + buflen; + fh = kmalloc(fh_len, GFP_KERNEL); + if (!fh) { + fh = ERR_PTR(-ENOMEM); + goto out; + } + + fh->version = OVL_FH_VERSION; + fh->magic = OVL_FH_MAGIC; + fh->type = fh_type; + fh->flags = OVL_FH_FLAG_CPU_ENDIAN; + /* + * When we will want to decode an overlay dentry from this handle + * and all layers are on the same fs, if we get a disconncted real + * dentry when we decode fid, the only way to tell if we should assign + * it to upperdentry or to lowerstack is by checking this flag. + */ + if (is_upper) + fh->flags |= OVL_FH_FLAG_PATH_UPPER; + fh->len = fh_len; + fh->uuid = *uuid; + memcpy(fh->fid, buf, buflen); + +out: + kfree(buf); + return fh; +} + +static int ovl_set_origin(struct dentry *dentry, struct dentry *lower, + struct dentry *upper) +{ + const struct ovl_fh *fh = NULL; int err; + + /* + * When lower layer doesn't support export operations store a 'null' fh, + * so we can use the overlay.origin xattr to distignuish between a copy + * up and a pure upper inode. + */ + if (ovl_can_decode_fh(lower->d_sb)) { + fh = ovl_encode_fh(lower, false); + if (IS_ERR(fh)) + return PTR_ERR(fh); + } + + /* + * Do not fail when upper doesn't support xattrs. + */ + err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh, + fh ? fh->len : 0, 0); + kfree(fh); + + return err; +} + +struct ovl_copy_up_ctx { + struct dentry *parent; + struct dentry *dentry; + struct path lowerpath; + struct kstat stat; + struct kstat pstat; + const char *link; + struct dentry *destdir; + struct qstr destname; + struct dentry *workdir; + bool tmpfile; + bool origin; +}; + +static int ovl_link_up(struct ovl_copy_up_ctx *c) +{ + int err; + struct dentry *upper; + struct dentry *upperdir = ovl_dentry_upper(c->parent); + struct inode *udir = d_inode(upperdir); + + /* Mark parent "impure" because it may now contain non-pure upper */ + err = ovl_set_impure(c->parent, upperdir); + if (err) + return err; + + err = ovl_set_nlink_lower(c->dentry); + if (err) + return err; + + inode_lock_nested(udir, I_MUTEX_PARENT); + upper = lookup_one_len(c->dentry->d_name.name, upperdir, + c->dentry->d_name.len); + err = PTR_ERR(upper); + if (!IS_ERR(upper)) { + err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper, + true); + dput(upper); + + if (!err) { + /* Restore timestamps on parent (best effort) */ + ovl_set_timestamps(upperdir, &c->pstat); + ovl_dentry_set_upper_alias(c->dentry); + } + } + inode_unlock(udir); + ovl_set_nlink_upper(c->dentry); + + return err; +} + +static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp, + struct dentry **newdentry) +{ + int err; + struct dentry *upper; + struct inode *udir = d_inode(c->destdir); + + upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len); + if (IS_ERR(upper)) + return PTR_ERR(upper); + + if (c->tmpfile) + err = ovl_do_link(temp, udir, upper, true); + else + err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0); + + if (!err) + *newdentry = dget(c->tmpfile ? upper : temp); + dput(upper); + + return err; +} + +static int ovl_get_tmpfile(struct ovl_copy_up_ctx *c, struct dentry **tempp) +{ + int err; + struct dentry *temp; const struct cred *old_creds = NULL; struct cred *new_creds = NULL; struct cattr cattr = { /* Can't properly set mode on creation because of the umask */ - .mode = stat->mode & S_IFMT, - .rdev = stat->rdev, - .link = link + .mode = c->stat.mode & S_IFMT, + .rdev = c->stat.rdev, + .link = c->link }; - newdentry = ovl_lookup_temp(workdir, dentry); - err = PTR_ERR(newdentry); - if (IS_ERR(newdentry)) - goto out; - - upper = lookup_one_len(dentry->d_name.name, upperdir, - dentry->d_name.len); - err = PTR_ERR(upper); - if (IS_ERR(upper)) - goto out1; - - err = security_inode_copy_up(dentry, &new_creds); + err = security_inode_copy_up(c->dentry, &new_creds); if (err < 0) - goto out2; + goto out; if (new_creds) old_creds = override_creds(new_creds); - err = ovl_create_real(wdir, newdentry, &cattr, NULL, true); - + if (c->tmpfile) { + temp = ovl_do_tmpfile(c->workdir, c->stat.mode); + if (IS_ERR(temp)) + goto temp_err; + } else { + temp = ovl_lookup_temp(c->workdir); + if (IS_ERR(temp)) + goto temp_err; + + err = ovl_create_real(d_inode(c->workdir), temp, &cattr, + NULL, true); + if (err) { + dput(temp); + goto out; + } + } + err = 0; + *tempp = temp; +out: if (new_creds) { revert_creds(old_creds); put_cred(new_creds); } - if (err) - goto out2; + return err; + +temp_err: + err = PTR_ERR(temp); + goto out; +} + +static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) +{ + int err; - if (S_ISREG(stat->mode)) { + if (S_ISREG(c->stat.mode)) { struct path upperpath; - ovl_path_upper(dentry, &upperpath); + ovl_path_upper(c->dentry, &upperpath); BUG_ON(upperpath.dentry != NULL); - upperpath.dentry = newdentry; + upperpath.dentry = temp; - err = ovl_copy_up_data(lowerpath, &upperpath, stat->size); + err = ovl_copy_up_data(&c->lowerpath, &upperpath, c->stat.size); if (err) - goto out_cleanup; + return err; } - err = ovl_copy_xattr(lowerpath->dentry, newdentry); + err = ovl_copy_xattr(c->lowerpath.dentry, temp); if (err) - goto out_cleanup; + return err; - inode_lock(newdentry->d_inode); - err = ovl_set_attr(newdentry, stat); - inode_unlock(newdentry->d_inode); + inode_lock(temp->d_inode); + err = ovl_set_attr(temp, &c->stat); + inode_unlock(temp->d_inode); + if (err) + return err; + + /* + * Store identifier of lower inode in upper inode xattr to + * allow lookup of the copy up origin inode. + * + * Don't set origin when we are breaking the association with a lower + * hard link. + */ + if (c->origin) { + err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp); + if (err) + return err; + } + + return 0; +} + +static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c) +{ + struct inode *udir = c->destdir->d_inode; + struct dentry *newdentry = NULL; + struct dentry *temp = NULL; + int err; + + err = ovl_get_tmpfile(c, &temp); + if (err) + goto out; + + err = ovl_copy_up_inode(c, temp); if (err) goto out_cleanup; - err = ovl_do_rename(wdir, newdentry, udir, upper, 0); + if (c->tmpfile) { + inode_lock_nested(udir, I_MUTEX_PARENT); + err = ovl_install_temp(c, temp, &newdentry); + inode_unlock(udir); + } else { + err = ovl_install_temp(c, temp, &newdentry); + } if (err) goto out_cleanup; - ovl_dentry_update(dentry, newdentry); - ovl_inode_update(d_inode(dentry), d_inode(newdentry)); - newdentry = NULL; -out2: - dput(upper); -out1: - dput(newdentry); + ovl_inode_update(d_inode(c->dentry), newdentry); out: + dput(temp); return err; out_cleanup: - ovl_cleanup(wdir, newdentry); - goto out2; + if (!c->tmpfile) + ovl_cleanup(d_inode(c->workdir), temp); + goto out; } /* @@ -326,55 +528,119 @@ out_cleanup: * is possible that the copy up will lock the old parent. At that point * the file will have already been copied up anyway. */ +static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) +{ + int err; + struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info; + bool indexed = false; + + if (ovl_indexdir(c->dentry->d_sb) && !S_ISDIR(c->stat.mode) && + c->stat.nlink > 1) + indexed = true; + + if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed) + c->origin = true; + + if (indexed) { + c->destdir = ovl_indexdir(c->dentry->d_sb); + err = ovl_get_index_name(c->lowerpath.dentry, &c->destname); + if (err) + return err; + } else { + /* + * Mark parent "impure" because it may now contain non-pure + * upper + */ + err = ovl_set_impure(c->parent, c->destdir); + if (err) + return err; + } + + /* Should we copyup with O_TMPFILE or with workdir? */ + if (S_ISREG(c->stat.mode) && ofs->tmpfile) { + c->tmpfile = true; + err = ovl_copy_up_locked(c); + } else { + err = -EIO; + if (lock_rename(c->workdir, c->destdir) != NULL) { + pr_err("overlayfs: failed to lock workdir+upperdir\n"); + } else { + err = ovl_copy_up_locked(c); + unlock_rename(c->workdir, c->destdir); + } + } + + if (indexed) { + if (!err) + ovl_set_flag(OVL_INDEX, d_inode(c->dentry)); + kfree(c->destname.name); + } else if (!err) { + struct inode *udir = d_inode(c->destdir); + + /* Restore timestamps on parent (best effort) */ + inode_lock(udir); + ovl_set_timestamps(c->destdir, &c->pstat); + inode_unlock(udir); + + ovl_dentry_set_upper_alias(c->dentry); + } + + return err; +} + static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, - struct path *lowerpath, struct kstat *stat) + int flags) { - DEFINE_DELAYED_CALL(done); - struct dentry *workdir = ovl_workdir(dentry); int err; - struct kstat pstat; + DEFINE_DELAYED_CALL(done); struct path parentpath; - struct dentry *lowerdentry = lowerpath->dentry; - struct dentry *upperdir; - const char *link = NULL; + struct ovl_copy_up_ctx ctx = { + .parent = parent, + .dentry = dentry, + .workdir = ovl_workdir(dentry), + }; - if (WARN_ON(!workdir)) + if (WARN_ON(!ctx.workdir)) return -EROFS; - ovl_do_check_copy_up(lowerdentry); + ovl_path_lower(dentry, &ctx.lowerpath); + err = vfs_getattr(&ctx.lowerpath, &ctx.stat, + STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); + if (err) + return err; ovl_path_upper(parent, &parentpath); - upperdir = parentpath.dentry; + ctx.destdir = parentpath.dentry; + ctx.destname = dentry->d_name; - err = vfs_getattr(&parentpath, &pstat); + err = vfs_getattr(&parentpath, &ctx.pstat, + STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT); if (err) return err; - if (S_ISLNK(stat->mode)) { - link = vfs_get_link(lowerdentry, &done); - if (IS_ERR(link)) - return PTR_ERR(link); - } + /* maybe truncate regular file. this has no effect on dirs */ + if (flags & O_TRUNC) + ctx.stat.size = 0; - err = -EIO; - if (lock_rename(workdir, upperdir) != NULL) { - pr_err("overlayfs: failed to lock workdir+upperdir\n"); - goto out_unlock; + if (S_ISLNK(ctx.stat.mode)) { + ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done); + if (IS_ERR(ctx.link)) + return PTR_ERR(ctx.link); } - if (ovl_dentry_upper(dentry)) { - /* Raced with another copy-up? Nothing to do, then... */ - err = 0; - goto out_unlock; + ovl_do_check_copy_up(ctx.lowerpath.dentry); + + err = ovl_copy_up_start(dentry); + /* err < 0: interrupted, err > 0: raced with another copy-up */ + if (unlikely(err)) { + if (err > 0) + err = 0; + } else { + if (!ovl_dentry_upper(dentry)) + err = ovl_do_copy_up(&ctx); + if (!err && !ovl_dentry_has_upper_alias(dentry)) + err = ovl_link_up(&ctx); + ovl_copy_up_end(dentry); } - - err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, - stat, link); - if (!err) { - /* Restore timestamps on parent (best effort) */ - ovl_set_timestamps(upperdir, &pstat); - } -out_unlock: - unlock_rename(workdir, upperdir); do_delayed_call(&done); return err; @@ -388,11 +654,22 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags) while (!err) { struct dentry *next; struct dentry *parent; - struct path lowerpath; - struct kstat stat; - enum ovl_path_type type = ovl_path_type(dentry); - if (OVL_TYPE_UPPER(type)) + /* + * Check if copy-up has happened as well as for upper alias (in + * case of hard links) is there. + * + * Both checks are lockless: + * - false negatives: will recheck under oi->lock + * - false positives: + * + ovl_dentry_upper() uses memory barriers to ensure the + * upper dentry is up-to-date + * + ovl_dentry_has_upper_alias() relies on locking of + * upper parent i_rwsem to prevent reordering copy-up + * with rename. + */ + if (ovl_dentry_upper(dentry) && + ovl_dentry_has_upper_alias(dentry)) break; next = dget(dentry); @@ -400,21 +677,14 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags) for (;;) { parent = dget_parent(next); - type = ovl_path_type(parent); - if (OVL_TYPE_UPPER(type)) + if (ovl_dentry_upper(parent)) break; dput(next); next = parent; } - ovl_path_lower(next, &lowerpath); - err = vfs_getattr(&lowerpath, &stat); - /* maybe truncate regular file. this has no effect on dirs */ - if (flags & O_TRUNC) - stat.size = 0; - if (!err) - err = ovl_copy_up_one(parent, next, &lowerpath, &stat); + err = ovl_copy_up_one(parent, next, flags); dput(parent); dput(next); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 16e06dd89457..48b70e6490f3 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -24,7 +24,7 @@ module_param_named(redirect_max, ovl_redirect_max, ushort, 0644); MODULE_PARM_DESC(ovl_redirect_max, "Maximum length of absolute redirect xattr value"); -void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) +int ovl_cleanup(struct inode *wdir, struct dentry *wdentry) { int err; @@ -39,9 +39,11 @@ void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n", wdentry, err); } + + return err; } -struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry) +struct dentry *ovl_lookup_temp(struct dentry *workdir) { struct dentry *temp; char name[20]; @@ -68,7 +70,7 @@ static struct dentry *ovl_whiteout(struct dentry *workdir, struct dentry *whiteout; struct inode *wdir = workdir->d_inode; - whiteout = ovl_lookup_temp(workdir, dentry); + whiteout = ovl_lookup_temp(workdir); if (IS_ERR(whiteout)) return whiteout; @@ -127,44 +129,26 @@ int ovl_create_real(struct inode *dir, struct dentry *newdentry, return err; } -static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) +static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper, + int xerr) { int err; - err = ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0); + err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr); if (!err) ovl_dentry_set_opaque(dentry); return err; } -static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) { - int err; - enum ovl_path_type type; - struct path realpath; - const struct cred *old_cred; - - type = ovl_path_real(dentry, &realpath); - old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getattr(&realpath, stat); - revert_creds(old_cred); - if (err) - return err; - - stat->dev = dentry->d_sb->s_dev; - stat->ino = dentry->d_inode->i_ino; - /* - * It's probably not worth it to count subdirs to get the - * correct link count. nlink=1 seems to pacify 'find' and - * other utilities. + * Fail with -EIO when trying to create opaque dir and upper doesn't + * support xattrs. ovl_rename() calls ovl_set_opaque_xerr(-EXDEV) to + * return a specific error for noxattr case. */ - if (OVL_TYPE_MERGE(type)) - stat->nlink = 1; - - return 0; + return ovl_set_opaque_xerr(dentry, upperdentry, -EIO); } /* Common operations required to be done after creation of file on upper */ @@ -172,15 +156,19 @@ static void ovl_instantiate(struct dentry *dentry, struct inode *inode, struct dentry *newdentry, bool hardlink) { ovl_dentry_version_inc(dentry->d_parent); - ovl_dentry_update(dentry, newdentry); + ovl_dentry_set_upper_alias(dentry); if (!hardlink) { - ovl_inode_update(inode, d_inode(newdentry)); + ovl_inode_update(inode, newdentry); ovl_copyattr(newdentry->d_inode, inode); } else { - WARN_ON(ovl_inode_real(inode, NULL) != d_inode(newdentry)); + WARN_ON(ovl_inode_real(inode) != d_inode(newdentry)); + dput(newdentry); inc_nlink(inode); } d_instantiate(dentry, inode); + /* Force lookup of new upper hardlink to find its lower */ + if (hardlink) + d_drop(dentry); } static bool ovl_type_merge(struct dentry *dentry) @@ -188,6 +176,11 @@ static bool ovl_type_merge(struct dentry *dentry) return OVL_TYPE_MERGE(ovl_path_type(dentry)); } +static bool ovl_type_origin(struct dentry *dentry) +{ + return OVL_TYPE_ORIGIN(ovl_path_type(dentry)); +} + static int ovl_create_upper(struct dentry *dentry, struct inode *inode, struct cattr *attr, struct dentry *hardlink) { @@ -209,7 +202,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, if (err) goto out_dput; - if (ovl_type_merge(dentry->d_parent)) { + if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) { /* Setting opaque here is just an optimization, allow to fail */ ovl_set_opaque(dentry, newdentry); } @@ -264,7 +257,8 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, goto out; ovl_path_upper(dentry, &upperpath); - err = vfs_getattr(&upperpath, &stat); + err = vfs_getattr(&upperpath, &stat, + STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (err) goto out_unlock; @@ -275,7 +269,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, if (upper->d_parent->d_inode != udir) goto out_unlock; - opaquedir = ovl_lookup_temp(workdir, dentry); + opaquedir = ovl_lookup_temp(workdir); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) goto out_unlock; @@ -407,7 +401,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, if (err) goto out; - newdentry = ovl_lookup_temp(workdir, dentry); + newdentry = ovl_lookup_temp(workdir); err = PTR_ERR(newdentry); if (IS_ERR(newdentry)) goto out_unlock; @@ -487,17 +481,30 @@ out_cleanup: } static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, - struct cattr *attr, struct dentry *hardlink) + struct cattr *attr, struct dentry *hardlink, + bool origin) { int err; const struct cred *old_cred; struct cred *override_cred; + struct dentry *parent = dentry->d_parent; - err = ovl_copy_up(dentry->d_parent); + err = ovl_copy_up(parent); if (err) return err; old_cred = ovl_override_creds(dentry->d_sb); + + /* + * When linking a file with copy up origin into a new parent, mark the + * new parent dir "impure". + */ + if (origin) { + err = ovl_set_impure(parent, ovl_dentry_upper(parent)); + if (err) + goto out_revert_creds; + } + err = -ENOMEM; override_cred = prepare_creds(); if (override_cred) { @@ -556,7 +563,7 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, inode_init_owner(inode, dentry->d_parent->d_inode, mode); attr.mode = inode->i_mode; - err = ovl_create_or_link(dentry, inode, &attr, NULL); + err = ovl_create_or_link(dentry, inode, &attr, NULL, false); if (err) iput(inode); @@ -597,6 +604,7 @@ static int ovl_link(struct dentry *old, struct inode *newdir, struct dentry *new) { int err; + bool locked = false; struct inode *inode; err = ovl_want_write(old); @@ -607,19 +615,30 @@ static int ovl_link(struct dentry *old, struct inode *newdir, if (err) goto out_drop_write; + err = ovl_nlink_start(old, &locked); + if (err) + goto out_drop_write; + inode = d_inode(old); ihold(inode); - err = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old)); + err = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old), + ovl_type_origin(old)); if (err) iput(inode); + ovl_nlink_end(old, locked); out_drop_write: ovl_drop_write(old); out: return err; } +static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper) +{ + return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper); +} + static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) { struct dentry *workdir = ovl_workdir(dentry); @@ -655,7 +674,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) err = -ESTALE; if ((opaquedir && upper != opaquedir) || (!opaquedir && ovl_dentry_upper(dentry) && - upper != ovl_dentry_upper(dentry))) { + !ovl_matches_upper(dentry, upper))) { goto out_dput_upper; } @@ -716,7 +735,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir) err = -ESTALE; if ((opaquedir && upper != opaquedir) || - (!opaquedir && upper != ovl_dentry_upper(dentry))) + (!opaquedir && !ovl_matches_upper(dentry, upper))) goto out_dput_upper; if (is_dir) @@ -744,8 +763,8 @@ out: static int ovl_do_remove(struct dentry *dentry, bool is_dir) { - enum ovl_path_type type; int err; + bool locked = false; const struct cred *old_cred; err = ovl_want_write(dentry); @@ -756,7 +775,9 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) if (err) goto out_drop_write; - type = ovl_path_type(dentry); + err = ovl_nlink_start(dentry, &locked); + if (err) + goto out_drop_write; old_cred = ovl_override_creds(dentry->d_sb); if (!ovl_lower_positive(dentry)) @@ -770,6 +791,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) else drop_nlink(dentry->d_inode); } + ovl_nlink_end(dentry, locked); out_drop_write: ovl_drop_write(dentry); out: @@ -871,18 +893,16 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir) if (IS_ERR(redirect)) return PTR_ERR(redirect); - err = ovl_do_setxattr(ovl_dentry_upper(dentry), OVL_XATTR_REDIRECT, - redirect, strlen(redirect), 0); + err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry), + OVL_XATTR_REDIRECT, + redirect, strlen(redirect), -EXDEV); if (!err) { spin_lock(&dentry->d_lock); ovl_dentry_set_redirect(dentry, redirect); spin_unlock(&dentry->d_lock); } else { kfree(redirect); - if (err == -EOPNOTSUPP) - ovl_clear_redirect_dir(dentry->d_sb); - else - pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err); + pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err); /* Fall back to userspace copy-up */ err = -EXDEV; } @@ -894,6 +914,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, unsigned int flags) { int err; + bool locked = false; struct dentry *old_upperdir; struct dentry *new_upperdir; struct dentry *olddentry; @@ -937,6 +958,10 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, err = ovl_copy_up(new); if (err) goto out_drop_write; + } else { + err = ovl_nlink_start(new, &locked); + if (err) + goto out_drop_write; } old_cred = ovl_override_creds(old->d_sb); @@ -968,6 +993,25 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, old_upperdir = ovl_dentry_upper(old->d_parent); new_upperdir = ovl_dentry_upper(new->d_parent); + if (!samedir) { + /* + * When moving a merge dir or non-dir with copy up origin into + * a new parent, we are marking the new parent dir "impure". + * When ovl_iterate() iterates an "impure" upper dir, it will + * lookup the origin inodes of the entries to fill d_ino. + */ + if (ovl_type_origin(old)) { + err = ovl_set_impure(new->d_parent, new_upperdir); + if (err) + goto out_revert_creds; + } + if (!overwrite && ovl_type_origin(new)) { + err = ovl_set_impure(old->d_parent, old_upperdir); + if (err) + goto out_revert_creds; + } + } + trap = lock_rename(new_upperdir, old_upperdir); olddentry = lookup_one_len(old->d_name.name, old_upperdir, @@ -977,7 +1021,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, goto out_unlock; err = -ESTALE; - if (olddentry != ovl_dentry_upper(old)) + if (!ovl_matches_upper(old, olddentry)) goto out_dput_old; newdentry = lookup_one_len(new->d_name.name, new_upperdir, @@ -990,12 +1034,12 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, new_opaque = ovl_dentry_is_opaque(new); err = -ESTALE; - if (ovl_dentry_upper(new)) { + if (d_inode(new) && ovl_dentry_upper(new)) { if (opaquedir) { if (newdentry != opaquedir) goto out_dput; } else { - if (newdentry != ovl_dentry_upper(new)) + if (!ovl_matches_upper(new, newdentry)) goto out_dput; } } else { @@ -1017,7 +1061,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (ovl_type_merge_or_lower(old)) err = ovl_set_redirect(old, samedir); else if (!old_opaque && ovl_type_merge(new->d_parent)) - err = ovl_set_opaque(old, olddentry); + err = ovl_set_opaque_xerr(old, olddentry, -EXDEV); if (err) goto out_dput; } @@ -1025,7 +1069,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (ovl_type_merge_or_lower(new)) err = ovl_set_redirect(new, samedir); else if (!new_opaque && ovl_type_merge(old->d_parent)) - err = ovl_set_opaque(new, newdentry); + err = ovl_set_opaque_xerr(new, newdentry, -EXDEV); if (err) goto out_dput; } @@ -1038,6 +1082,13 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (cleanup_whiteout) ovl_cleanup(old_upperdir->d_inode, newdentry); + if (overwrite && d_inode(new)) { + if (new_is_dir) + clear_nlink(d_inode(new)); + else + drop_nlink(d_inode(new)); + } + ovl_dentry_version_inc(old->d_parent); ovl_dentry_version_inc(new->d_parent); @@ -1049,6 +1100,7 @@ out_unlock: unlock_rename(new_upperdir, old_upperdir); out_revert_creds: revert_creds(old_cred); + ovl_nlink_end(new, locked); out_drop_write: ovl_drop_write(old); out: @@ -1068,7 +1120,7 @@ const struct inode_operations ovl_dir_inode_operations = { .create = ovl_create, .mknod = ovl_mknod, .permission = ovl_permission, - .getattr = ovl_dir_getattr, + .getattr = ovl_getattr, .listxattr = ovl_listxattr, .get_acl = ovl_get_acl, .update_time = ovl_update_time, diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 08643ac44a02..5bc71642b226 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -9,8 +9,10 @@ #include <linux/fs.h> #include <linux/slab.h> +#include <linux/cred.h> #include <linux/xattr.h> #include <linux/posix_acl.h> +#include <linux/ratelimit.h> #include "overlayfs.h" int ovl_setattr(struct dentry *dentry, struct iattr *attr) @@ -56,24 +58,98 @@ out: return err; } -static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int ovl_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; + enum ovl_path_type type; struct path realpath; const struct cred *old_cred; + bool is_dir = S_ISDIR(dentry->d_inode->i_mode); int err; - ovl_path_real(dentry, &realpath); + type = ovl_path_real(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getattr(&realpath, stat); + err = vfs_getattr(&realpath, stat, request_mask, flags); + if (err) + goto out; + + /* + * When all layers are on the same fs, all real inode number are + * unique, so we use the overlay st_dev, which is friendly to du -x. + * + * We also use st_ino of the copy up origin, if we know it. + * This guaranties constant st_dev/st_ino across copy up. + * + * If filesystem supports NFS export ops, this also guaranties + * persistent st_ino across mount cycle. + */ + if (ovl_same_sb(dentry->d_sb)) { + if (OVL_TYPE_ORIGIN(type)) { + struct kstat lowerstat; + u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0); + + ovl_path_lower(dentry, &realpath); + err = vfs_getattr(&realpath, &lowerstat, + lowermask, flags); + if (err) + goto out; + + WARN_ON_ONCE(stat->dev != lowerstat.dev); + /* + * Lower hardlinks may be broken on copy up to different + * upper files, so we cannot use the lower origin st_ino + * for those different files, even for the same fs case. + * With inodes index enabled, it is safe to use st_ino + * of an indexed hardlinked origin. The index validates + * that the upper hardlink is not broken. + */ + if (is_dir || lowerstat.nlink == 1 || + ovl_test_flag(OVL_INDEX, d_inode(dentry))) + stat->ino = lowerstat.ino; + } + stat->dev = dentry->d_sb->s_dev; + } else if (is_dir) { + /* + * If not all layers are on the same fs the pair {real st_ino; + * overlay st_dev} is not unique, so use the non persistent + * overlay st_ino. + * + * Always use the overlay st_dev for directories, so 'find + * -xdev' will scan the entire overlay mount and won't cross the + * overlay mount boundaries. + */ + stat->dev = dentry->d_sb->s_dev; + stat->ino = dentry->d_inode->i_ino; + } + + /* + * It's probably not worth it to count subdirs to get the + * correct link count. nlink=1 seems to pacify 'find' and + * other utilities. + */ + if (is_dir && OVL_TYPE_MERGE(type)) + stat->nlink = 1; + + /* + * Return the overlay inode nlinks for indexed upper inodes. + * Overlay inode nlink counts the union of the upper hardlinks + * and non-covered lower hardlinks. It does not include the upper + * index hardlink. + */ + if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry))) + stat->nlink = dentry->d_inode->i_nlink; + +out: revert_creds(old_cred); + return err; } int ovl_permission(struct inode *inode, int mask) { - bool is_upper; - struct inode *realinode = ovl_inode_real(inode, &is_upper); + struct inode *upperinode = ovl_inode_upper(inode); + struct inode *realinode = upperinode ?: ovl_inode_lower(inode); const struct cred *old_cred; int err; @@ -92,7 +168,8 @@ int ovl_permission(struct inode *inode, int mask) return err; old_cred = ovl_override_creds(inode->i_sb); - if (!is_upper && !special_file(realinode->i_mode) && mask & MAY_WRITE) { + if (!upperinode && + !special_file(realinode->i_mode) && mask & MAY_WRITE) { mask &= ~(MAY_WRITE | MAY_APPEND); /* Make sure mounter can read file for copy up later */ mask |= MAY_READ; @@ -125,37 +202,38 @@ bool ovl_is_private_xattr(const char *name) sizeof(OVL_XATTR_PREFIX) - 1) == 0; } -int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags) +int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, + const void *value, size_t size, int flags) { int err; - struct path realpath; - enum ovl_path_type type = ovl_path_real(dentry, &realpath); + struct dentry *upperdentry = ovl_i_dentry_upper(inode); + struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); const struct cred *old_cred; err = ovl_want_write(dentry); if (err) goto out; - if (!value && !OVL_TYPE_UPPER(type)) { - err = vfs_getxattr(realpath.dentry, name, NULL, 0); + if (!value && !upperdentry) { + err = vfs_getxattr(realdentry, name, NULL, 0); if (err < 0) goto out_drop_write; } - err = ovl_copy_up(dentry); - if (err) - goto out_drop_write; + if (!upperdentry) { + err = ovl_copy_up(dentry); + if (err) + goto out_drop_write; - if (!OVL_TYPE_UPPER(type)) - ovl_path_upper(dentry, &realpath); + realdentry = ovl_dentry_upper(dentry); + } old_cred = ovl_override_creds(dentry->d_sb); if (value) - err = vfs_setxattr(realpath.dentry, name, value, size, flags); + err = vfs_setxattr(realdentry, name, value, size, flags); else { WARN_ON(flags != XATTR_REPLACE); - err = vfs_removexattr(realpath.dentry, name); + err = vfs_removexattr(realdentry, name); } revert_creds(old_cred); @@ -165,12 +243,13 @@ out: return err; } -int ovl_xattr_get(struct dentry *dentry, const char *name, +int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { - struct dentry *realdentry = ovl_dentry_real(dentry); ssize_t res; const struct cred *old_cred; + struct dentry *realdentry = + ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry); old_cred = ovl_override_creds(dentry->d_sb); res = vfs_getxattr(realdentry, name, value, size); @@ -178,6 +257,16 @@ int ovl_xattr_get(struct dentry *dentry, const char *name, return res; } +static bool ovl_can_list(const char *s) +{ + /* List all non-trusted xatts */ + if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) + return true; + + /* Never list trusted.overlay, list other trusted for superuser only */ + return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN); +} + ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { struct dentry *realdentry = ovl_dentry_real(dentry); @@ -201,7 +290,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) return -EIO; len -= slen; - if (ovl_is_private_xattr(s)) { + if (!ovl_can_list(s)) { res -= slen; memmove(s, s + slen, len); } else { @@ -214,7 +303,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) struct posix_acl *ovl_get_acl(struct inode *inode, int type) { - struct inode *realinode = ovl_inode_real(inode, NULL); + struct inode *realinode = ovl_inode_real(inode); const struct cred *old_cred; struct posix_acl *acl; @@ -228,13 +317,13 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type) return acl; } -static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, - struct dentry *realdentry) +static bool ovl_open_need_copy_up(struct dentry *dentry, int flags) { - if (OVL_TYPE_UPPER(type)) + if (ovl_dentry_upper(dentry) && + ovl_dentry_has_upper_alias(dentry)) return false; - if (special_file(realdentry->d_inode->i_mode)) + if (special_file(d_inode(dentry)->i_mode)) return false; if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC)) @@ -246,11 +335,8 @@ static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags) { int err = 0; - struct path realpath; - enum ovl_path_type type; - type = ovl_path_real(dentry, &realpath); - if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) { + if (ovl_open_need_copy_up(dentry, file_flags)) { err = ovl_want_write(dentry); if (!err) { err = ovl_copy_up_flags(dentry, file_flags); @@ -301,6 +387,41 @@ static const struct inode_operations ovl_symlink_inode_operations = { .update_time = ovl_update_time, }; +/* + * It is possible to stack overlayfs instance on top of another + * overlayfs instance as lower layer. We need to annonate the + * stackable i_mutex locks according to stack level of the super + * block instance. An overlayfs instance can never be in stack + * depth 0 (there is always a real fs below it). An overlayfs + * inode lock will use the lockdep annotaion ovl_i_mutex_key[depth]. + * + * For example, here is a snip from /proc/lockdep_chains after + * dir_iterate of nested overlayfs: + * + * [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2) + * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1) + * [...] &type->i_mutex_dir_key (stack_depth=0) + */ +#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH + +static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) +{ +#ifdef CONFIG_LOCKDEP + static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING]; + static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING]; + + int depth = inode->i_sb->s_stack_depth - 1; + + if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING)) + depth = 0; + + if (S_ISDIR(inode->i_mode)) + lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]); + else + lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]); +#endif +} + static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) { inode->i_ino = get_next_ino(); @@ -310,6 +431,8 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; #endif + ovl_lockdep_annotate_inode_mutex_key(inode); + switch (mode & S_IFMT) { case S_IFREG: inode->i_op = &ovl_file_inode_operations; @@ -331,6 +454,103 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) } } +/* + * With inodes index enabled, an overlay inode nlink counts the union of upper + * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure + * upper inode, the following nlink modifying operations can happen: + * + * 1. Lower hardlink copy up + * 2. Upper hardlink created, unlinked or renamed over + * 3. Lower hardlink whiteout or renamed over + * + * For the first, copy up case, the union nlink does not change, whether the + * operation succeeds or fails, but the upper inode nlink may change. + * Therefore, before copy up, we store the union nlink value relative to the + * lower inode nlink in the index inode xattr trusted.overlay.nlink. + * + * For the second, upper hardlink case, the union nlink should be incremented + * or decremented IFF the operation succeeds, aligned with nlink change of the + * upper inode. Therefore, before link/unlink/rename, we store the union nlink + * value relative to the upper inode nlink in the index inode. + * + * For the last, lower cover up case, we simplify things by preceding the + * whiteout or cover up with copy up. This makes sure that there is an index + * upper inode where the nlink xattr can be stored before the copied up upper + * entry is unlink. + */ +#define OVL_NLINK_ADD_UPPER (1 << 0) + +/* + * On-disk format for indexed nlink: + * + * nlink relative to the upper inode - "U[+-]NUM" + * nlink relative to the lower inode - "L[+-]NUM" + */ + +static int ovl_set_nlink_common(struct dentry *dentry, + struct dentry *realdentry, const char *format) +{ + struct inode *inode = d_inode(dentry); + struct inode *realinode = d_inode(realdentry); + char buf[13]; + int len; + + len = snprintf(buf, sizeof(buf), format, + (int) (inode->i_nlink - realinode->i_nlink)); + + return ovl_do_setxattr(ovl_dentry_upper(dentry), + OVL_XATTR_NLINK, buf, len, 0); +} + +int ovl_set_nlink_upper(struct dentry *dentry) +{ + return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i"); +} + +int ovl_set_nlink_lower(struct dentry *dentry) +{ + return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i"); +} + +unsigned int ovl_get_nlink(struct dentry *lowerdentry, + struct dentry *upperdentry, + unsigned int fallback) +{ + int nlink_diff; + int nlink; + char buf[13]; + int err; + + if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1) + return fallback; + + err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1); + if (err < 0) + goto fail; + + buf[err] = '\0'; + if ((buf[0] != 'L' && buf[0] != 'U') || + (buf[1] != '+' && buf[1] != '-')) + goto fail; + + err = kstrtoint(buf + 1, 10, &nlink_diff); + if (err < 0) + goto fail; + + nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink; + nlink += nlink_diff; + + if (nlink <= 0) + goto fail; + + return nlink; + +fail: + pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n", + upperdentry, err); + return fallback; +} + struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev) { struct inode *inode; @@ -344,27 +564,87 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev) static int ovl_inode_test(struct inode *inode, void *data) { - return ovl_inode_real(inode, NULL) == data; + return inode->i_private == data; } static int ovl_inode_set(struct inode *inode, void *data) { - inode->i_private = (void *) (((unsigned long) data) | OVL_ISUPPER_MASK); + inode->i_private = data; return 0; } -struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode) +static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, + struct dentry *upperdentry) +{ + struct inode *lowerinode = lowerdentry ? d_inode(lowerdentry) : NULL; + + /* Lower (origin) inode must match, even if NULL */ + if (ovl_inode_lower(inode) != lowerinode) + return false; + + /* + * Allow non-NULL __upperdentry in inode even if upperdentry is NULL. + * This happens when finding a lower alias for a copied up hard link. + */ + if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry)) + return false; + + return true; +} +struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry) { + struct dentry *lowerdentry = ovl_dentry_lower(dentry); + struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; struct inode *inode; - inode = iget5_locked(sb, (unsigned long) realinode, - ovl_inode_test, ovl_inode_set, realinode); - if (inode && inode->i_state & I_NEW) { - ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev); - set_nlink(inode, realinode->i_nlink); - unlock_new_inode(inode); + if (!realinode) + realinode = d_inode(lowerdentry); + + if (!S_ISDIR(realinode->i_mode) && + (upperdentry || (lowerdentry && ovl_indexdir(dentry->d_sb)))) { + struct inode *key = d_inode(lowerdentry ?: upperdentry); + unsigned int nlink; + + inode = iget5_locked(dentry->d_sb, (unsigned long) key, + ovl_inode_test, ovl_inode_set, key); + if (!inode) + goto out_nomem; + if (!(inode->i_state & I_NEW)) { + /* + * Verify that the underlying files stored in the inode + * match those in the dentry. + */ + if (!ovl_verify_inode(inode, lowerdentry, upperdentry)) { + iput(inode); + inode = ERR_PTR(-ESTALE); + goto out; + } + + dput(upperdentry); + goto out; + } + + nlink = ovl_get_nlink(lowerdentry, upperdentry, + realinode->i_nlink); + set_nlink(inode, nlink); + } else { + inode = new_inode(dentry->d_sb); + if (!inode) + goto out_nomem; } + ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev); + ovl_inode_init(inode, upperdentry, lowerdentry); + + if (upperdentry && ovl_is_impuredir(upperdentry)) + ovl_set_flag(OVL_IMPURE, inode); + if (inode->i_state & I_NEW) + unlock_new_inode(inode); +out: return inode; + +out_nomem: + inode = ERR_PTR(-ENOMEM); + goto out; } diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 023bb0b03352..8aef2b304b2d 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -8,9 +8,12 @@ */ #include <linux/fs.h> +#include <linux/cred.h> #include <linux/namei.h> #include <linux/xattr.h> #include <linux/ratelimit.h> +#include <linux/mount.h> +#include <linux/exportfs.h> #include "overlayfs.h" #include "ovl_entry.h" @@ -80,19 +83,109 @@ invalid: goto err_free; } -static bool ovl_is_opaquedir(struct dentry *dentry) +static int ovl_acceptable(void *ctx, struct dentry *dentry) +{ + return 1; +} + +static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) { int res; - char val; + struct ovl_fh *fh = NULL; + + res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0); + if (res < 0) { + if (res == -ENODATA || res == -EOPNOTSUPP) + return NULL; + goto fail; + } + /* Zero size value means "copied up but origin unknown" */ + if (res == 0) + return NULL; - if (!d_is_dir(dentry)) - return false; + fh = kzalloc(res, GFP_TEMPORARY); + if (!fh) + return ERR_PTR(-ENOMEM); - res = vfs_getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1); - if (res == 1 && val == 'y') - return true; + res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res); + if (res < 0) + goto fail; + + if (res < sizeof(struct ovl_fh) || res < fh->len) + goto invalid; + + if (fh->magic != OVL_FH_MAGIC) + goto invalid; + + /* Treat larger version and unknown flags as "origin unknown" */ + if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL) + goto out; + + /* Treat endianness mismatch as "origin unknown" */ + if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) && + (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN) + goto out; + + return fh; + +out: + kfree(fh); + return NULL; - return false; +fail: + pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res); + goto out; +invalid: + pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh); + goto out; +} + +static struct dentry *ovl_get_origin(struct dentry *dentry, + struct vfsmount *mnt) +{ + struct dentry *origin = NULL; + struct ovl_fh *fh = ovl_get_origin_fh(dentry); + int bytes; + + if (IS_ERR_OR_NULL(fh)) + return (struct dentry *)fh; + + /* + * Make sure that the stored uuid matches the uuid of the lower + * layer where file handle will be decoded. + */ + if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid)) + goto out; + + bytes = (fh->len - offsetof(struct ovl_fh, fid)); + origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid, + bytes >> 2, (int)fh->type, + ovl_acceptable, NULL); + if (IS_ERR(origin)) { + /* Treat stale file handle as "origin unknown" */ + if (origin == ERR_PTR(-ESTALE)) + origin = NULL; + goto out; + } + + if (ovl_dentry_weird(origin) || + ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT)) + goto invalid; + +out: + kfree(fh); + return origin; + +invalid: + pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin); + dput(origin); + origin = NULL; + goto out; +} + +static bool ovl_is_opaquedir(struct dentry *dentry) +{ + return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE); } static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, @@ -191,6 +284,274 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, return 0; } + +static int ovl_check_origin(struct dentry *upperdentry, + struct path *lowerstack, unsigned int numlower, + struct path **stackp, unsigned int *ctrp) +{ + struct vfsmount *mnt; + struct dentry *origin = NULL; + int i; + + + for (i = 0; i < numlower; i++) { + mnt = lowerstack[i].mnt; + origin = ovl_get_origin(upperdentry, mnt); + if (IS_ERR(origin)) + return PTR_ERR(origin); + + if (origin) + break; + } + + if (!origin) + return 0; + + BUG_ON(*ctrp); + if (!*stackp) + *stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY); + if (!*stackp) { + dput(origin); + return -ENOMEM; + } + **stackp = (struct path) { .dentry = origin, .mnt = mnt }; + *ctrp = 1; + + return 0; +} + +/* + * Verify that @fh matches the origin file handle stored in OVL_XATTR_ORIGIN. + * Return 0 on match, -ESTALE on mismatch, < 0 on error. + */ +static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh) +{ + struct ovl_fh *ofh = ovl_get_origin_fh(dentry); + int err = 0; + + if (!ofh) + return -ENODATA; + + if (IS_ERR(ofh)) + return PTR_ERR(ofh); + + if (fh->len != ofh->len || memcmp(fh, ofh, fh->len)) + err = -ESTALE; + + kfree(ofh); + return err; +} + +/* + * Verify that an inode matches the origin file handle stored in upper inode. + * + * If @set is true and there is no stored file handle, encode and store origin + * file handle in OVL_XATTR_ORIGIN. + * + * Return 0 on match, -ESTALE on mismatch, < 0 on error. + */ +int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt, + struct dentry *origin, bool is_upper, bool set) +{ + struct inode *inode; + struct ovl_fh *fh; + int err; + + fh = ovl_encode_fh(origin, is_upper); + err = PTR_ERR(fh); + if (IS_ERR(fh)) + goto fail; + + err = ovl_verify_origin_fh(dentry, fh); + if (set && err == -ENODATA) + err = ovl_do_setxattr(dentry, OVL_XATTR_ORIGIN, fh, fh->len, 0); + if (err) + goto fail; + +out: + kfree(fh); + return err; + +fail: + inode = d_inode(origin); + pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n", + origin, inode ? inode->i_ino : 0, err); + goto out; +} + +/* + * Verify that an index entry name matches the origin file handle stored in + * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path. + * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error. + */ +int ovl_verify_index(struct dentry *index, struct path *lowerstack, + unsigned int numlower) +{ + struct ovl_fh *fh = NULL; + size_t len; + struct path origin = { }; + struct path *stack = &origin; + unsigned int ctr = 0; + int err; + + if (!d_inode(index)) + return 0; + + /* + * Directory index entries are going to be used for looking up + * redirected upper dirs by lower dir fh when decoding an overlay + * file handle of a merge dir. Whiteout index entries are going to be + * used as an indication that an exported overlay file handle should + * be treated as stale (i.e. after unlink of the overlay inode). + * We don't know the verification rules for directory and whiteout + * index entries, because they have not been implemented yet, so return + * EROFS if those entries are found to avoid corrupting an index that + * was created by a newer kernel. + */ + err = -EROFS; + if (d_is_dir(index) || ovl_is_whiteout(index)) + goto fail; + + err = -EINVAL; + if (index->d_name.len < sizeof(struct ovl_fh)*2) + goto fail; + + err = -ENOMEM; + len = index->d_name.len / 2; + fh = kzalloc(len, GFP_TEMPORARY); + if (!fh) + goto fail; + + err = -EINVAL; + if (hex2bin((u8 *)fh, index->d_name.name, len) || len != fh->len) + goto fail; + + err = ovl_verify_origin_fh(index, fh); + if (err) + goto fail; + + err = ovl_check_origin(index, lowerstack, numlower, &stack, &ctr); + if (!err && !ctr) + err = -ESTALE; + if (err) + goto fail; + + /* Check if index is orphan and don't warn before cleaning it */ + if (d_inode(index)->i_nlink == 1 && + ovl_get_nlink(index, origin.dentry, 0) == 0) + err = -ENOENT; + + dput(origin.dentry); +out: + kfree(fh); + return err; + +fail: + pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n", + index, d_inode(index)->i_mode & S_IFMT, err); + goto out; +} + +/* + * Lookup in indexdir for the index entry of a lower real inode or a copy up + * origin inode. The index entry name is the hex representation of the lower + * inode file handle. + * + * If the index dentry in negative, then either no lower aliases have been + * copied up yet, or aliases have been copied up in older kernels and are + * not indexed. + * + * If the index dentry for a copy up origin inode is positive, but points + * to an inode different than the upper inode, then either the upper inode + * has been copied up and not indexed or it was indexed, but since then + * index dir was cleared. Either way, that index cannot be used to indentify + * the overlay inode. + */ +int ovl_get_index_name(struct dentry *origin, struct qstr *name) +{ + int err; + struct ovl_fh *fh; + char *n, *s; + + fh = ovl_encode_fh(origin, false); + if (IS_ERR(fh)) + return PTR_ERR(fh); + + err = -ENOMEM; + n = kzalloc(fh->len * 2, GFP_TEMPORARY); + if (n) { + s = bin2hex(n, fh, fh->len); + *name = (struct qstr) QSTR_INIT(n, s - n); + err = 0; + } + kfree(fh); + + return err; + +} + +static struct dentry *ovl_lookup_index(struct dentry *dentry, + struct dentry *upper, + struct dentry *origin) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + struct dentry *index; + struct inode *inode; + struct qstr name; + int err; + + err = ovl_get_index_name(origin, &name); + if (err) + return ERR_PTR(err); + + index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len); + if (IS_ERR(index)) { + pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n" + "overlayfs: mount with '-o index=off' to disable inodes index.\n", + d_inode(origin)->i_ino, name.len, name.name, + err); + goto out; + } + + inode = d_inode(index); + if (d_is_negative(index)) { + if (upper && d_inode(origin)->i_nlink > 1) { + pr_warn_ratelimited("overlayfs: hard link with origin but no index (ino=%lu).\n", + d_inode(origin)->i_ino); + goto fail; + } + + dput(index); + index = NULL; + } else if (upper && d_inode(upper) != inode) { + pr_warn_ratelimited("overlayfs: wrong index found (index=%pd2, ino=%lu, upper ino=%lu).\n", + index, inode->i_ino, d_inode(upper)->i_ino); + goto fail; + } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) || + ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) { + /* + * Index should always be of the same file type as origin + * except for the case of a whiteout index. A whiteout + * index should only exist if all lower aliases have been + * unlinked, which means that finding a lower origin on lookup + * whose index is a whiteout should be treated as an error. + */ + pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n", + index, d_inode(index)->i_mode & S_IFMT, + d_inode(origin)->i_mode & S_IFMT); + goto fail; + } + +out: + kfree(name.name); + return index; + +fail: + dput(index); + index = ERR_PTR(-EIO); + goto out; +} + /* * Returns next layer in stack starting from top. * Returns -1 if this is the last layer. @@ -219,8 +580,10 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, const struct cred *old_cred; struct ovl_fs *ofs = dentry->d_sb->s_fs_info; struct ovl_entry *poe = dentry->d_parent->d_fsdata; + struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata; struct path *stack = NULL; struct dentry *upperdir, *upperdentry = NULL; + struct dentry *index = NULL; unsigned int ctr = 0; struct inode *inode = NULL; bool upperopaque = false; @@ -241,7 +604,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(-ENAMETOOLONG); old_cred = ovl_override_creds(dentry->d_sb); - upperdir = ovl_upperdentry_dereference(poe); + upperdir = ovl_dentry_upper(dentry->d_parent); if (upperdir) { err = ovl_lookup_layer(upperdir, &d, &upperdentry); if (err) @@ -252,13 +615,30 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, err = -EREMOTE; goto out; } + if (upperdentry && !d.is_dir) { + BUG_ON(!d.stop || d.redirect); + /* + * Lookup copy up origin by decoding origin file handle. + * We may get a disconnected dentry, which is fine, + * because we only need to hold the origin inode in + * cache and use its inode number. We may even get a + * connected dentry, that is not under any of the lower + * layers root. That is also fine for using it's inode + * number - it's the same as if we held a reference + * to a dentry in lower layer that was moved under us. + */ + err = ovl_check_origin(upperdentry, roe->lowerstack, + roe->numlower, &stack, &ctr); + if (err) + goto out; + } if (d.redirect) { upperredirect = kstrdup(d.redirect, GFP_KERNEL); if (!upperredirect) goto out_put_upper; if (d.redirect[0] == '/') - poe = dentry->d_sb->s_root->d_fsdata; + poe = roe; } upperopaque = d.opaque; } @@ -289,10 +669,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (d.stop) break; - if (d.redirect && - d.redirect[0] == '/' && - poe != dentry->d_sb->s_root->d_fsdata) { - poe = dentry->d_sb->s_root->d_fsdata; + if (d.redirect && d.redirect[0] == '/' && poe != roe) { + poe = roe; /* Find the current layer on the root dentry */ for (i = 0; i < poe->numlower; i++) @@ -303,47 +681,56 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, } } + /* Lookup index by lower inode and verify it matches upper inode */ + if (ctr && !d.is_dir && ovl_indexdir(dentry->d_sb)) { + struct dentry *origin = stack[0].dentry; + + index = ovl_lookup_index(dentry, upperdentry, origin); + if (IS_ERR(index)) { + err = PTR_ERR(index); + index = NULL; + goto out_put; + } + } + oe = ovl_alloc_entry(ctr); err = -ENOMEM; if (!oe) goto out_put; - if (upperdentry || ctr) { - struct dentry *realdentry; - struct inode *realinode; + oe->opaque = upperopaque; + memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); + dentry->d_fsdata = oe; - realdentry = upperdentry ? upperdentry : stack[0].dentry; - realinode = d_inode(realdentry); + if (upperdentry) + ovl_dentry_set_upper_alias(dentry); + else if (index) + upperdentry = dget(index); - err = -ENOMEM; - if (upperdentry && !d_is_dir(upperdentry)) { - inode = ovl_get_inode(dentry->d_sb, realinode); - } else { - inode = ovl_new_inode(dentry->d_sb, realinode->i_mode, - realinode->i_rdev); - if (inode) - ovl_inode_init(inode, realinode, !!upperdentry); - } - if (!inode) + if (upperdentry || ctr) { + inode = ovl_get_inode(dentry, upperdentry); + err = PTR_ERR(inode); + if (IS_ERR(inode)) goto out_free_oe; - ovl_copyattr(realdentry->d_inode, inode); + + OVL_I(inode)->redirect = upperredirect; + if (index) + ovl_set_flag(OVL_INDEX, inode); } revert_creds(old_cred); - oe->opaque = upperopaque; - oe->redirect = upperredirect; - oe->__upperdentry = upperdentry; - memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); + dput(index); kfree(stack); kfree(d.redirect); - dentry->d_fsdata = oe; d_add(dentry, inode); return NULL; out_free_oe: + dentry->d_fsdata = NULL; kfree(oe); out_put: + dput(index); for (i = 0; i < ctr; i++) dput(stack[i].dentry); kfree(stack); @@ -373,7 +760,7 @@ bool ovl_lower_positive(struct dentry *dentry) return oe->opaque; /* Negative upper -> positive lower */ - if (!oe->__upperdentry) + if (!ovl_dentry_upper(dentry)) return true; /* Positive upper -> have to look up lower to see whether it exists */ diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 8af450b0e57a..e927a62c97ae 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -8,20 +8,66 @@ */ #include <linux/kernel.h> +#include <linux/uuid.h> enum ovl_path_type { __OVL_PATH_UPPER = (1 << 0), __OVL_PATH_MERGE = (1 << 1), + __OVL_PATH_ORIGIN = (1 << 2), }; #define OVL_TYPE_UPPER(type) ((type) & __OVL_PATH_UPPER) #define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE) +#define OVL_TYPE_ORIGIN(type) ((type) & __OVL_PATH_ORIGIN) #define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay." #define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque" #define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect" +#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin" +#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure" +#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink" -#define OVL_ISUPPER_MASK 1UL +enum ovl_flag { + OVL_IMPURE, + OVL_INDEX, +}; + +/* + * The tuple (fh,uuid) is a universal unique identifier for a copy up origin, + * where: + * origin.fh - exported file handle of the lower file + * origin.uuid - uuid of the lower filesystem + */ +#define OVL_FH_VERSION 0 +#define OVL_FH_MAGIC 0xfb + +/* CPU byte order required for fid decoding: */ +#define OVL_FH_FLAG_BIG_ENDIAN (1 << 0) +#define OVL_FH_FLAG_ANY_ENDIAN (1 << 1) +/* Is the real inode encoded in fid an upper inode? */ +#define OVL_FH_FLAG_PATH_UPPER (1 << 2) + +#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN | \ + OVL_FH_FLAG_PATH_UPPER) + +#if defined(__LITTLE_ENDIAN) +#define OVL_FH_FLAG_CPU_ENDIAN 0 +#elif defined(__BIG_ENDIAN) +#define OVL_FH_FLAG_CPU_ENDIAN OVL_FH_FLAG_BIG_ENDIAN +#else +#error Endianness not defined +#endif + +/* On-disk and in-memeory format for redirect by file handle */ +struct ovl_fh { + u8 version; /* 0 */ + u8 magic; /* 0xfb */ + u8 len; /* size of this header + size of fid */ + u8 flags; /* OVL_FH_FLAG_* */ + u8 type; /* fid_type of fid */ + uuid_t uuid; /* uuid of filesystem */ + u8 fid[0]; /* file identifier */ +} __packed; static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry) { @@ -127,14 +173,13 @@ static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry) return err; } -static inline struct inode *ovl_inode_real(struct inode *inode, bool *is_upper) +static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode) { - unsigned long x = (unsigned long) READ_ONCE(inode->i_private); - - if (is_upper) - *is_upper = x & OVL_ISUPPER_MASK; + struct dentry *ret = vfs_tmpfile(dentry, mode, 0); + int err = IS_ERR(ret) ? PTR_ERR(ret) : 0; - return (struct inode *) (x & ~OVL_ISUPPER_MASK); + pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err); + return ret; } /* util.c */ @@ -142,6 +187,9 @@ int ovl_want_write(struct dentry *dentry); void ovl_drop_write(struct dentry *dentry); struct dentry *ovl_workdir(struct dentry *dentry); const struct cred *ovl_override_creds(struct super_block *sb); +struct super_block *ovl_same_sb(struct super_block *sb); +bool ovl_can_decode_fh(struct super_block *sb); +struct dentry *ovl_indexdir(struct super_block *sb); struct ovl_entry *ovl_alloc_entry(unsigned int numlower); bool ovl_dentry_remote(struct dentry *dentry); bool ovl_dentry_weird(struct dentry *dentry); @@ -152,25 +200,53 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); struct dentry *ovl_dentry_upper(struct dentry *dentry); struct dentry *ovl_dentry_lower(struct dentry *dentry); struct dentry *ovl_dentry_real(struct dentry *dentry); +struct dentry *ovl_i_dentry_upper(struct inode *inode); +struct inode *ovl_inode_upper(struct inode *inode); +struct inode *ovl_inode_lower(struct inode *inode); +struct inode *ovl_inode_real(struct inode *inode); struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry); void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache); bool ovl_dentry_is_opaque(struct dentry *dentry); bool ovl_dentry_is_whiteout(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry); +bool ovl_dentry_has_upper_alias(struct dentry *dentry); +void ovl_dentry_set_upper_alias(struct dentry *dentry); bool ovl_redirect_dir(struct super_block *sb); -void ovl_clear_redirect_dir(struct super_block *sb); const char *ovl_dentry_get_redirect(struct dentry *dentry); void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect); -void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); -void ovl_inode_init(struct inode *inode, struct inode *realinode, - bool is_upper); -void ovl_inode_update(struct inode *inode, struct inode *upperinode); +void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, + struct dentry *lowerdentry); +void ovl_inode_update(struct inode *inode, struct dentry *upperdentry); void ovl_dentry_version_inc(struct dentry *dentry); u64 ovl_dentry_version_get(struct dentry *dentry); bool ovl_is_whiteout(struct dentry *dentry); struct file *ovl_path_open(struct path *path, int flags); +int ovl_copy_up_start(struct dentry *dentry); +void ovl_copy_up_end(struct dentry *dentry); +bool ovl_check_dir_xattr(struct dentry *dentry, const char *name); +int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, + const char *name, const void *value, size_t size, + int xerr); +int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry); +void ovl_set_flag(unsigned long flag, struct inode *inode); +bool ovl_test_flag(unsigned long flag, struct inode *inode); +bool ovl_inuse_trylock(struct dentry *dentry); +void ovl_inuse_unlock(struct dentry *dentry); +int ovl_nlink_start(struct dentry *dentry, bool *locked); +void ovl_nlink_end(struct dentry *dentry, bool locked); + +static inline bool ovl_is_impuredir(struct dentry *dentry) +{ + return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE); +} + /* namei.c */ +int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt, + struct dentry *origin, bool is_upper, bool set); +int ovl_verify_index(struct dentry *index, struct path *lowerstack, + unsigned int numlower); +int ovl_get_index_name(struct dentry *origin, struct qstr *name); int ovl_path_next(int idx, struct dentry *dentry, struct path *path); struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); bool ovl_lower_positive(struct dentry *dentry); @@ -183,13 +259,22 @@ void ovl_cache_free(struct list_head *list); int ovl_check_d_type_supported(struct path *realpath); void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, struct dentry *dentry, int level); +int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, + struct path *lowerstack, unsigned int numlower); /* inode.c */ +int ovl_set_nlink_upper(struct dentry *dentry); +int ovl_set_nlink_lower(struct dentry *dentry); +unsigned int ovl_get_nlink(struct dentry *lowerdentry, + struct dentry *upperdentry, + unsigned int fallback); int ovl_setattr(struct dentry *dentry, struct iattr *attr); +int ovl_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); int ovl_permission(struct inode *inode, int mask); -int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags); -int ovl_xattr_get(struct dentry *dentry, const char *name, +int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, + const void *value, size_t size, int flags); +int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); struct posix_acl *ovl_get_acl(struct inode *inode, int type); @@ -198,7 +283,7 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); bool ovl_is_private_xattr(const char *name); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); -struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode); +struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry); static inline void ovl_copyattr(struct inode *from, struct inode *to) { to->i_uid = from->i_uid; @@ -211,7 +296,7 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) /* dir.c */ extern const struct inode_operations ovl_dir_inode_operations; -struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry); +struct dentry *ovl_lookup_temp(struct dentry *workdir); struct cattr { dev_t rdev; umode_t mode; @@ -220,10 +305,11 @@ struct cattr { int ovl_create_real(struct inode *dir, struct dentry *newdentry, struct cattr *attr, struct dentry *hardlink, bool debug); -void ovl_cleanup(struct inode *dir, struct dentry *dentry); +int ovl_cleanup(struct inode *dir, struct dentry *dentry); /* copy_up.c */ int ovl_copy_up(struct dentry *dentry); int ovl_copy_up_flags(struct dentry *dentry, int flags); int ovl_copy_xattr(struct dentry *old, struct dentry *new); int ovl_set_attr(struct dentry *upper, struct kstat *stat); +struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index d14bca1850d9..878a750986dd 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -14,6 +14,7 @@ struct ovl_config { char *workdir; bool default_permissions; bool redirect_dir; + bool index; }; /* private information held for overlayfs's superblock */ @@ -21,22 +22,28 @@ struct ovl_fs { struct vfsmount *upper_mnt; unsigned numlower; struct vfsmount **lower_mnt; + /* workbasedir is the path at workdir= mount option */ + struct dentry *workbasedir; + /* workdir is the 'work' directory under workbasedir */ struct dentry *workdir; + /* index directory listing overlay inodes by origin file handle */ + struct dentry *indexdir; long namelen; /* pathnames of lower and upper dirs, for show_options */ struct ovl_config config; /* creds of process who forced instantiation of super block */ const struct cred *creator_cred; + bool tmpfile; + bool noxattr; + /* sb common to all layers */ + struct super_block *same_sb; }; /* private information held for every overlayfs dentry */ struct ovl_entry { - struct dentry *__upperdentry; - struct ovl_dir_cache *cache; union { struct { - u64 version; - const char *redirect; + unsigned long has_upper; bool opaque; }; struct rcu_head rcu; @@ -47,7 +54,25 @@ struct ovl_entry { struct ovl_entry *ovl_alloc_entry(unsigned int numlower); -static inline struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) +struct ovl_inode { + struct ovl_dir_cache *cache; + const char *redirect; + u64 version; + unsigned long flags; + struct inode vfs_inode; + struct dentry *__upperdentry; + struct inode *lower; + + /* synchronize copy up and more */ + struct mutex lock; +}; + +static inline struct ovl_inode *OVL_I(struct inode *inode) +{ + return container_of(inode, struct ovl_inode, vfs_inode); +} + +static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi) { - return lockless_dereference(oe->__upperdentry); + return lockless_dereference(oi->__upperdentry); } diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index f241b4ee3d8a..3d424a51cabb 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -667,3 +667,56 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt, ovl_cleanup(dir, dentry); } } + +int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, + struct path *lowerstack, unsigned int numlower) +{ + int err; + struct inode *dir = dentry->d_inode; + struct path path = { .mnt = mnt, .dentry = dentry }; + LIST_HEAD(list); + struct ovl_cache_entry *p; + struct ovl_readdir_data rdd = { + .ctx.actor = ovl_fill_merge, + .dentry = NULL, + .list = &list, + .root = RB_ROOT, + .is_lowest = false, + }; + + err = ovl_dir_read(&path, &rdd); + if (err) + goto out; + + inode_lock_nested(dir, I_MUTEX_PARENT); + list_for_each_entry(p, &list, l_node) { + struct dentry *index; + + if (p->name[0] == '.') { + if (p->len == 1) + continue; + if (p->len == 2 && p->name[1] == '.') + continue; + } + index = lookup_one_len(p->name, dentry, p->len); + if (IS_ERR(index)) { + err = PTR_ERR(index); + break; + } + err = ovl_verify_index(index, lowerstack, numlower); + if (err) { + if (err == -EROFS) + break; + err = ovl_cleanup(dir, index); + if (err) + break; + } + dput(index); + } + inode_unlock(dir); +out: + ovl_cache_free(&list); + if (err) + pr_err("overlayfs: failed index dir cleanup (%i)\n", err); + return err; +} diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 20f48abbb82f..d86e89f97201 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -7,6 +7,7 @@ * the Free Software Foundation. */ +#include <uapi/linux/magic.h> #include <linux/fs.h> #include <linux/namei.h> #include <linux/xattr.h> @@ -33,6 +34,11 @@ module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644); MODULE_PARM_DESC(ovl_redirect_dir_def, "Default to on or off for the redirect_dir feature"); +static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); +module_param_named(index, ovl_index_def, bool, 0644); +MODULE_PARM_DESC(ovl_index_def, + "Default to on or off for the inodes index feature"); + static void ovl_dentry_release(struct dentry *dentry) { struct ovl_entry *oe = dentry->d_fsdata; @@ -40,19 +46,34 @@ static void ovl_dentry_release(struct dentry *dentry) if (oe) { unsigned int i; - dput(oe->__upperdentry); - kfree(oe->redirect); for (i = 0; i < oe->numlower; i++) dput(oe->lowerstack[i].dentry); kfree_rcu(oe, rcu); } } +static int ovl_check_append_only(struct inode *inode, int flag) +{ + /* + * This test was moot in vfs may_open() because overlay inode does + * not have the S_APPEND flag, so re-check on real upper inode + */ + if (IS_APPEND(inode)) { + if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND)) + return -EPERM; + if (flag & O_TRUNC) + return -EPERM; + } + + return 0; +} + static struct dentry *ovl_d_real(struct dentry *dentry, const struct inode *inode, unsigned int open_flags) { struct dentry *real; + int err; if (!d_is_reg(dentry)) { if (!inode || inode == d_inode(dentry)) @@ -64,15 +85,20 @@ static struct dentry *ovl_d_real(struct dentry *dentry, return dentry; if (open_flags) { - int err = ovl_open_maybe_copy_up(dentry, open_flags); - + err = ovl_open_maybe_copy_up(dentry, open_flags); if (err) return ERR_PTR(err); } real = ovl_dentry_upper(dentry); - if (real && (!inode || inode == d_inode(real))) + if (real && (!inode || inode == d_inode(real))) { + if (!inode) { + err = ovl_check_append_only(d_inode(real), open_flags); + if (err) + return ERR_PTR(err); + } return real; + } real = ovl_dentry_lower(dentry); if (!real) @@ -142,12 +168,52 @@ static const struct dentry_operations ovl_reval_dentry_operations = { .d_weak_revalidate = ovl_dentry_weak_revalidate, }; +static struct kmem_cache *ovl_inode_cachep; + +static struct inode *ovl_alloc_inode(struct super_block *sb) +{ + struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL); + + oi->cache = NULL; + oi->redirect = NULL; + oi->version = 0; + oi->flags = 0; + oi->__upperdentry = NULL; + oi->lower = NULL; + mutex_init(&oi->lock); + + return &oi->vfs_inode; +} + +static void ovl_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + + kmem_cache_free(ovl_inode_cachep, OVL_I(inode)); +} + +static void ovl_destroy_inode(struct inode *inode) +{ + struct ovl_inode *oi = OVL_I(inode); + + dput(oi->__upperdentry); + kfree(oi->redirect); + mutex_destroy(&oi->lock); + + call_rcu(&inode->i_rcu, ovl_i_callback); +} + static void ovl_put_super(struct super_block *sb) { struct ovl_fs *ufs = sb->s_fs_info; unsigned i; + dput(ufs->indexdir); dput(ufs->workdir); + ovl_inuse_unlock(ufs->workbasedir); + dput(ufs->workbasedir); + if (ufs->upper_mnt) + ovl_inuse_unlock(ufs->upper_mnt->mnt_root); mntput(ufs->upper_mnt); for (i = 0; i < ufs->numlower; i++) mntput(ufs->lower_mnt[i]); @@ -160,6 +226,25 @@ static void ovl_put_super(struct super_block *sb) kfree(ufs); } +static int ovl_sync_fs(struct super_block *sb, int wait) +{ + struct ovl_fs *ufs = sb->s_fs_info; + struct super_block *upper_sb; + int ret; + + if (!ufs->upper_mnt) + return 0; + upper_sb = ufs->upper_mnt->mnt_sb; + if (!upper_sb->s_op->sync_fs) + return 0; + + /* real inodes have already been synced by sync_filesystem(ovl_sb) */ + down_read(&upper_sb->s_umount); + ret = upper_sb->s_op->sync_fs(upper_sb, wait); + up_read(&upper_sb->s_umount); + return ret; +} + /** * ovl_statfs * @sb: The overlayfs super block @@ -186,6 +271,12 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) return err; } +/* Will this overlay be forced to mount/remount ro? */ +static bool ovl_force_readonly(struct ovl_fs *ufs) +{ + return (!ufs->upper_mnt || !ufs->workdir); +} + /** * ovl_show_options * @@ -207,6 +298,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) if (ufs->config.redirect_dir != ovl_redirect_dir_def) seq_printf(m, ",redirect_dir=%s", ufs->config.redirect_dir ? "on" : "off"); + if (ufs->config.index != ovl_index_def) + seq_printf(m, ",index=%s", + ufs->config.index ? "on" : "off"); return 0; } @@ -214,18 +308,21 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data) { struct ovl_fs *ufs = sb->s_fs_info; - if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir)) + if (!(*flags & MS_RDONLY) && ovl_force_readonly(ufs)) return -EROFS; return 0; } static const struct super_operations ovl_super_operations = { + .alloc_inode = ovl_alloc_inode, + .destroy_inode = ovl_destroy_inode, + .drop_inode = generic_delete_inode, .put_super = ovl_put_super, + .sync_fs = ovl_sync_fs, .statfs = ovl_statfs, .show_options = ovl_show_options, .remount_fs = ovl_remount, - .drop_inode = generic_delete_inode, }; enum { @@ -235,6 +332,8 @@ enum { OPT_DEFAULT_PERMISSIONS, OPT_REDIRECT_DIR_ON, OPT_REDIRECT_DIR_OFF, + OPT_INDEX_ON, + OPT_INDEX_OFF, OPT_ERR, }; @@ -245,6 +344,8 @@ static const match_table_t ovl_tokens = { {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, {OPT_REDIRECT_DIR_ON, "redirect_dir=on"}, {OPT_REDIRECT_DIR_OFF, "redirect_dir=off"}, + {OPT_INDEX_ON, "index=on"}, + {OPT_INDEX_OFF, "index=off"}, {OPT_ERR, NULL} }; @@ -317,6 +418,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) config->redirect_dir = false; break; + case OPT_INDEX_ON: + config->index = true; + break; + + case OPT_INDEX_OFF: + config->index = false; + break; + default: pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); return -EINVAL; @@ -335,23 +444,29 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) } #define OVL_WORKDIR_NAME "work" +#define OVL_INDEXDIR_NAME "index" -static struct dentry *ovl_workdir_create(struct vfsmount *mnt, - struct dentry *dentry) +static struct dentry *ovl_workdir_create(struct super_block *sb, + struct ovl_fs *ufs, + struct dentry *dentry, + const char *name, bool persist) { struct inode *dir = dentry->d_inode; + struct vfsmount *mnt = ufs->upper_mnt; struct dentry *work; int err; bool retried = false; + bool locked = false; err = mnt_want_write(mnt); if (err) - return ERR_PTR(err); + goto out_err; inode_lock_nested(dir, I_MUTEX_PARENT); + locked = true; + retry: - work = lookup_one_len(OVL_WORKDIR_NAME, dentry, - strlen(OVL_WORKDIR_NAME)); + work = lookup_one_len(name, dentry, strlen(name)); if (!IS_ERR(work)) { struct iattr attr = { @@ -364,6 +479,9 @@ retry: if (retried) goto out_dput; + if (persist) + goto out_unlock; + retried = true; ovl_workdir_cleanup(dir, mnt, work, 0); dput(work); @@ -403,16 +521,24 @@ retry: inode_unlock(work->d_inode); if (err) goto out_dput; + } else { + err = PTR_ERR(work); + goto out_err; } out_unlock: - inode_unlock(dir); mnt_drop_write(mnt); + if (locked) + inode_unlock(dir); return work; out_dput: dput(work); - work = ERR_PTR(err); +out_err: + pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", + ufs->config.workdir, name, -err); + sb->s_flags |= MS_RDONLY; + work = NULL; goto out_unlock; } @@ -512,6 +638,15 @@ static int ovl_lower_dir(const char *name, struct path *path, if (ovl_dentry_remote(path->dentry)) *remote = true; + /* + * The inodes index feature needs to encode and decode file + * handles, so it requires that all layers support them. + */ + if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) { + ofs->config.index = false; + pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name); + } + return 0; out_put: @@ -557,7 +692,7 @@ ovl_posix_acl_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size) { - return ovl_xattr_get(dentry, handler->name, buffer, size); + return ovl_xattr_get(dentry, inode, handler->name, buffer, size); } static int __maybe_unused @@ -567,7 +702,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler, size_t size, int flags) { struct dentry *workdir = ovl_workdir(dentry); - struct inode *realinode = ovl_inode_real(inode, NULL); + struct inode *realinode = ovl_inode_real(inode); struct posix_acl *acl = NULL; int err; @@ -607,9 +742,9 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler, return err; } - err = ovl_xattr_set(dentry, handler->name, value, size, flags); + err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags); if (!err) - ovl_copyattr(ovl_inode_real(inode, NULL), inode); + ovl_copyattr(ovl_inode_real(inode), inode); return err; @@ -637,7 +772,7 @@ static int ovl_other_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size) { - return ovl_xattr_get(dentry, name, buffer, size); + return ovl_xattr_get(dentry, inode, name, buffer, size); } static int ovl_other_xattr_set(const struct xattr_handler *handler, @@ -645,7 +780,7 @@ static int ovl_other_xattr_set(const struct xattr_handler *handler, const char *name, const void *value, size_t size, int flags) { - return ovl_xattr_set(dentry, name, value, size, flags); + return ovl_xattr_set(dentry, inode, name, value, size, flags); } static const struct xattr_handler __maybe_unused @@ -688,10 +823,9 @@ static const struct xattr_handler *ovl_xattr_handlers[] = { static int ovl_fill_super(struct super_block *sb, void *data, int silent) { - struct path upperpath = { NULL, NULL }; - struct path workpath = { NULL, NULL }; + struct path upperpath = { }; + struct path workpath = { }; struct dentry *root_dentry; - struct inode *realinode; struct ovl_entry *oe; struct ovl_fs *ufs; struct path *stack = NULL; @@ -701,6 +835,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) unsigned int stacklen = 0; unsigned int i; bool remote = false; + struct cred *cred; int err; err = -ENOMEM; @@ -709,6 +844,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) goto out; ufs->config.redirect_dir = ovl_redirect_dir_def; + ufs->config.index = ovl_index_def; err = ovl_parse_opt((char *) data, &ufs->config); if (err) goto out_free_config; @@ -743,9 +879,15 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (err) goto out_put_upperpath; + err = -EBUSY; + if (!ovl_inuse_trylock(upperpath.dentry)) { + pr_err("overlayfs: upperdir is in-use by another mount\n"); + goto out_put_upperpath; + } + err = ovl_mount_dir(ufs->config.workdir, &workpath); if (err) - goto out_put_upperpath; + goto out_unlock_upperdentry; err = -EINVAL; if (upperpath.mnt != workpath.mnt) { @@ -756,12 +898,20 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); goto out_put_workpath; } + + err = -EBUSY; + if (!ovl_inuse_trylock(workpath.dentry)) { + pr_err("overlayfs: workdir is in-use by another mount\n"); + goto out_put_workpath; + } + + ufs->workbasedir = workpath.dentry; sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; } err = -ENOMEM; lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL); if (!lowertmp) - goto out_put_workpath; + goto out_unlock_workdentry; err = -EINVAL; stacklen = ovl_split_lowerdirs(lowertmp); @@ -804,20 +954,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) pr_err("overlayfs: failed to clone upperpath\n"); goto out_put_lowerpath; } + /* Don't inherit atime flags */ ufs->upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME); sb->s_time_gran = ufs->upper_mnt->mnt_sb->s_time_gran; - ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); - err = PTR_ERR(ufs->workdir); - if (IS_ERR(ufs->workdir)) { - pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", - ufs->config.workdir, OVL_WORKDIR_NAME, -err); - sb->s_flags |= MS_RDONLY; - ufs->workdir = NULL; - } - + ufs->workdir = ovl_workdir_create(sb, ufs, workpath.dentry, + OVL_WORKDIR_NAME, false); /* * Upper should support d_type, else whiteouts are visible. * Given workdir and upper are on same fs, we can do @@ -825,6 +969,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) * creation of workdir in previous step. */ if (ufs->workdir) { + struct dentry *temp; + err = ovl_check_d_type_supported(&workpath); if (err < 0) goto out_put_workdir; @@ -836,6 +982,34 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) */ if (!err) pr_warn("overlayfs: upper fs needs to support d_type.\n"); + + /* Check if upper/work fs supports O_TMPFILE */ + temp = ovl_do_tmpfile(ufs->workdir, S_IFREG | 0); + ufs->tmpfile = !IS_ERR(temp); + if (ufs->tmpfile) + dput(temp); + else + pr_warn("overlayfs: upper fs does not support tmpfile.\n"); + + /* + * Check if upper/work fs supports trusted.overlay.* + * xattr + */ + err = ovl_do_setxattr(ufs->workdir, OVL_XATTR_OPAQUE, + "0", 1, 0); + if (err) { + ufs->noxattr = true; + pr_warn("overlayfs: upper fs does not support xattr.\n"); + } else { + vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE); + } + + /* Check if upper/work fs supports file handles */ + if (ufs->config.index && + !ovl_can_decode_fh(ufs->workdir->d_sb)) { + ufs->config.index = false; + pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); + } } } @@ -859,20 +1033,66 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) ufs->lower_mnt[ufs->numlower] = mnt; ufs->numlower++; + + /* Check if all lower layers are on same sb */ + if (i == 0) + ufs->same_sb = mnt->mnt_sb; + else if (ufs->same_sb != mnt->mnt_sb) + ufs->same_sb = NULL; } /* If the upper fs is nonexistent, we mark overlayfs r/o too */ if (!ufs->upper_mnt) sb->s_flags |= MS_RDONLY; + else if (ufs->upper_mnt->mnt_sb != ufs->same_sb) + ufs->same_sb = NULL; + + if (!(ovl_force_readonly(ufs)) && ufs->config.index) { + /* Verify lower root is upper root origin */ + err = ovl_verify_origin(upperpath.dentry, ufs->lower_mnt[0], + stack[0].dentry, false, true); + if (err) { + pr_err("overlayfs: failed to verify upper root origin\n"); + goto out_put_lower_mnt; + } + + ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry, + OVL_INDEXDIR_NAME, true); + if (ufs->indexdir) { + /* Verify upper root is index dir origin */ + err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt, + upperpath.dentry, true, true); + if (err) + pr_err("overlayfs: failed to verify index dir origin\n"); + + /* Cleanup bad/stale/orphan index entries */ + if (!err) + err = ovl_indexdir_cleanup(ufs->indexdir, + ufs->upper_mnt, + stack, numlower); + } + if (err || !ufs->indexdir) + pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); + if (err) + goto out_put_indexdir; + } + + /* Show index=off/on in /proc/mounts for any of the reasons above */ + if (!ufs->indexdir) + ufs->config.index = false; if (remote) sb->s_d_op = &ovl_reval_dentry_operations; else sb->s_d_op = &ovl_dentry_operations; - ufs->creator_cred = prepare_creds(); - if (!ufs->creator_cred) - goto out_put_lower_mnt; + err = -ENOMEM; + ufs->creator_cred = cred = prepare_creds(); + if (!cred) + goto out_put_indexdir; + + /* Never override disk quota limits or use reserved space */ + cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); err = -ENOMEM; oe = ovl_alloc_entry(numlower); @@ -892,10 +1112,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) mntput(upperpath.mnt); for (i = 0; i < numlower; i++) mntput(stack[i].mnt); - path_put(&workpath); + mntput(workpath.mnt); kfree(lowertmp); - oe->__upperdentry = upperpath.dentry; + if (upperpath.dentry) { + oe->has_upper = true; + if (ovl_is_impuredir(upperpath.dentry)) + ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); + } for (i = 0; i < numlower; i++) { oe->lowerstack[i].dentry = stack[i].dentry; oe->lowerstack[i].mnt = ufs->lower_mnt[i]; @@ -904,9 +1128,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) root_dentry->d_fsdata = oe; - realinode = d_inode(ovl_dentry_real(root_dentry)); - ovl_inode_init(d_inode(root_dentry), realinode, !!upperpath.dentry); - ovl_copyattr(realinode, d_inode(root_dentry)); + ovl_inode_init(d_inode(root_dentry), upperpath.dentry, + ovl_dentry_lower(root_dentry)); sb->s_root = root_dentry; @@ -916,6 +1139,8 @@ out_free_oe: kfree(oe); out_put_cred: put_cred(ufs->creator_cred); +out_put_indexdir: + dput(ufs->indexdir); out_put_lower_mnt: for (i = 0; i < ufs->numlower; i++) mntput(ufs->lower_mnt[i]); @@ -929,8 +1154,12 @@ out_put_lowerpath: kfree(stack); out_free_lowertmp: kfree(lowertmp); +out_unlock_workdentry: + ovl_inuse_unlock(workpath.dentry); out_put_workpath: path_put(&workpath); +out_unlock_upperdentry: + ovl_inuse_unlock(upperpath.dentry); out_put_upperpath: path_put(&upperpath); out_free_config: @@ -956,14 +1185,43 @@ static struct file_system_type ovl_fs_type = { }; MODULE_ALIAS_FS("overlay"); +static void ovl_inode_init_once(void *foo) +{ + struct ovl_inode *oi = foo; + + inode_init_once(&oi->vfs_inode); +} + static int __init ovl_init(void) { - return register_filesystem(&ovl_fs_type); + int err; + + ovl_inode_cachep = kmem_cache_create("ovl_inode", + sizeof(struct ovl_inode), 0, + (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD|SLAB_ACCOUNT), + ovl_inode_init_once); + if (ovl_inode_cachep == NULL) + return -ENOMEM; + + err = register_filesystem(&ovl_fs_type); + if (err) + kmem_cache_destroy(ovl_inode_cachep); + + return err; } static void __exit ovl_exit(void) { unregister_filesystem(&ovl_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + kmem_cache_destroy(ovl_inode_cachep); + } module_init(ovl_init); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 952286f4826c..f46ad75dc96a 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -10,7 +10,12 @@ #include <linux/fs.h> #include <linux/mount.h> #include <linux/slab.h> +#include <linux/cred.h> #include <linux/xattr.h> +#include <linux/exportfs.h> +#include <linux/uuid.h> +#include <linux/namei.h> +#include <linux/ratelimit.h> #include "overlayfs.h" #include "ovl_entry.h" @@ -39,6 +44,26 @@ const struct cred *ovl_override_creds(struct super_block *sb) return override_creds(ofs->creator_cred); } +struct super_block *ovl_same_sb(struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + return ofs->same_sb; +} + +bool ovl_can_decode_fh(struct super_block *sb) +{ + return (sb->s_export_op && sb->s_export_op->fh_to_dentry && + !uuid_is_null(&sb->s_uuid)); +} + +struct dentry *ovl_indexdir(struct super_block *sb) +{ + struct ovl_fs *ofs = sb->s_fs_info; + + return ofs->indexdir; +} + struct ovl_entry *ovl_alloc_entry(unsigned int numlower) { size_t size = offsetof(struct ovl_entry, lowerstack[numlower]); @@ -70,15 +95,17 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) struct ovl_entry *oe = dentry->d_fsdata; enum ovl_path_type type = 0; - if (oe->__upperdentry) { + if (ovl_dentry_upper(dentry)) { type = __OVL_PATH_UPPER; /* - * Non-dir dentry can hold lower dentry from previous - * location. + * Non-dir dentry can hold lower dentry of its copy up origin. */ - if (oe->numlower && d_is_dir(dentry)) - type |= __OVL_PATH_MERGE; + if (oe->numlower) { + type |= __OVL_PATH_ORIGIN; + if (d_is_dir(dentry)) + type |= __OVL_PATH_MERGE; + } } else { if (oe->numlower > 1) type |= __OVL_PATH_MERGE; @@ -89,17 +116,16 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) void ovl_path_upper(struct dentry *dentry, struct path *path) { struct ovl_fs *ofs = dentry->d_sb->s_fs_info; - struct ovl_entry *oe = dentry->d_fsdata; path->mnt = ofs->upper_mnt; - path->dentry = ovl_upperdentry_dereference(oe); + path->dentry = ovl_dentry_upper(dentry); } void ovl_path_lower(struct dentry *dentry, struct path *path) { struct ovl_entry *oe = dentry->d_fsdata; - *path = oe->numlower ? oe->lowerstack[0] : (struct path) { NULL, NULL }; + *path = oe->numlower ? oe->lowerstack[0] : (struct path) { }; } enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) @@ -116,47 +142,52 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) struct dentry *ovl_dentry_upper(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - - return ovl_upperdentry_dereference(oe); + return ovl_upperdentry_dereference(OVL_I(d_inode(dentry))); } -static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe) +struct dentry *ovl_dentry_lower(struct dentry *dentry) { + struct ovl_entry *oe = dentry->d_fsdata; + return oe->numlower ? oe->lowerstack[0].dentry : NULL; } -struct dentry *ovl_dentry_lower(struct dentry *dentry) +struct dentry *ovl_dentry_real(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; + return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry); +} - return __ovl_dentry_lower(oe); +struct dentry *ovl_i_dentry_upper(struct inode *inode) +{ + return ovl_upperdentry_dereference(OVL_I(inode)); } -struct dentry *ovl_dentry_real(struct dentry *dentry) +struct inode *ovl_inode_upper(struct inode *inode) { - struct ovl_entry *oe = dentry->d_fsdata; - struct dentry *realdentry; + struct dentry *upperdentry = ovl_i_dentry_upper(inode); + + return upperdentry ? d_inode(upperdentry) : NULL; +} - realdentry = ovl_upperdentry_dereference(oe); - if (!realdentry) - realdentry = __ovl_dentry_lower(oe); +struct inode *ovl_inode_lower(struct inode *inode) +{ + return OVL_I(inode)->lower; +} - return realdentry; +struct inode *ovl_inode_real(struct inode *inode) +{ + return ovl_inode_upper(inode) ?: ovl_inode_lower(inode); } + struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - - return oe->cache; + return OVL_I(d_inode(dentry))->cache; } void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache) { - struct ovl_entry *oe = dentry->d_fsdata; - - oe->cache = cache; + OVL_I(d_inode(dentry))->cache = cache; } bool ovl_dentry_is_opaque(struct dentry *dentry) @@ -177,79 +208,87 @@ void ovl_dentry_set_opaque(struct dentry *dentry) oe->opaque = true; } -bool ovl_redirect_dir(struct super_block *sb) +/* + * For hard links it's possible for ovl_dentry_upper() to return positive, while + * there's no actual upper alias for the inode. Copy up code needs to know + * about the existence of the upper alias, so it can't use ovl_dentry_upper(). + */ +bool ovl_dentry_has_upper_alias(struct dentry *dentry) { - struct ovl_fs *ofs = sb->s_fs_info; + struct ovl_entry *oe = dentry->d_fsdata; + + return oe->has_upper; +} + +void ovl_dentry_set_upper_alias(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; - return ofs->config.redirect_dir; + oe->has_upper = true; } -void ovl_clear_redirect_dir(struct super_block *sb) +bool ovl_redirect_dir(struct super_block *sb) { struct ovl_fs *ofs = sb->s_fs_info; - ofs->config.redirect_dir = false; + return ofs->config.redirect_dir && !ofs->noxattr; } const char *ovl_dentry_get_redirect(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; - - return oe->redirect; + return OVL_I(d_inode(dentry))->redirect; } void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect) { - struct ovl_entry *oe = dentry->d_fsdata; + struct ovl_inode *oi = OVL_I(d_inode(dentry)); - kfree(oe->redirect); - oe->redirect = redirect; + kfree(oi->redirect); + oi->redirect = redirect; } -void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry) +void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, + struct dentry *lowerdentry) { - struct ovl_entry *oe = dentry->d_fsdata; + if (upperdentry) + OVL_I(inode)->__upperdentry = upperdentry; + if (lowerdentry) + OVL_I(inode)->lower = d_inode(lowerdentry); - WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode)); - WARN_ON(oe->__upperdentry); - /* - * Make sure upperdentry is consistent before making it visible to - * ovl_upperdentry_dereference(). - */ - smp_wmb(); - oe->__upperdentry = upperdentry; + ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode); } -void ovl_inode_init(struct inode *inode, struct inode *realinode, bool is_upper) +void ovl_inode_update(struct inode *inode, struct dentry *upperdentry) { - WRITE_ONCE(inode->i_private, (unsigned long) realinode | - (is_upper ? OVL_ISUPPER_MASK : 0)); -} + struct inode *upperinode = d_inode(upperdentry); -void ovl_inode_update(struct inode *inode, struct inode *upperinode) -{ - WARN_ON(!upperinode); - WARN_ON(!inode_unhashed(inode)); - WRITE_ONCE(inode->i_private, - (unsigned long) upperinode | OVL_ISUPPER_MASK); - if (!S_ISDIR(upperinode->i_mode)) + WARN_ON(OVL_I(inode)->__upperdentry); + + /* + * Make sure upperdentry is consistent before making it visible + */ + smp_wmb(); + OVL_I(inode)->__upperdentry = upperdentry; + if (!S_ISDIR(upperinode->i_mode) && inode_unhashed(inode)) { + inode->i_private = upperinode; __insert_inode_hash(inode, (unsigned long) upperinode); + } } void ovl_dentry_version_inc(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; + struct inode *inode = d_inode(dentry); - WARN_ON(!inode_is_locked(dentry->d_inode)); - oe->version++; + WARN_ON(!inode_is_locked(inode)); + OVL_I(inode)->version++; } u64 ovl_dentry_version_get(struct dentry *dentry) { - struct ovl_entry *oe = dentry->d_fsdata; + struct inode *inode = d_inode(dentry); - WARN_ON(!inode_is_locked(dentry->d_inode)); - return oe->version; + WARN_ON(!inode_is_locked(inode)); + return OVL_I(inode)->version; } bool ovl_is_whiteout(struct dentry *dentry) @@ -263,3 +302,246 @@ struct file *ovl_path_open(struct path *path, int flags) { return dentry_open(path, flags | O_NOATIME, current_cred()); } + +int ovl_copy_up_start(struct dentry *dentry) +{ + struct ovl_inode *oi = OVL_I(d_inode(dentry)); + int err; + + err = mutex_lock_interruptible(&oi->lock); + if (!err && ovl_dentry_has_upper_alias(dentry)) { + err = 1; /* Already copied up */ + mutex_unlock(&oi->lock); + } + + return err; +} + +void ovl_copy_up_end(struct dentry *dentry) +{ + mutex_unlock(&OVL_I(d_inode(dentry))->lock); +} + +bool ovl_check_dir_xattr(struct dentry *dentry, const char *name) +{ + int res; + char val; + + if (!d_is_dir(dentry)) + return false; + + res = vfs_getxattr(dentry, name, &val, 1); + if (res == 1 && val == 'y') + return true; + + return false; +} + +int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, + const char *name, const void *value, size_t size, + int xerr) +{ + int err; + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + + if (ofs->noxattr) + return xerr; + + err = ovl_do_setxattr(upperdentry, name, value, size, 0); + + if (err == -EOPNOTSUPP) { + pr_warn("overlayfs: cannot set %s xattr on upper\n", name); + ofs->noxattr = true; + return xerr; + } + + return err; +} + +int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) +{ + int err; + + if (ovl_test_flag(OVL_IMPURE, d_inode(dentry))) + return 0; + + /* + * Do not fail when upper doesn't support xattrs. + * Upper inodes won't have origin nor redirect xattr anyway. + */ + err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE, + "y", 1, 0); + if (!err) + ovl_set_flag(OVL_IMPURE, d_inode(dentry)); + + return err; +} + +void ovl_set_flag(unsigned long flag, struct inode *inode) +{ + set_bit(flag, &OVL_I(inode)->flags); +} + +bool ovl_test_flag(unsigned long flag, struct inode *inode) +{ + return test_bit(flag, &OVL_I(inode)->flags); +} + +/** + * Caller must hold a reference to inode to prevent it from being freed while + * it is marked inuse. + */ +bool ovl_inuse_trylock(struct dentry *dentry) +{ + struct inode *inode = d_inode(dentry); + bool locked = false; + + spin_lock(&inode->i_lock); + if (!(inode->i_state & I_OVL_INUSE)) { + inode->i_state |= I_OVL_INUSE; + locked = true; + } + spin_unlock(&inode->i_lock); + + return locked; +} + +void ovl_inuse_unlock(struct dentry *dentry) +{ + if (dentry) { + struct inode *inode = d_inode(dentry); + + spin_lock(&inode->i_lock); + WARN_ON(!(inode->i_state & I_OVL_INUSE)); + inode->i_state &= ~I_OVL_INUSE; + spin_unlock(&inode->i_lock); + } +} + +/* Called must hold OVL_I(inode)->oi_lock */ +static void ovl_cleanup_index(struct dentry *dentry) +{ + struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode; + struct dentry *lowerdentry = ovl_dentry_lower(dentry); + struct dentry *upperdentry = ovl_dentry_upper(dentry); + struct dentry *index = NULL; + struct inode *inode; + struct qstr name; + int err; + + err = ovl_get_index_name(lowerdentry, &name); + if (err) + goto fail; + + inode = d_inode(upperdentry); + if (inode->i_nlink != 1) { + pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n", + upperdentry, inode->i_ino, inode->i_nlink); + /* + * We either have a bug with persistent union nlink or a lower + * hardlink was added while overlay is mounted. Adding a lower + * hardlink and then unlinking all overlay hardlinks would drop + * overlay nlink to zero before all upper inodes are unlinked. + * As a safety measure, when that situation is detected, set + * the overlay nlink to the index inode nlink minus one for the + * index entry itself. + */ + set_nlink(d_inode(dentry), inode->i_nlink - 1); + ovl_set_nlink_upper(dentry); + goto out; + } + + inode_lock_nested(dir, I_MUTEX_PARENT); + /* TODO: whiteout instead of cleanup to block future open by handle */ + index = lookup_one_len(name.name, ovl_indexdir(dentry->d_sb), name.len); + err = PTR_ERR(index); + if (!IS_ERR(index)) + err = ovl_cleanup(dir, index); + inode_unlock(dir); + if (err) + goto fail; + +out: + dput(index); + return; + +fail: + pr_err("overlayfs: cleanup index of '%pd2' failed (%i)\n", dentry, err); + goto out; +} + +/* + * Operations that change overlay inode and upper inode nlink need to be + * synchronized with copy up for persistent nlink accounting. + */ +int ovl_nlink_start(struct dentry *dentry, bool *locked) +{ + struct ovl_inode *oi = OVL_I(d_inode(dentry)); + const struct cred *old_cred; + int err; + + if (!d_inode(dentry) || d_is_dir(dentry)) + return 0; + + /* + * With inodes index is enabled, we store the union overlay nlink + * in an xattr on the index inode. When whiting out lower hardlinks + * we need to decrement the overlay persistent nlink, but before the + * first copy up, we have no upper index inode to store the xattr. + * + * As a workaround, before whiteout/rename over of a lower hardlink, + * copy up to create the upper index. Creating the upper index will + * initialize the overlay nlink, so it could be dropped if unlink + * or rename succeeds. + * + * TODO: implement metadata only index copy up when called with + * ovl_copy_up_flags(dentry, O_PATH). + */ + if (ovl_indexdir(dentry->d_sb) && !ovl_dentry_has_upper_alias(dentry) && + d_inode(ovl_dentry_lower(dentry))->i_nlink > 1) { + err = ovl_copy_up(dentry); + if (err) + return err; + } + + err = mutex_lock_interruptible(&oi->lock); + if (err) + return err; + + if (!ovl_test_flag(OVL_INDEX, d_inode(dentry))) + goto out; + + old_cred = ovl_override_creds(dentry->d_sb); + /* + * The overlay inode nlink should be incremented/decremented IFF the + * upper operation succeeds, along with nlink change of upper inode. + * Therefore, before link/unlink/rename, we store the union nlink + * value relative to the upper inode nlink in an upper inode xattr. + */ + err = ovl_set_nlink_upper(dentry); + revert_creds(old_cred); + +out: + if (err) + mutex_unlock(&oi->lock); + else + *locked = true; + + return err; +} + +void ovl_nlink_end(struct dentry *dentry, bool locked) +{ + if (locked) { + if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) && + d_inode(dentry)->i_nlink == 0) { + const struct cred *old_cred; + + old_cred = ovl_override_creds(dentry->d_sb); + ovl_cleanup_index(dentry); + revert_creds(old_cred); + } + + mutex_unlock(&OVL_I(d_inode(dentry))->lock); + } +} |