diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 266d2059b9b8..dfbcd1b00b0a 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -365,8 +365,8 @@ must be done in the RCU callback. [recommended] vfs now tries to do path walking in "rcu-walk mode", which avoids atomic operations and scalability hazards on dentries and inodes (see -Documentation/filesystems/path-walk.txt). d_hash and d_compare changes (above) -are examples of the changes required to support this. For more complex +Documentation/filesystems/path-lookup.txt). d_hash and d_compare changes +(above) are examples of the changes required to support this. For more complex filesystem callbacks, the vfs drops out of rcu-walk mode before the fs call, so no changes are required to the filesystem. However, this is costly and loses the benefits of rcu-walk mode. We will begin to add filesystem callbacks that @@ -383,8 +383,8 @@ Documentation/filesystems/vfs.txt for more details. permission and check_acl are inode permission checks that are called on many or all directory inodes on the way down a path walk (to check for -exec permission). These must now be rcu-walk aware (flags & IPERM_RCU). See -Documentation/filesystems/vfs.txt for more details. +exec permission). These must now be rcu-walk aware (flags & IPERM_FLAG_RCU). +See Documentation/filesystems/vfs.txt for more details. -- [mandatory] diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index fbb324e2bd43..cae6d27c9f5b 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -415,8 +415,8 @@ otherwise noted. permission: called by the VFS to check for access rights on a POSIX-like filesystem. - May be called in rcu-walk mode (flags & IPERM_RCU). If in rcu-walk - mode, the filesystem must check the permission without blocking or + May be called in rcu-walk mode (flags & IPERM_FLAG_RCU). If in rcu-walk + mode, the filesystem must check the permission without blocking or storing to the inode. If a situation is encountered that rcu-walk cannot handle, return diff --git a/fs/namei.c b/fs/namei.c index 0b14f6910fc6..86643302079e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -479,6 +479,14 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry struct fs_struct *fs = current->fs; struct dentry *parent = nd->path.dentry; + /* + * It can be possible to revalidate the dentry that we started + * the path walk with. force_reval_path may also revalidate the + * dentry already committed to the nameidata. + */ + if (unlikely(parent == dentry)) + return nameidata_drop_rcu(nd); + BUG_ON(!(nd->flags & LOOKUP_RCU)); if (nd->root.mnt) { spin_lock(&fs->lock); @@ -583,6 +591,13 @@ void release_open_intent(struct nameidata *nd) fput(nd->intent.open.file); } +/* + * Call d_revalidate and handle filesystems that request rcu-walk + * to be dropped. This may be called and return in rcu-walk mode, + * regardless of success or error. If -ECHILD is returned, the caller + * must return -ECHILD back up the path walk stack so path walk may + * be restarted in ref-walk mode. + */ static int d_revalidate(struct dentry *dentry, struct nameidata *nd) { int status; @@ -673,6 +688,9 @@ force_reval_path(struct path *path, struct nameidata *nd) return 0; if (!status) { + /* Don't d_invalidate in rcu-walk mode */ + if (nameidata_drop_rcu(nd)) + return -ECHILD; d_invalidate(dentry); status = -ESTALE; } @@ -2105,11 +2123,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path, dir = nd->path.dentry; case LAST_DOT: if (need_reval_dot(dir)) { - error = d_revalidate(nd->path.dentry, nd); - if (!error) - error = -ESTALE; - if (error < 0) + int status = d_revalidate(nd->path.dentry, nd); + if (!status) + status = -ESTALE; + if (status < 0) { + error = status; goto exit; + } } /* fallthrough */ case LAST_ROOT: diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 64ee240f3c80..df8c03a02161 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1406,11 +1406,15 @@ no_open: static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) { struct dentry *parent = NULL; - struct inode *inode = dentry->d_inode; + struct inode *inode; struct inode *dir; struct nfs_open_context *ctx; int openflags, ret = 0; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + inode = dentry->d_inode; if (!is_atomic_open(nd) || d_mountpoint(dentry)) goto no_open; diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index 9ee97e7f2be4..b2adbb4b2f73 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -16,7 +16,7 @@ * some fast and compact auxiliary data. */ -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +#if defined(CONFIG_SMP) #define LIST_BL_LOCKMASK 1UL #else #define LIST_BL_LOCKMASK 0UL @@ -62,7 +62,8 @@ static inline void hlist_bl_set_first(struct hlist_bl_head *h, struct hlist_bl_node *n) { LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); - LIST_BL_BUG_ON(!((unsigned long)h->first & LIST_BL_LOCKMASK)); + LIST_BL_BUG_ON(((unsigned long)h->first & LIST_BL_LOCKMASK) != + LIST_BL_LOCKMASK); h->first = (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK); } diff --git a/include/linux/rculist_bl.h b/include/linux/rculist_bl.h index b872b493724d..cf1244fbf3b6 100644 --- a/include/linux/rculist_bl.h +++ b/include/linux/rculist_bl.h @@ -11,7 +11,8 @@ static inline void hlist_bl_set_first_rcu(struct hlist_bl_head *h, struct hlist_bl_node *n) { LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); - LIST_BL_BUG_ON(!((unsigned long)h->first & LIST_BL_LOCKMASK)); + LIST_BL_BUG_ON(((unsigned long)h->first & LIST_BL_LOCKMASK) != + LIST_BL_LOCKMASK); rcu_assign_pointer(h->first, (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK)); }